Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revisit demultiplexing, sv cli commands #191

Merged
merged 29 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a00dc51
standardize "workflow start" table into function
pdimens Feb 3, 2025
0c523e5
add shebang
pdimens Feb 3, 2025
2e6cc74
add proper support for new python deconv
pdimens Feb 3, 2025
e4c862d
add missing comma
pdimens Feb 3, 2025
ab624c9
fix the output name
pdimens Feb 3, 2025
fbaf587
rm draft script
pdimens Feb 3, 2025
f3f0e89
rm the compiled program
pdimens Feb 3, 2025
d07cf66
fix typo
pdimens Feb 3, 2025
1f5235e
rabbit fixes, better validation
pdimens Feb 3, 2025
4060a9d
fix schema validations, add more context
pdimens Feb 4, 2025
49e7177
rm error handling from script b/c harpy takes care of it in pre-proce…
pdimens Feb 4, 2025
7b986b1
make qx and rx tags opt-in #195
pdimens Feb 4, 2025
c9a2153
make gzip a separate parallelized call
pdimens Feb 4, 2025
6390c63
clean up barcode creation code
pdimens Feb 4, 2025
79bcd53
additional validation, as per the rabbit's suggestion
pdimens Feb 4, 2025
6d9dd59
Update harpy/scripts/demultiplex_gen1.py
pdimens Feb 5, 2025
7bed404
touchups
pdimens Feb 5, 2025
feaeb83
rm index markdup rule bc of redundancy in assign_mi.py
pdimens Feb 5, 2025
3075f63
--min-sv -> --min-size
pdimens Feb 5, 2025
2c07a2f
eschew indexing altogether
pdimens Feb 5, 2025
da9d0a8
fixes revisit sv leviathan cli options #196
pdimens Feb 5, 2025
6b8b46b
update text
pdimens Feb 5, 2025
9dd9349
add `0` as special value for align molecule threshold #199
pdimens Feb 5, 2025
31379c9
add option to disable linked-read stuff within align bwa/strobe #193
pdimens Feb 5, 2025
4c0be04
move ignore-bx to workflow controls, add it as an option to qc
pdimens Feb 5, 2025
b67ef22
rm commented out block
pdimens Feb 5, 2025
67d947d
add missing hyphen (oops)
pdimens Feb 5, 2025
34942c0
better arg name
pdimens Feb 5, 2025
dc0703d
fix param name, update readme pixi instructions
pdimens Feb 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions harpy/_conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ def create_conda_recipes(outdir: str, envs: list=None) -> None:
"bioconda::samtools",
"bioconda::tigmint"
],
"demultiplex": [
"bioconda::pheniqs",
"bioconda::pysam",
"conda-forge::python-levenshtein"
],
"metassembly": [
"bioconda::athena_meta=1.2"
],
Expand Down
13 changes: 13 additions & 0 deletions harpy/_printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,16 @@ def print_onerror(logfile):
console.rule("[bold]Workflow Error", style = "red")
console.print(f"The workflow stopped because of an error. Full workflow log:\n[bold]{logfile}[/bold]")
console.rule("[bold]Where Error Occurred", style = "red")

def workflow_info(*arg):
    """
    Build the two-column "workflow start" summary table.

    Accepts an unlimited number of length-2 lists or tuples and returns a rich.Table
    in which the first element of each entry is the row name and the second element
    is the row value. Pass None (or any other falsy value, e.g. an empty tuple) in
    place of an entry to skip it, which is useful for conditional rows. The second
    element is always converted to a string before being added to the table.
    """
    table = Table(show_header=False, pad_edge=False, show_edge=False, padding=(0, 0), box=box.SIMPLE)
    table.add_column("detail", justify="left", style="light_steel_blue", no_wrap=True)
    table.add_column("value", justify="left")
    for entry in arg:
        # Check the individual entry, not the whole *arg tuple: the tuple is always
        # truthy inside this loop, so testing it would let None placeholders through
        # and fail on the subscript below.
        if entry:
            table.add_row(entry[0], str(entry[1]))
    return table
45 changes: 20 additions & 25 deletions harpy/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,14 @@
import sys
import yaml
from pathlib import Path
from rich import box
from rich.table import Table
import rich_click as click
from ._conda import create_conda_recipes
from ._misc import fetch_report, fetch_rule, snakemake_log
from ._cli_types_generic import ContigList, InputFile, HPCProfile, SnakemakeParams
from ._cli_types_params import BwaParams, EmaParams, StrobeAlignParams
from ._launch import launch_snakemake, SNAKEMAKE_CMD
from ._parsers import parse_fastq_inputs
from ._printing import print_error, print_solution, print_notice
from ._printing import print_error, print_solution, print_notice, workflow_info
from ._validations import check_fasta, fasta_contig_match, validate_barcodefile

@click.group(options_metavar='', context_settings={"help_option_names" : ["-h", "--help"]})
Expand Down Expand Up @@ -141,13 +139,12 @@ def bwa(inputs, output_dir, genome, depth_window, threads, keep_unmapped, extra_
if setup_only:
sys.exit(0)

start_text = Table(show_header=False,pad_edge=False, show_edge=False, padding = (0,0), box=box.SIMPLE)
start_text.add_column("detail", justify="left", style="light_steel_blue", no_wrap=True)
start_text.add_column("value", justify="left")
start_text.add_row("Samples:", f"{sample_count}")
start_text.add_row("Genome:", genome)
start_text.add_row("Output Folder:", output_dir + "/")
start_text.add_row("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
start_text = (
("Samples:",sample_count),
("Genome:", genome),
("Output Folder:", output_dir + "/"),
("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
)
launch_snakemake(command, "align_bwa", start_text, output_dir, sm_log, quiet, "workflow/align.bwa.summary")

@click.command(no_args_is_help = True, context_settings=dict(allow_interspersed_args=False), epilog = "Documentation: https://pdimens.github.io/harpy/workflows/align/ema")
Expand Down Expand Up @@ -251,14 +248,13 @@ def ema(inputs, output_dir, platform, barcode_list, fragment_density, genome, de
if setup_only:
sys.exit(0)

start_text = Table(show_header=False,pad_edge=False, show_edge=False, padding = (0,0), box=box.SIMPLE)
start_text.add_column("detail", justify="left", style="light_steel_blue", no_wrap=True)
start_text.add_column("value", justify="left")
start_text.add_row("Samples:", f"{sample_count}")
start_text.add_row("Genome:", genome)
start_text.add_row("Platform:", platform)
start_text.add_row("Output Folder:", output_dir + "/")
start_text.add_row("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
start_text = workflow_info(
("Samples:",sample_count),
("Genome:", genome),
("Platform:", platform),
("Output Folder:", output_dir + "/"),
("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
)
launch_snakemake(command, "align_ema", start_text, output_dir, sm_log, quiet, "workflow/align.ema.summary")

@click.command(no_args_is_help = True, epilog= "Documentation: https://pdimens.github.io/harpy/workflows/align/strobe/")
Expand Down Expand Up @@ -343,13 +339,12 @@ def strobe(inputs, output_dir, genome, read_length, keep_unmapped, depth_window,
if setup_only:
sys.exit(0)

start_text = Table(show_header=False,pad_edge=False, show_edge=False, padding = (0,0), box=box.SIMPLE)
start_text.add_column("detail", justify="left", style="light_steel_blue", no_wrap=True)
start_text.add_column("value", justify="left")
start_text.add_row("Samples:", f"{sample_count}")
start_text.add_row("Genome:", genome)
start_text.add_row("Output Folder:", output_dir + "/")
start_text.add_row("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
start_text = workflow_info(
("Samples:",sample_count),
("Genome:", genome),
("Output Folder:", output_dir + "/"),
("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
)
launch_snakemake(command, "align_strobe", start_text, output_dir, sm_log, quiet, "workflow/align.strobealign.summary")

align.add_command(bwa)
Expand Down
19 changes: 7 additions & 12 deletions harpy/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@
import os
import sys
import yaml
from rich import box
from rich.table import Table
import rich_click as click
from ._cli_types_generic import KParam, HPCProfile, SnakemakeParams
from ._cli_types_params import SpadesParams, ArcsParams
from ._conda import create_conda_recipes
from ._launch import launch_snakemake, SNAKEMAKE_CMD
from ._misc import fetch_rule, snakemake_log
from ._printing import workflow_info
from ._validations import validate_fastq_bx

docstring = {
Expand Down Expand Up @@ -129,14 +128,10 @@ def assembly(fastq_r1, fastq_r2, bx_tag, kmer_length, max_memory, output_dir, ex
if setup_only:
sys.exit(0)

start_text = Table(show_header=False,pad_edge=False, show_edge=False, padding = (0,0), box=box.SIMPLE)
start_text.add_column("detail", justify="left", style="light_steel_blue", no_wrap=True)
start_text.add_column("value", justify="left")
start_text.add_row("Barcode Tag: ", bx_tag.upper())
if kmer_length == "auto":
start_text.add_row("Kmer Length: ", "auto")
else:
start_text.add_row("Kmer Length: ", ",".join(map(str,kmer_length)))
start_text.add_row("Output Folder:", f"{output_dir}/")
start_text.add_row("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
start_text = workflow_info(
("Barcode Tag: ", bx_tag.upper()),
("Kmer Length: ", "auto") if kmer_length == "auto" else ("Kmer Length: ", ",".join(map(str,kmer_length))),
("Output Folder:", f"{output_dir}/"),
("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
)
launch_snakemake(command, asm, start_text, output_dir, sm_log, quiet, f"workflow/{asm}.summary")
Empty file modified harpy/bin/deconvolve_alignments.py
100644 → 100755
Empty file.
Binary file removed harpy/bin/demuxGen1
Binary file not shown.
193 changes: 0 additions & 193 deletions harpy/bin/dmux_gen1.py

This file was deleted.

2 changes: 1 addition & 1 deletion harpy/bin/haplotag_acbd.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
}

for BC in ["A","C","B","D"]:
with open(f"{outdir}/BC_{BC}.txt", "w", encoding="utf-8") as f:
with open(f"{outdir}/segment_{BC}.bc", "w", encoding="utf-8") as f:
ID = [f"{BC}{number:02d}" for number in range(1, 97)]
delim = [" ".join(tup) for tup in zip(ID, BX[BC])]
_ = [f.write(f"{i}\n") for i in delim]
14 changes: 6 additions & 8 deletions harpy/deconvolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@
import os
import sys
import yaml
from rich import box
from rich.table import Table
import rich_click as click
from ._cli_types_generic import HPCProfile, SnakemakeParams
from ._conda import create_conda_recipes
from ._launch import launch_snakemake, SNAKEMAKE_CMD
from ._misc import fetch_rule, snakemake_log
from ._parsers import parse_fastq_inputs
from ._printing import workflow_info

docstring = {
"harpy deconvolve": [
Expand Down Expand Up @@ -84,10 +83,9 @@ def deconvolve(inputs, output_dir, kmer_length, window_size, density, dropout, t
if setup_only:
sys.exit(0)

start_text = Table(show_header=False,pad_edge=False, show_edge=False, padding = (0,0), box=box.SIMPLE)
start_text.add_column("detail", justify="left", style="light_steel_blue", no_wrap=True)
start_text.add_column("value", justify="left")
start_text.add_row("Samples:", f"{sample_count}")
start_text.add_row("Output Folder:", output_dir + "/")
start_text.add_row("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
start_text = workflow_info(
("Samples:", sample_count),
("Output Folder:", output_dir + "/"),
("Workflow Log:", sm_log.replace(f"{output_dir}/", "") + "[dim].gz")
)
launch_snakemake(command, "deconvolve", start_text, output_dir, sm_log, quiet, "workflow/deconvolve.summary")
Loading
Loading