Skip to content

Commit

Permalink
more 2.0 milestones (#208)
Browse files Browse the repository at this point in the history
* add new integer-based quiet mode

* lean on callback function instead of manual int() call

* update to new quiet mode

* add diagnose

* better docstring

* add optional return length, just in case?

* convert wildcard constraints to python regex string

* set thread max to 999 to avoid snakemake error

* add catch for dry-run

* fix quiet features, rabbit input

* split simulate linkedreads and variants for sanity reasons

* why did you even disappear?

* rename test to assembly
  • Loading branch information
pdimens authored Feb 17, 2025
1 parent 9b923ad commit 9e2c68d
Show file tree
Hide file tree
Showing 44 changed files with 435 additions and 345 deletions.
4 changes: 2 additions & 2 deletions .github/filters.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,15 @@ phase: &phase
simvars: &simvars
- *common
- *container
- 'harpy/simulate.py'
- 'harpy/simulate_variants.py'
- 'harpy/snakefiles/simulate_snpindel.smk'
- 'harpy/snakefiles/simulate_variants.smk'
- 'test/vcf/test.bcf'
- 'harpy/scripts/simuG.pl'
simreads: &simreads
- *common
- *container
- 'harpy/simulate.py'
- 'harpy/simulate_linkedreads.py'
- 'harpy/snakefiles/simulate_linkedreads.smk'
- 'test/genome**gz'
- 'extractReads.cpp'
Expand Down
78 changes: 39 additions & 39 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ jobs:
# if: ${{ needs.changes.outputs.modules == 'true' }}
# run: |
# export APPTAINER_TMPDIR=$PWD/test/
# harpy qc --skip-reports --quiet test/fastq/sample1.*.fq.gz
# harpy qc --skip-reports --quiet 2 test/fastq/sample1.*.fq.gz
# - name: Create Singularity Artifact
# if: ${{ steps.singularity.outcome == 'success' }}
# uses: actions/upload-artifact@v4
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:
# path: .snakemake/singularity
- name: harpy demultiplex
shell: micromamba-shell {0}
run: harpy demultiplex gen1 --quiet --schema test/demux/samples.schema test/demux/Undetermined_S0_L004_R* test/demux/Undetermined_S0_L004_I*
run: harpy demultiplex gen1 --quiet 2 --schema test/demux/samples.schema test/demux/Undetermined_S0_L004_R* test/demux/Undetermined_S0_L004_I*

preflight:
needs: [changes]
Expand Down Expand Up @@ -193,7 +193,7 @@ jobs:
- name: test preflight bam
if: always()
shell: micromamba-shell {0}
run: harpy preflight bam --quiet test/bam
run: harpy preflight bam --quiet 2 test/bam

qc:
needs: [changes]
Expand Down Expand Up @@ -231,10 +231,10 @@ jobs:
# path: .snakemake/singularity
- name: harpy qc
shell: micromamba-shell {0}
run: harpy qc -x "--low_complexity_filter" --quiet test/fastq
run: harpy qc -x "--low_complexity_filter" --quiet 2 test/fastq
- name: harpy qc all options
shell: micromamba-shell {0}
run: harpy qc -a auto -d -c 21,40,3,0 --quiet test/fastq
run: harpy qc -a auto -d -c 21,40,3,0 --quiet 2 test/fastq
deconvolve:
needs: [changes]
if: ${{ needs.changes.outputs.deconvolve == 'true' }}
Expand Down Expand Up @@ -271,7 +271,7 @@ jobs:
# path: .snakemake/singularity
- name: harpy deconvolve
shell: micromamba-shell {0}
run: harpy deconvolve --quiet test/fastq
run: harpy deconvolve --quiet 2 test/fastq
bwa:
needs: [changes]
if: ${{ needs.changes.outputs.bwa == 'true' }}
Expand Down Expand Up @@ -308,7 +308,7 @@ jobs:
# path: .snakemake/singularity
- name: test bwa
shell: micromamba-shell {0}
run: harpy align bwa --quiet -g test/genome/genome.fasta.gz -x "-A 2" test/fastq
run: harpy align bwa --quiet 2 -g test/genome/genome.fasta.gz -x "-A 2" test/fastq

ema:
needs: [changes]
Expand Down Expand Up @@ -346,7 +346,7 @@ jobs:
# path: .snakemake/singularity
- name: test ema
shell: micromamba-shell {0}
run: harpy align ema --quiet --ema-bins 150 -g test/genome/genome.fasta.gz test/fastq
run: harpy align ema --quiet 2 --ema-bins 150 -g test/genome/genome.fasta.gz test/fastq

strobe:
needs: [changes]
Expand Down Expand Up @@ -384,7 +384,7 @@ jobs:
# path: .snakemake/singularity
- name: test strobealign
shell: micromamba-shell {0}
run: harpy align strobe --quiet -l 125 -g test/genome/genome.fasta.gz test/fastq
run: harpy align strobe --quiet 2 -l 125 -g test/genome/genome.fasta.gz test/fastq

mpileup:
needs: [changes]
Expand Down Expand Up @@ -422,10 +422,10 @@ jobs:
# path: .snakemake/singularity
- name: snp mpileup
shell: micromamba-shell {0}
run: harpy snp mpileup --quiet -r test/positions.bed -g test/genome/genome.fasta.gz -x "--ignore-RG" test/bam
run: harpy snp mpileup --quiet 2 -r test/positions.bed -g test/genome/genome.fasta.gz -x "--ignore-RG" test/bam
- name: snp mpileup-pop
shell: micromamba-shell {0}
run: harpy snp mpileup --quiet -r test/positions.bed -o SNP/poptest -g test/genome/genome.fasta.gz -p test/samples.groups test/bam
run: harpy snp mpileup --quiet 2 -r test/positions.bed -o SNP/poptest -g test/genome/genome.fasta.gz -p test/samples.groups test/bam

freebayes:
needs: [changes]
Expand Down Expand Up @@ -463,10 +463,10 @@ jobs:
# path: .snakemake/singularity
- name: snp freebayes
shell: micromamba-shell {0}
run: harpy snp freebayes --quiet -r test/positions.bed -g test/genome/genome.fasta.gz -x "-g 200" test/bam
run: harpy snp freebayes --quiet 2 -r test/positions.bed -g test/genome/genome.fasta.gz -x "-g 200" test/bam
- name: snp freebayes-pop
shell: micromamba-shell {0}
run: harpy snp freebayes --quiet -r test/positions.bed -o SNP/poptest -g test/genome/genome.fasta.gz -p test/samples.groups test/bam
run: harpy snp freebayes --quiet 2 -r test/positions.bed -o SNP/poptest -g test/genome/genome.fasta.gz -p test/samples.groups test/bam

impute:
needs: [changes]
Expand Down Expand Up @@ -504,11 +504,11 @@ jobs:
# path: .snakemake/singularity
- name: impute
shell: micromamba-shell {0}
run: harpy impute --quiet --vcf test/vcf/test.bcf -p test/stitch.params test/bam
run: harpy impute --quiet 2 --vcf test/vcf/test.bcf -p test/stitch.params test/bam
- name: impute from vcf
shell: micromamba-shell {0}
if: always()
run: harpy impute --quiet --vcf-samples -o vcfImpute --vcf test/vcf/test.bcf -p test/stitch.params test/bam
run: harpy impute --quiet 2 --vcf-samples -o vcfImpute --vcf test/vcf/test.bcf -p test/stitch.params test/bam

phase:
needs: [changes]
Expand Down Expand Up @@ -546,17 +546,17 @@ jobs:
# path: .snakemake/singularity
- name: phase
shell: micromamba-shell {0}
run: harpy phase --quiet --vcf test/vcf/test.bcf -x "--max_iter 10001" test/bam
run: harpy phase --quiet 2 --vcf test/vcf/test.bcf -x "--max_iter 10001" test/bam
- name: phase with indels
shell: micromamba-shell {0}
if: always()
run: harpy phase --quiet --vcf test/vcf/test.bcf -o phaseindel -g test/genome/genome.fasta.gz test/bam
run: harpy phase --quiet 2 --vcf test/vcf/test.bcf -o phaseindel -g test/genome/genome.fasta.gz test/bam
- name: phase from vcf
shell: micromamba-shell {0}
if: always()
run: |
cp test/bam/sample1.bam test/bam/pineapple.bam && rename_bam.py -d pineapple1 test/bam/pineapple.bam
harpy phase --quiet --vcf-samples -o phasevcf --vcf test/vcf/test.bcf test/bam
harpy phase --quiet 2 --vcf-samples -o phasevcf --vcf test/vcf/test.bcf test/bam
leviathan:
needs: [changes]
Expand Down Expand Up @@ -594,12 +594,12 @@ jobs:
# path: .snakemake/singularity
- name: leviathan
shell: micromamba-shell {0}
run: harpy sv leviathan --quiet -s 100 -b 1 -g test/genome/genome.fasta.gz -x "-M 2002" test/bam
run: harpy sv leviathan --quiet 2 -s 100 -b 1 -g test/genome/genome.fasta.gz -x "-M 2002" test/bam
continue-on-error: true
- name: leviathan-pop
if: always()
shell: micromamba-shell {0}
run: harpy sv leviathan --quiet -s 100 -b 1 -g test/genome/genome.fasta.gz -o SV/leviathanpop -p test/samples.groups test/bam
run: harpy sv leviathan --quiet 2 -s 100 -b 1 -g test/genome/genome.fasta.gz -o SV/leviathanpop -p test/samples.groups test/bam

naibr:
needs: [changes]
Expand Down Expand Up @@ -637,20 +637,20 @@ jobs:
# path: .snakemake/singularity
- name: naibr
shell: micromamba-shell {0}
run: harpy sv naibr --quiet -g test/genome/genome.fasta.gz -o SV/naibr -x "-min_sv 5000" test/bam_phased && rm -r Genome
run: harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/naibr -x "-min_sv 5000" test/bam_phased && rm -r Genome
- name: naibr pop
if: always()
shell: micromamba-shell {0}
run: harpy sv naibr --quiet -g test/genome/genome.fasta.gz -o SV/pop -p test/samples.groups test/bam_phased && rm -r Genome
run: harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/pop -p test/samples.groups test/bam_phased && rm -r Genome
- name: naibr with phasing
if: always()
shell: micromamba-shell {0}
run: |
harpy sv naibr --quiet -g test/genome/genome.fasta.gz -o SV/phase -v test/vcf/test.phased.bcf test/bam && rm -r Genome
harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/phase -v test/vcf/test.phased.bcf test/bam && rm -r Genome
- name: naibr pop with phasing
if: always()
shell: micromamba-shell {0}
run: harpy sv naibr --quiet -g test/genome/genome.fasta.gz -o SV/phasepop -v test/vcf/test.phased.bcf -p test/samples.groups test/bam && rm -r Genome
run: harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/phasepop -v test/vcf/test.phased.bcf -p test/samples.groups test/bam && rm -r Genome


simulate_variants:
Expand Down Expand Up @@ -690,26 +690,26 @@ jobs:
- name: simulate random snps/indels
shell: micromamba-shell {0}
run: |
harpy simulate snpindel --quiet --snp-count 10 --indel-count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate snpindel --quiet --prefix Simulate/snpvcf --snp-vcf Simulate/snpindel/haplotype_1/sim.hap1.snp.vcf --indel-vcf Simulate/snpindel/haplotype_1/sim.hap1.indel.vcf test/genome/genome.fasta.gz
harpy simulate snpindel --quiet 2 --snp-count 10 --indel-count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate snpindel --quiet 2 --prefix Simulate/snpvcf --snp-vcf Simulate/snpindel/haplotype_1/sim.hap1.snp.vcf --indel-vcf Simulate/snpindel/haplotype_1/sim.hap1.indel.vcf test/genome/genome.fasta.gz
- name: simulate inversions
shell: micromamba-shell {0}
if: always()
run: |
harpy simulate inversion --quiet --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate inversion --quiet --prefix Simulate/invvcf --vcf Simulate/inversion/haplotype_1/sim.hap1.inversion.vcf test/genome/genome.fasta.gz
harpy simulate inversion --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate inversion --quiet 2 --prefix Simulate/invvcf --vcf Simulate/inversion/haplotype_1/sim.hap1.inversion.vcf test/genome/genome.fasta.gz
- name: simulate cnv
shell: micromamba-shell {0}
if: always()
run: |
harpy simulate cnv --quiet --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate cnv --quiet --prefix Simulate/cnvvcf --vcf Simulate/cnv/haplotype_1/sim.hap1.cnv.vcf test/genome/genome.fasta.gz
harpy simulate cnv --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate cnv --quiet 2 --prefix Simulate/cnvvcf --vcf Simulate/cnv/haplotype_1/sim.hap1.cnv.vcf test/genome/genome.fasta.gz
- name: simulate translocations
shell: micromamba-shell {0}
if: always()
run: |
harpy simulate translocation --quiet --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate translocation --quiet --prefix Simulate/transvcf --vcf Simulate/translocation/haplotype_1/sim.hap1.translocation.vcf test/genome/genome.fasta.gz
harpy simulate translocation --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate translocation --quiet 2 --prefix Simulate/transvcf --vcf Simulate/translocation/haplotype_1/sim.hap1.translocation.vcf test/genome/genome.fasta.gz
simulate_linkedreads:
needs: [changes]
Expand Down Expand Up @@ -749,12 +749,12 @@ jobs:
shell: micromamba-shell {0}
run: |
haplotag_barcodes.py -n 14000000 > test/haplotag.bc
harpy simulate linkedreads --quiet -t 4 -n 2 -b test/haplotag.bc -l 100 -p 50 test/genome/genome.fasta.gz test/genome/genome2.fasta.gz
harpy simulate linkedreads --quiet 2 -t 4 -n 2 -b test/haplotag.bc -l 100 -p 50 test/genome/genome.fasta.gz test/genome/genome2.fasta.gz
assembly:
needs: [changes]
if: ${{ needs.changes.outputs.assembly == 'true' }}
name: metassembly
name: assembly
runs-on: ubuntu-latest
steps:
- name: Checkout
Expand Down Expand Up @@ -787,13 +787,13 @@ jobs:
# path: .snakemake/singularity
- name: test assembly
shell: micromamba-shell {0}
run: harpy assembly --quiet -r 4000 test/fastq/sample1.*
run: harpy assembly --quiet 2 -r 4000 test/fastq/sample1.*
- name: test metassembly
shell: micromamba-shell {0}
run: harpy metassembly --quiet -r 4000 test/fastq/sample1.*
run: harpy metassembly --quiet 2 -r 4000 test/fastq/sample1.*
- name: test metassembly without barcodes
shell: micromamba-shell {0}
run: harpy metassembly --ignore-bx --quiet -r 4000 test/fastq/sample1.*
run: harpy metassembly --ignore-bx --quiet 2 -r 4000 test/fastq/sample1.*

extras:
needs: [changes]
Expand Down Expand Up @@ -832,10 +832,10 @@ jobs:
run: harpy popgroup test/fastq
- name: harpy downsample bam
shell: micromamba-shell {0}
run: harpy downsample -d 1 --random-seed 699 --quiet test/bam/sample1.bam
run: harpy downsample -d 1 --random-seed 699 --quiet 2 test/bam/sample1.bam
- name: harpy downsample fastq
shell: micromamba-shell {0}
run: harpy downsample -d 1 --quiet test/fastq/sample1.*gz
run: harpy downsample -d 1 --quiet 2 test/fastq/sample1.*gz
- name: harpy hpc
shell: micromamba-shell {0}
run: |
Expand Down
58 changes: 46 additions & 12 deletions harpy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,22 @@

import rich_click as click
from . import align
from . import diagnose, resume, view
from . import deconvolve
from . import demultiplex
from . import container
from . import hpc
from . import impute
from . import assembly
from . import metassembly
from . import assembly, metassembly
from . import qc
from . import phase
from . import preflight
from . import resume
from . import simulate
from . import simulate_linkedreads, simulate_variants
from . import snp
from . import sv
from .popgroup import popgroup
from . import downsample
from .imputeparams import imputeparams
from . import view

click.rich_click.USE_MARKDOWN = True
click.rich_click.SHOW_ARGUMENTS = False
Expand All @@ -43,6 +41,38 @@ def cli():
**Documentation**: [https://pdimens.github.io/harpy/](https://pdimens.github.io/harpy/)
"""

## unify simulate commands
# Parent command group for everything under `harpy simulate`. The function body
# is only a docstring; the actual workflows are attached to this group further
# down in this file via simulate.add_command(...).
# `help_option_names` registers both -h and --help as help flags.
# NOTE(review): options_metavar='' presumably removes the "[OPTIONS]" placeholder
# from the usage line, and the docstring presumably doubles as the rendered help
# text (markdown rendering is enabled in this file) -- confirm before rewording it.
@click.group(options_metavar='', context_settings={"help_option_names" : ["-h", "--help"]})
def simulate():
"""
Simulate variants or linked-reads from a genome
To simulate genomic variants, provide an additional subcommand {`snpindel`,`inversion`,`cnv`,`translocation`}
to get more information about that workflow. The variant simulator (`simuG`) can only simulate
one type of variant at a time, so you may need to run it a few times if you want multiple variant types.
Use `simulate linkedreads` to simulate haplotag linked-reads from a diploid genome, which you can create by simulating
genomic variants.
"""

# Help-menu layout for `harpy simulate`: each (heading, subcommands) pair becomes
# one {"name": ..., "commands": ...} section. This mapping is merged into
# click.rich_click.COMMAND_GROUPS further down in this file.
simulate_commandstring = {
    "harpy simulate": [
        {"name": heading, "commands": subcommands}
        for heading, subcommands in (
            ("Linked Read Sequences", ["linkedreads"]),
            ("Genomic Variants", ["cnv", "inversion", "snpindel", "translocation"]),
        )
    ]
}

# Attach every simulation workflow to the `simulate` group: the linked-read
# simulator first, then the four variant simulators, in the original order.
for _workflow in (
    simulate_linkedreads.linkedreads,
    simulate_variants.snpindel,
    simulate_variants.inversion,
    simulate_variants.cnv,
    simulate_variants.translocation,
):
    simulate.add_command(_workflow)

# main program
cli.add_command(downsample.downsample)
cli.add_command(popgroup)
Expand All @@ -56,28 +86,32 @@ def cli():
cli.add_command(sv.sv)
cli.add_command(impute.impute)
cli.add_command(phase.phase)
cli.add_command(simulate.simulate)
cli.add_command(simulate)
cli.add_command(container.containerize)
cli.add_command(hpc.hpc)
cli.add_command(resume.resume)
cli.add_command(deconvolve.deconvolve)
cli.add_command(metassembly.metassembly)
cli.add_command(assembly.assembly)

cli.add_command(diagnose.diagnose)
## the workflows ##
click.rich_click.COMMAND_GROUPS = {
"harpy":
[
{
"name": "workflows",
"commands": sorted(["demultiplex","qc", "align","snp","sv","impute","phase", "simulate", "assembly", "metassembly"]),
"name": "Workflows",
"commands": sorted(["demultiplex","qc", "align","snp","sv","impute","phase", "simulate", "assembly", "metassembly"])
},
{
"name": "Other Commands",
"commands": sorted(["deconvolve", "downsample", "hpc", "imputeparams", "popgroup","preflight","resume", "view"])
"commands": sorted(["deconvolve", "downsample", "hpc", "imputeparams", "popgroup"])
},
{
"name": "Troubleshoot",
"commands": sorted(["view", "resume", "diagnose", "preflight"])
}
],
} | simulate.commandstring | hpc.docstring
} | simulate_commandstring | hpc.docstring

for i in [align, deconvolve, downsample, demultiplex, impute, phase, preflight, qc, simulate, snp, sv, assembly, metassembly]:
for i in [align, deconvolve, downsample, demultiplex, impute, phase, preflight, qc, simulate_linkedreads, simulate_variants, snp, sv, assembly, metassembly]:
click.rich_click.OPTION_GROUPS |= i.docstring
Loading

0 comments on commit 9e2c68d

Please sign in to comment.