Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more 2.0 milestones #208

Merged
merged 13 commits on
Feb 17, 2025
4 changes: 2 additions & 2 deletions .github/filters.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,15 @@ phase: &phase
simvars: &simvars
- *common
- *container
- 'harpy/simulate.py'
- 'harpy/simulate_variants.py'
- 'harpy/snakefiles/simulate_snpindel.smk'
- 'harpy/snakefiles/simulate_variants.smk'
- 'test/vcf/test.bcf'
- 'harpy/scripts/simuG.pl'
simreads: &simreads
- *common
- *container
- 'harpy/simulate.py'
- 'harpy/simulate_linkedreads.py'
- 'harpy/snakefiles/simulate_linkedreads.smk'
- 'test/genome**gz'
- 'extractReads.cpp'
Expand Down
76 changes: 38 additions & 38 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ jobs:
# if: ${{ needs.changes.outputs.modules == 'true' }}
# run: |
# export APPTAINER_TMPDIR=$PWD/test/
# harpy qc --skip-reports --quiet test/fastq/sample1.*.fq.gz
# harpy qc --skip-reports --quiet 2 test/fastq/sample1.*.fq.gz
# - name: Create Singularity Artifact
# if: ${{ steps.singularity.outcome == 'success' }}
# uses: actions/upload-artifact@v4
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:
# path: .snakemake/singularity
- name: harpy demultiplex
shell: micromamba-shell {0}
run: harpy demultiplex gen1 --quiet --schema test/demux/samples.schema test/demux/Undetermined_S0_L004_R* test/demux/Undetermined_S0_L004_I*
run: harpy demultiplex gen1 --quiet 2 --schema test/demux/samples.schema test/demux/Undetermined_S0_L004_R* test/demux/Undetermined_S0_L004_I*

preflight:
needs: [changes]
Expand Down Expand Up @@ -193,7 +193,7 @@ jobs:
- name: test preflight bam
if: always()
shell: micromamba-shell {0}
run: harpy preflight bam --quiet test/bam
run: harpy preflight bam --quiet 2 test/bam

qc:
needs: [changes]
Expand Down Expand Up @@ -231,10 +231,10 @@ jobs:
# path: .snakemake/singularity
- name: harpy qc
shell: micromamba-shell {0}
run: harpy qc -x "--low_complexity_filter" --quiet test/fastq
run: harpy qc -x "--low_complexity_filter" --quiet 2 test/fastq
- name: harpy qc all options
shell: micromamba-shell {0}
run: harpy qc -a auto -d -c 21,40,3,0 --quiet test/fastq
run: harpy qc -a auto -d -c 21,40,3,0 --quiet 2 test/fastq
deconvolve:
needs: [changes]
if: ${{ needs.changes.outputs.deconvolve == 'true' }}
Expand Down Expand Up @@ -271,7 +271,7 @@ jobs:
# path: .snakemake/singularity
- name: harpy deconvolve
shell: micromamba-shell {0}
run: harpy deconvolve --quiet test/fastq
run: harpy deconvolve --quiet 2 test/fastq
bwa:
needs: [changes]
if: ${{ needs.changes.outputs.bwa == 'true' }}
Expand Down Expand Up @@ -308,7 +308,7 @@ jobs:
# path: .snakemake/singularity
- name: test bwa
shell: micromamba-shell {0}
run: harpy align bwa --quiet -g test/genome/genome.fasta.gz -x "-A 2" test/fastq
run: harpy align bwa --quiet 2 -g test/genome/genome.fasta.gz -x "-A 2" test/fastq

ema:
needs: [changes]
Expand Down Expand Up @@ -346,7 +346,7 @@ jobs:
# path: .snakemake/singularity
- name: test ema
shell: micromamba-shell {0}
run: harpy align ema --quiet --ema-bins 150 -g test/genome/genome.fasta.gz test/fastq
run: harpy align ema --quiet 2 --ema-bins 150 -g test/genome/genome.fasta.gz test/fastq

strobe:
needs: [changes]
Expand Down Expand Up @@ -384,7 +384,7 @@ jobs:
# path: .snakemake/singularity
- name: test strobealign
shell: micromamba-shell {0}
run: harpy align strobe --quiet -l 125 -g test/genome/genome.fasta.gz test/fastq
run: harpy align strobe --quiet 2 -l 125 -g test/genome/genome.fasta.gz test/fastq

mpileup:
needs: [changes]
Expand Down Expand Up @@ -422,10 +422,10 @@ jobs:
# path: .snakemake/singularity
- name: snp mpileup
shell: micromamba-shell {0}
run: harpy snp mpileup --quiet -r test/positions.bed -g test/genome/genome.fasta.gz -x "--ignore-RG" test/bam
run: harpy snp mpileup --quiet 2 -r test/positions.bed -g test/genome/genome.fasta.gz -x "--ignore-RG" test/bam
- name: snp mpileup-pop
shell: micromamba-shell {0}
run: harpy snp mpileup --quiet -r test/positions.bed -o SNP/poptest -g test/genome/genome.fasta.gz -p test/samples.groups test/bam
run: harpy snp mpileup --quiet 2 -r test/positions.bed -o SNP/poptest -g test/genome/genome.fasta.gz -p test/samples.groups test/bam

freebayes:
needs: [changes]
Expand Down Expand Up @@ -463,10 +463,10 @@ jobs:
# path: .snakemake/singularity
- name: snp freebayes
shell: micromamba-shell {0}
run: harpy snp freebayes --quiet -r test/positions.bed -g test/genome/genome.fasta.gz -x "-g 200" test/bam
run: harpy snp freebayes --quiet 2 -r test/positions.bed -g test/genome/genome.fasta.gz -x "-g 200" test/bam
- name: snp freebayes-pop
shell: micromamba-shell {0}
run: harpy snp freebayes --quiet -r test/positions.bed -o SNP/poptest -g test/genome/genome.fasta.gz -p test/samples.groups test/bam
run: harpy snp freebayes --quiet 2 -r test/positions.bed -o SNP/poptest -g test/genome/genome.fasta.gz -p test/samples.groups test/bam

impute:
needs: [changes]
Expand Down Expand Up @@ -504,11 +504,11 @@ jobs:
# path: .snakemake/singularity
- name: impute
shell: micromamba-shell {0}
run: harpy impute --quiet --vcf test/vcf/test.bcf -p test/stitch.params test/bam
run: harpy impute --quiet 2 --vcf test/vcf/test.bcf -p test/stitch.params test/bam
- name: impute from vcf
shell: micromamba-shell {0}
if: always()
run: harpy impute --quiet --vcf-samples -o vcfImpute --vcf test/vcf/test.bcf -p test/stitch.params test/bam
run: harpy impute --quiet 2 --vcf-samples -o vcfImpute --vcf test/vcf/test.bcf -p test/stitch.params test/bam

phase:
needs: [changes]
Expand Down Expand Up @@ -546,17 +546,17 @@ jobs:
# path: .snakemake/singularity
- name: phase
shell: micromamba-shell {0}
run: harpy phase --quiet --vcf test/vcf/test.bcf -x "--max_iter 10001" test/bam
run: harpy phase --quiet 2 --vcf test/vcf/test.bcf -x "--max_iter 10001" test/bam
- name: phase with indels
shell: micromamba-shell {0}
if: always()
run: harpy phase --quiet --vcf test/vcf/test.bcf -o phaseindel -g test/genome/genome.fasta.gz test/bam
run: harpy phase --quiet 2 --vcf test/vcf/test.bcf -o phaseindel -g test/genome/genome.fasta.gz test/bam
- name: phase from vcf
shell: micromamba-shell {0}
if: always()
run: |
cp test/bam/sample1.bam test/bam/pineapple.bam && rename_bam.py -d pineapple1 test/bam/pineapple.bam
harpy phase --quiet --vcf-samples -o phasevcf --vcf test/vcf/test.bcf test/bam
harpy phase --quiet 2 --vcf-samples -o phasevcf --vcf test/vcf/test.bcf test/bam

leviathan:
needs: [changes]
Expand Down Expand Up @@ -594,12 +594,12 @@ jobs:
# path: .snakemake/singularity
- name: leviathan
shell: micromamba-shell {0}
run: harpy sv leviathan --quiet -s 100 -b 1 -g test/genome/genome.fasta.gz -x "-M 2002" test/bam
run: harpy sv leviathan --quiet 2 -s 100 -b 1 -g test/genome/genome.fasta.gz -x "-M 2002" test/bam
continue-on-error: true
- name: leviathan-pop
if: always()
shell: micromamba-shell {0}
run: harpy sv leviathan --quiet -s 100 -b 1 -g test/genome/genome.fasta.gz -o SV/leviathanpop -p test/samples.groups test/bam
run: harpy sv leviathan --quiet 2 -s 100 -b 1 -g test/genome/genome.fasta.gz -o SV/leviathanpop -p test/samples.groups test/bam

naibr:
needs: [changes]
Expand Down Expand Up @@ -637,20 +637,20 @@ jobs:
# path: .snakemake/singularity
- name: naibr
shell: micromamba-shell {0}
run: harpy sv naibr --quiet -g test/genome/genome.fasta.gz -o SV/naibr -x "-min_sv 5000" test/bam_phased && rm -r Genome
run: harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/naibr -x "-min_sv 5000" test/bam_phased && rm -r Genome
- name: naibr pop
if: always()
shell: micromamba-shell {0}
run: harpy sv naibr --quiet -g test/genome/genome.fasta.gz -o SV/pop -p test/samples.groups test/bam_phased && rm -r Genome
run: harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/pop -p test/samples.groups test/bam_phased && rm -r Genome
- name: naibr with phasing
if: always()
shell: micromamba-shell {0}
run: |
harpy sv naibr --quiet -g test/genome/genome.fasta.gz -o SV/phase -v test/vcf/test.phased.bcf test/bam && rm -r Genome
harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/phase -v test/vcf/test.phased.bcf test/bam && rm -r Genome
- name: naibr pop with phasing
if: always()
shell: micromamba-shell {0}
run: harpy sv naibr --quiet -g test/genome/genome.fasta.gz -o SV/phasepop -v test/vcf/test.phased.bcf -p test/samples.groups test/bam && rm -r Genome
run: harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/phasepop -v test/vcf/test.phased.bcf -p test/samples.groups test/bam && rm -r Genome


simulate_variants:
Expand Down Expand Up @@ -690,26 +690,26 @@ jobs:
- name: simulate random snps/indels
shell: micromamba-shell {0}
run: |
harpy simulate snpindel --quiet --snp-count 10 --indel-count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate snpindel --quiet --prefix Simulate/snpvcf --snp-vcf Simulate/snpindel/haplotype_1/sim.hap1.snp.vcf --indel-vcf Simulate/snpindel/haplotype_1/sim.hap1.indel.vcf test/genome/genome.fasta.gz
harpy simulate snpindel --quiet 2 --snp-count 10 --indel-count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate snpindel --quiet 2 --prefix Simulate/snpvcf --snp-vcf Simulate/snpindel/haplotype_1/sim.hap1.snp.vcf --indel-vcf Simulate/snpindel/haplotype_1/sim.hap1.indel.vcf test/genome/genome.fasta.gz
- name: simulate inversions
shell: micromamba-shell {0}
if: always()
run: |
harpy simulate inversion --quiet --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate inversion --quiet --prefix Simulate/invvcf --vcf Simulate/inversion/haplotype_1/sim.hap1.inversion.vcf test/genome/genome.fasta.gz
harpy simulate inversion --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate inversion --quiet 2 --prefix Simulate/invvcf --vcf Simulate/inversion/haplotype_1/sim.hap1.inversion.vcf test/genome/genome.fasta.gz
- name: simulate cnv
shell: micromamba-shell {0}
if: always()
run: |
harpy simulate cnv --quiet --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate cnv --quiet --prefix Simulate/cnvvcf --vcf Simulate/cnv/haplotype_1/sim.hap1.cnv.vcf test/genome/genome.fasta.gz
harpy simulate cnv --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate cnv --quiet 2 --prefix Simulate/cnvvcf --vcf Simulate/cnv/haplotype_1/sim.hap1.cnv.vcf test/genome/genome.fasta.gz
- name: simulate translocations
shell: micromamba-shell {0}
if: always()
run: |
harpy simulate translocation --quiet --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate translocation --quiet --prefix Simulate/transvcf --vcf Simulate/translocation/haplotype_1/sim.hap1.translocation.vcf test/genome/genome.fasta.gz
harpy simulate translocation --quiet 2 --count 10 -z 0.5 test/genome/genome.fasta.gz
harpy simulate translocation --quiet 2 --prefix Simulate/transvcf --vcf Simulate/translocation/haplotype_1/sim.hap1.translocation.vcf test/genome/genome.fasta.gz

simulate_linkedreads:
needs: [changes]
Expand Down Expand Up @@ -749,7 +749,7 @@ jobs:
shell: micromamba-shell {0}
run: |
haplotag_barcodes.py -n 14000000 > test/haplotag.bc
harpy simulate linkedreads --quiet -t 4 -n 2 -b test/haplotag.bc -l 100 -p 50 test/genome/genome.fasta.gz test/genome/genome2.fasta.gz
harpy simulate linkedreads --quiet 2 -t 4 -n 2 -b test/haplotag.bc -l 100 -p 50 test/genome/genome.fasta.gz test/genome/genome2.fasta.gz

assembly:
needs: [changes]
Expand Down Expand Up @@ -787,13 +787,13 @@ jobs:
# path: .snakemake/singularity
- name: test assembly
shell: micromamba-shell {0}
run: harpy assembly --quiet -r 4000 test/fastq/sample1.*
run: harpy assembly --quiet 2 -r 4000 test/fastq/sample1.*
- name: test metassembly
shell: micromamba-shell {0}
run: harpy metassembly --quiet -r 4000 test/fastq/sample1.*
run: harpy metassembly --quiet 2 -r 4000 test/fastq/sample1.*
- name: test metassembly without barcodes
shell: micromamba-shell {0}
run: harpy metassembly --ignore-bx --quiet -r 4000 test/fastq/sample1.*
run: harpy metassembly --ignore-bx --quiet 2 -r 4000 test/fastq/sample1.*

extras:
needs: [changes]
Expand Down Expand Up @@ -832,10 +832,10 @@ jobs:
run: harpy popgroup test/fastq
- name: harpy downsample bam
shell: micromamba-shell {0}
run: harpy downsample -d 1 --random-seed 699 --quiet test/bam/sample1.bam
run: harpy downsample -d 1 --random-seed 699 --quiet 2 test/bam/sample1.bam
- name: harpy downsample fastq
shell: micromamba-shell {0}
run: harpy downsample -d 1 --quiet test/fastq/sample1.*gz
run: harpy downsample -d 1 --quiet 2 test/fastq/sample1.*gz
- name: harpy hpc
shell: micromamba-shell {0}
run: |
Expand Down
58 changes: 46 additions & 12 deletions harpy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,22 @@

import rich_click as click
from . import align
from . import diagnose, resume, view
from . import deconvolve
from . import demultiplex
from . import container
from . import hpc
from . import impute
from . import assembly
from . import metassembly
from . import assembly, metassembly
from . import qc
from . import phase
from . import preflight
from . import resume
from . import simulate
from . import simulate_linkedreads, simulate_variants
from . import snp
from . import sv
from .popgroup import popgroup
from . import downsample
from .imputeparams import imputeparams
from . import view

click.rich_click.USE_MARKDOWN = True
click.rich_click.SHOW_ARGUMENTS = False
Expand All @@ -43,6 +41,38 @@ def cli():
**Documentation**: [https://pdimens.github.io/harpy/](https://pdimens.github.io/harpy/)
"""

## unify simulate commands
@click.group(options_metavar='', context_settings={"help_option_names" : ["-h", "--help"]})
def simulate():
"""
Simulate variants or linked-reads from a genome

To simulate genomic variants, provide an additional subcommand {`snpindel`,`inversion`,`cnv`,`translocation`}
to get more information about that workflow. The variant simulator (`simuG`) can only simulate
one type of variant at a time, so you may need to run it a few times if you want multiple variant types.
Use `simulate linkedreads` to simulate haplotag linked-reads from a diploid genome, which you can create by simulating
genomic variants.
"""

simulate_commandstring = {
"harpy simulate": [
{
"name": "Linked Read Sequences",
"commands": ["linkedreads"],
},
{
"name": "Genomic Variants",
"commands": ["cnv", "inversion", "snpindel", "translocation"],
}
]
}

simulate.add_command(simulate_linkedreads.linkedreads)
simulate.add_command(simulate_variants.snpindel)
simulate.add_command(simulate_variants.inversion)
simulate.add_command(simulate_variants.cnv)
simulate.add_command(simulate_variants.translocation)

# main program
cli.add_command(downsample.downsample)
cli.add_command(popgroup)
Expand All @@ -56,28 +86,32 @@ def cli():
cli.add_command(sv.sv)
cli.add_command(impute.impute)
cli.add_command(phase.phase)
cli.add_command(simulate.simulate)
cli.add_command(simulate)
cli.add_command(container.containerize)
cli.add_command(hpc.hpc)
cli.add_command(resume.resume)
cli.add_command(deconvolve.deconvolve)
cli.add_command(metassembly.metassembly)
cli.add_command(assembly.assembly)

cli.add_command(diagnose.diagnose)
## the workflows ##
click.rich_click.COMMAND_GROUPS = {
"harpy":
[
{
"name": "workflows",
"commands": sorted(["demultiplex","qc", "align","snp","sv","impute","phase", "simulate", "assembly", "metassembly"]),
"name": "Workflows",
"commands": sorted(["demultiplex","qc", "align","snp","sv","impute","phase", "simulate", "assembly", "metassembly"])
},
{
"name": "Other Commands",
"commands": sorted(["deconvolve", "downsample", "hpc", "imputeparams", "popgroup","preflight","resume", "view"])
"commands": sorted(["deconvolve", "downsample", "hpc", "imputeparams", "popgroup"])
},
{
"name": "Troubleshoot",
"commands": sorted(["view", "resume", "diagnose", "preflight"])
}
],
} | simulate.commandstring | hpc.docstring
} | simulate_commandstring | hpc.docstring

for i in [align, deconvolve, downsample, demultiplex, impute, phase, preflight, qc, simulate, snp, sv, assembly, metassembly]:
for i in [align, deconvolve, downsample, demultiplex, impute, phase, preflight, qc, simulate_linkedreads, simulate_variants, snp, sv, assembly, metassembly]:
click.rich_click.OPTION_GROUPS |= i.docstring
Loading
Loading