Skip to content

Commit

Permalink
quarto fixes and better divide-and-conquer for demux
Browse files Browse the repository at this point in the history
* add quarto fixes

* succinct quiet docstring

* simplify the biocircos patch logic

* update what's forbidden/allowed

* colorize the cli

* properly implement paramchecking

* add sanitization for flexible extra-params

* add logic for checkpoint

* manual divide-and-conquer

* fix naibr test
  • Loading branch information
pdimens authored Feb 21, 2025
1 parent 9e2c68d commit 07c227b
Show file tree
Hide file tree
Showing 27 changed files with 243 additions and 118 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ jobs:
# path: .snakemake/singularity
- name: naibr
shell: micromamba-shell {0}
run: harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/naibr -x "-min_sv 5000" test/bam_phased && rm -r Genome
run: harpy sv naibr --quiet 2 -g test/genome/genome.fasta.gz -o SV/naibr test/bam_phased && rm -r Genome
- name: naibr pop
if: always()
shell: micromamba-shell {0}
Expand Down
13 changes: 9 additions & 4 deletions harpy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,12 @@ def simulate():
{
"name": "Linked Read Sequences",
"commands": ["linkedreads"],
"panel_styles": {"border_style": "blue"}
},
{
"name": "Genomic Variants",
"commands": ["cnv", "inversion", "snpindel", "translocation"],
"panel_styles": {"border_style": "green"}
}
]
}
Expand Down Expand Up @@ -94,24 +96,27 @@ def simulate():
cli.add_command(metassembly.metassembly)
cli.add_command(assembly.assembly)
cli.add_command(diagnose.diagnose)
## the workflows ##
click.rich_click.COMMAND_GROUPS = {
"harpy":
[
{
"name": "Workflows",
"commands": sorted(["demultiplex","qc", "align","snp","sv","impute","phase", "simulate", "assembly", "metassembly"])
"commands": sorted(["demultiplex","qc", "align","snp","sv","impute","phase", "simulate", "assembly", "metassembly"]),
"panel_styles": {"border_style": "blue"}
},
{
"name": "Other Commands",
"commands": sorted(["deconvolve", "downsample", "hpc", "imputeparams", "popgroup"])
"commands": sorted(["deconvolve", "downsample", "hpc", "imputeparams", "popgroup"]),
"panel_styles": {"border_style": "dim green"}
},
{
"name": "Troubleshoot",
"commands": sorted(["view", "resume", "diagnose", "preflight"])
"commands": sorted(["view", "resume", "diagnose", "preflight"]),
"panel_styles": {"border_style": "dim"}
}
],
} | simulate_commandstring | hpc.docstring

click.rich_click.OPTIONS_PANEL_TITLE = None
for i in [align, deconvolve, downsample, demultiplex, impute, phase, preflight, qc, simulate_linkedreads, simulate_variants, snp, sv, assembly, metassembly]:
click.rich_click.OPTION_GROUPS |= i.docstring
4 changes: 2 additions & 2 deletions harpy/_cli_types_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ class SnakemakeParams(click.ParamType):
"""A class for a click type which accepts snakemake parameters. Does validations to make sure there isn't doubling up."""
name = "snakemake_params"
def convert(self, value, param, ctx):
forbidden = "--rerun-incomplete --ri --show-failed-logs --rerun-triggers --nolock --software-deployment-method --smd --deployment --deployment-method --conda-prefix --cores -c --directory -d --snakefile -s --configfile --configfiles".split()
available = "--profile --cache --jobs -j --local-cores --resources --res --set-threads --max-threads --set-resources --set-scatter --set-resource-scopes --default-resources --default-res --preemptible-rules --preemptible-retries --envvars --touch -t --keep-going -k --force -f --executor -e --forceall -F --forcerun -R --prioritize -P --batch --until -U --omit-from -O --shadow-prefixDIR --scheduler --wms-monitor --wms-monitor-arg --scheduler-ilp-solver --conda-base-path --no-subworkflows --nosw --precommand --groups --group-components --report --report-stylesheet --reporterPLUGIN --draft-notebook --edit-notebook --notebook-listen --lint --generate-unit-tests --containerize --export-cwl --list-rules --list -l --list-target-rules --lt --dag --rulegraph --filegraph --d3dag --summary -S --detailed-summary -D --archive --cleanup-metadata --cmFILE --cleanup-shadow --skip-script-cleanup --unlock --list-changes --lc --list-input-changes --li --list-params-changes --lp --list-untracked --lu --delete-all-output --delete-temp-output --keep-incomplete --drop-metadata --version -v --printshellcmds -p --debug-dag --nocolor --quiet -q --print-compilation --verbose --force-use-threads --allow-ambiguity -a --ignore-incomplete --ii --max-inventory-time --latency-wait --output-wait -w --wait-for-files --wait-for-files-file --queue-input-wait-time --notemp --nt --all-temp --unneeded-temp-files --keep-storage-local-copies --target-files-omit-workdir-adjustment --allowed-rules --max-jobs-per-timespan --max-jobs-per-second --max-status-checks-per-second --seconds-between-status-checks --retries --restart-times -T --wrapper-prefix --default-storage-provider --default-storage-prefix --local-storage-prefix --remote-job-local-storage-prefix --shared-fs-usage --scheduler-greediness --greediness --no-hooks --debug --runtime-profile --local-groupid --attempt --log-handler-script --log-service --job-deploy-sources --benchmark-extended --container-image --immediate-submit --is --jobscript --js 
--jobname --jn --flux --container-cleanup-images --use-conda --conda-not-block-search-path-envvars --list-conda-envs --conda-cleanup-envs --conda-cleanup-pkgs --conda-create-envs-only --conda-frontend --use-apptainer --use-singularity --apptainer-prefix --singularity-prefix --apptainer-args --singularity-args --use-envmodules --scheduler-solver-path --deploy-sources --target-jobs --mode --report-html-path --report-html-stylesheet-path".split()
forbidden = "--rerun-incomplete --ri --show-failed-logs --rerun-triggers --nolock --software-deployment-method --smd --deployment --deployment-method --conda-prefix --cores -c --directory -d --snakefile -s --configfile --configfiles --conda-cleanup-pkgs --apptainer-prefix --singularity-prefix".split()
available = "--profile --cache --jobs -j --local-cores --resources --res --set-threads --max-threads --set-resources --set-scatter --set-resource-scopes --default-resources --default-res --preemptible-rules --preemptible-retries --envvars --touch -t --keep-going -k --force -f --executor -e --forceall -F --forcerun -R --prioritize -P --batch --until -U --omit-from -O --shadow-prefixDIR --scheduler --wms-monitor --wms-monitor-arg --scheduler-ilp-solver --conda-base-path --no-subworkflows --nosw --precommand --groups --group-components --report --report-stylesheet --reporterPLUGIN --draft-notebook --edit-notebook --notebook-listen --lint --generate-unit-tests --containerize --export-cwl --list-rules --list -l --list-target-rules --lt --dag --rulegraph --filegraph --d3dag --summary -S --detailed-summary -D --archive --cleanup-metadata --cmFILE --cleanup-shadow --skip-script-cleanup --unlock --list-changes --lc --list-input-changes --li --list-params-changes --lp --list-untracked --lu --delete-all-output --delete-temp-output --keep-incomplete --drop-metadata --version -v --printshellcmds -p --nocolor --print-compilation --force-use-threads --allow-ambiguity -a --ignore-incomplete --ii --max-inventory-time --latency-wait --output-wait -w --wait-for-files --wait-for-files-file --queue-input-wait-time --notemp --nt --all-temp --unneeded-temp-files --keep-storage-local-copies --target-files-omit-workdir-adjustment --allowed-rules --max-jobs-per-timespan --max-jobs-per-second --max-status-checks-per-second --seconds-between-status-checks --retries --restart-times -T --default-storage-provider --default-storage-prefix --local-storage-prefix --remote-job-local-storage-prefix --shared-fs-usage --scheduler-greediness --greediness --runtime-profile --local-groupid --attempt --log-handler-script --log-service --job-deploy-sources --benchmark-extended --container-image --immediate-submit --is --jobscript --js --jobname --jn --flux --container-cleanup-images 
--conda-not-block-search-path-envvars --conda-frontend --apptainer-args --singularity-args --use-envmodules --scheduler-solver-path --deploy-sources --target-jobs --mode --report-html-path --report-html-stylesheet-path".split()
for i in value.split():
if i.startswith("-"):
if i in forbidden:
Expand Down
28 changes: 15 additions & 13 deletions harpy/_cli_types_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,31 +130,33 @@ def convert(self, value, param, ctx):
return sanitize_shell(value)

class StitchParams(click.ParamType):
    """A class for a click type that validates stitch extra-params. Sanitizes and corrects different input styles to work with STITCH cli"""
    name = "stitch_params"
    def convert(self, value, param, ctx):
        """Validate *value* as extra STITCH options and return a sanitized string of --ARG=VAL pairs, or fail via click."""
        # options Harpy itself already passes to STITCH; users must not override these
        harpy_options = "--method --posfile --bamlist --nCores --nGen --chr --K --S --use_bx_tag --bxTagUpperLimit --outputdir --output_filename --tempdir".split()
        # every other option the STITCH CLI recognizes
        valid_options = "--nStarts --genfile --B_bit_prob --outputInputInVCFFormat --downsampleToCov --downsampleFraction --readAware --chrStart --chrEnd --regionStart --regionEnd --buffer --maxDifferenceBetweenReads --maxEmissionMatrixDifference --alphaMatThreshold --emissionThreshold --iSizeUpperLimit --bqFilter --niterations --shuffleHaplotypeIterations --splitReadIterations --expRate --maxRate --minRate --Jmax --regenerateInput --originalRegionName --keepInterimFiles --keepTempDir --switchModelIteration --generateInputOnly --restartIterations --refillIterations --downsampleSamples --downsampleSamplesKeepList --subsetSNPsfile --useSoftClippedBases --outputBlockSize --outputSNPBlockSize --inputBundleBlockSize --genetic_map_file --reference_haplotype_file --reference_legend_file --reference_sample_file --reference_populations --reference_phred --reference_iterations --reference_shuffleHaplotypeIterations --initial_min_hapProb --initial_max_hapProb --regenerateInputWithDefaultValues --plot_shuffle_haplotype_attempts --save_sampleReadsInfo --gridWindowSize --shuffle_bin_nSNPs --shuffle_bin_radius --keepSampleReadsInRAM --useTempdirWhileWriting --output_haplotype_dosages".split()
        opts = 0
        docs = "https://github.com/rwdavies/STITCH/blob/master/Options.md"
        clean_args = []
        for i in shellsplit(value):
            # be flexible about input style: normalize ARG=VAL and -ARG=VAL to the --ARG=VAL form STITCH expects
            # (testing for "--", not "-", so single-dash input is corrected rather than rejected downstream)
            if not i.startswith("--"):
                i = "--" + i.lstrip("-")
            if "=" in i:
                opts += 1
                argsplit = [j.strip() for j in i.split("=")]
                if len(argsplit) != 2:
                    self.fail(f"{i} is not in the proper format for STITCH. STITCH options must be in the form ARG=VAL (e.g. --downsampleFraction=0.5). See the stitch documentation for a list of available options: {docs}", param, ctx)
                arg = argsplit[0]
                if arg in harpy_options:
                    self.fail(f"{arg} is already used by Harpy when calling STITCH.", param, ctx)
                if arg not in valid_options:
                    self.fail(f"{arg} is not a valid STITCH option. See the STITCH documentation for a list of available options: {docs}", param, ctx)
                clean_args.append("=".join(argsplit))
            else:
                self.fail(f"{i} is not in the proper format for STITCH. STITCH options must be in the form ARG=VAL (e.g. --downsampleFraction=0.5). See the stitch documentation for a list of available options: {docs}", param, ctx)
        if opts < 1:
            self.fail(f"No valid options recognized. STITCH options begin with a double-dash and must be in the form --ARG=VAL (e.g. --downsampleFraction=0.5). See the stitch documentation for a list of available options: {docs}.", param, ctx)
        # return the normalized arguments, shell-sanitized for safe downstream use
        return sanitize_shell(" ".join(clean_args))

class HapCutParams(click.ParamType):
"""A class for a click type that validates hapcut2 extra-params."""
Expand Down Expand Up @@ -202,19 +204,19 @@ def convert(self, value, param, ctx):
valid_options = "blacklist candidates".split()
opts = 0
docs = "https://github.com/pontushojer/NAIBR?tab=readme-ov-file#running-naibr"
for idx,i in enumerate(shellsplit(value)):
if i.startswith("-"):
self.fail(f"{i} begins with a dash, which is the wrong format. Try using " + i.lstrip("-") + " VAL instead", param, ctx)
clean_args = []
for idx,i in enumerate(shellsplit(value.replace("-", ""))):
# if it's an even index, it's the argument name of an arg-val pair
if idx % 2 == 0:
opts += 1
if i in harpy_options:
self.fail(f"{i} is already used by Harpy when calling naibr.", param, ctx)
if i not in valid_options:
self.fail(f"{i} is not a valid naibr option. See the naibr documentation for a list of available options: {docs}.", param, ctx)
clean_args.append(i.strip())
if opts < 1:
self.fail(f"No valid options recognized. Available naibr options begin without dashes in the form of ARG<space>VAL (e.g. blacklist inversions.ignore). See the naibr documentation for a list of available options: {docs}.", param, ctx)
return sanitize_shell(value)
self.fail(f"No valid options recognized. Available naibr options begin without dashes in the form of ARG<space>VAL (e.g. blacklist inversions.txt). See the naibr documentation for a list of available options: {docs}.", param, ctx)
return sanitize_shell(" ".join(clean_args))

class MpileupParams(click.ParamType):
"""A class for a click type that validates mpileup extra-params."""
Expand Down
2 changes: 1 addition & 1 deletion harpy/_launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def launch_snakemake(sm_args, workflow, starttext, outdir, sm_logfile, quiet, su
exitcode = EXIT_CODE_SUCCESS if process.poll() == 0 else EXIT_CODE_RUNTIME_ERROR
break
# add new progress bar track if the rule doesn't have one yet
rulematch = re.search(r"rule\s\w+:", output)
rulematch = re.search(r"(rule|checkpoint)\s\w+:", output)
if rulematch:
rule = rulematch.group().replace(":","").split()[-1]
if rule not in task_ids:
Expand Down
Loading

0 comments on commit 07c227b

Please sign in to comment.