Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

withName: SEQTK_SAMPLE {
withName: 'SEQTK_SAMPLE' {
ext.args = '-s100'
}

withName: FASTQC {
withName: 'FASTQC' {
ext.args = '--quiet'
}

Expand All @@ -35,6 +35,17 @@ process {
]
}

withName: 'RUNDIRPARSER' {
    // Publish RUNDIRPARSER outputs (except versions.yml) under <outdir>/rundirparser.
    publishDir = [
        path: { "${params.outdir}/rundirparser" },
        mode: params.publish_dir_mode,
        // The process writes identically named _mqc.txt files for run directories of the
        // same sequencing platform so MultiQC can group them in one report section.
        // Published copies are therefore prefixed with the run directory name, otherwise
        // results from different run directories would overwrite each other in publishDir.
        saveAs: { filename ->
            if (filename.equals('versions.yml')) {
                // versions.yml is never published
                return null
            }
            return "${dir_meta.dirname}_${filename}"
        }
    ]
}

withName: 'MULTIQC_GLOBAL' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
Expand Down
7 changes: 7 additions & 0 deletions modules/local/rundirparser/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the RUNDIRPARSER module.
channels:
  - conda-forge
  - bioconda
dependencies:
  - pip
  - pip:
      # Imported by rundirparser.py and parse_illumina.py
      - PyYAML==6.0.2
      # Imported by parse_illumina.py to read [Rr]unParameters.xml; without it the
      # conda profile fails at import time.
      # NOTE(review): pin should match the version baked into the container image - confirm.
      - xmltodict==0.14.2
43 changes: 43 additions & 0 deletions modules/local/rundirparser/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Runs the bundled run directory parser scripts on one sequencing run directory and
// collects the MultiQC custom content files (*_mqc.*) they write.
//
// Input : tuple of a metadata map (dir_meta) and the staged run directory (rundir)
// Output: multiqc  - tuple of dir_meta and every file matching *_mqc.*
//         versions - versions.yml listing Python and the Python packages used
process RUNDIRPARSER {
    tag "$rundir.simpleName"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/41/412df2cdcf04e0a12971ba61b12cacaa5a49705442afe99ad96668bebbb8f880/data' :
        'community.wave.seqera.io/library/pip_pyyaml_xmltodict:a4e48bd1ab4b6a53' }"

    input:
    tuple val(dir_meta), path(rundir)

    output:
    tuple val(dir_meta), path("*_mqc.*"), emit: multiqc
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // The run directory is quoted so names containing spaces or shell metacharacters
    // reach the scripts as a single argument.
    """
    # TODO: check what kind of sequencing platform this is to decide which script to use
    rundirparser.py "${rundir}"
    parse_illumina.py "${rundir}"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        Python: \$(python --version |& sed '1!d ; s/Python //')
        PyYAML: \$(python -c "import yaml; print(yaml.__version__)")
        xmltodict: \$(python -c "from importlib.metadata import version; print(version('xmltodict'))")
    END_VERSIONS
    """

    stub:
    """
    touch rundir_mqc.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        Python: stub_version
        PyYAML: stub_version
        xmltodict: stub_version
    END_VERSIONS
    """
}
68 changes: 68 additions & 0 deletions modules/local/rundirparser/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "rundirparser"
description: Parse a sequencing run directory and write its metadata as MultiQC custom content files
keywords:
  - rundir
  - sequencing
  - metadata
  - multiqc
tools:
  - "rundirparser":
      ## TODO nf-core: Add homepage / documentation / licence details for the software below
      description: "Python scripts bundled with this module (rundirparser.py and parse_illumina.py)"
      homepage: ""
      documentation: ""
      tool_dev_url: ""
      doi: ""
      licence:
      identifier:

input:
  - - dir_meta:
        type: map
        description: |
          Groovy Map containing run directory information
          e.g. `[ dirname:'my_rundir' ]`
    - rundir:
        type: directory
        description: |
          Sequencing run directory. The Illumina parser expects it to contain
          `runParameters.xml` or `RunParameters.xml`.

output:
  - multiqc:
      - dir_meta:
          type: map
          description: |
            Groovy Map containing run directory information
            e.g. `[ dirname:'my_rundir' ]`
      - "*_mqc.*":
          type: file
          description: MultiQC custom content files written by the parser scripts
          pattern: "*_mqc.*"

  - versions:
      - "versions.yml":
          type: file
          description: File containing software versions
          pattern: "versions.yml"

authors:
  - "@kedhammar"
maintainers:
  - "@kedhammar"
95 changes: 95 additions & 0 deletions modules/local/rundirparser/resources/usr/bin/parse_illumina.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env python3

import os
import yaml
import sys
from datetime import datetime
from pathlib import Path
import xmltodict


def read_run_parameters(directory):
    """Load the run parameters XML file found directly inside ``directory``.

    ``runParameters.xml`` is tried first, then ``RunParameters.xml``.

    Args:
        directory: Path object of the run directory (must support ``/`` joining).

    Returns:
        The result of ``xmltodict.parse`` applied to the file's text.

    Raises:
        Exception: If neither file name exists in ``directory``.
    """
    for file_name in ("runParameters.xml", "RunParameters.xml"):
        candidate = directory / file_name
        if not candidate.exists():
            continue
        with open(candidate) as handle:
            xml_text = handle.read()
        return xmltodict.parse(xml_text)
    raise Exception("[Rr]unParameters.xml not found!")


def find(d, tag):
    """Yield every value stored under ``tag`` in a nested dict structure.

    The search is depth-first. A dict's own match is yielded before its children
    are searched. Dict values are searched recursively; for list values each
    element is searched, but only elements that are dicts can produce matches
    (a list nested directly inside a list is not descended into).

    Args:
        d: Structure to search; anything that is not a dict yields nothing.
        tag: Key to look for.

    Yields:
        The value of each occurrence of ``tag``, in traversal order.
    """
    if not isinstance(d, dict):
        return
    if tag in d:
        yield d[tag]
    for child in d.values():
        if isinstance(child, dict):
            yield from find(child, tag)
        elif isinstance(child, list):
            for element in child:
                yield from find(element, tag)


def construct_data(run_parameters):
    """Collect selected run parameter values under human-readable labels.

    For every known tag, the first occurrence found by ``find`` in
    ``run_parameters`` is stored under the tag's label. Several tags share a
    label (different spellings of the same field); when more than one of them is
    present, the tag listed later in the mapping wins.

    Args:
        run_parameters: Nested dict structure of the parsed run parameters.

    Returns:
        dict mapping label -> first value found for that tag. Labels whose tags
        are all absent are omitted.
    """
    run_parameters_tags = {
        "RunId": "Run ID",
        "RunID": "Run ID",
        "InstrumentType": "Instrument type",
        "ApplicationName": "Control software",
        "Application": "Control software",
        "ApplicationVersion": "Control software version",
        "SystemSuiteVersion": "Control software version",
        "Flowcell": "Flowcell type",
        "FlowCellMode": "Flowcell type",
        "ReagentKitVersion": "Reagent kit version",
        "RTAVersion": "RTA Version",
        "RtaVersion": "RTA Version",
    }
    # Sentinel distinguishing "tag absent" from a tag whose value is None/empty.
    absent = object()
    data = {}
    # A single pass is enough: every lookup is independent of the others, so the
    # former outer loop only repeated identical work once per tag.
    for key, value in run_parameters_tags.items():
        # Only the first match is used, so stop searching as soon as it is found.
        first_match = next(find(run_parameters, key), absent)
        if first_match is not absent:
            data[value] = first_match
    return data


def construct_multiqc_yaml(directory):
    """Build the MultiQC custom data structure for one run directory.

    Args:
        directory: Path object of the run directory; its ``name`` becomes the
            section description and its run parameters XML supplies the data.

    Returns:
        Nested dict ``{"custom_data": {"my_data_type": {...}}}`` describing a
        table section whose ``data`` is the output of ``construct_data``.
    """
    section_description = directory.name
    table_data = construct_data(read_run_parameters(directory))

    plot_config = {
        "id": 'custom_table',
        "title": 'Custom Table',
        "no_headers": "true",
    }

    # TODO: MultiQC currently ignores the data in this yaml
    # NOTE(review): possibly because the section is nested under "custom_data" rather
    # than placed at the top level of the *_mqc file - confirm against the MultiQC
    # custom content documentation.
    section = {
        "id": "mqc_seq_metadata",
        "section_name": "Sequencing instrument metadata",
        "description": section_description,
        "plot_type": "table",
        "pconfig": plot_config,
        "data": table_data,
    }

    return {"custom_data": {"my_data_type": section}}


if __name__ == "__main__":
    # Usage: parse_illumina.py <rundir>
    # The first command-line argument is the run directory to parse.
    report = construct_multiqc_yaml(Path(sys.argv[1]))

    # The result is always written to illumina_mqc.yml in the current working directory.
    with open("illumina_mqc.yml", "w") as handle:
        yaml.dump(report, handle)
72 changes: 72 additions & 0 deletions modules/local/rundirparser/resources/usr/bin/rundirparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python
import sys
import yaml


def parse_rundir(rundir):
    """Write a placeholder MultiQC custom content table for ``rundir``.

    The file consists of a commented YAML header describing the table followed
    by a header row and a single hard-coded data row labelled with ``rundir``.
    It is written to ``<platform>_mqc.txt`` in the current working directory,
    falling back to ``rundirparser_mqc.txt`` when no platform is known.

    Args:
        rundir: Run directory identifier, used verbatim as the sample name.
    """
    # Dummy implementation, replace with actual logic

    # Platform detection does not exist yet, so the fallback file name is always used.
    platform = None

    # File names should be unique between sequencing platforms, but otherwise identical
    # so multiple rundirs of the same platform will be written to the same table
    # in the MultiQC report.
    destination = f"{platform or 'rundirparser'}_mqc.txt"

    # Plain (non-f) string on purpose: it contains the literal format spec '{:,.0f}'.
    header = """# plot_type: 'table'
# section_name: 'rundir stats'
# description: 'dummy rundir stats'
# pconfig:
# namespace: 'Cust Data'
# headers:
# col1:
# title: '#Seqs'
# description: 'Number of sequences'
# format: '{:,.0f}'
# col2:
# title: 'Total bp'
# description: 'Total size of the dataset'
# col3:
# title: 'Avg'
# description: 'Average sequence length'
# col4:
# title: 'N50'
# description: '50% of the sequences are longer than this size'
# col5:
# title: 'N75'
# description: '75% of the sequences are longer than this size'
# col6:
# title: 'N90'
# description: '90% of the sequences are longer than this size'
# col7:
# title: 'Min'
# description: 'Length of the shortest sequence'
# col8:
# title: 'Max'
# description: 'Length of the longest sequence'
# col9:
# title: 'auN'
# description: 'Area under the Nx curve'
# col10:
# title: 'GC'
# description: 'Relative GC content (excluding Ns)'
"""
    # NOTE(review): the column separators below are presumably meant to be tabs - confirm
    # they survived copy/paste.
    rows = f"""Sample col1 col2 col3 col4 col5 col6 col7 col8 col9 col10
{rundir} 10 147806 14780.6000000 22507 16573 15322 22801.9181765 344 33340 NaN
"""

    with open(destination, "w") as out:
        out.write(header + rows)


def main():
    """Command-line entry point: parse the run directory given as first argument."""
    parse_rundir(sys.argv[1])


# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    main()
73 changes: 73 additions & 0 deletions modules/local/rundirparser/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
// nf-core modules test rundirparser
// nf-test suite for the RUNDIRPARSER process: one regular run and one stub run, each
// asserting that the process succeeds and that its outputs match the stored snapshot.
nextflow_process {

name "Test Process RUNDIRPARSER"
script "../main.nf"
process "RUNDIRPARSER"

tag "modules"
// NOTE(review): "modules_" looks like an unfinished template tag - confirm the intended value.
tag "modules_"
tag "rundirparser"

// TODO nf-core: Change the test name preferably indicating the test-data and file-format used
// NOTE(review): the test name and input below are still the template's BAM example, while
// RUNDIRPARSER takes a tuple of (dir_meta, rundir) and parse_illumina.py raises when the
// given path has no [Rr]unParameters.xml inside it - this test needs a real run directory.
test("sarscov2 - bam") {

// TODO nf-core: If you are creating a test for a chained module
// (the module requires running more than one process to generate the required output)
// add the 'setup' method here.
// You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules).

when {
process {
"""
// TODO nf-core: define inputs of the process here. Example:

input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
]
"""
}
}

then {
// Both checks are evaluated together: the process must succeed and its outputs must match the snapshot.
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
//TODO nf-core: Add all required assertions to verify the test output.
// See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples.
)
}

}

// TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix.
// Stub variant: runs the process' stub block, which only touches rundir_mqc.txt and
// writes a versions.yml with placeholder versions.
test("sarscov2 - bam - stub") {

options "-stub"

when {
process {
"""
// TODO nf-core: define inputs of the process here. Example:

input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
//TODO nf-core: Add all required assertions to verify the test output.
)
}

}

}
Loading
Loading