nf-core · kedhammar · Mar 24, 2025 · Mar 25, 2025 · Mar 25, 2025 · Mar 25, 2025
diff --git a/conf/modules.config b/conf/modules.config
@@ -18,11 +18,11 @@ process {
         saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
     ]
 
-    withName: SEQTK_SAMPLE {
+    withName: 'SEQTK_SAMPLE' {
         ext.args = '-s100'
     }
 
-    withName: FASTQC {
+    withName: 'FASTQC' {
         ext.args = '--quiet'
     }
 
@@ -35,6 +35,17 @@ process {
         ]
     }
 
+    withName: 'RUNDIRPARSER' {
+        publishDir = [
+            path: { "${params.outdir}/rundirparser" },
+            mode: params.publish_dir_mode,
+            // The process _mqc.txt outputs should have identical names for the same sequencing platforms
+            // in order to be grouped together in the MultiQC report, but here we need to enforce uniqueness
+            // to avoid overwriting results in the publishDir.
+            saveAs: { filename -> filename.equals('versions.yml') ? null : "${dir_meta.dirname}_$filename" }
+        ]
+    }
+
     withName: 'MULTIQC_GLOBAL' {
         ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
         publishDir = [

diff --git a/modules/local/rundirparser/environment.yml b/modules/local/rundirparser/environment.yml
@@ -0,0 +1,8 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - pip
+  - pip:
+      - PyYAML==6.0.2
+      - xmltodict # imported by parse_illumina.py; TODO pin to the version shipped in the container
diff --git a/modules/local/rundirparser/main.nf b/modules/local/rundirparser/main.nf
@@ -0,0 +1,47 @@
+// Runs the bundled parser scripts on one sequencing run directory and collects
+// the MultiQC custom-content files (*_mqc.*) they write.
+process RUNDIRPARSER {
+    tag "$rundir.simpleName"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/41/412df2cdcf04e0a12971ba61b12cacaa5a49705442afe99ad96668bebbb8f880/data' :
+        'community.wave.seqera.io/library/pip_pyyaml_xmltodict:a4e48bd1ab4b6a53' }"
+
+    input:
+    tuple val(dir_meta), path(rundir)
+
+    output:
+    tuple val(dir_meta), path("*_mqc.*"), emit: multiqc
+    path "versions.yml",                  emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    # TODO: check what kind of seq platform to decide which script to use
+    rundirparser.py ${rundir}
+    parse_illumina.py ${rundir}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Python: \$(python --version |& sed '1!d ; s/Python //')
+        PyYAML: \$(python -c "import yaml; print(yaml.__version__)")
+        xmltodict: \$(python -c "import xmltodict; print(xmltodict.__version__)")
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch rundir_mqc.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Python: stub_version
+        PyYAML: stub_version
+        xmltodict: stub_version
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/rundirparser/meta.yml b/modules/local/rundirparser/meta.yml
@@ -0,0 +1,51 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "rundirparser"
+description: Parse a sequencing run directory and write MultiQC custom-content files
+keywords:
+  - sequencing
+  - rundir
+  - metadata
+  - multiqc
+tools:
+  - "rundirparser":
+      ## TODO nf-core: Add homepage, documentation, licence and identifier for the software below
+      description: "Local scripts (rundirparser.py, parse_illumina.py) that summarise a run directory for MultiQC"
+      homepage: ""
+      documentation: ""
+      tool_dev_url: ""
+      doi: ""
+      licence:
+      identifier:
+
+input:
+  - - dir_meta:
+        type: map
+        description: |
+          Groovy Map containing run directory information.
+          Must provide `dirname`, which conf/modules.config uses to prefix published file names.
+    - rundir:
+        type: directory
+        description: Sequencing run directory; parse_illumina.py expects [Rr]unParameters.xml at its top level
+
+output:
+  - multiqc:
+      - dir_meta:
+          type: map
+          description: |
+            Groovy Map containing run directory information, passed through from the input
+      - "*_mqc.*":
+          type: file
+          description: MultiQC custom-content files written by the parser scripts
+          pattern: "*_mqc.*"
+
+  - versions:
+      - "versions.yml":
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@kedhammar"
+maintainers:
+  - "@kedhammar"
diff --git a/modules/local/rundirparser/resources/usr/bin/parse_illumina.py b/modules/local/rundirparser/resources/usr/bin/parse_illumina.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+
+import os
+import yaml
+import sys
+from datetime import datetime
+from pathlib import Path
+import xmltodict
+
+
+def read_run_parameters(directory):
+    alt_1 = directory / "runParameters.xml"
+    alt_2 = directory / "RunParameters.xml"
+    if alt_1.exists():
+        with open(alt_1) as f:
+            return xmltodict.parse(f.read())
+    elif alt_2.exists():
+        with open(alt_2) as f:
+            return xmltodict.parse(f.read())
+    else:
+        raise Exception("[Rr]unParameters.xml not found!")
+
+
+def find(d, tag):
+    if isinstance(d, dict):
+        if tag in d:
+            yield d[tag]
+        for k, v in d.items():
+            if isinstance(v, dict):
+                yield from find(v, tag)
+            if isinstance(v, list):
+                for i in v:
+                    yield from find(i, tag)
+
+
+def construct_data(run_parameters):
+    run_parameters_tags = {
+        "RunId": "Run ID",
+        "RunID": "Run ID",
+        "InstrumentType": "Instrument type",
+        "ApplicationName": "Control software",
+        "Application": "Control software",
+        "ApplicationVersion": "Control software version",
+        "SystemSuiteVersion": "Control software version",
+        "Flowcell": "Flowcell type",
+        "FlowCellMode": "Flowcell type",
+        "ReagentKitVersion": "Reagent kit version",
+        "RTAVersion": "RTA Version",
+        "RtaVersion": "RTA Version",
+    }
+    data = {}
+    for k, v in run_parameters_tags.items():
+        for key, value in run_parameters_tags.items():
+            info = list(find(run_parameters, key))
+            if info:
+                data[value] = info[0]
+        return data
+
+
+def construct_multiqc_yaml(directory):
+
+    directory_name = directory.name
+    run_parameters = read_run_parameters(directory)
+
+    data = construct_data(run_parameters)
+
+    #TODO: MultiQC currently ignores the data in this yaml RUDE
+    metadata = {
+        "custom_data": {
+            "my_data_type": {
+                "id": "mqc_seq_metadata",
+                "section_name": "Sequencing instrument metadata",
+                "description": directory_name,
+                "plot_type": "table",
+                "pconfig": {
+                    "id": 'custom_table',
+                    "title": 'Custom Table',
+                    "no_headers": "true",
+                    },
+                "data": data,
+            }
+        }
+    }
+
+    return metadata
+
+
+if __name__ == "__main__":
+    rundir_path = Path(sys.argv[1])
+    output_file = "illumina_mqc.yml"
+
+    multiqc_yaml = construct_multiqc_yaml(rundir_path)
+
+    with open(output_file, "w") as f:
+        yaml.dump(multiqc_yaml, f)
diff --git a/modules/local/rundirparser/resources/usr/bin/rundirparser.py b/modules/local/rundirparser/resources/usr/bin/rundirparser.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+import sys
+import yaml
+
+
+def parse_rundir(rundir):
+    # Dummy implementation, replace with actual logic: writes one fixed example row labelled with `rundir`
+
+    sequencing_platform = None  # never reassigned here, so the 'rundirparser' fallback name is used below
+
+    yml_contents = """# plot_type: 'table'
+# section_name: 'rundir stats'
+# description: 'dummy rundir stats'
+# pconfig:
+#     namespace: 'Cust Data'
+# headers:
+#     col1:
+#         title: '#Seqs'
+#         description: 'Number of sequences'
+#         format: '{:,.0f}'
+#     col2:
+#         title: 'Total bp'
+#         description: 'Total size of the dataset'
+#     col3:
+#         title: 'Avg'
+#         description: 'Average sequence length'
+#     col4:
+#         title: 'N50'
+#         description: '50% of the sequences are longer than this size'
+#     col5:
+#         title: 'N75'
+#         description: '75% of the sequences are longer than this size'
+#     col6:
+#         title: 'N90'
+#         description: '90% of the sequences are longer than this size'
+#     col7:
+#         title: 'Min'
+#         description: 'Length of the shortest sequence'
+#     col8:
+#         title: 'Max'
+#         description: 'Length of the longest sequence'
+#     col9:
+#         title: 'auN'
+#         description: 'Area under the Nx curve'
+#     col10:
+#         title: 'GC'
+#         description: 'Relative GC content (excluding Ns)'
+"""
+    tsv_contents = f"""Sample	col1	col2	col3	col4	col5	col6	col7	col8	col9	col10
+{rundir}	10	147806	14780.6000000	22507	16573	15322	22801.9181765	344	33340	NaN
+"""
+
+    contents = yml_contents + tsv_contents  # '#'-prefixed YAML header followed by the tab-separated table
+
+    """
+    File names should be unique between sequencing platforms, but otherwise identical
+    so multiple rundirs of the same platform will be written to the same table
+    in the MultiQC report.
+    """
+    outname = f"{sequencing_platform or 'rundirparser'}_mqc.txt"
+
+    with open(outname, "w") as f:  # written to the current working directory
+        f.write(contents)
+
+
+def main():
+    rundir = sys.argv[1]
+    parse_rundir(rundir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/local/rundirparser/tests/main.nf.test b/modules/local/rundirparser/tests/main.nf.test
@@ -0,0 +1,73 @@
+// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
+// nf-core modules test rundirparser
+nextflow_process {
+
+    name "Test Process RUNDIRPARSER"
+    script "../main.nf"
+    process "RUNDIRPARSER"
+
+    tag "modules"
+    tag "modules_"
+    tag "rundirparser"
+
+    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
+    test("sarscov2 - bam") {
+
+        // TODO nf-core: If you have created a test for a chained module
+        // (the module requires running more than one process to generate the required output)
+        // add the 'setup' method here.
+        // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules).
+
+        when {
+            process {
+                """
+                // TODO nf-core: define inputs of the process here. Example:
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+                //TODO nf-core: Add all required assertions to verify the test output.
+                // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples.
+            )
+        }
+
+    }
+
+    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix.
+    test("sarscov2 - bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                // TODO nf-core: define inputs of the process here. Example:
+
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+                //TODO nf-core: Add all required assertions to verify the test output.
+            )
+        }
+
+    }
+
+}