msk-access
diff --git a/‎.github/workflows/document_package.yml
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/document_package.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/python-publish.yml
Lines changed: 0 additions & 19 deletions b/‎.github/workflows/python-publish.yml
Lines changed: 0 additions & 19 deletions
diff --git a/‎docs/cli.md
Lines changed: 211 additions & 2 deletions b/‎docs/cli.md
Lines changed: 211 additions & 2 deletions
diff --git a/‎postprocessing_variant_calls/vardict/vardict_class.py
Lines changed: 30 additions & 1 deletion b/‎postprocessing_variant_calls/vardict/vardict_class.py
Lines changed: 30 additions & 1 deletion
@@ -40,6 +40,7 @@ jobs:
                     git checkout docs --
                     git checkout ${{ steps.extract_branch.outputs.branch }} -- README.md
                     git checkout ${{ steps.extract_branch.outputs.branch }} -- docs/cli.md
+                    mv docs/cli.md cli.md
             - uses: EndBug/add-and-commit@v9
               with:
                 default_author: github_actions
 
@@ -17,6 +17,7 @@ $ main [OPTIONS] COMMAND [ARGS]...
 
 * `maf`: operations for manipulating maf files...
 * `mutect1`: post-processing commands for MuTect...
+* `mutect2`: post-processing commands for MuTect...
 * `vardict`: post-processing commands for VarDict...
 
 ## `main maf`
@@ -58,9 +59,27 @@ $ main maf annotate [OPTIONS] COMMAND [ARGS]...
 
 **Commands**:
 
+* `extract_blocklist`: Extract values from an optional blocklist...
 * `mafbybed`: annotate a maf column by a bed file.
 * `mafbytsv`: annotate a maf column by a bed file.
 
+#### `main maf annotate extract_blocklist`
+
+Extract values from an optional blocklist file if provided. Used in SNVs/indels workflow.
+
+**Usage**:
+
+```console
+$ main maf annotate extract_blocklist [OPTIONS]
+```
+
+**Options**:
+
+* `-b, --blocklist_file FILE`: Blocklist text file to extract values from. Needs to be in TSV format  [required]
+* `-m, --maf FILE`: MAF file to subset  [required]
+* `-sep, --separator TEXT`: Specify a separator for delimited data.  [default: tsv]
+* `--help`: Show this message and exit.
+
 #### `main maf annotate mafbybed`
 
 annotate a maf column by a bed file.
@@ -135,13 +154,66 @@ $ main maf filter [OPTIONS] COMMAND [ARGS]...
 
 **Commands**:
 
+* `access_filters`: Filter a MAF file based on all the...
+* `access_remove_variants`: Filter a MAF file based on all the...
 * `cmo_ch`: Filter a MAF file based on all the parameters
 * `hotspot`: filter a MAF file based on the presence of...
 * `mappable`: Filter a MAF file to retain only mappable...
 * `non_common_variant`: Filter a MAF file for common variants and...
 * `non_hotspot`: filter a MAF file based on the presence of...
 * `not_complex`: Filter a MAF filter for complex variants...
 
+#### `main maf filter access_filters`
+
+Filter a MAF file based on all the parameters listed in ACCESS filters python script
+
+**Usage**:
+
+```console
+$ main maf filter access_filters [OPTIONS]
+```
+
+**Options**:
+
+* `-f, --fillout_maf FILE`: Fillout MAF file to subset (direct output from traceback subworkflow)  [required]
+* `-a, --anno_maf FILE`: Annotated MAF file to subset (direct input file from beginning of traceback subworkflow)  [required]
+* `-o, --output PATH`: Maf output file name.  [default: output]
+* `-sep, --separator TEXT`: Specify a separator for delimited data.  [default: tsv]
+* `-bl, --blocklist TEXT`: Optional input blocklist file for access filtering criteria.  [default: tsv]
+* `-ts, --tumor_samplename TEXT`: Name of Tumor Sample  [required]
+* `-ns, --normal_samplename TEXT`: Name of MATCHED normal sample  [required]
+* `--tumor_detect_alt_thres TEXT`: The Minimum Alt depth required to be considered detected in fillout  [default: 2]
+* `--tumor_detect_alt_thres TEXT`: The Minimum Alt depth required to be considered detected in fillout  [default: 2]
+* `--curated_detect_alt_thres TEXT`: The Minimum Alt depth required to be considered detected in fillout  [default: 2]
+* `--plasma_detect_alt_thres TEXT`: The Minimum Alt depth required to be considered detected in fillout  [default: 2]
+* `--tumor_TD_min TEXT`: The Minimum Total Depth required in tumor to consider a variant Likely Germline  [default: 20]
+* `--normal_TD_min TEXT`: The Minimum Total Depth required in Matched Normal to consider a variant Germline  [default: 20]
+* `--tumor_vaf_germline_thres TEXT`: The threshold for variant allele fraction required in Tumor to be considered a variant Likely Germline  [default: 0.4]
+* `--tumor_vaf_germline_thres TEXT`: The threshold for variant allele fraction required in Matched Normal to be considered a variant Germline  [default: 0.4]
+* `--tier_one_alt_min TEXT`: The Minimum Alt Depth required in hotspots  [default: 3]
+* `--tier_two_alt_min TEXT`: The Minimum Alt Depth required in non-hotspots  [default: 5]
+* `--min_n_curated_samples_alt_detected TEXT`: The Minimum number of curated samples variant is detected to be flagged  [default: 2]
+* `--tn_ratio_thres TEXT`: Tumor-Normal variant fraction ratio threshold  [default: 5]
+* `--help`: Show this message and exit.
+
+#### `main maf filter access_remove_variants`
+
+Filter a MAF file based on all the parameters satisfied by the remove variants by annotations CWL script in the ACCESS pipeline
+
+**Usage**:
+
+```console
+$ main maf filter access_remove_variants [OPTIONS]
+```
+
+**Options**:
+
+* `-m, --maf FILE`: MAF file to subset  [required]
+* `-i, --intervals FILE`: Intervals file containing rows of criterion to tag input MAF by  [required]
+* `-o, --output PATH`: Maf output file name.  [default: output.maf]
+* `-sep, --separator TEXT`: Specify a separator for delimited data.  [default: tsv]
+* `--help`: Show this message and exit.
+
 #### `main maf filter cmo_ch`
 
 Filter a MAF file based on all the parameters
@@ -207,7 +279,7 @@ $ main maf filter non_common_variant [OPTIONS]
 
 * `-m, --maf FILE`: MAF file to subset  [required]
 * `-o, --output PATH`: Maf output file name.  [default: output.maf]
-* `-sep, --separator TEXT`: Specify a seperator for delimited data.  [default: tsv]
+* `-sep, --separator TEXT`: Specify a separator for delimited data.  [default: tsv]
 * `--help`: Show this message and exit.
 
 #### `main maf filter non_hotspot`
@@ -300,13 +372,72 @@ $ main maf tag [OPTIONS] COMMAND [ARGS]...
 
 **Commands**:
 
+* `access`: Tag a variant in a MAF file based on...
+* `by_rules`: Tag a variant in a MAF file based on...
+* `by_variant_classification`: Tag filtered MAF file by variant...
 * `cmo_ch`: Tag a variant in MAF file based on all the...
 * `common_variant`: Tag a variant in a MAF file as common...
 * `germline_status`: Tag a variant in a MAF file as germline...
+* `hotspots`: Tag a variant in a MAF file based on...
 * `prevalence_in_cosmicDB`: Tag a variant in a MAF file with...
 * `traceback`: Generate combined count columns between...
 * `truncating_mut_in_TSG`: Tag a truncating mutating variant in a MAF...
 
+#### `main maf tag access`
+
+Tag a variant in a MAF file based on criterion stated by the SNV/indels ACCESS pipeline workflow
+
+**Usage**:
+
+```console
+$ main maf tag access [OPTIONS]
+```
+
+**Options**:
+
+* `-m, --maf FILE`: MAF file to tag  [required]
+* `-r, --rules FILE`: Intervals JSON file containing criterion to tag input MAF by  [required]
+* `-h, --hotspots FILE`: Text file containing hotspots to tag input MAF by  [required]
+* `-o, --output PATH`: Maf output file name.  [default: output_tagged.maf]
+* `-sep, --separator TEXT`: Specify a separator for delimited data.  [default: tsv]
+* `--help`: Show this message and exit.
+
+#### `main maf tag by_rules`
+
+Tag a variant in a MAF file based on criterion stated by an input rules.json JSON file
+
+**Usage**:
+
+```console
+$ main maf tag by_rules [OPTIONS]
+```
+
+**Options**:
+
+* `-m, --maf FILE`: MAF file to tag  [required]
+* `-r, --rules FILE`: Intervals JSON file containing criterion to tag input MAF by  [required]
+* `-o, --output PATH`: Maf output file name.  [default: output_tagged.maf]
+* `-sep, --separator TEXT`: Specify a separator for delimited data.  [default: tsv]
+* `--help`: Show this message and exit.
+
+#### `main maf tag by_variant_classification`
+
+Tag filtered MAF file by variant classifications and subset into individual text files.
+
+**Usage**:
+
+```console
+$ main maf tag by_variant_classification [OPTIONS]
+```
+
+**Options**:
+
+* `-m, --maf FILE`: filtered MAF file to split by annotations with  [required]
+* `-tx_ref, --canonical_tx_ref FILE`: Reference canonical transcript file  [required]
+* `-o, --output_dir PATH`: Output Directory to export individual text files to.  [default: output_dir]
+* `-sep, --separator TEXT`: Specify a separator for delimited data.  [default: tsv]
+* `--help`: Show this message and exit.
+
 #### `main maf tag cmo_ch`
 
 Tag a variant in MAF file based on all the parameters listed
@@ -358,6 +489,24 @@ $ main maf tag germline_status [OPTIONS]
 * `-sep, --separator TEXT`: Specify a seperator for delimited data.  [default: tsv]
 * `--help`: Show this message and exit.
 
+#### `main maf tag hotspots`
+
+Tag a variant in a MAF file based on hotspots file
+
+**Usage**:
+
+```console
+$ main maf tag hotspots [OPTIONS]
+```
+
+**Options**:
+
+* `-m, --maf FILE`: MAF file to tag  [required]
+* `-h, --hotspots FILE`: Text file containing hotspots to tag input MAF by  [required]
+* `-o, --output PATH`: Maf output file name.  [default: output_tagged.maf]
+* `-sep, --separator TEXT`: Specify a separator for delimited data.  [default: tsv]
+* `--help`: Show this message and exit.
+
 #### `main maf tag prevalence_in_cosmicDB`
 
 Tag a variant in a MAF file with prevalence in COSMIC DB 
@@ -390,6 +539,7 @@ $ main maf tag traceback [OPTIONS]
 * `-m, --maf FILE`: MAF file to tag  [required]
 * `-o, --output PATH`: Maf output file name.  [default: output.maf]
 * `-sep, --separator TEXT`: Specify a seperator for delimited data.  [default: tsv]
+* `-sheet, --samplesheet PATH`: Samplesheets in nucleovar formatting. See README for more info: `https://github.com/mskcc-omics-workflows/nucleovar/blob/main/README.md`. Used to add fillout type information to maf. The `sample_id` and `type` columns must be present.
 * `--help`: Show this message and exit.
 
 #### `main maf tag truncating_mut_in_TSG`
@@ -468,6 +618,65 @@ $ main mutect1 case-control filter [OPTIONS]
 * `-o, --outDir TEXT`: Full Path to the output dir
 * `--help`: Show this message and exit.
 
+## `main mutect2`
+
+post-processing commands for MuTect version 2 VCFs.
+
+**Usage**:
+
+```console
+$ main mutect2 [OPTIONS] COMMAND [ARGS]...
+```
+
+**Options**:
+
+* `--help`: Show this message and exit.
+
+**Commands**:
+
+* `case-control`: Post-processing commands for filtering of...
+
+### `main mutect2 case-control`
+
+Post-processing commands for filtering of MuTect version 2 VCF input file.
+
+**Usage**:
+
+```console
+$ main mutect2 case-control [OPTIONS] COMMAND [ARGS]...
+```
+
+**Options**:
+
+* `--help`: Show this message and exit.
+
+**Commands**:
+
+* `filter`: This tool helps to filter MuTect version 2...
+
+#### `main mutect2 case-control filter`
+
+This tool helps to filter MuTect version 2 VCFs for case-control calling
+
+**Usage**:
+
+```console
+$ main mutect2 case-control filter [OPTIONS]
+```
+
+**Options**:
+
+* `-i, --inputVcf FILE`: Input vcf generated by MuTect2 which needs to be processed  [required]
+* `-it, --inputTxt FILE`: Input Txt generated by MuTect which needs to be processed. NOTE, a Txt file will not be used for Mutect2 filtering as it is not provided in standard output.  [default: /dev/null]
+* `--refFasta FILE`: Input reference fasta  [default: /dev/null]
+* `--tsampleName TEXT`: Name of the tumor sample.  [required]
+* `-dp, --totalDepth INTEGER RANGE`: Tumor total depth threshold  [default: 20; x>=0]
+* `-ad, --alleleDepth INTEGER RANGE`: [default: 1; x>=0]
+* `-tnr, --tnRatio INTEGER RANGE`: Tumor-Normal variant fraction ratio threshold  [default: 1; x>=0]
+* `-vf, --variantFraction FLOAT RANGE`: Tumor variant fraction threshold  [default: 5e-05; x>=0]
+* `-o, --outDir TEXT`: Full Path to the output dir
+* `--help`: Show this message and exit.
+
 ## `main vardict`
 
 post-processing commands for VarDict version 1.4.6 VCFs.
@@ -519,8 +728,8 @@ $ main vardict case-control filter [OPTIONS]
 
 * `-i, --inputVcf FILE`: Input vcf generated by vardict which needs to be processed  [required]
 * `--tsampleName TEXT`: Name of the tumor Sample  [required]
+* `-ad, --alleledepth INTEGER RANGE`: [x>=1] [required]
 * `-dp, --totalDepth INTEGER RANGE`: Tumor total depth threshold  [default: 20; x>=20]
-* `-ad, --alleledepth INTEGER RANGE`: [x>=1]
 * `-tnr, --tnRatio INTEGER`: Tumor-Normal variant fraction ratio threshold  [default: 1]
 * `-vf, --variantFraction FLOAT`: Tumor variant fraction threshold  [default: 5e-05]
 * `-mq, --minQual INTEGER`: Minimum variant call quality  [default: 0]
 
@@ -62,6 +62,10 @@ def __init__(
         self.txt_out = self.vcf_out + "_STDfilter.txt"
         self.vcf_complex_out = self.vcf_out + "_STDfilter_complex.vcf"
         self.vcf_out = self.vcf_out + "_STDfilter.vcf"
+        # vcf output from sort
+        self.vcf_out_sort = self.out_name()
+        self.vcf_complex_out_sort = self.vcf_complex_out.replace(".vcf", "_sorted.vcf")
+        self.vcf_out_sort = self.vcf_out.replace(".vcf", "_sorted.vcf")
         # vcf reader
         self.vcf_reader = self.set_reader()
         # sample list
@@ -371,8 +375,33 @@ def filter_case_control(self):
                         + "\n"
                     )
                     txt_fh.write(out_line)
-
         vcf_writer.close()
         vcf_complex_writer.close()
         txt_fh.close()
         return self.vcf_out, self.vcf_complex_out, self.txt_out
+
+    def sort_vcf(self):
+        # Read the input VCF file
+        vcf_reader = vcf.Reader(open(self.vcf_out, "r"))
+        sorted_records = sorted(
+            vcf_reader, key=lambda record: (record.CHROM, record.POS)
+        )
+        # Write sorted records to the output VCF file
+        vcf_writer = vcf.Writer(open(self.vcf_out_sort, "w"), vcf_reader)
+        for record in sorted_records:
+            vcf_writer.write_record(record)
+        vcf_writer.close()
+        return self.vcf_out_sort
+
+    def sort_vcf_complex(self):
+        # Read the input VCF file
+        vcf_reader = vcf.Reader(open(self.vcf_complex_out, "r"))
+        sorted_records = sorted(
+            vcf_reader, key=lambda record: (record.CHROM, record.POS)
+        )
+        # Write sorted records to the output VCF file
+        vcf_writer = vcf.Writer(open(self.vcf_complex_out_sort, "w"), vcf_reader)
+        for record in sorted_records:
+            vcf_writer.write_record(record)
+        vcf_writer.close()
+        return self.vcf_complex_out_sort