🇨🇳 v0.5.3 Merge pull request #22 from Puriney/master

v0.5.3
yanailab · May 10, 2018 · 2be1954 · 2be1954
2 parents 1839866 + 457245e
commit 2be1954
Show file tree

Hide file tree

Showing 6 changed files with 377 additions and 81 deletions.
diff --git a/celseq2/celseq2.py b/celseq2/celseq2.py
@@ -29,41 +29,58 @@
     * -n
     * --notemp (--nt)
 
+Name of rules to request outputs:
+    * all (default)
+    * TAG_FASTQ
+    * ANNOTATION
+    * ALIGNMENT
+    * COUNT_MATRIX
+    * QC_COUNT_MATRIX
+    * CELSEQ2_TO_ST (only available for ST data)
+
 Refs:
     - https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html
     - https://bitbucket.org/snakemake/snakemake/src/e11a57fe1f62f3f56c815d95d82871811dae81b3/snakemake/__init__.py?at=master&fileviewer=file-view-default#__init__.py-580:1127
 '''
 
+task_choices = ['all', 'TAG_FASTQ', 'ANNOTATION', 'ALIGNMENT',
+                'COUNT_MATRIX', 'QC_COUNT_MATRIX', 'CELSEQ2_TO_ST',
+                'REPORT']  # joined into the "target" argument's help text
+
 
 def get_argument_parser():
-    desc = ('CEL-Seq2: A Python Package for Processing CEL-Seq2 RNA-Seq Data.')
+    desc = ('celseq2: A Python Package for Processing CEL-Seq2 RNA-Seq Data.')
     parser = argparse.ArgumentParser(description=desc, add_help=True)
 
     parser.add_argument(
         "target",
         nargs="*",
         default=None,
-        help="Targets to build. May be rules or files.")
+        help=('Targets to build. '
+              'May be rules or files. '
+              'Task choices: {}').format(', '.join(task_choices)))
     parser.add_argument(
         "--config-file",
         metavar="FILE",
         required=True,
-        help=("Specify details of CEL-Seq2 and gneome information."))
+        help=("Configurations of the details of CEL-Seq2 "
+              " and running environment."))
     parser.add_argument(
         "--experiment-table",
         metavar="FILE",
         required=True,
-        help=("Space/Tab separated file specifying experiment design."))
+        help=("Space/Tab separated file specifying the R1/R2 reads "
+              "and the experiment design."))
     parser.add_argument(
         "--output-dir",
         metavar="DIRECTORY",
         required=True,
-        help=("All results are saved with here as root directory."))
+        help=("All results are saved here as root directory."))
 
     parser.add_argument(
         "--reverse-stranded", "--rs",
         action="store_true", default=False,
-        help="Read has to be mapped to the opposite strand as the feature")
+        help="Reads have to be mapped to the opposite strand of the feature.")
 
     parser.add_argument(
         "--celseq2-to-st", "--st",

diff --git a/celseq2/count_umi.py b/celseq2/count_umi.py
@@ -8,6 +8,10 @@
 import pickle
 import argparse
 from collections import defaultdict, Counter
+import plotly.graph_objs as go
+from plotly.offline import plot
+import pandas as pd
+from celseq2.helper import base_name
 
 
 def invert_strand(iv):
@@ -102,6 +106,64 @@ def _flatten_umi_set(umi_set):
 #     pass
 
 
+def plotly_alignment_stats(fpaths=[], saveto='', fnames=[]):
+    '''
+    Save a plotly box graph with a list of alignment stats files
+
+    Each file is read with ``pd.read_csv(f, index_col=0)`` and must contain
+    the rows ``_multimapped``, ``_uniquemapped`` and ``_total``. One box is
+    drawn per file from (multi + unique) / total, computed per column
+    (presumably one column per cell barcode -- confirm against the writer).
+
+    Parameters
+    ----------
+    fpaths : list
+        A list of file paths
+    saveto : str
+        File path to save the html file as the plotly box graph
+    fnames : list
+        A list of strings to label each ``fpaths``; ``base_name`` of every
+        path is used when it is empty or its length differs from ``fpaths``
+
+    Returns
+    -------
+    bool
+        True if saving successfully, False otherwise
+    '''
+    # Labels must pair one-to-one with the files, else derive them.
+    if not fnames or len(fnames) != len(fpaths):
+        fnames = [base_name(f) for f in fpaths]
+    trace_data = []
+    for f, fname in zip(fpaths, fnames):
+        # Missing counts become 0 *before* any arithmetic. Filling them
+        # afterwards had no effect: NaN leaked into the plotted rates.
+        stats = pd.read_csv(f, index_col=0).fillna(value=0)
+        total = stats.loc['_total', :]
+        mapped = stats.loc['_multimapped', :] + stats.loc['_uniquemapped', :]
+        # NOTE: a column whose total is 0 still yields NaN (0/0) here.
+        rate_mapped = mapped / total
+
+        overall_mapped = mapped.sum()
+        overall_total = total.sum()
+        trace_data.append(
+            go.Box(
+                y=rate_mapped,
+                name='{} (#Mapped={}/#Total={})'.format(
+                    fname, overall_mapped, overall_total)))
+
+    layout = go.Layout(
+        xaxis=dict(showticklabels=False),
+        title='Mapped/Total alignments per BC per item')
+    fig = go.Figure(data=trace_data, layout=layout)
+    try:
+        plot(fig, filename=saveto, auto_open=False)
+    except Exception as e:
+        # Best effort: report the problem and tell the caller it failed.
+        print(e, flush=True)
+        return False
+    return True
+
+
 def main():
     parser = argparse.ArgumentParser(add_help=True)
     parser.add_argument('--sam_fpath', type=str, metavar='FILENAME',

diff --git a/celseq2/demultiplex.py b/celseq2/demultiplex.py
@@ -5,7 +5,11 @@
 import argparse
 
 from celseq2.helper import filehandle_fastq_gz, print_logger
-from celseq2.helper import join_path, mkfolder
+from celseq2.helper import join_path, mkfolder, base_name
+
+import plotly.graph_objs as go
+from plotly.offline import plot
+import pandas as pd
 
 
 def str2int(s):
@@ -212,6 +216,60 @@ def write_demultiplexing(stats, dict_bc_id2seq, stats_fpath):
                                     stats['total'] / stats['total'] * 100))
 
 
+def plotly_demultiplexing_stats(fpaths=[], saveto='', fnames=[]):
+    '''
+    Draw one box per demultiplexing stats file and save the graph as html
+
+    Every file is loaded with ``pd.read_csv(f, index_col=0)``. Its last 5
+    rows are taken as the overall stats (the ``saved`` and ``total`` rows
+    are read from them); the rows above feed the box via ``Reads(#)``.
+
+    Parameters
+    ----------
+    fpaths : list
+        Paths of the demultiplexing stats files
+    saveto : str
+        Path handed to ``plot`` as the output file name
+    fnames : list
+        Labels, one per entry of ``fpaths``; ``base_name`` of each path is
+        used instead when the list is empty or its length does not match
+
+    Returns
+    -------
+    bool
+        True if ``plot`` returned normally, False if it raised (the error
+        is printed)
+    '''
+
+    labels_unusable = (not fnames) or len(fnames) != len(fpaths)
+    if labels_unusable:
+        fnames = [base_name(f) for f in fpaths]
+
+    boxes = []
+    for path, label in zip(fpaths, fnames):
+        table = pd.read_csv(path, index_col=0)
+        # tail 5 lines are fixed as the overall stats
+        per_cell, summary = table.iloc[:-5, :], table.iloc[-5:, :]
+        num_saved = summary.loc['saved', 'Reads(#)']
+        num_total = summary.loc['total', 'Reads(#)']
+        box_title = '{} (#Saved={}/#Total={})'.format(
+            label, num_saved, num_total)
+        boxes.append(go.Box(y=per_cell['Reads(#)'], name=box_title))
+
+    fig = go.Figure(
+        data=boxes,
+        layout=go.Layout(
+            xaxis=dict(showticklabels=False),
+            title='Number of reads saved per BC per item'))
+    try:
+        plot(fig, filename=saveto, auto_open=False)
+    except Exception as e:
+        # Report the failure instead of propagating it
+        print(e, flush=True)
+        return False
+    return True
+
+
 def main():
     parser = argparse.ArgumentParser(add_help=True)
     parser.add_argument('read1_fpath', type=str)

diff --git a/celseq2/qc.py b/celseq2/qc.py
@@ -218,7 +218,8 @@ def plotly_qc_st(fpath, saveto, sep='\t', name=''):
         mask_by=ST_qc.total_num_UMIs,
         hover_text=ST_qc.total_num_UMIs.astype('str'),
         colorscale='Viridis',
-        mask_title='#Total UMIs {})'.format(ST_qc.total_num_UMIs.median()))
+        mask_title=('#Total UMIs '
+                    '(median={})').format(ST_qc.total_num_UMIs.median()))
     # 3/3
     plotly_ST_mt = plotly_scatter(
         x=ST_qc.Row, y=ST_qc.Col,
@@ -239,6 +240,7 @@ def plotly_qc_st(fpath, saveto, sep='\t', name=''):
     fig['layout'].update(height=600, width=1900, title=name)
 
     fig.layout.showlegend = False
+    # Manually change the locations of other two color bars to proper places
     fig.data[0].marker.colorbar.x = 0.28
     fig.data[1].marker.colorbar.x = 0.64
 

diff --git a/celseq2/version.py b/celseq2/version.py
@@ -1 +1 @@
-__version__ = '0.5.2'
+__version__ = '0.5.3'