Skip to content

Commit

Permalink
🇨🇳 v0.5.3 Merge pull request #22 from Puriney/master
Browse files Browse the repository at this point in the history
v0.5.3
  • Loading branch information
Puriney authored May 10, 2018
2 parents 1839866 + 457245e commit 2be1954
Show file tree
Hide file tree
Showing 6 changed files with 377 additions and 81 deletions.
29 changes: 23 additions & 6 deletions celseq2/celseq2.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,41 +29,58 @@
* -n
* --notemp (--nt)
Name of rules to request outputs:
* all (default)
* TAG_FASTQ
* ANNOTATION
* ALIGNMENT
* COUNT_MATRIX
* QC_COUNT_MATRIX
* CELSEQ2_TO_ST (only available for ST data)
Refs:
- https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html
- https://bitbucket.org/snakemake/snakemake/src/e11a57fe1f62f3f56c815d95d82871811dae81b3/snakemake/__init__.py?at=master&fileviewer=file-view-default#__init__.py-580:1127
'''

task_choices = ['all', 'TAG_FASTQ', 'ANNOTATION', 'ALIGNMENT',
'COUNT_MATRIX', 'QC_COUNT_MATRIX', 'CELSEQ2_TO_ST',
'REPORT']


def get_argument_parser():
desc = ('CEL-Seq2: A Python Package for Processing CEL-Seq2 RNA-Seq Data.')
desc = ('celseq2: A Python Package for Processing CEL-Seq2 RNA-Seq Data.')
parser = argparse.ArgumentParser(description=desc, add_help=True)

parser.add_argument(
"target",
nargs="*",
default=None,
help="Targets to build. May be rules or files.")
help=('Targets to build. '
'May be rules or files. '
'Task choices: {}').format(', '.join(task_choices)))
parser.add_argument(
"--config-file",
metavar="FILE",
required=True,
help=("Specify details of CEL-Seq2 and gneome information."))
help=("Configurations of the details of CEL-Seq2 "
" and running environment."))
parser.add_argument(
"--experiment-table",
metavar="FILE",
required=True,
help=("Space/Tab separated file specifying experiment design."))
help=("Space/Tab separated file specifying the R1/R2 reads "
"and the experiment design."))
parser.add_argument(
"--output-dir",
metavar="DIRECTORY",
required=True,
help=("All results are saved with here as root directory."))
help=("All results are saved here as root directory."))

parser.add_argument(
"--reverse-stranded", "--rs",
action="store_true", default=False,
help="Read has to be mapped to the opposite strand as the feature")
help="Reads have to be mapped to the opposite strand of the feature.")

parser.add_argument(
"--celseq2-to-st", "--st",
Expand Down
62 changes: 62 additions & 0 deletions celseq2/count_umi.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
import pickle
import argparse
from collections import defaultdict, Counter
import plotly.graph_objs as go
from plotly.offline import plot
import pandas as pd
from celseq2.helper import base_name


def invert_strand(iv):
Expand Down Expand Up @@ -102,6 +106,64 @@ def _flatten_umi_set(umi_set):
# pass


def plotly_alignment_stats(fpaths=None, saveto='', fnames=None):
    '''
    Save a plotly box graph with a list of alignment stats files

    Each stats file is read with ``pandas.read_csv(f, index_col=0)`` and is
    expected to have rows labelled ``_multimapped``, ``_uniquemapped`` and
    ``_total``. One box is drawn per file; its values are the per-column
    ratio ``(_multimapped + _uniquemapped) / _total``.

    Parameters
    ----------
    fpaths : list
        A list of file paths. ``None`` is treated as an empty list.
    saveto : str
        File path to save the html file as the plotly box graph
    fnames : list
        A list of strings to label each ``fpaths``. When it is empty or its
        length differs from ``fpaths``, labels are derived from the file
        paths with ``base_name``.

    Returns
    -------
    bool
        True if saving successfully, False otherwise
    '''
    # Avoid mutable default arguments; None stands for "no files".
    if fpaths is None:
        fpaths = []
    # Fall back to labels derived from the paths whenever the supplied labels
    # cannot be paired one-to-one with the files.
    if not fnames or len(fnames) != len(fpaths):
        fnames = [base_name(f) for f in fpaths]

    # Row labels listed by the original author for these stats files:
    #   "_unmapped", "_low_map_qual", "_multimapped", "_uniquemapped",
    #   "_no_feature", "_ambiguous", "_total"
    trace_data = []
    for f, fname in zip(fpaths, fnames):
        stats = pd.read_csv(f, index_col=0)
        # Fill missing counts BEFORE deriving anything from them. Previously
        # this ran after the arithmetic and therefore had no effect, so one
        # missing count turned the whole mapped value for that column to NaN.
        stats.fillna(value=0, inplace=True)

        total = stats.loc['_total', :]
        mapped = stats.loc['_multimapped', :] + stats.loc['_uniquemapped', :]
        rate_mapped = mapped / total

        overall_mapped = mapped.sum()
        overall_total = total.sum()

        trace_data.append(
            go.Box(
                y=rate_mapped,
                name='{} (#Mapped={}/#Total={})'.format(
                    fname, overall_mapped, overall_total)))

    layout = go.Layout(
        xaxis=dict(showticklabels=False),
        title='Mapped/Total alignments per BC per item')
    fig = go.Figure(data=trace_data, layout=layout)
    # Best-effort save: any failure is reported on stdout and signalled to
    # the caller through the boolean return value instead of being raised.
    try:
        plot(fig, filename=saveto, auto_open=False)
        return True
    except Exception as e:
        print(e, flush=True)
        return False


def main():
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('--sam_fpath', type=str, metavar='FILENAME',
Expand Down
60 changes: 59 additions & 1 deletion celseq2/demultiplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
import argparse

from celseq2.helper import filehandle_fastq_gz, print_logger
from celseq2.helper import join_path, mkfolder
from celseq2.helper import join_path, mkfolder, base_name

import plotly.graph_objs as go
from plotly.offline import plot
import pandas as pd


def str2int(s):
Expand Down Expand Up @@ -212,6 +216,60 @@ def write_demultiplexing(stats, dict_bc_id2seq, stats_fpath):
stats['total'] / stats['total'] * 100))


def plotly_demultiplexing_stats(fpaths=None, saveto='', fnames=None):
    '''
    Save a plotly box graph with a list of demultiplexing stats files

    Each stats file is read with ``pandas.read_csv(f, index_col=0)``. Its
    last 5 rows are taken as the overall stats (the rows ``saved`` and
    ``total`` are read from them) and all rows before are taken as the
    per-cell stats. One box is drawn per file from the ``Reads(#)`` column
    of the per-cell rows.

    Parameters
    ----------
    fpaths : list
        A list of file paths. ``None`` is treated as an empty list.
    saveto : str
        File path to save the html file as the plotly box graph
    fnames : list
        A list of strings to label each ``fpaths``. When it is empty or its
        length differs from ``fpaths``, labels are derived from the file
        paths with ``base_name``.

    Returns
    -------
    bool
        True if saving successfully, False otherwise
    '''
    # Avoid mutable default arguments; None stands for "no files".
    if fpaths is None:
        fpaths = []
    # Fall back to labels derived from the paths whenever the supplied labels
    # cannot be paired one-to-one with the files.
    if not fnames or len(fnames) != len(fpaths):
        fnames = [base_name(f) for f in fpaths]

    # tail 5 lines are fixed as the overall stats
    num_overall_rows = 5

    num_reads_data = []
    for f, fname in zip(fpaths, fnames):
        stats = pd.read_csv(f, index_col=0)
        cell_stats = stats.iloc[:-num_overall_rows, :]
        overall_stats = stats.iloc[-num_overall_rows:, :]
        num_reads_data.append(
            go.Box(
                y=cell_stats['Reads(#)'],
                name='{} (#Saved={}/#Total={})'.format(
                    fname,
                    overall_stats.loc['saved', 'Reads(#)'],
                    overall_stats.loc['total', 'Reads(#)'])))

    layout = go.Layout(
        xaxis=dict(showticklabels=False),
        title='Number of reads saved per BC per item')
    fig = go.Figure(data=num_reads_data, layout=layout)
    # Best-effort save: any failure is reported on stdout and signalled to
    # the caller through the boolean return value instead of being raised.
    try:
        plot(fig, filename=saveto, auto_open=False)
        return True
    except Exception as e:
        print(e, flush=True)
        return False


def main():
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('read1_fpath', type=str)
Expand Down
4 changes: 3 additions & 1 deletion celseq2/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ def plotly_qc_st(fpath, saveto, sep='\t', name=''):
mask_by=ST_qc.total_num_UMIs,
hover_text=ST_qc.total_num_UMIs.astype('str'),
colorscale='Viridis',
mask_title='#Total UMIs {})'.format(ST_qc.total_num_UMIs.median()))
mask_title=('#Total UMIs '
'(median={})').format(ST_qc.total_num_UMIs.median()))
# 3/3
plotly_ST_mt = plotly_scatter(
x=ST_qc.Row, y=ST_qc.Col,
Expand All @@ -239,6 +240,7 @@ def plotly_qc_st(fpath, saveto, sep='\t', name=''):
fig['layout'].update(height=600, width=1900, title=name)

fig.layout.showlegend = False
# Manually change the locations of other two color bars to proper places
fig.data[0].marker.colorbar.x = 0.28
fig.data[1].marker.colorbar.x = 0.64

Expand Down
2 changes: 1 addition & 1 deletion celseq2/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.5.2'
__version__ = '0.5.3'
Loading

0 comments on commit 2be1954

Please sign in to comment.