Skip to content

Commit

Permalink
hotfix 3 - yaml support for integration into PANOPLY
Browse files Browse the repository at this point in the history
  • Loading branch information
Karsten Krug committed Aug 11, 2020
1 parent fdcb519 commit 4c4fa91
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 100 deletions.
15 changes: 15 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
####################################
## ssgsea parameters
##
## All settings live under the 'panoply_ssgsea' section; the keys below must
## stay indented beneath it so that they are read as members of that section
## (the parser looks the section up by name).
## NOTE(review): values given on the command line are expected to take
## precedence over the values in this file - confirm against the parser.
panoply_ssgsea:
  nperm: 1000
  weight: 0.75
  output_prefix: "ssgsea-results"
  sample_norm_type: "rank" ## rank, log, log.rank, none
  correl_type: "z.score" ## "rank", "z.score", "symm.rank"
  statistic: "area.under.RES" ## "area.under.RES", "Kolmogorov-Smirnov"
  output_score_type: "NES" ## 'Score type: "ES" - enrichment score, "NES" - normalized ES'
  min_overlap: 10 ## 'Minimal overlap between signature and data set.'
  extended_output: TRUE ## 'If TRUE additional stats on signature coverage etc. will be included as row annotations in the GCT results files.'
  export_signat_gct: TRUE ## For each signature export expression GCT files.
  global_fdr: FALSE ## If TRUE global FDR across all data columns is calculated.
  multi_core: TRUE ## If TRUE processing will be parallelized across gene sets using (N-1) CPU cores.
2 changes: 1 addition & 1 deletion src/parse_yaml_ssgsea.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
## - yaml file
## parameters in yaml file will be updated with
## parameters specified on cmd
parse_param_ssgsea <- function(cmd_option_list, yaml_section='ssgsea'){
parse_param_ssgsea <- function(cmd_option_list, yaml_section='panoply_ssgsea'){

## #########################################################
# parse command line parameters
Expand Down
26 changes: 13 additions & 13 deletions ssGSEA2.0.Rproj
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
144 changes: 72 additions & 72 deletions ssgsea-cli.R
Original file line number Diff line number Diff line change
@@ -1,72 +1,72 @@
#!/usr/bin/env Rscript
# Command-line entry point for ssGSEA2.0.
#
# Steps performed by this script:
#   1. make sure 'pacman' and 'optparse' are available,
#   2. locate the directory this script lives in,
#   3. declare the command line options,
#   4. source the ssGSEA implementation and the parameter parser from 'src/',
#   5. parse the parameters and run ssGSEA2().

# Silence all warnings for the remainder of the run.
options( warn = -1 )

# Install 'pacman' if it is missing, then attach it explicitly.
# 'require()' only attaches a package that is already installed, so after a
# fresh install 'p_load' would not be found without the 'library()' call.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
suppressPackageStartupMessages(library("pacman"))
suppressPackageStartupMessages(p_load("optparse"))

# Determine the directory this file is located in.
# The interpreter passes the script path as '--file=<path>'; look it up by
# name rather than by position so that extra interpreter flags do not shift it.
script.dir <- '.'
file.arg <- grep('^--file=', commandArgs(trailingOnly = FALSE), value = TRUE)
if (length(file.arg) > 0) {
  # keep everything up to (and including) the last '/' or '\'
  dir.candidate <- sub('^(.*(/|\\\\)).*', '\\1', sub('^--file=', '', file.arg[1]))
  # If the script is called from its own directory the path has no directory
  # part (reported as unreliable on Windows); fall back to '.' in that case.
  if (dir.exists(dir.candidate)) {
    script.dir <- dir.candidate
  }
}

# Specify command line arguments.
# Every option is declared with type 'character', including the numeric and
# logical ones.
# NOTE(review): presumably the values are converted downstream - confirm.
# NOTE(review): the long flag '--ouptut' looks like a typo for '--output'; it
# is left unchanged because renaming it would break existing command lines.
option_list <- list(
  make_option( c("-i", "--input"), action='store', type='character', dest='input_ds', help='Path to input GCT file.'),
  make_option( c("-o", "--ouptut"), action='store', type='character', dest='output_prefix', help='File prefix for output files.', default='out'),
  make_option( c("-d", "--db"), action='store', type='character', dest='gene_set_databases', help='Path to gene set database (GMT format).'),
  make_option( c("-n", "--norm"), action='store', type='character', dest='sample_norm_type', help='Sample normalization: "rank", "log", "log.rank" or "none".', default = 'rank'),
  make_option( c("-w", "--weight"), action='store', type='character', dest='weight', help='When weight==0, all genes have the same weight; if weight>0 actual values matter and can change the resulting score.', default = 0.75),
  make_option( c("-c", "--correl"), action='store', type='character', dest='correl_type', help='Correlation type: "rank", "z.score", "symm.rank".', default = 'z.score'),
  make_option( c("-t", "--test"), action='store', type='character', dest='statistic', help='Test statistic: "area.under.RES", "Kolmogorov-Smirnov"', default = 'area.under.RES'),
  make_option( c("-s", "--score"), action='store', type='character', dest='output_score_type', help='Score type: "ES" - enrichment score, "NES" - normalized ES', default = 'NES'),
  make_option( c("-p", "--perm"), action='store', type='character', dest='nperm', help='Number of permutations', default = 1000),
  make_option( c("-m", "--minoverlap"), action='store', type='character', dest='min_overlap', help='Minimal overlap between signature and data set.', default = 10),
  make_option( c("-x", "--extendedoutput"), action='store', type='character', dest='extended_output', help='If TRUE additional stats on signature coverage etc. will be included as row annotations in the GCT results files.', default = TRUE),
  make_option( c("-e", "--export"), action='store', type='character', dest='export_signat_gct', help='For each signature export expression GCT files.', default = TRUE),
  make_option( c("-g", "--globalfdr"), action='store', type='character', dest='global_fdr', help='If TRUE global FDR across all data columns is calculated.', default = FALSE),
  make_option( c("-l", "--lightspeed"), action='store', type='character', dest='multi_core', help='If TRUE processing will be parallized across gene sets. (I ran out of single letters to define parameters...)', default = TRUE),
  make_option( c("-y", "--yaml"), action='store', type='character', dest='yaml_file', help='Parameter file (.yaml)', default = NA)
)

## #####################################
## source the actual script
source(file.path(script.dir, 'src', 'ssGSEA2.0.R'))
source(file.path(script.dir, 'src', 'parse_yaml_ssgsea.R'))

# Parse command line parameters; the result is accessed below by the 'dest'
# names declared in 'option_list'.
opt <- parse_param_ssgsea(option_list)

# hard-coded parameters
spare.cores <- 0 # use all available cpus
# The option is stored as 'output_prefix' (underscore); the former
# 'opt$output.prefix' was always NULL, so the log file lost its prefix.
log.file <- paste0(opt$output_prefix, '_ssgsea.log.txt')


## ######################################################################################################
##
## run ssGSEA
##
## ######################################################################################################
res <- ssGSEA2(
  input.ds=opt$input_ds,
  output.prefix=opt$output_prefix,
  gene.set.databases=opt$gene_set_databases,
  sample.norm.type=opt$sample_norm_type,
  weight=opt$weight,
  statistic=opt$statistic,
  output.score.type=opt$output_score_type,
  nperm=opt$nperm,
  min.overlap=opt$min_overlap,
  correl.type=opt$correl_type,
  export.signat.gct=opt$export_signat_gct,
  extended.output=opt$extended_output,
  global.fdr=opt$global_fdr,
  par=opt$multi_core,
  spare.cores=spare.cores,
  log.file=log.file
)



#!/usr/bin/env Rscript
# Command-line entry point for ssGSEA2.0.
#
# Steps performed by this script:
#   1. make sure 'pacman' and 'optparse' are available,
#   2. locate the directory this script lives in,
#   3. declare the command line options,
#   4. source the ssGSEA implementation and the parameter parser from 'src/',
#   5. parse the parameters and run ssGSEA2().

# Silence all warnings for the remainder of the run.
options( warn = -1 )

# Install 'pacman' if it is missing, then attach it explicitly.
# 'require()' only attaches a package that is already installed, so after a
# fresh install 'p_load' would not be found without the 'library()' call.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
suppressPackageStartupMessages(library("pacman"))
suppressPackageStartupMessages(p_load("optparse"))

# Determine the directory this file is located in.
# The interpreter passes the script path as '--file=<path>'; look it up by
# name rather than by position so that extra interpreter flags do not shift it.
script.dir <- '.'
file.arg <- grep('^--file=', commandArgs(trailingOnly = FALSE), value = TRUE)
if (length(file.arg) > 0) {
  # keep everything up to (and including) the last '/' or '\'
  dir.candidate <- sub('^(.*(/|\\\\)).*', '\\1', sub('^--file=', '', file.arg[1]))
  # If the script is called from its own directory the path has no directory
  # part (reported as unreliable on Windows); fall back to '.' in that case.
  if (dir.exists(dir.candidate)) {
    script.dir <- dir.candidate
  }
}

# Specify command line arguments.
# Every option is declared with type 'character', including the numeric and
# logical ones.
# NOTE(review): presumably the values are converted downstream - confirm.
# NOTE(review): the long flag '--ouptut' looks like a typo for '--output'; it
# is left unchanged because renaming it would break existing command lines.
option_list <- list(
  make_option( c("-i", "--input"), action='store', type='character', dest='input_ds', help='Path to input GCT file.'),
  make_option( c("-o", "--ouptut"), action='store', type='character', dest='output_prefix', help='File prefix for output files.', default='out'),
  make_option( c("-d", "--db"), action='store', type='character', dest='gene_set_databases', help='Path to gene set database (GMT format).'),
  make_option( c("-n", "--norm"), action='store', type='character', dest='sample_norm_type', help='Sample normalization: "rank", "log", "log.rank" or "none".', default = 'rank'),
  make_option( c("-w", "--weight"), action='store', type='character', dest='weight', help='When weight==0, all genes have the same weight; if weight>0 actual values matter and can change the resulting score.', default = 0.75),
  make_option( c("-c", "--correl"), action='store', type='character', dest='correl_type', help='Correlation type: "rank", "z.score", "symm.rank".', default = 'z.score'),
  make_option( c("-t", "--test"), action='store', type='character', dest='statistic', help='Test statistic: "area.under.RES", "Kolmogorov-Smirnov"', default = 'area.under.RES'),
  make_option( c("-s", "--score"), action='store', type='character', dest='output_score_type', help='Score type: "ES" - enrichment score, "NES" - normalized ES', default = 'NES'),
  make_option( c("-p", "--perm"), action='store', type='character', dest='nperm', help='Number of permutations', default = 1000),
  make_option( c("-m", "--minoverlap"), action='store', type='character', dest='min_overlap', help='Minimal overlap between signature and data set.', default = 10),
  make_option( c("-x", "--extendedoutput"), action='store', type='character', dest='extended_output', help='If TRUE additional stats on signature coverage etc. will be included as row annotations in the GCT results files.', default = TRUE),
  make_option( c("-e", "--export"), action='store', type='character', dest='export_signat_gct', help='For each signature export expression GCT files.', default = TRUE),
  make_option( c("-g", "--globalfdr"), action='store', type='character', dest='global_fdr', help='If TRUE global FDR across all data columns is calculated.', default = FALSE),
  make_option( c("-l", "--lightspeed"), action='store', type='character', dest='multi_core', help='If TRUE processing will be parallized across gene sets. (I ran out of single letters to define parameters...)', default = TRUE),
  make_option( c("-y", "--yaml"), action='store', type='character', dest='yaml_file', help='Parameter file (.yaml)', default = NA)
)

## #####################################
## source the actual script
source(file.path(script.dir, 'src', 'ssGSEA2.0.R'))
source(file.path(script.dir, 'src', 'parse_yaml_ssgsea.R'))

# Parse command line parameters; the result is accessed below by the 'dest'
# names declared in 'option_list'.
opt <- parse_param_ssgsea(option_list)

# hard-coded parameters
spare.cores <- 0 # use all available cpus
# The option is stored as 'output_prefix' (underscore); the former
# 'opt$output.prefix' was always NULL, so the log file lost its prefix.
log.file <- paste0(opt$output_prefix, '_ssgsea.log.txt')


## ######################################################################################################
##
## run ssGSEA
##
## ######################################################################################################
res <- ssGSEA2(
  input.ds=opt$input_ds,
  output.prefix=opt$output_prefix,
  gene.set.databases=opt$gene_set_databases,
  sample.norm.type=opt$sample_norm_type,
  weight=opt$weight,
  statistic=opt$statistic,
  output.score.type=opt$output_score_type,
  nperm=opt$nperm,
  min.overlap=opt$min_overlap,
  correl.type=opt$correl_type,
  export.signat.gct=opt$export_signat_gct,
  extended.output=opt$extended_output,
  global.fdr=opt$global_fdr,
  par=opt$multi_core,
  spare.cores=spare.cores,
  log.file=log.file
)



14 changes: 0 additions & 14 deletions ssgsea-params.yaml

This file was deleted.

0 comments on commit 4c4fa91

Please sign in to comment.