Skip to content

Commit

Permalink
Implement caching
Browse files Browse the repository at this point in the history
  • Loading branch information
TylerSagendorf committed Jan 13, 2023
1 parent afad124 commit 6aeabdc
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 22 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Depends:
R (>= 3.5.0),
MSnbase
Imports:
BiocFileCache,
circlize,
ComplexHeatmap (>= 2.12.0),
cowplot,
Expand All @@ -45,6 +46,7 @@ Imports:
statmod,
stats,
tibble,
tools,
utils,
WGCNA
RoxygenNote: 7.2.3
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ import(ComplexHeatmap)
import(ggplot2)
import(limma)
import(statmod)
importFrom(BiocFileCache,BiocFileCache)
importFrom(BiocFileCache,bfcadd)
importFrom(BiocFileCache,bfccount)
importFrom(BiocFileCache,bfcnew)
importFrom(BiocFileCache,bfcquery)
importFrom(BiocFileCache,bfcrpath)
importFrom(MSnbase,`fData<-`)
importFrom(MSnbase,`pData<-`)
importFrom(MSnbase,exprs)
Expand Down
70 changes: 51 additions & 19 deletions R/update_GO_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
#' \href{http://geneontology.org/}{Gene Ontology Consortium}.
#'
#' @param x object of class \code{data.frame} produced by
#' \code{link[msigdbr]{msigdbr}} or
#' \code{\link[MotrpacRatTraining6moWAT]{msigdbr2}} containing columns
#' \code{link[msigdbr]{msigdbr}} or \code{\link{msigdbr2}} containing columns
#' \code{gs_description} and \code{gs_subcat}.
#' @param version character; specifies the version of \code{msigdbr} to use.
#' Defaults to the current version.
Expand Down Expand Up @@ -42,6 +41,8 @@
#' @importFrom data.table setDT setDF setkeyv `:=`
#' @importFrom ontologyIndex get_OBO
#' @importFrom utils head packageVersion
#' @importFrom BiocFileCache BiocFileCache bfcquery bfcadd bfcrpath bfcnew
#' bfccount
#'
#' @export update_GO_names
#'
Expand All @@ -50,7 +51,7 @@
#' genes = "gene_symbol",
#' gs_subcat = "GO:MF")
#' set.seed(9900)
#' idx <- sample(1:nrow(x), size = 6)
#' idx <- sample(1:nrow(x), size = 6) # random indices to illustrate changes
#' x$gs_description[idx] # before
#'
#' y <- update_GO_names(x, capitalize = TRUE)
Expand All @@ -63,20 +64,47 @@ update_GO_names <- function(x,
{
go_subcats <- c("GO:MF", "GO:CC", "GO:BP")

# setDT(x, key = "gs_exact_source")
setDT(x)

if (any(x[["gs_subcat"]] %in% go_subcats)) {
file <- obo_file(version = version)

dir <- tools::R_user_dir(package = "MotrpacRatTraining6moWAT",
which = "cache")

bfc <- BiocFileCache(cache = dir)
rname <- sprintf("MSigDB_v%s_Release_Notes", version)

q1 <- bfcquery(bfc, query = rname, field = "rname")

if (bfccount(q1) == 1L) {
file <- q1$fpath
} else {
message(sprintf("Searching MSigDB %s Release Notes for OBO file date:",
version))
file <- obo_file(version = version)
bfcadd(bfc, fpath = file, rname = rname, fname = "unique")
}

message(paste("Updating GO term descriptions with", file))

go_basic_list <- get_OBO(file,
propagate_relationships = "is_a",
extract_tags = "minimal")
go.dt <- as.data.frame(go_basic_list)
setDT(go.dt)
go.dt <- go.dt[obsolete != TRUE & grepl("^GO", id), list(id, name)]
# setkeyv(go.dt, cols = "id")
obo_date <- sub(".*org/([^/]+)/.*", "\\1", file)
obo_rname <- paste0("GO_OBO_data_", obo_date)

q2 <- bfcquery(bfc, query = obo_rname, field = "rname")

if (bfccount(q2) == 1L) {
go.dt <- readRDS(file = bfcrpath(bfc, rnames = obo_rname))
} else {
message("Downloading OBO file to cache:")
go_basic_list <- get_OBO(file,
propagate_relationships = "is_a",
extract_tags = "minimal")
go.dt <- as.data.frame(go_basic_list)
setDT(go.dt)
go.dt <- go.dt[obsolete != TRUE & grepl("^GO", id), list(id, name)]
saveRDS(go.dt, file = bfcnew(bfc, rname = obo_rname, fname = "unique"),
compress = TRUE)
}

# Update gs_description column with names from OBO file
x[go.dt, on = list(gs_exact_source = id), gs_description := i.name]
Expand All @@ -100,32 +128,36 @@ update_GO_names <- function(x,
## Helper functions ------------------------------------------------------------

# Get the path to the appropriate OBO file.
obo_file <- function(version = packageVersion("msigdbr")) {
obo_file <- function(version = packageVersion("msigdbr"))
{
# MSigDB release notes for appropriate version
path <- sprintf(file.path("https://software.broadinstitute.org/cancer",
"software/gsea/wiki/index.php",
"MSigDB_v%s_Release_Notes"), version)
x <- readLines(path)
url <- sprintf(file.path("https://software.broadinstitute.org/cancer",
"software/gsea/wiki/index.php",
"MSigDB_v%s_Release_Notes"), version)

x <- readLines(url)
x <- paste(x, collapse = "")
x <- gsub("\\\\n", "", x)

phrase <- "GO-basic obo file released on"

if (!grepl(phrase, x)) {
stop(sprintf("Phrase '%s' not found in %s", phrase, path))
stop(sprintf("Phrase '%s' not found in %s", phrase, url))
}

obo_date <- sub(sprintf(".*%s ([^ ]+).*", phrase), "\\1", x)
obo_file <- sprintf("http://release.geneontology.org/%s/ontology/go-basic.obo",
obo_date)

return(obo_file)
}


# Capitalize first letter of first word unless there is
# already a mix of uppercase and lowercase letters.
# x is a character vector.
cap_names <- function(x) {
cap_names <- function(x)
{
first_word <- sub(" .*", "", x)
idx <- first_word == tolower(first_word)

Expand Down
5 changes: 2 additions & 3 deletions man/update_GO_names.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6aeabdc

Please sign in to comment.