diff --git a/R/cellbrowser_prep.R b/R/cellbrowser_prep.R index e535d25..d0ff671 100644 --- a/R/cellbrowser_prep.R +++ b/R/cellbrowser_prep.R @@ -102,14 +102,14 @@ print(file.path(opt$seurat_path, opt$runspecs, "cluster.markers.dir","markers.summary.table.xlsx")) data_selected = openxlsx::read.xlsx(xlsxFile = file.path(opt$seurat_path, opt$runspecs, "cluster.markers.dir","markers.summary.table.xlsx")) -output = data_selected[,c("gene","gene_id", "cluster","avg_logFC","p.adj")] -output = output %>% group_by(cluster) %>% dplyr::arrange(desc(avg_logFC)) %>% do(head(.,n=20)) %>% ungroup() +output = data_selected[,c("gene","gene_id", "cluster","avg_log2FC","p.adj")] +output = output %>% group_by(cluster) %>% dplyr::arrange(desc(avg_log2FC)) %>% do(head(.,n=20)) %>% ungroup() output$celltype = "top20_marker_Seurat_cluster" output$cluster_marker = paste(output$celltype, output$cluster, sep="_") output = output %>% dplyr::select(-celltype) -colnames(output) = c("gene","gene_id","cluster","avg_logFC","p_adjusted","celltype_marker") -output = output[,c("cluster","gene","p_adjusted","avg_logFC","celltype_marker")] +colnames(output) = c("gene","gene_id","cluster","avg_log2FC","p_adjusted","celltype_marker") +output = output[,c("cluster","gene","p_adjusted","avg_log2FC","celltype_marker")] write.table(output, file.path(opt$outdir, "markers.tsv"), sep = "\t", quote = FALSE, row.names = FALSE) -cat("Completed") \ No newline at end of file +cat("Completed") diff --git a/R/genesetAnalysis.R b/R/genesetAnalysis.R index aec5c5e..0dfb817 100644 --- a/R/genesetAnalysis.R +++ b/R/genesetAnalysis.R @@ -64,10 +64,10 @@ cluster_de <- de[de$cluster==opt$cluster & de$p.adj<=opt$adjpthreshold,] ## only positive markers considered if(opt$direction=="positive") { - foreground <- unique(cluster_de$gene_id[cluster_de$avg_logFC>0]) + foreground <- unique(cluster_de$gene_id[cluster_de$avg_log2FC>0]) } else if (opt$direction=="negative") { - foreground <- 
unique(cluster_de$gene_id[cluster_de$avg_logFC<0]) + foreground <- unique(cluster_de$gene_id[cluster_de$avg_log2FC<0]) } else if (opt$direction=="both") { foreground <- unique(cluster_de$gene_id) diff --git a/R/seurat_FindMarkers.R b/R/seurat_FindMarkers.R index ba366ad..1f52c53 100644 --- a/R/seurat_FindMarkers.R +++ b/R/seurat_FindMarkers.R @@ -297,7 +297,7 @@ for (conserved.level in levels(ident.conserved)){ return.thresh = 1 if (nrow(markers) > 0) { - markers = markers[order(markers$p_val, -markers$avg_logFC), ] + markers = markers[order(markers$p_val, -markers$avg_log2FC), ] markers = subset(markers, p_val < return.thresh) if (nrow(markers) > 0){ @@ -314,7 +314,7 @@ for (conserved.level in levels(ident.conserved)){ markers$p.adj <- p.adjust(markers$p_val, method="BH") message("selecting columns of interest") - markers <- markers[,c("cluster","gene","p.adj","p_val","avg_logFC","pct.1","pct.2")] + markers <- markers[,c("cluster","gene","p.adj","p_val","avg_log2FC","pct.1","pct.2")] markers <- markers[order(markers$cluster, markers$p_val),] print(head(markers)) @@ -425,7 +425,7 @@ if (length(markers.conserved.list) > 1){ table(markers.sig) markers.sig <- names(markers.sig)[markers.sig] - markers.fc <- do.call("cbind", lapply(markers.conserved.list, function(x){x[markers.sig, "avg_logFC"]})) + markers.fc <- do.call("cbind", lapply(markers.conserved.list, function(x){x[markers.sig, "avg_log2FC"]})) rownames(markers.fc) <- markers.sig colnames(markers.fc) <- names(markers.conserved.list) @@ -480,14 +480,14 @@ if (length(markers.conserved.list) > 1){ tmp.table <- do.call( "cbind", lapply(markers.conserved.list, function(x){ - x[markers.conserved, "avg_logFC"] + x[markers.conserved, "avg_log2FC"] }) ) rownames(tmp.table) <- markers.conserved - tmp.table <- exp(tmp.table) - 1 + tmp.table <- 2^tmp.table - 1 - conserved.table$avg_logFC <- log(rowMeans(as.matrix(tmp.table[markers.conserved, , drop=FALSE])) + 1) + conserved.table$avg_log2FC <- log2(rowMeans(as.matrix(tmp.table[markers.conserved, , 
drop=FALSE])) + 1) - message("avg_logFC added") + message("avg_log2FC added") # average percentage of cells in group 1 ---- tmp.table <- do.call( @@ -547,7 +547,7 @@ if (length(markers.conserved.list) > 1){ gene_id = character(0), p_val = character(0), p.adj = character(0), - avg_logFC = character(0), + avg_log2FC = character(0), pct.1 = character(0), pct.2 = character(0), cluster_mean = character(0), diff --git a/R/seurat_cluster_marker_plots.R b/R/seurat_cluster_marker_plots.R index 9629c71..3f1ab03 100644 --- a/R/seurat_cluster_marker_plots.R +++ b/R/seurat_cluster_marker_plots.R @@ -112,13 +112,13 @@ x <- x[x$cluster==as.numeric(opt$cluster) & x$p.adj<0.1,] n_select = 16 # pull out by average and minimum log fold change -x %>% top_n(n_select, avg_logFC) -> top_by_avg_logFC +x %>% top_n(n_select, avg_log2FC) -> top_by_avg_log2FC n_select = 8 -x[!x$gene %in% top_by_avg_logFC$gene,] %>% +x[!x$gene %in% top_by_avg_log2FC$gene,] %>% top_n(n_select, min_logFC) -> top_by_min_logFC -x <- x[x$gene %in% c(top_by_avg_logFC$gene, +x <- x[x$gene %in% c(top_by_avg_log2FC$gene, top_by_min_logFC$gene),] # marker gene heatmap. 
diff --git a/R/seurat_summariseMarkerNumbers.R b/R/seurat_summariseMarkerNumbers.R index 28d6e57..c277187 100644 --- a/R/seurat_summariseMarkerNumbers.R +++ b/R/seurat_summariseMarkerNumbers.R @@ -81,8 +81,8 @@ message("summarising and melting the data") summarised_data <- degenes %>% dplyr::group_by(cluster) %>% dplyr::summarise( - positive=length(which(p.adj < opt$minpadj & avg_logFC >= log(opt$minfc))), - negative=length(which(p.adj < opt$minpadj & avg_logFC <= -log(opt$minfc)))) + positive=length(which(p.adj < opt$minpadj & avg_log2FC >= log2(opt$minfc))), + negative=length(which(p.adj < opt$minpadj & avg_log2FC <= -log2(opt$minfc)))) melted_data <- melt(summarised_data, id=c("cluster")) diff --git a/R/seurat_summariseMarkers.R b/R/seurat_summariseMarkers.R index b493031..6a981c7 100644 --- a/R/seurat_summariseMarkers.R +++ b/R/seurat_summariseMarkers.R @@ -93,7 +93,7 @@ print(dim(markers)) markers <- markers[,c("cluster","gene","gene_id", "p_val","p.adj", - "avg_logFC","pct.1","pct.2", + "avg_log2FC","pct.1","pct.2", "cluster_mean","other_mean")] markers <- markers[order(markers$cluster, markers$p.adj),] @@ -235,7 +235,7 @@ saveWorkbook(wb, file=paste(outPrefix,"table","xlsx", message("Making a heatmap of the top marker genes from each cluster") ## make a heatmap of the top DE genes. 
-filtered_markers %>% group_by(cluster) %>% top_n(20, avg_logFC) -> top20 +filtered_markers %>% group_by(cluster) %>% top_n(20, avg_log2FC) -> top20 if(!is.null(opt$subgroup)) { @@ -287,8 +287,8 @@ summary <- c() for(id in idents.all) { ncells = length(cluster_ids[cluster_ids==id]) - npos = length(filtered_markers$p.adj[filtered_markers$cluster==id & filtered_markers$avg_logFC > 0] ) - nneg = length(filtered_markers$p.adj[filtered_markers$cluster==id & filtered_markers$avg_logFC < 0] ) + npos = length(filtered_markers$p.adj[filtered_markers$cluster==id & filtered_markers$avg_log2FC > 0] ) + nneg = length(filtered_markers$p.adj[filtered_markers$cluster==id & filtered_markers$avg_log2FC < 0] ) ntotal = npos + nneg summary <- c(summary,c(id, ncells, npos, nneg, ntotal)) } diff --git a/R/seurat_summariseMarkersBetween.R b/R/seurat_summariseMarkersBetween.R index 98b0449..10bb19d 100644 --- a/R/seurat_summariseMarkersBetween.R +++ b/R/seurat_summariseMarkersBetween.R @@ -126,7 +126,7 @@ for(cluster in clusters) } } -res <- res[,c("cluster","gene","p.adj","p_val","avg_logFC", +res <- res[,c("cluster","gene","p.adj","p_val","avg_log2FC", "pct.1","pct.2",aName,bName,"gene_id")] out_fn <- file.path( @@ -222,7 +222,7 @@ save_ggplots(gsub(".tex",".nde",tex_fn), height=5) ## Make a heatmap -diffMat <- dcast(res, gene~cluster, value.var="avg_logFC") +diffMat <- dcast(res, gene~cluster, value.var="avg_log2FC") diffMat[is.na(diffMat)] <- 0 rownames(diffMat) <- diffMat$gene @@ -258,7 +258,7 @@ plot_fn <- function() density.info=c("none"), lwid = c(1,5), lhei = c(1,8), - key.xlab = "avg_logFC", + key.xlab = "avg_log2FC", key.ylab = "", xlab="cluster", cexRow = 0.4, diff --git a/R/seurat_topMarkerHeatmap.R b/R/seurat_topMarkerHeatmap.R index f6beb4b..a6ec708 100644 --- a/R/seurat_topMarkerHeatmap.R +++ b/R/seurat_topMarkerHeatmap.R @@ -59,7 +59,7 @@ markers <- read.table(opt$markers, filtered_markers <- data.table(markers[markers$p.adj < 0.1,]) ## make a heatmap of the top DE genes. 
-filtered_markers %>% group_by(cluster) %>% top_n(20, avg_logFC) -> top20 +filtered_markers %>% group_by(cluster) %>% top_n(20, avg_log2FC) -> top20 if(!is.null(opt$subgroup)) { diff --git a/pipelines/pipeline_scxl.py b/pipelines/pipeline_scxl.py index a35c2e7..eef18fa 100644 --- a/pipelines/pipeline_scxl.py +++ b/pipelines/pipeline_scxl.py @@ -2443,7 +2443,7 @@ def topClusterMarkers(infile, outfile): def _filterAndScore(data): # filter for strong cluster markers data = data[(data["p.adj"] < 0.01) & - (data["avg_logFC"].abs() > np.log(2)) & + (data["avg_log2FC"].abs() > np.log2(2)) & (data["cluster_mean"] > 2) & (data["pct.1"] > 0.25)] @@ -2452,7 +2452,7 @@ def _filterAndScore(data): # for fold change, expression level and adjusted p-value. # the aim is to give "better" markers higher scores. pscore = [1 - x for x in data["p.adj"].values] - fscore = [np.exp(np.abs(x)) for x in data["avg_logFC"].values] + fscore = [np.exp2(np.abs(x)) for x in data["avg_log2FC"].values] escore = [np.log2(x) for x in data["cluster_mean"].values] # construct a matrix of the scores and take the geometric mean. @@ -2535,7 +2535,7 @@ def _write_tables(d, name="none"): # keep up to n entries per cluster # note that groupby preserves the ordering. 
- positive_markers = data[data["avg_logFC"] > 0] + positive_markers = data[data["avg_log2FC"] > 0] positive_markers = _filterAndScore(positive_markers) positive_markers = _skimMarkers(positive_markers, PARAMS["exprsreport_n_positive"]) @@ -2547,7 +2547,7 @@ def _write_tables(d, name="none"): if stat: statements.append(stat) - negative_markers = data[data["avg_logFC"] < 0] + negative_markers = data[data["avg_log2FC"] < 0] negative_markers = _filterAndScore(negative_markers) negative_markers = _skimMarkers(negative_markers, PARAMS["exprsreport_n_negative"]) diff --git a/python/make_anndata.py b/python/make_anndata.py index 99d6a4a..940e2d8 100644 --- a/python/make_anndata.py +++ b/python/make_anndata.py @@ -5,7 +5,7 @@ import pandas as pd import logging import sys - +import hnswlib # ########################################################################### # diff --git a/python/run_paga.py b/python/run_paga.py index 5698461..e8f04c6 100644 --- a/python/run_paga.py +++ b/python/run_paga.py @@ -14,7 +14,7 @@ from scipy import sparse import logging import sys - +import igraph # ########################################################################### # diff --git a/tenxutils/R/Helper.R b/tenxutils/R/Helper.R index 566bbbe..6164e68 100644 --- a/tenxutils/R/Helper.R +++ b/tenxutils/R/Helper.R @@ -52,7 +52,7 @@ sprintfResults <- function(results_table, #' @param m_col The column containing the log2 ratio #' @param id_col A column containing unique identifiers #' @param ngenes The number of genes to demarcate -topGenes <- function(data, m_col = "avg_logFC", +topGenes <- function(data, m_col = "avg_log2FC", use_fc = TRUE, id_col="gene", ngenes=7) { @@ -76,7 +76,7 @@ topGenes <- function(data, m_col = "avg_logFC", #' @param m_col The column containing the log2 ratio #' @param ngenes The number of genes to demarcate #' @param id_col A column containing a unique identifier -categoriseGenes <- function(data,m_col="avg_logFC", use_fc=TRUE, +categoriseGenes <- 
function(data,m_col="avg_log2FC", use_fc=TRUE, p_col="p.adj", p_threshold=0.05, ngenes=7, id_col="gene") diff --git a/tenxutils/R/Plot.R b/tenxutils/R/Plot.R index d8a4db0..8c5380d 100644 --- a/tenxutils/R/Plot.R +++ b/tenxutils/R/Plot.R @@ -134,8 +134,8 @@ plotViolins <- function(data, seurat_object, vncol=4, vnrow=3, use.minfc=FALSE, minfc_col="min_logFC", maxfc_col="max_logFC", - avgfc_col="avg_logFC", - m_col="avg_logFC", p_col="p.adj", + avgfc_col="avg_log2FC", + m_col="avg_log2FC", p_col="p.adj", pt_size=0.1, id_col="gene") { @@ -206,7 +206,7 @@ plotViolins <- function(data, seurat_object, tmp <- tmp[!tmp[[id_col]] %in% genes_a,] ## order by largest fold change - ## tmp <- tmp[rev(order(abs(tmp$avg_logFC))),] + ## tmp <- tmp[rev(order(abs(tmp$avg_log2FC))),] ## subset to positive or negative markers if(type == "positive") @@ -545,7 +545,7 @@ plotDownsampling <- function(matrixUMI, metadata, basename) { #' A function to draw a heatmap of top cluster marker genes with #' subgroup labels. The function uses the "scale.data" slot by default to make the heatmap. #' @param seurat_object A seurat objected with scaled data and cluster information -#' @param marker_table A dataframe containing the marker information. Must contain "cluster", "gene" and "avg_logFC" columns +#' @param marker_table A dataframe containing the marker information. Must contain "cluster", "gene" and "avg_log2FC" columns #' @param n_markers The number of markers to plot #' @param cells_use The names of the cells to use. 
If NULL all cells will be used #' @param row_names_gp The font size for the gene names @@ -558,7 +558,7 @@ markerComplexHeatmap <- function(seurat_object, n_markers=20, cells_use=NULL, slot="scale.data", - priority="avg_logFC", + priority="avg_log2FC", row_names_gp=10, sub_group=NULL, disp_min=-2.5, @@ -574,10 +574,10 @@ markerComplexHeatmap <- function(seurat_object, "it is avaliable via devtools here: https://github.com/jokergoo/ComplexHeatmap")) } - if(priority=="avg_logFC") { + if(priority=="avg_log2FC") { top_markers <- marker_table %>% group_by(cluster) %>% - top_n(n=n_markers,wt=avg_logFC) + top_n(n=n_markers,wt=avg_log2FC) } else if (priority=="min_logFC") { top_markers <- marker_table %>% group_by(cluster) %>% diff --git a/tenxutils/man/categoriseGenes.Rd b/tenxutils/man/categoriseGenes.Rd index 7c7607a..dd823e7 100644 --- a/tenxutils/man/categoriseGenes.Rd +++ b/tenxutils/man/categoriseGenes.Rd @@ -6,7 +6,7 @@ \usage{ categoriseGenes( data, - m_col = "avg_logFC", + m_col = "avg_log2FC", use_fc = TRUE, p_col = "p.adj", p_threshold = 0.05, diff --git a/tenxutils/man/markerComplexHeatmap.Rd b/tenxutils/man/markerComplexHeatmap.Rd index 9fde54c..a787304 100644 --- a/tenxutils/man/markerComplexHeatmap.Rd +++ b/tenxutils/man/markerComplexHeatmap.Rd @@ -11,7 +11,7 @@ markerComplexHeatmap( n_markers = 20, cells_use = NULL, slot = "scale.data", - priority = "avg_logFC", + priority = "avg_log2FC", row_names_gp = 10, sub_group = NULL, disp_min = -2.5, @@ -21,7 +21,7 @@ markerComplexHeatmap( \arguments{ \item{seurat_object}{A seurat objected with scaled data and cluster information} -\item{marker_table}{A dataframe containing the marker information. Must contain "cluster", "gene" and "avg_logFC" columns} +\item{marker_table}{A dataframe containing the marker information. 
Must contain "cluster", "gene" and "avg_log2FC" columns} \item{n_markers}{The number of markers to plot} diff --git a/tenxutils/man/plotViolins.Rd b/tenxutils/man/plotViolins.Rd index f76b372..3c73bbc 100644 --- a/tenxutils/man/plotViolins.Rd +++ b/tenxutils/man/plotViolins.Rd @@ -16,8 +16,8 @@ plotViolins( use.minfc = FALSE, minfc_col = "min_logFC", maxfc_col = "max_logFC", - avgfc_col = "avg_logFC", - m_col = "avg_logFC", + avgfc_col = "avg_log2FC", + m_col = "avg_log2FC", p_col = "p.adj", pt_size = 0.1, id_col = "gene" diff --git a/tenxutils/man/topGenes.Rd b/tenxutils/man/topGenes.Rd index b6ad757..eaf6049 100644 --- a/tenxutils/man/topGenes.Rd +++ b/tenxutils/man/topGenes.Rd @@ -4,7 +4,7 @@ \alias{topGenes} \title{Function to add "top" indicator column to degenes} \usage{ -topGenes(data, m_col = "avg_logFC", use_fc = TRUE, id_col = "gene", ngenes = 7) +topGenes(data, m_col = "avg_log2FC", use_fc = TRUE, id_col = "gene", ngenes = 7) } \arguments{ \item{data}{The data}