diff --git a/r_notebooks/Seurat_integrate.Rmd b/r_notebooks/Seurat_integrate.Rmd new file mode 100644 index 00000000000..74c2e12e9be --- /dev/null +++ b/r_notebooks/Seurat_integrate.Rmd @@ -0,0 +1,233 @@ +--- +title: "R Seurat Integration Notebook 1" +output: html_notebook +--- + + +Load libraries: +```{r} +library(Seurat) +library(future) +options(future.globals.maxSize=990 * 1024^2) +library(dplyr) +``` +Load data: +```{r} +controls=Read10X(data.dir = paste("/icgc/dkfzlsdf/analysis/B210/Luca/scRNA_test_015035/combined/combine_indexes/aggr_controls/outs/filtered_feature_bc_matrix/",sep="")) +treated=Read10X(data.dir = paste("/icgc/dkfzlsdf/analysis/B210/Luca/scRNA_test_015035/combined/combine_indexes/aggr_treated/outs/filtered_feature_bc_matrix/",sep="")) +``` +Create Seurat objects and perfrom SC Transformation: +```{r} + seu_list = c(controls,treated) + annot_list = c("controls", "treated") + +i=1 + seu_list[[i]] <- CreateSeuratObject( seu_list[[i]], min.features = 100 ) + seu_list[[i]]@meta.data[,"sample"] <- annot_list[i] + + seu_list[[i]] <- PercentageFeatureSet(seu_list[[i]], pattern = "^mt-", col.name = "percent.mt") + #high_mt_cells = names(seu_list[[i]]$nFeature_RNA[seu_list[[i]]$percent.mt >= 20]) + #seu_list[[i]] = subset(seu_list[[i]], cells = high_mt_cells, invert = TRUE) + seu_list[[i]] <- subset(seu_list[[i]], subset = nFeature_RNA >= 800 & percent.mt < 20 ) + + # SCTransform replaces NormalizeData, FindVariableFeatures, ScaleData + # DO NOT run ScaleData after SCTransform + seu_list[[i]] <- SCTransform(seu_list[[i]], verbose = FALSE, conserve.memory = FALSE, vars.to.regress = "percent.mt") + +i=2 + +seu_list[[i]] <- CreateSeuratObject( seu_list[[i]], min.features = 100 ) + seu_list[[i]]@meta.data[,"sample"] <- annot_list[i] + + seu_list[[i]] <- PercentageFeatureSet(seu_list[[i]], pattern = "^mt-", col.name = "percent.mt") + #high_mt_cells = names(seu_list[[i]]$nFeature_RNA[seu_list[[i]]$percent.mt >= 20]) + #seu_list[[i]] = subset(seu_list[[i]], cells = high_mt_cells, invert = TRUE) + seu_list[[i]] <- subset(seu_list[[i]], subset = nFeature_RNA >= 300 & percent.mt < 20 ) + + seu_list[[i]] <- SCTransform(seu_list[[i]], verbose = FALSE, conserve.memory = FALSE, vars.to.regress = "percent.mt") +``` +Perform the integration of treated and controls: +```{r} +seu_features <- SelectIntegrationFeatures(object.list = seu_list, nfeatures = 3000) +seu_list <- PrepSCTIntegration(object.list = seu_list, anchor.features = seu_features, verbose = FALSE) + +# considering 80 nearest neighbors when filtering anchors <- close to upper limit for smallest sample +anchors <- FindIntegrationAnchors(object.list = seu_list, normalization.method = "SCT", anchor.features = seu_features, verbose = FALSE, k.filter = 50) +seu <- IntegrateData(anchorset = anchors, normalization.method = "SCT", verbose = FALSE) +``` + +look at normalised data: +```{r} +seu[["integrated"]]@scale.data +seu[["integrated"]]@counts +seu[["integrated"]]@data +``` +Run PCA: +```{r} +seu <- RunPCA(seu, features = VariableFeatures(seu)) +``` +Identify clusters and run UMAP: +```{r} +seu <-FindNeighbors(seu, dims = 1:10) +seu <- FindClusters(seu, resolution = 0.5) +seu <- RunUMAP(seu, dims = 1:10) +``` +Plot UMAP reduction, labelling cells by cluster or by group: +```{r} +dimplot1=DimPlot(seu, reduction = "umap",label = 1) +seu$group=seu@meta.data[,"sample"] +#dimplot2=DimPlot(seu, reduction = "umap",label = 1,group.by = "group",cols=c("blue3","orange")) +dimplot2=DimPlot(seu, reduction = "umap",label = 1,split.by = "group") +#grid.arrange(dimplot1,dimplot2) +dimplot2 +``` + +seu2=seu +seu2$group=factor(seu2$group,levels=c("controls","treated")) +#dimplot3=DimPlot(seu2, reduction = "umap",label = 1,group.by = "group",cols=c("blue3","orange")) +dimplot3=DimPlot(seu2, reduction = "umap",label = 1,group.by = "group",cols=c("red","cyan2")) + +Save the plots: +```{r} +png(paste("~/Dim_plot_aggr_controls_treated_300_800.png",sep="_"),res = 600,width=12,height=10,units='in') +grid.arrange(dimplot1,dimplot3) +dev.off() +VlnPlot(seu, features = c( "Col1a1", "Col3a1", "Col5a1", "Col12a1", "Dcn", "Fbln2"),ncol = 2) +``` +Identify markers across clusters: +```{r} +seu.markers <- FindAllMarkers(seu, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) +top10 <- seu.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) + +id="aggr_control_treated" +threshold="300_800" +pct_mt=20 + +markers_filtmat=seu.markers[seu.markers$p_val_adj<0.05 & seu.markers$avg_logFC>=1,] +write.table(markers_filtmat,file=paste("~/Seurat",id,threshold,"selected_markers.csv",sep="_"),sep=",") +write.table(top10,file=paste("~/Seurat",id,threshold,"_top10_selected_markers.csv",sep="_"),sep=",") + +clust_control_count=1:19 +for (i in 0:18) { +clust_control_count[i+1]=sum(seu$seurat_clusters[seu$group=="controls"]==i) +} + +clust_count=1:19 +for (i in 0:18) { +clust_count[i+1]=sum(seu$seurat_clusters==i) +} +``` +Identify markers between groups: +```{r} +markers_CT=FindMarkers(seu, ident.1="controls",group.by="group") + +subsetC=subset(seu,cells=labels(seu$group[seu$group=="controls"])) +subsetT=subset(seu,cells=labels(seu$group[seu$group=="treated"])) +``` +Look for genes in the selected features: +```{r fig.width=8, fig.height=5} +seu_features[grep("Pall",seu_features)] +``` +markers1=c("Notch1","Notch4","Nav3") #c("Igfbp2","Odc1","Il17f","Notch1","Notch4","Nav3") + +```{r fig.width=5, fig.height=7.5} +VlnPlot(seu, features = c( "Cd207" ,"Il1b","Il17f","S100a4"),ncol = 2,group.by = "group",pt.size = 0.2) +``` + +```{r fig.width=8, fig.height=7} +v1=VlnPlot(subsetC, features = markers1,ncol = 1) +v2=VlnPlot(subsetT, features = markers1,ncol = 1) + +grid.arrange(arrangeGrob(v1,top =text_grob("controls",size=20 ) ) , arrangeGrob(v2,top =text_grob("treated", size=20) ) ,nrow=1) +``` + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_kerat_granular.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Flg2", "Lor"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_langerhans.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Cd207"),ncol = 1) +dev.off() + +plot n_features across clusters: + +```{r fig.width=8, fig.height=6} +data2=data.frame(cluster=factor(seu$seurat_clusters),n_features=as.vector(seu$nFeature_SCT)) +row.names(data2)=c() +ggplot(data=data2, aes(x=cluster,y=n_features))+geom_violin(aes(x=cluster,y=n_features,fill=cluster)) +``` + +```{r} +VlnPlot(seu, features = c( "Gata3", "Tbx21"),ncol = 1) +``` + +```{r} +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD4_helper.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Gata3", "Tbx21", "Ccl4", "Cxcr6"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD4.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Cd4"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD8_exhausted.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Cxcl13", "Pdcd1", "Ctla4"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD8_naive.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Tcf7", "Ccr7", "Lef1", "Il7r", "Il6st", "Foxo1","Myc"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_memoryCD4_and_monocytes.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Cd14","Lyz2","S100a4"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_keratinocytes.png",sep="_"),res = 600,width=18,height=12,units='in') +FeaturePlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_keratinocytes.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_epithelial.png",sep="_"),res = 600,width=18,height=12,units='in') +print(VlnPlot(seu, features = c( "Krt17", "Krt79", "Cd200", "Lrig1"),ncol = 1)) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_stem.png",sep="_"),res = 600,width=18,height=12,units='in') +print(VlnPlot(seu, features = c( "Cd34", "Lgr5", "Lrig1", "Krt14"),ncol = 1)) +dev.off() + +png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Vln_plot_markers_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Nkg7", "Gzmb", "Ifng", "Prf1"),ncol = 2) +dev.off() + +png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Vln_plot_markers_cd4_T_helper_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in') +VlnPlot(seu, features = c( "Gata3", "Tbx21", "Eomes", "Ccl4", "Ccl5", "Cxcr6"),ncol = 3) +dev.off() + +png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Feature_plot_markers_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(pbmc, features = c( "Nkg7", "Gzmb", "Ifng", "Prf1"),ncol = 2) +dev.off() + +png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Feature_plot_markers_cd4_T_helper_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(seu, features = c( "Gata3", "Tbx21", "Eomes", "Ccl4", "Ccl5", "Cxcr6"),ncol = 3) +dev.off() +#FeaturePlot +#FeaturePlot(pbmc, features =c("Gzma", "Gzmb", "Gzmh", "Gzmk", "Gzmm", "Prf1", "Nkg7", "Klrd1", "Gnly"),ncol=3) +png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_cd8_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(seu, features =c("Gzma", "Gzmb", "Gzmm", "Prf1", "Nkg7", "Klrd1"),ncol=3) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_keratinocytes.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=3) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_epithelial.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(seu, features =c("Krt17", "Krt79", "Cd44", "Cd200", "Lrig1"),ncol=2) +dev.off() +``` + + +When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file). + diff --git a/r_notebooks/Seurat_integrate_300_600.Rmd b/r_notebooks/Seurat_integrate_300_600.Rmd new file mode 100644 index 00000000000..91f01c14ded --- /dev/null +++ b/r_notebooks/Seurat_integrate_300_600.Rmd @@ -0,0 +1,235 @@ +--- +title: "R Seurat Integration Notebook 1" +output: html_notebook +--- + + +Load libraries: +```{r} +library(Seurat) +library(future) +options(future.globals.maxSize=990 * 1024^2) +library(dplyr) +library(gridExtra) +``` +Load data: +```{r} +controls=Read10X(data.dir = paste("/icgc/dkfzlsdf/analysis/B210/Luca/scRNA_test_015035/combined/combine_indexes/aggr_controls/outs/filtered_feature_bc_matrix/",sep="")) +treated=Read10X(data.dir = paste("/icgc/dkfzlsdf/analysis/B210/Luca/scRNA_test_015035/combined/combine_indexes/aggr_treated/outs/filtered_feature_bc_matrix/",sep="")) +``` +Create Seurat objects and perfrom SC Transformation: +```{r} + seu_list = c(controls,treated) + annot_list = c("controls", "treated") + +i=1 + seu_list[[i]] <- CreateSeuratObject( seu_list[[i]], min.features = 100 ) + seu_list[[i]]@meta.data[,"sample"] <- annot_list[i] + + seu_list[[i]] <- PercentageFeatureSet(seu_list[[i]], pattern = "^mt-", col.name = "percent.mt") + #high_mt_cells = names(seu_list[[i]]$nFeature_RNA[seu_list[[i]]$percent.mt >= 20]) + #seu_list[[i]] = subset(seu_list[[i]], cells = high_mt_cells, invert = TRUE) + seu_list[[i]] <- subset(seu_list[[i]], subset = nFeature_RNA >= 600 & percent.mt < 20 ) + + # SCTransform replaces NormalizeData, FindVariableFeatures, ScaleData + # DO NOT run ScaleData after SCTransform + seu_list[[i]] <- SCTransform(seu_list[[i]], verbose = FALSE, conserve.memory = FALSE, vars.to.regress = "percent.mt") + +i=2 + +seu_list[[i]] <- CreateSeuratObject( seu_list[[i]], min.features = 100 ) + seu_list[[i]]@meta.data[,"sample"] <- annot_list[i] + + seu_list[[i]] <- PercentageFeatureSet(seu_list[[i]], pattern = "^mt-", col.name = "percent.mt") + #high_mt_cells = names(seu_list[[i]]$nFeature_RNA[seu_list[[i]]$percent.mt >= 20]) + #seu_list[[i]] = subset(seu_list[[i]], cells = high_mt_cells, invert = TRUE) + seu_list[[i]] <- subset(seu_list[[i]], subset = nFeature_RNA >= 300 & percent.mt < 20 ) + + seu_list[[i]] <- SCTransform(seu_list[[i]], verbose = FALSE, conserve.memory = FALSE, vars.to.regress = "percent.mt") +``` +Perform the integration of treated and controls: +```{r} +seu_features <- SelectIntegrationFeatures(object.list = seu_list, nfeatures = 3000) +seu_list <- PrepSCTIntegration(object.list = seu_list, anchor.features = seu_features, verbose = FALSE) + +# considering 80 nearest neighbors when filtering anchors <- close to upper limit for smallest sample +anchors <- FindIntegrationAnchors(object.list = seu_list, normalization.method = "SCT", anchor.features = seu_features, verbose = FALSE, k.filter = 50) +seu <- IntegrateData(anchorset = anchors, normalization.method = "SCT", verbose = FALSE) +``` + +look at normalised data: +```{r} +seu[["integrated"]]@scale.data +seu[["integrated"]]@counts +seu[["integrated"]]@data +``` +Run PCA: +```{r} +seu <- RunPCA(seu, features = VariableFeatures(seu)) +``` +Identify clusters and run UMAP: +```{r} +seu <-FindNeighbors(seu, dims = 1:10) +seu <- FindClusters(seu, resolution = 0.5) +seu <- RunUMAP(seu, dims = 1:10) +``` +Plot UMAP reduction, labelling cells by cluster or by group: +```{r} +dimplot1=DimPlot(seu, reduction = "umap",label = 1) +seu$group=seu@meta.data[,"sample"] +#dimplot2=DimPlot(seu, reduction = "umap",label = 1,group.by = "group",cols=c("blue3","orange")) +dimplot2=DimPlot(seu, reduction = "umap",label = 1,split.by = "group") +#grid.arrange(dimplot1,dimplot2) +dimplot2 +``` + +```{r} +#seu2=seu +#seu2$group=factor(seu2$group,levels=c("controls","treated")) +#dimplot3=DimPlot(seu2, reduction = "umap",label = 1,group.by = "group",cols=c("blue3","orange")) +dimplot3=DimPlot(seu, reduction = "umap",label = 1,group.by = "group",cols=c("blue","orange")) +``` +Save the plots: +```{r fig.width=7, fig.height=10} +png(paste("~/Dim_plot_aggr_controls_treated_300_600.png",sep="_"),res = 600,width=12,height=10,units='in') +grid.arrange(dimplot1,dimplot3) +dev.off() +VlnPlot(seu, features = c( "Col1a1", "Col3a1", "Col5a1", "Col12a1", "Dcn", "Fbln2"),ncol = 2) +``` +Identify markers across clusters: +```{r} +seu.markers <- FindAllMarkers(seu, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) +top10 <- seu.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) + +id="aggr_control_treated" +threshold="300_800" +pct_mt=20 + +markers_filtmat=seu.markers[seu.markers$p_val_adj<0.05 & seu.markers$avg_logFC>=1,] +write.table(markers_filtmat,file=paste("~/Seurat",id,threshold,"selected_markers.csv",sep="_"),sep=",") +write.table(top10,file=paste("~/Seurat",id,threshold,"_top10_selected_markers.csv",sep="_"),sep=",") + +clust_control_count=1:19 +for (i in 0:18) { +clust_control_count[i+1]=sum(seu$seurat_clusters[seu$group=="controls"]==i) +} + +clust_count=1:19 +for (i in 0:18) { +clust_count[i+1]=sum(seu$seurat_clusters==i) +} +``` +Identify markers between groups: +```{r} +markers_CT=FindMarkers(seu, ident.1="controls",group.by="group") + +subsetC=subset(seu,cells=labels(seu$group[seu$group=="controls"])) +subsetT=subset(seu,cells=labels(seu$group[seu$group=="treated"])) +``` +Look for genes in the selected features: +```{r fig.width=8, fig.height=5} +seu_features[grep("Pall",seu_features)] +``` +markers1=c("Notch1","Notch4","Nav3") #c("Igfbp2","Odc1","Il17f","Notch1","Notch4","Nav3") + +```{r fig.width=5, fig.height=7.5} +VlnPlot(seu, features = c( "Cd207" ,"Il1b","Il17f","S100a4"),ncol = 2,group.by = "group",pt.size = 0.2) +``` + +```{r fig.width=8, fig.height=7} +v1=VlnPlot(subsetC, features = markers1,ncol = 1) +v2=VlnPlot(subsetT, features = markers1,ncol = 1) + +grid.arrange(arrangeGrob(v1,top =text_grob("controls",size=20 ) ) , arrangeGrob(v2,top =text_grob("treated", size=20) ) ,nrow=1) +``` + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_kerat_granular.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Flg2", "Lor"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_langerhans.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Cd207"),ncol = 1) +dev.off() + +plot n_features across clusters: + +```{r fig.width=8, fig.height=6} +data2=data.frame(cluster=factor(seu$seurat_clusters),n_features=as.vector(seu$nFeature_SCT)) +row.names(data2)=c() +ggplot(data=data2, aes(x=cluster,y=n_features))+geom_violin(aes(x=cluster,y=n_features,fill=cluster)) +``` + +```{r} +VlnPlot(seu, features = c( "Gata3", "Tbx21"),ncol = 1) +``` + +```{r} +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD4_helper.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Gata3", "Tbx21", "Ccl4", "Cxcr6"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD4.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Cd4"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD8_exhausted.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Cxcl13", "Pdcd1", "Ctla4"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD8_naive.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Tcf7", "Ccr7", "Lef1", "Il7r", "Il6st", "Foxo1","Myc"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_memoryCD4_and_monocytes.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Cd14","Lyz2","S100a4"),ncol = 1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_keratinocytes.png",sep="_"),res = 600,width=18,height=12,units='in') +FeaturePlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_keratinocytes.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=1) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_epithelial.png",sep="_"),res = 600,width=18,height=12,units='in') +print(VlnPlot(seu, features = c( "Krt17", "Krt79", "Cd200", "Lrig1"),ncol = 1)) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_stem.png",sep="_"),res = 600,width=18,height=12,units='in') +print(VlnPlot(seu, features = c( "Cd34", "Lgr5", "Lrig1", "Krt14"),ncol = 1)) +dev.off() + +png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Vln_plot_markers_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=18,height=12,units='in') +VlnPlot(seu, features = c( "Nkg7", "Gzmb", "Ifng", "Prf1"),ncol = 2) +dev.off() + +png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Vln_plot_markers_cd4_T_helper_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in') +VlnPlot(seu, features = c( "Gata3", "Tbx21", "Eomes", "Ccl4", "Ccl5", "Cxcr6"),ncol = 3) +dev.off() + +png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Feature_plot_markers_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(pbmc, features = c( "Nkg7", "Gzmb", "Ifng", "Prf1"),ncol = 2) +dev.off() + +png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Feature_plot_markers_cd4_T_helper_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(seu, features = c( "Gata3", "Tbx21", "Eomes", "Ccl4", "Ccl5", "Cxcr6"),ncol = 3) +dev.off() +#FeaturePlot +#FeaturePlot(pbmc, features =c("Gzma", "Gzmb", "Gzmh", "Gzmk", "Gzmm", "Prf1", "Nkg7", "Klrd1", "Gnly"),ncol=3) +png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_cd8_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(seu, features =c("Gzma", "Gzmb", "Gzmm", "Prf1", "Nkg7", "Klrd1"),ncol=3) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_keratinocytes.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=3) +dev.off() + +png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_epithelial.png",sep="_"),res = 600,width=12,height=6,units='in') +FeaturePlot(seu, features =c("Krt17", "Krt79", "Cd44", "Cd200", "Lrig1"),ncol=2) +dev.off() +``` + + +When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file). + diff --git a/r_notebooks/Seurat_integrate_300_600.nb.html b/r_notebooks/Seurat_integrate_300_600.nb.html new file mode 100644 index 00000000000..5be4059bc18 --- /dev/null +++ b/r_notebooks/Seurat_integrate_300_600.nb.html @@ -0,0 +1,2229 @@ + + + + +
+ + + + + + + + + +Load libraries:
+ + + +library(gridExtra)
+
+
+
+
+Attaching package: ‘gridExtra’
+
+The following object is masked from ‘package:dplyr’:
+
+ combine
+
+
+
+Load data:
+ + + +controls=Read10X(data.dir = paste("/icgc/dkfzlsdf/analysis/B210/Luca/scRNA_test_015035/combined/combine_indexes/aggr_controls/outs/filtered_feature_bc_matrix/",sep=""))
+treated=Read10X(data.dir = paste("/icgc/dkfzlsdf/analysis/B210/Luca/scRNA_test_015035/combined/combine_indexes/aggr_treated/outs/filtered_feature_bc_matrix/",sep=""))
+
+
+
+Create Seurat objects and perfrom SC Transformation:
+ + + + seu_list = c(controls,treated)
+ annot_list = c("controls", "treated")
+
+i=1
+ seu_list[[i]] <- CreateSeuratObject( seu_list[[i]], min.features = 100 )
+ seu_list[[i]]@meta.data[,"sample"] <- annot_list[i]
+
+ seu_list[[i]] <- PercentageFeatureSet(seu_list[[i]], pattern = "^mt-", col.name = "percent.mt")
+ #high_mt_cells = names(seu_list[[i]]$nFeature_RNA[seu_list[[i]]$percent.mt >= 20])
+ #seu_list[[i]] = subset(seu_list[[i]], cells = high_mt_cells, invert = TRUE)
+ seu_list[[i]] <- subset(seu_list[[i]], subset = nFeature_RNA >= 600 & percent.mt < 20 )
+
+ # SCTransform replaces NormalizeData, FindVariableFeatures, ScaleData
+ # DO NOT run ScaleData after SCTransform
+ seu_list[[i]] <- SCTransform(seu_list[[i]], verbose = FALSE, conserve.memory = FALSE, vars.to.regress = "percent.mt")
+
+i=2
+
+seu_list[[i]] <- CreateSeuratObject( seu_list[[i]], min.features = 100 )
+ seu_list[[i]]@meta.data[,"sample"] <- annot_list[i]
+
+ seu_list[[i]] <- PercentageFeatureSet(seu_list[[i]], pattern = "^mt-", col.name = "percent.mt")
+ #high_mt_cells = names(seu_list[[i]]$nFeature_RNA[seu_list[[i]]$percent.mt >= 20])
+ #seu_list[[i]] = subset(seu_list[[i]], cells = high_mt_cells, invert = TRUE)
+ seu_list[[i]] <- subset(seu_list[[i]], subset = nFeature_RNA >= 300 & percent.mt < 20 )
+
+ seu_list[[i]] <- SCTransform(seu_list[[i]], verbose = FALSE, conserve.memory = FALSE, vars.to.regress = "percent.mt")
+
+
+
+Perform the integration of treated and controls:
+ + + +seu_features <- SelectIntegrationFeatures(object.list = seu_list, nfeatures = 3000)
+seu_list <- PrepSCTIntegration(object.list = seu_list, anchor.features = seu_features, verbose = FALSE)
+
+
+# considering 80 nearest neighbors when filtering anchors <- close to upper limit for smallest sample
+anchors <- FindIntegrationAnchors(object.list = seu_list, normalization.method = "SCT", anchor.features = seu_features, verbose = FALSE, k.filter = 50)
+
+
+Some cell names are duplicated across objects provided. Renaming to enforce unique cell names.
+
+
+seu <- IntegrateData(anchorset = anchors, normalization.method = "SCT", verbose = FALSE)
+
+
+
+look at normalised data:
+ + + +seu[["integrated"]]@scale.data
+seu[["integrated"]]@counts
+seu[["integrated"]]@data
+
+
+
+Run PCA:
+ + + +seu <- RunPCA(seu, features = VariableFeatures(seu))
+
+
+
+PC_ 1
+Positive: Fth1, Vim, Rgs1, Cd74, Ftl1, Tmsb4x, H2-Eb1, H2-Aa, H2-Ab1, Srgn
+ Tmsb10, Ubb, Crip1, Cxcl2, Lgals1, Rgs2, S100a4, Il7r, Slc3a2, Cd52
+ Cytip, Ifrd1, Fxyd5, Cd207, Dennd4a, Gadd45b, Odc1, Ctla2a, Arl4c, Hspa8
+Negative: Lgals7, Krt14, Perp, Apoe, Sfn, Ly6d, Dmkn, Hspb1, Krtdap, Krt10
+ Krt5, Stfa3, Krt17, BC100530, S100a14, Calm4, Krt1, Cstb, Fabp5, Dsp
+ Fxyd3, Dstn, Serpinb2, Sprr1a, Tacstd2, Serpinb5, Lypd3, Sbsn, Defb6, Rplp0
+PC_ 2
+Positive: Tmsb4x, Rgs1, Cd74, H2-Aa, H2-Eb1, Actg1, H2-Ab1, Srgn, Rplp0, Lgals7
+ Actb, Il7r, Cytip, Rgs2, Cd207, Cd52, Calm1, Sh3bgrl3, H2afz, Fth1
+ Cstb, H3f3b, Dennd4a, Vps37b, Sub1, Perp, Odc1, Krt14, Traf1, Slc3a2
+Negative: Col1a1, Dcn, Col1a2, Col3a1, Sparc, Tnfaip6, Lum, Ccl2, Mt1, Ccl7
+ Bgn, Mfap4, Fosb, Aebp1, Egr1, Gsn, Meg3, Dusp1, Zfp36, Igfbp5
+ Gadd45g, Timp2, Igfbp7, Jun, Cilp, Serping1, Ctsk, Jund, Hspa1a, Hspa1b
+PC_ 3
+Positive: Ctla2a, Tmsb10, Cd3g, Rgs2, Trdc, Ets1, Icos, Cd3e, Vps37b, Srgn
+ Emb, Cd3d, Ptprc, Nkg7, Gem, Neurl3, Xcl1, Dennd4a, Gm42418, Tnfaip3
+ Rora, Ubald2, Cd7, Il2rg, Cxcr6, Fosl2, Ctsw, Cd2, Stk17b, Gnas
+Negative: Cd74, H2-Eb1, H2-Aa, H2-Ab1, Cd207, Lgals3, Cxcl2, Cst3, Ftl1, Ifrd1
+ Il1r2, Ndrg1, Cxcl16, Basp1, Mfge8, Mt1, Tnfaip2, Ctsz, Pgf, Cd83
+ Csf2rb, Tyrobp, Tbc1d4, Sqstm1, Cdkn1a, Il1b, Cd86, Ctss, Alox5ap, Vim
+PC_ 4
+Positive: Serpinb2, BC100530, Ccdc71l, Stfa3, Ifi202b, Pim1, Col1a1, Plet1, Actg1, Dcn
+ Col1a2, Lypd3, Col3a1, Anxa1, Rplp0, Actb, Irf6, Odc1, Krt16, Cstb
+ Ccl2, Sparc, Uba52, Tacstd2, Klf5, Tmsb10, Tnfaip6, Ptgs2, Ndrg1, Lum
+Negative: Hspa1a, Jun, Fos, Hspa1b, Hsp90aa1, Gm42418, Atf3, Jund, Hspb1, Btg2
+ Ubc, Dnajb1, Fosb, Egr1, Hsp90ab1, Zfp36, Junb, mt-Nd4, Hspa8, Dnaja1
+ Ubb, mt-Cytb, Krt15, Klf6, Hsph1, mt-Co2, mt-Atp6, Ier2, H3f3b, Neat1
+PC_ 5
+Positive: Defb6, Krt79, Cst6, Sprr1a, Krt17, Krtdap, Gm42418, Calm4, Sbsn, Ly6g6c
+ Fabp5, Neat1, Dbi, Dsp, Klk7, Junb, Cstb, Krt10, Klf4, Alox12e
+ Cebpb, Gm26917, Sdc1, mt-Co3, Zfp36, mt-Co1, Gltp, Clec2d, Btg2, Ly6d
+Negative: Krt14, Krt5, Rplp0, S100a6, Fth1, Krt15, Hsp90ab1, S100a10, Stfa3, Hsp90aa1
+ Mt1, Ubb, Mt2, Ifitm3, Serpinb2, Hspa8, Apoe, BC100530, Tmsb4x, Hspe1
+ Col17a1, Igfbp3, Ubc, Hspa1a, Hspb1, Ifi202b, Dnajb1, Ccdc71l, Ndufa4l2, Actg1
+
+
+
+Identify clusters and run UMAP:
+ + + +seu <-FindNeighbors(seu, dims = 1:10)
+
+
+
+Computing nearest neighbor graph
+Computing SNN
+
+
+seu <- FindClusters(seu, resolution = 0.5)
+
+
+Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
+
+Number of nodes: 11714
+Number of edges: 337686
+
+Running Louvain algorithm...
+
+
+0% 10 20 30 40 50 60 70 80 90 100%
+[----|----|----|----|----|----|----|----|----|----|
+**************************************************|
+
+
+Maximum modularity in 10 random starts: 0.9316
+Number of communities: 20
+Elapsed time: 1 seconds
+
+
+seu <- RunUMAP(seu, dims = 1:10)
+
+
+The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
+To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
+This message will be shown once per session15:51:53 Read 11714 rows and found 10 numeric columns
+15:51:53 Using Annoy for neighbor search, n_neighbors = 30
+15:51:53 Building Annoy index with metric = cosine, n_trees = 50
+0% 10 20 30 40 50 60 70 80 90 100%
+[----|----|----|----|----|----|----|----|----|----|
+**************************************************|
+15:51:58 Writing NN index file to temp file /tmp/RtmpY49ENJ/file6bfe9677dfc
+15:51:58 Searching Annoy index using 1 thread, search_k = 3000
+15:52:04 Annoy recall = 100%
+15:52:05 Commencing smooth kNN distance calibration using 1 thread
+15:52:07 Initializing from normalized Laplacian + noise
+15:52:08 Commencing optimization for 200 epochs, with 442470 positive edges
+0% 10 20 30 40 50 60 70 80 90 100%
+[----|----|----|----|----|----|----|----|----|----|
+**************************************************|
+15:52:28 Optimization finished
+
+
+
+Plot UMAP reduction, labelling cells by cluster or by group:
+ + + +dimplot1=DimPlot(seu, reduction = "umap",label = 1)
+
+
+
+Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
+Please use `as_label()` or `as_name()` instead.
+[90mThis warning is displayed once per session.[39m
+
+
+seu$group=seu@meta.data[,"sample"]
+#dimplot2=DimPlot(seu, reduction = "umap",label = 1,group.by = "group",cols=c("blue3","orange"))
+dimplot2=DimPlot(seu, reduction = "umap",label = 1,split.by = "group")
+#grid.arrange(dimplot1,dimplot2)
+dimplot2
+
+
+#seu2=seu
+#seu2$group=factor(seu2$group,levels=c("controls","treated"))
+#dimplot3=DimPlot(seu2, reduction = "umap",label = 1,group.by = "group",cols=c("blue3","orange"))
+dimplot3=DimPlot(seu, reduction = "umap",label = 1,group.by = "group",cols=c("blue","orange"))
+
+
+
+Save the plots:
+ + + +Identify markers across clusters:
+ + + + +Identify markers between groups:
+ + + +markers_CT=FindMarkers(seu, ident.1="controls",group.by="group")
+
+
+
+
+ | | 0 % ~calculating
+ |++++ | 7 % ~07s
+ |++++++++ | 14% ~05s
+ |+++++++++++ | 21% ~05s
+ |+++++++++++++++ | 29% ~05s
+ |++++++++++++++++++ | 36% ~04s
+ |++++++++++++++++++++++ | 43% ~04s
+ |+++++++++++++++++++++++++ | 50% ~03s
+ |+++++++++++++++++++++++++++++ | 57% ~03s
+ |+++++++++++++++++++++++++++++++++ | 64% ~02s
+ |++++++++++++++++++++++++++++++++++++ | 71% ~02s
+ |++++++++++++++++++++++++++++++++++++++++ | 79% ~01s
+ |+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
+ |+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
+ |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed = 06s
+
+
+
+Look for genes in the selected features:
+ + + +seu_features[grep("Pall",seu_features)]
+
+
+
+markers1=c(“Notch1”,“Notch4”,“Nav3”) #c(“Igfbp2”,“Odc1”,“Il17f”,“Notch1”,“Notch4”,“Nav3”)
+ + + +v1=VlnPlot(subsetC, features = markers1,ncol = 1)
+v2=VlnPlot(subsetT, features = markers1,ncol = 1)
+
+grid.arrange(arrangeGrob(v1,top =text_grob("controls",size=20 ) ) , arrangeGrob(v2,top =text_grob("treated", size=20) ) ,nrow=1)
+
+
+
+
+
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_kerat_granular.png",sep="_"),res = 600,width=18,height=12,units='in')
+VlnPlot(seu, features = c( "Flg2", "Lor"),ncol = 1)
+dev.off()
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_langerhans.png",sep="_"),res = 600,width=18,height=12,units='in')
+VlnPlot(seu, features = c( "Cd207"),ncol = 1)
+dev.off()
+
+
+
+plot n_features across clusters:
+ + + +data2=data.frame(cluster=factor(seu$seurat_clusters),n_features=as.vector(seu$nFeature_SCT))
+row.names(data2)=c()
+ggplot(data=data2, aes(x=cluster,y=n_features))+geom_violin(aes(x=cluster,y=n_features,fill=cluster))
+
+
+
+
+
+
+VlnPlot(seu, features = c( "Gata3", "Tbx21"),ncol = 1)
+
+
+
+
+
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD4.png",sep="_"),res = 600,width=18,height=12,units='in')
+VlnPlot(seu, features = c( "Cd4"),ncol = 1)
+dev.off()
+
+
+
+
+
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD8_exhausted.png",sep="_"),res = 600,width=18,height=12,units='in')
+VlnPlot(seu, features = c( "Cxcl13", "Pdcd1", "Ctla4"),ncol = 1)
+dev.off()
+
+
+
+
+
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_CD8_naive.png",sep="_"),res = 600,width=18,height=12,units='in')
+VlnPlot(seu, features = c( "Tcf7", "Ccr7", "Lef1", "Il7r", "Il6st", "Foxo1","Myc"),ncol = 1)
+dev.off()
+
+
+
+
+
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_memoryCD4_and_monocytes.png",sep="_"),res = 600,width=18,height=12,units='in')
+VlnPlot(seu, features = c( "Cd14","Lyz2","S100a4"),ncol = 1)
+dev.off()
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_keratinocytes.png",sep="_"),res = 600,width=18,height=12,units='in')
+FeaturePlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=1)
+dev.off()
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_keratinocytes.png",sep="_"),res = 600,width=18,height=12,units='in')
+VlnPlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=1)
+dev.off()
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_epithelial.png",sep="_"),res = 600,width=18,height=12,units='in')
+print(VlnPlot(seu, features = c( "Krt17", "Krt79", "Cd200", "Lrig1"),ncol = 1))
+dev.off()
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Vln_plot_markers_stem.png",sep="_"),res = 600,width=18,height=12,units='in')
+print(VlnPlot(seu, features = c( "Cd34", "Lgr5", "Lrig1", "Krt14"),ncol = 1))
+dev.off()
+
+png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Vln_plot_markers_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=18,height=12,units='in')
+VlnPlot(seu, features = c( "Nkg7", "Gzmb", "Ifng", "Prf1"),ncol = 2)
+dev.off()
+
+png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Vln_plot_markers_cd4_T_helper_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in')
+VlnPlot(seu, features = c( "Gata3", "Tbx21", "Eomes", "Ccl4", "Ccl5", "Cxcr6"),ncol = 3)
+dev.off()
+
+png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Feature_plot_markers_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in')
+FeaturePlot(pbmc, features = c( "Nkg7", "Gzmb", "Ifng", "Prf1"),ncol = 2)
+dev.off()
+
+png(paste("~/plot_seurat_",id,threshold,pct_mt,"_Feature_plot_markers_cd4_T_helper_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in')
+FeaturePlot(seu, features = c( "Gata3", "Tbx21", "Eomes", "Ccl4", "Ccl5", "Cxcr6"),ncol = 3)
+dev.off()
+#FeaturePlot
+#FeaturePlot(pbmc, features =c("Gzma", "Gzmb", "Gzmh", "Gzmk", "Gzmm", "Prf1", "Nkg7", "Klrd1", "Gnly"),ncol=3)
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_cd8_T_cytotoxic_Abdul.png",sep="_"),res = 600,width=12,height=6,units='in')
+FeaturePlot(seu, features =c("Gzma", "Gzmb", "Gzmm", "Prf1", "Nkg7", "Klrd1"),ncol=3)
+dev.off()
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_keratinocytes.png",sep="_"),res = 600,width=12,height=6,units='in')
+FeaturePlot(seu, features =c("Krt5","Krt10","Krt14" ,"Ptgs1"),ncol=3)
+dev.off()
+
+png(paste("~/plot_seurat",id,threshold,pct_mt,"Feature_plot_markers_epithelial.png",sep="_"),res = 600,width=12,height=6,units='in')
+FeaturePlot(seu, features =c("Krt17", "Krt79", "Cd44", "Cd200", "Lrig1"),ncol=2)
+dev.off()
+
+
+
+When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
+ + +