-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIntegrationDataSeurat.R
104 lines (72 loc) · 3.12 KB
/
IntegrationDataSeurat.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
library(dplyr)
library(Seurat)
library(ggplot2)
library(sctransform)
library(glmGamPoi)
library(tidyverse)
library(rstudioapi)
setwd(dirname(getActiveDocumentContext()$path))
samples.list <- list.dirs(getwd(),recursive = FALSE)
samples <- basename(samples.list)
remove(samples.list)
if (!file.exists("DataQuality")) {dir.create("DataQuality")}
###Input Data
for (i in samples) {
assign("rawdata",Read10X(i))
assign(i,CreateSeuratObject(rawdata,project = i,min.cells = 3, min.features = 500))
remove("rawdata")
}
datasets <- lapply(samples, get)
names(datasets) <- samples
remove(list=samples)
for (i in seq_along(datasets)){
datasets[[i]][["percent.mt"]] <- PercentageFeatureSet(datasets[[i]], pattern = "^MT-")
}
### Data quality check
for (i in seq_along(datasets)){
pdf(file = paste("DataQuality/",samples[i],"_Vlnplot_beforeQC.pdf",sep = ""),title="Vlnplot_QC", width=20)
VlnPlot(datasets[[i]], features = c("nFeature_RNA","nCount_RNA","percent.mt"),ncol = 3)
dev.off()
pdf(file = paste("DataQuality/",samples[i],"_FeatureScatter_beforeQC.pdf",sep = ""),title="FeatureScatter_QC", width=20)
plot1 <- FeatureScatter(datasets[[i]], feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(datasets[[i]], feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(plot1 + plot2)
dev.off()
}
remove(list = c("plot1","plot2"))
##Quality Control
f <- function(x) median(x) + 3*mad(x)
for (i in seq_along(datasets)){
stats <- datasets[[i]]@meta.data[, c("nCount_RNA", "nFeature_RNA","percent.mt")]
bounds <- apply(stats, 2,f)
datasets[[i]] <- subset(datasets[[i]], subset = nFeature_RNA >200 &
nCount_RNA < bounds[1] &
nFeature_RNA < bounds[2] &
percent.mt < bounds[3])
remove(list = c("stats","bounds"))
}
### Data quality check after that cells are filtered out through QC metric
for (i in seq_along(datasets)){
pdf(file = paste("DataQuality/",samples[i],"_Vlnplot_afterQC.pdf",sep = ""),title="Vlnplot_QC", width=20)
VlnPlot(datasets[[i]], features = c("nFeature_RNA","nCount_RNA","percent.mt"),ncol = 3)
dev.off()
pdf(file = paste("DataQuality/",samples[i],"_FeatureScatter_afterQC.pdf",sep = ""),title="FeatureScatter_QC", width=20)
plot1 <- FeatureScatter(datasets[[i]], feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(datasets[[i]], feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(plot1 + plot2)
dev.off()
}
remove(list = c("plot1","plot2"))
###Normalization
datasets <-lapply(datasets,\(i) SCTransform(i, vst.flavor = "v2", verbose = FALSE))
###select repeatedly variable features
features <- SelectIntegrationFeatures(object.list = datasets,nfeatures = 5000)
datasets <- lapply(X = datasets, FUN = function(x) {
x <- ScaleData(x, features = features, verbose = FALSE)
x <- RunPCA(x, features = features, verbose = FALSE)
})
### Integration
Anchors <- FindIntegrationAnchors(object.list = datasets, anchor.features = features, reduction = "rpca")
Combined <- IntegrateData(anchorset = Anchors)
#save (Run HPC)
save("DataSeurat.RData",list = c("datasets","Combined"))