-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path05_umap_predictions.R
66 lines (43 loc) · 1.62 KB
/
05_umap_predictions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Load packages and set the ggplot theme.
library(tidyverse) # dplyr verbs (%>%, mutate, select, left_join) and ggplot2
library(data.table) # fread() / fwrite()
library(datawizard) # NOTE(review): no datawizard function is called in the visible part of this script - confirm it is still needed
library(umap) # umap()
# Make theme_classic() the default theme for the plots created below.
theme_set(theme_classic())
### load data ----
# Read the predictions table and the combined standardised table it is merged
# onto.
predictions <- fread("../data/04_standardised_predictions.csv")
df <- fread("../data/01_combined_standardised.csv")
# print() explicitly so the dimensions also show when the script is source()d.
print(dim(df))

# Natural join: no `by` is given, so dplyr matches on every column name the two
# tables share and reports which columns it used.
# NOTE(review): the key columns are not visible from this script - confirm they
# identify each row of the predictions table uniquely.
n_rows_before <- nrow(df)
df <- left_join(df, predictions)
if (nrow(df) != n_rows_before) {
  # A left join never drops rows of `df`, so a changed count means duplicated
  # keys in `predictions` multiplied rows.
  warning(
    "left_join() changed the row count from ", n_rows_before, " to ",
    nrow(df), "; check the predictions table for duplicated join keys",
    call. = FALSE
  )
}
rm(predictions, n_rows_before)
print("read in data")

# Create a batch label from a substring of the plate name so we can see how the
# different screens combine. Plates matching none of the patterns get NA.
df <- df %>%
  mutate(
    Metadata_batch = as.factor(case_when(
      grepl("pseudo", Metadata_plate) ~ "timecourse",
      grepl("dif", Metadata_plate) ~ "dif",
      grepl("val", Metadata_plate) ~ "val"
    ))
  )
print("dimensions when joined and batch added:")
print(dim(df))

# Split metadata and feature data: columns whose name starts with "Metadata"
# go to m_dat, every other column goes to f_dat. Both keep the row order of
# df, so row i of m_dat describes row i of f_dat.
f_dat <- df %>%
  select(-starts_with("Metadata"))
m_dat <- df %>%
  select(starts_with("Metadata"))
rm(df)
print(dim(f_dat))
### PCA on scaled dataset ----
# prcomp() centres the columns but does not rescale them by default.
# NOTE(review): the "standardised" input file name suggests scaling was done
# upstream - confirm.
data_pca <- prcomp(f_dat)

# Keep the first two PC scores and the colour variable in one data frame so
# aes() refers to columns of the plotted data instead of reaching into m_dat
# with `$`. m_dat and f_dat were split from the same table, so rows line up.
pca_scores <- data.frame(
  data_pca$x[, c("PC1", "PC2"), drop = FALSE],
  Metadata_doublet = m_dat$Metadata_doublet
)

# The colour encodes Metadata_doublet, so the legend is titled after that
# column.
p1 <- ggplot(pca_scores, aes(PC1, PC2)) +
  geom_point(aes(color = Metadata_doublet), alpha = 0.3) +
  labs(color = "Metadata_doublet")
# Alternative palette, currently disabled:
# scale_color_viridis_d(option = "C")

ggsave(
  "../outputs/05_standardised_pca_by_prediction.png",
  p1,
  height = 6,
  width = 7
)
rm(p1, pca_scores)
### UMAP on the leading principal components ----
# Use up to 10 PCs, or fewer when the PCA produced fewer, so the column
# subset cannot run out of bounds.
n_pcs <- min(10L, ncol(data_pca$x))
# Fixed seed so the embedding, the CSV and the figure are the same on every
# run.
umap_seed <- 42L
umap_layout <- umap(
  data_pca$x[, seq_len(n_pcs), drop = FALSE],
  random_state = umap_seed
)$layout

# The plot below refers to the layout columns as X1 and X2, the names
# data.frame() gives them when the layout is bound next to the metadata.
umap_df <- data.frame(m_dat, umap_layout)
rm(list = c(
  "data_pca", "f_dat", "m_dat", "n_pcs", "umap_seed", "umap_layout"
))
fwrite(umap_df, "../data/05_umap_predictions.csv")

p2 <- ggplot(umap_df, aes(X1, X2)) +
  geom_point(aes(colour = Metadata_doublet), alpha = 0.3) +
  scale_color_viridis_d() +
  labs(colour = "")

# Give the size explicitly so the figure does not depend on whichever graphics
# device happens to be open; 7 x 7 in matches the default size of R's standard
# file devices.
ggsave(
  "../outputs/05_standardised_umap_by_prediction.png",
  p2,
  width = 7,
  height = 7
)