-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSimulation_MAIN.R
115 lines (83 loc) · 3.13 KB
/
Simulation_MAIN.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Simulation driver: checks whether the model predicts well on simulated
# data, and is also meant for checking the time the model requires.
# NOTE(review): an earlier header spoke of 10 dimensions; D is set to 20 below.

# Start from an empty workspace: every object in the current environment is
# removed before any setting is defined.
rm(list = ls())

# ---- Simulated data properties ----

# Number of points in the training set and in the test set
N.train <- 100
N.test <- 100

# Number of clusters
# NOTE(review): this assignment masks the built-in shorthand `F` (FALSE) in
# the workspace. The name is kept because the helper functions sourced later
# are called without arguments and presumably read it as a global -- confirm
# before renaming.
F <- 2

# Distribution of the points across the clusters (one weight per cluster)
p.dist <- c(0.5, 0.5)

# Total number of features D
D <- 20

# Share of the features that are irrelevant (0.2, i.e. 20 percent)
prob.noise.feature <- 0.2

# Overlap between the clusters of molecular data on the relevant features
prob.overlap <- 0.01
# ---- Get the data ----
# Load the simulator and run it to initialise the training data.
# It is called without arguments, so it presumably picks up the settings
# defined earlier in this script from the workspace and leaves Y.dat and
# Y.new.dat (used below) there -- TODO confirm in simulateDPMM.R.
source("simulateDPMM.R")
simulateDPMM()

# ---- Optional pre-processing (disabled) ----
# source('preprocessDPMM.R')
# preprocessDPMM()

# ---- Optional: penalised LASSO to find the relevant features (disabled) ----
# smod <- Surv(exp(time), censoring)
# reg.pcox <- cv.glmnet(x = Y.dat, y = time)
# ind.rel <- unlist(predict(object =reg.pcox, newx = Y, s = mean(reg.pcox$lambda[28:30]),type = "nonzero"))
# Y <- Y.dat[,ind.rel]
# Y.new <- Y.new.dat[,ind.rel]
# D <- ncol(Y)
# pc <- prcomp(Y)
# pc.pred <- predict(pc,newdata = Y)
# plot(pc.pred[,1], pc.pred[,2], pch = 19, col =c.true, main = 'Main Training Data After Preprocessing')

# ---- Default: no pre-processing, use the simulated data unchanged ----
Y.new <- Y.new.dat
Y <- Y.dat
# ---- Parameters for Gibbs sampling ----
# These are plain workspace variables; how each one is consumed is decided by
# the sampler code sourced later in this script, not here.
# Presumably: total iterations, burn-in iterations and thinning interval --
# TODO confirm against burninDPMM.R / gibbsDPMM.R.
iter <- 100
iter.burnin <- 50
iter.thin <- 5
# NOTE(review): the meaning of k is not visible in this script -- verify in
# the sourced helpers.
k <- 2
# Every step below follows the same pattern: source the file that defines the
# helper, then call the helper. The helpers take no arguments, so they
# presumably read and write workspace variables -- TODO confirm. The order of
# the steps is kept exactly as it was.

# ---- Initialise the parameters ----
source("initializeDPMM.R")
initializeDPMM()

# ---- Ground truth ----
source("SIMgroundtruth.R")
SIMgroundtruth()

source("iSIMgroundtruth.R")
iSIMgroundtruth()

# ---- Train the model: burn-in first, then the Gibbs sampler ----
source("burninDPMM.R")
burninDPMM()

source("gibbsDPMM.R")
gibbsDPMM()
# ---- Analyse the fit ----
# Good feature selection from heatmap plus c-index plus Rand index
source("SIManalyzeDPMM.R")
SIManalyzeDPMM()

# ---- Predict on the new data set ----
source("predictCLASS.R")
predictCLASS(Y.new, time.new)

# Adjusted Rand index of the prediction: every row of posteriorprob is
# assigned to the column holding its largest value and the resulting labels
# are compared with c.true.new.
# posteriorprob is presumably left in the workspace by predictCLASS() --
# TODO confirm.
print(posteriorprob)
test.randindex <- adjustedRandIndex(
  apply(posteriorprob, 1, which.max),
  c.true.new
)

# ---- Predict on the new data set based on just the molecular data ----
source("predictCLUSTER.R")
predictCLUSTER(Y.new)

# Same adjusted Rand index check, this time on posteriorprobMOL.
print(posteriorprobMOL)
i.test.randindex <- adjustedRandIndex(
  apply(posteriorprobMOL, 1, which.max),
  c.true.new
)

source("predictchineseAFTtime.R")
predictchineseAFTtime(Y.new)

# ---- Predicted C-index ----
# Only the first element of the survConcordance() result is kept.
# NOTE(review): survConcordance() is reported as deprecated in favour of
# concordance() in newer releases of the survival package -- confirm before
# upgrading the package.
predicted.cindex <- survConcordance(
  Surv(exp(time.new), censoring.new) ~ exp(-post.time.avg)
)[1]

# ---- Check the prediction ground truth ----
source("predictionGroundTruth.R")
predictionGroundTruth()
# ---- Use PreMiuM ----
source("premium.R")

# Directory handed to premium().
# NOTE(review): this is an absolute path on one specific machine; the call
# will presumably not work elsewhere until the path is adapted -- verify what
# premium() does with it.
direc <- "/home/bit/ashar/ownCloud/Research/DPMMSIMULATIONS/OneView/D20Noise20perOverlap01per/premium"
premium(direc)