-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStatmodel.m
101 lines (79 loc) · 2.66 KB
/
Statmodel.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
function [sNet] = Statmodel(gene_names, tf_list, expression_matrix)
distribution = {'Beta','Binomial','BirnbaumSaunders','Burr','Exponential',...
'ExtremeValue','Gamma','GeneralizedExtremeValue','GeneralizedPareto',...
'HalfNormal','InverseGaussian','Kernel','Logistic','Loglogistic',...
'Lognormal','Nakagami','NegativeBinomial','Normal','Poisson','Rayleigh',...
'Rician','Stable','tLocationScale','Weibull'};
tfs = find(ismember(gene_names,tf_list));
tfexpression = expression_matrix(:,tfs);
ngenes = size(expression_matrix,2);
ntf = numel(tfs);
ndist = numel(distribution);
pv = zeros(ngenes,ntf);
for j = 1:ngenes
Y = expression_matrix(:,j);
meanY = mean(Y);
maxY = max(Y);
minY = min(Y);
stY = (Y - meanY)/(maxY - minY);
stYm = stY+abs(min(stY))+1;
parfor k = 1:ndist
warning ('off','all')
try
pd = fitdist(stYm,distribution{k});
[h(k),p(k)] = chi2gof(stYm,'CDF',pd);
catch
h(k)=-Inf;
p(k)=-Inf;
end
end
if find(h==0) == 1
bestfit = 1;
else
[~,bestfit] = max(p);
end
try
bestpdY = fitdist(stYm,distribution{bestfit});
catch
fprintf('Error fitting gene #%d \n',j);
continue
end
parfor n = 1:ntf
X = tfexpression(:,n);
meanX = mean(X);
maxX = max(X);
minX = min(X);
stX = (X - meanX)/(maxX - minX);
Xmp = logical(stX >= mean(stX));
Xmn = logical(stX <= mean(stX));
stYp = stYm(Xmp);
stYn = stYm(Xmn);
if size(stYp)<=size(stYn)
stYt=stYp;
else
stYt=stYn;
end
if abs(mean(stYp) - mean(stYn)) >= 0.05
[h0,p0] = chi2gof(stYt,'CDF',bestpdY);
if h0 == 1
pv(j,n) = p0;
else
continue
end
end
end
end
r = 1;
Net = cell(nnz(pv),3);
for i = 1:ntf
for j = 1:ngenes
if pv(j,i) ~= 0
Net{r,1} = gene_names{tfs(i)};
Net{r,2} = gene_names{j};
Net{r,3} = -log(pv(j,i));
r = r+1;
end
end
end
sNet = sortrows(Net,3,'descend');
end