-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgmm_test.py
More file actions
117 lines (93 loc) · 3.95 KB
/
gmm_test.py
File metadata and controls
117 lines (93 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# coding: utf-8
# In[13]:
#! /usr/bin/env python
'''
code for gaussian mixture solver
based on Kalai STOC'10 paper
Efficiently learning mixtures of two gaussians
'''
from __future__ import division
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn import mixture
import time
from gmm_poly import *
get_ipython().magic(u'matplotlib inline')
class Test(object):
def __init__(self, param, n_samples, seed = 1):
self.seed = seed
self.param = param
self.w1 = param[0]
self.w2 = param[1]
self.mu1 = param[2]
self.mu2 = param[3]
self.sigma1 = param[4]
self.sigma2 = param[5]
self.n_samples = n_samples
self.sample2d, self.sample1d = self.sample(seed = self.seed)
def param_dist(self, est, real):
if type(est).__module__ != 'numpy':
est = np.array(est)
if type(real).__module__ != 'numpy':
real = np.array(real)
return np.sum(np.abs(self.param_reorder(est) - self.param_reorder(real)))
def param_reorder(self, param):
if param[2] > param[3]:
res = [param[1], param[0], param[3], param[2], param[5], param[4]]
else:
res = param
return res
def sample(self, seed = 0):
np.random.seed(seed)
gmix = mixture.GaussianMixture(n_components=2, covariance_type='full')
gmix.fit(np.random.rand(3,1)) # Now it thinks it is trained
gmix.weights_ = np.array([self.w1, self.w2]) # mixture weights (n_components,)
gmix.means_ = np.array([[self.mu1], [self.mu2]]) # mixture means (n_components, 2)
gmix.covariances_ = np.array([[[self.sigma1**2]], [[self.sigma2**2]]]) # mixture cov (n_components, 2, 2)
sample, _ = gmix.sample(self.n_samples)
return (sample, sample.flatten())
def unitest(self, eps, isplot = True):
# fit a Gaussian Mixture Model with two components
start = time.time()
clf = mixture.GaussianMixture(n_components=2, covariance_type='full')
clf.fit(self.sample2d)
end = time.time()
t1 = (end - start)
start = time.time()
test = TwoGaussian(self.sample1d)
#res = test.recoverFromMoments(eps)
res = test.recover1DMixture(eps)
end = time.time()
t2 = (end - start)
#print "Skelearn GMM cost time is: {} sec. \n".format(t1)
#print "Moment method GMM cost time is: {} sec. \n".format(t2)
param1 = self.parseGMM(clf)
param2 = self.parseTwoGmm(res)
dist1 = self.param_dist(param1, self.param)
dist2 = self.param_dist(param2, self.param)
if isplot:
self.plot(param1, param2)
return ([t1, dist1, param1], [t2, dist2, param2])
def parseGMM(self, model):
res = np.round([model.weights_.flatten(),model.means_.flatten(),np.sqrt(model.covariances_.flatten())],4)
return res[0].tolist()+res[1].tolist()+res[2].tolist()
def parseTwoGmm(self, model):
model = np.round(np.array(model), 4)
return model[0].tolist()+model[1].tolist()+model[2].tolist()
def generateMixture(self, bins, param):
assert len(param) == 6
return param[0]*norm.pdf(bins, loc=param[2], scale=param[4]) +param[1]*norm.pdf(bins, loc=param[3], scale=param[5])
def plot(self, param1, param2):
n, bins, patches =plt.hist(self.sample1d, 50, normed=1, facecolor='green', alpha=0.75)
y1 = self.generateMixture(bins, param1)
plot1 = plt.plot(bins, y1, 'r--', linewidth=1, color = 'red')
y2 = self.generateMixture(bins, param2)
plot2 = plt.plot(bins, y2, 'r--', linewidth=1, color = 'blue')
# In[15]:
if __name__ == "__main__":
unitest = Test(param = [0.5, 0.5, 2, 2, 1,2],n_samples=1000, seed = 50000)
print unitest.unitest(0.5, isplot =False)
# In[ ]:
# In[ ]: