-
Notifications
You must be signed in to change notification settings - Fork 0
/
MiniBatchKmeans.py
32 lines (30 loc) · 1.06 KB
/
MiniBatchKmeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import time
import numpy as np
from sklearn import datasets
from sklearn.cluster import MiniBatchKMeans
from sklearn.feature_extraction.image import extract_patches_2d
# Learn a dictionary of face patches with online (mini-batch) k-means.
#
# Patches of size ``patch_size`` are sampled from every Olivetti face image,
# buffered, and handed to ``MiniBatchKMeans.partial_fit`` once per 10 images,
# so the model is trained incrementally instead of on one large matrix.
faces = datasets.fetch_olivetti_faces()

print('Learning the dictionary...')
rng = np.random.RandomState(0)
kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)
patch_size = (20, 20)

buffer = []
to = time.time()  # start timestamp for the total-runtime report at the end

# The online learning part: cycle over the whole dataset 6 times
index = 0
for _ in range(6):
    for img in faces.images:
        # Sample at most 50 patches from this image; sharing ``rng`` keeps the
        # whole run reproducible from the single seed above.
        data = extract_patches_2d(img, patch_size, max_patches=50,
                                  random_state=rng)
        # Flatten every patch into one row: (n_patches, n_pixels_per_patch).
        data = np.reshape(data, (len(data), -1))
        buffer.append(data)
        index += 1
        if index % 10 == 0:
            # Flush the buffer: standardise each feature column over the
            # buffered patches, then update the cluster centres.
            data = np.concatenate(buffer, axis=0)
            data -= np.mean(data, axis=0)
            std = np.std(data, axis=0)
            # A constant column has zero deviation; dividing by it would put
            # NaN/inf into the batch, so leave such columns unscaled instead.
            std[std == 0] = 1.0
            data /= std
            kmeans.partial_fit(data)
            buffer = []
        if index % 100 == 0:
            print('Partial fit of %4i out of %i' % (index, 6 * len(faces.images)))

dt = time.time() - to
print('done in %.2fs.' % dt)