Skip to content

Commit 7196b17

Browse files
committed
add RSLDA code
0 parents  commit 7196b17

File tree

4 files changed

+379
-0
lines changed

4 files changed

+379
-0
lines changed

PCA1.py

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import os
2+
import pickle
3+
from mySVD import mySVD
4+
from scipy.sparse import issparse
5+
6+
import numpy as np
7+
8+
9+
def PCA1(data, options=None):
10+
# Principal Component Analysis
11+
12+
# Input:
13+
# data - Data matrix. Each row vector of data is a data point.
14+
# options.ReducedDim - The dimensionality of the reduced subspace. If 0, all the dimensions will be kept. Default is 0.
15+
# Output:
16+
# eigvector - Each column is an embedding function, for a new data point (row vector) x, y = x*eigvector will be the embedding result of x.
17+
# eigvalue - The sorted eigvalue of PCA eigen-problem.
18+
19+
if options is None:
20+
options = {}
21+
22+
ReducedDim = 0
23+
if 'ReducedDim' in options:
24+
ReducedDim = options['ReducedDim']
25+
26+
nSmp, nFea = data.shape
27+
if (ReducedDim > nFea) or (ReducedDim <=0):
28+
ReducedDim = nFea
29+
30+
if issparse(data):
31+
data = data.toarray()
32+
sampleMean = np.mean(data, axis=0)
33+
data = (data - np.tile(sampleMean, (nSmp, 1)))
34+
35+
eigvector, eigvalue, _ = mySVD(data.T, ReducedDim)
36+
eigvalue = np.square(eigvalue)
37+
38+
if 'PCARatio' in options:
39+
sumEig = np.sum(eigvalue)
40+
sumEig *= options['PCARatio']
41+
sumNow = 0
42+
for idx in range(len(eigvalue)):
43+
sumNow += eigvalue[idx]
44+
if sumNow >= sumEig:
45+
break
46+
eigvector = eigvector[:, :idx]
47+
48+
return eigvector, eigvalue
49+
50+
51+
if __name__ == '__main__':
52+
dim = 100
53+
options = {
54+
"ReducedDim": dim
55+
}
56+
57+
DATA_FOLDER = "P:/RESEARCH/DATA/CIFAR-100/CIFARDB/train/"
58+
features = pickle.load(open("./database/features_YCrCb_CifarDataset.pkl", 'rb'))
59+
paths = pickle.load(open("./database/paths_YCrCb_CifarDataset.pkl", 'rb'))
60+
features = features
61+
paths = paths
62+
classes = os.listdir(DATA_FOLDER)
63+
X = np.array(features)
64+
y = []
65+
class_check = 'apple'
66+
for path in paths:
67+
label = 0
68+
if class_check in path:
69+
label = 1
70+
y.append(label)
71+
y = np.array(y)
72+
y = y.reshape(-1, 1)
73+
74+
eigvector, eigvalue = PCA1(X, options)
75+
print("Shape of eigvector = ", eigvector.shape)
76+
print("Shape of eigvalue = ", eigvalue.shape)
77+

RSLDA.py

+145
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
from PCA1 import PCA1
2+
from ScatterMat import ScatterMat
3+
4+
import os
5+
import pickle
6+
import numpy as np
7+
8+
from tqdm.auto import tqdm
9+
10+
def RSLDA(X=None, label=None, lambda1 = 0.0002,
11+
lambda2 = 0.001,
12+
dim = 100,
13+
mu = 0.1,
14+
rho = 1.01,
15+
max_iter = 100):
16+
print("RUNNING RSLDA!")
17+
m, n = X.shape
18+
max_mu = 10**5
19+
20+
# Initialization
21+
regu = 10**-5
22+
Sw, Sb = ScatterMat(X, label)
23+
options = {}
24+
options['ReducedDim'] = dim
25+
P1, _ = PCA1(X.T, options)
26+
Q = np.ones((m, dim))
27+
E = np.zeros((m, n))
28+
Y = np.zeros((m, n))
29+
v = np.sqrt(np.sum(Q*Q, axis=1) + np.finfo(float).eps)
30+
D = np.diag(1./v)
31+
32+
# Main loop
33+
for iter in tqdm(range(1, max_iter+1), total=max_iter):
34+
35+
# Update P
36+
if iter == 1:
37+
P = P1
38+
else:
39+
M = X - E + Y/mu
40+
U1, S1, V1 = np.linalg.svd(M @ X.T @ Q, full_matrices=False)
41+
P = U1 @ V1
42+
del M
43+
44+
# Update Q
45+
M = X - E + Y/mu
46+
Q1 = 2*(Sw - regu*Sb) + lambda1*D + mu*X @ X.T
47+
Q2 = mu*X @ M.T @ P
48+
Q = np.linalg.solve(Q1, Q2)
49+
v = np.sqrt(np.sum(Q*Q, axis=1) + np.finfo(float).eps)
50+
D = np.diag(1./v)
51+
52+
# Update E
53+
eps1 = lambda2/mu
54+
temp_E = X - P @ Q.T @ X + Y/mu
55+
E = np.maximum(0, temp_E - eps1) + np.minimum(0, temp_E + eps1)
56+
57+
# Update Y, mu
58+
Y = Y + mu*(X - P @ Q.T @ X - E)
59+
mu = min(rho*mu, max_mu)
60+
leq = X - P @ Q.T @ X - E
61+
EE = np.sum(np.abs(E), axis=1)
62+
obj = np.trace(Q.T @ (Sw - regu*Sb) @ Q) + lambda1*np.sum(v) + lambda2*np.sum(EE)
63+
64+
if iter > 2:
65+
if np.linalg.norm(leq, np.inf) < 10**-7 and abs(obj - obj_prev) < 0.00001:
66+
print(iter)
67+
break
68+
69+
obj_prev = obj
70+
71+
return P, Q, E, obj
72+
73+
74+
def sort_power_of_features(Q):
75+
row_norm = np.linalg.norm(Q, axis=1)
76+
sorted_power = np.argsort(row_norm)[::-1] # DESC
77+
return sorted_power
78+
79+
80+
if __name__ == '__main__':
81+
# DATA_FOLDER = "P:/RESEARCH/DATA/CIFAR-100/CIFARDB/train/"
82+
83+
features = pickle.load(open("./database/features_handcrafted_Cifar.pkl", 'rb'))
84+
paths = pickle.load(open("./database/paths_handcrafted_Cifar.pkl", 'rb'))
85+
86+
# X = np.array(features)
87+
y = []
88+
89+
# class_check = 'apple'
90+
# for i in range(len(paths)):
91+
# path = paths[i]
92+
# label = 0
93+
# if class_check in path:
94+
# label = 1
95+
# y.append(label)
96+
97+
X_train = []
98+
n_pos = 0
99+
n_neg = 0
100+
101+
class_check = 'apple'
102+
for i in range(len(paths)):
103+
if n_pos >= 10 and n_neg >=100:
104+
break
105+
else:
106+
path = paths[i]
107+
if class_check in path and n_pos < 10:
108+
label = 1
109+
n_pos += 1
110+
y.append(label)
111+
X_train.append(features[i])
112+
elif class_check not in path and n_neg < 50:
113+
label = 0
114+
n_neg += 1
115+
y.append(label)
116+
X_train.append(features[i])
117+
118+
X_train = np.array(X_train)
119+
X_train = X_train.T
120+
# X = X.T
121+
y = np.array(y)
122+
y = y.reshape(-1, 1)
123+
124+
print(X_train.shape)
125+
print(y.shape)
126+
P,Q,E,obj = RSLDA(X=X_train,label=y)
127+
print("Shape of P = ", P.shape)
128+
print("Shape of Q = ", Q.shape)
129+
print("Shape of E = ", E.shape)
130+
131+
# pickle.dump(Q, open("./database/Q.pkl", 'wb'))
132+
133+
# TEST
134+
# Q = pickle.load(open("./database/Q.pkl", 'rb'))
135+
# print(Q.shape)
136+
# print(np.max(Q))
137+
# print(np.min(Q))
138+
# a = np.count_nonzero(Q)
139+
# print(a)
140+
# sort_power_of_features(Q=Q)
141+
# print(Q.shape)
142+
143+
144+
145+

ScatterMat.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import os
2+
import numpy as np
3+
import pickle
4+
5+
6+
def ScatterMat(X, y):
7+
dim, _ = X.shape
8+
nclass = int(np.max(y)) + 1
9+
10+
mean_X = np.mean(X, axis=1)
11+
Sw = np.zeros((dim,dim))
12+
Sb = np.zeros((dim,dim))
13+
14+
for i in range(nclass):
15+
inx_i = np.where(y==i)[0]
16+
X_i = X[:,inx_i]
17+
18+
mean_Xi = np.mean(X_i, axis=1)
19+
Sw += np.cov(X_i, rowvar=True, bias=True)
20+
Sb += len(inx_i)*(mean_Xi-mean_X)[:,None]*(mean_Xi-mean_X)[None,:]
21+
22+
return Sw, Sb
23+
24+
25+
if __name__ == '__main__':
26+
DATA_FOLDER = "P:/RESEARCH/DATA/CIFAR-100/CIFARDB/train/"
27+
features = pickle.load(open("./database/features_YCrCb_CifarDataset.pkl", 'rb'))
28+
paths = pickle.load(open("./database/paths_YCrCb_CifarDataset.pkl", 'rb'))
29+
features = features
30+
paths = paths
31+
classes = os.listdir(DATA_FOLDER)
32+
X = np.array(features)
33+
y = []
34+
class_check = 'apple'
35+
for path in paths:
36+
label = 0
37+
if class_check in path:
38+
label = 1
39+
y.append(label)
40+
y = np.array(y)
41+
X = X.T
42+
y = y.reshape(-1, 1)
43+
print(X.shape)
44+
print(y.shape)
45+
Sw, Sb = ScatterMat(X, y)
46+
print(Sw.shape, Sb.shape)
47+
print(np.max(Sw), np.max(Sb))

mySVD.py

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import os
2+
import pickle
3+
import numpy as np
4+
import scipy as sp
5+
from scipy.sparse import issparse, spdiags
6+
from scipy.linalg import eig, eigh, svd
7+
8+
9+
def mySVD(X, ReducedDim=0):
10+
MAX_MATRIX_SIZE = 1600
11+
EIGVECTOR_RATIO = 0.1
12+
13+
if not ReducedDim:
14+
ReducedDim = 0
15+
16+
mFea, nSmp = X.shape
17+
18+
if nSmp/mFea > 1.0713:
19+
ddata = X @ X.T
20+
ddata = np.maximum(ddata, ddata.T)
21+
22+
dimMatrix = ddata.shape[0]
23+
if (ReducedDim > 0) and (dimMatrix > MAX_MATRIX_SIZE) and (ReducedDim < dimMatrix*EIGVECTOR_RATIO):
24+
eigvalue, U = eigh(ddata, eigvals=(dimMatrix - ReducedDim, dimMatrix - 1))
25+
eigvalue = eigvalue[::-1]
26+
U = U[:, ::-1]
27+
else:
28+
if issparse(ddata):
29+
ddata = ddata.toarray()
30+
31+
eigvalue, U = eigh(ddata)
32+
eigvalue = eigvalue[::-1]
33+
U = U[:, ::-1]
34+
35+
eigIdx = np.where(np.abs(eigvalue) / np.max(np.abs(eigvalue)) < 1e-10)[0]
36+
eigvalue = np.delete(eigvalue, eigIdx)
37+
U = np.delete(U, eigIdx, axis=1)
38+
39+
if (ReducedDim > 0) and (ReducedDim < len(eigvalue)):
40+
eigvalue = eigvalue[:ReducedDim]
41+
U = U[:, :ReducedDim]
42+
43+
eigvalue_Half = np.sqrt(np.abs(eigvalue))
44+
S = spdiags(eigvalue_Half, 0, len(eigvalue_Half), len(eigvalue_Half))
45+
46+
eigvalue_MinusHalf = eigvalue_Half ** -1
47+
V = X.T @ (U * np.tile(eigvalue_MinusHalf, (U.shape[0], 1)))
48+
else:
49+
ddata = X.T @ X
50+
ddata = np.maximum(ddata, ddata.T)
51+
52+
dimMatrix = ddata.shape[0]
53+
if (ReducedDim > 0) and (dimMatrix > MAX_MATRIX_SIZE) and (ReducedDim < dimMatrix*EIGVECTOR_RATIO):
54+
eigvalue, V = eigh(ddata, eigvals=(dimMatrix - ReducedDim, dimMatrix - 1))
55+
eigvalue = eigvalue[::-1]
56+
V = V[:, ::-1]
57+
else:
58+
if issparse(ddata):
59+
ddata = ddata.toarray()
60+
61+
eigvalue, V = eigh(ddata)
62+
eigvalue = eigvalue[::-1]
63+
V = V[:, ::-1]
64+
65+
eigIdx = np.where(np.abs(eigvalue) / np.max(np.abs(eigvalue)) < 1e-10)[0]
66+
eigvalue = np.delete(eigvalue, eigIdx)
67+
V = np.delete(V, eigIdx, axis=1)
68+
69+
if (ReducedDim > 0) and (ReducedDim < len(eigvalue)):
70+
eigvalue = eigvalue[:ReducedDim]
71+
V = V[:, :ReducedDim]
72+
73+
eigvalue_Half = eigvalue**0.5
74+
S = spdiags(eigvalue_Half,0,len(eigvalue_Half),len(eigvalue_Half))
75+
76+
eigvalue_MinusHalf = eigvalue_Half**-1
77+
78+
U = X.dot(V * np.tile(eigvalue_MinusHalf.T,(V.shape[0],1)))
79+
80+
return (U, S, V)
81+
82+
83+
84+
if __name__ == '__main__':
85+
DATA_FOLDER = "P:/RESEARCH/DATA/CIFAR-100/CIFARDB/train/"
86+
features = pickle.load(open("./database/features_YCrCb_CifarDataset.pkl", 'rb'))
87+
paths = pickle.load(open("./database/paths_YCrCb_CifarDataset.pkl", 'rb'))
88+
features = features
89+
paths = paths
90+
classes = os.listdir(DATA_FOLDER)
91+
X = np.array(features)
92+
y = []
93+
class_check = 'apple'
94+
for path in paths:
95+
label = 0
96+
if class_check in path:
97+
label = 1
98+
y.append(label)
99+
y = np.array(y)
100+
X = X.T
101+
y = y.reshape(-1, 1)
102+
print(X.shape)
103+
print(y.shape)
104+
105+
106+
# ====================================
107+
U, S, V = mySVD(X, ReducedDim=100)
108+
print("Size of U = ", U.shape)
109+
print("Size of S = ", S.shape)
110+
print("Size of V = ", V.shape)

0 commit comments

Comments
 (0)