Skip to content

Commit 7f72997

Browse files
Merge pull request #146 from Vishu26/dev
Implementation of Cost Effective Active Learning
2 parents 37ac68b + 186f2ce commit 7f72997

File tree

1 file changed

+102
-0
lines changed

1 file changed

+102
-0
lines changed
+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
"""
This is a modified implementation of the algorithm Cost Effective Active Learning
(please refer to https://arxiv.org/abs/1701.03551). This version not only picks the
top K most uncertain samples but also picks the top N highly confident samples that
may represent information and diversity. It differs from the original implementation
in that it does not involve tuning the confidence threshold parameter for every dataset.
"""

from keras.datasets import mnist
import numpy as np
from modAL.models import ActiveLearner
from sklearn.ensemble import RandomForestClassifier
from scipy.special import entr


# Load MNIST and scale pixel intensities from [0, 255] to [0, 1].
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train / 255
X_test = X_test / 255
y_train = y_train.astype(np.uint8)
y_test = y_test.astype(np.uint8)

# Flatten the 28x28 images into 784-dimensional feature vectors.
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

model = RandomForestClassifier(n_estimators=100)

INITIAL_SET_SIZE = 32

# U_x / U_y form the unlabeled pool that the active learner queries from.
U_x = np.copy(X_train)
U_y = np.copy(y_train)

# BUG FIX: sample WITHOUT replacement. The original call drew indices with
# replacement, so the seed set could contain duplicate rows, and np.delete
# (which ignores repeated indices) would then remove fewer than
# INITIAL_SET_SIZE samples from the pool.
ind = np.random.choice(len(U_x), size=INITIAL_SET_SIZE, replace=False)

X_initial = U_x[ind]
y_initial = U_y[ind]

# Remove the seed samples from the unlabeled pool.
U_x = np.delete(U_x, ind, axis=0)
U_y = np.delete(U_y, ind, axis=0)
42+
def assign_pseudo_labels(active_learner, X, confidence_idx):
    """Predict pseudo-labels for the high-confidence samples.

    Parameters
    ----------
    active_learner : a trained learner exposing ``predict``
    X : array-like pool of samples
    confidence_idx : indices into ``X`` of the high-confidence samples

    Returns
    -------
    The labels predicted by ``active_learner`` for ``X[confidence_idx]``.
    """
    return active_learner.predict(X[confidence_idx])
46+
47+
48+
def max_entropy(active_learner, X, K=16, N=16):
    """Query strategy: pick the K most uncertain and N most confident samples.

    Uncertainty is measured as the Shannon entropy of the predicted class
    probabilities.

    Original implementation (CEAL) -- pick the most confident samples as
    those with entropy below a threshold, where the threshold is decayed in
    every iteration.  This version differs: it simply takes the top ``N``
    lowest-entropy (most confident) samples, which avoids tuning a
    per-dataset threshold parameter.

    Parameters
    ----------
    active_learner : a learner exposing ``predict_proba``
    X : array-like candidate pool of samples
    K : int, number of highest-entropy (uncertain) samples to select
    N : int, number of lowest-entropy (confident) samples to select

    Returns
    -------
    np.ndarray of indices into ``X``: the K uncertain indices followed by
    the N confident indices.
    """
    class_prob = active_learner.predict_proba(X)
    # Per-sample entropy: entr(p) = -p*log(p) elementwise, summed over classes.
    entropy = entr(class_prob).sum(axis=1)
    # Indices of the K largest entropies (most uncertain samples).
    uncertain_idx = np.argpartition(entropy, -K)[-K:]
    # Indices of the N smallest entropies (most confident samples).
    confidence_idx = np.argpartition(entropy, N)[:N]
    return np.concatenate((uncertain_idx, confidence_idx), axis=0)
65+
66+
67+
active_learner = ActiveLearner(
    estimator=model,
    X_training=X_initial,
    y_training=y_initial,
    query_strategy=max_entropy,
)

N_QUERIES = 20

K_MAX_ENTROPY = 16
N_MIN_ENTROPY = 16

# Track test accuracy after the initial fit and after each query round.
scores = [active_learner.score(X_test, y_test)]

for round_no in range(1, N_QUERIES + 1):

    # Ask the strategy for K uncertain + N confident indices into the pool.
    query_idx, query_instance = active_learner.query(U_x, K_MAX_ENTROPY, N_MIN_ENTROPY)

    # The strategy returns the uncertain indices first, the confident ones last.
    uncertain_idx = query_idx[:K_MAX_ENTROPY]
    confidence_idx = query_idx[K_MAX_ENTROPY:]

    # Confident samples receive pseudo-labels from the current model; the
    # uncertain samples are "annotated" with their true labels from U_y.
    conf_labels = assign_pseudo_labels(active_learner, U_x, confidence_idx)

    L_x = U_x[query_idx]
    L_y = np.concatenate((U_y[uncertain_idx], conf_labels), axis=0)

    active_learner.teach(L_x, L_y)

    # Queried samples leave the unlabeled pool.
    U_x = np.delete(U_x, query_idx, axis=0)
    U_y = np.delete(U_y, query_idx, axis=0)

    acc = active_learner.score(X_test, y_test)

    print(f'Query {round_no}: Test Accuracy: {acc}')

    scores.append(acc)

0 commit comments

Comments
 (0)