1
"""
A modified implementation of the Cost-Effective Active Learning algorithm
(see https://arxiv.org/abs/1701.03551). This version not only picks the
top K most uncertain samples but also the top N most confident samples,
which can contribute information and diversity. It differs from the
original implementation in that it does not require tuning a
confidence-threshold parameter for every dataset.
"""
8
+
9
+ from keras .datasets import mnist
10
+ import numpy as np
11
+ from modAL .models import ActiveLearner
12
+ from sklearn .ensemble import RandomForestClassifier
13
+ from scipy .special import entr
14
+
15
+
16
# Fetch MNIST, flatten each 28x28 image to a 784-vector, and scale the
# pixel intensities into [0, 1].
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 784) / 255
X_test = X_test.reshape(-1, 784) / 255
y_train = y_train.astype(np.uint8)
y_test = y_test.astype(np.uint8)

# Base estimator wrapped by the active learner below.
model = RandomForestClassifier(n_estimators=100)

# Number of labelled samples the learner starts from.
INITIAL_SET_SIZE = 32
29
+
30
# U_x / U_y form the unlabelled pool; the initial labelled set is drawn
# from it at random.
U_x = np.copy(X_train)
U_y = np.copy(y_train)

# Sample WITHOUT replacement: np.random.choice defaults to replace=True,
# which could pick the same index more than once — the initial set would
# then contain duplicates and np.delete below would remove fewer than
# INITIAL_SET_SIZE samples from the pool.
ind = np.random.choice(len(U_x), size=INITIAL_SET_SIZE, replace=False)

X_initial = U_x[ind]
y_initial = U_y[ind]

# Drop the now-labelled samples from the unlabelled pool.
U_x = np.delete(U_x, ind, axis=0)
U_y = np.delete(U_y, ind, axis=0)
40
+
41
+
42
def assign_pseudo_labels(active_learner, X, confidence_idx):
    """Return the learner's own predictions for the high-confidence samples.

    The model is trusted on the samples it is most certain about, so its
    predictions are used as pseudo-labels instead of querying the oracle.
    """
    return active_learner.predict(X[confidence_idx])
46
+
47
+
48
def max_entropy(active_learner, X, K=16, N=16):
    """Select the K most uncertain and N most confident sample indices.

    Uncertainty is measured as the entropy of the predicted class
    distribution. The original CEAL implementation keeps confident samples
    whose entropy falls below a threshold that is decayed every iteration;
    here we instead simply take the N lowest-entropy samples, so no
    per-dataset threshold tuning is needed.

    Returns a 1-D index array: the K uncertain indices followed by the
    N confident indices.
    """
    probabilities = active_learner.predict_proba(X)
    sample_entropy = entr(probabilities).sum(axis=1)

    # Highest-entropy rows -> the model is least sure about these.
    uncertain_idx = np.argpartition(sample_entropy, -K)[-K:]
    # Lowest-entropy rows -> the model is most sure about these.
    confidence_idx = np.argpartition(sample_entropy, N)[:N]

    return np.concatenate((uncertain_idx, confidence_idx), axis=0)
65
+
66
+
67
active_learner = ActiveLearner(
    estimator=model,
    X_training=X_initial,
    y_training=y_initial,
    query_strategy=max_entropy,
)

N_QUERIES = 20

# K uncertain + N confident samples are added to the labelled set per query.
K_MAX_ENTROPY = 16
N_MIN_ENTROPY = 16

# Record the accuracy before any active-learning queries.
scores = [active_learner.score(X_test, y_test)]

for i in range(N_QUERIES):
    # The strategy returns the K uncertain indices first, then the N
    # confident ones; the returned instances are not needed here.
    query_idx, _ = active_learner.query(U_x, K_MAX_ENTROPY, N_MIN_ENTROPY)

    uncertain_idx = query_idx[:K_MAX_ENTROPY]
    confidence_idx = query_idx[K_MAX_ENTROPY:]

    # Confident samples get the learner's own predictions as pseudo-labels;
    # uncertain samples keep their true (oracle) labels.
    conf_labels = assign_pseudo_labels(active_learner, U_x, confidence_idx)

    L_x = U_x[query_idx]
    L_y = np.concatenate((U_y[uncertain_idx], conf_labels), axis=0)

    active_learner.teach(L_x, L_y)

    # Remove the newly labelled samples from the unlabelled pool.
    U_x = np.delete(U_x, query_idx, axis=0)
    U_y = np.delete(U_y, query_idx, axis=0)

    acc = active_learner.score(X_test, y_test)
    print(f'Query {i + 1}: Test Accuracy: {acc}')
    scores.append(acc)
0 commit comments