-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathselection.py
83 lines (66 loc) · 3.04 KB
/
selection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import tensorflow as tf
def fisher(data, num_instances: list, top_k_features=2):
    """
    Ranks features with the Fisher discriminant criterion:

        D(f) = |m1(f) - m2(f)| / (var1(f) + var2(f))

    where m_i and var_i are the per-feature mean and variance of class i.

    :param data: 2-D tensor-like of shape (total_instances, num_features);
        rows of class 1 stacked on top of rows of class 2 — TODO confirm
        this row ordering against the callers.
    :param num_instances: two-element list [n_class1, n_class2] with the
        row count of each class.
    :param top_k_features: how many features to keep (clamped to the
        available feature count).
    :return: ``tf.nn.top_k`` result (values, indices) for the most
        significant features.
    """
    assert len(num_instances) == 2, "Fisher selection method can be performed for two-class problems."
    data = tf.convert_to_tensor(data)
    feature_count = data.get_shape().as_list()[-1]
    k = min(top_k_features, feature_count)
    first_class, second_class = tf.split(data, num_instances)
    with tf.name_scope('fisher_selection'):
        # tf.nn.moments returns (mean, variance) — these are variances,
        # not standard deviations.
        mean1, var1 = tf.nn.moments(first_class, axes=0)
        mean2, var2 = tf.nn.moments(second_class, axes=0)
        scores = tf.abs(mean1 - mean2) / (var1 + var2)
        return tf.nn.top_k(scores, k=k)
def feature_correlation_with_class(data, num_instances: list, top_k_features=10):
    """
    Ranks features by their correlation with the class label:

        D(f) = [(m1(f) - m(f))^2 + (m2(f) - m(f))^2] / (2 * sigma(f)^2)

    where m_i is the per-feature mean of class i, and m / sigma^2 are the
    mean and variance of the feature over the whole data set.

    :param data: 2-D tensor-like of shape (total_instances, num_features);
        rows of class 1 stacked on top of rows of class 2 — TODO confirm
        this row ordering against the callers.
    :param num_instances: two-element list [n_class1, n_class2] with the
        row count of each class.
    :param top_k_features: how many features to keep (clamped to the
        available feature count).
    :return: ``tf.nn.top_k`` result (values, indices) for the most
        significant features.
    """
    data = tf.convert_to_tensor(data)
    num_features = data.get_shape().as_list()[-1]
    if top_k_features > num_features:
        top_k_features = num_features
    class1, class2 = tf.split(data, num_instances)
    with tf.name_scope('corr_selection'):
        # tf.nn.moments returns (mean, variance).
        mean1, var1 = tf.nn.moments(class1, axes=0)
        mean2, var2 = tf.nn.moments(class2, axes=0)
        mean, var = tf.nn.moments(data, axes=0)
        # BUG FIX: the original wrote `... / 2 * tf.square(std)`, which by
        # operator precedence divides by 2 and then MULTIPLIES by the squared
        # variance (i.e. sigma^4). The documented formula divides by
        # 2*sigma^2, and `var` already IS sigma^2.
        corr_coeffs = (tf.square(mean1 - mean) + tf.square(mean2 - mean)) / (2.0 * var)
        selected_features = tf.nn.top_k(corr_coeffs, k=top_k_features)
    return selected_features
def t_test(data, num_instances: list, top_k_features=10):
    """
    Ranks features by the (Welch) t-statistic between the two classes:

        D(f) = |m1(f) - m2(f)| / sqrt(var1(f)/n1 + var2(f)/n2)

    where m_i and var_i are the per-feature mean and variance of class i
    and n_i its instance count.

    :param data: 2-D tensor-like of shape (total_instances, num_features);
        rows of class 1 stacked on top of rows of class 2 — TODO confirm
        this row ordering against the callers.
    :param num_instances: two-element list [n_class1, n_class2] with the
        row count of each class.
    :param top_k_features: how many features to keep (clamped to the
        available feature count).
    :return: ``tf.nn.top_k`` result (values, indices) for the most
        significant features.
    """
    data = tf.convert_to_tensor(data)
    num_features = data.get_shape().as_list()[-1]
    if top_k_features > num_features:
        top_k_features = num_features
    class1, class2 = tf.split(data, num_instances)
    with tf.name_scope('t_test_selection'):
        # tf.nn.moments returns (mean, variance).
        mean1, var1 = tf.nn.moments(class1, axes=0)
        mean2, var2 = tf.nn.moments(class2, axes=0)
        # BUG FIX: the original applied tf.square to what is already a
        # variance, putting sigma^4 under the square root. The t-statistic
        # uses variance/n directly.
        t_test_coeffs = tf.abs(mean1 - mean2) / tf.sqrt(
            var1 / num_instances[0] + var2 / num_instances[1])
        selected_features = tf.nn.top_k(t_test_coeffs, k=top_k_features)
    return selected_features
def random(data, num_instances: list, top_k_features=10):
    """
    Selects ``top_k_features`` feature indices uniformly at random — a
    baseline to compare the informed selection methods against.

    :param data: 2-D tensor-like of shape (total_instances, num_features).
    :param num_instances: kept for signature parity with the other
        selection methods; unused here.
    :param top_k_features: how many feature indices to draw (clamped to
        the available feature count).
    :return: 1-D int tensor of ``top_k_features`` distinct random feature
        indices.
    """
    data = tf.convert_to_tensor(data)
    num_features = data.get_shape().as_list()[-1]
    if top_k_features > num_features:
        top_k_features = num_features
    with tf.name_scope('random_selection'):
        # The original body was an unfinished stub (`pass`, implicitly
        # returning None). Draw a random permutation of all feature
        # indices and keep the first k, so the result is k distinct indices.
        shuffled = tf.random.shuffle(tf.range(num_features))
        return shuffled[:top_k_features]