-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathSMAP_block.py
151 lines (143 loc) · 7.18 KB
/
SMAP_block.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import numpy as np
import similarity_measures as sm
import SMAP.MatrixProfile as mp
import memory_block as mb
import utils.utils as util
import time
def discm_profile(TS_set, MP_set, m):
    """Attach a discrimination profile and a threshold profile (for length m)
    to every timeseries in TS_set.

    Matrix-profile rows are pooled into an intra-class list and an
    inter-class list; a Simple Moving Average (not an EMA) over everything
    pooled so far is taken after each source timeseries, and the profiles
    are stored as:
        ts.discmP[m]  = mean(inter-class) - mean(intra-class)
        ts.threshP[m] = mean(intra-class)

    NOTE(review): the pools are deliberately NOT reset between source
    timeseries, so later sources average over earlier pairs as well.
    """
    same_pool = []     # matrix profiles of same-class pairs seen so far
    differ_pool = []   # matrix profiles of cross-class pairs seen so far
    annotated = []
    print("TS_set length is: " + str(len(TS_set)))
    for idx_s, ts_source in enumerate(TS_set):
        for idx_t, ts_target in enumerate(TS_set):
            # a timeseries is never compared against itself
            if idx_t == idx_s:
                continue
            print("idx_s:idx_t is " + str(idx_s) + ":" + str(idx_t))
            print("MP_set shape is: " + str(MP_set.shape))
            profile = MP_set[idx_s][idx_t]
            if ts_source.class_timeseries == ts_target.class_timeseries:
                same_pool.append(profile)
            else:
                differ_pool.append(profile)
        # per-position mean distance on each side (same class / other classes)
        dist_intraC = np.mean(same_pool, axis=0)
        dist_interC = np.mean(differ_pool, axis=0)
        # discriminative power = how much farther the other classes are
        ts_source.discmP.update({m: np.subtract(dist_interC, dist_intraC)})
        ts_source.threshP.update({m: dist_intraC})
        annotated.append(ts_source)
    return annotated
def extract_shapelet(k, TS_set, MP_set, m):
    """Extract the top-k most discriminative shapelets of length m per class.

    Pipeline: build discrimination/threshold profiles via discm_profile,
    keep the k profile positions with the highest discriminative power for
    each class, then materialise each kept position as a util.Shapelet.

    Parameters
    ----------
    k : int        -- shapelets kept per class
    TS_set : list  -- timeseries objects (with .name, .class_timeseries, .timeseries)
    MP_set         -- pairwise matrix profiles, indexed [source][target]
    m : int        -- subsequence (shapelet) length

    Returns
    -------
    list of util.Shapelet -- at most k per class, so k * nbr(class) overall.
    """
    DiscmPList_dic = {}
    ThreshList_dict = {}
    class_list = []
    shapelet_list = []
    TS_set = discm_profile(TS_set, MP_set, m)
    # group the per-timeseries profiles by class, keyed by timeseries name
    for ts in TS_set:
        c = ts.class_timeseries
        class_list.append(c)
        if c in DiscmPList_dic:
            DiscmPList_dic[c].update({ts.name: ts.discmP[m]})
            ThreshList_dict[c].update({ts.name: ts.threshP[m]})
        else:
            DiscmPList_dic[c] = {ts.name: ts.discmP[m]}
            ThreshList_dict[c] = {ts.name: ts.threshP[m]}
    # profiles are computed with a stride of m/4, so a profile index maps
    # back to the raw series at index * step (loop-invariant, hoisted here)
    step = int(m / 4)
    if step == 0:
        step = 1
    ### remove repetitive element in class_list
    class_list = list(set(class_list))
    for c in class_list:
        ts_namelist = DiscmPList_dic[c].keys()
        # seed the top-k table with sentinel int keys 0..k-1 mapped to -inf;
        # real entries replace them under a "<ts_name>_<index>" string key
        topk_discmPower = dict.fromkeys(range(0, k), float('-inf'))
        for ts_name in ts_namelist:
            ## Discm. Profile of source timeseries 'ts_name'
            dp = DiscmPList_dic[c][ts_name]
            for idx, DiscmPower in enumerate(dp):
                # evict the current minimum whenever this position beats it
                min_topk = min(topk_discmPower.values())
                for idx_topk, dd_topk in topk_discmPower.items():
                    if dd_topk == min_topk and dd_topk < DiscmPower:
                        topk_discmPower.pop(idx_topk)
                        key_composed = str(ts_name) + "_" + str(idx)
                        topk_discmPower.update({key_composed: DiscmPower})
                        break
        # turn each surviving "<ts_name>_<index>" entry into a Shapelet;
        # leftover int keys mean fewer than k candidates existed for class c
        for key, val in topk_discmPower.items():
            if type(key) != int:
                key_val = key.split("_")
                # NOTE(review): assumes timeseries names are ints with no '_'
                ts_name_source = int(key_val[0])
                # the position of the shapelet in the source timeseries
                ts_index_source = int(key_val[1])
                shap = util.Shapelet()
                shap.Class = c
                shap.differ_distance = val
                # length-normalised discriminative power (for cross-length ranking)
                shap.normal_distance = val / (m ** 0.5)
                s = [ts for ts in TS_set if ts.name == ts_name_source][0]
                # to decide the value of Shapelet, back to the off-set in original TS
                idx_in_rawTS = ts_index_source * step
                shap.subseq = s.timeseries[idx_in_rawTS:idx_in_rawTS + m]
                # hash the raw subsequence bytes as the shapelet name.
                # BUGFIX: tobytes() replaces tostring(), which was deprecated
                # and removed in NumPy 2.0; the bytes (and hash) are identical.
                shap.name = hash(shap.subseq.tobytes())
                # 'ThreshList_dict[c]': {ts_name: threshold profile array}
                shap.dist_threshold = ThreshList_dict[c][ts_name_source][ts_index_source]
                shapelet_list.append(shap)
            else:
                print("key is " + str(key))
    # for each class, we've taken k shapelets -> k * nbr(class) in total
    return shapelet_list
def update_shaplet(k, inputTSBatch, TS_set, MP_set, m):
    """Refresh the shapelet set after a new streaming batch arrives.

    The memory block applies the forgetting mechanism for the streaming
    case (updating the cached timeseries and matrix profiles with the
    incoming batch) before shapelets are re-extracted.
    """
    cached_TS, cached_MP = mb.memory_cache(TS_set, MP_set, inputTSBatch, m)
    return extract_shapelet(k, cached_TS, cached_MP, m)
def extract_shapelet_all_length(k, TS_set, MP_set, m_step):
    """Extract shapelets for every candidate length in m_step, then prune to
    the k best per class across all lengths.

    Parameters
    ----------
    k : int        -- shapelets retained per class after pruning
    TS_set : list  -- timeseries objects
    MP_set : dict  -- maps each length m to its pairwise matrix-profile set
    m_step         -- iterable of candidate shapelet lengths

    Returns
    -------
    list -- at most k shapelets per class, the strongest (largest
    normal_distance) first within each class group.
    """
    min_m = util.min_length_dataset(TS_set)
    shap_list = []
    for m in m_step:
        # candidate positions a length-m window (stride m/4) yields in the
        # shortest timeseries; lengths yielding no candidates are skipped
        nbr_candidate = int((min_m - m) / (0.25 * m))
        # BUGFIX: the original if/elif split (0 < nbr_candidate < k vs.
        # nbr_candidate > 0) ran identical bodies — collapsed into one branch.
        if nbr_candidate > 0:
            shap_list.extend(extract_shapelet(k, TS_set, MP_set[m], m))
    # group the collected shapelets by class ...
    grouped_shapelets = {}
    for shap in shap_list:
        if shap.Class in grouped_shapelets:
            grouped_shapelets[shap.Class].append(shap)
        else:
            grouped_shapelets[shap.Class] = [shap]
    # ... and keep only the k with the largest normalised distance per class
    list_all_shapelets_pruned = []
    for group in grouped_shapelets.values():
        ranked = sorted(group, key=lambda s: s.normal_distance, reverse=True)
        list_all_shapelets_pruned += ranked[:int(k)]
    return list_all_shapelets_pruned