Skip to content

Commit a60bf01

Browse files
update & fix code
1 parent d985b13 commit a60bf01

File tree

41 files changed

+468
-275
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

41 files changed

+468
-275
lines changed

applications/BasketballAction/predict/action_detect/reader/tsminf_reader.py

+63-55
Original file line numberDiff line numberDiff line change
@@ -33,21 +33,22 @@ class TSMINFReader(DataReader):
3333
"""
3434
Data reader for video dataset of jpg folder.
3535
"""
36-
3736
def __init__(self, name, mode, cfg, material=None):
3837
super(TSMINFReader, self).__init__(name, mode, cfg)
3938
name = name.upper()
40-
self.seg_num = cfg[name]['seg_num']
41-
self.seglen = cfg[name]['seglen']
42-
self.short_size = cfg[name]['short_size']
43-
self.target_size = cfg[name]['target_size']
44-
self.batch_size = cfg[name]['batch_size']
39+
self.num_seg = cfg[name]['num_seg']
40+
self.seglen = cfg[name]['seglen']
41+
self.short_size = cfg[name]['short_size']
42+
self.target_size = cfg[name]['target_size']
43+
self.batch_size = cfg[name]['batch_size']
4544
self.reader_threads = cfg[name]['reader_threads']
46-
self.buf_size = cfg[name]['buf_size']
47-
self.video_path = cfg[name]['frame_list']
45+
self.buf_size = cfg[name]['buf_size']
46+
self.video_path = cfg[name]['frame_list']
4847

49-
self.img_mean = np.array(cfg[name]['image_mean']).reshape([3, 1, 1]).astype(np.float32)
50-
self.img_std = np.array(cfg[name]['image_std']).reshape([3, 1, 1]).astype(np.float32)
48+
self.img_mean = np.array(cfg[name]['image_mean']).reshape(
49+
[3, 1, 1]).astype(np.float32)
50+
self.img_std = np.array(cfg[name]['image_std']).reshape(
51+
[3, 1, 1]).astype(np.float32)
5152

5253
self.material = material
5354

@@ -56,16 +57,16 @@ def create_reader(self):
5657
batch loader for TSN
5758
"""
5859
_reader = self._inference_reader_creator_longvideo(
59-
self.video_path,
60-
self.mode,
61-
seg_num=self.seg_num,
62-
seglen=self.seglen,
63-
short_size=self.short_size,
64-
target_size=self.target_size,
65-
img_mean=self.img_mean,
66-
img_std=self.img_std,
67-
num_threads = self.reader_threads,
68-
buf_size = self.buf_size)
60+
self.video_path,
61+
self.mode,
62+
num_seg=self.num_seg,
63+
seglen=self.seglen,
64+
short_size=self.short_size,
65+
target_size=self.target_size,
66+
img_mean=self.img_mean,
67+
img_std=self.img_std,
68+
num_threads=self.reader_threads,
69+
buf_size=self.buf_size)
6970

7071
def _batch_reader():
7172
batch_out = []
@@ -81,9 +82,10 @@ def _batch_reader():
8182

8283
return _batch_reader
8384

84-
85-
def _inference_reader_creator_longvideo(self, video_path, mode, seg_num, seglen,
86-
short_size, target_size, img_mean, img_std, num_threads, buf_size):
85+
def _inference_reader_creator_longvideo(self, video_path, mode, num_seg,
86+
seglen, short_size, target_size,
87+
img_mean, img_std, num_threads,
88+
buf_size):
8789
"""
8890
inference reader for video
8991
"""
@@ -94,7 +96,7 @@ def reader():
9496
def image_buf(image_id_path_buf):
9597
"""
9698
image_buf reader
97-
"""
99+
"""
98100
try:
99101
img_path = image_id_path_buf[1]
100102
img = Image.open(img_path).convert("RGB")
@@ -103,61 +105,68 @@ def image_buf(image_id_path_buf):
103105
image_id_path_buf[2] = None
104106

105107
frame_len = len(video_path)
106-
read_thread_num = seg_num
108+
read_thread_num = num_seg
107109
for i in range(0, frame_len, read_thread_num):
108-
image_list_part = video_path[i: i + read_thread_num]
110+
image_list_part = video_path[i:i + read_thread_num]
109111
image_id_path_buf_list = []
110112
for k in range(len(image_list_part)):
111113
image_id_path_buf_list.append([k, image_list_part[k], None])
112114

113-
114-
with concurrent.futures.ThreadPoolExecutor(max_workers=read_thread_num) as executor:
115-
executor.map(lambda image_id_path_buf: image_buf(image_id_path_buf), image_id_path_buf_list)
115+
with concurrent.futures.ThreadPoolExecutor(
116+
max_workers=read_thread_num) as executor:
117+
executor.map(
118+
lambda image_id_path_buf: image_buf(image_id_path_buf),
119+
image_id_path_buf_list)
116120
imgs_seg_list = [x[2] for x in image_id_path_buf_list]
117-
121+
118122
# add fault tolerance for bad images
119123
for k in range(len(image_id_path_buf_list)):
120124
img_buf = image_id_path_buf_list[k][2]
121125
pad_id = 1
122-
while pad_id < seg_num and img_buf is None:
123-
img_buf = imgs_seg_list[(k + pad_id)%seg_num][2]
126+
while pad_id < num_seg and img_buf is None:
127+
img_buf = imgs_seg_list[(k + pad_id) % num_seg][2]
124128
if img_buf is None:
125-
logger.info("read img erro from {} to {}".format(i, i + read_thread_num))
129+
print("read img erro from {} to {}".format(
130+
i, i + read_thread_num))
126131
exit(0)
127132
else:
128133
imgs_seg_list[k] = img_buf
129-
for pad_id in range(len(imgs_seg_list), seg_num):
134+
for pad_id in range(len(imgs_seg_list), num_seg):
130135
imgs_seg_list.append(imgs_seg_list[-1])
131-
yield imgs_seg_list
136+
yield imgs_seg_list
132137

133138

134-
def inference_imgs_transform(imgs_list, mode, seg_num, seglen, short_size,\
139+
def inference_imgs_transform(imgs_list, mode, num_seg, seglen, short_size,\
135140
target_size, img_mean, img_std):
136141
"""
137142
inference_imgs_transform
138-
"""
139-
imgs_ret = imgs_transform(imgs_list, mode, seg_num, seglen, short_size,
140-
target_size, img_mean, img_std)
143+
"""
144+
imgs_ret = imgs_transform(imgs_list, mode, num_seg, seglen,
145+
short_size, target_size, img_mean,
146+
img_std)
141147
label_ret = 0
142148

143149
return imgs_ret, label_ret
144150

145-
mapper = functools.partial(
146-
inference_imgs_transform,
147-
mode=mode,
148-
seg_num=seg_num,
149-
seglen=seglen,
150-
short_size=short_size,
151-
target_size=target_size,
152-
img_mean=img_mean,
153-
img_std=img_std)
151+
mapper = functools.partial(inference_imgs_transform,
152+
mode=mode,
153+
num_seg=num_seg,
154+
seglen=seglen,
155+
short_size=short_size,
156+
target_size=target_size,
157+
img_mean=img_mean,
158+
img_std=img_std)
154159

155-
return paddle.reader.xmap_readers(mapper, reader, num_threads, buf_size, order=True)
160+
return paddle.reader.xmap_readers(mapper,
161+
reader,
162+
num_threads,
163+
buf_size,
164+
order=True)
156165

157166

158167
def imgs_transform(imgs,
159168
mode,
160-
seg_num,
169+
num_seg,
161170
seglen,
162171
short_size,
163172
target_size,
@@ -186,7 +195,7 @@ def imgs_transform(imgs,
186195
imgs = np_imgs
187196
imgs -= img_mean
188197
imgs /= img_std
189-
imgs = np.reshape(imgs, (seg_num, seglen * 3, target_size, target_size))
198+
imgs = np.reshape(imgs, (num_seg, seglen * 3, target_size, target_size))
190199

191200
return imgs
192201

@@ -260,10 +269,10 @@ def _sample_crop_size(im_size):
260269
'crop_h': crop_pair[1],
261270
'offset_w': w_offset,
262271
'offset_h': h_offset
263-
}
264-
272+
}
273+
265274
return crop_info
266-
275+
267276
crop_info = _sample_crop_size(im_size)
268277
crop_w = crop_info['crop_w']
269278
crop_h = crop_info['crop_h']
@@ -355,4 +364,3 @@ def group_scale(imgs, target_size):
355364
resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
356365

357366
return resized_imgs
358-

applications/BasketballAction/predict/configs_basketball/configs_basketball.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
COMMON:
22
fps: 5
33
use_gpu: True
4-
label_dic: 'configs_basketball/index_label_basketball_6.json'
4+
label_dic: 'configs_basketball/index_label_basketball_6.json'
55
# debug
66
PCM_ONLY: False
77
DEBUG: False
@@ -14,7 +14,7 @@ PPTSM:
1414
params_file: "checkpoints_basketball/ppTSM/ppTSM.pdiparams"
1515
gpu_mem: 8000
1616
device_id: 0
17-
seg_num: 8
17+
num_seg: 8
1818
seglen: 1
1919
short_size: 256
2020
target_size: 224
@@ -57,6 +57,6 @@ ACTION:
5757
nms_thread: 0.01
5858
nms_offset: 10
5959

60-
classify_score_thread: 0.1
61-
iou_score_thread: 0.3
60+
classify_score_thread: 0.1
61+
iou_score_thread: 0.3
6262

applications/MultimodalVideoTag/scenario_lib/datareader/feature_reader.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(self, name, mode, cfg):
5858
self.batch_size = cfg[mode.upper()]['batch_size']
5959
self.filelist = cfg[mode.upper()]['filelist']
6060
self.eigen_file = cfg.MODEL.get('eigen_file', None)
61-
self.seg_num = cfg.MODEL.get('seg_num', None)
61+
self.num_seg = cfg.MODEL.get('num_seg', None)
6262
self.loss_type = cfg.TRAIN['loss_type']
6363
vocab_file = os.path.join(cfg.TRAIN.ernie_pretrain_dict_path,
6464
'vocab.txt')
@@ -120,10 +120,13 @@ def reader():
120120
yield batch_out
121121
batch_out = []
122122
except Exception as e:
123-
print("warning: load data {} failed, {}".format(filepath, str(e)))
123+
print("warning: load data {} failed, {}".format(
124+
filepath, str(e)))
124125
traceback.print_exc()
125126
continue
126-
# if self.mode == 'infer' and len(batch_out) > 0:
127+
128+
129+
# if self.mode == 'infer' and len(batch_out) > 0:
127130
if len(batch_out) > 0:
128131
yield batch_out
129132

@@ -224,13 +227,13 @@ def make_one_hot(label, dim=15):
224227
return one_hot_soft_label
225228

226229

227-
def generate_random_idx(feature_len, seg_num):
230+
def generate_random_idx(feature_len, num_seg):
228231
"""
229232
generate_random_idx
230233
"""
231234
idxs = []
232-
stride = float(feature_len) / seg_num
233-
for i in range(seg_num):
235+
stride = float(feature_len) / num_seg
236+
for i in range(num_seg):
234237
pos = (i + np.random.random()) * stride
235238
idxs.append(min(feature_len - 1, int(pos)))
236239
return idxs

applications/VideoQualityAssessment/paddlevideo/modeling/heads/tsm_rec_head.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def init_weights(self):
6767
self.fc.bias.learning_rate = 2.0
6868
self.fc.bias.regularizer = paddle.regularizer.L2Decay(0.)
6969

70-
def forward(self, x, seg_num):
70+
def forward(self, x, num_seg):
7171
"""Define how the head is going to run.
7272
7373
Args:
@@ -81,9 +81,9 @@ def forward(self, x, seg_num):
8181
# [N * num_segs, in_channels, 1, 1]
8282
if self.dropout is not None:
8383
x = self.dropout(x)
84-
# [N * seg_num, in_channels, 1, 1]
85-
x = paddle.reshape(x, [-1, seg_num, x.shape[1]])
86-
# [N, seg_num, in_channels]
84+
# [N * num_seg, in_channels, 1, 1]
85+
x = paddle.reshape(x, [-1, num_seg, x.shape[1]])
86+
# [N, num_seg, in_channels]
8787
x = paddle.mean(x, axis=1)
8888
# [N, in_channels]
8989
x = paddle.reshape(x, shape=[-1, self.in_channels])
@@ -112,7 +112,7 @@ def loss(self, scores, labels, valid_mode=False, **kwargs):
112112
labels = labels[0]
113113
losses = dict()
114114
loss = self.loss_func(scores, labels, **kwargs)
115-
115+
116116
score_list = paddle.tolist(scores)
117117
label_list = paddle.tolist(labels)
118118
score_list_len = len(score_list)
@@ -137,7 +137,7 @@ def loss(self, scores, labels, valid_mode=False, **kwargs):
137137
loss_a = self.loss_func(scores, labels_a, **kwargs)
138138
loss_b = self.loss_func(scores, labels_a, **kwargs)
139139
loss = lam * loss_a + (1 - lam) * loss_b
140-
140+
141141
losses['loss'] = loss
142142
losses['output'] = output
143143
losses['label'] = label
@@ -151,4 +151,3 @@ def label_smooth_loss(self, scores, labels, **kwargs):
151151
labels = paddle.squeeze(labels, axis=1)
152152
loss = self.loss_func(scores, labels, **kwargs)
153153
return loss
154-

applications/VideoQualityAssessment/paddlevideo/modeling/heads/tsn_head.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ..weight_init import weight_init_
2121
import paddle.nn.functional as F
2222

23+
2324
@HEADS.register()
2425
class TSNHead(BaseHead):
2526
"""TSN Head.
@@ -66,7 +67,7 @@ def init_weights(self):
6667
mean=0.,
6768
std=self.std)
6869

69-
def forward(self, x, seg_num):
70+
def forward(self, x, num_seg):
7071
"""Define how the head is going to run.
7172
7273
Args:
@@ -83,9 +84,9 @@ def forward(self, x, seg_num):
8384
# [N * num_segs, in_channels, 1, 1]
8485
if self.dropout is not None:
8586
x = self.dropout(x)
86-
# [N * seg_num, in_channels, 1, 1]
87-
x = paddle.reshape(x, [-1, seg_num, x.shape[1]])
88-
# [N, seg_num, in_channels]
87+
# [N * num_seg, in_channels, 1, 1]
88+
x = paddle.reshape(x, [-1, num_seg, x.shape[1]])
89+
# [N, num_seg, in_channels]
8990
x = paddle.mean(x, axis=1)
9091
# [N, in_channels]
9192
x = paddle.reshape(x, shape=[-1, self.in_channels])

benchmark/TimeSformer/timesformer_ucf101_videos_benchmark_bs1.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ MODEL: #MODEL field
1212
mlp_ratio: 4
1313
qkv_bias: True
1414
epsilon: 1e-6
15-
seg_num: 8
15+
num_seg: 8
1616
attention_type: 'divided_space_time'
1717
head:
1818
name: "TimeSformerHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads'

benchmark/TimeSformer/timesformer_ucf101_videos_benchmark_bs14.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ MODEL: #MODEL field
1212
mlp_ratio: 4
1313
qkv_bias: True
1414
epsilon: 1e-6
15-
seg_num: 8
15+
num_seg: 8
1616
attention_type: 'divided_space_time'
1717
head:
1818
name: "TimeSformerHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads'

benchmark/TimeSformer/timesformer_ucf101_videos_benchmark_bs14_mp.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ MODEL: #MODEL field
1212
mlp_ratio: 4
1313
qkv_bias: True
1414
epsilon: 1e-6
15-
seg_num: 8
15+
num_seg: 8
1616
attention_type: 'divided_space_time'
1717
head:
1818
name: "TimeSformerHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads'

benchmark/TimeSformer/timesformer_ucf101_videos_benchmark_bs1_mp.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ MODEL: #MODEL field
1212
mlp_ratio: 4
1313
qkv_bias: True
1414
epsilon: 1e-6
15-
seg_num: 8
15+
num_seg: 8
1616
attention_type: 'divided_space_time'
1717
head:
1818
name: "TimeSformerHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads'

0 commit comments

Comments
 (0)