forked from boostcampaitech5/level3_cv_finalproject-cv-08
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess.py
99 lines (83 loc) · 3.32 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import cv2
import time
import torch
import numpy as np
import streamlit as st
from ultralytics import YOLO
import concurrent.futures
import warnings
warnings.filterwarnings('ignore')
@st.cache_resource
def piano_detection_load_model(device, warmup_iters=10, warmup_shape=(720, 1280, 3)):
    """Load the piano-detection YOLO model onto *device* and warm it up.

    Cached by streamlit so the model is loaded only once per session.

    Args:
        device: torch device (or device string) the model should run on.
        warmup_iters: number of dummy predictions used to trigger lazy
            CUDA/kernel initialization before the first real inference.
        warmup_shape: (H, W, C) shape of the dummy warmup image.

    Returns:
        The warmed-up ultralytics ``YOLO`` model.
    """
    model_path = "./data/model/piano_detection.pt"
    model = YOLO(model_path)
    model.to(device)
    # Use a uint8 image like the real video frames. The original used
    # np.random.rand (float64 in [0, 1]), which does not match the dtype
    # of frames passed at inference time, so the warmup exercised a
    # different preprocessing path inside ultralytics.
    dummy_for_warmup = np.random.randint(0, 256, warmup_shape, dtype=np.uint8)
    for _ in range(warmup_iters):
        model.predict(source=dummy_for_warmup, device=device, verbose=False)
    return model
def process_frame(frame, xmin, ymin, xmax, ymax):
    """Crop the keyboard region out of a BGR frame, convert it to
    grayscale, resize it to 900x100, and scale pixel values into [0, 1].

    Args:
        frame: full BGR video frame (H, W, 3 ndarray).
        xmin, ymin, xmax, ymax: bounding box of the detected keyboard.

    Returns:
        (100, 900) float ndarray with values in [0, 1].
    """
    cropped = frame[ymin:ymax, xmin:xmax]
    gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (900, 100), interpolation=cv2.INTER_LINEAR)
    return resized / 255.
def preprocess(video_info, key):
    """Read the selected segment of an inference video, detect the piano
    keyboard once, crop/normalize every subsequent frame, and group the
    frames into overlapping 5-frame chunks.

    Args:
        video_info: dict holding 'video_select_range' ([start_sec, end_sec])
            and 'video_fps' — assumed schema, TODO confirm against caller.
        key: 'url' selects ./data/inference/01.mp4; anything else 02.mp4.

    Returns:
        List of 5-element lists of (100, 900) float frames — one chunk per
        processed frame, edge frames padded by repeating themselves.

    Raises:
        RuntimeError: if no keyboard is detected in the selected range.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = piano_detection_load_model(device)

    total_start = time.time()
    video_path = "./data/inference/01.mp4" if key == 'url' else "./data/inference/02.mp4"

    cap = cv2.VideoCapture(video_path)
    start = video_info['video_select_range'][0] * video_info['video_fps']
    end = video_info['video_select_range'][1] * video_info['video_fps']
    cap.set(cv2.CAP_PROP_POS_FRAMES, start)

    frame_count = start
    key_detected = False
    xmin = ymin = xmax = ymax = 0
    futures = []
    # NOTE: the original bound this timestamp to `st`, shadowing the
    # `streamlit as st` import inside this function.
    t0 = time.time()
    # Context manager guarantees shutdown even if an exception is raised
    # mid-loop (the original shut down manually).
    with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
        # Fix: the original `while True` loop referenced `ret` in its exit
        # condition before any assignment, raising NameError when
        # start >= end; a plain bounded loop avoids that entirely.
        while frame_count < end:
            frame_count += 1
            ret, frame = cap.read()
            if not ret:
                break
            if not key_detected:
                # Piano detection — run only until the first confident box.
                # Fix: use the detected torch device; the original
                # hard-coded device="0", which fails on CPU-only machines.
                pred = model.predict(source=frame, device=device, verbose=False)
                boxes = pred[0].boxes
                # Fix: boxes.conf.item() raises if more than one box is
                # returned; index the first detection instead.
                if boxes and boxes.conf[0].item() > 0.8:
                    xmin, ymin, xmax, ymax = tuple(
                        np.array(boxes.xyxy.detach().cpu()[0], dtype=int)
                    )
                    key_detected = True
                    futures.append(executor.submit(process_frame, frame, xmin, ymin, xmax, ymax))
                # Frames before the keyboard is found are skipped entirely.
            else:
                futures.append(executor.submit(process_frame, frame, xmin, ymin, xmax, ymax))
        # Collect results inside the `with` so workers finish before shutdown.
        frames = [future.result() for future in futures]
    cap.release()  # Fix: the original leaked the VideoCapture handle.
    print(f"time to video read : {time.time()-t0:.4f} s")

    if not frames:
        # Fix: np.stack on an empty list fails with an opaque ValueError;
        # fail with an actionable message instead.
        raise RuntimeError("No piano keyboard detected in the selected video range.")

    t0 = time.time()
    frames = np.stack(frames, axis=0)
    print(f"time to stack : {time.time()-t0:.4f} s")

    # Build overlapping 5-frame windows centered on each frame; the first
    # and last two frames pad their windows by repeating themselves.
    t0 = time.time()
    frames_with5 = []
    n = len(frames)
    for i in range(n):
        if 2 <= i < n - 2:
            chunk = [frames[i-2], frames[i-1], frames[i], frames[i+1], frames[i+2]]
        elif i < 2:
            chunk = [frames[i], frames[i], frames[i], frames[i+1], frames[i+2]]
        else:
            chunk = [frames[i-2], frames[i-1], frames[i], frames[i], frames[i]]
        frames_with5.append(chunk)
    print(f"time to making chunks : {time.time()-t0:.4f} s")

    print(f"time to total : {time.time()-total_start:.4f} s")
    return frames_with5