Skip to content

Commit 176e1b9

Browse files
committed
3-frame detection with frame crop implemented
1 parent ae2179e commit 176e1b9

2 files changed

Lines changed: 230 additions & 23 deletions

File tree

detection/cli.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ def build_argument_parser() -> argparse.ArgumentParser:
6969
default="",
7070
help="Путь для сохранения визуализированного кадра (если пусто — не сохраняем)"
7171
)
72+
argument_parser.add_argument(
73+
"--debug",
74+
action="store_true",
75+
help="Сохранять три промежуточных кадра детекции (до агрегации)"
76+
)
77+
7278
argument_parser.add_argument(
7379
"--show",
7480
action="store_true",

detection/pipeline.py

Lines changed: 224 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ def fetch_camera_and_calibration(
8383
video_source_url = camera_info["source"]
8484
calibration_raw = camera_info["calib"]
8585

86+
crop_x = calibration_raw.get("crop_x")
87+
crop_y = calibration_raw.get("crop_y")
88+
crop_width = calibration_raw.get("crop_width")
89+
crop_height = calibration_raw.get("crop_height")
90+
8691
(
8792
calibration_image_width,
8893
calibration_image_height,
@@ -100,6 +105,10 @@ def fetch_camera_and_calibration(
100105
camera_matrix,
101106
distortion_coefficients,
102107
rectified_camera_matrix_opt,
108+
crop_x,
109+
crop_y,
110+
crop_width,
111+
crop_height,
103112
)
104113

105114

@@ -493,6 +502,89 @@ def compute_zone_confidences(zone_statistics: List[Dict[str, Any]]) -> None:
493502
weighted_scores_sum += car_info["score"] * car_info["overlap_ratio"]
494503
zone_info["confidence"] = float(weighted_scores_sum / len(cars_in_zone))
495504

505+
def aggregate_detections_across_frames(
    list_of_boxes: List[np.ndarray],
    list_of_scores: List[np.ndarray],
    list_of_class_ids: List[np.ndarray],
    iou_threshold: float = 0.5,
    min_appearances: int = 2,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Keep only boxes that were observed on at least ``min_appearances``
    *distinct* frames; coordinates and scores are averaged per cluster.

    Boxes are greedily clustered: a detection joins the best-matching
    existing cluster of the same class when IoU >= ``iou_threshold``,
    otherwise it starts a new cluster.

    Args:
        list_of_boxes: per-frame arrays of shape (K_i, 4), xyxy format.
        list_of_scores: per-frame arrays of shape (K_i,), confidences.
        list_of_class_ids: per-frame arrays of shape (K_i,), class ids.
        iou_threshold: minimum IoU to merge a box into an existing cluster.
        min_appearances: minimum number of distinct frames a cluster must
            cover to survive aggregation.

    Returns:
        Tuple of (boxes float32 (N, 4), scores float32 (N,),
        class_ids int32 (N,)); correctly-shaped empty arrays when no
        cluster survives.
    """
    clusters: List[Dict[str, Any]] = []

    def iou(box_a, box_b) -> float:
        # Standard intersection-over-union of two xyxy boxes.
        xa1, ya1, xa2, ya2 = box_a
        xb1, yb1, xb2, yb2 = box_b

        inter_x1 = max(xa1, xb1)
        inter_y1 = max(ya1, yb1)
        inter_x2 = min(xa2, xb2)
        inter_y2 = min(ya2, yb2)

        if inter_x2 <= inter_x1 or inter_y2 <= inter_y1:
            return 0.0

        inter_area = (inter_x2 - inter_x1) * (inter_y2 - inter_y1)
        area_a = (xa2 - xa1) * (ya2 - ya1)
        area_b = (xb2 - xb1) * (yb2 - yb1)
        union = area_a + area_b - inter_area
        if union <= 0.0:
            return 0.0
        return inter_area / union

    # Track the frame index per detection so that duplicate detections
    # within ONE frame do not inflate the appearance count.
    for frame_index, (boxes, scores, class_ids) in enumerate(
        zip(list_of_boxes, list_of_scores, list_of_class_ids)
    ):
        for box, score, cls_id in zip(boxes, scores, class_ids):
            best_cluster = None
            best_iou = 0.0
            for cluster in clusters:
                if cluster["class_id"] != int(cls_id):
                    continue
                current_iou = iou(cluster["mean_box"], box)
                if current_iou > best_iou:
                    best_iou = current_iou
                    best_cluster = cluster

            if best_cluster is None or best_iou < iou_threshold:
                clusters.append(
                    {
                        "class_id": int(cls_id),
                        "boxes": [box.astype(float)],
                        "scores": [float(score)],
                        "mean_box": box.astype(float),
                        # Set of distinct frame indices this cluster covers.
                        "frames": {frame_index},
                    }
                )
            else:
                best_cluster["boxes"].append(box.astype(float))
                best_cluster["scores"].append(float(score))
                best_cluster["mean_box"] = np.mean(best_cluster["boxes"], axis=0)
                best_cluster["frames"].add(frame_index)

    aggregated_boxes = []
    aggregated_scores = []
    aggregated_class_ids = []

    for cluster in clusters:
        # BUG FIX: count distinct frames, not raw detections — previously
        # len(cluster["boxes"]) let two same-frame duplicates pass the
        # min_appearances filter, contradicting the documented contract.
        if len(cluster["frames"]) >= min_appearances:
            aggregated_boxes.append(cluster["mean_box"])
            aggregated_scores.append(np.mean(cluster["scores"]))
            aggregated_class_ids.append(cluster["class_id"])

    if not aggregated_boxes:
        return (
            np.zeros((0, 4), dtype=np.float32),
            np.zeros((0,), dtype=np.float32),
            np.zeros((0,), dtype=np.int32),
        )

    return (
        np.stack(aggregated_boxes).astype(np.float32),
        np.array(aggregated_scores, dtype=np.float32),
        np.array(aggregated_class_ids, dtype=np.int32),
    )
587+
496588

497589
def render_visualization_frame(
498590
base_frame_bgr: np.ndarray,
@@ -687,12 +779,56 @@ def run_single_frame_pipeline(args):
687779
camera_matrix,
688780
distortion_coefficients,
689781
rectified_camera_matrix_opt,
782+
crop_x,
783+
crop_y,
784+
crop_width,
785+
crop_height,
690786
) = fetch_camera_and_calibration(http_session, base_api_url)
691787

692-
# 3. Первый кадр
693-
first_frame_bgr = grab_first_frame(video_source_url)
788+
# 3. Три кадра с интервалом примерно 2 секунды из одного потока
789+
video_capture = cv2.VideoCapture(video_source_url, cv2.CAP_FFMPEG)
790+
if not video_capture.isOpened():
791+
raise RuntimeError(f"cannot open source: {video_source_url}")
792+
793+
frames_bgr = []
794+
targets = [0.0, 5.0, 10.0] # целевые моменты (секунды) относительно старта
795+
start_time = time.time()
796+
current_target_idx = 0
797+
798+
while current_target_idx < len(targets):
799+
ok, frame = video_capture.read()
800+
if not ok or frame is None:
801+
raise RuntimeError("cannot read frame from source")
802+
803+
now = time.time()
804+
elapsed = now - start_time
805+
806+
# как только прошли нужные секунды — фиксируем кадр
807+
if elapsed >= targets[current_target_idx]:
808+
frames_bgr.append(frame.copy())
809+
current_target_idx += 1
810+
811+
video_capture.release()
812+
813+
first_frame_bgr = frames_bgr[0]
694814
frame_height, frame_width = first_frame_bgr.shape[:2]
695815

816+
# 3a. Обрезка кадров по параметрам из calib (если они заданы)
817+
use_crop = (
818+
crop_x is not None
819+
and crop_y is not None
820+
and crop_width is not None
821+
and crop_height is not None
822+
)
823+
824+
if use_crop:
825+
detection_frames_bgr = [
826+
frame[crop_y:crop_y + crop_height, crop_x:crop_x + crop_width].copy()
827+
for frame in frames_bgr
828+
]
829+
else:
830+
detection_frames_bgr = frames_bgr
831+
696832
# 4. Подгонка матрицы камеры
697833
camera_matrix = adjust_camera_matrix_to_frame_size(
698834
camera_matrix,
@@ -725,34 +861,99 @@ def run_single_frame_pipeline(args):
725861
)
726862
zone_colors_bgr = vivid_palette(len(curved_zone_polygons))
727863

728-
# 7. Инференс
864+
# 7. Инференс на трёх кадрах
729865
model_xml_path = Path(args.model).expanduser().resolve()
866+
867+
all_boxes_full: List[np.ndarray] = []
868+
all_scores: List[np.ndarray] = []
869+
all_class_ids: List[np.ndarray] = []
870+
class_names = None
871+
872+
for det_frame_bgr in detection_frames_bgr:
873+
(
874+
boxes,
875+
scores,
876+
class_ids,
877+
class_names_local,
878+
resize_ratio,
879+
padding_width,
880+
padding_height,
881+
) = run_openvino_inference_on_frame(
882+
det_frame_bgr,
883+
model_xml_path=model_xml_path,
884+
device=args.device,
885+
img_size=args.imgsz,
886+
confidence_threshold=args.conf,
887+
car_only=args.car_only,
888+
)
889+
890+
det_h, det_w = det_frame_bgr.shape[:2]
891+
boxes = restore_boxes_to_original_frame(
892+
boxes,
893+
resize_ratio=resize_ratio,
894+
padding_width=padding_width,
895+
padding_height=padding_height,
896+
frame_width=det_w,
897+
frame_height=det_h,
898+
)
899+
900+
# Если кадр был обрезан по ROI, возвращаемся в координаты полного кадра
901+
if use_crop:
902+
boxes[:, [0, 2]] += crop_x
903+
boxes[:, [1, 3]] += crop_y
904+
905+
all_boxes_full.append(boxes)
906+
all_scores.append(scores)
907+
all_class_ids.append(class_ids)
908+
909+
if class_names is None:
910+
class_names = class_names_local
911+
912+
# 7b. Агрегация: берём боксы, которые попали на 2 или 3 кадра
730913
(
731914
bounding_boxes_xyxy,
732915
detection_scores,
733916
detection_class_ids,
734-
class_names,
735-
resize_ratio,
736-
padding_width,
737-
padding_height,
738-
) = run_openvino_inference_on_frame(
739-
first_frame_bgr,
740-
model_xml_path=model_xml_path,
741-
device=args.device,
742-
img_size=args.imgsz,
743-
confidence_threshold=args.conf,
744-
car_only=args.car_only,
917+
) = aggregate_detections_across_frames(
918+
all_boxes_full,
919+
all_scores,
920+
all_class_ids,
921+
iou_threshold=0.5,
922+
min_appearances=2,
745923
)
746924

747-
# 8. Перенос боксов в координаты оригинального кадра
748-
bounding_boxes_xyxy = restore_boxes_to_original_frame(
749-
bounding_boxes_xyxy,
750-
resize_ratio=resize_ratio,
751-
padding_width=padding_width,
752-
padding_height=padding_height,
753-
frame_width=frame_width,
754-
frame_height=frame_height,
755-
)
925+
# 8. Отладочные кадры до агрегации
926+
if args.debug and args.out_img:
927+
base_out_path = Path(args.out_img)
928+
stem = base_out_path.stem
929+
suffix = base_out_path.suffix or ".jpg"
930+
931+
for idx, (frame_bgr, boxes, scores, class_ids) in enumerate(
932+
zip(frames_bgr, all_boxes_full, all_scores, all_class_ids),
933+
start=1,
934+
):
935+
debug_frame = frame_bgr.copy()
936+
for box, score, cls_id in zip(boxes, scores, class_ids):
937+
if 0 <= int(cls_id) < len(class_names):
938+
cls_name = class_names[int(cls_id)]
939+
else:
940+
cls_name = str(int(cls_id))
941+
score_percent = int(round(float(score) * 100))
942+
label = f"{cls_name} {score_percent}%"
943+
944+
draw_box_with_alpha(
945+
debug_frame,
946+
box,
947+
label,
948+
edge_color_bgr=(0, 255, 0),
949+
fill_color_bgr=None,
950+
alpha=0.0,
951+
thickness=2,
952+
)
953+
954+
debug_path = base_out_path.with_name(f"{stem}_debug{idx}{suffix}")
955+
print(str(debug_path))
956+
cv2.imwrite(str(debug_path), debug_frame)
756957

757958
# 9. Назначение машин зонам
758959
zone_statistics, car_assigned_zone_indices = assign_detections_to_zones(

0 commit comments

Comments
 (0)