@@ -83,6 +83,11 @@ def fetch_camera_and_calibration(
8383 video_source_url = camera_info ["source" ]
8484 calibration_raw = camera_info ["calib" ]
8585
86+ crop_x = calibration_raw .get ("crop_x" )
87+ crop_y = calibration_raw .get ("crop_y" )
88+ crop_width = calibration_raw .get ("crop_width" )
89+ crop_height = calibration_raw .get ("crop_height" )
90+
8691 (
8792 calibration_image_width ,
8893 calibration_image_height ,
@@ -100,6 +105,10 @@ def fetch_camera_and_calibration(
100105 camera_matrix ,
101106 distortion_coefficients ,
102107 rectified_camera_matrix_opt ,
108+ crop_x ,
109+ crop_y ,
110+ crop_width ,
111+ crop_height ,
103112 )
104113
105114
@@ -493,6 +502,89 @@ def compute_zone_confidences(zone_statistics: List[Dict[str, Any]]) -> None:
493502 weighted_scores_sum += car_info ["score" ] * car_info ["overlap_ratio" ]
494503 zone_info ["confidence" ] = float (weighted_scores_sum / len (cars_in_zone ))
495504
def aggregate_detections_across_frames(
    list_of_boxes: List[np.ndarray],
    list_of_scores: List[np.ndarray],
    list_of_class_ids: List[np.ndarray],
    iou_threshold: float = 0.5,
    min_appearances: int = 2,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Keep only detections confirmed on at least ``min_appearances`` distinct frames.

    Detections from all frames are greedily clustered: a box joins the
    same-class cluster whose running mean box gives the highest IoU, provided
    that IoU reaches ``iou_threshold``; otherwise it starts a new cluster.
    Box coordinates and scores within a surviving cluster are averaged.

    Parameters
    ----------
    list_of_boxes : one ``(K_i, 4)`` xyxy array per frame.
    list_of_scores : one ``(K_i,)`` score array per frame.
    list_of_class_ids : one ``(K_i,)`` class-id array per frame.
    iou_threshold : minimum IoU for a box to join an existing cluster.
    min_appearances : minimum number of *distinct frames* a cluster must
        cover to be kept (duplicates within one frame do not inflate the
        count).

    Returns
    -------
    Tuple of ``(boxes (M, 4) float32, scores (M,) float32, class_ids (M,) int32)``;
    all empty (M == 0) when no cluster is confirmed.
    """
    clusters: List[Dict[str, Any]] = []

    def _iou(box_a, box_b) -> float:
        # Standard axis-aligned IoU; 0.0 for disjoint boxes or degenerate union.
        ax1, ay1, ax2, ay2 = box_a
        bx1, by1, bx2, by2 = box_b

        ix1 = max(ax1, bx1)
        iy1 = max(ay1, by1)
        ix2 = min(ax2, bx2)
        iy2 = min(ay2, by2)

        if ix2 <= ix1 or iy2 <= iy1:
            return 0.0

        inter_area = (ix2 - ix1) * (iy2 - iy1)
        area_a = (ax2 - ax1) * (ay2 - ay1)
        area_b = (bx2 - bx1) * (by2 - by1)
        union = area_a + area_b - inter_area
        if union <= 0.0:
            return 0.0
        return inter_area / union

    for frame_index, (boxes, scores, class_ids) in enumerate(
        zip(list_of_boxes, list_of_scores, list_of_class_ids)
    ):
        for box, score, cls_id in zip(boxes, scores, class_ids):
            box = np.asarray(box, dtype=float)
            best_cluster = None
            best_iou = 0.0
            for cluster in clusters:
                # Never merge detections of different classes.
                if cluster["class_id"] != int(cls_id):
                    continue
                current_iou = _iou(cluster["mean_box"], box)
                if current_iou > best_iou:
                    best_iou = current_iou
                    best_cluster = cluster

            if best_cluster is None or best_iou < iou_threshold:
                clusters.append(
                    {
                        "class_id": int(cls_id),
                        "boxes": [box],
                        "scores": [float(score)],
                        "mean_box": box,
                        # Distinct frames that contributed to this cluster —
                        # used for the min_appearances filter so that several
                        # overlapping boxes from ONE frame count only once.
                        "frames": {frame_index},
                    }
                )
            else:
                best_cluster["boxes"].append(box)
                best_cluster["scores"].append(float(score))
                best_cluster["frames"].add(frame_index)
                # Recompute the running mean box after each addition.
                best_cluster["mean_box"] = np.mean(best_cluster["boxes"], axis=0)

    aggregated_boxes = []
    aggregated_scores = []
    aggregated_class_ids = []

    for cluster in clusters:
        # Count distinct frames, not raw boxes (fixes the documented contract:
        # "appeared on at least min_appearances different frames").
        if len(cluster["frames"]) >= min_appearances:
            aggregated_boxes.append(cluster["mean_box"])
            aggregated_scores.append(np.mean(cluster["scores"]))
            aggregated_class_ids.append(cluster["class_id"])

    if not aggregated_boxes:
        # Keep the documented output shapes even when nothing survives.
        return (
            np.zeros((0, 4), dtype=np.float32),
            np.zeros((0,), dtype=np.float32),
            np.zeros((0,), dtype=np.int32),
        )

    return (
        np.stack(aggregated_boxes).astype(np.float32),
        np.array(aggregated_scores, dtype=np.float32),
        np.array(aggregated_class_ids, dtype=np.int32),
    )
587+
496588
497589def render_visualization_frame (
498590 base_frame_bgr : np .ndarray ,
@@ -687,12 +779,56 @@ def run_single_frame_pipeline(args):
687779 camera_matrix ,
688780 distortion_coefficients ,
689781 rectified_camera_matrix_opt ,
782+ crop_x ,
783+ crop_y ,
784+ crop_width ,
785+ crop_height ,
690786 ) = fetch_camera_and_calibration (http_session , base_api_url )
691787
692- # 3. Первый кадр
693- first_frame_bgr = grab_first_frame (video_source_url )
788+    # 3. Три кадра с интервалом примерно 5 секунд из одного потока
789+ video_capture = cv2 .VideoCapture (video_source_url , cv2 .CAP_FFMPEG )
790+ if not video_capture .isOpened ():
791+ raise RuntimeError (f"cannot open source: { video_source_url } " )
792+
793+ frames_bgr = []
794+ targets = [0.0 , 5.0 , 10.0 ] # целевые моменты (секунды) относительно старта
795+ start_time = time .time ()
796+ current_target_idx = 0
797+
798+ while current_target_idx < len (targets ):
799+ ok , frame = video_capture .read ()
800+ if not ok or frame is None :
801+ raise RuntimeError ("cannot read frame from source" )
802+
803+ now = time .time ()
804+ elapsed = now - start_time
805+
806+ # как только прошли нужные секунды — фиксируем кадр
807+ if elapsed >= targets [current_target_idx ]:
808+ frames_bgr .append (frame .copy ())
809+ current_target_idx += 1
810+
811+ video_capture .release ()
812+
813+ first_frame_bgr = frames_bgr [0 ]
694814 frame_height , frame_width = first_frame_bgr .shape [:2 ]
695815
816+ # 3a. Обрезка кадров по параметрам из calib (если они заданы)
817+ use_crop = (
818+ crop_x is not None
819+ and crop_y is not None
820+ and crop_width is not None
821+ and crop_height is not None
822+ )
823+
824+ if use_crop :
825+ detection_frames_bgr = [
826+ frame [crop_y :crop_y + crop_height , crop_x :crop_x + crop_width ].copy ()
827+ for frame in frames_bgr
828+ ]
829+ else :
830+ detection_frames_bgr = frames_bgr
831+
696832 # 4. Подгонка матрицы камеры
697833 camera_matrix = adjust_camera_matrix_to_frame_size (
698834 camera_matrix ,
@@ -725,34 +861,99 @@ def run_single_frame_pipeline(args):
725861 )
726862 zone_colors_bgr = vivid_palette (len (curved_zone_polygons ))
727863
728- # 7. Инференс
864+ # 7. Инференс на трёх кадрах
729865 model_xml_path = Path (args .model ).expanduser ().resolve ()
866+
867+ all_boxes_full : List [np .ndarray ] = []
868+ all_scores : List [np .ndarray ] = []
869+ all_class_ids : List [np .ndarray ] = []
870+ class_names = None
871+
872+ for det_frame_bgr in detection_frames_bgr :
873+ (
874+ boxes ,
875+ scores ,
876+ class_ids ,
877+ class_names_local ,
878+ resize_ratio ,
879+ padding_width ,
880+ padding_height ,
881+ ) = run_openvino_inference_on_frame (
882+ det_frame_bgr ,
883+ model_xml_path = model_xml_path ,
884+ device = args .device ,
885+ img_size = args .imgsz ,
886+ confidence_threshold = args .conf ,
887+ car_only = args .car_only ,
888+ )
889+
890+ det_h , det_w = det_frame_bgr .shape [:2 ]
891+ boxes = restore_boxes_to_original_frame (
892+ boxes ,
893+ resize_ratio = resize_ratio ,
894+ padding_width = padding_width ,
895+ padding_height = padding_height ,
896+ frame_width = det_w ,
897+ frame_height = det_h ,
898+ )
899+
900+ # Если кадр был обрезан по ROI, возвращаемся в координаты полного кадра
901+ if use_crop :
902+ boxes [:, [0 , 2 ]] += crop_x
903+ boxes [:, [1 , 3 ]] += crop_y
904+
905+ all_boxes_full .append (boxes )
906+ all_scores .append (scores )
907+ all_class_ids .append (class_ids )
908+
909+ if class_names is None :
910+ class_names = class_names_local
911+
912+ # 7b. Агрегация: берём боксы, которые попали на 2 или 3 кадра
730913 (
731914 bounding_boxes_xyxy ,
732915 detection_scores ,
733916 detection_class_ids ,
734- class_names ,
735- resize_ratio ,
736- padding_width ,
737- padding_height ,
738- ) = run_openvino_inference_on_frame (
739- first_frame_bgr ,
740- model_xml_path = model_xml_path ,
741- device = args .device ,
742- img_size = args .imgsz ,
743- confidence_threshold = args .conf ,
744- car_only = args .car_only ,
917+ ) = aggregate_detections_across_frames (
918+ all_boxes_full ,
919+ all_scores ,
920+ all_class_ids ,
921+ iou_threshold = 0.5 ,
922+ min_appearances = 2 ,
745923 )
746924
747- # 8. Перенос боксов в координаты оригинального кадра
748- bounding_boxes_xyxy = restore_boxes_to_original_frame (
749- bounding_boxes_xyxy ,
750- resize_ratio = resize_ratio ,
751- padding_width = padding_width ,
752- padding_height = padding_height ,
753- frame_width = frame_width ,
754- frame_height = frame_height ,
755- )
925+ # 8. Отладочные кадры до агрегации
926+ if args .debug and args .out_img :
927+ base_out_path = Path (args .out_img )
928+ stem = base_out_path .stem
929+ suffix = base_out_path .suffix or ".jpg"
930+
931+ for idx , (frame_bgr , boxes , scores , class_ids ) in enumerate (
932+ zip (frames_bgr , all_boxes_full , all_scores , all_class_ids ),
933+ start = 1 ,
934+ ):
935+ debug_frame = frame_bgr .copy ()
936+ for box , score , cls_id in zip (boxes , scores , class_ids ):
937+ if 0 <= int (cls_id ) < len (class_names ):
938+ cls_name = class_names [int (cls_id )]
939+ else :
940+ cls_name = str (int (cls_id ))
941+ score_percent = int (round (float (score ) * 100 ))
942+ label = f"{ cls_name } { score_percent } %"
943+
944+ draw_box_with_alpha (
945+ debug_frame ,
946+ box ,
947+ label ,
948+ edge_color_bgr = (0 , 255 , 0 ),
949+ fill_color_bgr = None ,
950+ alpha = 0.0 ,
951+ thickness = 2 ,
952+ )
953+
954+ debug_path = base_out_path .with_name (f"{ stem } _debug{ idx } { suffix } " )
955+ print (str (debug_path ))
956+ cv2 .imwrite (str (debug_path ), debug_frame )
756957
757958 # 9. Назначение машин зонам
758959 zone_statistics , car_assigned_zone_indices = assign_detections_to_zones (
0 commit comments