# USAGE
# python real_time_object_detection.py --prototxt MobileNetSSD_deploy.prototxt.txt --model MobileNetSSD_deploy.caffemodel

# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
	help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
	help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
	help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# initialize the list of class labels MobileNet SSD was trained to
# detect, then generate a set of bounding box colors for each class
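# (the labels are the 20 PASCAL VOC object classes plus a special
# "background" class at index 0)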
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
	"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
	"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
	"sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# initialize the video stream, allow the camera sensor to warm up,
# and initialize the FPS counter
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)
fps = FPS().start()

# loop over the frames from the video stream
while True:
	# grab the frame from the threaded video stream and resize it
	# to have a maximum width of 400 pixels
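	# (imutils.resize preserves the aspect ratio when only a target
	# width is given)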
	frame = vs.read()
	frame = imutils.resize(frame, width=400)

	# grab the frame dimensions and convert it to a blob
	(h, w) = frame.shape[:2]
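	# the scale factor 0.007843 is 1 / 127.5: after the mean value of
	# 127.5 is subtracted, each pixel lands roughly in the [-1, 1]
	# range the MobileNet SSD model expects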
	blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),
		0.007843, (300, 300), 127.5)

	# pass the blob through the network and obtain the detections and
	# predictions
	net.setInput(blob)
	detections = net.forward()

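	# `detections` has shape (1, 1, N, 7); each of the N rows holds
	# [batch_id, class_id, confidence, startX, startY, endX, endY],
	# with the box coordinates normalized to [0, 1]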
	# loop over the detections
	for i in np.arange(0, detections.shape[2]):
		# extract the confidence (i.e., probability) associated with
		# the prediction
		confidence = detections[0, 0, i, 2]

		# filter out weak detections by ensuring the `confidence` is
		# greater than the minimum confidence
		if confidence > args["confidence"]:
			# extract the index of the class label from the
			# `detections`, then compute the (x, y)-coordinates of
			# the bounding box for the object
			idx = int(detections[0, 0, i, 1])
			box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
			(startX, startY, endX, endY) = box.astype("int")

			# draw the prediction on the frame
			label = "{}: {:.2f}%".format(CLASSES[idx],
				confidence * 100)
			cv2.rectangle(frame, (startX, startY), (endX, endY),
				COLORS[idx], 2)
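			# put the label just above the box, unless the box sits
			# too close to the top of the frame, in which case draw
			# the label just inside it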
			y = startY - 15 if startY - 15 > 15 else startY + 15
			cv2.putText(frame, label, (startX, y),
				cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

	# show the output frame
	cv2.imshow("Frame", frame)
	key = cv2.waitKey(1) & 0xFF

	# if the `q` key was pressed, break from the loop
	if key == ord("q"):
		break

	# update the FPS counter
	fps.update()

# stop the timer and display FPS information
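# (imutils' FPS reports the number of update() calls divided by the
# elapsed time between start() and stop())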
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()