Skip to content

Commit 96f89a9

Browse files
authored
Add files via upload
1 parent 508bd25 commit 96f89a9

10 files changed

+322
-0
lines changed

text-opencv/Images/billboard1.jpeg

181 KB
Loading

text-opencv/Images/billboard2.jpeg

22.8 KB
Loading

text-opencv/Images/car_wash.png

284 KB
Loading

text-opencv/Images/lebron_james.jpg

284 KB
Loading

text-opencv/Images/sign.jpg

32.8 KB
Loading

text-opencv/Images/stadium1.jpeg

893 KB
Loading

text-opencv/Images/stadium2.jpeg

883 KB
Loading

text-opencv/Images/stadium3.jpeg

172 KB
Loading

text-opencv/image_text_detection.py

+133
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# USAGE
2+
# python image_text_detection.py --image Images/lebron_james.jpg --east frozen_east_text_detection.pb
3+
4+
# import the necessary packages
5+
from imutils.object_detection import non_max_suppression
6+
import numpy as np
7+
import argparse
8+
import time
9+
import cv2
10+
11+
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
# --image and --east are mandatory: without them None would be handed to
# cv2.imread / cv2.dnn.readNet and the script would fail with an opaque error
ap.add_argument("-i", "--image", type=str, required=True,
    help="path to input image")
ap.add_argument("-east", "--east", type=str, required=True,
    help="path to input EAST text detector")
ap.add_argument("-c", "--min-confidence", type=float, default=0.5,
    help="minimum probability required to inspect a region")
ap.add_argument("-w", "--width", type=int, default=320,
    help="resized image width (should be multiple of 32)")
ap.add_argument("-e", "--height", type=int, default=320,
    help="resized image height (should be multiple of 32)")
args = vars(ap.parse_args())

# load the input image; cv2.imread reports a missing or undecodable file
# by returning None instead of raising, so check for that explicitly
image = cv2.imread(args["image"])
if image is None:
    ap.error("could not read image: {}".format(args["image"]))

# keep an untouched copy to draw on and grab the image dimensions
orig = image.copy()
(H, W) = image.shape[:2]

# set the new width and height and then determine the ratio in change
# for both the width and height (used later to map boxes found on the
# resized image back onto the original image)
(newW, newH) = (args["width"], args["height"])
rW = W / float(newW)
rH = H / float(newH)

# resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]

# define the two output layer names for the EAST detector model that
# we are interested in -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]

# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])

# construct a blob from the image (per-channel mean subtraction, R and B
# channels swapped, no cropping) and then perform a forward pass of the
# model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
    (123.68, 116.78, 103.94), swapRB=True, crop=False)
start = time.time()
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
end = time.time()

# show timing information on text prediction
print("[INFO] text detection took {:.6f} seconds".format(end - start))

# grab the number of rows and columns from the scores volume, then
# initialize our set of bounding box rectangles and corresponding
# confidence scores
(numRows, numCols) = scores.shape[2:4]
rects = []
confidences = []

# loop over the number of rows
for y in range(0, numRows):
    # extract the scores (probabilities), followed by the geometrical
    # data used to derive potential bounding box coordinates that
    # surround text
    scoresData = scores[0, 0, y]
    xData0 = geometry[0, 0, y]
    xData1 = geometry[0, 1, y]
    xData2 = geometry[0, 2, y]
    xData3 = geometry[0, 3, y]
    anglesData = geometry[0, 4, y]

    # loop over the number of columns
    for x in range(0, numCols):
        # if our score does not have sufficient probability, ignore it
        if scoresData[x] < args["min_confidence"]:
            continue

        # compute the offset factor as our resulting feature maps will
        # be 4x smaller than the input image
        (offsetX, offsetY) = (x * 4.0, y * 4.0)

        # extract the rotation angle for the prediction and then
        # compute the sin and cosine
        angle = anglesData[x]
        cos = np.cos(angle)
        sin = np.sin(angle)

        # use the geometry volume to derive the width and height of
        # the bounding box
        h = xData0[x] + xData2[x]
        w = xData1[x] + xData3[x]

        # compute both the starting and ending (x, y)-coordinates for
        # the text prediction bounding box
        endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
        endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
        startX = int(endX - w)
        startY = int(endY - h)

        # add the bounding box coordinates and probability score to
        # our respective lists
        rects.append((startX, startY, endX, endY))
        confidences.append(scoresData[x])

# apply non-maxima suppression to suppress weak, overlapping bounding
# boxes
boxes = non_max_suppression(np.array(rects), probs=confidences)

# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the bounding box coordinates based on the respective
    # ratios so they line up with the original (un-resized) image
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # draw the bounding box on the image
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

# show the output image and wait for a key press before exiting
cv2.imshow("Text Detection", orig)
cv2.waitKey(0)

text-opencv/video_text_detection.py

+189
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
# USAGE
2+
# python video_text_detection.py --east frozen_east_text_detection.pb
3+
4+
# import the necessary packages
5+
from imutils.video import VideoStream
6+
from imutils.video import FPS
7+
from imutils.object_detection import non_max_suppression
8+
import numpy as np
9+
import argparse
10+
import imutils
11+
import time
12+
import cv2
13+
14+
def decode_predictions(scores, geometry, min_confidence=None):
    """Turn the detector's raw output volumes into boxes and confidences.

    Args:
        scores: array read as ``scores[0, 0, row, col]``; each entry is
            the probability compared against ``min_confidence``.
        geometry: array read as ``geometry[0, k, row, col]`` for
            ``k`` in 0..4; channels 0-3 are combined into the box height
            (0 + 2) and width (1 + 3), channel 4 is the rotation angle
            passed to ``np.cos`` / ``np.sin``.
        min_confidence: entries scoring below this value are skipped.
            When ``None`` (the default) the threshold is read from the
            module-level ``args["min_confidence"]``, which is what the
            two-argument call has always done.

    Returns:
        A tuple ``(rects, confidences)`` where ``rects`` is a list of
        ``(startX, startY, endX, endY)`` integer tuples and
        ``confidences`` is the list of matching score entries, both in
        row-major scan order.
    """
    if min_confidence is None:
        # backward-compatible path: use the parsed command-line arguments
        min_confidence = args["min_confidence"]

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the
        # geometrical data used to derive potential bounding box
        # coordinates that surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability,
            # ignore it
            if scoresData[x] < min_confidence:
                continue

            # compute the offset factor as our resulting feature
            # maps will be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and
            # then compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height
            # of the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates
            # for the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score
            # to our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # return a tuple of the bounding boxes and associated confidences
    return (rects, confidences)
70+
71+
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-east", "--east", type=str, required=True,
    help="path to input EAST text detector")
ap.add_argument("-v", "--video", type=str,
    help="path to optinal input video file")
ap.add_argument("-c", "--min-confidence", type=float, default=0.5,
    help="minimum probability required to inspect a region")
ap.add_argument("-w", "--width", type=int, default=320,
    help="resized image width (should be multiple of 32)")
ap.add_argument("-e", "--height", type=int, default=320,
    help="resized image height (should be multiple of 32)")
args = vars(ap.parse_args())

# decide once whether frames come from a video file or from the webcam;
# the two sources expose different read/release APIs below
use_video_file = bool(args.get("video", False))

# initialize the original frame dimensions, new frame dimensions,
# and ratio between the dimensions
(W, H) = (None, None)
(newW, newH) = (args["width"], args["height"])
(rW, rH) = (None, None)

# define the two output layer names for the EAST detector model that
# we are interested in -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]

# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])

# if a video path was not supplied, grab the reference to the web cam
if not use_video_file:
    print("[INFO] starting video stream...")
    vs = VideoStream(src=0).start()
    time.sleep(1.0)

# otherwise, grab a reference to the video file
else:
    vs = cv2.VideoCapture(args["video"])

# start the FPS throughput estimator
fps = FPS().start()

# the try/finally guarantees the camera/file handle and the GUI windows
# are released even if processing a frame raises
try:
    # loop over frames from the video stream
    while True:
        # grab the current frame, then handle if we are using a
        # VideoStream or VideoCapture object (the latter returns a
        # tuple whose second item is the frame)
        frame = vs.read()
        frame = frame[1] if use_video_file else frame

        # check to see if we have reached the end of the stream
        if frame is None:
            break

        # resize the frame, maintaining the aspect ratio
        frame = imutils.resize(frame, width=1000)
        orig = frame.copy()

        # if our frame dimensions are None, we still need to compute the
        # ratio of old frame dimensions to new frame dimensions
        if W is None or H is None:
            (H, W) = frame.shape[:2]
            rW = W / float(newW)
            rH = H / float(newH)

        # resize the frame, this time ignoring aspect ratio
        frame = cv2.resize(frame, (newW, newH))

        # construct a blob from the frame and then perform a forward pass
        # of the model to obtain the two output layer sets
        blob = cv2.dnn.blobFromImage(frame, 1.0, (newW, newH),
            (123.68, 116.78, 103.94), swapRB=True, crop=False)
        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)

        # decode the predictions, then apply non-maxima suppression to
        # suppress weak, overlapping bounding boxes
        (rects, confidences) = decode_predictions(scores, geometry)
        boxes = non_max_suppression(np.array(rects), probs=confidences)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # scale the bounding box coordinates based on the respective
            # ratios so they line up with the displayed frame
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)

            # draw the bounding box on the frame
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

        # update the FPS counter
        fps.update()

        # show the output frame
        cv2.imshow("Text Detection", orig)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    # stop the timer and display FPS information
    fps.stop()
    print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

finally:
    # if we are using a webcam, release the pointer
    if not use_video_file:
        vs.stop()

    # otherwise, release the file pointer
    else:
        vs.release()

    # close all windows
    cv2.destroyAllWindows()

0 commit comments

Comments
 (0)