forked from lozuwa/impy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGeometricAugmenters.py
executable file
·345 lines (332 loc) · 11.1 KB
/
GeometricAugmenters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
"""
package: Images2Dataset
class: DataAugmentation
Email: [email protected]
Author: Rodrigo Loza
Description: Common data augmentation operations
for an image.
Log:
November, 2017 -> Restructured class.
December, 2017 -> Researched most used data augmentation techniques.
March, 2018 -> Coded methods.
April, 2018 -> Redesigned the methods to support multiple bounding
boxes (traditional data augmentation tools.)
April, 2018 -> Redefined list of augmenters:
---------------
Space dimension
---------------
1. Scaling
Resize image to (h' x w').
2. Translate
Translate an image.
3. Jitter boxes
Draws random color boxes inside the image.
4. Flip horizontally
Flip the image horizontally.
5. Flip vertically
Flip the image vertically.
6. Rotation
Randomly rotates the bounding boxes.
"""
# Libraries
from interface import implements
import math
import random
import cv2
import numpy as np
try:
from .GeometricAugmentersMethods import *
except:
from GeometricAugmentersMethods import *
try:
from .VectorOperations import *
except:
from VectorOperations import *
try:
from .AssertDataTypes import *
except:
from AssertDataTypes import *
class GeometricAugmenters(implements(GeometricAugmentersMethods)):
"""
GeometricAugmenters class. This class implements a set of geometric data
augmentation tools for images and their bounding boxes.
IMPORTANT
- This class assumes input images are numpy tensors that follow the opencv
color format BGR.
"""
def __init__(self):
    """
    Runs the parent initializer and attaches the data type checker that
    every augmenter uses to validate its input frame.
    """
    # Initialize whatever the interface-implementing base class requires.
    super(GeometricAugmenters, self).__init__()
    # Helper whose assertNumpyType() is called at the top of each method.
    self.assertion = AssertDataTypes()
def scale(self, frame = None, size = None, interpolationMethod = None):
    """
    Scales an image to another size.
    Args:
        frame: A numpy tensor that contains an image. Its shape is not
            inspected here; the tensor is handed straight to OpenCV.
        size: A tuple or list (width, height) with the target size.
        interpolationMethod: An int with the OpenCV interpolation flag
            (INTER_NEAREST -> 0,
            INTER_LINEAR -> 1,
            INTER_CUBIC -> 2,
            INTER_LANCZOS4 -> 4).
            Defaults to 2 (INTER_CUBIC).
    Returns:
        The tensor returned by cv2.resize, sized (width, height).
    Raises:
        ValueError: If frame is not a numpy array, or if size is missing,
            is not a 2-length tuple/list, or holds a zero or negative value.
    """
    # Local variable assertions
    if not self.assertion.assertNumpyType(frame):
        raise ValueError("Frame has to be a numpy array.")
    if size is None:
        raise ValueError("size cannot be empty.")
    if not isinstance(size, (tuple, list)):
        raise ValueError("size has to be either a tuple or a list (width, height)")
    if len(size) != 2:
        raise ValueError("size must be a tuple of size 2 (width, height)")
    resizeWidth, resizeHeight = size
    if resizeWidth == 0 or resizeHeight == 0:
        raise ValueError("Neither width nor height can be 0.")
    if resizeWidth < 0 or resizeHeight < 0:
        raise ValueError("Neither width nor height can be negative.")
    if interpolationMethod is None:
        interpolationMethod = 2
    # Scale image. The interpolation flag has to be passed by keyword:
    # the third positional parameter of cv2.resize is the destination
    # array (dst), not the interpolation method.
    return cv2.resize(frame, (resizeWidth, resizeHeight),
                      interpolation = interpolationMethod)
def translate(self, frame = None, offset = None):
    """
    Translates an image by the given offset. The output keeps the size of
    the input frame.
    Args:
        frame: A 2-D or 3-D numpy tensor that contains an image.
        offset: The amount of space to move on each axis. Either a
            2-length sequence (tx, ty) where tx moves along the width
            and ty along the height, a 1-length sequence, or a single
            number; the last two apply the same shift to both axes.
    Returns:
        A tensor with the image translated by offset.
    Raises:
        ValueError: If frame is not a 2-D/3-D numpy array or offset is
            missing, has the wrong length or is not numeric.
    """
    # Assertions
    if not self.assertion.assertNumpyType(frame):
        raise ValueError("Frame has to be a numpy array.")
    if len(frame.shape) not in (2, 3):
        raise ValueError("Type of data not understood.")
    height, width = frame.shape[0], frame.shape[1]
    if offset is None:
        raise ValueError("Offset cannot be empty.")
    # A bare number means the same displacement on both axes.
    if np.isscalar(offset):
        offset = (offset,)
    try:
        components = len(offset)
    except TypeError:
        raise ValueError("offset is not understood.")
    if components == 2:
        tx, ty = offset
    elif components == 1:
        tx = ty = offset[0]
    else:
        raise ValueError("offset is not understood.")
    try:
        tx, ty = float(tx), float(ty)
    except (TypeError, ValueError):
        raise ValueError("offset is not understood.")
    # Translate image. The last column of the affine matrix carries the
    # requested displacement.
    M = np.float32([[1, 0, tx], [0, 1, ty]])
    return cv2.warpAffine(frame, M, (width, height))
def crop(self, frame = None, size = None):
    """
    Crops a frame to the requested size. The crop is anchored to one of
    the four corners of the frame, picked at random with numpy's global
    random generator.
    Args:
        frame: A tensor that contains an image.
        size: A 2-length tuple or list (width, height) with the size of
            the crop. A dimension that is zero, negative or not smaller
            than the frame falls back to 3/4 of the frame dimension.
            Default is [0, 0], i.e. a 3/4 crop on both axes.
    Returns:
        A slice of frame with shape (cropHeight, cropWidth, ...).
    Raises:
        ValueError: If frame is not a numpy array.
        TypeError: If size is neither a list nor a tuple.
        Exception: If size is not of length 2.
    Example:
        - Corner cc has been picked. So:
            Original
            ca----------cb        Crop
            |           |      -----------
            |           |      |         |
            |           |      |         |
            |           |      |         |
            cc----------cd     -----------
    """
    # Assertions.
    if not self.assertion.assertNumpyType(frame):
        raise ValueError("Frame has to be a numpy array.")
    if size is None:
        size = [0, 0]
    if not isinstance(size, (list, tuple)):
        raise TypeError("Size has to be either a list or a tuple.")
    if len(size) != 2:
        raise Exception("Size must be of length 2.")
    # Local variables.
    height, width = frame.shape[0], frame.shape[1]
    cropWidth, cropHeight = size
    # Logic. Non-positive sizes are treated like oversized ones; a negative
    # size would otherwise produce wrong or empty slices depending on the
    # corner that gets picked.
    if (cropWidth >= width) or (cropWidth <= 0):
        print("WARNING: The specified cropping size for width is bigger than" + \
              " the width of the tensor. Setting the cropping width " +\
              " to 3/4 of the current tensor. This operation is done for" +\
              " only this image.")
        # max() keeps at least one pixel for very small frames.
        cropWidth = max(1, int(width*(3/4)))
    if (cropHeight >= height) or (cropHeight <= 0):
        print("WARNING: The specified cropping size for height is bigger than" + \
              " the height of the tensor. Setting the cropping height " +\
              " to 3/4 of the current tensor. This operation is done for" +\
              " only this image.")
        cropHeight = max(1, int(height*(3/4)))
    # Pick one corner randomly: 0 -> top left, 1 -> top right,
    # 2 -> bottom left, 3 -> bottom right. np.random.rand() is in [0, 1)
    # so the value is always one of 0, 1, 2, 3.
    pickedCorner = int(np.random.rand()*4)
    top = 0 if pickedCorner in (0, 1) else height - cropHeight
    left = 0 if pickedCorner in (0, 2) else width - cropWidth
    return frame[top:top+cropHeight, left:left+cropWidth]
def jitterBoxes(self, frame = None, size = None, quantity = None, color = None):
    """
    Draws filled boxes at random positions on the frame.
    Args:
        frame: A 2-D or 3-D tensor that contains an image.
        size: A tuple (width, height) with the size of the jitter boxes
            to draw.
        quantity: An int that tells how many jitter boxes to create inside
            the frame. Default is 10.
        color: A 3-sized tuple with the color of the boxes, given in the
            channel order of the frame. Default is black (0,0,0).
    Returns:
        A tensor that contains an image altered by jitter boxes.
        NOTE(review): cv2.rectangle draws on the array it receives, so the
        input frame is expected to be modified as well - pass a copy if
        the original has to be preserved.
    Raises:
        ValueError: If frame is not a numpy array.
        Exception: If size is empty.
    """
    # Assertions
    if not self.assertion.assertNumpyType(frame):
        raise ValueError("Frame has to be a numpy array.")
    if quantity is None:
        quantity = 10
    if size is None:
        raise Exception("Size cannot be empty.")
    # Apply the documented default; without it None reaches cv2.rectangle.
    if color is None:
        color = (0, 0, 0)
    # Local variables. Only the first two dimensions are needed, which
    # also allows single channel frames.
    rows, cols = frame.shape[0], frame.shape[1]
    # Create boxes
    for _ in range(quantity):
        # The top left corner is shifted up/left by a third of the frame,
        # so boxes may partially fall outside the image.
        y = int(random.random() * rows) - (rows // 3)
        x = int(random.random() * cols) - (cols // 3)
        # Draw boxes on top of the image (thickness -1 fills the box).
        frame = cv2.rectangle(frame, (x, y), (x+size[0], y+size[1]), color, -1)
    # Return frame
    return frame
def horizontalFlip(self, frame = None):
    """
    Mirrors a frame horizontally by calling cv2.flip with flip code 1
    (the left and right sides of the image are swapped).
    Args:
        frame: A tensor that contains an image.
    Returns:
        The flipped tensor returned by cv2.flip.
    Raises:
        ValueError: If frame is not a numpy array.
    """
    # Validate the input before handing it to OpenCV.
    isNumpy = self.assertion.assertNumpyType(frame)
    if (isNumpy == False):
        raise ValueError("Frame has to be a numpy array.")
    return cv2.flip(frame, 1)
def verticalFlip(self, frame = None):
    """
    Mirrors a frame vertically by calling cv2.flip with flip code 0
    (the top and bottom sides of the image are swapped).
    Args:
        frame: A tensor that contains an image.
    Returns:
        The flipped tensor returned by cv2.flip.
    Raises:
        ValueError: If frame is not a numpy array.
    """
    # Validate the input before handing it to OpenCV.
    isNumpy = self.assertion.assertNumpyType(frame)
    if (isNumpy == False):
        raise ValueError("Frame has to be a numpy array.")
    return cv2.flip(frame, 0)
def rotation(self, frame = None, bndbox = None, theta = None):
    """
    Rotates a frame around its center and computes the axis-aligned box
    that encloses the rotated bounding box.
    Args:
        frame: A 2-D or 3-D tensor that contains an image.
        bndbox: A tuple that contains the ix, iy, x, y coordinates
            of the bounding box in the image.
        theta: The rotation angle in RADIANS (it is converted to degrees
            before being passed to OpenCV). Default is a random angle
            in [pi/3, 4*pi/3), i.e. between 60 and 240 degrees.
    Returns:
        A tensor that contains the rotated image (same size as the input)
        and a list [ix, iy, x, y] with the coordinates of the box that
        encloses the rotated bounding box, bounded to the image.
    Raises:
        ValueError: If frame is not a numpy array.
        Exception: If bndbox is empty.
    NOTE(review): cv2.getRotationMatrix2D treats positive angles as
    counter-clockwise. The box corners are rotated with
    VectorOperations.rotation_equations, which is presumably using the
    same direction - confirm against that helper.
    """
    # Assertions
    if not self.assertion.assertNumpyType(frame):
        raise ValueError("Frame has to be a numpy array.")
    if bndbox is None:
        raise Exception("Bnbdbox cannot be empty")
    if theta is None:
        theta = (random.random() * math.pi) + math.pi / 3
    # Local variables
    thetaDegrees = theta * 180 / math.pi
    # Only rows and cols are needed; this also allows single channel frames.
    rows, cols = frame.shape[0], frame.shape[1]
    halfCols, halfRows = cols // 2, rows // 2
    # Decode the bounding box
    ix, iy, x, y = bndbox
    # Fix the y coordinate since matrix transformations
    # assume 0,0 is at the left bottom corner.
    iy, y = rows - iy, rows - y
    # Center the coordinates with respect to the
    # center of the image.
    ix, iy, x, y = ix - halfCols, iy - halfRows, x - halfCols, y - halfRows
    # Rotate the four corners of the box and move them back to image
    # coordinates (undo the centering and the y axis flip).
    xs, ys = [], []
    for cornerX, cornerY in ((ix, iy), (x, iy), (ix, y), (x, y)):
        rotatedX, rotatedY = VectorOperations.rotation_equations(cornerX, cornerY, theta)
        xs.append(rotatedX + halfCols)
        ys.append(rows - (rotatedY + halfRows))
    # Rotate image
    M = cv2.getRotationMatrix2D((cols/2, rows/2), thetaDegrees, 1)
    frame = cv2.warpAffine(frame, M, (cols, rows))
    # Axis-aligned box that encloses the rotated corners.
    ix, x = min(xs), max(xs)
    iy, y = min(ys), max(ys)
    # Make sure ix, iy, x, y are valid
    if ix < 0:
        # If ix is smaller, then it was impossible to place
        # the coordinate inside the image because of the angle.
        # In this case, the safest option is to set ix to 0.
        print("WARNING: ix is negative.", ix)
        ix = 0
    if iy < 0:
        # Same reasoning as for ix.
        print("WARNING: iy is negative.", iy)
        iy = 0
    if x >= cols:
        print("WARNING: x was the width of the frame.", x, cols)
        x = cols - 1
    if y >= rows:
        print("WARNING: y was the height of the frame.", y, rows)
        y = rows - 1
    # A box that ends up completely outside of the image leaves the
    # opposite side out of range too, so bound the remaining sides.
    ix, x = min(ix, cols - 1), max(x, 0)
    iy, y = min(iy, rows - 1), max(y, 0)
    # Return frame and coordinates
    return frame, [ix, iy, x, y]