Commit c9bbd2c ("First commit", 0 parents)

22 files changed: +2032 -0 lines

Readme.md

# Deep Visual Miner for photos and videos

Deep visual mining for your photos and videos, using a YOLOv2 deep convolutional neural network based object detector and traditional face recognition algorithms.

## Download the Docker image

```
docker pull pathbreak/deepvisualminer
```

## Build your own Visual Miner Docker image

If you don't want to pull my Docker image published at https://hub.docker.com/r/pathbreak/deepvisualminer/, you can build it locally on your own machine.

1. Clone the deepvisualminer project repo from GitHub:

   ```
   git clone https://github.com/pathbreak/deepvisualminer
   ```

   The files required for building the Docker image are in the `docker-deepvisualminer` subdirectory.

2. First, build or download the prerequisites. The following script builds Darkflow in a temporary container and downloads the OpenCV detector data files:

   ```
   cd deepvisualminer/docker-deepvisualminer
   chmod +x *.sh
   sudo ./build-image.sh
   ```

   This command launches a temporary container just to build the Darkflow object detection project. It's done in a separate container to avoid bloating the primary deepvisualminer Docker image with build tools and artifacts.

   The script also downloads the pretrained neural network weight files shared by the author via Google Drive.

   Once the build is complete, the complete Darkflow release directory is copied to the `./host/` directory on the host system.

3. Finally, build the primary deepvisualminer Docker image:

   ```
   sudo docker build -t deepvisualminer .
   ```

4. Verify that the image is created:

   ```
   sudo docker images
   ```

annotatedframewriter.py

```
from annotator import annotate
from basecomponent import BaseComponent

import imageio
import cv2
import os.path

class AnnotatedFrameWriter(BaseComponent):
    '''
    An outputter component that writes each annotated video frame to an image file.
    '''

    def __init__(self, cfg):
        BaseComponent.__init__(self, cfg)

    def execute(self, input_data, input_directory, output_directory):
        if not input_data['isvideo']:
            return {}

        img = input_data['img'].copy()

        # Draw the location reports of every configured input component onto the frame.
        for comp in self.cfg['inputs']:
            comp_outputs = input_data.get(comp)
            comp_reports = comp_outputs['reports'] if comp_outputs else None
            if not comp_reports:
                print("Warning: pipeline file specifies {} as input for {} but {} is not outputting any location reports".format(
                    comp, self.name, comp))
                continue

            annotate(img, comp_reports)

        # The output directory structure should match the input directory structure.
        relpath_of_input_file = os.path.relpath(input_data['file'], input_directory)
        relparent_of_input_file = os.path.dirname(relpath_of_input_file)
        inp_filename, inp_extension = os.path.splitext(os.path.basename(relpath_of_input_file))

        output_filedir = os.path.join(output_directory, relparent_of_input_file)
        if not os.path.exists(output_filedir):
            os.makedirs(output_filedir)

        output_filepath = os.path.join(
            output_filedir,
            inp_filename + '-frame-' + str(input_data['frame']) + '-annotated.' + self.cfg['params']['format'])

        final_img = cv2.resize(img, (self.cfg['params']['size']['width'], self.cfg['params']['size']['height']))

        print(output_filepath)
        imageio.imwrite(output_filepath, final_img)

        return {'file': output_filepath}
```

annotatedphotowriter.py

```
from annotator import annotate
from basecomponent import BaseComponent

import imageio
import cv2
import os.path

class AnnotatedPhotoWriter(BaseComponent):
    '''
    An outputter component that writes an annotated copy of a photo to an image file.
    '''

    def __init__(self, cfg):
        BaseComponent.__init__(self, cfg)

    def execute(self, input_data, input_directory, output_directory):
        if not input_data['isphoto']:
            return {}

        img = input_data['img'].copy()

        # Draw the location reports of every configured input component onto the photo.
        for comp in self.cfg['inputs']:
            comp_outputs = input_data.get(comp)
            comp_reports = comp_outputs['reports'] if comp_outputs else None
            if not comp_reports:
                print("Warning: pipeline file specifies {} as input for {} but {} is not outputting any location reports".format(
                    comp, self.name, comp))
                continue

            annotate(img, comp_reports)

        # The output directory structure should match the input directory structure.
        relpath_of_input_file = os.path.relpath(input_data['file'], input_directory)
        relparent_of_input_file = os.path.dirname(relpath_of_input_file)
        inp_filename, inp_extension = os.path.splitext(os.path.basename(relpath_of_input_file))

        output_filedir = os.path.join(output_directory, relparent_of_input_file)
        if not os.path.exists(output_filedir):
            os.makedirs(output_filedir)

        output_filepath = os.path.join(
            output_filedir,
            inp_filename + '-annotated.' + self.cfg['params']['format'])

        final_img = cv2.resize(img, (self.cfg['params']['size']['width'], self.cfg['params']['size']['height']))

        print(output_filepath)
        imageio.imwrite(output_filepath, final_img)

        return {'file': output_filepath}
```
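
The path arithmetic in these writer components mirrors the input directory tree under the output directory. A quick illustrative sketch of what it computes; the paths here are made up for the example:

```
import os.path

# Hypothetical inputs, mirroring what execute() receives.
input_directory = '/data/in'
output_directory = '/data/out'
input_file = '/data/in/holidays/2016/beach.jpg'

relpath = os.path.relpath(input_file, input_directory)       # 'holidays/2016/beach.jpg'
relparent = os.path.dirname(relpath)                         # 'holidays/2016'
filename, ext = os.path.splitext(os.path.basename(relpath))  # ('beach', '.jpg')

# With a configured output format of 'png', the annotated copy lands at:
print(os.path.join(output_directory, relparent, filename + '-annotated.png'))
# /data/out/holidays/2016/beach-annotated.png
```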

annotatedvideowriter.py

```
from annotator import annotate
from basecomponent import BaseComponent

import imageio
import cv2
import os.path

class AnnotatedVideoWriter(BaseComponent):
    '''
    An outputter component that writes annotated frames to an output video.
    Unlike other components, this component has to be stateful - it opens the
    output video stream when it receives the first frame, keeps it open while
    writing each subsequent frame, and closes it when the completed
    notification is received.
    '''

    def __init__(self, cfg):
        BaseComponent.__init__(self, cfg)

        self.output_video = None
        self.output_filepath = None

    def execute(self, input_data, input_directory, output_directory):
        if not input_data['isvideo']:
            return {}

        # Open the output video stream if this is the first frame.
        if input_data['frame'] == 0:
            # The output directory structure should match the input directory structure.
            relpath_of_input_file = os.path.relpath(input_data['file'], input_directory)
            relparent_of_input_file = os.path.dirname(relpath_of_input_file)
            inp_filename, inp_extension = os.path.splitext(os.path.basename(relpath_of_input_file))

            output_filedir = os.path.join(output_directory, relparent_of_input_file)
            if not os.path.exists(output_filedir):
                os.makedirs(output_filedir)

            self.output_filepath = os.path.join(
                output_filedir,
                inp_filename + '-annotated.' + self.cfg['params']['format'])

            self.output_video = imageio.get_writer(self.output_filepath, 'ffmpeg')

        img = input_data['img'].copy()

        # Draw the location reports of every configured input component onto the frame.
        for comp in self.cfg['inputs']:
            comp_outputs = input_data.get(comp)
            comp_reports = comp_outputs['reports'] if comp_outputs else None
            if not comp_reports:
                print("Warning: pipeline file specifies {} as input for {} but {} is not outputting any location reports".format(
                    comp, self.name, comp))
                continue

            annotate(img, comp_reports)

        final_img = cv2.resize(img, (self.cfg['params']['size']['width'], self.cfg['params']['size']['height']))

        self.output_video.append_data(final_img)

        return {'file': self.output_filepath}

    def completed(self, input_data, input_directory, output_directory):
        if self.output_video:
            self.output_video.close()

        self.output_video = None
        self.output_filepath = None
```
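
A minimal sketch of the lifecycle the docstring describes, driving the writer the way a pipeline presumably would: one execute() per frame, then completed(). The cfg dict and frame data here are invented for illustration and are not taken from this commit:

```
import numpy as np

from annotatedvideowriter import AnnotatedVideoWriter

# Hypothetical component config; real values come from a pipeline file.
cfg = {
    'name': 'videowriter',
    'inputs': ['detector'],
    'params': {'format': 'mp4', 'size': {'width': 640, 'height': 480}},
}
writer = AnnotatedVideoWriter(cfg)

for frame_num in range(3):
    input_data = {
        'isvideo': True,
        'frame': frame_num,                       # frame 0 opens the output stream
        'file': '/data/in/clip.mp4',
        'img': np.zeros((480, 640, 3), dtype=np.uint8),
        'detector': {'reports': []},              # no detections in this toy frame
    }
    writer.execute(input_data, '/data/in', '/data/out')

# The completed notification closes the output video.
writer.completed(input_data, '/data/in', '/data/out')
```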

annotator.py

```
import cv2

def annotate(img, comp_reports):
    '''
    Draws a box and label text around each detection region.
    Caution: Since img is drawn over in place, ensure the caller passes a copy
    instead of the original image.
    '''
    for r in comp_reports:
        rect = r['rect']
        cv2.rectangle(img, (rect[0], rect[1]), (rect[2], rect[3]), (255, 255, 255), 2)
        cv2.putText(img, r['labels'][0]['label'], (rect[0], rect[1] - 5), cv2.FONT_HERSHEY_PLAIN, 2.0, (255, 255, 255), 2)
```
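
A minimal usage sketch, with a made-up image and a report in the format documented in basecomponent.py:

```
import numpy as np
import cv2

from annotator import annotate

img = np.zeros((400, 600, 3), dtype=np.uint8)   # blank test image

reports = [
    {'labels': [{'label': 'person', 'confidence': 0.9}], 'rect': [50, 60, 250, 380]},
]

# annotate() draws in place, so work on a copy to preserve the original.
annotated = img.copy()
annotate(annotated, reports)
cv2.imwrite('annotated.png', annotated)
```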

basecomponent.py

```
class BaseComponent(object):
    '''
    Base class for all components. Hosts common helpers instead of
    repeating them in each subclass.
    '''

    def __init__(self, cfg):
        self.cfg = cfg
        self.name = cfg['name']

    def execute(self, input_data, input_directory, output_directory):
        '''
        Every component should perform its primary operation - detection,
        recognition or file writing - in execute().

        Returns: a dict of the component's outputs, where keys are 'reports',
        'annotatedimages', 'annotatedframes', etc.

        Each report should look like this:
        [
            {'labels': [{'label': 'cat', 'confidence': 0.8}, {'label': 'lion', 'confidence': 0.3}], 'rect': [x1, y1, x2, y2]},
            {'labels': ['dog', 'sheep'], 'rect': [x1, y1, x2, y2], 'confidence': 0.8}
        ]
        Confidence values are optional.
        The coordinates should always be full image coordinates, even if the
        input to the component was the ROI output of another component.
        '''
        pass

    def completed(self, input_data, input_directory, output_directory):
        '''
        Some components need to know when processing of an input file
        has completed.
        '''
        pass
```
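
To make the contract concrete, here is a toy component (hypothetical, not part of this commit) that reports the whole image as a single region in the documented format:

```
from basecomponent import BaseComponent

class WholeFrameReporter(BaseComponent):
    '''Toy component: reports the entire image as one detection region.'''

    def __init__(self, cfg):
        BaseComponent.__init__(self, cfg)

    def execute(self, input_data, input_directory, output_directory):
        h, w = input_data['img'].shape[:2]
        # Full image coordinates, as the docstring requires.
        return {
            'reports': [
                {'labels': [{'label': 'frame', 'confidence': 1.0}],
                 'rect': [0, 0, w, h]}
            ]
        }
```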

deepdetector.py

```
from net.build import TFNet
from basecomponent import BaseComponent
from annotator import annotate

class DeepDetector(BaseComponent):
    '''
    A DeepDetector uses a YOLOv2 convolutional neural network model for
    object detection.
    '''

    def __init__(self, cfg):
        BaseComponent.__init__(self, cfg)

        params = self.cfg['params']

        tfnet_cfg = {
            "model": params['model'],
            "load": params['weights'],
            "config": '/root/darkflow/cfg',
            "verbalise": True,
            "threshold": 0.1
        }

        self.nn = TFNet(tfnet_cfg)

    def execute(self, input_data, input_directory, output_directory):
        # Check what the configured inputs are - whether complete images or ROIs
        # output by some other components.
        all_detections = []
        for source in self.cfg['inputs']:
            if source == 'files':
                detections = self.detect_in_image(input_data)
                all_detections.extend(detections)

            else:
                triggerlabels = self.cfg['params'].get('triggerlabels')
                if not triggerlabels:
                    print("Warning: pipeline file specifies {} in inputs but there are no triggerlabels in params".format(source))
                    continue

                comp_outputs = input_data.get(source)
                if comp_outputs:
                    comp_reports = comp_outputs['reports']
                    detections = self.detect_in_rois(input_data, comp_reports)
                    all_detections.extend(detections)

        # Each detection is of the form
        # {"label":"person", "confidence": 0.56, "topleft": {"x": 184, "y": 101}, "bottomright": {"x": 274, "y": 382}}
        # These should be transformed to our preferred JSON output documented in basecomponent.py.
        reports = []
        for d in all_detections:
            r = {
                'labels': [
                    {
                        'label': d['label'],
                        # The float() cast is needed because the confidence value is
                        # actually a np.float32, which causes a serialization TypeError
                        # while writing the report to JSON.
                        'confidence': float(d['confidence'])
                    }
                ],
                'rect': [
                    d['topleft']['x'],
                    d['topleft']['y'],
                    d['bottomright']['x'],
                    d['bottomright']['y'],
                ]
            }

            reports.append(r)

        results = {
            'reports': reports
        }

        print(results)
        return results

    def detect_in_image(self, input_data):
        detections = self.nn.return_predict(input_data['img'])
        return detections

    def detect_in_rois(self, input_data, comp_reports):
        img = input_data['img']
        roi_detections = []

        for r in comp_reports:
            if ('all' in self.cfg['params']['triggerlabels']) or \
                    any([l['label'] in self.cfg['params']['triggerlabels'] for l in r['labels']]):

                rect = r['rect']
                x_offset = rect[0]
                y_offset = rect[1]
                roi = img[rect[1]:rect[3], rect[0]:rect[2], :]

                detections = self.nn.return_predict(roi)
                # These detections are relative to the ROI, so add the ROI origin to
                # their coordinates to convert them to full image coordinates.
                for d in detections:
                    d['topleft']['x'] += x_offset
                    d['bottomright']['x'] += x_offset

                    d['topleft']['y'] += y_offset
                    d['bottomright']['y'] += y_offset

                roi_detections.extend(detections)

        return roi_detections
```
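
For reference, a hypothetical pipeline entry that would construct this component. The model and weights paths are assumptions based on standard Darkflow file names, not values taken from this commit:

```
from deepdetector import DeepDetector

detector_cfg = {
    'name': 'detector',
    'inputs': ['files'],       # run over whole images; use another component's
                               # name here to run over its ROIs instead
    'params': {
        'model': '/root/darkflow/cfg/yolo.cfg',    # assumed path
        'weights': '/root/darkflow/yolo.weights',  # assumed path
        'triggerlabels': ['person', 'dog'],        # only consulted for ROI inputs
    },
}

detector = DeepDetector(detector_cfg)
```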
