pathbreak
diff --git a/‎Readme.md
+54 b/‎Readme.md
+54
diff --git a/‎annotatedframewriter.py
+48 b/‎annotatedframewriter.py
+48
diff --git a/‎annotatedphotowriter.py
+46 b/‎annotatedphotowriter.py
+46
diff --git a/‎annotatedvideowriter.py
+72 b/‎annotatedvideowriter.py
+72
diff --git a/‎annotator.py
+11 b/‎annotator.py
+11
diff --git a/‎basecomponent.py
+40 b/‎basecomponent.py
+40
diff --git a/‎deepdetector.py
+119 b/‎deepdetector.py
+119
@@ -0,0 +1,54 @@
+# Deep Visual Miner for photos and videos
+
+Deep visual mining for your photos and videos using a YOLOv2 deep convolutional neural network based object detector
+and traditional face recognition algorithms.
+
+## Download the Docker image
+
+ ```
+   docker pull pathbreak/deepvisualminer
+   
+ ```
+
+## Build your own Visual Miner Docker image
+
+If you don't want to pull my Docker image published at https://hub.docker.com/r/pathbreak/deepvisualminer/,
+you can build it on your own machine locally.
+
+1. Clone deepvisualminer project repo from GitHub:
+
+ ```
+ git clone https://github.com/pathbreak/deepvisualminer
+ ```
+ 
+ The files required for building the Docker image are in the `docker-deepvisualminer` subdirectory.
+ 
+2. First, you need to build or download prerequisites. 
+   The following script builds Darkflow in a temporary container and downloads OpenCV detector data files:
+
+ ```
+ cd deepvisualminer/docker-deepvisualminer
+ chmod +x *.sh
+ sudo ./build-image.sh
+ ```
+ 
+  This command launches a temporary container just to build the Darkflow object detection project.
+  
+  It's done in a separate container to avoid bloating up the primary deepvisualminer docker image
+  with build tools and artifacts.
+  
+  This script also downloads the pretrained neural network weight files shared by the author via Google Drive.
+  
+  Once the build is complete, the entire Darkflow release directory is copied under the host system's `./host/` directory.
+  
+3. Finally, build the primary deepvisualminer Docker image.
+
+ ```
+ sudo docker build -t deepvisualminer .
+ ```
+
+4. Verify that the image has been created.
+
+ ```
+ sudo docker images
+ ```
@@ -0,0 +1,48 @@
+from annotator import annotate
+from basecomponent import BaseComponent
+
+import imageio
+import cv2
+import os.path
+
+class AnnotatedFrameWriter(BaseComponent):
+    '''
+    Outputter component that writes every annotated video frame to its own image file.
+
+    Configuration read from cfg:
+        'inputs' - names of the components whose location reports are drawn on the frame.
+        'params' - 'format' (output file extension) and 'size' (dict with 'width' and 'height').
+    '''
+    def __init__(self, cfg):
+        BaseComponent.__init__(self, cfg)
+
+    def execute(self, input_data, input_directory, output_directory):
+        '''
+        Annotates the current video frame with the reports of all configured inputs
+        and saves it under output_directory, mirroring the input's relative path.
+
+        input_data: dict from which 'isvideo', 'img', 'file', 'frame' and one entry
+            per configured input component are read.
+
+        Returns: {} if the input is not a video, otherwise {'file': <path of written image>}.
+        '''
+        if not input_data['isvideo']:
+            return {}
+
+        # Draw on a copy so the frame held in input_data stays unmodified.
+        img = input_data['img'].copy()
+
+        for comp in self.cfg['inputs']:
+            # A configured input may have produced no output at all, so tolerate
+            # a missing entry as well as a missing or empty 'reports' list.
+            comp_outputs = input_data.get(comp) or {}
+            comp_reports = comp_outputs.get('reports')
+            if not comp_reports:
+                print("Warning: pipeline file specifies {} as input for {} but {} is not outputting any location reports".format(
+                    comp, self.name, comp
+                ))
+                continue
+
+            annotate(img, comp_reports)
+
+        # The output directory structure should match input directory structure.
+        relpath_of_input_file = os.path.relpath(input_data['file'], input_directory)
+        relparent_of_input_file = os.path.dirname(relpath_of_input_file)
+        inp_filename = os.path.splitext(os.path.basename(relpath_of_input_file))[0]
+
+        output_filedir = os.path.join(output_directory, relparent_of_input_file)
+        if not os.path.exists(output_filedir):
+            os.makedirs(output_filedir)
+
+        # The frame number is part of the name so each frame gets a distinct file.
+        output_filepath = os.path.join(output_filedir,
+            inp_filename + '-frame-' + str(input_data['frame']) + '-annotated.' + self.cfg['params']['format'])
+
+        size = self.cfg['params']['size']
+        final_img = cv2.resize(img, (size['width'], size['height']))
+
+        print(output_filepath)
+        imageio.imwrite(output_filepath, final_img)
+
+        return {'file':output_filepath}
+                
+        
@@ -0,0 +1,46 @@
+from annotator import annotate
+from basecomponent import BaseComponent
+
+import imageio
+import cv2
+import os.path
+
+class AnnotatedPhotoWriter(BaseComponent):
+    '''
+    Outputter component that writes an annotated copy of each input photo.
+
+    Configuration read from cfg:
+        'inputs' - names of the components whose location reports are drawn on the photo.
+        'params' - 'format' (output file extension) and 'size' (dict with 'width' and 'height').
+    '''
+    def __init__(self, cfg):
+        BaseComponent.__init__(self, cfg)
+
+    def execute(self, input_data, input_directory, output_directory):
+        '''
+        Annotates the photo with the reports of all configured inputs and saves it
+        under output_directory, mirroring the input's relative path.
+
+        input_data: dict from which 'isphoto', 'img', 'file' and one entry per
+            configured input component are read.
+
+        Returns: {} if the input is not a photo, otherwise {'file': <path of written image>}.
+        '''
+        if not input_data['isphoto']:
+            return {}
+
+        # Draw on a copy so the image held in input_data stays unmodified.
+        img = input_data['img'].copy()
+
+        for comp in self.cfg['inputs']:
+            # A configured input may have produced no output at all, so tolerate
+            # a missing entry as well as a missing or empty 'reports' list.
+            comp_outputs = input_data.get(comp) or {}
+            comp_reports = comp_outputs.get('reports')
+            if not comp_reports:
+                print("Warning: pipeline file specifies {} as input for {} but {} is not outputting any location reports".format(
+                    comp, self.name, comp
+                ))
+                continue
+
+            annotate(img, comp_reports)
+
+        # The output directory structure should match input directory structure.
+        relpath_of_input_file = os.path.relpath(input_data['file'], input_directory)
+        relparent_of_input_file = os.path.dirname(relpath_of_input_file)
+        inp_filename = os.path.splitext(os.path.basename(relpath_of_input_file))[0]
+
+        output_filedir = os.path.join(output_directory, relparent_of_input_file)
+        if not os.path.exists(output_filedir):
+            os.makedirs(output_filedir)
+
+        output_filepath = os.path.join(output_filedir,
+            inp_filename + '-annotated.' + self.cfg['params']['format'])
+
+        size = self.cfg['params']['size']
+        final_img = cv2.resize(img, (size['width'], size['height']))
+
+        print(output_filepath)
+        imageio.imwrite(output_filepath, final_img)
+
+        return {'file':output_filepath}
@@ -0,0 +1,72 @@
+from annotator import annotate
+from basecomponent import BaseComponent
+
+import imageio
+import cv2
+import os.path
+
+class AnnotatedVideoWriter(BaseComponent):
+    '''
+    This is an outputter component that writes annotated frames to an output video.
+    Unlike other components, design of this component has to be stateful - it opens output
+    video stream when it receives first frame and then keeps it open writing each frame to the video
+    till completed notification is received.
+
+    Configuration read from cfg:
+        'inputs' - names of the components whose location reports are drawn on each frame.
+        'params' - 'format' (output file extension) and 'size' (dict with 'width' and 'height').
+    '''
+
+    def __init__(self, cfg):
+        BaseComponent.__init__(self, cfg)
+
+        # Writer returned by imageio.get_writer, or None while no video is being written.
+        self.output_video = None
+        # Path of the video currently being written, or None.
+        self.output_filepath = None
+
+
+    def execute(self, input_data, input_directory, output_directory):
+        '''
+        Annotates the current frame with the reports of all configured inputs and
+        appends it to the output video.
+
+        input_data: dict from which 'isvideo', 'img', 'file', 'frame' and one entry
+            per configured input component are read.
+
+        Returns: {} if the input is not a video, otherwise {'file': <path of output video>}.
+        '''
+        if not input_data['isvideo']:
+            return {}
+
+        # Open output video stream if this is first frame. Also open it when no stream
+        # exists yet, so a video whose first delivered frame is not frame 0 does not
+        # fail on append_data.
+        if input_data['frame'] == 0 or self.output_video is None:
+            self._open_output_video(input_data, input_directory, output_directory)
+
+        # Draw on a copy so the frame held in input_data stays unmodified.
+        img = input_data['img'].copy()
+
+        for comp in self.cfg['inputs']:
+            # A configured input may have produced no output at all, so tolerate
+            # a missing entry as well as a missing or empty 'reports' list.
+            comp_outputs = input_data.get(comp) or {}
+            comp_reports = comp_outputs.get('reports')
+            if not comp_reports:
+                print("Warning: pipeline file specifies {} as input for {} but {} is not outputting any location reports".format(
+                    comp, self.name, comp
+                ))
+                continue
+
+            annotate(img, comp_reports)
+
+        size = self.cfg['params']['size']
+        final_img = cv2.resize(img, (size['width'], size['height']))
+
+        self.output_video.append_data(final_img)
+
+        return {'file': self.output_filepath}
+
+
+    def completed(self, input_data, input_directory, output_directory):
+        '''
+        Closes the output video stream, if one is open, and resets the writer state.
+        '''
+        self._close_output_video()
+
+
+    def _open_output_video(self, input_data, input_directory, output_directory):
+        # Opens the output stream for the current input file, closing any previous stream first.
+
+        # A stream may still be open if completed() was never received for the previous
+        # video; close it instead of leaking it.
+        self._close_output_video()
+
+        # The output directory structure should match input directory structure.
+        relpath_of_input_file = os.path.relpath(input_data['file'], input_directory)
+        relparent_of_input_file = os.path.dirname(relpath_of_input_file)
+        inp_filename = os.path.splitext(os.path.basename(relpath_of_input_file))[0]
+
+        output_filedir = os.path.join(output_directory, relparent_of_input_file)
+        if not os.path.exists(output_filedir):
+            os.makedirs(output_filedir)
+
+        self.output_filepath = os.path.join(output_filedir,
+            inp_filename + '-annotated.' + self.cfg['params']['format'])
+
+        self.output_video = imageio.get_writer(self.output_filepath, 'ffmpeg')
+
+
+    def _close_output_video(self):
+        # Closes the current output stream, if any, and clears the writer state.
+        if self.output_video:
+            self.output_video.close()
+
+        self.output_video = None
+        self.output_filepath = None
+        
@@ -0,0 +1,11 @@
+import cv2
+
+def annotate(img, comp_reports):
+    '''
+    Draws a box and label text for every detection report.
+    Caution: Since img is overwritten, ensure caller passes a copy instead of original image.
+
+    img: image the cv2 drawing functions can draw on; modified in place.
+    comp_reports: list of reports, each with a 'rect' [x1,y1,x2,y2] and a 'labels' list.
+        A label may be a dict with a 'label' key or a plain string; only the first
+        label is drawn. A report without labels gets just its box.
+    '''
+    color = (255,255,255)
+
+    for r in comp_reports:
+        # cv2 drawing functions need plain integer pixel coordinates.
+        x1, y1, x2, y2 = [int(v) for v in r['rect'][:4]]
+        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
+
+        labels = r.get('labels')
+        if not labels:
+            # Nothing to print for this report; the box alone is still useful.
+            continue
+
+        first_label = labels[0]
+        text = first_label['label'] if isinstance(first_label, dict) else str(first_label)
+
+        # Text is placed slightly above the top-left corner of the box.
+        cv2.putText(img, text, (x1, y1-5), cv2.FONT_HERSHEY_PLAIN, 2.0, color, 2)
@@ -0,0 +1,40 @@
+class BaseComponent(object):
+    '''
+    Common parent of every pipeline component. Shared helpers live here
+    so that subclasses don't have to repeat them.
+    '''
+    def __init__(self, cfg):
+        # Keep the whole configuration; subclasses read their own settings from it.
+        self.cfg = cfg
+        # The component's name, taken from the 'name' entry of the configuration.
+        self.name = cfg['name']
+
+
+    def execute(self, input_data, input_directory, output_directory):
+        '''
+        The primary operation of a component - detection, recognition or
+        file writing - belongs in execute(). This base implementation does
+        nothing and returns None.
+
+        Returns: a dict of the component's outputs, where keys are 'reports', 'annotatedimages',
+            'annotatedframes', etc.
+
+            Each report should be like this
+            [
+                {'labels':[{'label':'cat', 'confidence':0.8}, {'label':'lion', 'confidence':0.3}], 'rect':[x1,y1,x2,y2] },
+                {'labels':['dog','sheep'], 'rect':[x1,y1,x2,y2], 'confidence':0.8}
+            ]
+            confidence values are optional.
+            The coordinates should always be full image coordinates even if input to the component was ROI output of another
+            component.
+        '''
+        return None
+
+
+    def completed(self, input_data, input_directory, output_directory):
+        '''
+        Notification hook for components that need to know when processing
+        of an input file has completed. This base implementation does
+        nothing and returns None.
+        '''
+        return None
+    
@@ -0,0 +1,119 @@
+from net.build import TFNet
+from basecomponent import BaseComponent
+from annotator import annotate
+
+class DeepDetector(BaseComponent):
+    '''
+    A DeepDetector uses a YOLOv2 convolutional neural network model for
+    object detection.
+
+    Keys read from cfg['params']:
+        'model'         - passed to TFNet as "model" (required).
+        'weights'       - passed to TFNet as "load" (required).
+        'config'        - passed to TFNet as "config" (optional, default '/root/darkflow/cfg').
+        'threshold'     - passed to TFNet as "threshold" (optional, default 0.1).
+        'triggerlabels' - labels of other components' reports whose regions should be
+                          examined; 'all' examines every region. Needed whenever an
+                          input other than 'files' is configured.
+    '''
+
+    def __init__(self, cfg):
+        BaseComponent.__init__(self, cfg)
+
+        params = self.cfg['params']
+
+        tfnet_cfg = {
+            "model": params['model'],
+            "load": params['weights'],
+            "config" : params.get('config', '/root/darkflow/cfg'),
+            "verbalise" : True,
+            "threshold": params.get('threshold', 0.1)
+        }
+
+        self.nn = TFNet(tfnet_cfg)
+
+
+    def execute(self, input_data, input_directory, output_directory):
+        '''
+        Runs detection on the whole image and/or on regions reported by other
+        components, depending on cfg['inputs'].
+
+        Returns: {'reports': [...]} where each report has a 'labels' list holding one
+            {'label', 'confidence'} dict and a 'rect' [x1,y1,x2,y2].
+        '''
+
+        # Check what configured inputs are - whether complete image or ROIs output by some
+        # other components.
+        all_detections = []
+        for source in self.cfg['inputs']:
+            if source == 'files':
+                all_detections.extend(self.detect_in_image(input_data))
+                continue
+
+            triggerlabels = self.cfg['params'].get('triggerlabels')
+            if not triggerlabels:
+                print("Warning: pipeline file specifies {} in inputs but there are no triggerlabels in params".format(source))
+                continue
+
+            comp_outputs = input_data.get(source)
+            if comp_outputs:
+                comp_reports = comp_outputs['reports']
+                # detect_in_rois is a bound method; self must not be passed explicitly.
+                all_detections.extend(self.detect_in_rois(input_data, comp_reports))
+
+        # Each detection is of the form
+        # {"label":"person", "confidence": 0.56, "topleft": {"x": 184, "y": 101}, "bottomright": {"x": 274, "y": 382}}
+        # These should be transformed to our preferred JSON output documented in basecomponent.py
+
+        reports = []
+        for d in all_detections:
+            r = {
+                'labels' : [
+                    {
+                        'label' : d['label'],
+                        # The float() here is because that confidence value is actually a np.float32
+                        # and that creates serialization typeerror problems while writing report to
+                        # json.
+                        'confidence' : float(d['confidence'])
+                    }
+                ],
+                'rect' : [
+                    d['topleft']['x'],
+                    d['topleft']['y'],
+                    d['bottomright']['x'],
+                    d['bottomright']['y'],
+                ]
+            }
+
+            reports.append(r)
+
+        results = {
+            'reports' : reports
+        }
+
+        print(results)
+        return results
+
+
+    def detect_in_image(self, input_data):
+        '''
+        Runs the network on the complete image in input_data['img'] and returns
+        its detections unchanged.
+        '''
+        detections = self.nn.return_predict(input_data['img'])
+        return detections
+
+
+    def detect_in_rois(self, input_data, comp_reports):
+        '''
+        Runs the network on each reported region whose labels match the configured
+        triggerlabels, and returns the detections in full image coordinates.
+
+        comp_reports: reports of another component; each has 'labels' (dicts with a
+            'label' key) and 'rect' [x1,y1,x2,y2].
+        '''
+        img = input_data['img']
+        triggerlabels = self.cfg['params']['triggerlabels']
+        examine_all = 'all' in triggerlabels
+        roi_detections = []
+
+        for r in comp_reports:
+            if not (examine_all or any(l['label'] in triggerlabels for l in r['labels'])):
+                continue
+
+            rect = r['rect']
+            x_offset = rect[0]
+            y_offset = rect[1]
+            roi = img[ rect[1]:rect[3], rect[0]:rect[2], :]
+
+            # A degenerate rect gives an empty region with nothing to detect in.
+            # NOTE(review): relies on img being a numpy-style array with a .size attribute - confirm.
+            if roi.size == 0:
+                continue
+
+            detections = self.nn.return_predict(roi)
+            # These detections in ROI are relative to ROI. So we must add ROI origin to
+            # those coordinates to make them full image coordinates.
+            for d in detections:
+                d['topleft']['x'] += x_offset
+                d['bottomright']['x'] += x_offset
+
+                d['topleft']['y'] += y_offset
+                d['bottomright']['y'] += y_offset
+
+            roi_detections.extend(detections)
+
+        return roi_detections