# facerec_train.py

from __future__ import print_function

import cv2
import cv2.face as face
import numpy as np

# skvideo doesn't seem to be able to handle some mp4 files
#from skvideo.io import vread, vreader
import imageio

import os
import os.path
import sys
import shutil

import csv
import json
from sklearn.model_selection import train_test_split

# Given a directory of original images with directory structure like this:
#  <top directory>
#     - label #1
#        - person1-image1
#        - person1-image2
#        - person1-image3
#        ...
#     - label #2
#        - person2-image1
#        - person2-image2
#        - person2-image3
#        ...
#     - label #3
#         ...
#
# this script can do the following tasks:
#
# - print statistics like mean and median dimensions of all images in entire dataset
#
# - scale all images to the same specified dimensions, either enlarge or shrink
#   and save them to a different location with the same directory structure
#
# - split an images directory into a pair of train and test directories
#
# - create a CSV file of image paths and labels from the directory structure
#
# - train a face recognizer using preferred algorithm and save the model for inference

def statistics(top_dir):
    """Print size statistics for every image in a labelled dataset directory.

    top_dir is expected to contain one sub-directory per label, each holding
    that label's image files. For the widths and for the heights of all
    readable images, the mean, the median and a NumPy histogram are printed.

    Entries of top_dir that are not directories, and files that OpenCV cannot
    decode, are skipped instead of aborting the whole run. If no image could
    be read, a short notice is printed and no statistics are computed.

    Returns None.
    """
    widths = []
    heights = []

    for label in os.listdir(top_dir):
        label_dir = os.path.join(top_dir, label)
        if not os.path.isdir(label_dir):
            # Stray files beside the label directories are not labels.
            continue

        for imgfilename in os.listdir(label_dir):
            imgfilepath = os.path.join(label_dir, imgfilename)
            print(imgfilepath)
            img = cv2.imread(imgfilepath)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print('Warning: could not read image, skipping: ', imgfilepath)
                continue

            # Image arrays are indexed (row, column): shape[0] is the height
            # and shape[1] is the width.
            heights.append(img.shape[0])
            widths.append(img.shape[1])

    if not widths:
        print('No readable images found under', top_dir)
        return

    widths = np.asarray(widths)
    heights = np.asarray(heights)

    print('Mean width=', np.mean(widths))
    print('Median width=', np.median(widths))
    print('Width histogram: ', np.histogram(widths))

    print('Mean height=', np.mean(heights))
    print('Median height=', np.median(heights))
    print('Height histogram: ', np.histogram(heights))


def scale(orig_top_dir, scaled_dest_dir, width, height, make_grayscale = True, equalize_hist = False):
    """Resize every image of a labelled dataset and save it under a new root.

    The label sub-directory structure of orig_top_dir is recreated under
    scaled_dest_dir and each image is written there under its original
    file name.

    Parameters:
        orig_top_dir    -- directory containing one sub-directory per label
        scaled_dest_dir -- output root; created if it does not exist
        width, height   -- target size in pixels for every output image
        make_grayscale  -- convert each image from BGR to grayscale first
        equalize_hist   -- equalize the histogram; only applied together
                           with make_grayscale, otherwise ignored with a
                           warning

    Entries of orig_top_dir that are not directories, and files that OpenCV
    cannot decode, are skipped with a warning. Returns None.
    """
    if equalize_hist and not make_grayscale:
        # Reported once up front rather than once per image.
        print("Warning: Invalid arguments. Histogram equalization can be done only if grayscale is enabled. Ignoring")
        equalize_hist = False

    if not os.path.exists(scaled_dest_dir):
        os.makedirs(scaled_dest_dir)

    for label in os.listdir(orig_top_dir):
        label_dir = os.path.join(orig_top_dir, label)
        if not os.path.isdir(label_dir):
            # Stray files beside the label directories are not labels.
            continue

        dest_label_dir = os.path.join(scaled_dest_dir, label)
        if not os.path.exists(dest_label_dir):
            os.mkdir(dest_label_dir)

        for imgfilename in os.listdir(label_dir):
            orig_imgfilepath = os.path.join(label_dir, imgfilename)

            img = cv2.imread(orig_imgfilepath)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print('Warning: could not read image, skipping: ', orig_imgfilepath)
                continue

            if make_grayscale:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                if equalize_hist:
                    print("Equalizing")
                    img = cv2.equalizeHist(img)

            img = cv2.resize(img, (width, height))

            dest_imgfilepath = os.path.join(dest_label_dir, imgfilename)
            if not cv2.imwrite(dest_imgfilepath, img):
                print('Warning: could not write image: ', dest_imgfilepath)
                continue

            print(orig_imgfilepath,' -> ', dest_imgfilepath)
            
 
def split_into_train_test_dirs(top_dir, train_dir, test_dir, train_percent):
    """Copy a labelled image directory into separate train and test trees.

    For every label sub-directory of top_dir, train_percent percent of its
    files (rounded down) are chosen at random and copied to
    train_dir/<label>; the remaining files are copied to test_dir/<label>.
    The source files are left untouched.

    Parameters:
        top_dir       -- directory containing one sub-directory per label
        train_dir     -- output root for the training subset (created)
        test_dir      -- output root for the test subset (created)
        train_percent -- share of each label's files that goes to training,
                         between 0 and 100

    Raises ValueError if train_percent is outside 0..100. Entries of top_dir
    that are not directories are ignored. Returns None.
    """
    if not 0 <= train_percent <= 100:
        raise ValueError('train_percent must be between 0 and 100, got %r' % (train_percent,))

    for out_dir in (train_dir, test_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    for label in os.listdir(top_dir):
        label_dir = os.path.join(top_dir, label)
        if not os.path.isdir(label_dir):
            # Stray files beside the label directories are not labels.
            continue

        images = os.listdir(label_dir)
        if not images:
            continue

        train_count = int(train_percent * len(images) // 100)
        # A set gives constant-time membership tests in the loop below.
        train_indexes = set(
            np.random.choice(len(images), train_count, replace=False).tolist())

        for idx, imgfilename in enumerate(images):
            dest_dir = train_dir if idx in train_indexes else test_dir

            dest_label_dir = os.path.join(dest_dir, label)
            if not os.path.exists(dest_label_dir):
                os.mkdir(dest_label_dir)

            shutil.copy(os.path.join(label_dir, imgfilename), dest_label_dir)
            
                
def export_csv(top_dir, dest_csv_file):
    """Write a CSV listing every image of a labelled dataset directory.

    One row is written per file found in a label sub-directory of top_dir:

        <absolute file path>,<label directory name>,<label index>

    Label directories are processed in sorted name order and numbered from 0
    in that order, so repeated runs over the same tree produce the same
    indices. Entries of top_dir that are not directories are ignored, which
    also makes it safe to place dest_csv_file inside top_dir.

    Returns None.
    """
    # newline='' is what the csv module asks for on files it writes, so that
    # its own line terminators are not translated a second time.
    with open(dest_csv_file, 'w', encoding='utf-8', newline='') as csvfile:
        labelwriter = csv.writer(csvfile, delimiter=',')

        labels = sorted(
            entry for entry in os.listdir(top_dir)
            if os.path.isdir(os.path.join(top_dir, entry)))

        for label_idx, label in enumerate(labels):
            label_dir = os.path.join(top_dir, label)
            for imgfilename in sorted(os.listdir(label_dir)):
                imgfilepath = os.path.abspath(os.path.join(label_dir, imgfilename))

                labelwriter.writerow([imgfilepath, label, label_idx])
    

def train(csv_file, train_percent, test_file_csv, models_dir, eigen=True, fischer=True, lbp=True):
    """Train the selected face recognizers from a CSV of labelled images.

    Parameters:
        csv_file      -- CSV with rows <image path>,<label>,<label index>
        train_percent -- percentage of the rows used for training; the split
                         is stratified by label index
        test_file_csv -- path where the held-out rows are written as CSV
        models_dir    -- directory receiving the saved models; created if it
                         does not exist
        eigen, fischer, lbp -- which recognizers to train; they are saved as
                         eigen.yml, fischer.yml and lbp.yml respectively

    Besides the models, model.json is written to models_dir. It records the
    width and height of the first training image and the mapping from label
    index to label name.

    Labels with fewer than two rows are dropped before splitting. Training
    images that cannot be read are skipped with a warning.

    Raises ValueError if none of the training images could be read.
    Returns None.
    """
    # The CSV is parsed with the csv module rather than np.genfromtxt, which
    # hands back byte strings on Python 3.
    data = []
    all_labels = {}
    label_counts = {}
    with open(csv_file, 'r', encoding='utf-8', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # Tolerate blank lines.
                continue
            _, label, label_idx = row
            data.append(row)
            all_labels.setdefault(label_idx, label)
            label_counts[label_idx] = label_counts.get(label_idx, 0) + 1

    # Every label should have at least 2 data points. Delete those rows which
    # don't satisfy that condition.
    data = [ d for d in data if label_counts[ d[2] ] >= 2 ]
    labelnum_col = [ d[2] for d in data ]
    train_imagefiles, test_imagefiles = train_test_split(data, train_size=train_percent/100.0, stratify=labelnum_col)

    with open(test_file_csv, 'w', encoding='utf-8', newline='') as csvfile:
        testwriter = csv.writer(csvfile, delimiter=',')

        for test_imgfile in test_imagefiles:
            testwriter.writerow(list(test_imgfile))

    # Images and labels are collected together so that they stay aligned when
    # an unreadable image is skipped.
    train_images = []
    train_label_nums = []
    for train_imgfile in train_imagefiles:
        imgfilepath = train_imgfile[0]
        img = cv2.imread(imgfilepath, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Warning: could not read image, skipping: ', imgfilepath)
            continue
        print(imgfilepath, img.shape)
        train_images.append(img)
        train_label_nums.append(int(train_imgfile[2]))

    if not train_images:
        raise ValueError('None of the training images could be read')

    training_labels = np.array(train_label_nums, dtype = np.int32)

    if not os.path.exists(models_dir):
        os.makedirs(models_dir)

    print(train_images[0].shape, len(training_labels))

    # NOTE(review): the create*FaceRecognizer factory names are kept exactly
    # as the rest of this file uses them; confirm they match the installed
    # OpenCV contrib build.
    if eigen:
        eigen_recog = face.createEigenFaceRecognizer()
        eigen_recog.train(train_images, training_labels)
        eigen_recog.save(os.path.join(models_dir, 'eigen.yml'))
        print('Eigen done')

    if fischer:
        fischer_recog = face.createFisherFaceRecognizer()
        fischer_recog.train(train_images, training_labels)
        fischer_recog.save(os.path.join(models_dir, 'fischer.yml'))
        print('Fischer done')

    if lbp:
        lbp_recog = face.createLBPHFaceRecognizer()
        lbp_recog.train(train_images, training_labels)
        lbp_recog.save(os.path.join(models_dir, 'lbp.yml'))
        print('LBP done')

    # Record the training image dimensions because at prediction time we need
    # to resize images to those dimensions.
    model = {'width' : train_images[0].shape[1], 'height' : train_images[0].shape[0], 'labels' : all_labels}
    with open(os.path.join(models_dir, 'model.json'), 'w') as model_file:
        json.dump(model, model_file, indent=4, separators=(',', ': '))
    

def recognize(img_file, expected_label, models_dir, eigen=True, fischer=True, lbp=True, equalize_hist=False):
    """Run the selected trained recognizers on a single face image.

    Parameters:
        img_file       -- path of the image; it is loaded as grayscale
        expected_label -- accepted for the command line but not used here
        models_dir     -- directory holding model.json and the saved
                          eigen.yml / fischer.yml / lbp.yml models
        eigen, fischer, lbp -- which recognizers to run
        equalize_hist  -- equalize the histogram before predicting; use it
                          when the training images were equalized too

    The image is resized to the training dimensions recorded in model.json
    when its size differs.

    Returns a 3-tuple (eigen, fischer, lbp) holding whatever each enabled
    recognizer's predict() returned, or -1 for a recognizer that is disabled.

    Raises IOError if img_file cannot be read as an image.
    """
    eigen_label = fischer_label = lbp_label = -1

    with open(os.path.join(models_dir, 'model.json'), 'r') as model_file:
        model = json.load(model_file)
    # Stored as (rows, cols) so it compares directly with ndarray.shape.
    train_img_size = (model['height'], model['width'])

    img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise IOError('Could not read image file: %s' % img_file)

    # If training images were equalized, better to perform the same
    # operation during recognition too.
    if equalize_hist:
        img = cv2.equalizeHist(img)

    if img.shape != train_img_size:
        # cv2.resize takes (width, height), the reverse of ndarray.shape.
        img = cv2.resize( img, train_img_size[::-1] )

    if eigen:
        eigen_recog = face.createEigenFaceRecognizer()
        eigen_recog.load(os.path.join(models_dir, 'eigen.yml'))
        eigen_label = eigen_recog.predict(img)
        print('Eigen done')

    if fischer:
        fischer_recog = face.createFisherFaceRecognizer()
        fischer_recog.load(os.path.join(models_dir, 'fischer.yml'))
        fischer_label = fischer_recog.predict(img)
        print('Fischer done')

    if lbp:
        lbp_recog = face.createLBPHFaceRecognizer()
        lbp_recog.load(os.path.join(models_dir, 'lbp.yml'))
        lbp_label = lbp_recog.predict(img)
        print('LBP done')

    print(eigen_label, fischer_label, lbp_label)
    return  eigen_label, fischer_label, lbp_label


def test(test_csv, models_dir, eigen=True, fischer=True, lbp=True):
    """Measure the accuracy of the selected trained recognizers.

    Parameters:
        test_csv   -- CSV with rows <image path>,<label>,<label index>
        models_dir -- directory holding model.json and the saved
                      eigen.yml / fischer.yml / lbp.yml models
        eigen, fischer, lbp -- which recognizers to evaluate

    Every test image is loaded as grayscale, resized to the training
    dimensions recorded in model.json when its size differs, and passed to
    each enabled recognizer. The per-image predictions and finally the
    accuracy of each enabled recognizer are printed.

    Blank CSV rows are ignored and unreadable images are skipped with a
    warning; accuracy is computed over the images actually evaluated.
    Returns None.
    """
    if eigen:
        eigen_recog = face.createEigenFaceRecognizer()
        eigen_recog.load(os.path.join(models_dir, 'eigen.yml'))

    if fischer:
        fischer_recog = face.createFisherFaceRecognizer()
        fischer_recog.load(os.path.join(models_dir, 'fischer.yml'))

    if lbp:
        lbp_recog = face.createLBPHFaceRecognizer()
        lbp_recog.load(os.path.join(models_dir, 'lbp.yml'))

    with open(os.path.join(models_dir, 'model.json'), 'r') as model_file:
        model = json.load(model_file)
    # Stored as (rows, cols) so it compares directly with ndarray.shape.
    train_img_size = (model['height'], model['width'])

    test_imgfiles = []
    with open(test_csv, 'r', encoding='utf-8', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if row:
                test_imgfiles.append(row)

    eigen_correct = fischer_correct = lbph_correct = 0
    evaluated = 0

    for test_imgfile in test_imgfiles:

        img = cv2.imread(test_imgfile[0], cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Warning: could not read image, skipping: ', test_imgfile[0])
            continue
        evaluated += 1

        if img.shape != train_img_size:
            # cv2.resize takes (width, height), the reverse of ndarray.shape.
            img = cv2.resize( img, train_img_size[::-1] )

        expected_label = int(test_imgfile[2])

        # predict() is unpacked as a (label, confidence) pair; a disabled
        # recognizer reports label -1.
        eigen_label, eigen_conf = eigen_recog.predict(img) if eigen else (-1,0)
        eigen_correct += 1 if eigen_label == expected_label else 0

        fischer_label, fischer_conf = fischer_recog.predict(img) if fischer else (-1,0)
        fischer_correct += 1 if fischer_label == expected_label else 0

        lbp_label, lbp_conf = lbp_recog.predict(img) if lbp  else (-1,0)
        lbph_correct += 1 if lbp_label == expected_label else 0

        print("%s: expected=%d | eigen=%d | fischer=%d | lbph=%d\n" % (
            test_imgfile[0], expected_label, eigen_label, fischer_label, lbp_label))

    if evaluated == 0:
        # Avoid dividing by zero when there was nothing to evaluate.
        print('No test images could be evaluated')
        return

    if eigen:
        print("Eigenfaces accuracy: ", eigen_correct / float(evaluated))

    if fischer:
        print("Fischerfaces accuracy: ", fischer_correct / float(evaluated))

    if lbp:
        print("LBPH accuracy: ", lbph_correct / float(evaluated))


def detect(img_file, detector_xml_path, dest_img_file):
    """Detect objects in an image with a cascade classifier and mark them.

    Parameters:
        img_file          -- path of the image to search
        detector_xml_path -- path of the cascade classifier XML file
        dest_img_file     -- where to save a copy of the image with a red
                             rectangle drawn around every hit

    The detected rectangles are also printed. Returns None.

    Raises IOError if the image cannot be read or the cascade cannot be
    loaded.
    """
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise IOError('Could not read image file: %s' % img_file)

    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    detector = cv2.CascadeClassifier(detector_xml_path)
    if detector.empty():
        raise IOError('Could not load cascade classifier: %s' % detector_xml_path)

    # minSize is a (width, height) pair, whereas ndarray.shape is (rows, cols).
    img_height, img_width = gray_img.shape[:2]
    min_size = (min(50, img_width // 10), min(50, img_height // 10))
    hits = detector.detectMultiScale(gray_img, 1.1, 4, 0, min_size)
    print(hits)

    hits_img = np.copy(img)
    for (x,y,w,h) in hits:
        # (0,0,255) is red in OpenCV's BGR channel order.
        cv2.rectangle(hits_img, (int(x), int(y)), (int(x+w), int(y+h)), (0,0,255), 2)
    cv2.imwrite(dest_img_file, hits_img)


def detectvideo(vid_file, detector_xml_path, dest_img_dir):
    """Run a cascade classifier on every frame of a video.

    Parameters:
        vid_file          -- path of the video, read through imageio's
                             ffmpeg reader
        detector_xml_path -- path of the cascade classifier XML file
        dest_img_dir      -- output directory (created if missing); each
                             frame is saved there as frame-<index>.png with
                             a red rectangle drawn around every hit

    Returns None.

    Raises IOError if the cascade cannot be loaded.
    """
    if not os.path.exists(dest_img_dir):
        os.makedirs(dest_img_dir)

    detector = cv2.CascadeClassifier(detector_xml_path)
    if detector.empty():
        raise IOError('Could not load cascade classifier: %s' % detector_xml_path)

    vid = imageio.get_reader(vid_file, 'ffmpeg')
    try:
        # If size and source_size are not equal, then device was probably
        # rotated (like a mobile) and we should compensate for the rotation.
        # Images will have 'source_size' dimensions but we need 'size'.
        metadata = vid.get_meta_data()
        rotate = metadata['source_size'] != metadata['size']
        if rotate:
            print('Rotating')

        for i, frame in enumerate(vid):
            if rotate:
                frame = np.rot90(frame).copy()

            print('Frame ',i, frame.shape)

            # imageio delivers frames in RGB channel order, while the OpenCV
            # calls below (BGR2GRAY, rectangle colour, imwrite) assume BGR.
            hits_img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            gray_img = cv2.cvtColor(hits_img, cv2.COLOR_BGR2GRAY)

            # minSize is a (width, height) pair, whereas ndarray.shape is
            # (rows, cols).
            img_height, img_width = gray_img.shape[:2]
            min_size = (min(20, img_width // 10), min(20, img_height // 10))
            hits = detector.detectMultiScale(gray_img, 1.1, 3, 0, min_size)
            print(len(hits), ' hits')

            for (x,y,w,h) in hits:
                cv2.rectangle(hits_img, (int(x), int(y)), (int(x+w), int(y+h)), (0,0,255), 2)

            cv2.imwrite(os.path.join(dest_img_dir, 'frame-%d.png'%(i)), hits_img)
    finally:
        # Release the reader even if a frame fails.
        vid.close()


def recognizemany(img_file, detector_xml_path, models_dir, dest_img_file, eigen=True, fischer=True, lbp=True, equalize_hist=False):
    """Detect every face in an image and label each with the recognizers.

    Parameters:
        img_file          -- path of the image to search
        detector_xml_path -- path of the cascade classifier XML file
        models_dir        -- directory holding model.json and the saved
                             eigen.yml / fischer.yml / lbp.yml models
        dest_img_file     -- where to save a copy of the image with a white
                             rectangle and a name drawn on every hit
        eigen, fischer, lbp -- which recognizers to run
        equalize_hist     -- equalize the grayscale image before recognition
                             (detection runs on the unequalized image)

    The drawn name comes from the Fisher recognizer when it is enabled,
    otherwise from the Eigen recognizer, otherwise from LBPH. A label index
    that is not in model.json, including the -1 of a disabled recognizer, is
    shown as 'unknown'. The names from all three recognizers are printed for
    every hit. Returns None.

    Raises IOError if the image cannot be read or the cascade cannot be
    loaded.
    """
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise IOError('Could not read image file: %s' % img_file)

    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    detector = cv2.CascadeClassifier(detector_xml_path)
    if detector.empty():
        raise IOError('Could not load cascade classifier: %s' % detector_xml_path)

    # minSize is a (width, height) pair, whereas ndarray.shape is (rows, cols).
    img_height, img_width = gray_img.shape[:2]
    min_size = (min(50, img_width // 10), min(50, img_height // 10))
    hits = detector.detectMultiScale(gray_img, 1.1, 3, 0, min_size)

    with open(os.path.join(models_dir, 'model.json'), 'r') as model_file:
        model = json.load(model_file)
    # Stored as (rows, cols) so it compares directly with ndarray.shape.
    train_img_size = (model['height'], model['width'])
    labels = model['labels']

    print('# hits:', len(hits))

    hits_img = np.copy(img)

    # If training images were equalized, better to perform the same
    # operation during recognition too.
    if equalize_hist:
        gray_img = cv2.equalizeHist(gray_img)

    # Load each model once instead of once per detected face.
    eigen_recog = fischer_recog = lbp_recog = None
    if eigen:
        eigen_recog = face.createEigenFaceRecognizer()
        eigen_recog.load(os.path.join(models_dir, 'eigen.yml'))
    if fischer:
        fischer_recog = face.createFisherFaceRecognizer()
        fischer_recog.load(os.path.join(models_dir, 'fischer.yml'))
    if lbp:
        lbp_recog = face.createLBPHFaceRecognizer()
        lbp_recog.load(os.path.join(models_dir, 'lbp.yml'))

    def label_index(prediction):
        # test() in this file unpacks predict() as a (label, confidence)
        # pair; accept that form as well as a bare label.
        if isinstance(prediction, (tuple, list)):
            prediction = prediction[0]
        return int(prediction)

    def label_name(index):
        # model.json keys are strings because JSON object keys always are.
        return labels.get(str(index), 'unknown')

    for i, (x,y,w,h) in enumerate(hits, 1):
        print('ROI ', i)
        roi = gray_img[y:y+h, x:x+w]

        if roi.shape != train_img_size:
            # cv2.resize takes (width, height), the reverse of ndarray.shape.
            roi = cv2.resize( roi, train_img_size[::-1] )

        eigen_label = fischer_label = lbp_label = -1

        if eigen_recog is not None:
            eigen_label = label_index(eigen_recog.predict(roi))
            print('Eigen done')

        if fischer_recog is not None:
            fischer_label = label_index(fischer_recog.predict(roi))
            print('Fischer done')

        if lbp_recog is not None:
            lbp_label = label_index(lbp_recog.predict(roi))
            print('LBP done')

        if fischer:
            caption = label_name(fischer_label)
        elif eigen:
            caption = label_name(eigen_label)
        else:
            caption = label_name(lbp_label)

        top_left = (int(x), int(y))
        bottom_right = (int(x+w), int(y+h))
        cv2.rectangle(hits_img, top_left, bottom_right, (255,255,255), 2)
        cv2.putText(hits_img, caption, (int(x), int(y)-5), cv2.FONT_HERSHEY_PLAIN, 2.0, (255,255,255), 2)

        print(label_name(eigen_label), label_name(fischer_label), label_name(lbp_label))

    cv2.imwrite(dest_img_file, hits_img)

    
#########################################3

def parse_bool_arg(text):
    """Convert a command-line word into a bool.

    bool() cannot be used for this because every non-empty string, including
    'False' and '0', is truthy. Accepted spellings (case-insensitive,
    surrounding whitespace ignored):

        true:  1, true, t, yes, y, on
        false: 0, false, f, no, n, off, and the empty string

    Raises ValueError for anything else.
    """
    value = text.strip().lower()
    if value in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if value in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise ValueError('Expected a boolean command-line value, got %r' % (text,))


def main(argv):
    """Dispatch the command named in argv[1] to the matching function.

    argv has the layout of sys.argv: program name, command, then the
    command's positional arguments. Returns 0 after running a command and 2,
    after printing a usage message to stderr, when the command is missing or
    unknown or when it has too few arguments.
    """
    # command -> (number of required arguments, argument help, handler).
    # The handlers are lambdas so that the target functions are only looked
    # up when their command actually runs.
    commands = {
        'stats': (1, '<top_dir>',
            lambda a: statistics(a[0])),
        'resize': (6, '<orig_top_dir> <scaled_dest_dir> <width> <height> <make_grayscale> <equalize_hist>',
            lambda a: scale(a[0], a[1], int(a[2]), int(a[3]), parse_bool_arg(a[4]), parse_bool_arg(a[5]))),
        'split': (4, '<top_dir> <train_dir> <test_dir> <train_percent>',
            lambda a: split_into_train_test_dirs(a[0], a[1], a[2], int(a[3]))),
        'csv': (2, '<top_dir> <dest_csv_file>',
            lambda a: export_csv(a[0], a[1])),
        'train': (7, '<csv_file> <train_percent> <test_file_csv> <models_dir> <eigen> <fischer> <lbp>',
            lambda a: train(a[0], int(a[1]), a[2], a[3],
                parse_bool_arg(a[4]), parse_bool_arg(a[5]), parse_bool_arg(a[6]))),
        'test': (5, '<test_csv> <models_dir> <eigen> <fischer> <lbp>',
            lambda a: test(a[0], a[1],
                parse_bool_arg(a[2]), parse_bool_arg(a[3]), parse_bool_arg(a[4]))),
        'recognize': (6, '<img_file> <expected_label> <models_dir> <eigen> <fischer> <lbp>',
            lambda a: recognize(a[0], int(a[1]), a[2],
                parse_bool_arg(a[3]), parse_bool_arg(a[4]), parse_bool_arg(a[5]))),
        'detect': (3, '<img_file> <detector_xml_path> <dest_img_file>',
            lambda a: detect(a[0], a[1], a[2])),
        'recognizemany': (8, '<img_file> <detector_xml_path> <models_dir> <dest_img_file> <eigen> <fischer> <lbp> <equalize_hist>',
            lambda a: recognizemany(a[0], a[1], a[2], a[3],
                parse_bool_arg(a[4]), parse_bool_arg(a[5]), parse_bool_arg(a[6]), parse_bool_arg(a[7]))),
        'detectvideo': (3, '<vid_file> <detector_xml_path> <dest_img_dir>',
            lambda a: detectvideo(a[0], a[1], a[2])),
    }

    prog = argv[0] if argv else 'facerec_train.py'

    if len(argv) < 2 or argv[1] not in commands:
        print('Usage: %s <command> [arguments...]' % prog, file=sys.stderr)
        print('Commands: ' + ', '.join(sorted(commands)), file=sys.stderr)
        return 2

    command = argv[1]
    required, arg_help, handler = commands[command]
    args = argv[2:]

    if len(args) < required:
        print('Usage: %s %s %s' % (prog, command, arg_help), file=sys.stderr)
        return 2

    handler(args)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))