# EyeXception: Gaze Estimation with Deep Learning

This project was developed in collaboration with Peter Grönquist ([email protected]). Both authors contributed equally to this work.

We explore different classification model adaptations stemming from deep learning and combine them with methods from computer vision. We show that by using the combined methods and applying a statistical evaluation, it is possible to obtain state-of-the-art predictions in eye-gaze approximation.

## Setup
To reproduce the results from our submission, follow these steps:

- Install the dependencies; tqdm is the only dependency we use in addition to the ones provided in setup.py
- Clone the repository
- Run preprocessing.py
- Adapt the paths in parameters.py to match the dataset path
- Copy the repository at least 10 times
- Run train.py in each copy (a driver sketch follows this list)
- After training is finished, run generate.py
- Gather all predictions from the respective files in `./tf/`
- Take the mean of all predictions
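
For the copy-and-train steps above, here is a minimal driver sketch. The directory names are hypothetical; it assumes the clone lives in `./EyeXception` with `train.py` inside its `src/` folder, and on a cluster the ten copies would typically be trained in parallel instead:

```python
import shutil
import subprocess

# Make ten copies of the repository and train one model in each (sequentially here).
# 'EyeXception' and 'run_i' are placeholder names; adjust them to your setup.
for i in range(1, 11):
    run_dir = 'run_{}'.format(i)
    shutil.copytree('EyeXception', run_dir)
    subprocess.run(['python3', 'train.py'], cwd=run_dir + '/src', check=True)
```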

### Necessary datasets

Either run preprocessing.py on the provided h5 datasets or ask us for them; we will happily make them available on a hard disk or by other means on Leonhard, as polybox is limited to 50 GB.

### Installing dependencies

Run the following (prefixing `sudo` if necessary):
```
python3 setup.py install
```
and additionally install tqdm (e.g. `pip3 install tqdm`).

Note that this can be done within a [virtual environment](https://docs.python.org/3/tutorial/venv.html). In this case, the sequence of commands would be similar to:
```
mkvirtualenv -p $(which python3) myenv
python3 setup.py install
```
when using [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/).

## Structure

* `src/` - all source code.
    * `Dated_Training/` - dated training files for old neural networks
    * `models/` - neural network definitions
    * `tb/` - TensorBoard logs
    * `tf/` - TensorFlow trained models and predictions
    * `util/` - utility methods
    * `generate.py` - script that generates predictions for the test dataset
    * `Mean.py` - simple script to get the mean of 10 predictions
    * `parameters.py` - parameters file
    * `preprocessing.py` - preprocessing script
    * `train.py` - training script

### Outputs
Once training is complete for all models and the predictions have been generated, you will find each individual prediction in the corresponding `./tf/` folder.
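
For reference, the gathering-and-averaging step boils down to the following sketch, a compact equivalent of `Mean.py` below. It assumes the ten prediction files have been collected into one directory and named `predictions1.txt.gz` through `predictions10.txt.gz`:

```python
import numpy as np

# Element-wise mean over the ten ensemble predictions.
# './tf/' and the file names are assumptions; adjust them to where you gathered the files.
preds = [np.loadtxt('./tf/predictions{}.txt.gz'.format(i)) for i in range(1, 11)]
mean_pred = np.mean(preds, axis=0)
np.savetxt('./tf/predictions.txt.gz', mean_pred)
```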

## setup.py

```python
"""Copyright (c) 2019 AIT Lab, ETH Zurich, Seonwook Park | ||
""""""Setup module for GazeML.""" | ||
|
||
from setuptools import setup, find_packages | ||
|
||
setup( | ||
name='gazeml', | ||
version='0.1', | ||
description='Data-driven gaze estimation using machine learning.', | ||
|
||
author='Seonwook Park', | ||
author_email='[email protected]', | ||
|
||
packages=find_packages(exclude=[]), | ||
python_requires='>=3.5', | ||
install_requires=[ | ||
'coloredlogs', | ||
'h5py', | ||
'numpy', | ||
'opencv-python', | ||
'pandas', | ||
'ujson', | ||
# Install the most appropriate version of Tensorflow | ||
# Ref. https://www.tensorflow.org/install/ | ||
# 'tensorflow', | ||
# tensorflow-gpu==1.15 | ||
], | ||
) |
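
Note that TensorFlow itself is deliberately left out of `install_requires` (see the commented-out entries above): install the build matching your hardware, for example `tensorflow-gpu==1.15` as suggested in the comment, before running the training script.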

## train.py

```python
#!/usr/bin/env python3

import os
import tensorflow as tf
import parameters
from models import Model1NetNV
import numpy as np
from tqdm import tqdm, trange  # progress bars

# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # back to 0 afterwards
tf.logging.set_verbosity(tf.logging.ERROR)


def TFRparsert(example):  # , Size=12500 (val Size=4000)
    # Parse a TFRecord example without a gaze label.
    features = {
        'eye-region': tf.FixedLenFeature([3, 60, 224], tf.float32),
        'face': tf.FixedLenFeature([3, 224, 224], tf.float32),
        'face-landmarks': tf.FixedLenFeature([33, 2], tf.float32),
        'head': tf.FixedLenFeature([2], tf.float32),
        'left-eye': tf.FixedLenFeature([3, 60, 90], tf.float32),
        'right-eye': tf.FixedLenFeature([3, 60, 90], tf.float32)
    }
    parsedf = tf.parse_single_example(example, features)
    return (parsedf['eye-region'], parsedf['face'], parsedf['face-landmarks'],
            parsedf['head'], parsedf['left-eye'], parsedf['right-eye'])


def TFRparser(example):  # , Size=100000
    # Parse a TFRecord example including the 'gaze' training label.
    features = {
        'eye-region': tf.FixedLenFeature([3, 60, 224], tf.float32),
        'face': tf.FixedLenFeature([3, 224, 224], tf.float32),
        'face-landmarks': tf.FixedLenFeature([33, 2], tf.float32),
        'head': tf.FixedLenFeature([2], tf.float32),
        'left-eye': tf.FixedLenFeature([3, 60, 90], tf.float32),
        'right-eye': tf.FixedLenFeature([3, 60, 90], tf.float32),
        'gaze': tf.FixedLenFeature([2], tf.float32)
    }
    parsedf = tf.parse_single_example(example, features)
    return (parsedf['eye-region'], parsedf['face'], parsedf['face-landmarks'],
            parsedf['head'], parsedf['left-eye'], parsedf['right-eye'], parsedf['gaze'])


def TFRecord2FLRD(filenames, buffersize=3000, batchsize=parameters.BATCH_SIZE):  # TODO: set shuffle size
    train_dataset = tf.data.TFRecordDataset(filenames=[filenames])
    train_dataset = train_dataset.map(TFRparser)
    train_dataset = train_dataset.shuffle(buffersize)
    # train_dataset = train_dataset.prefetch(buffersize)  # 15 was too much  # TODO: set prefetch size
    train_dataset = train_dataset.batch(batchsize)
    return train_dataset.make_initializable_iterator()


tf.reset_default_graph()  # tf.compat.v1.reset_default_graph

ditert = TFRecord2FLRD(filenames=parameters.PATHT)
diterv = TFRecord2FLRD(filenames=parameters.PATHV)
# The *data tensors are the model inputs; gdata is the gaze target they are compared to.
erdata, fdata, fldata, hdata, ledata, redata, gdata = ditert.get_next()
erdatav, fdatav, fldatav, hdatav, ledatav, redatav, gdatav = diterv.get_next()

LR = parameters.LEARNING_RATE
nvnet = Model1NetNV()
init = tf.global_variables_initializer()
saver = tf.train.Saver(tf.global_variables())
config = tf.ConfigProto(
    device_count={'GPU': 1})  # XLA_GPU is experimental, might get errors; only ~10% better performance on ResNet50
with tf.Session(config=config) as sess:
    TBwriter = tf.summary.FileWriter(parameters.LOGS_PATH, sess.graph)  # TODO: currently saving values at beginning of iterations
    bestvalloss = float('inf')
    trloss = tf.summary.scalar('mse_training_loss', nvnet.loss)
    ltrloss = tf.summary.scalar('angular_training_loss', nvnet.angular_loss)
    valoss = tf.summary.scalar('mse_validation_loss', nvnet.loss)
    lvaloss = tf.summary.scalar('angular_validation_loss', nvnet.angular_loss)
    if parameters.LOAD_MODEL:
        print('Trying to load saved model...')
        try:
            print('Loading from: ', parameters.SAVE_PATH + parameters.MODEL_NAME + '.meta')
            restorer = tf.train.import_meta_graph(parameters.SAVE_PATH + parameters.MODEL_NAME + '.meta')
            restorer.restore(sess, tf.train.latest_checkpoint(parameters.SAVE_PATH))
            print("Model successfully restored")
        except IOError:
            sess.run(init)
            print("No previous model found, running default initialization")
    v_loss = np.empty(parameters.EPOCHS)
    patience_c = 0
    for epoch_no in trange(parameters.EPOCHS, desc='Epochs', position=0):
        train_loss = 0
        train_aloss = 0
        val_loss = 0
        val_aloss = 0
        tr_loss = 0
        va_loss = 0
        sess.run(ditert.initializer)
        itt = 0
        if epoch_no > 0 and epoch_no % 4 == 0:
            LR *= 0.1  # decay the learning rate every 4 epochs
        try:
            with tqdm(total=int(100000 / parameters.BATCH_SIZE) + 1, desc='Batches', leave=False) as pbar:
                while True:
                    er, f, fl, h, le, re, g = sess.run([erdata, fdata, fldata, hdata, ledata, redata, gdata])
                    # Feed the current training batch
                    train_dict = {
                        nvnet.training: True,
                        nvnet.LR: LR,
                        nvnet.er: er,
                        nvnet.f: f,
                        nvnet.fl: fl,
                        nvnet.h: h,
                        nvnet.le: le,
                        nvnet.re: re,
                        nvnet.g: g,
                    }
                    if itt == 0:
                        # Also evaluate the summary ops on the first batch of the epoch
                        _, loss, aloss, traloss, ltraloss = sess.run(
                            [nvnet.train_op, nvnet.loss, nvnet.angular_loss, trloss, ltrloss],
                            feed_dict=train_dict)
                        TBwriter.add_summary(traloss, (epoch_no + 1))
                        TBwriter.add_summary(ltraloss, (epoch_no + 1))
                    else:
                        _, loss, aloss = sess.run(
                            [nvnet.train_op, nvnet.loss, nvnet.angular_loss],
                            feed_dict=train_dict)
                    train_loss += loss
                    train_aloss += aloss
                    itt += 1

                    pbar.set_postfix(MSE_Loss=loss, ANGULAR_Loss=aloss)
                    pbar.update()
        except tf.errors.OutOfRangeError:
            pass  # end of the training set
        tr_loss = loss
        sess.run(diterv.initializer)

        itc = 0
        try:
            while True:
                er, f, fl, h, le, re, g = sess.run([erdatav, fdatav, fldatav, hdatav, ledatav, redatav, gdatav])
                # Feed the current validation batch (no train_op is run here)
                train_dict = {
                    nvnet.training: False,
                    nvnet.er: er,
                    nvnet.f: f,
                    nvnet.fl: fl,
                    nvnet.h: h,
                    nvnet.le: le,
                    nvnet.re: re,
                    nvnet.g: g,
                }
                if itc == 0:
                    loss, aloss, vall, lvall = sess.run(
                        [nvnet.loss, nvnet.angular_loss, valoss, lvaloss], feed_dict=train_dict)
                    TBwriter.add_summary(vall, (epoch_no + 1))
                    TBwriter.add_summary(lvall, (epoch_no + 1))
                else:
                    loss, aloss = sess.run([nvnet.loss, nvnet.angular_loss], feed_dict=train_dict)
                val_loss += loss
                val_aloss += aloss
                itc += 1
        except tf.errors.OutOfRangeError:
            pass  # end of the validation set
        va_loss = loss
        tott_loss = train_loss / itt  # average training loss in 1 epoch  # TODO: maybe use only the last loss instead of the average
        totv_loss = val_loss / itc  # average validation loss in 1 epoch
        totat_loss = train_aloss / itt
        totav_loss = val_aloss / itc
        v_loss[epoch_no] = totv_loss  # average val loss saved for early stopping
        print('\nEpoch No: {}'.format(epoch_no + 1))
        print('MSE Train loss = {:.8f}'.format(tott_loss))
        print('Angular Train loss = {:.8f}'.format(totat_loss))
        print('MSE Val loss = {:.8f}'.format(totv_loss))
        print('Angular Val loss = {:.8f}'.format(totav_loss))

        if (bestvalloss - v_loss[epoch_no]) > 0.0000000001:
            print('Saving model at epoch: ', (epoch_no + 1))  # save whenever the validation loss improves
            saver.save(sess, parameters.SAVE_PATH + parameters.MODEL_NAME + 'best')
            bestvalloss = v_loss[epoch_no]
            patience_c = 0
        else:
            # the model could also be saved here if PATIENCE is large
            patience_c += 1

        if patience_c > parameters.PATIENCE:
            print("early stopping...")
            break

        if (epoch_no + 1) % 100 == 0 and epoch_no > 0:
            print('Saving model at epoch: ', (epoch_no + 1))  # save periodically
            saver.save(sess, parameters.SAVE_PATH + parameters.MODEL_NAME, global_step=(epoch_no + 1))

    saver.save(sess, parameters.SAVE_PATH + parameters.MODEL_NAME, global_step=(epoch_no + 1))  # final save

    # closing the session is not needed inside a 'with' block
```
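
Training and validation losses are written both to the console and as TensorFlow summaries, so training can be monitored by pointing TensorBoard at the log directory (`parameters.LOGS_PATH`, i.e. the `tb/` folder), for example with `tensorboard --logdir src/tb/`.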

## Mean.py

```python
#!/usr/bin/env python3

import numpy as np

# Example of a simple averaging script without looping
Path = '/home/peter/Documents/MP/calculatemode/'

# Calculates the mode of a distribution, based on the number of members in the distribution.
# Not used in our submission.
n = 10  # number of members in the distribution


def modefm(mi, ma, ar):
    # Bin the values of `ar` into n bins over [mi, ma] and average the values in
    # all bins with at least `begin` members; relax `begin` until some bin qualifies.
    eps = 0.0000001
    step = (ma - mi) / n
    ma = ma + eps
    mi = mi - eps
    ttot = 0
    tcount = 0
    begin = 2
    while True:
        for i in range(n):
            count = 0
            tot = 0
            for v in ar:
                if v >= mi + i * step and v < mi + (i + 1) * step:
                    count += 1
                    tot += v
            if count >= begin:
                tcount += count
                ttot += tot

        if tcount != 0:
            return ttot / tcount
        else:
            begin -= 1


b1 = np.loadtxt(Path + 'predictions1.txt.gz')
# b1 = b1[:, np.newaxis, :]
b2 = np.loadtxt(Path + 'predictions2.txt.gz')
# b2 = b2[:, np.newaxis, :]
b3 = np.loadtxt(Path + 'predictions3.txt.gz')
# b3 = b3[:, np.newaxis, :]
b4 = np.loadtxt(Path + 'predictions4.txt.gz')
# b4 = b4[:, np.newaxis, :]
b5 = np.loadtxt(Path + 'predictions5.txt.gz')
# b5 = b5[:, np.newaxis, :]
b6 = np.loadtxt(Path + 'predictions6.txt.gz')
# b6 = b6[:, np.newaxis, :]
b7 = np.loadtxt(Path + 'predictions7.txt.gz')
# b7 = b7[:, np.newaxis, :]
b8 = np.loadtxt(Path + 'predictions8.txt.gz')
# b8 = b8[:, np.newaxis, :]
b9 = np.loadtxt(Path + 'predictions9.txt.gz')
# b9 = b9[:, np.newaxis, :]
b10 = np.loadtxt(Path + 'predictions10.txt.gz')
# b10 = b10[:, np.newaxis, :]

# fulll = np.concatenate([b1[:,:,0], b2[:,:,0], b3[:,:,0], b4[:,:,0], b5[:,:,0], b6[:,:,0], b7[:,:,0], b8[:,:,0], b9[:,:,0], b10[:,:,0]], 1)
# fullr = np.concatenate([b1[:,:,1], b2[:,:,1], b3[:,:,1], b4[:,:,1], b5[:,:,1], b6[:,:,1], b7[:,:,1], b8[:,:,1], b9[:,:,1], b10[:,:,1]], 1)
# print(fulll.shape)
# smode = np.zeros([len(fulll), 2])
# for i in range(len(fulll)):
#     mil = np.min(fulll[i])
#     mal = np.max(fulll[i])
#     mir = np.min(fullr[i])
#     mar = np.max(fullr[i])
#     smode[i, 0] = modefm(mil, mal, fulll[i])
#     smode[i, 1] = modefm(mir, mar, fullr[i])

# Element-wise mean of the ten ensemble predictions; this is the step used in our submission.
tot = (b1 + b2 + b3 + b4 + b5 + b6 + b7 + b8 + b9 + b10) / 10
np.savetxt('/home/peter/Documents/MP/calculatemode/predictions.txt.gz', tot)
```
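
To use Mean.py, adapt `Path` (and the hardcoded output path) to the directory holding the ten gathered prediction files and run `python3 Mean.py`; the averaged predictions are written to `predictions.txt.gz`.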