├── .flake8 ├── .gitignore ├── LICENSE ├── README.md ├── datasets └── UnityEyes │ └── README.md ├── get_mpiigaze_hdf.bash ├── get_trained_weights.bash ├── setup.py └── src ├── core ├── __init__.py ├── checkpoint_manager.py ├── data_source.py ├── live_tester.py ├── model.py ├── summary_manager.py └── time_manager.py ├── datasources ├── __init__.py ├── frames.py ├── hdf5.py ├── unityeyes.py ├── video.py └── webcam.py ├── dpg_train.py ├── elg_demo.py ├── elg_train.py ├── models ├── __init__.py ├── dpg.py └── elg.py └── util ├── gaze.py ├── gazemap.py └── heatmap.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | doctests = True 3 | enable-extensions = docstrings 4 | max-line-length = 100 5 | statistics = True 6 | show-source = True 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Datasets folder 2 | datasets/* 3 | 4 | # Outputs folder 5 | outputs/ 6 | 7 | # 3rd party files 8 | src/3rdparty 9 | 10 | # Temporary files 11 | *.pyc 12 | __pycache__/ 13 | 14 | # Python package caches 15 | dist/ 16 | *.egg-info/ 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 ETH Zurich, Seonwook Park 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GazeML 2 | A deep learning framework based on Tensorflow for the training of high performance gaze estimation. 3 | 4 | *Please note that though this framework may work on various platforms, it has only been tested on an Ubuntu 16.04 system.* 5 | 6 | *All implementations are re-implementations of published algorithms and thus provided models should not be considered as reference.* 7 | 8 | This framework currently integrates the following models: 9 | 10 | ## ELG 11 | 12 | Eye region Landmarks based Gaze Estimation. 13 | 14 | > Seonwook Park, Xucong Zhang, Andreas Bulling, and Otmar Hilliges. "Learning to find eye region landmarks for remote gaze estimation in unconstrained settings." In Proceedings of the 2018 ACM Symposium on Eye Tracking Research & Applications, p. 21. ACM, 2018. 
15 | 16 | - Project page: https://ait.ethz.ch/landmarks-gaze 17 | - Video: https://youtu.be/cLUHKYfZN5s 18 | 19 | ## DPG 20 | 21 | Deep Pictorial Gaze Estimation 22 | 23 | > Seonwook Park, Adrian Spurr, and Otmar Hilliges. "Deep Pictorial Gaze Estimation". In European Conference on Computer Vision. 2018 24 | 25 | - Project page: https://ait.ethz.ch/pictorial-gaze 26 | 27 | *To download the MPIIGaze training data, please run `bash get_mpiigaze_hdf.bash`* 28 | 29 | *Note: This reimplementation differs from the original proposed implementation and reaches 4.63 degrees in the within-MPIIGaze setting. The changes were made to attain comparable performance and results in a leaner model.* 30 | 31 | ## Installing dependencies 32 | 33 | Run (with `sudo` appended if necessary), 34 | ``` 35 | python3 setup.py install 36 | ``` 37 | 38 | Note that this can be done within a [virtual environment](https://docs.python.org/3/tutorial/venv.html). In this case, the sequence of commands would be similar to: 39 | ``` 40 | mkvirtualenv -p $(which python3) myenv 41 | python3 setup.py install 42 | ``` 43 | 44 | when using [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/). 45 | 46 | ### Tensorflow 47 | Tensorflow is assumed to be installed separately, to allow for usage of [custom wheel files](https://github.com/mind/wheels) if necessary. 48 | 49 | Please follow the official installation guide for Tensorflow [here](https://www.tensorflow.org/install/). 50 | 51 | ## Getting pre-trained weights 52 | To acquire the pre-trained weights provided with this repository, please run: 53 | ``` 54 | bash get_trained_weights.bash 55 | ``` 56 | 57 | ## Running the demo 58 | To run the webcam demo, perform the following: 59 | ``` 60 | cd src 61 | python3 elg_demo.py 62 | ``` 63 | 64 | To see available options, please run `python3 elg_demo.py --help` instead. 65 | 66 | ## Structure 67 | 68 | * `datasets/` - all data sources required for training/validation/testing. 69 | * `outputs/` - any output for a model will be placed here, including logs, summaries, and checkpoints. 70 | * `src/` - all source code. 71 | * `core/` - base classes 72 | * `datasources/` - routines for reading and preprocessing entries for training and testing 73 | * `models/` - neural network definitions 74 | * `util/` - utility methods 75 | -------------------------------------------------------------------------------- /datasets/UnityEyes/README.md: -------------------------------------------------------------------------------- 1 | # UnityEyes 2 | 3 | UnityEyes is a synthetic dataset used to train the ELG CNN for eye region landmarks localization. 4 | 5 | The published code and software can be found at https://www.cl.cam.ac.uk/research/rainbow/projects/unityeyes/ 6 | 7 | Please download the software and generate images such that there exists the folder `imgs/` when you are done. 8 | 9 | The more images you generate, the more robust the trained neural network would be. 
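
Before training, it can help to confirm that the generated data ended up where this framework expects it. The snippet below is only a quick, illustrative sanity check; it assumes the default UnityEyes output of numbered `.jpg` images with matching `.json` metadata files inside `imgs/`:

```
from pathlib import Path

imgs_dir = Path('datasets/UnityEyes/imgs')  # run from the repository root
jpg_files = sorted(imgs_dir.glob('*.jpg'))
json_files = sorted(imgs_dir.glob('*.json'))
print('Found %d images and %d metadata files' % (len(jpg_files), len(json_files)))
assert len(jpg_files) > 0, 'No images found - generate some with UnityEyes first.'
```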
10 | -------------------------------------------------------------------------------- /get_mpiigaze_hdf.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd datasets 4 | 5 | # MPIIGaze dataset with 60x36 greyscale eye images 6 | wget -Nnv https://files.ait.ethz.ch/projects/pictorial-gaze/MPIIGaze.h5 7 | -------------------------------------------------------------------------------- /get_trained_weights.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -d outputs ]; then 4 | mkdir outputs 5 | fi 6 | cd outputs 7 | 8 | ## ELG model 9 | # eye_image_shape = (108, 180) 10 | # first_layer_stride = 3 11 | # num_modules = 3 12 | # num_feature_maps = 64 13 | wget -Nnv https://files.ait.ethz.ch/projects/landmarks-gaze/ELG_i180x108_f60x36_n64_m3.zip 14 | unzip -oq ELG_i180x108_f60x36_n64_m3.zip 15 | 16 | ## ELG model 17 | # eye_image_shape = (36, 60) 18 | # first_layer_stride = 1 19 | # num_modules = 2 20 | # num_feature_maps = 32 21 | wget -Nnv https://files.ait.ethz.ch/projects/landmarks-gaze/ELG_i60x36_f60x36_n32_m2.zip 22 | unzip -oq ELG_i60x36_f60x36_n32_m2.zip 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup module for GazeML.""" 2 | 3 | from setuptools import setup, find_packages 4 | 5 | setup( 6 | name='gazeml', 7 | version='0.1', 8 | description='Data-driven gaze estimation using machine learning.', 9 | 10 | author='Seonwook Park', 11 | author_email='spark@inf.ethz.ch', 12 | 13 | packages=find_packages(exclude=[]), 14 | python_requires='>=3.5', 15 | install_requires=[ 16 | 'coloredlogs', 17 | 'h5py', 18 | 'numpy', 19 | 'opencv-python', 20 | 'pandas', 21 | 'ujson', 22 | 'dlib', 23 | 24 | # Install the most appropriate version of Tensorflow 25 | # Ref. 
https://www.tensorflow.org/install/ 26 | # 'tensorflow', 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /src/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Exported classes and methods for core package.""" 2 | from .data_source import BaseDataSource 3 | from .model import BaseModel 4 | from .live_tester import LiveTester 5 | from .time_manager import TimeManager 6 | from .summary_manager import SummaryManager 7 | 8 | __all__ = ( 9 | 'BaseDataSource', 10 | 'BaseModel', 11 | 'LiveTester', 12 | 'SummaryManager', 13 | 'TimeManager', 14 | ) 15 | -------------------------------------------------------------------------------- /src/core/checkpoint_manager.py: -------------------------------------------------------------------------------- 1 | """Manage saving and loading of model checkpoints.""" 2 | import os 3 | import re 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | import logging 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class CheckpointManager(object): 13 | """Manager to coordinate saving and loading of trainable parameters.""" 14 | 15 | def __init__(self, model): 16 | """Initialize manager based on given model instance.""" 17 | self._tensorflow_session = model._tensorflow_session 18 | self._model = model 19 | 20 | def build_savers(self): 21 | """Create tf.train.Saver instances.""" 22 | all_saveable_vars = sorted(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) + 23 | tf.get_collection(tf.GraphKeys.SAVEABLE_OBJECTS) + 24 | tf.get_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES) + 25 | tf.get_collection_ref('batch_norm_non_trainable'), 26 | key=lambda v: v.name) 27 | 28 | # Grab all available prefixes 29 | all_prefixes = [] 30 | for v in all_saveable_vars: 31 | name = v.name 32 | if '/' not in name: 33 | continue 34 | prefix = name.split('/')[0] 35 | if prefix == 'test' or prefix == 'learning_params': 36 | continue 37 | if prefix not in all_prefixes: 38 | all_prefixes.append(prefix) 39 | 40 | # For each prefix, create saver 41 | self._savers = {} 42 | for prefix in all_prefixes: 43 | vars_to_save = [v for v in all_saveable_vars if v.name.startswith(prefix + '/')] 44 | if len(vars_to_save): 45 | self._savers[prefix] = tf.train.Saver(vars_to_save, max_to_keep=2) 46 | 47 | def load_all(self): 48 | """Load all available weights for each known prefix.""" 49 | iteration_number = 0 50 | iteration_numbers = [] 51 | for prefix, saver in self._savers.items(): 52 | output_path = '%s/checkpoints/%s' % (self._model.output_path, prefix) 53 | checkpoint = tf.train.get_checkpoint_state(output_path) 54 | if checkpoint and checkpoint.model_checkpoint_path: 55 | checkpoint_name = os.path.basename(checkpoint.model_checkpoint_path) 56 | try: # Attempt to restore saveable variables 57 | self._savers[prefix].restore(self._tensorflow_session, 58 | '%s/%s' % (output_path, checkpoint_name)) 59 | iteration_numbers.append( 60 | int(next(re.finditer("(\d+)(?!.*\d)", checkpoint_name)).group(0)) 61 | ) 62 | except Exception as e: 63 | import traceback 64 | traceback.print_exc() 65 | if len(iteration_numbers) > 0: 66 | iteration_number = np.amax(iteration_numbers) 67 | return iteration_number 68 | 69 | def save_all(self, iteration_number): 70 | """Save all prefixes.""" 71 | prefixes_to_use = [] 72 | for schedule in self._model._learning_schedule: 73 | for prefixes in schedule['loss_terms_to_optimize'].values(): 74 | prefixes_to_use += prefixes 75 | prefixes_to_use = 
list(set(prefixes_to_use)) 76 | 77 | for prefix, saver in self._savers.items(): 78 | if prefix not in prefixes_to_use: 79 | continue 80 | output_path = '%s/checkpoints/%s' % (self._model.output_path, prefix) 81 | if not os.path.isdir(output_path): 82 | os.makedirs(output_path) 83 | saver.save(self._tensorflow_session, output_path + '/model', 84 | global_step=iteration_number) 85 | logger.debug('Saved %s' % output_path) 86 | logger.info('CheckpointManager::save_all call done') 87 | -------------------------------------------------------------------------------- /src/core/data_source.py: -------------------------------------------------------------------------------- 1 | """Default specification of a data source.""" 2 | from collections import OrderedDict 3 | import multiprocessing 4 | import queue 5 | import threading 6 | import time 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | import logging 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class BaseDataSource(object): 16 | """Base DataSource class.""" 17 | 18 | def __init__(self, 19 | tensorflow_session: tf.Session, 20 | data_format: str = 'NHWC', 21 | batch_size: int = 32, 22 | num_threads: int = max(4, multiprocessing.cpu_count()), 23 | min_after_dequeue: int = 1000, 24 | fread_queue_capacity: int = 0, 25 | preprocess_queue_capacity: int = 0, 26 | staging=False, 27 | shuffle=None, 28 | testing=False, 29 | ): 30 | """Initialize a data source instance.""" 31 | assert tensorflow_session is not None and isinstance(tensorflow_session, tf.Session) 32 | assert isinstance(batch_size, int) and batch_size > 0 33 | if shuffle is None: 34 | shuffle = staging 35 | self.testing = testing 36 | if testing: 37 | assert not shuffle and not staging 38 | # if num_threads != 1: 39 | # logger.info('Forcing use of single thread for live testing.') 40 | # num_threads = 1 41 | self.staging = staging 42 | self.shuffle = shuffle 43 | self.data_format = data_format.upper() 44 | assert self.data_format == 'NHWC' or self.data_format == 'NCHW' 45 | self.batch_size = batch_size 46 | self.num_threads = num_threads 47 | self._tensorflow_session = tensorflow_session 48 | self._coordinator = tf.train.Coordinator() 49 | self.all_threads = [] 50 | 51 | # Setup file read queue 52 | self._fread_queue_capacity = fread_queue_capacity 53 | if self._fread_queue_capacity == 0: 54 | self._fread_queue_capacity = (num_threads + 1) * batch_size 55 | self._fread_queue = queue.Queue(maxsize=self._fread_queue_capacity) 56 | 57 | with tf.variable_scope(''.join(c for c in self.short_name if c.isalnum())): 58 | # Setup preprocess queue 59 | labels, dtypes, shapes = self._determine_dtypes_and_shapes() 60 | self._preprocess_queue_capacity = (min_after_dequeue + (num_threads + 1) * batch_size 61 | if preprocess_queue_capacity == 0 62 | else preprocess_queue_capacity) 63 | if shuffle: 64 | self._preprocess_queue = tf.RandomShuffleQueue( 65 | capacity=self._preprocess_queue_capacity, 66 | min_after_dequeue=min_after_dequeue, 67 | dtypes=dtypes, shapes=shapes, 68 | ) 69 | else: 70 | self._preprocess_queue = tf.FIFOQueue( 71 | capacity=self._preprocess_queue_capacity, 72 | dtypes=dtypes, shapes=shapes, 73 | ) 74 | self._tensors_to_enqueue = OrderedDict([ 75 | (label, tf.placeholder(dtype, shape=shape, name=label)) 76 | for label, dtype, shape in zip(labels, dtypes, shapes) 77 | ]) 78 | 79 | self._enqueue_op = \ 80 | self._preprocess_queue.enqueue(tuple(self._tensors_to_enqueue.values())) 81 | self._preprocess_queue_close_op = \ 82 | 
self._preprocess_queue.close(cancel_pending_enqueues=True) 83 | self._preprocess_queue_size_op = self._preprocess_queue.size() 84 | self._preprocess_queue_clear_op = \ 85 | self._preprocess_queue.dequeue_up_to(self._preprocess_queue.size()) 86 | if not staging: 87 | output_tensors = self._preprocess_queue.dequeue_many(self.batch_size) 88 | if not isinstance(output_tensors, list): 89 | output_tensors = [output_tensors] 90 | self._output_tensors = dict([ 91 | (label, tensor) for label, tensor in zip(labels, output_tensors) 92 | ]) 93 | else: 94 | # Setup on-GPU staging area 95 | self._staging_area = tf.contrib.staging.StagingArea( 96 | dtypes=dtypes, 97 | shapes=[tuple([batch_size] + list(shape)) for shape in shapes], 98 | capacity=1, # This does not have to be high 99 | ) 100 | self._staging_area_put_op = \ 101 | self._staging_area.put(self._preprocess_queue.dequeue_many(batch_size)) 102 | self._staging_area_clear_op = self._staging_area.clear() 103 | 104 | self._output_tensors = dict([ 105 | (label, tensor) for label, tensor in zip(labels, self._staging_area.get()) 106 | ]) 107 | 108 | logger.info('Initialized data source: "%s"' % self.short_name) 109 | 110 | def __del__(self): 111 | """Destruct and clean up instance.""" 112 | self.cleanup() 113 | 114 | @property 115 | def num_entries(self): 116 | """Number of entries in this data source. 117 | 118 | Used to calculate number of steps to train when asked to be trained for # epochs. 119 | """ 120 | raise NotImplementedError('BaseDataSource::num_entries not specified.') 121 | 122 | @property 123 | def short_name(self): 124 | """Short identifier for data source. 125 | 126 | Overload this magic method if the class is generic, eg. supporting h5py/numpy arrays as 127 | input with specific data sources. 128 | """ 129 | raise NotImplementedError('BaseDataSource::short_name not specified.') 130 | 131 | __cleaned_up = False 132 | 133 | def cleanup(self): 134 | """Force-close all threads.""" 135 | if self.__cleaned_up: 136 | return 137 | 138 | # Clear queues 139 | fread_threads = [t for t in self.all_threads if t.name.startswith('fread_')] 140 | preprocess_threads = [t for t in self.all_threads if t.name.startswith('preprocess_')] 141 | transfer_threads = [t for t in self.all_threads if t.name.startswith('transfer_')] 142 | 143 | self._coordinator.request_stop() 144 | 145 | # Unblock any self._fread_queue.put calls 146 | while True: 147 | try: 148 | self._fread_queue.get_nowait() 149 | except queue.Empty: 150 | break 151 | time.sleep(0.1) 152 | 153 | # Push data through to trigger exits in preprocess/transfer threads 154 | for _ in range(self.batch_size * self.num_threads): 155 | self._fread_queue.put(None) 156 | self._tensorflow_session.run(self._preprocess_queue_close_op) 157 | if self.staging: 158 | self._tensorflow_session.run(self._staging_area_clear_op) 159 | 160 | self._coordinator.join(self.all_threads, stop_grace_period_secs=5) 161 | self.__cleaned_up = True 162 | 163 | def reset(self): 164 | """Reset threads and empty queues (where possible).""" 165 | assert self.testing is True 166 | 167 | # Clear queues 168 | self._coordinator.request_stop() 169 | with self._fread_queue.mutex: # Unblock any self._fread_queue.get calls 170 | self._fread_queue.queue.clear() 171 | for _ in range(2*self.num_threads): 172 | self._fread_queue.put(None) 173 | while True: # Unblock any enqueue requests 174 | preprocess_queue_size = self._tensorflow_session.run(self._preprocess_queue_size_op) 175 | if preprocess_queue_size == 0: 176 | break 177 | 
self._tensorflow_session.run(self._preprocess_queue_clear_op) 178 | time.sleep(0.1) 179 | while True: # Unblock any self._fread_queue.put calls 180 | try: 181 | self._fread_queue.get_nowait() 182 | except queue.Empty: 183 | break 184 | time.sleep(0.1) 185 | self._coordinator.join(self.all_threads, stop_grace_period_secs=5) 186 | 187 | # Restart threads 188 | self._coordinator.clear_stop() 189 | self.create_and_start_threads() 190 | 191 | def _determine_dtypes_and_shapes(self): 192 | """Determine the dtypes and shapes of Tensorflow queue and staging area entries.""" 193 | while True: 194 | raw_entry = next(self.entry_generator(yield_just_one=True)) 195 | if raw_entry is None: 196 | continue 197 | preprocessed_entry_dict = self.preprocess_entry(raw_entry) 198 | if preprocessed_entry_dict is not None: 199 | break 200 | labels, values = zip(*list(preprocessed_entry_dict.items())) 201 | dtypes = [value.dtype for value in values] 202 | shapes = [value.shape for value in values] 203 | return labels, dtypes, shapes 204 | 205 | def entry_generator(self, yield_just_one=False): 206 | """Return a generator which reads an entry from disk or memory. 207 | 208 | This method should be thread-safe so make sure to use threading.Lock where necessary. 209 | The implemented method should explicitly handle the `yield_just_one=True` case to only 210 | yield one entry without hanging in the middle of an infinite loop. 211 | """ 212 | raise NotImplementedError('BaseDataSource::entry_generator not implemented.') 213 | 214 | def preprocess_entry(self, entry): 215 | """Preprocess a "raw" data entry and yield a dict. 216 | 217 | Each element of an entry is provided to this method as separate arguments. 218 | This method should be thread-safe so make sure to use threading.Lock where necessary. 
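
        A minimal sketch of an override (illustrative only; it mirrors the greyscale
        eye-image normalization used by the frame-based sources in this repository,
        and the 'eye' label is just an example):

            def preprocess_entry(self, entry):
                eye = entry['eye'].astype(np.float32)
                eye = eye * 2.0 / 255.0 - 1.0  # scale intensities to [-1, 1]
                entry['eye'] = np.expand_dims(eye, -1 if self.data_format == 'NHWC' else 0)
                return entry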
219 | """ 220 | raise NotImplementedError('BaseDataSource::preprocess_entry not implemented.') 221 | 222 | def read_entry_job(self): 223 | """Job to read an entry and enqueue to _fread_queue.""" 224 | read_entry = self.entry_generator() 225 | while not self._coordinator.should_stop(): 226 | try: 227 | entry = next(read_entry) 228 | except StopIteration: 229 | if not self.testing: 230 | continue 231 | else: 232 | logger.debug('Reached EOF in %s' % threading.current_thread().name) 233 | break 234 | if entry is not None: 235 | self._fread_queue.put(entry) 236 | read_entry.close() 237 | logger.debug('Exiting thread %s' % threading.current_thread().name) 238 | 239 | def preprocess_job(self): 240 | """Job to fetch and preprocess an entry.""" 241 | while not self._coordinator.should_stop(): 242 | raw_entry = self._fread_queue.get() 243 | if raw_entry is None: 244 | return 245 | preprocessed_entry_dict = self.preprocess_entry(raw_entry) 246 | if preprocessed_entry_dict is not None: 247 | feed_dict = dict([(self._tensors_to_enqueue[label], value) 248 | for label, value in preprocessed_entry_dict.items()]) 249 | try: 250 | self._tensorflow_session.run(self._enqueue_op, feed_dict=feed_dict) 251 | except (tf.errors.CancelledError, RuntimeError): 252 | break 253 | logger.debug('Exiting thread %s' % threading.current_thread().name) 254 | 255 | def transfer_to_gpu_job(self): 256 | """Transfer a data entry from CPU memory to GPU memory.""" 257 | while not self._coordinator.should_stop(): 258 | try: 259 | self._tensorflow_session.run(self._staging_area_put_op) 260 | except tf.errors.CancelledError or tf.errors.OutOfRangeError: 261 | break 262 | logger.debug('Exiting thread %s' % threading.current_thread().name) 263 | 264 | def create_threads(self): 265 | """Create Python threads for multi-threaded read and preprocess jobs.""" 266 | name = self.short_name 267 | self.all_threads = [] 268 | 269 | def _create_and_register_thread(*args, **kwargs): 270 | thread = threading.Thread(*args, **kwargs) 271 | thread.daemon = True 272 | self.all_threads.append(thread) 273 | 274 | for i in range(self.num_threads): 275 | # File read thread 276 | _create_and_register_thread(target=self.read_entry_job, name='fread_%s_%d' % (name, i)) 277 | 278 | # Preprocess thread 279 | _create_and_register_thread(target=self.preprocess_job, 280 | name='preprocess_%s_%d' % (name, i)) 281 | 282 | if self.staging: 283 | # Send-to-GPU thread 284 | _create_and_register_thread(target=self.transfer_to_gpu_job, 285 | name='transfer_%s_%d' % (name, i)) 286 | 287 | def start_threads(self): 288 | """Begin executing all created threads.""" 289 | assert len(self.all_threads) > 0 290 | for thread in self.all_threads: 291 | thread.start() 292 | 293 | def create_and_start_threads(self): 294 | """Create and begin threads for preprocessing.""" 295 | self.create_threads() 296 | self.start_threads() 297 | 298 | @property 299 | def output_tensors(self): 300 | """Return tensors holding a preprocessed batch.""" 301 | return self._output_tensors 302 | -------------------------------------------------------------------------------- /src/core/live_tester.py: -------------------------------------------------------------------------------- 1 | """Concurrent testing during training.""" 2 | import collections 3 | import platform 4 | import threading 5 | import time 6 | import traceback 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | import logging 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class LiveTester(object): 16 | """Manage 
concurrent testing on test data source.""" 17 | 18 | def __init__(self, model, data_source, use_batch_statistics=True): 19 | """Initialize tester with reference to model and data sources.""" 20 | self.model = model 21 | self.data = data_source 22 | self.time = self.model.time 23 | self.summary = self.model.summary 24 | self._tensorflow_session = model._tensorflow_session 25 | 26 | self._is_testing = False 27 | self._condition = threading.Condition() 28 | 29 | self._use_batch_statistics = use_batch_statistics 30 | 31 | def stop(self): 32 | logger.info('LiveTester::stop is being called.') 33 | self._is_testing = False 34 | 35 | def __del__(self): 36 | """Handle deletion of instance by closing thread.""" 37 | if not hasattr(self, '_coordinator'): 38 | return 39 | self._coordinator.request_stop() 40 | with self._condition: 41 | self._is_testing = True # Break wait if waiting 42 | self._condition.notify_all() 43 | self._coordinator.join([self._thread], stop_grace_period_secs=1) 44 | 45 | def _true_if_testing(self): 46 | return self._is_testing 47 | 48 | def trigger_test_if_not_testing(self, current_step): 49 | """If not currently testing, run test.""" 50 | if not self._is_testing: 51 | with self._condition: 52 | self._is_testing = True 53 | self._testing_at_step = current_step 54 | self._condition.notify_all() 55 | 56 | def test_job(self): 57 | """Evaluate requested metric over entire test set.""" 58 | while not self._coordinator.should_stop(): 59 | with self._condition: 60 | self._condition.wait_for(self._true_if_testing) 61 | if self._coordinator.should_stop(): 62 | break 63 | should_stop = False 64 | try: 65 | should_stop = self.do_full_test() 66 | except: 67 | traceback.print_exc() 68 | self._is_testing = False 69 | if should_stop is True: 70 | break 71 | logger.debug('Exiting thread %s' % threading.current_thread().name) 72 | 73 | def do_full_test(self, sleep_between_batches=0.2): 74 | # Copy current weights over 75 | self.copy_model_weights() 76 | 77 | # Reset data sources 78 | for data_source_name, data_source in self.data.items(): 79 | data_source.reset() 80 | num_batches = int(data_source.num_entries / data_source.batch_size) 81 | 82 | # Decide what to evaluate 83 | fetches = self._tensors_to_evaluate 84 | outputs = dict([(name, list()) for name in fetches.keys()]) 85 | 86 | # Select random index to produce (image) summaries at 87 | summary_index = np.random.randint(num_batches) 88 | 89 | self.time.start('full test') 90 | for i in range(num_batches): 91 | if self._is_testing is not True: 92 | logger.debug('Testing flag found to be `False` at iter. %d' % i) 93 | break 94 | logger.debug('Testing on %03d/%03d batches.' 
% (i + 1, num_batches)) 95 | if i == summary_index: 96 | fetches['summaries'] = self.summary.get_ops(mode='test') 97 | try: 98 | output = self._tensorflow_session.run( 99 | fetches=fetches, 100 | feed_dict={ 101 | self.model.is_training: False, 102 | self.model.use_batch_statistics: self._use_batch_statistics, 103 | }, 104 | ) 105 | except (tf.errors.CancelledError, RuntimeError): 106 | return True 107 | time.sleep(sleep_between_batches) # Brief pause to prioritise training 108 | if 'summaries' in output: # Write summaries on first batch 109 | self.summary.write_summaries(output['summaries'], self._testing_at_step) 110 | del fetches['summaries'] 111 | del output['summaries'] 112 | for name, value in output.items(): # Gather results from this batch 113 | outputs[name].append(output[name]) 114 | self.time.end('full test') 115 | 116 | # If incomplete, skip this round of tests (most likely shutting down) 117 | if len(list(outputs.values())[0]) != num_batches: 118 | return True 119 | 120 | # Calculate mean values 121 | for name, values in outputs.items(): 122 | outputs[name] = np.mean(values) 123 | 124 | # TODO: Log metric as summary 125 | to_print = '[Test at step %06d] ' % self._testing_at_step 126 | to_print += ', '.join([ 127 | '%s = %f' % (name, value) for name, value in outputs.items() 128 | ]) 129 | logger.info(to_print) 130 | 131 | # Store mean metrics/losses (and other summaries) 132 | feed_dict = dict([(self._placeholders[name], value) 133 | for name, value in outputs.items()]) 134 | feed_dict[self.model.is_training] = False 135 | feed_dict[self.model.use_batch_statistics] = True 136 | try: 137 | summaries = self._tensorflow_session.run( 138 | fetches=self.summary.get_ops(mode='full_test'), 139 | feed_dict=feed_dict, 140 | ) 141 | except (tf.errors.CancelledError, RuntimeError): 142 | return True 143 | self.summary.write_summaries(summaries, self._testing_at_step) 144 | 145 | return False 146 | 147 | def do_final_full_test(self, current_step): 148 | logger.info('Stopping the live testing threads.') 149 | 150 | # Stop thread(s) 151 | self._is_testing = False 152 | self._coordinator.request_stop() 153 | with self._condition: 154 | self._is_testing = True # Break wait if waiting 155 | self._condition.notify_all() 156 | self._coordinator.join([self._thread], stop_grace_period_secs=1) 157 | 158 | # Start final full test 159 | logger.info('Running final full test') 160 | self.copy_model_weights() 161 | self._is_testing = True 162 | self._testing_at_step = current_step 163 | self.do_full_test(sleep_between_batches=0) 164 | 165 | def _post_model_build(self): 166 | """Prepare combined operation to copy model parameters over from CPU/GPU to CPU.""" 167 | with tf.variable_scope('copy2test'): 168 | all_variables = tf.global_variables() 169 | train_vars = dict([(v.name, v) for v in all_variables 170 | if not v.name.startswith('test/')]) 171 | test_vars = dict([(v.name, v) for v in all_variables 172 | if v.name.startswith('test/')]) 173 | self._copy_variables_to_test_model_op = tf.tuple([ 174 | test_vars['test/' + k].assign(train_vars[k]) for k in train_vars.keys() 175 | if 'test/' + k in test_vars 176 | ]) 177 | 178 | # Begin testing thread 179 | self._coordinator = tf.train.Coordinator() 180 | self._thread = threading.Thread(target=self.test_job, 181 | name='%s_tester' % self.model.identifier) 182 | self._thread.daemon = True 183 | self._thread.start() 184 | 185 | # Pick tensors we need to evaluate 186 | all_tensors = dict(self.model.loss_terms['test'], **self.model.metrics['test']) 187 | 
self._tensors_to_evaluate = dict([(n, t) for n, t in all_tensors.items()]) 188 | loss_terms_to_evaluate = dict([(n, t) for n, t in self.model.loss_terms['test'].items() 189 | if t in self._tensors_to_evaluate.values()]) 190 | metrics_to_evaluate = dict([(n, t) for n, t in self.model.metrics['test'].items() 191 | if t in self._tensors_to_evaluate.values()]) 192 | 193 | # Placeholders for writing summaries at end of test run 194 | self._placeholders = {} 195 | for type_, tensors in (('loss', loss_terms_to_evaluate), 196 | ('metric', metrics_to_evaluate)): 197 | for name in tensors.keys(): 198 | name = '%s/test/%s' % (type_, name) 199 | placeholder = tf.placeholder(dtype=np.float32, name=name + '_placeholder') 200 | self.summary.scalar(name, placeholder) 201 | self._placeholders[name.split('/')[-1]] = placeholder 202 | 203 | def copy_model_weights(self): 204 | """Copy weights from main model used for training. 205 | 206 | This operation should stop-the-world, that is, training should not occur. 207 | """ 208 | assert self._copy_variables_to_test_model_op is not None 209 | self._tensorflow_session.run(self._copy_variables_to_test_model_op) 210 | logger.debug('Copied over trainable model parameters for testing.') 211 | -------------------------------------------------------------------------------- /src/core/model.py: -------------------------------------------------------------------------------- 1 | """Base model class for Tensorflow-based model construction.""" 2 | from .data_source import BaseDataSource 3 | import os 4 | import sys 5 | import time 6 | from typing import Any, Dict, List 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from .live_tester import LiveTester 12 | from .time_manager import TimeManager 13 | from .summary_manager import SummaryManager 14 | from .checkpoint_manager import CheckpointManager 15 | import logging 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class BaseModel(object): 20 | """Base model class for Tensorflow-based model construction. 21 | 22 | This class assumes that there exist no other Tensorflow models defined. 23 | That is, any variable that exists in the Python session will be grabbed by the class. 
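
    A subclass implements `build_model` and is then typically driven as follows
    (illustrative only; `MyModel`, `SomeDataSource`, and the loss-term/prefix names
    are placeholders, while the `learning_schedule` keys are those read by
    `_build_optimizers` and `CheckpointManager.save_all`):

        session = tf.Session()
        train_source = SomeDataSource(session, batch_size=32)  # any BaseDataSource subclass
        model = MyModel(
            session,
            learning_schedule=[{
                'loss_terms_to_optimize': {'gaze_mse': ['hourglass']},  # hypothetical names
                'learning_rate': 1e-4,
            }],
            train_data={'train': train_source},
        )
        model.train(num_epochs=20)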
24 | """ 25 | 26 | def __init__(self, 27 | tensorflow_session: tf.Session, 28 | learning_schedule: List[Dict[str, Any]] = [], 29 | train_data: Dict[str, BaseDataSource] = {}, 30 | test_data: Dict[str, BaseDataSource] = {}, 31 | test_losses_or_metrics: str = None, 32 | use_batch_statistics_at_test: bool = True, 33 | identifier: str = None): 34 | """Initialize model with data sources and parameters.""" 35 | self._tensorflow_session = tensorflow_session 36 | self._train_data = train_data 37 | self._test_data = test_data 38 | self._test_losses_or_metrics = test_losses_or_metrics 39 | self._initialized = False 40 | self.__identifier = identifier 41 | 42 | # Extract and keep known prefixes/scopes 43 | self._learning_schedule = learning_schedule 44 | self._known_prefixes = [schedule for schedule in learning_schedule] 45 | 46 | # Check consistency of given data sources 47 | train_data_sources = list(train_data.values()) 48 | test_data_sources = list(test_data.values()) 49 | all_data_sources = train_data_sources + test_data_sources 50 | first_data_source = all_data_sources.pop() 51 | self._batch_size = first_data_source.batch_size 52 | self._data_format = first_data_source.data_format 53 | for data_source in all_data_sources: 54 | if data_source.batch_size != self._batch_size: 55 | raise ValueError(('Data source "%s" has anomalous batch size of %d ' + 56 | 'when detected batch size is %d.') % (data_source.short_name, 57 | data_source.batch_size, 58 | self._batch_size)) 59 | if data_source.data_format != self._data_format: 60 | raise ValueError(('Data source "%s" has anomalous data_format of %s ' + 61 | 'when detected data_format is %s.') % (data_source.short_name, 62 | data_source.data_format, 63 | self._data_format)) 64 | self._data_format_longer = ('channels_first' if self._data_format == 'NCHW' 65 | else 'channels_last') 66 | 67 | # Make output dir 68 | if not os.path.isdir(self.output_path): 69 | os.makedirs(self.output_path) 70 | 71 | # Log messages to file 72 | root_logger = logging.getLogger() 73 | file_handler = logging.FileHandler(self.output_path + '/messages.log') 74 | file_handler.setFormatter(root_logger.handlers[0].formatter) 75 | for handler in root_logger.handlers[1:]: # all except stdout 76 | root_logger.removeHandler(handler) 77 | root_logger.addHandler(file_handler) 78 | 79 | # Register a manager for tf.Summary 80 | self.summary = SummaryManager(self) 81 | 82 | # Register a manager for checkpoints 83 | self.checkpoint = CheckpointManager(self) 84 | 85 | # Register a manager for timing related operations 86 | self.time = TimeManager(self) 87 | 88 | # Prepare for live (concurrent) validation/testing during training, on the CPU 89 | self._enable_live_testing = (len(self._train_data) > 0) and (len(self._test_data) > 0) 90 | self._tester = LiveTester(self, self._test_data, use_batch_statistics_at_test) 91 | 92 | # Run-time parameters 93 | with tf.variable_scope('learning_params'): 94 | self.is_training = tf.placeholder(tf.bool) 95 | self.use_batch_statistics = tf.placeholder(tf.bool) 96 | self.learning_rate_multiplier = tf.Variable(1.0, trainable=False, dtype=tf.float32) 97 | self.learning_rate_multiplier_placeholder = tf.placeholder(dtype=tf.float32) 98 | self.assign_learning_rate_multiplier = \ 99 | tf.assign(self.learning_rate_multiplier, self.learning_rate_multiplier_placeholder) 100 | 101 | self._build_all_models() 102 | 103 | def __del__(self): 104 | """Explicitly call methods to cleanup any live threads.""" 105 | train_data_sources = list(self._train_data.values()) 106 | 
test_data_sources = list(self._test_data.values()) 107 | all_data_sources = train_data_sources + test_data_sources 108 | for data_source in all_data_sources: 109 | data_source.cleanup() 110 | self._tester.__del__() 111 | 112 | __identifier_stem = None 113 | 114 | @property 115 | def identifier(self): 116 | """Identifier for model based on time.""" 117 | if self.__identifier is not None: # If loading from checkpoints or having naming enforced 118 | return self.__identifier 119 | if self.__identifier_stem is None: 120 | self.__identifier_stem = self.__class__.__name__ + '/' + time.strftime('%y%m%d%H%M%S') 121 | return self.__identifier_stem + self._identifier_suffix 122 | 123 | @property 124 | def _identifier_suffix(self): 125 | """Identifier suffix for model based on data sources and parameters.""" 126 | return '' 127 | 128 | @property 129 | def output_path(self): 130 | """Path to store logs and model weights into.""" 131 | return '%s/%s' % (os.path.abspath(os.path.dirname(__file__) + '/../../outputs'), 132 | self.identifier) 133 | 134 | def _build_all_models(self): 135 | """Build training (GPU/CPU) and testing (CPU) streams.""" 136 | self.output_tensors = {} 137 | self.loss_terms = {} 138 | self.metrics = {} 139 | 140 | def _build_datasource_summaries(data_sources, mode): 141 | """Register summary operations for input data from given data sources.""" 142 | with tf.variable_scope('%s_data' % mode): 143 | for data_source_name, data_source in data_sources.items(): 144 | tensors = data_source.output_tensors 145 | for key, tensor in tensors.items(): 146 | summary_name = '%s/%s' % (data_source_name, key) 147 | shape = tensor.shape.as_list() 148 | num_dims = len(shape) 149 | if num_dims == 4: # Image data 150 | if shape[1] == 1 or shape[1] == 3: 151 | self.summary.image(summary_name, tensor, 152 | data_format='channels_first') 153 | elif shape[3] == 1 or shape[3] == 3: 154 | self.summary.image(summary_name, tensor, 155 | data_format='channels_last') 156 | # TODO: fix issue with no summary otherwise 157 | elif num_dims == 2: 158 | self.summary.histogram(summary_name, tensor) 159 | else: 160 | logger.debug('I do not know how to create a summary for %s (%s)' % 161 | (summary_name, tensor.shape.as_list())) 162 | 163 | def _build_train_or_test(mode): 164 | data_sources = self._train_data if mode == 'train' else self._test_data 165 | 166 | # Build model 167 | output_tensors, loss_terms, metrics = self.build_model(data_sources, mode=mode) 168 | 169 | # Record important tensors 170 | self.output_tensors[mode] = output_tensors 171 | self.loss_terms[mode] = loss_terms 172 | self.metrics[mode] = metrics 173 | 174 | # Create summaries for scalars 175 | if mode == 'train': 176 | for name, loss_term in loss_terms.items(): 177 | self.summary.scalar('loss/%s/%s' % (mode, name), loss_term) 178 | for name, metric in metrics.items(): 179 | self.summary.scalar('metric/%s/%s' % (mode, name), metric) 180 | 181 | # Build the main model 182 | if len(self._train_data) > 0: 183 | _build_datasource_summaries(self._train_data, mode='train') 184 | _build_train_or_test(mode='train') 185 | logger.info('Built model.') 186 | 187 | # Print no. 
of parameters and lops 188 | flops = tf.profiler.profile( 189 | options=tf.profiler.ProfileOptionBuilder( 190 | tf.profiler.ProfileOptionBuilder.float_operation() 191 | ).with_empty_output().build()) 192 | logger.info('------------------------------') 193 | logger.info(' Approximate Model Statistics ') 194 | logger.info('------------------------------') 195 | logger.info('FLOPS per input: {:,}'.format(flops.total_float_ops / self._batch_size)) 196 | logger.info( 197 | 'Trainable Parameters: {:,}'.format( 198 | np.sum([np.prod(v.shape.as_list()) for v in tf.trainable_variables()]) 199 | ) 200 | ) 201 | logger.info('------------------------------') 202 | 203 | # If there are any test data streams, build same model with different scope 204 | # Trainable parameters will be copied at test time 205 | if len(self._test_data) > 0: 206 | _build_datasource_summaries(self._test_data, mode='test') 207 | with tf.variable_scope('test'): 208 | _build_train_or_test(mode='test') 209 | logger.info('Built model for live testing.') 210 | 211 | if self._enable_live_testing: 212 | self._tester._post_model_build() # Create copy ops to be run before every test run 213 | 214 | def build_model(self, data_sources: Dict[str, BaseDataSource], mode: str): 215 | """Build model.""" 216 | raise NotImplementedError('BaseModel::build_model is not yet implemented.') 217 | 218 | def initialize_if_not(self, training=False): 219 | """Initialize variables and begin preprocessing threads.""" 220 | if self._initialized: 221 | return 222 | 223 | # Build supporting operations 224 | with tf.variable_scope('savers'): 225 | self.checkpoint.build_savers() # Create savers 226 | if training: 227 | with tf.variable_scope('optimize'): 228 | self._build_optimizers() 229 | 230 | # Start pre-processing routines 231 | for _, datasource in self._train_data.items(): 232 | datasource.create_and_start_threads() 233 | 234 | # Initialize all variables 235 | self._tensorflow_session.run(tf.global_variables_initializer()) 236 | self._initialized = True 237 | 238 | def _build_optimizers(self): 239 | """Based on learning schedule, create optimizer instances.""" 240 | self._optimize_ops = [] 241 | all_trainable_variables = tf.trainable_variables() 242 | all_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 243 | all_reg_losses = tf.losses.get_regularization_losses() 244 | for spec in self._learning_schedule: 245 | optimize_ops = [] 246 | update_ops = [] 247 | loss_terms = spec['loss_terms_to_optimize'] 248 | reg_losses = [] 249 | assert isinstance(loss_terms, dict) 250 | for loss_term_key, prefixes in loss_terms.items(): 251 | assert loss_term_key in self.loss_terms['train'].keys() 252 | variables_to_train = [] 253 | for prefix in prefixes: 254 | variables_to_train += [ 255 | v for v in all_trainable_variables 256 | if v.name.startswith(prefix) 257 | ] 258 | update_ops += [ 259 | o for o in all_update_ops 260 | if o.name.startswith(prefix) 261 | ] 262 | reg_losses += [ 263 | l for l in all_reg_losses 264 | if l.name.startswith(prefix) 265 | ] 266 | 267 | optimizer_class = tf.train.AdamOptimizer 268 | optimizer = optimizer_class( 269 | learning_rate=self.learning_rate_multiplier * spec['learning_rate'], 270 | # beta1=0.9, 271 | # beta2=0.999, 272 | ) 273 | final_loss = self.loss_terms['train'][loss_term_key] 274 | if len(reg_losses) > 0: 275 | final_loss += tf.reduce_sum(reg_losses) 276 | with tf.control_dependencies(update_ops): 277 | gradients, variables = zip(*optimizer.compute_gradients( 278 | loss=final_loss, 279 | var_list=variables_to_train, 
280 | aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N, 281 | )) 282 | # gradients, _ = tf.clip_by_global_norm(gradients, 5.0) # TODO: generalize 283 | optimize_op = optimizer.apply_gradients(zip(gradients, variables)) 284 | optimize_ops.append(optimize_op) 285 | self._optimize_ops.append(optimize_ops) 286 | logger.info('Built optimizer for: %s' % ', '.join(loss_terms.keys())) 287 | 288 | def train_loop_pre(self, current_step): 289 | """Run this at beginning of training loop.""" 290 | pass 291 | 292 | def train_loop_post(self, current_step): 293 | """Run this at end of training loop.""" 294 | pass 295 | 296 | def train(self, num_epochs=None, num_steps=None): 297 | """Train model as requested.""" 298 | if num_steps is None: 299 | num_entries = np.min([s.num_entries for s in list(self._train_data.values())]) 300 | num_steps = int(num_epochs * num_entries / self._batch_size) 301 | self.initialize_if_not(training=True) 302 | 303 | try: 304 | initial_step = self.checkpoint.load_all() 305 | current_step = initial_step 306 | for current_step in range(initial_step, num_steps): 307 | # Extra operations defined in implementation of this base class 308 | self.train_loop_pre(current_step) 309 | 310 | # Select loss terms, optimize operations, and metrics tensors to evaluate 311 | fetches = {} 312 | schedule_id = current_step % len(self._learning_schedule) 313 | schedule = self._learning_schedule[schedule_id] 314 | fetches['optimize_ops'] = self._optimize_ops[schedule_id] 315 | loss_term_keys, _ = zip(*list(schedule['loss_terms_to_optimize'].items())) 316 | fetches['loss_terms'] = [self.loss_terms['train'][k] for k in loss_term_keys] 317 | summary_op = self.summary.get_ops(mode='train') 318 | if len(summary_op) > 0: 319 | fetches['summaries'] = summary_op 320 | 321 | # Run one optimization iteration and retrieve calculated loss values 322 | self.time.start('train_iteration', average_over_last_n_timings=100) 323 | outcome = self._tensorflow_session.run( 324 | fetches=fetches, 325 | feed_dict={ 326 | self.is_training: True, 327 | self.use_batch_statistics: True, 328 | } 329 | ) 330 | self.time.end('train_iteration') 331 | 332 | # Print progress 333 | to_print = '%07d> ' % current_step 334 | to_print += ', '.join(['%s = %g' % (k, v) 335 | for k, v in zip(loss_term_keys, outcome['loss_terms'])]) 336 | self.time.log_every('train_iteration', to_print, seconds=2) 337 | 338 | # Trigger copy weights & concurrent testing (if not already running) 339 | if self._enable_live_testing: 340 | self._tester.trigger_test_if_not_testing(current_step) 341 | 342 | # Write summaries 343 | if 'summaries' in outcome: 344 | self.summary.write_summaries(outcome['summaries'], current_step) 345 | 346 | # Save model weights 347 | if self.time.has_been_n_seconds_since_last('save_weights', 600) \ 348 | and current_step > initial_step: 349 | self.checkpoint.save_all(current_step) 350 | 351 | # Extra operations defined in implementation of this base class 352 | self.train_loop_post(current_step) 353 | 354 | except KeyboardInterrupt: 355 | # Handle CTRL-C graciously 356 | self.checkpoint.save_all(current_step) 357 | sys.exit(0) 358 | 359 | # Stop live testing, and run final full test 360 | if self._enable_live_testing: 361 | self._tester.do_final_full_test(current_step) 362 | 363 | # Save final weights 364 | if current_step > initial_step: 365 | self.checkpoint.save_all(current_step) 366 | 367 | def inference_generator(self): 368 | """Perform inference on test data and yield a batch of output.""" 369 | 
self.initialize_if_not(training=False) 370 | self.checkpoint.load_all() # Load available weights 371 | 372 | # TODO: Make more generic by not picking first source 373 | data_source = next(iter(self._train_data.values())) 374 | while True: 375 | fetches = dict(self.output_tensors['train'], **data_source.output_tensors) 376 | start_time = time.time() 377 | outputs = self._tensorflow_session.run( 378 | fetches=fetches, 379 | feed_dict={ 380 | self.is_training: False, 381 | self.use_batch_statistics: True, 382 | }, 383 | ) 384 | outputs['inference_time'] = 1e3*(time.time() - start_time) 385 | yield outputs 386 | -------------------------------------------------------------------------------- /src/core/summary_manager.py: -------------------------------------------------------------------------------- 1 | """Manage registration and evaluation of summary operations.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | import logging 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class SummaryManager(object): 10 | """Manager to remember and run summary operations as necessary.""" 11 | 12 | def __init__(self, model, cheap_ops_every_n_secs=2, expensive_ops_every_n_mins=2): 13 | """Initialize manager based on given model instance.""" 14 | self._tensorflow_session = model._tensorflow_session 15 | self._model = model 16 | self._cheap_ops = { 17 | 'train': {}, 18 | 'test': {}, 19 | 'full_test': {}, 20 | } 21 | self._expensive_ops = { 22 | 'train': {}, 23 | 'test': {}, 24 | 'full_test': {}, 25 | } 26 | self._cheap_ops_every_n_secs = cheap_ops_every_n_secs 27 | self._expensive_ops_every_n_secs = 60 * expensive_ops_every_n_mins 28 | 29 | self._ready_to_write = False 30 | 31 | def _prepare_for_write(self): 32 | """Merge together cheap and expensive ops separately.""" 33 | self._writer = tf.summary.FileWriter(self._model.output_path, 34 | self._tensorflow_session.graph) 35 | for mode in ('train', 'test', 'full_test'): 36 | self._expensive_ops[mode].update(self._cheap_ops[mode]) 37 | self._ready_to_write = True 38 | 39 | def get_ops(self, mode='train'): 40 | """Retrieve summary ops to evaluate at given iteration number.""" 41 | if not self._ready_to_write: 42 | self._prepare_for_write() 43 | if mode == 'test' or mode == 'full_test': # Always return all ops for test case 44 | return self._expensive_ops[mode] 45 | elif mode == 'train': # Select ops to evaluate based on defined frequency 46 | check_func = self._model.time.has_been_n_seconds_since_last 47 | if check_func('expensive_summaries_train', self._expensive_ops_every_n_secs): 48 | return self._expensive_ops[mode] 49 | elif check_func('cheap_summaries_train', self._cheap_ops_every_n_secs): 50 | return self._cheap_ops[mode] 51 | return {} 52 | 53 | def write_summaries(self, summary_outputs, iteration_number): 54 | """Write given outputs to `self._writer`.""" 55 | for _, summary in summary_outputs.items(): 56 | self._writer.add_summary(summary, global_step=iteration_number) 57 | 58 | def _get_clean_name(self, operation): 59 | name = operation.name 60 | 61 | # Determine mode 62 | mode = 'train' 63 | if name.startswith('test/') or name.startswith('test_data/'): 64 | mode = 'test' 65 | elif name.startswith('loss/test/') or name.startswith('metric/test/'): 66 | mode = 'full_test' 67 | 68 | # Correct name 69 | if mode == 'test': 70 | name = name[name.index('/') + 1:] 71 | elif mode == 'full_test': 72 | name = '/'.join(name.split('/')[2:]) 73 | if name[-2] == ':': 74 | name = name[:-2] 75 | return mode, name 76 | 77 | def _register_cheap_op(self, 
operation): 78 | mode, name = self._get_clean_name(operation) 79 | try: 80 | assert name not in self._cheap_ops[mode] and name not in self._expensive_ops[mode] 81 | except AssertionError: 82 | raise Exception('Duplicate definition of summary item: "%s"' % name) 83 | self._cheap_ops[mode][name] = operation 84 | 85 | def _register_expensive_op(self, operation): 86 | mode, name = self._get_clean_name(operation) 87 | try: 88 | assert name not in self._cheap_ops[mode] and name not in self._expensive_ops[mode] 89 | except AssertionError: 90 | raise Exception('Duplicate definition of summary item: "%s"' % name) 91 | self._expensive_ops[mode][name] = operation 92 | 93 | def audio(self, name, tensor, **kwargs): 94 | """TODO: Log summary of audio.""" 95 | raise NotImplementedError('SummaryManager::audio not implemented.') 96 | 97 | def text(self, name, tensor, **kwargs): 98 | """TODO: Log summary of text.""" 99 | raise NotImplementedError('SummaryManager::text not implemented.') 100 | 101 | def histogram(self, name, tensor, **kwargs): 102 | """TODO: Log summary of audio.""" 103 | operation = tf.summary.histogram(name, tensor, **kwargs) 104 | self._register_expensive_op(operation) 105 | 106 | def image(self, name, tensor, data_format='channels_last', **kwargs): 107 | """TODO: Log summary of image.""" 108 | if data_format == 'channels_first': 109 | tensor = tf.transpose(tensor, perm=(0, 2, 3, 1)) 110 | c = tensor.shape.as_list()[-1] 111 | if c == 3: # Assume RGB and convert to BGR for visualization 112 | tensor = tensor[:, :, :, ::-1] # TODO: find better solution 113 | operation = tf.summary.image(name, tensor, **kwargs) 114 | self._register_expensive_op(operation) 115 | 116 | def _4d_tensor(self, name, tensor, **kwargs): 117 | """Display all filters in a grid for visualization.""" 118 | h, w, c, num_tensor = tensor.shape.as_list() 119 | 120 | # Try to visualise convolutional filters or feature maps 121 | # See: https://gist.github.com/kukuruza/03731dc494603ceab0c5 122 | # input shape: (Y, X, C, N) 123 | if c != 1 and c != 3: 124 | tensor = tf.reduce_mean(tensor, axis=2, keep_dims=True) 125 | c = 1 126 | # shape is now: (Y, X, 1|C, N) 127 | v_min = tf.reduce_min(tensor) 128 | v_max = tf.reduce_max(tensor) 129 | tensor -= v_min 130 | tensor *= 1.0 / (v_max - v_min) 131 | tensor = tf.pad(tensor, [[1, 0], [1, 0], [0, 0], [0, 0]], 'CONSTANT') 132 | tensor = tf.transpose(tensor, perm=(3, 0, 1, 2)) 133 | # shape is now: (N, Y, X, C) 134 | # place tensor on grid 135 | num_tensor_x = int(np.round(np.sqrt(num_tensor))) 136 | num_tensor_y = num_tensor / num_tensor_x 137 | while not num_tensor_y.is_integer(): 138 | num_tensor_x += 1 139 | num_tensor_y = num_tensor / num_tensor_x 140 | num_tensor_y = int(num_tensor_y) 141 | h += 1 142 | w += 1 143 | tensor = tf.reshape(tensor, (num_tensor_x, h * num_tensor_y, w, c)) 144 | # shape is now: (N_x, Y * N_y, X, c) 145 | tensor = tf.transpose(tensor, (0, 2, 1, 3)) 146 | # shape is now: (N_x, X, Y * N_y, c) 147 | tensor = tf.reshape(tensor, (1, w * num_tensor_x, h * num_tensor_y, c)) 148 | # shape is now: (1, X * N_x, Y * N_y, c) 149 | tensor = tf.transpose(tensor, (0, 2, 1, 3)) 150 | # shape is now: (1, Y * N_y, X * N_x, c) 151 | tensor = tf.pad(tensor, [[0, 0], [0, 1], [0, 1], [0, 0]], 'CONSTANT') 152 | 153 | self.image(name, tensor, **kwargs) 154 | 155 | def filters(self, name, tensor, **kwargs): 156 | """Log summary of convolutional filters. 
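
        For example (illustrative), after `x = tf.layers.conv2d(x, 32, 3, name='conv1')`
        the learned kernels can be visualized with `self.summary.filters('conv1_kernels', x)`,
        where `x` is the layer's output tensor.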
157 | 158 | Note that this method expects the output of the convolutional layer when using 159 | `tf.layers.conv2d` or for the filters to be defined in the same scope as the output tensor. 160 | """ 161 | assert 'data_format' not in kwargs 162 | with tf.name_scope('viz_filters'): 163 | # Find tensor holding trainable kernel weights 164 | name_stem = '/'.join(tensor.name.split('/')[:-1]) + '/kernel' 165 | matching_tensors = [t for t in tf.trainable_variables() if t.name.startswith(name_stem)] 166 | assert len(matching_tensors) == 1 167 | filters = matching_tensors[0] 168 | 169 | # H x W x C x N 170 | h, w, c, n = filters.shape.as_list() 171 | filters = tf.transpose(filters, perm=(3, 2, 0, 1)) 172 | # N x C x H x W 173 | filters = tf.reshape(filters, (n*c, 1, h, w)) 174 | # NC x 1 x H x W 175 | filters = tf.transpose(filters, perm=(2, 3, 1, 0)) 176 | # H x W x 1 x NC 177 | 178 | self._4d_tensor(name, filters, **kwargs) 179 | 180 | def feature_maps(self, name, tensor, mean_across_channels=True, data_format='channels_last', 181 | **kwargs): 182 | """Log summary of feature maps / image activations.""" 183 | with tf.name_scope('viz_featuremaps'): 184 | if data_format == 'channels_first': 185 | # N x C x H x W 186 | tensor = tf.transpose(tensor, perm=(0, 2, 3, 1)) 187 | # N x H x W x C 188 | if mean_across_channels: 189 | tensor = tf.reduce_mean(tensor, axis=3, keepdims=True) 190 | # N x H x W x 1 191 | tensor = tf.transpose(tensor, perm=(1, 2, 3, 0)) 192 | else: 193 | n, c, h, w = tensor.shape.as_list() 194 | tensor = tf.reshape(tensor, (n*c, 1, h, w)) 195 | # N x 1 x H x W 196 | tensor = tf.transpose(tensor, perm=(2, 3, 1, 0)) 197 | # H x W x 1 x N 198 | 199 | self._4d_tensor(name, tensor, **kwargs) 200 | 201 | def tiled_images(self, name, tensor, data_format='channels_last', **kwargs): 202 | """Log summary of feature maps / image activations.""" 203 | with tf.name_scope('viz_featuremaps'): 204 | if data_format == 'channels_first': 205 | # N x C x H x W 206 | tensor = tf.transpose(tensor, perm=(0, 2, 3, 1)) 207 | # N x H x W x C 208 | tensor = tf.transpose(tensor, perm=(1, 2, 3, 0)) 209 | # H x W x C x N 210 | self._4d_tensor(name, tensor, **kwargs) 211 | 212 | def scalar(self, name, tensor, **kwargs): 213 | """Log summary of scalar.""" 214 | operation = tf.summary.scalar(name, tensor, **kwargs) 215 | self._register_cheap_op(operation) 216 | -------------------------------------------------------------------------------- /src/core/time_manager.py: -------------------------------------------------------------------------------- 1 | """Routines to time events and restrict logs or operations by frequency.""" 2 | import time 3 | 4 | import numpy as np 5 | 6 | import logging 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class TimeManager(object): 11 | """Manage timing of event executions or measure timings.""" 12 | 13 | def __init__(self, model): 14 | """Initialize manager based on given model instance.""" 15 | self._tensorflow_session = model._tensorflow_session 16 | self._model = model 17 | 18 | self._timers = {} 19 | self._last_time = {} 20 | 21 | def start(self, name, **kwargs): 22 | """Begin timer for given event/operation.""" 23 | if name not in self._timers: 24 | timer = Timer(**kwargs) 25 | self._timers[name] = timer 26 | else: 27 | timer = self._timers[name] 28 | timer.start() 29 | 30 | def end(self, name): 31 | """End timer for given event/operation.""" 32 | assert name in self._timers 33 | return self._timers[name].end() 34 | 35 | def has_been_n_seconds_since_last(self, 
identifier, seconds): 36 | """Indicate if enough time has passed since last time. 37 | 38 | Also updates the `last time` record based on identifier. 39 | """ 40 | current_time = time.time() 41 | if identifier not in self._last_time or \ 42 | (current_time - self._last_time[identifier] > seconds): 43 | self._last_time[identifier] = current_time 44 | return True 45 | return False 46 | 47 | def log_every(self, identifier, message, seconds=1): 48 | """Limit logging of messages based on specified interval and identifier.""" 49 | if self.has_been_n_seconds_since_last(identifier, seconds): 50 | logger.info(message) 51 | else: 52 | logger.debug(message) 53 | 54 | 55 | # A local Timer class for timing 56 | class Timer(object): 57 | """Record start and end times as requested and provide summaries.""" 58 | 59 | def __init__(self, average_over_last_n_timings=10): 60 | """Store keyword parameters.""" 61 | self._average_over_last_n_timings = average_over_last_n_timings 62 | self._active = False 63 | self._timings = [] 64 | self._start_time = -1 65 | 66 | def start(self): 67 | """Cache starting time.""" 68 | # assert not self._active 69 | self._start_time = time.time() 70 | self._active = True 71 | 72 | def end(self): 73 | """Check ending time and store difference.""" 74 | assert self._active and self._start_time > 0 75 | 76 | # Calculate difference 77 | end_time = time.time() 78 | time_difference = end_time - self._start_time 79 | 80 | # Record timing (and trim history) 81 | self._timings.append(time_difference) 82 | if len(self._timings) > self._average_over_last_n_timings: 83 | self._timings = self._timings[-self._average_over_last_n_timings:] 84 | 85 | # Reset 86 | self._start_time = -1 87 | self._active = False 88 | 89 | return time_difference 90 | 91 | @property 92 | def current_mean(self): 93 | """Calculate mean timing for as many trials as specified in constructor.""" 94 | values = self._timings 95 | return np.mean(values) 96 | -------------------------------------------------------------------------------- /src/datasources/__init__.py: -------------------------------------------------------------------------------- 1 | """Data-source definitions (one class per file).""" 2 | from .frames import FramesSource 3 | from .hdf5 import HDF5Source 4 | from .unityeyes import UnityEyes 5 | from .video import Video 6 | from .webcam import Webcam 7 | __all__ = ('FramesSource', 'HDF5Source', 'UnityEyes', 'Video', 'Webcam') 8 | -------------------------------------------------------------------------------- /src/datasources/frames.py: -------------------------------------------------------------------------------- 1 | """Data source of stream of frames.""" 2 | import bz2 3 | import dlib 4 | import queue 5 | import shutil 6 | import threading 7 | import time 8 | from typing import Tuple 9 | import os 10 | from urllib.request import urlopen 11 | 12 | import cv2 as cv 13 | import numpy as np 14 | import tensorflow as tf 15 | 16 | from core import BaseDataSource 17 | 18 | 19 | class FramesSource(BaseDataSource): 20 | """Preprocessing of stream of frames.""" 21 | 22 | def __init__(self, 23 | tensorflow_session: tf.Session, 24 | batch_size: int, 25 | eye_image_shape: Tuple[int, int], 26 | staging: bool=False, 27 | **kwargs): 28 | """Create queues and threads to read and preprocess data.""" 29 | self._eye_image_shape = eye_image_shape 30 | self._proc_mutex = threading.Lock() 31 | self._read_mutex = threading.Lock() 32 | 33 | self._frame_read_queue = queue.Queue(maxsize=1) 34 | self._frame_read_thread = 
threading.Thread(target=self.frame_read_job, name='frame_read') 35 | self._frame_read_thread.daemon = True 36 | self._frame_read_thread.start() 37 | 38 | self._current_index = 0 39 | self._last_frame_index = 0 40 | self._indices = [] 41 | self._frames = {} 42 | self._open = True 43 | 44 | # Call parent class constructor 45 | super().__init__(tensorflow_session, batch_size=batch_size, num_threads=1, 46 | fread_queue_capacity=batch_size, preprocess_queue_capacity=batch_size, 47 | shuffle=False, staging=staging, **kwargs) 48 | 49 | _short_name = 'Frames' 50 | 51 | @property 52 | def short_name(self): 53 | """Short name specifying source.""" 54 | return self._short_name 55 | 56 | def frame_read_job(self): 57 | """Read frame from webcam.""" 58 | generate_frame = self.frame_generator() 59 | while True: 60 | before_frame_read = time.time() 61 | bgr = next(generate_frame) 62 | if bgr is not None: 63 | after_frame_read = time.time() 64 | with self._read_mutex: 65 | self._frame_read_queue.queue.clear() 66 | self._frame_read_queue.put_nowait((before_frame_read, bgr, after_frame_read)) 67 | self._open = False 68 | 69 | def frame_generator(self): 70 | """Read frame from webcam.""" 71 | raise NotImplementedError('Frames::frame_generator not implemented.') 72 | 73 | def entry_generator(self, yield_just_one=False): 74 | """Generate eye image entries by detecting faces and facial landmarks.""" 75 | try: 76 | while range(1) if yield_just_one else True: 77 | # Grab frame 78 | with self._proc_mutex: 79 | before_frame_read, bgr, after_frame_read = self._frame_read_queue.get() 80 | bgr = cv.flip(bgr, flipCode=1) # Mirror 81 | current_index = self._last_frame_index + 1 82 | self._last_frame_index = current_index 83 | 84 | grey = cv.cvtColor(bgr, cv.COLOR_BGR2GRAY) 85 | frame = { 86 | 'frame_index': current_index, 87 | 'time': { 88 | 'before_frame_read': before_frame_read, 89 | 'after_frame_read': after_frame_read, 90 | }, 91 | 'bgr': bgr, 92 | 'grey': grey, 93 | } 94 | self._frames[current_index] = frame 95 | self._indices.append(current_index) 96 | 97 | # Keep just a few frames around 98 | frames_to_keep = 120 99 | if len(self._indices) > frames_to_keep: 100 | for index in self._indices[:-frames_to_keep]: 101 | del self._frames[index] 102 | self._indices = self._indices[-frames_to_keep:] 103 | 104 | # Eye image segmentation pipeline 105 | self.detect_faces(frame) 106 | self.detect_landmarks(frame) 107 | self.calculate_smoothed_landmarks(frame) 108 | self.segment_eyes(frame) 109 | self.update_face_boxes(frame) 110 | frame['time']['after_preprocessing'] = time.time() 111 | 112 | for i, eye_dict in enumerate(frame['eyes']): 113 | yield { 114 | 'frame_index': np.int64(current_index), 115 | 'eye': eye_dict['image'], 116 | 'eye_index': np.uint8(i), 117 | } 118 | 119 | finally: 120 | # Execute any cleanup operations as necessary 121 | pass 122 | 123 | def preprocess_entry(self, entry): 124 | """Preprocess segmented eye images for use as neural network input.""" 125 | eye = entry['eye'] 126 | eye = cv.equalizeHist(eye) 127 | eye = eye.astype(np.float32) 128 | eye *= 2.0 / 255.0 129 | eye -= 1.0 130 | eye = np.expand_dims(eye, -1 if self.data_format == 'NHWC' else 0) 131 | entry['eye'] = eye 132 | return entry 133 | 134 | def detect_faces(self, frame): 135 | """Detect all faces in a frame.""" 136 | frame_index = frame['frame_index'] 137 | previous_index = self._indices[self._indices.index(frame_index) - 1] 138 | previous_frame = self._frames[previous_index] 139 | if ('last_face_detect_index' not in previous_frame or 140 
| frame['frame_index'] - previous_frame['last_face_detect_index'] > 59): 141 | detector = get_face_detector() 142 | if detector.__class__.__name__ == 'CascadeClassifier': 143 | detections = detector.detectMultiScale(frame['grey']) 144 | else: 145 | detections = detector(cv.resize(frame['grey'], (0, 0), fx=0.5, fy=0.5), 0) 146 | faces = [] 147 | for d in detections: 148 | try: 149 | l, t, r, b = d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom() 150 | l *= 2 151 | t *= 2 152 | r *= 2 153 | b *= 2 154 | w, h = r - l, b - t 155 | except AttributeError: # Using OpenCV LBP detector on CPU 156 | l, t, w, h = d 157 | faces.append((l, t, w, h)) 158 | faces.sort(key=lambda bbox: bbox[0]) 159 | frame['faces'] = faces 160 | frame['last_face_detect_index'] = frame['frame_index'] 161 | 162 | # Clear previous known landmarks. This is to disable smoothing when new face detect 163 | # occurs. This allows for recovery of drifted detections. 164 | previous_frame['landmarks'] = [] 165 | else: 166 | frame['faces'] = previous_frame['faces'] 167 | frame['last_face_detect_index'] = previous_frame['last_face_detect_index'] 168 | 169 | def detect_landmarks(self, frame): 170 | """Detect 5-point facial landmarks for faces in frame.""" 171 | predictor = get_landmarks_predictor() 172 | landmarks = [] 173 | for face in frame['faces']: 174 | l, t, w, h = face 175 | rectangle = dlib.rectangle(left=int(l), top=int(t), right=int(l+w), bottom=int(t+h)) 176 | landmarks_dlib = predictor(frame['grey'], rectangle) 177 | 178 | def tuple_from_dlib_shape(index): 179 | p = landmarks_dlib.part(index) 180 | return (p.x, p.y) 181 | 182 | num_landmarks = landmarks_dlib.num_parts 183 | landmarks.append(np.array([tuple_from_dlib_shape(i) for i in range(num_landmarks)])) 184 | frame['landmarks'] = landmarks 185 | 186 | _smoothing_window_size = 10 187 | _smoothing_coefficient_decay = 0.5 188 | _smoothing_coefficients = None 189 | 190 | def calculate_smoothed_landmarks(self, frame): 191 | """If there are previous landmark detections, try to smooth current prediction.""" 192 | # Cache coefficients based on defined sliding window size 193 | if self._smoothing_coefficients is None: 194 | coefficients = np.power(self._smoothing_coefficient_decay, 195 | list(reversed(list(range(self._smoothing_window_size))))) 196 | coefficients /= np.sum(coefficients) 197 | self._smoothing_coefficients = coefficients.reshape(-1, 1) 198 | 199 | # Get a window of frames 200 | current_index = self._indices.index(frame['frame_index']) 201 | a = current_index - self._smoothing_window_size + 1 202 | if a < 0: 203 | """If slice extends before last known frame.""" 204 | return 205 | window_indices = self._indices[a:current_index + 1] 206 | window_frames = [self._frames[idx] for idx in window_indices] 207 | window_num_landmark_entries = np.array([len(f['landmarks']) for f in window_frames]) 208 | if np.any(window_num_landmark_entries == 0): 209 | """Any frame has zero faces detected.""" 210 | return 211 | if not np.all(window_num_landmark_entries == window_num_landmark_entries[0]): 212 | """Not the same number of faces detected in entire window.""" 213 | return 214 | 215 | # Apply coefficients to landmarks in window 216 | window_landmarks = np.asarray([f['landmarks'] for f in window_frames]) 217 | frame['smoothed_landmarks'] = np.sum( 218 | np.multiply(window_landmarks.reshape(self._smoothing_window_size, -1), 219 | self._smoothing_coefficients), 220 | axis=0, 221 | ).reshape(window_num_landmark_entries[-1], -1, 2) 222 | 223 | def segment_eyes(self, frame): 
224 | """From found landmarks in previous steps, segment eye image.""" 225 | eyes = [] 226 | 227 | # Final output dimensions 228 | oh, ow = self._eye_image_shape 229 | 230 | # Select which landmarks (raw/smoothed) to use 231 | frame_landmarks = (frame['smoothed_landmarks'] if 'smoothed_landmarks' in frame 232 | else frame['landmarks']) 233 | 234 | for face, landmarks in zip(frame['faces'], frame_landmarks): 235 | # Segment eyes 236 | # for corner1, corner2, is_left in [(36, 39, True), (42, 45, False)]: 237 | for corner1, corner2, is_left in [(2, 3, True), (0, 1, False)]: 238 | x1, y1 = landmarks[corner1, :] 239 | x2, y2 = landmarks[corner2, :] 240 | eye_width = 1.5 * np.linalg.norm(landmarks[corner1, :] - landmarks[corner2, :]) 241 | if eye_width == 0.0: 242 | continue 243 | cx, cy = 0.5 * (x1 + x2), 0.5 * (y1 + y2) 244 | 245 | # Centre image on middle of eye 246 | translate_mat = np.asmatrix(np.eye(3)) 247 | translate_mat[:2, 2] = [[-cx], [-cy]] 248 | inv_translate_mat = np.asmatrix(np.eye(3)) 249 | inv_translate_mat[:2, 2] = -translate_mat[:2, 2] 250 | 251 | # Rotate to be upright 252 | roll = 0.0 if x1 == x2 else np.arctan((y2 - y1) / (x2 - x1)) 253 | rotate_mat = np.asmatrix(np.eye(3)) 254 | cos = np.cos(-roll) 255 | sin = np.sin(-roll) 256 | rotate_mat[0, 0] = cos 257 | rotate_mat[0, 1] = -sin 258 | rotate_mat[1, 0] = sin 259 | rotate_mat[1, 1] = cos 260 | inv_rotate_mat = rotate_mat.T 261 | 262 | # Scale 263 | scale = ow / eye_width 264 | scale_mat = np.asmatrix(np.eye(3)) 265 | scale_mat[0, 0] = scale_mat[1, 1] = scale 266 | inv_scale = 1.0 / scale 267 | inv_scale_mat = np.asmatrix(np.eye(3)) 268 | inv_scale_mat[0, 0] = inv_scale_mat[1, 1] = inv_scale 269 | 270 | # Centre image 271 | centre_mat = np.asmatrix(np.eye(3)) 272 | centre_mat[:2, 2] = [[0.5 * ow], [0.5 * oh]] 273 | inv_centre_mat = np.asmatrix(np.eye(3)) 274 | inv_centre_mat[:2, 2] = -centre_mat[:2, 2] 275 | 276 | # Get rotated and scaled, and segmented image 277 | transform_mat = centre_mat * scale_mat * rotate_mat * translate_mat 278 | inv_transform_mat = (inv_translate_mat * inv_rotate_mat * inv_scale_mat * 279 | inv_centre_mat) 280 | eye_image = cv.warpAffine(frame['grey'], transform_mat[:2, :], (ow, oh)) 281 | if is_left: 282 | eye_image = np.fliplr(eye_image) 283 | eyes.append({ 284 | 'image': eye_image, 285 | 'inv_landmarks_transform_mat': inv_transform_mat, 286 | 'side': 'left' if is_left else 'right', 287 | }) 288 | frame['eyes'] = eyes 289 | 290 | def update_face_boxes(self, frame): 291 | """Update face bounding box based on detected landmarks.""" 292 | frame_landmarks = (frame['smoothed_landmarks'] if 'smoothed_landmarks' in frame 293 | else frame['landmarks']) 294 | for i, (face, landmarks) in enumerate(zip(frame['faces'], frame_landmarks)): 295 | x_min, y_min = np.amin(landmarks, axis=0) 296 | x_max, y_max = np.amax(landmarks, axis=0) 297 | x_mid, y_mid = 0.5 * (x_max + x_min), 0.5 * (y_max + y_min) 298 | w, h = x_max - x_min, y_max - y_min 299 | new_w = 2.2 * max(h, w) 300 | half_w = 0.5 * new_w 301 | frame['faces'][i] = (int(x_mid - half_w), int(y_mid - half_w), int(new_w), int(new_w)) 302 | 303 | # x1, y1 = landmarks[0, :] 304 | # x2, y2 = landmarks[3, :] 305 | # face_width = 2.5 * np.sqrt((x1 - x2)**2 + (y1 - y2)**2) 306 | # if face_width == 0.0: 307 | # continue 308 | # 309 | # cx, cy = landmarks[4, :] 310 | # roll = 0.0 if x1 == x2 else np.arctan((y2 - y1) / (x2 - x1)) 311 | # 312 | # hdx = 0.5 * face_width * (2. - np.abs(np.cos(roll))) 313 | # hdy = 0.5 * face_width * (1. 
+ np.abs(np.sin(roll))) 314 | # print(np.degrees(roll), face_width, hdx, hdy) 315 | # frame['faces'][i] = (int(cx - hdx), int(cy - hdy), int(2*hdx), int(2*hdy)) 316 | 317 | _face_detector = None 318 | _landmarks_predictor = None 319 | 320 | 321 | def _get_dlib_data_file(dat_name): 322 | dat_dir = os.path.relpath('%s/../3rdparty' % os.path.basename(__file__)) 323 | dat_path = '%s/%s' % (dat_dir, dat_name) 324 | if not os.path.isdir(dat_dir): 325 | os.mkdir(dat_dir) 326 | 327 | # Download trained shape detector 328 | if not os.path.isfile(dat_path): 329 | with urlopen('http://dlib.net/files/%s.bz2' % dat_name) as response: 330 | with bz2.BZ2File(response) as bzf, open(dat_path, 'wb') as f: 331 | shutil.copyfileobj(bzf, f) 332 | 333 | return dat_path 334 | 335 | 336 | def _get_opencv_xml(xml_name): 337 | xml_dir = os.path.relpath('%s/../3rdparty' % os.path.basename(__file__)) 338 | xml_path = '%s/%s' % (xml_dir, xml_name) 339 | if not os.path.isdir(xml_dir): 340 | os.mkdir(xml_dir) 341 | 342 | # Download trained shape detector 343 | if not os.path.isfile(xml_path): 344 | url_stem = 'https://raw.githubusercontent.com/opencv/opencv/master/data/lbpcascades' 345 | with urlopen('%s/%s' % (url_stem, xml_name)) as response: 346 | with open(xml_path, 'wb') as f: 347 | shutil.copyfileobj(response, f) 348 | 349 | return xml_path 350 | 351 | 352 | def get_face_detector(): 353 | """Get a singleton dlib face detector.""" 354 | global _face_detector 355 | if not _face_detector: 356 | try: 357 | dat_path = _get_dlib_data_file('mmod_human_face_detector.dat') 358 | _face_detector = dlib.cnn_face_detection_model_v1(dat_path) 359 | except: 360 | xml_path = _get_opencv_xml('lbpcascade_frontalface_improved.xml') 361 | _face_detector = cv.CascadeClassifier(xml_path) 362 | return _face_detector 363 | 364 | 365 | def get_landmarks_predictor(): 366 | """Get a singleton dlib face landmark predictor.""" 367 | global _landmarks_predictor 368 | if not _landmarks_predictor: 369 | dat_path = _get_dlib_data_file('shape_predictor_5_face_landmarks.dat') 370 | # dat_path = _get_dlib_data_file('shape_predictor_68_face_landmarks.dat') 371 | _landmarks_predictor = dlib.shape_predictor(dat_path) 372 | return _landmarks_predictor 373 | -------------------------------------------------------------------------------- /src/datasources/hdf5.py: -------------------------------------------------------------------------------- 1 | """HDF5 data source for gaze estimation.""" 2 | from threading import Lock 3 | from typing import List 4 | 5 | import cv2 as cv 6 | import h5py 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | from core import BaseDataSource 11 | import util.gazemap 12 | 13 | 14 | class HDF5Source(BaseDataSource): 15 | """HDF5 data loading class (using h5py).""" 16 | 17 | def __init__(self, 18 | tensorflow_session: tf.Session, 19 | batch_size: int, 20 | keys_to_use: List[str], 21 | hdf_path: str, 22 | testing=False, 23 | eye_image_shape=(36, 60), 24 | **kwargs): 25 | """Create queues and threads to read and preprocess data from specified keys.""" 26 | hdf5 = h5py.File(hdf_path, 'r') 27 | self._short_name = 'HDF:%s' % '/'.join(hdf_path.split('/')[-2:]) 28 | if testing: 29 | self._short_name += ':test' 30 | 31 | # Cache some parameters 32 | self._eye_image_shape = eye_image_shape 33 | 34 | # Create global index over all specified keys 35 | self._index_to_key = {} 36 | index_counter = 0 37 | for key in keys_to_use: 38 | n = hdf5[key]['eye'].shape[0] 39 | for i in range(n): 40 | self._index_to_key[index_counter] = (key, 
i) 41 | index_counter += 1 42 | self._num_entries = index_counter 43 | 44 | self._hdf5 = hdf5 45 | self._mutex = Lock() 46 | self._current_index = 0 47 | super().__init__(tensorflow_session, batch_size=batch_size, testing=testing, **kwargs) 48 | 49 | # Set index to 0 again as base class constructor called HDF5Source::entry_generator once to 50 | # get preprocessed sample. 51 | self._current_index = 0 52 | 53 | @property 54 | def num_entries(self): 55 | """Number of entries in this data source.""" 56 | return self._num_entries 57 | 58 | @property 59 | def short_name(self): 60 | """Short name specifying source HDF5.""" 61 | return self._short_name 62 | 63 | def cleanup(self): 64 | """Close HDF5 file before running base class cleanup routine.""" 65 | super().cleanup() 66 | 67 | def reset(self): 68 | """Reset index.""" 69 | with self._mutex: 70 | super().reset() 71 | self._current_index = 0 72 | 73 | def entry_generator(self, yield_just_one=False): 74 | """Read entry from HDF5.""" 75 | try: 76 | while range(1) if yield_just_one else True: 77 | with self._mutex: 78 | if self._current_index >= self.num_entries: 79 | if self.testing: 80 | break 81 | else: 82 | self._current_index = 0 83 | current_index = self._current_index 84 | self._current_index += 1 85 | 86 | key, index = self._index_to_key[current_index] 87 | data = self._hdf5[key] 88 | entry = {} 89 | for name in ('eye', 'gaze', 'head'): 90 | if name in data: 91 | entry[name] = data[name][index, :] 92 | yield entry 93 | finally: 94 | # Execute any cleanup operations as necessary 95 | pass 96 | 97 | def preprocess_entry(self, entry): 98 | """Resize eye image and normalize intensities.""" 99 | oh, ow = self._eye_image_shape 100 | eye = entry['eye'] 101 | eye = cv.resize(eye, (ow, oh)) 102 | eye = eye.astype(np.float32) 103 | eye *= 2.0 / 255.0 104 | eye -= 1.0 105 | eye = np.expand_dims(eye, axis=0 if self.data_format == 'NCHW' else -1) 106 | entry['eye'] = eye 107 | 108 | entry['gazemaps'] = util.gazemap.from_gaze2d( 109 | entry['gaze'], output_size=(oh, ow), scale=0.5, 110 | ).astype(np.float32) 111 | if self.data_format == 'NHWC': 112 | np.transpose(entry['gazemaps'], (1, 2, 0)) 113 | 114 | # Ensure all values in an entry are 4-byte floating point numbers 115 | for key, value in entry.items(): 116 | entry[key] = value.astype(np.float32) 117 | 118 | return entry 119 | -------------------------------------------------------------------------------- /src/datasources/unityeyes.py: -------------------------------------------------------------------------------- 1 | """UnityEyes data source for gaze estimation.""" 2 | import os 3 | from threading import Lock 4 | 5 | import cv2 as cv 6 | import numpy as np 7 | import tensorflow as tf 8 | import ujson 9 | 10 | from core import BaseDataSource 11 | import util.gaze 12 | import util.heatmap 13 | 14 | 15 | class UnityEyes(BaseDataSource): 16 | """UnityEyes data loading class.""" 17 | 18 | def __init__(self, 19 | tensorflow_session: tf.Session, 20 | batch_size: int, 21 | unityeyes_path: str, 22 | testing=False, 23 | generate_heatmaps=False, 24 | eye_image_shape=(36, 60), 25 | heatmaps_scale=1.0, 26 | **kwargs): 27 | """Create queues and threads to read and preprocess data.""" 28 | self._short_name = 'UnityEyes' 29 | if testing: 30 | self._short_name += ':test' 31 | 32 | # Cache some parameters 33 | self._eye_image_shape = eye_image_shape 34 | self._heatmaps_scale = heatmaps_scale 35 | 36 | # Create global index over all specified keys 37 | self._images_path = unityeyes_path 38 | self._file_stems = 
sorted([p[:-5] for p in os.listdir(unityeyes_path) 39 | if p.endswith('.json')]) 40 | self._num_entries = len(self._file_stems) 41 | 42 | self._mutex = Lock() 43 | self._current_index = 0 44 | 45 | # Define bounds for noise values for different augmentation types 46 | self._difficulty = 0.0 47 | self._augmentation_ranges = { # (easy, hard) 48 | 'translation': (2.0, 10.0), 49 | 'rotation': (0.1, 2.0), 50 | 'intensity': (0.5, 20.0), 51 | 'blur': (0.1, 1.0), 52 | 'scale': (0.01, 0.1), 53 | 'rescale': (1.0, 0.2), 54 | 'num_line': (0.0, 2.0), 55 | 'heatmap_sigma': (5.0, 2.5), 56 | } 57 | self._generate_heatmaps = generate_heatmaps 58 | 59 | # Call parent class constructor 60 | super().__init__(tensorflow_session, batch_size=batch_size, testing=testing, **kwargs) 61 | 62 | @property 63 | def num_entries(self): 64 | """Number of entries in this data source.""" 65 | return self._num_entries 66 | 67 | @property 68 | def short_name(self): 69 | """Short name specifying source UnityEyes.""" 70 | return self._short_name 71 | 72 | def reset(self): 73 | """Reset index.""" 74 | with self._mutex: 75 | super().reset() 76 | self._current_index = 0 77 | 78 | def entry_generator(self, yield_just_one=False): 79 | """Read entry from UnityEyes.""" 80 | try: 81 | while range(1) if yield_just_one else True: 82 | with self._mutex: 83 | if self._current_index >= self.num_entries: 84 | if self.testing: 85 | break 86 | else: 87 | self._current_index = 0 88 | current_index = self._current_index 89 | self._current_index += 1 90 | 91 | file_stem = self._file_stems[current_index] 92 | jpg_path = '%s/%s.jpg' % (self._images_path, file_stem) 93 | json_path = '%s/%s.json' % (self._images_path, file_stem) 94 | if not os.path.isfile(jpg_path) or not os.path.isfile(json_path): 95 | continue 96 | with open(json_path, 'r') as f: 97 | json_data = ujson.load(f) 98 | entry = { 99 | 'full_image': cv.imread(jpg_path, cv.IMREAD_GRAYSCALE), 100 | 'json_data': json_data, 101 | } 102 | assert entry['full_image'] is not None 103 | yield entry 104 | finally: 105 | # Execute any cleanup operations as necessary 106 | pass 107 | 108 | def set_difficulty(self, difficulty): 109 | """Set difficulty of training data.""" 110 | assert isinstance(difficulty, float) 111 | assert 0.0 <= difficulty <= 1.0 112 | self._difficulty = difficulty 113 | 114 | def set_augmentation_range(self, augmentation_type, easy_value, hard_value): 115 | """Set 'range' for a known augmentation type.""" 116 | assert isinstance(augmentation_type, str) 117 | assert augmentation_type in self._augmentation_ranges 118 | assert isinstance(easy_value, float) or isinstance(easy_value, int) 119 | assert isinstance(hard_value, float) or isinstance(hard_value, int) 120 | self._augmentation_ranges[augmentation_type] = (easy_value, hard_value) 121 | 122 | def preprocess_entry(self, entry): 123 | """Use annotations to segment eyes and calculate gaze direction.""" 124 | full_image = entry['full_image'] 125 | json_data = entry['json_data'] 126 | del entry['full_image'] 127 | del entry['json_data'] 128 | 129 | ih, iw = full_image.shape 130 | iw_2, ih_2 = 0.5 * iw, 0.5 * ih 131 | oh, ow = self._eye_image_shape 132 | 133 | def process_coords(coords_list): 134 | coords = [eval(l) for l in coords_list] 135 | return np.array([(x, ih-y, z) for (x, y, z) in coords]) 136 | interior_landmarks = process_coords(json_data['interior_margin_2d']) 137 | caruncle_landmarks = process_coords(json_data['caruncle_2d']) 138 | iris_landmarks = process_coords(json_data['iris_2d']) 139 | 140 | random_multipliers = 
[] 141 | 142 | def value_from_type(augmentation_type): 143 | # Scale to be in range 144 | easy_value, hard_value = self._augmentation_ranges[augmentation_type] 145 | value = (hard_value - easy_value) * self._difficulty + easy_value 146 | value = (np.clip(value, easy_value, hard_value) 147 | if easy_value < hard_value 148 | else np.clip(value, hard_value, easy_value)) 149 | return value 150 | 151 | def noisy_value_from_type(augmentation_type): 152 | # Get normal distributed random value 153 | if len(random_multipliers) == 0: 154 | random_multipliers.extend( 155 | list(np.random.normal(size=(len(self._augmentation_ranges),)))) 156 | return random_multipliers.pop() * value_from_type(augmentation_type) 157 | 158 | # Only select almost frontal images 159 | h_pitch, h_yaw, _ = eval(json_data['head_pose']) 160 | if h_pitch > 180.0: # Need to correct pitch 161 | h_pitch -= 360.0 162 | h_yaw -= 180.0 # Need to correct yaw 163 | if abs(h_pitch) > 20 or abs(h_yaw) > 20: 164 | return None 165 | 166 | # Prepare to segment eye image 167 | left_corner = np.mean(caruncle_landmarks[:, :2], axis=0) 168 | right_corner = interior_landmarks[8, :2] 169 | eye_width = 1.5 * abs(left_corner[0] - right_corner[0]) 170 | eye_middle = np.mean([np.amin(interior_landmarks[:, :2], axis=0), 171 | np.amax(interior_landmarks[:, :2], axis=0)], axis=0) 172 | 173 | # Centre axes to eyeball centre 174 | translate_mat = np.asmatrix(np.eye(3)) 175 | translate_mat[:2, 2] = [[-iw_2], [-ih_2]] 176 | 177 | # Rotate eye image if requested 178 | rotate_mat = np.asmatrix(np.eye(3)) 179 | rotation_noise = noisy_value_from_type('rotation') 180 | if rotation_noise > 0: 181 | rotate_angle = np.radians(rotation_noise) 182 | cos_rotate = np.cos(rotate_angle) 183 | sin_rotate = np.sin(rotate_angle) 184 | rotate_mat[0, 0] = cos_rotate 185 | rotate_mat[0, 1] = -sin_rotate 186 | rotate_mat[1, 0] = sin_rotate 187 | rotate_mat[1, 1] = cos_rotate 188 | 189 | # Scale image to fit output dimensions (with a little bit of noise) 190 | scale_mat = np.asmatrix(np.eye(3)) 191 | scale = 1. + noisy_value_from_type('scale') 192 | scale_inv = 1. 
/ scale 193 | np.fill_diagonal(scale_mat, ow / eye_width * scale) 194 | original_eyeball_radius = 71.7593 195 | eyeball_radius = original_eyeball_radius * scale_mat[0, 0] # See: https://goo.gl/ZnXgDE 196 | entry['radius'] = np.float32(eyeball_radius) 197 | 198 | # Re-centre eye image such that eye fits (based on determined `eye_middle`) 199 | recentre_mat = np.asmatrix(np.eye(3)) 200 | recentre_mat[0, 2] = iw/2 - eye_middle[0] + 0.5 * eye_width * scale_inv 201 | recentre_mat[1, 2] = ih/2 - eye_middle[1] + 0.5 * oh / ow * eye_width * scale_inv 202 | recentre_mat[0, 2] += noisy_value_from_type('translation') # x 203 | recentre_mat[1, 2] += noisy_value_from_type('translation') # y 204 | 205 | # Apply transforms 206 | transform_mat = recentre_mat * scale_mat * rotate_mat * translate_mat 207 | eye = cv.warpAffine(full_image, transform_mat[:2, :3], (ow, oh)) 208 | 209 | # Convert look vector to gaze direction in polar angles 210 | look_vec = np.array(eval(json_data['eye_details']['look_vec']))[:3] 211 | look_vec[0] = -look_vec[0] 212 | original_gaze = util.gaze.vector_to_pitchyaw(look_vec.reshape((1, 3))).flatten() 213 | look_vec = rotate_mat * look_vec.reshape(3, 1) 214 | gaze = util.gaze.vector_to_pitchyaw(look_vec.reshape((1, 3))).flatten() 215 | if gaze[1] > 0.0: 216 | gaze[1] = np.pi - gaze[1] 217 | elif gaze[1] < 0.0: 218 | gaze[1] = -(np.pi + gaze[1]) 219 | entry['gaze'] = gaze.astype(np.float32) 220 | 221 | # Draw line randomly 222 | num_line_noise = int(np.round(noisy_value_from_type('num_line'))) 223 | if num_line_noise > 0: 224 | line_rand_nums = np.random.rand(5 * num_line_noise) 225 | for i in range(num_line_noise): 226 | j = 5 * i 227 | lx0, ly0 = int(ow * line_rand_nums[j]), oh 228 | lx1, ly1 = ow, int(oh * line_rand_nums[j + 1]) 229 | direction = line_rand_nums[j + 2] 230 | if direction < 0.25: 231 | lx1 = ly0 = 0 232 | elif direction < 0.5: 233 | lx1 = 0 234 | elif direction < 0.75: 235 | ly0 = 0 236 | line_colour = int(255 * line_rand_nums[j + 3]) 237 | eye = cv.line(eye, (lx0, ly0), (lx1, ly1), 238 | color=(line_colour, line_colour, line_colour), 239 | thickness=max(1, int(6*line_rand_nums[j + 4])), 240 | lineType=cv.LINE_AA) 241 | 242 | # Rescale image if required 243 | rescale_max = value_from_type('rescale') 244 | if rescale_max < 1.0: 245 | rescale_noise = np.random.uniform(low=rescale_max, high=1.0) 246 | interpolation = cv.INTER_CUBIC 247 | eye = cv.resize(eye, dsize=(0, 0), fx=rescale_noise, fy=rescale_noise, 248 | interpolation=interpolation) 249 | eye = cv.equalizeHist(eye) 250 | eye = cv.resize(eye, dsize=(ow, oh), interpolation=interpolation) 251 | 252 | # Add rgb noise to eye image 253 | intensity_noise = int(value_from_type('intensity')) 254 | if intensity_noise > 0: 255 | eye = eye.astype(np.int16) 256 | eye += np.random.randint(low=-intensity_noise, high=intensity_noise, 257 | size=eye.shape, dtype=np.int16) 258 | cv.normalize(eye, eye, alpha=0, beta=255, norm_type=cv.NORM_MINMAX) 259 | eye = eye.astype(np.uint8) 260 | 261 | # Add blur to eye image 262 | blur_noise = noisy_value_from_type('blur') 263 | if blur_noise > 0: 264 | eye = cv.GaussianBlur(eye, (7, 7), 0.5 + np.abs(blur_noise)) 265 | 266 | # Histogram equalization and preprocessing for NN 267 | eye = cv.equalizeHist(eye) 268 | eye = eye.astype(np.float32) 269 | eye *= 2.0 / 255.0 270 | eye -= 1.0 271 | eye = np.expand_dims(eye, -1 if self.data_format == 'NHWC' else 0) 272 | entry['eye'] = eye 273 | 274 | # Select and transform landmark coordinates 275 | iris_centre = np.asarray([ 276 | iw_2 + 
original_eyeball_radius * -np.cos(original_gaze[0]) * np.sin(original_gaze[1]), 277 | ih_2 + original_eyeball_radius * -np.sin(original_gaze[0]), 278 | ]) 279 | landmarks = np.concatenate([interior_landmarks[::2, :2], # 8 280 | iris_landmarks[::4, :2], # 8 281 | iris_centre.reshape((1, 2)), 282 | [[iw_2, ih_2]], # Eyeball centre 283 | ]) # 18 in total 284 | landmarks = np.asmatrix(np.pad(landmarks, ((0, 0), (0, 1)), 'constant', 285 | constant_values=1)) 286 | landmarks = np.asarray(landmarks * transform_mat.T) 287 | landmarks = landmarks[:, :2] # We only need x, y 288 | entry['landmarks'] = landmarks.astype(np.float32) 289 | 290 | # Generate heatmaps if necessary 291 | if self._generate_heatmaps: 292 | # Should be half-scale (compared to eye image) 293 | entry['heatmaps'] = np.asarray([ 294 | util.heatmap.gaussian_2d( 295 | shape=(self._heatmaps_scale*oh, self._heatmaps_scale*ow), 296 | centre=self._heatmaps_scale*landmark, 297 | sigma=value_from_type('heatmap_sigma'), 298 | ) 299 | for landmark in entry['landmarks'] 300 | ]).astype(np.float32) 301 | if self.data_format == 'NHWC': 302 | entry['heatmaps'] = np.transpose(entry['heatmaps'], (1, 2, 0)) 303 | 304 | return entry 305 | -------------------------------------------------------------------------------- /src/datasources/video.py: -------------------------------------------------------------------------------- 1 | """Video (file) data source for gaze estimation.""" 2 | import os 3 | import time 4 | 5 | import cv2 as cv 6 | 7 | from .frames import FramesSource 8 | 9 | 10 | class Video(FramesSource): 11 | """Video frame grabbing and preprocessing.""" 12 | 13 | def __init__(self, video_path, **kwargs): 14 | """Create queues and threads to read and preprocess data.""" 15 | self._short_name = 'Video' 16 | 17 | assert os.path.isfile(video_path) 18 | self._video_path = video_path 19 | self._capture = cv.VideoCapture(video_path) 20 | 21 | # Call parent class constructor 22 | super().__init__(staging=False, **kwargs) 23 | 24 | def frame_generator(self): 25 | """Read frame from webcam.""" 26 | last_frame = None 27 | while True: 28 | ret, frame = self._capture.read() 29 | if ret: 30 | yield frame 31 | last_frame = frame 32 | else: 33 | yield last_frame 34 | break 35 | 36 | def frame_read_job(self): 37 | """Read frame from video (without skipping).""" 38 | generate_frame = self.frame_generator() 39 | while True: 40 | before_frame_read = time.time() 41 | try: 42 | bgr = next(generate_frame) 43 | except StopIteration: 44 | break 45 | if bgr is not None: 46 | after_frame_read = time.time() 47 | with self._read_mutex: 48 | self._frame_read_queue.put((before_frame_read, bgr, after_frame_read)) 49 | 50 | print('Video "%s" closed.' 
% self._video_path) 51 | self._open = False 52 | -------------------------------------------------------------------------------- /src/datasources/webcam.py: -------------------------------------------------------------------------------- 1 | """Webcam data source for gaze estimation.""" 2 | import cv2 as cv 3 | 4 | from .frames import FramesSource 5 | 6 | 7 | class Webcam(FramesSource): 8 | """Webcam frame grabbing and preprocessing.""" 9 | 10 | def __init__(self, camera_id=0, fps=60, **kwargs): 11 | """Create queues and threads to read and preprocess data.""" 12 | self._short_name = 'Webcam' 13 | 14 | self._capture = cv.VideoCapture(camera_id) 15 | self._capture.set(cv.CAP_PROP_FRAME_WIDTH, 1280) 16 | self._capture.set(cv.CAP_PROP_FRAME_HEIGHT, 720) 17 | self._capture.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc(*'MJPG')) 18 | self._capture.set(cv.CAP_PROP_FPS, fps) 19 | 20 | # Call parent class constructor 21 | super().__init__(**kwargs) 22 | 23 | def frame_generator(self): 24 | """Read frame from webcam.""" 25 | while True: 26 | ret, bgr = self._capture.read() 27 | if ret: 28 | yield bgr 29 | -------------------------------------------------------------------------------- /src/dpg_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Main script for training the DPG model for within-MPIIGaze evaluations.""" 3 | import argparse 4 | 5 | import coloredlogs 6 | import tensorflow as tf 7 | 8 | if __name__ == '__main__': 9 | 10 | # Set global log level 11 | parser = argparse.ArgumentParser(description='Train the Deep Pictorial Gaze model.') 12 | parser.add_argument('-v', type=str, help='logging level', default='info', 13 | choices=['debug', 'info', 'warning', 'error', 'critical']) 14 | args = parser.parse_args() 15 | coloredlogs.install( 16 | datefmt='%d/%m %H:%M', 17 | fmt='%(asctime)s %(levelname)s %(message)s', 18 | level=args.v.upper(), 19 | ) 20 | 21 | for i in range(0, 15): 22 | # Specify which people to train on, and which to test on 23 | person_id = 'p%02d' % i 24 | other_person_ids = ['p%02d' % j for j in range(15) if i != j] 25 | 26 | # Initialize Tensorflow session 27 | tf.reset_default_graph() 28 | tf.logging.set_verbosity(tf.logging.ERROR) 29 | gpu_options = tf.GPUOptions(allow_growth=True) 30 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as session: 31 | 32 | # Declare some parameters 33 | batch_size = 32 34 | 35 | # Define training data source 36 | from datasources import HDF5Source 37 | 38 | # Define model 39 | from models import DPG 40 | model = DPG( 41 | session, 42 | learning_schedule=[ 43 | { 44 | 'loss_terms_to_optimize': { 45 | 'combined_loss': ['hourglass', 'densenet'], 46 | }, 47 | 'metrics': ['gaze_mse', 'gaze_ang'], 48 | 'learning_rate': 0.0002, 49 | }, 50 | ], 51 | extra_tags=[person_id], 52 | 53 | # Data sources for training (and testing). 
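                # Leave-one-person-out: train on the HDF5 groups of the 14 other subjects
                # and evaluate on the held-out `person_id` defined above.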
54 | train_data={ 55 | 'mpi': HDF5Source( 56 | session, 57 | data_format='NCHW', 58 | batch_size=batch_size, 59 | keys_to_use=['train/' + s for s in other_person_ids], 60 | hdf_path='../datasets/MPIIGaze.h5', 61 | eye_image_shape=(90, 150), 62 | testing=False, 63 | min_after_dequeue=30000, 64 | staging=True, 65 | shuffle=True, 66 | ), 67 | }, 68 | test_data={ 69 | 'mpi': HDF5Source( 70 | session, 71 | data_format='NCHW', 72 | batch_size=batch_size, 73 | keys_to_use=['test/' + person_id], 74 | hdf_path='../datasets/MPIIGaze.h5', 75 | eye_image_shape=(90, 150), 76 | testing=True, 77 | ), 78 | }, 79 | ) 80 | 81 | # Train this model for a set number of epochs 82 | model.train( 83 | num_epochs=20, 84 | ) 85 | 86 | model.__del__() 87 | session.close() 88 | del session 89 | -------------------------------------------------------------------------------- /src/elg_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Main script for gaze direction inference from webcam feed.""" 3 | import argparse 4 | import os 5 | import queue 6 | import threading 7 | import time 8 | 9 | import coloredlogs 10 | import cv2 as cv 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | from datasources import Video, Webcam 15 | from models import ELG 16 | import util.gaze 17 | 18 | if __name__ == '__main__': 19 | 20 | # Set global log level 21 | parser = argparse.ArgumentParser(description='Demonstration of landmarks localization.') 22 | parser.add_argument('-v', type=str, help='logging level', default='info', 23 | choices=['debug', 'info', 'warning', 'error', 'critical']) 24 | parser.add_argument('--from_video', type=str, help='Use this video path instead of webcam') 25 | parser.add_argument('--record_video', type=str, help='Output path of video of demonstration.') 26 | parser.add_argument('--fullscreen', action='store_true') 27 | parser.add_argument('--headless', action='store_true') 28 | 29 | parser.add_argument('--fps', type=int, default=60, help='Desired sampling rate of webcam') 30 | parser.add_argument('--camera_id', type=int, default=0, help='ID of webcam to use') 31 | 32 | args = parser.parse_args() 33 | coloredlogs.install( 34 | datefmt='%d/%m %H:%M', 35 | fmt='%(asctime)s %(levelname)s %(message)s', 36 | level=args.v.upper(), 37 | ) 38 | 39 | # Check if GPU is available 40 | from tensorflow.python.client import device_lib 41 | session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)) 42 | gpu_available = False 43 | try: 44 | gpus = [d for d in device_lib.list_local_devices(config=session_config) 45 | if d.device_type == 'GPU'] 46 | gpu_available = len(gpus) > 0 47 | except: 48 | pass 49 | 50 | # Initialize Tensorflow session 51 | tf.logging.set_verbosity(tf.logging.INFO) 52 | with tf.Session(config=session_config) as session: 53 | 54 | # Declare some parameters 55 | batch_size = 2 56 | 57 | # Define webcam stream data source 58 | # Change data_format='NHWC' if not using CUDA 59 | if args.from_video: 60 | assert os.path.isfile(args.from_video) 61 | data_source = Video(args.from_video, 62 | tensorflow_session=session, batch_size=batch_size, 63 | data_format='NCHW' if gpu_available else 'NHWC', 64 | eye_image_shape=(108, 180)) 65 | else: 66 | data_source = Webcam(tensorflow_session=session, batch_size=batch_size, 67 | camera_id=args.camera_id, fps=args.fps, 68 | data_format='NCHW' if gpu_available else 'NHWC', 69 | eye_image_shape=(36, 60)) 70 | 71 | # Define model 72 | if args.from_video: 73 | model = ELG( 74 | 
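                # Video input: use the larger ELG variant (first_layer_stride=3, 3 hourglass
                # modules, 64 feature maps) to match the (108, 180) eye images segmented above.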
session, train_data={'videostream': data_source}, 75 | first_layer_stride=3, 76 | num_modules=3, 77 | num_feature_maps=64, 78 | learning_schedule=[ 79 | { 80 | 'loss_terms_to_optimize': {'dummy': ['hourglass', 'radius']}, 81 | }, 82 | ], 83 | ) 84 | else: 85 | model = ELG( 86 | session, train_data={'videostream': data_source}, 87 | first_layer_stride=1, 88 | num_modules=2, 89 | num_feature_maps=32, 90 | learning_schedule=[ 91 | { 92 | 'loss_terms_to_optimize': {'dummy': ['hourglass', 'radius']}, 93 | }, 94 | ], 95 | ) 96 | 97 | # Record output frames to file if requested 98 | if args.record_video: 99 | video_out = None 100 | video_out_queue = queue.Queue() 101 | video_out_should_stop = False 102 | video_out_done = threading.Condition() 103 | 104 | def _record_frame(): 105 | global video_out 106 | last_frame_time = None 107 | out_fps = 30 108 | out_frame_interval = 1.0 / out_fps 109 | while not video_out_should_stop: 110 | frame_index = video_out_queue.get() 111 | if frame_index is None: 112 | break 113 | assert frame_index in data_source._frames 114 | frame = data_source._frames[frame_index]['bgr'] 115 | h, w, _ = frame.shape 116 | if video_out is None: 117 | video_out = cv.VideoWriter( 118 | args.record_video, cv.VideoWriter_fourcc(*'H264'), 119 | out_fps, (w, h), 120 | ) 121 | now_time = time.time() 122 | if last_frame_time is not None: 123 | time_diff = now_time - last_frame_time 124 | while time_diff > 0.0: 125 | video_out.write(frame) 126 | time_diff -= out_frame_interval 127 | last_frame_time = now_time 128 | video_out.release() 129 | with video_out_done: 130 | video_out_done.notify_all() 131 | record_thread = threading.Thread(target=_record_frame, name='record') 132 | record_thread.daemon = True 133 | record_thread.start() 134 | 135 | # Begin visualization thread 136 | inferred_stuff_queue = queue.Queue() 137 | 138 | def _visualize_output(): 139 | last_frame_index = 0 140 | last_frame_time = time.time() 141 | fps_history = [] 142 | all_gaze_histories = [] 143 | 144 | if args.fullscreen: 145 | cv.namedWindow('vis', cv.WND_PROP_FULLSCREEN) 146 | cv.setWindowProperty('vis', cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN) 147 | 148 | while True: 149 | # If no output to visualize, show unannotated frame 150 | if inferred_stuff_queue.empty(): 151 | next_frame_index = last_frame_index + 1 152 | if next_frame_index in data_source._frames: 153 | next_frame = data_source._frames[next_frame_index] 154 | if 'faces' in next_frame and len(next_frame['faces']) == 0: 155 | if not args.headless: 156 | cv.imshow('vis', next_frame['bgr']) 157 | if args.record_video: 158 | video_out_queue.put_nowait(next_frame_index) 159 | last_frame_index = next_frame_index 160 | if cv.waitKey(1) & 0xFF == ord('q'): 161 | return 162 | continue 163 | 164 | # Get output from neural network and visualize 165 | output = inferred_stuff_queue.get() 166 | bgr = None 167 | for j in range(batch_size): 168 | frame_index = output['frame_index'][j] 169 | if frame_index not in data_source._frames: 170 | continue 171 | frame = data_source._frames[frame_index] 172 | 173 | # Decide which landmarks are usable 174 | heatmaps_amax = np.amax(output['heatmaps'][j, :].reshape(-1, 18), axis=0) 175 | can_use_eye = np.all(heatmaps_amax > 0.7) 176 | can_use_eyelid = np.all(heatmaps_amax[0:8] > 0.75) 177 | can_use_iris = np.all(heatmaps_amax[8:16] > 0.8) 178 | 179 | start_time = time.time() 180 | eye_index = output['eye_index'][j] 181 | bgr = frame['bgr'] 182 | eye = frame['eyes'][eye_index] 183 | eye_image = eye['image'] 184 | eye_side = 
eye['side'] 185 | eye_landmarks = output['landmarks'][j, :] 186 | eye_radius = output['radius'][j][0] 187 | if eye_side == 'left': 188 | eye_landmarks[:, 0] = eye_image.shape[1] - eye_landmarks[:, 0] 189 | eye_image = np.fliplr(eye_image) 190 | 191 | # Embed eye image and annotate for picture-in-picture 192 | eye_upscale = 2 193 | eye_image_raw = cv.cvtColor(cv.equalizeHist(eye_image), cv.COLOR_GRAY2BGR) 194 | eye_image_raw = cv.resize(eye_image_raw, (0, 0), fx=eye_upscale, fy=eye_upscale) 195 | eye_image_annotated = np.copy(eye_image_raw) 196 | if can_use_eyelid: 197 | cv.polylines( 198 | eye_image_annotated, 199 | [np.round(eye_upscale*eye_landmarks[0:8]).astype(np.int32) 200 | .reshape(-1, 1, 2)], 201 | isClosed=True, color=(255, 255, 0), thickness=1, lineType=cv.LINE_AA, 202 | ) 203 | if can_use_iris: 204 | cv.polylines( 205 | eye_image_annotated, 206 | [np.round(eye_upscale*eye_landmarks[8:16]).astype(np.int32) 207 | .reshape(-1, 1, 2)], 208 | isClosed=True, color=(0, 255, 255), thickness=1, lineType=cv.LINE_AA, 209 | ) 210 | cv.drawMarker( 211 | eye_image_annotated, 212 | tuple(np.round(eye_upscale*eye_landmarks[16, :]).astype(np.int32)), 213 | color=(0, 255, 255), markerType=cv.MARKER_CROSS, markerSize=4, 214 | thickness=1, line_type=cv.LINE_AA, 215 | ) 216 | face_index = int(eye_index / 2) 217 | eh, ew, _ = eye_image_raw.shape 218 | v0 = face_index * 2 * eh 219 | v1 = v0 + eh 220 | v2 = v1 + eh 221 | u0 = 0 if eye_side == 'left' else ew 222 | u1 = u0 + ew 223 | bgr[v0:v1, u0:u1] = eye_image_raw 224 | bgr[v1:v2, u0:u1] = eye_image_annotated 225 | 226 | # Visualize preprocessing results 227 | frame_landmarks = (frame['smoothed_landmarks'] 228 | if 'smoothed_landmarks' in frame 229 | else frame['landmarks']) 230 | for f, face in enumerate(frame['faces']): 231 | for landmark in frame_landmarks[f][:-1]: 232 | cv.drawMarker(bgr, tuple(np.round(landmark).astype(np.int32)), 233 | color=(0, 0, 255), markerType=cv.MARKER_STAR, 234 | markerSize=2, thickness=1, line_type=cv.LINE_AA) 235 | cv.rectangle( 236 | bgr, tuple(np.round(face[:2]).astype(np.int32)), 237 | tuple(np.round(np.add(face[:2], face[2:])).astype(np.int32)), 238 | color=(0, 255, 255), thickness=1, lineType=cv.LINE_AA, 239 | ) 240 | 241 | # Transform predictions 242 | eye_landmarks = np.concatenate([eye_landmarks, 243 | [[eye_landmarks[-1, 0] + eye_radius, 244 | eye_landmarks[-1, 1]]]]) 245 | eye_landmarks = np.asmatrix(np.pad(eye_landmarks, ((0, 0), (0, 1)), 246 | 'constant', constant_values=1.0)) 247 | eye_landmarks = (eye_landmarks * 248 | eye['inv_landmarks_transform_mat'].T)[:, :2] 249 | eye_landmarks = np.asarray(eye_landmarks) 250 | eyelid_landmarks = eye_landmarks[0:8, :] 251 | iris_landmarks = eye_landmarks[8:16, :] 252 | iris_centre = eye_landmarks[16, :] 253 | eyeball_centre = eye_landmarks[17, :] 254 | eyeball_radius = np.linalg.norm(eye_landmarks[18, :] - 255 | eye_landmarks[17, :]) 256 | 257 | # Smooth and visualize gaze direction 258 | num_total_eyes_in_frame = len(frame['eyes']) 259 | if len(all_gaze_histories) != num_total_eyes_in_frame: 260 | all_gaze_histories = [list() for _ in range(num_total_eyes_in_frame)] 261 | gaze_history = all_gaze_histories[eye_index] 262 | if can_use_eye: 263 | # Visualize landmarks 264 | cv.drawMarker( # Eyeball centre 265 | bgr, tuple(np.round(eyeball_centre).astype(np.int32)), 266 | color=(0, 255, 0), markerType=cv.MARKER_CROSS, markerSize=4, 267 | thickness=1, line_type=cv.LINE_AA, 268 | ) 269 | # cv.circle( # Eyeball outline 270 | # bgr, 
tuple(np.round(eyeball_centre).astype(np.int32)), 271 | # int(np.round(eyeball_radius)), color=(0, 255, 0), 272 | # thickness=1, lineType=cv.LINE_AA, 273 | # ) 274 | 275 | # Draw "gaze" 276 | # from models.elg import estimate_gaze_from_landmarks 277 | # current_gaze = estimate_gaze_from_landmarks( 278 | # iris_landmarks, iris_centre, eyeball_centre, eyeball_radius) 279 | i_x0, i_y0 = iris_centre 280 | e_x0, e_y0 = eyeball_centre 281 | theta = -np.arcsin(np.clip((i_y0 - e_y0) / eyeball_radius, -1.0, 1.0)) 282 | phi = np.arcsin(np.clip((i_x0 - e_x0) / (eyeball_radius * -np.cos(theta)), 283 | -1.0, 1.0)) 284 | current_gaze = np.array([theta, phi]) 285 | gaze_history.append(current_gaze) 286 | gaze_history_max_len = 10 287 | if len(gaze_history) > gaze_history_max_len: 288 | gaze_history = gaze_history[-gaze_history_max_len:] 289 | util.gaze.draw_gaze(bgr, iris_centre, np.mean(gaze_history, axis=0), 290 | length=120.0, thickness=1) 291 | else: 292 | gaze_history.clear() 293 | 294 | if can_use_eyelid: 295 | cv.polylines( 296 | bgr, [np.round(eyelid_landmarks).astype(np.int32).reshape(-1, 1, 2)], 297 | isClosed=True, color=(255, 255, 0), thickness=1, lineType=cv.LINE_AA, 298 | ) 299 | 300 | if can_use_iris: 301 | cv.polylines( 302 | bgr, [np.round(iris_landmarks).astype(np.int32).reshape(-1, 1, 2)], 303 | isClosed=True, color=(0, 255, 255), thickness=1, lineType=cv.LINE_AA, 304 | ) 305 | cv.drawMarker( 306 | bgr, tuple(np.round(iris_centre).astype(np.int32)), 307 | color=(0, 255, 255), markerType=cv.MARKER_CROSS, markerSize=4, 308 | thickness=1, line_type=cv.LINE_AA, 309 | ) 310 | 311 | dtime = 1e3*(time.time() - start_time) 312 | if 'visualization' not in frame['time']: 313 | frame['time']['visualization'] = dtime 314 | else: 315 | frame['time']['visualization'] += dtime 316 | 317 | def _dtime(before_id, after_id): 318 | return int(1e3 * (frame['time'][after_id] - frame['time'][before_id])) 319 | 320 | def _dstr(title, before_id, after_id): 321 | return '%s: %dms' % (title, _dtime(before_id, after_id)) 322 | 323 | if eye_index == len(frame['eyes']) - 1: 324 | # Calculate timings 325 | frame['time']['after_visualization'] = time.time() 326 | fps = int(np.round(1.0 / (time.time() - last_frame_time))) 327 | fps_history.append(fps) 328 | if len(fps_history) > 60: 329 | fps_history = fps_history[-60:] 330 | fps_str = '%d FPS' % np.mean(fps_history) 331 | last_frame_time = time.time() 332 | fh, fw, _ = bgr.shape 333 | cv.putText(bgr, fps_str, org=(fw - 110, fh - 20), 334 | fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=0.8, 335 | color=(0, 0, 0), thickness=1, lineType=cv.LINE_AA) 336 | cv.putText(bgr, fps_str, org=(fw - 111, fh - 21), 337 | fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=0.79, 338 | color=(255, 255, 255), thickness=1, lineType=cv.LINE_AA) 339 | if not args.headless: 340 | cv.imshow('vis', bgr) 341 | last_frame_index = frame_index 342 | 343 | # Record frame? 344 | if args.record_video: 345 | video_out_queue.put_nowait(frame_index) 346 | 347 | # Quit? 
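                        # Press 'q' in the visualization window to stop the demo.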
348 | if cv.waitKey(1) & 0xFF == ord('q'): 349 | return 350 | 351 | # Print timings 352 | if frame_index % 60 == 0: 353 | latency = _dtime('before_frame_read', 'after_visualization') 354 | processing = _dtime('after_frame_read', 'after_visualization') 355 | timing_string = ', '.join([ 356 | _dstr('read', 'before_frame_read', 'after_frame_read'), 357 | _dstr('preproc', 'after_frame_read', 'after_preprocessing'), 358 | 'infer: %dms' % int(frame['time']['inference']), 359 | 'vis: %dms' % int(frame['time']['visualization']), 360 | 'proc: %dms' % processing, 361 | 'latency: %dms' % latency, 362 | ]) 363 | print('%08d [%s] %s' % (frame_index, fps_str, timing_string)) 364 | 365 | visualize_thread = threading.Thread(target=_visualize_output, name='visualization') 366 | visualize_thread.daemon = True 367 | visualize_thread.start() 368 | 369 | # Do inference forever 370 | infer = model.inference_generator() 371 | while True: 372 | output = next(infer) 373 | for frame_index in np.unique(output['frame_index']): 374 | if frame_index not in data_source._frames: 375 | continue 376 | frame = data_source._frames[frame_index] 377 | if 'inference' in frame['time']: 378 | frame['time']['inference'] += output['inference_time'] 379 | else: 380 | frame['time']['inference'] = output['inference_time'] 381 | inferred_stuff_queue.put_nowait(output) 382 | 383 | if not visualize_thread.isAlive(): 384 | break 385 | 386 | if not data_source._open: 387 | break 388 | 389 | # Close video recording 390 | if args.record_video and video_out is not None: 391 | video_out_should_stop = True 392 | video_out_queue.put_nowait(None) 393 | with video_out_done: 394 | video_out_done.wait() 395 | -------------------------------------------------------------------------------- /src/elg_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Main script for training a model for gaze estimation.""" 3 | import argparse 4 | 5 | import coloredlogs 6 | import tensorflow as tf 7 | 8 | if __name__ == '__main__': 9 | 10 | # Set global log level 11 | parser = argparse.ArgumentParser(description='Train a gaze estimation model.') 12 | parser.add_argument('-v', type=str, help='logging level', default='info', 13 | choices=['debug', 'info', 'warning', 'error', 'critical']) 14 | args = parser.parse_args() 15 | coloredlogs.install( 16 | datefmt='%d/%m %H:%M', 17 | fmt='%(asctime)s %(levelname)s %(message)s', 18 | level=args.v.upper(), 19 | ) 20 | 21 | # Initialize Tensorflow session 22 | tf.logging.set_verbosity(tf.logging.ERROR) 23 | gpu_options = tf.GPUOptions(allow_growth=True) 24 | with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as session: 25 | 26 | # Declare some parameters 27 | batch_size = 32 28 | 29 | # Define some model-specific parameters 30 | elg_first_layer_stride = 1 31 | elg_num_modules = 3 32 | elg_num_feature_maps = 32 33 | 34 | # Define training data source 35 | from datasources import UnityEyes 36 | unityeyes = UnityEyes( 37 | session, 38 | batch_size=batch_size, 39 | data_format='NCHW', 40 | unityeyes_path='../datasets/UnityEyes/imgs', 41 | min_after_dequeue=1000, 42 | generate_heatmaps=True, 43 | shuffle=True, 44 | staging=True, 45 | eye_image_shape=(36, 60), 46 | heatmaps_scale=1.0 / elg_first_layer_stride, 47 | ) 48 | unityeyes.set_augmentation_range('translation', 2.0, 10.0) 49 | unityeyes.set_augmentation_range('rotation', 1.0, 10.0) 50 | unityeyes.set_augmentation_range('intensity', 0.5, 20.0) 51 | unityeyes.set_augmentation_range('blur', 0.1, 
1.0) 52 | unityeyes.set_augmentation_range('scale', 0.01, 0.1) 53 | unityeyes.set_augmentation_range('rescale', 1.0, 0.5) 54 | unityeyes.set_augmentation_range('num_line', 0.0, 2.0) 55 | unityeyes.set_augmentation_range('heatmap_sigma', 7.5, 2.5) 56 | 57 | # Define model 58 | from models import ELG 59 | model = ELG( 60 | # Tensorflow session 61 | # Note: The same session must be used for the model and the data sources. 62 | session, 63 | 64 | # Model configuration parameters 65 | # first_layer_stride describes how much the input image is downsampled before producing 66 | # feature maps for eventual heatmaps regression 67 | # num_modules defines the number of hourglass modules, and thus the number of times repeated 68 | # coarse-to-fine refinement is done. 69 | # num_feature_maps describes how many feature maps are refined over the entire network. 70 | first_layer_stride=elg_first_layer_stride, 71 | num_feature_maps=elg_num_feature_maps, 72 | num_modules=elg_num_modules, 73 | 74 | # The learning schedule describes in which order which part of the network should be 75 | # trained and with which learning rate. 76 | # 77 | # A standard network would have one entry (dict) in this argument where all model 78 | # parameters are optimized. To do this, you must specify which variables must be 79 | # optimized and this is done by specifying which prefixes to look for. 80 | # The prefixes are defined by using `tf.variable_scope`. 81 | # 82 | # The loss terms which can be specified depends on model specifications, specifically 83 | # the `loss_terms` output of `BaseModel::build_model`. 84 | learning_schedule=[ 85 | { 86 | 'loss_terms_to_optimize': { 87 | 'heatmaps_mse': ['hourglass'], 88 | 'radius_mse': ['radius'], 89 | }, 90 | 'learning_rate': 1e-3, 91 | }, 92 | ], 93 | 94 | # Data sources for training (and testing). 95 | train_data={'synthetic': unityeyes}, 96 | ) 97 | 98 | # Train this model for a set number of epochs 99 | model.train( 100 | num_epochs=100, 101 | ) 102 | -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Model definitions (one class per file) to define NN architectures.""" 2 | from .elg import ELG 3 | from .dpg import DPG 4 | 5 | __all__ = ('ELG', 'DPG') 6 | -------------------------------------------------------------------------------- /src/models/dpg.py: -------------------------------------------------------------------------------- 1 | """Deep Pictorial Gaze architecture.""" 2 | from typing import Dict 3 | 4 | import numpy as np 5 | import scipy 6 | import tensorflow as tf 7 | 8 | from core import BaseDataSource, BaseModel 9 | from datasources import UnityEyes 10 | import util.gaze 11 | 12 | 13 | class DPG(BaseModel): 14 | """Deep Pictorial Gaze architecture as introduced in [Park et al. 
ECCV'18].""" 15 | 16 | def __init__(self, tensorflow_session=None, first_layer_stride=2, num_modules=3, 17 | num_feature_maps=32, growth_rate=8, extra_tags=[], **kwargs): 18 | """Specify DPG-specific parameters.""" 19 | self._hg_first_layer_stride = first_layer_stride 20 | self._hg_num_modules = num_modules 21 | self._hg_num_feature_maps= num_feature_maps 22 | self._dn_growth_rate = growth_rate 23 | self._extra_tags = extra_tags 24 | 25 | # Call parent class constructor 26 | super().__init__(tensorflow_session, **kwargs) 27 | 28 | _hg_first_layer_stride = 2 29 | _hg_num_modules = 3 30 | _hg_num_feature_maps = 32 31 | _hg_num_residual_blocks = 1 32 | _hg_num_gazemaps = 2 33 | 34 | _dn_growth_rate = 8 35 | _dn_compression_factor = 0.5 36 | _dn_num_layers_per_block = (4, 4, 4, 4) 37 | _dn_num_dense_blocks = len(_dn_num_layers_per_block) 38 | 39 | @property 40 | def identifier(self): 41 | """Identifier for model based on data sources and parameters.""" 42 | first_data_source = next(iter(self._train_data.values())) 43 | input_tensors = first_data_source.output_tensors 44 | if self._data_format == 'NHWC': 45 | _, eh, ew, _ = input_tensors['eye'].shape.as_list() 46 | else: 47 | _, _, eh, ew = input_tensors['eye'].shape.as_list() 48 | return 'DPG_i%dx%d_f%dx%d_n%d_m%d_k%d_%s' % ( 49 | ew, eh, 50 | int(ew / self._hg_first_layer_stride), 51 | int(eh / self._hg_first_layer_stride), 52 | self._hg_num_feature_maps, self._hg_num_modules, 53 | self._dn_growth_rate, 54 | '-'.join(self._extra_tags) if len(self._extra_tags) > 0 else '', 55 | ) 56 | 57 | def train_loop_pre(self, current_step): 58 | """Run this at beginning of training loop.""" 59 | # Step learning rate decay 60 | multiplier = np.power(0.1, int(current_step / 10000)) 61 | self._tensorflow_session.run(self.assign_learning_rate_multiplier, feed_dict={ 62 | self.learning_rate_multiplier_placeholder: multiplier, 63 | }) 64 | 65 | _column_of_ones = None 66 | _column_of_zeros = None 67 | 68 | def _augment_training_images(self, images, mode): 69 | if mode == 'test': 70 | return images 71 | with tf.variable_scope('augment'): 72 | if self._data_format == 'NCHW': 73 | images = tf.transpose(images, perm=[0, 2, 3, 1]) 74 | n, h, w, _ = images.shape.as_list() 75 | if self._column_of_ones is None: 76 | self._column_of_ones = tf.ones((n, 1)) 77 | self._column_of_zeros = tf.zeros((n, 1)) 78 | transforms = tf.concat([ 79 | self._column_of_ones, 80 | self._column_of_zeros, 81 | tf.truncated_normal((n, 1), mean=0, stddev=.05*w), 82 | self._column_of_zeros, 83 | self._column_of_ones, 84 | tf.truncated_normal((n, 1), mean=0, stddev=.05*h), 85 | self._column_of_zeros, 86 | self._column_of_zeros, 87 | ], axis=1) 88 | images = tf.contrib.image.transform(images, transforms, interpolation='BILINEAR') 89 | if self._data_format == 'NCHW': 90 | images = tf.transpose(images, perm=[0, 3, 1, 2]) 91 | return images 92 | 93 | def build_model(self, data_sources: Dict[str, BaseDataSource], mode: str): 94 | """Build model.""" 95 | data_source = next(iter(data_sources.values())) 96 | input_tensors = data_source.output_tensors 97 | x = input_tensors['eye'] 98 | y1 = input_tensors['gazemaps'] if 'gazemaps' in input_tensors else None 99 | y2 = input_tensors['gaze'] if 'gaze' in input_tensors else None 100 | 101 | with tf.variable_scope('input_data'): 102 | # self.summary.feature_maps('eyes', x, data_format=self._data_format_longer) 103 | if y1 is not None: 104 | self.summary.feature_maps('gazemaps', y1, data_format=self._data_format_longer) 105 | 106 | outputs = {} 107 | loss_terms 
= {} 108 | metrics = {} 109 | 110 | # Lightly augment training data 111 | x = self._augment_training_images(x, mode) 112 | 113 | with tf.variable_scope('hourglass'): 114 | # Prepare for Hourglass by downscaling via conv 115 | with tf.variable_scope('pre'): 116 | n = self._hg_num_feature_maps 117 | x = self._apply_conv(x, num_features=n, kernel_size=7, 118 | stride=self._hg_first_layer_stride) 119 | x = tf.nn.relu(self._apply_bn(x)) 120 | x = self._build_residual_block(x, n, 2*n, name='res1') 121 | x = self._build_residual_block(x, 2*n, n, name='res2') 122 | 123 | # Hourglass blocks 124 | x_prev = x 125 | gmap = None 126 | for i in range(self._hg_num_modules): 127 | with tf.variable_scope('hg_%d' % (i + 1)): 128 | x = self._build_hourglass(x, steps_to_go=4, num_features=self._hg_num_feature_maps) 129 | x, gmap = self._build_hourglass_after( 130 | x_prev, x, do_merge=(i < (self._hg_num_modules - 1)), 131 | ) 132 | x_prev = x 133 | if y1 is not None: 134 | # Cross-entropy loss 135 | metrics['gazemaps_ce'] = -tf.reduce_mean(tf.reduce_sum( 136 | y1 * tf.log(tf.clip_by_value(gmap, 1e-10, 1.0)), # avoid NaN 137 | axis=[1, 2, 3])) 138 | # metrics['gazemaps_ce'] = tf.losses.softmax_cross_entropy( 139 | # tf.reshape(y1, (self._batch_size, -1)), 140 | # tf.reshape(gmap, (self._batch_size, -1)), 141 | # loss_collection=None, 142 | # ) 143 | x = gmap 144 | outputs['gazemaps'] = gmap 145 | self.summary.feature_maps('bottleneck', gmap, data_format=self._data_format_longer) 146 | 147 | with tf.variable_scope('densenet'): 148 | # DenseNet blocks to regress to gaze 149 | for i in range(self._dn_num_dense_blocks): 150 | with tf.variable_scope('block%d' % (i + 1)): 151 | x = self._apply_dense_block(x, 152 | num_layers=self._dn_num_layers_per_block[i]) 153 | if i == self._dn_num_dense_blocks - 1: 154 | break 155 | with tf.variable_scope('trans%d' % (i + 1)): 156 | x = self._apply_transition_layer(x) 157 | 158 | # Global average pooling 159 | with tf.variable_scope('post'): 160 | x = self._apply_bn(x) 161 | x = tf.nn.relu(x) 162 | if self._data_format == 'NCHW': 163 | x = tf.reduce_mean(x, axis=[2, 3]) 164 | else: 165 | x = tf.reduce_mean(x, axis=[1, 2]) 166 | x = tf.contrib.layers.flatten(x) 167 | 168 | # Output layer 169 | with tf.variable_scope('output'): 170 | x = self._apply_fc(x, 2) 171 | outputs['gaze'] = x 172 | if y2 is not None: 173 | metrics['gaze_mse'] = tf.reduce_mean(tf.squared_difference(x, y2)) 174 | metrics['gaze_ang'] = util.gaze.tensorflow_angular_error_from_pitchyaw(y2, x) 175 | 176 | # Combine two loss terms 177 | if y1 is not None and y2 is not None: 178 | loss_terms['combined_loss'] = 1e-5*metrics['gazemaps_ce'] + metrics['gaze_mse'] 179 | 180 | # Define outputs 181 | return outputs, loss_terms, metrics 182 | 183 | def _apply_conv(self, tensor, num_features, kernel_size=3, stride=1): 184 | return tf.layers.conv2d( 185 | tensor, 186 | num_features, 187 | kernel_size=kernel_size, 188 | strides=stride, 189 | padding='SAME', 190 | kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01), 191 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-4), 192 | bias_initializer=tf.zeros_initializer(), 193 | data_format=self._data_format_longer, 194 | name='conv', 195 | ) 196 | 197 | def _apply_fc(self, tensor, num_outputs): 198 | return tf.layers.dense( 199 | tensor, 200 | num_outputs, 201 | use_bias=True, 202 | kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01), 203 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-4), 204 | 
bias_initializer=tf.zeros_initializer(), 205 | name='fc', 206 | ) 207 | 208 | def _apply_pool(self, tensor, kernel_size=3, stride=2): 209 | tensor = tf.layers.max_pooling2d( 210 | tensor, 211 | pool_size=kernel_size, 212 | strides=stride, 213 | padding='SAME', 214 | data_format=self._data_format_longer, 215 | name='pool', 216 | ) 217 | return tensor 218 | 219 | def _apply_bn(self, tensor): 220 | return tf.contrib.layers.batch_norm( 221 | tensor, 222 | scale=True, 223 | center=True, 224 | is_training=self.use_batch_statistics, 225 | trainable=True, 226 | data_format=self._data_format, 227 | updates_collections=None, 228 | ) 229 | 230 | def _build_residual_block(self, x, num_in, num_out, name='res_block'): 231 | with tf.variable_scope(name): 232 | half_num_out = max(int(num_out/2), 1) 233 | c = x 234 | with tf.variable_scope('conv1'): 235 | c = tf.nn.relu(self._apply_bn(c)) 236 | c = self._apply_conv(c, num_features=half_num_out, kernel_size=1, stride=1) 237 | with tf.variable_scope('conv2'): 238 | c = tf.nn.relu(self._apply_bn(c)) 239 | c = self._apply_conv(c, num_features=half_num_out, kernel_size=3, stride=1) 240 | with tf.variable_scope('conv3'): 241 | c = tf.nn.relu(self._apply_bn(c)) 242 | c = self._apply_conv(c, num_features=num_out, kernel_size=1, stride=1) 243 | with tf.variable_scope('skip'): 244 | if num_in == num_out: 245 | s = tf.identity(x) 246 | else: 247 | s = self._apply_conv(x, num_features=num_out, kernel_size=1, stride=1) 248 | x = c + s 249 | return x 250 | 251 | def _build_hourglass(self, x, steps_to_go, num_features, depth=1): 252 | with tf.variable_scope('depth%d' % depth): 253 | # Upper branch 254 | up1 = x 255 | for i in range(self._hg_num_residual_blocks): 256 | up1 = self._build_residual_block(up1, num_features, num_features, 257 | name='up1_%d' % (i + 1)) 258 | # Lower branch 259 | low1 = self._apply_pool(x, kernel_size=2, stride=2) 260 | for i in range(self._hg_num_residual_blocks): 261 | low1 = self._build_residual_block(low1, num_features, num_features, 262 | name='low1_%d' % (i + 1)) 263 | # Recursive 264 | low2 = None 265 | if steps_to_go > 1: 266 | low2 = self._build_hourglass(low1, steps_to_go - 1, num_features, depth=depth+1) 267 | else: 268 | low2 = low1 269 | for i in range(self._hg_num_residual_blocks): 270 | low2 = self._build_residual_block(low2, num_features, num_features, 271 | name='low2_%d' % (i + 1)) 272 | # Additional residual blocks 273 | low3 = low2 274 | for i in range(self._hg_num_residual_blocks): 275 | low3 = self._build_residual_block(low3, num_features, num_features, 276 | name='low3_%d' % (i + 1)) 277 | # Upsample 278 | if self._data_format == 'NCHW': # convert to NHWC 279 | low3 = tf.transpose(low3, (0, 2, 3, 1)) 280 | up2 = tf.image.resize_bilinear( 281 | low3, 282 | up1.shape[1:3] if self._data_format == 'NHWC' else up1.shape[2:4], 283 | align_corners=True, 284 | ) 285 | if self._data_format == 'NCHW': # convert back from NHWC 286 | up2 = tf.transpose(up2, (0, 3, 1, 2)) 287 | 288 | return up1 + up2 289 | 290 | def _build_hourglass_after(self, x_prev, x_now, do_merge=True): 291 | with tf.variable_scope('after'): 292 | for j in range(self._hg_num_residual_blocks): 293 | x_now = self._build_residual_block(x_now, self._hg_num_feature_maps, 294 | self._hg_num_feature_maps, 295 | name='after_hg_%d' % (j + 1)) 296 | x_now = self._apply_conv(x_now, self._hg_num_feature_maps, kernel_size=1, stride=1) 297 | x_now = self._apply_bn(x_now) 298 | x_now = tf.nn.relu(x_now) 299 | 300 | with tf.variable_scope('gmap'): 301 | gmap = 
self._apply_conv(x_now, self._hg_num_gazemaps, kernel_size=1, stride=1) 302 | 303 | x_next = x_now 304 | if do_merge: 305 | with tf.variable_scope('merge'): 306 | with tf.variable_scope('gmap'): 307 | x_gmaps = self._apply_conv(gmap, self._hg_num_feature_maps, kernel_size=1, stride=1) 308 | with tf.variable_scope('x'): 309 | x_now = self._apply_conv(x_now, self._hg_num_feature_maps, kernel_size=1, stride=1) 310 | x_next += x_prev + x_gmaps 311 | 312 | # Perform softmax on gazemaps 313 | if self._data_format == 'NCHW': 314 | n, c, h, w = gmap.shape.as_list() 315 | gmap = tf.reshape(gmap, (n, -1)) 316 | gmap = tf.nn.softmax(gmap) 317 | gmap = tf.reshape(gmap, (n, c, h, w)) 318 | else: 319 | n, h, w, c = gmap.shape.as_list() 320 | gmap = tf.transpose(gmap, perm=[0, 3, 1, 2]) 321 | gmap = tf.reshape(gmap, (n, -1)) 322 | gmap = tf.nn.softmax(gmap) 323 | gmap = tf.reshape(gmap, (n, c, h, w)) 324 | gmap = tf.transpose(gmap, perm=[0, 2, 3, 1]) 325 | return x_next, gmap 326 | 327 | def _apply_dense_block(self, x, num_layers): 328 | assert isinstance(num_layers, int) and num_layers > 0 329 | c_index = 1 if self._data_format == 'NCHW' else 3 330 | x_prev = x 331 | for i in range(num_layers): 332 | with tf.variable_scope('layer%d' % (i + 1)): 333 | n = x.shape.as_list()[c_index] 334 | with tf.variable_scope('bottleneck'): 335 | x = self._apply_composite_function(x, 336 | num_features=min(n, 4*self._dn_growth_rate), 337 | kernel_size=1) 338 | with tf.variable_scope('composite'): 339 | x = self._apply_composite_function(x, num_features=self._dn_growth_rate, 340 | kernel_size=3) 341 | if self._data_format == 'NCHW': 342 | x = tf.concat([x, x_prev], axis=1) 343 | else: 344 | x = tf.concat([x, x_prev], axis=-1) 345 | x_prev = x 346 | return x 347 | 348 | def _apply_transition_layer(self, x): 349 | c_index = 1 if self._data_format == 'NCHW' else 3 350 | x = self._apply_composite_function( 351 | x, num_features=int(self._dn_compression_factor * x.shape.as_list()[c_index]), 352 | kernel_size=1) 353 | x = tf.layers.average_pooling2d(x, pool_size=2, strides=2, padding='valid', 354 | data_format=self._data_format_longer) 355 | return x 356 | 357 | def _apply_composite_function(self, x, num_features=_dn_growth_rate, kernel_size=3): 358 | x = self._apply_bn(x) 359 | x = tf.nn.relu(x) 360 | x = self._apply_conv(x, num_features=num_features, kernel_size=kernel_size, stride=1) 361 | return x 362 | -------------------------------------------------------------------------------- /src/models/elg.py: -------------------------------------------------------------------------------- 1 | """ELG architecture.""" 2 | from typing import Dict 3 | 4 | import numpy as np 5 | import scipy 6 | import tensorflow as tf 7 | 8 | from core import BaseDataSource, BaseModel 9 | 10 | 11 | def _tf_mse(x, y): 12 | """Tensorflow call for mean-squared error.""" 13 | return tf.reduce_mean(tf.squared_difference(x, y)) 14 | 15 | 16 | class ELG(BaseModel): 17 | """ELG architecture as introduced in [Park et al. 
ETRA'18].""" 18 | 19 | def __init__(self, tensorflow_session=None, first_layer_stride=1, 20 | num_modules=2, num_feature_maps=32, **kwargs): 21 | """Specify ELG-specific parameters.""" 22 | self._hg_first_layer_stride = first_layer_stride 23 | self._hg_num_modules = num_modules 24 | self._hg_num_feature_maps= num_feature_maps 25 | 26 | # Call parent class constructor 27 | super().__init__(tensorflow_session, **kwargs) 28 | 29 | _hg_first_layer_stride = 1 30 | _hg_num_modules = 2 31 | _hg_num_feature_maps = 32 32 | _hg_num_landmarks = 18 33 | _hg_num_residual_blocks = 1 34 | 35 | @property 36 | def identifier(self): 37 | """Identifier for model based on data sources and parameters.""" 38 | first_data_source = next(iter(self._train_data.values())) 39 | input_tensors = first_data_source.output_tensors 40 | if self._data_format == 'NHWC': 41 | _, eh, ew, _ = input_tensors['eye'].shape.as_list() 42 | else: 43 | _, _, eh, ew = input_tensors['eye'].shape.as_list() 44 | return 'ELG_i%dx%d_f%dx%d_n%d_m%d' % ( 45 | ew, eh, 46 | int(ew / self._hg_first_layer_stride), 47 | int(eh / self._hg_first_layer_stride), 48 | self._hg_num_feature_maps, self._hg_num_modules, 49 | ) 50 | 51 | def train_loop_pre(self, current_step): 52 | """Run this at beginning of training loop.""" 53 | # Set difficulty of training data 54 | data_source = next(iter(self._train_data.values())) 55 | data_source.set_difficulty(min((1. / 1e6) * current_step, 1.)) 56 | 57 | def build_model(self, data_sources: Dict[str, BaseDataSource], mode: str): 58 | """Build model.""" 59 | data_source = next(iter(data_sources.values())) 60 | input_tensors = data_source.output_tensors 61 | x = input_tensors['eye'] 62 | y1 = input_tensors['heatmaps'] if 'heatmaps' in input_tensors else None 63 | y2 = input_tensors['landmarks'] if 'landmarks' in input_tensors else None 64 | y3 = input_tensors['radius'] if 'radius' in input_tensors else None 65 | 66 | with tf.variable_scope('input_data'): 67 | self.summary.feature_maps('eyes', x, data_format=self._data_format_longer) 68 | if y1 is not None: 69 | self.summary.feature_maps('hmaps_true', y1, data_format=self._data_format_longer) 70 | 71 | outputs = {} 72 | loss_terms = {} 73 | metrics = {} 74 | 75 | with tf.variable_scope('hourglass'): 76 | # TODO: Find better way to specify no. 
landmarks 77 | if y1 is not None: 78 | if self._data_format == 'NCHW': 79 | self._hg_num_landmarks = y1.shape.as_list()[1] 80 | if self._data_format == 'NHWC': 81 | self._hg_num_landmarks = y1.shape.as_list()[3] 82 | else: 83 | self._hg_num_landmarks = 18 84 | assert self._hg_num_landmarks == 18 85 | 86 | # Prepare for Hourglass by downscaling via conv 87 | with tf.variable_scope('pre'): 88 | n = self._hg_num_feature_maps 89 | x = self._apply_conv(x, num_features=n, kernel_size=7, 90 | stride=self._hg_first_layer_stride) 91 | x = tf.nn.relu(self._apply_bn(x)) 92 | x = self._build_residual_block(x, n, 2*n, name='res1') 93 | x = self._build_residual_block(x, 2*n, n, name='res2') 94 | 95 | # Hourglass blocks 96 | x_prev = x 97 | for i in range(self._hg_num_modules): 98 | with tf.variable_scope('hg_%d' % (i + 1)): 99 | x = self._build_hourglass(x, steps_to_go=4, num_features=self._hg_num_feature_maps) 100 | x, h = self._build_hourglass_after( 101 | x_prev, x, do_merge=(i < (self._hg_num_modules - 1)), 102 | ) 103 | self.summary.feature_maps('hmap%d' % i, h, data_format=self._data_format_longer) 104 | if y1 is not None: 105 | metrics['heatmap%d_mse' % (i + 1)] = _tf_mse(h, y1) 106 | x_prev = x 107 | if y1 is not None: 108 | loss_terms['heatmaps_mse'] = tf.reduce_mean([ 109 | metrics['heatmap%d_mse' % (i + 1)] for i in range(self._hg_num_modules) 110 | ]) 111 | x = h 112 | outputs['heatmaps'] = x 113 | 114 | # Soft-argmax 115 | x = self._calculate_landmarks(x) 116 | with tf.variable_scope('upscale'): 117 | # Upscale since heatmaps are half-scale of original image 118 | x *= self._hg_first_layer_stride 119 | if y2 is not None: 120 | metrics['landmarks_mse'] = _tf_mse(x, y2) 121 | outputs['landmarks'] = x 122 | 123 | # Fully-connected layers for radius regression 124 | with tf.variable_scope('radius'): 125 | x = tf.contrib.layers.flatten(tf.transpose(x, perm=[0, 2, 1])) 126 | for i in range(3): 127 | with tf.variable_scope('fc%d' % (i + 1)): 128 | x = tf.nn.relu(self._apply_bn(self._apply_fc(x, 100))) 129 | with tf.variable_scope('out'): 130 | x = self._apply_fc(x, 1) 131 | outputs['radius'] = x 132 | if y3 is not None: 133 | metrics['radius_mse'] = _tf_mse(tf.reshape(x, [-1]), y3) 134 | loss_terms['radius_mse'] = 1e-7 * metrics['radius_mse'] 135 | self.summary.histogram('radius', x) 136 | 137 | # Define outputs 138 | return outputs, loss_terms, metrics 139 | 140 | def _apply_conv(self, tensor, num_features, kernel_size=3, stride=1): 141 | return tf.layers.conv2d( 142 | tensor, 143 | num_features, 144 | kernel_size=kernel_size, 145 | strides=stride, 146 | padding='SAME', 147 | kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01), 148 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-4), 149 | bias_initializer=tf.zeros_initializer(), 150 | data_format=self._data_format_longer, 151 | name='conv', 152 | ) 153 | 154 | def _apply_fc(self, tensor, num_outputs): 155 | return tf.layers.dense( 156 | tensor, 157 | num_outputs, 158 | use_bias=True, 159 | kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01), 160 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-4), 161 | bias_initializer=tf.zeros_initializer(), 162 | name='fc', 163 | ) 164 | 165 | def _apply_pool(self, tensor, kernel_size=3, stride=2): 166 | tensor = tf.layers.max_pooling2d( 167 | tensor, 168 | pool_size=kernel_size, 169 | strides=stride, 170 | padding='SAME', 171 | data_format=self._data_format_longer, 172 | name='pool', 173 | ) 174 | return tensor 175 | 176 | def _apply_bn(self, 
tensor): 177 | return tf.contrib.layers.batch_norm( 178 | tensor, 179 | scale=True, 180 | center=True, 181 | is_training=self.use_batch_statistics, 182 | trainable=True, 183 | data_format=self._data_format, 184 | updates_collections=None, 185 | ) 186 | 187 | def _build_residual_block(self, x, num_in, num_out, name='res_block'): 188 | with tf.variable_scope(name): 189 | half_num_out = max(int(num_out/2), 1) 190 | c = x 191 | with tf.variable_scope('conv1'): 192 | c = tf.nn.relu(self._apply_bn(c)) 193 | c = self._apply_conv(c, num_features=half_num_out, kernel_size=1, stride=1) 194 | with tf.variable_scope('conv2'): 195 | c = tf.nn.relu(self._apply_bn(c)) 196 | c = self._apply_conv(c, num_features=half_num_out, kernel_size=3, stride=1) 197 | with tf.variable_scope('conv3'): 198 | c = tf.nn.relu(self._apply_bn(c)) 199 | c = self._apply_conv(c, num_features=num_out, kernel_size=1, stride=1) 200 | with tf.variable_scope('skip'): 201 | if num_in == num_out: 202 | s = tf.identity(x) 203 | else: 204 | s = self._apply_conv(x, num_features=num_out, kernel_size=1, stride=1) 205 | x = c + s 206 | return x 207 | 208 | def _build_hourglass(self, x, steps_to_go, num_features, depth=1): 209 | with tf.variable_scope('depth%d' % depth): 210 | # Upper branch 211 | up1 = x 212 | for i in range(self._hg_num_residual_blocks): 213 | up1 = self._build_residual_block(up1, num_features, num_features, 214 | name='up1_%d' % (i + 1)) 215 | # Lower branch 216 | low1 = self._apply_pool(x, kernel_size=2, stride=2) 217 | for i in range(self._hg_num_residual_blocks): 218 | low1 = self._build_residual_block(low1, num_features, num_features, 219 | name='low1_%d' % (i + 1)) 220 | # Recursive 221 | low2 = None 222 | if steps_to_go > 1: 223 | low2 = self._build_hourglass(low1, steps_to_go - 1, num_features, depth=depth+1) 224 | else: 225 | low2 = low1 226 | for i in range(self._hg_num_residual_blocks): 227 | low2 = self._build_residual_block(low2, num_features, num_features, 228 | name='low2_%d' % (i + 1)) 229 | # Additional residual blocks 230 | low3 = low2 231 | for i in range(self._hg_num_residual_blocks): 232 | low3 = self._build_residual_block(low3, num_features, num_features, 233 | name='low3_%d' % (i + 1)) 234 | # Upsample 235 | if self._data_format == 'NCHW': # convert to NHWC 236 | low3 = tf.transpose(low3, (0, 2, 3, 1)) 237 | up2 = tf.image.resize_bilinear( 238 | low3, 239 | up1.shape[1:3] if self._data_format == 'NHWC' else up1.shape[2:4], 240 | align_corners=True, 241 | ) 242 | if self._data_format == 'NCHW': # convert back from NHWC 243 | up2 = tf.transpose(up2, (0, 3, 1, 2)) 244 | 245 | return up1 + up2 246 | 247 | def _build_hourglass_after(self, x_prev, x_now, do_merge=True): 248 | with tf.variable_scope('after'): 249 | for j in range(self._hg_num_residual_blocks): 250 | x_now = self._build_residual_block(x_now, self._hg_num_feature_maps, 251 | self._hg_num_feature_maps, 252 | name='after_hg_%d' % (j + 1)) 253 | x_now = self._apply_conv(x_now, self._hg_num_feature_maps, kernel_size=1, stride=1) 254 | x_now = self._apply_bn(x_now) 255 | x_now = tf.nn.relu(x_now) 256 | 257 | with tf.variable_scope('hmap'): 258 | h = self._apply_conv(x_now, self._hg_num_landmarks, kernel_size=1, stride=1) 259 | 260 | x_next = x_now 261 | if do_merge: 262 | with tf.variable_scope('merge'): 263 | with tf.variable_scope('h'): 264 | x_hmaps = self._apply_conv(h, self._hg_num_feature_maps, kernel_size=1, stride=1) 265 | with tf.variable_scope('x'): 266 | x_now = self._apply_conv(x_now, self._hg_num_feature_maps, kernel_size=1, stride=1) 
267 | x_next += x_prev + x_hmaps 268 | return x_next, h 269 | 270 | _softargmax_coords = None 271 | 272 | def _calculate_landmarks(self, x): 273 | """Estimate landmark location from heatmaps.""" 274 | with tf.variable_scope('argsoftmax'): 275 | if self._data_format == 'NHWC': 276 | _, h, w, _ = x.shape.as_list() 277 | else: 278 | _, _, h, w = x.shape.as_list() 279 | if self._softargmax_coords is None: 280 | # Assume normalized coordinate [0, 1] for numeric stability 281 | ref_xs, ref_ys = np.meshgrid(np.linspace(0, 1.0, num=w, endpoint=True), 282 | np.linspace(0, 1.0, num=h, endpoint=True), 283 | indexing='xy') 284 | ref_xs = np.reshape(ref_xs, [-1, h*w]) 285 | ref_ys = np.reshape(ref_ys, [-1, h*w]) 286 | self._softargmax_coords = ( 287 | tf.constant(ref_xs, dtype=tf.float32), 288 | tf.constant(ref_ys, dtype=tf.float32), 289 | ) 290 | ref_xs, ref_ys = self._softargmax_coords 291 | 292 | # Assuming N x 18 x 45 x 75 (NCHW) 293 | beta = 1e2 294 | if self._data_format == 'NHWC': 295 | x = tf.transpose(x, (0, 3, 1, 2)) 296 | x = tf.reshape(x, [-1, self._hg_num_landmarks, h*w]) 297 | x = tf.nn.softmax(beta * x, axis=-1) 298 | lmrk_xs = tf.reduce_sum(ref_xs * x, axis=[2]) 299 | lmrk_ys = tf.reduce_sum(ref_ys * x, axis=[2]) 300 | 301 | # Return to actual coordinates ranges 302 | return tf.stack([ 303 | lmrk_xs * (w - 1.0) + 0.5, 304 | lmrk_ys * (h - 1.0) + 0.5, 305 | ], axis=2) # N x 18 x 2 306 | 307 | 308 | def estimate_gaze_from_landmarks(iris_landmarks, iris_centre, eyeball_centre, eyeball_radius, 309 | initial_gaze=None): 310 | """Given iris edge landmarks and other coordinates, estimate gaze direction. 311 | 312 | More correctly stated, estimate gaze from iris edge landmark coordinates, iris centre 313 | coordinates, eyeball centre coordinates, and eyeball radius in pixels. 
314 | """ 315 | e_x0, e_y0 = eyeball_centre 316 | i_x0, i_y0 = iris_centre 317 | 318 | if initial_gaze is not None: 319 | theta, phi = initial_gaze 320 | # theta = -theta 321 | else: 322 | theta = np.arcsin(np.clip((i_y0 - e_y0) / eyeball_radius, -1.0, 1.0)) 323 | phi = np.arcsin(np.clip((i_x0 - e_x0) / (eyeball_radius * -np.cos(theta)), -1.0, 1.0)) 324 | 325 | delta = 0.1 * np.pi 326 | if iris_landmarks[0, 0] < iris_landmarks[4, 0]: # flipped 327 | alphas = np.flip(np.arange(0.0, 2.0 * np.pi, step=np.pi/4.0), axis=0) 328 | else: 329 | alphas = np.arange(-np.pi, np.pi, step=np.pi/4.0) + np.pi/4.0 330 | sin_alphas = np.sin(alphas) 331 | cos_alphas = np.cos(alphas) 332 | 333 | def gaze_fit_loss_func(inputs): 334 | theta, phi, delta, phase = inputs 335 | sin_phase = np.sin(phase) 336 | cos_phase = np.cos(phase) 337 | # sin_alphas_shifted = np.sin(alphas + phase) 338 | sin_alphas_shifted = sin_alphas * cos_phase + cos_alphas * sin_phase 339 | # cos_alphas_shifted = np.cos(alphas + phase) 340 | cos_alphas_shifted = cos_alphas * cos_phase - sin_alphas * sin_phase 341 | 342 | sin_theta = np.sin(theta) 343 | cos_theta = np.cos(theta) 344 | sin_phi = np.sin(phi) 345 | cos_phi = np.cos(phi) 346 | sin_delta_sin = np.sin(delta * sin_alphas_shifted) 347 | sin_delta_cos = np.sin(delta * cos_alphas_shifted) 348 | cos_delta_sin = np.cos(delta * sin_alphas_shifted) 349 | cos_delta_cos = np.cos(delta * cos_alphas_shifted) 350 | # x = -np.cos(theta + delta * sin_alphas_shifted) 351 | x1 = -cos_theta * cos_delta_sin + sin_theta * sin_delta_sin 352 | # x *= np.sin(phi + delta * cos_alphas_shifted) 353 | x2 = sin_phi * cos_delta_cos + cos_phi * sin_delta_cos 354 | x = x1 * x2 355 | # y = np.sin(theta + delta * sin_alphas_shifted) 356 | y1 = sin_theta * cos_delta_sin 357 | y2 = cos_theta * sin_delta_sin 358 | y = y1 + y2 359 | 360 | ix = e_x0 + eyeball_radius * x 361 | iy = e_y0 + eyeball_radius * y 362 | dx = ix - iris_landmarks[:, 0] 363 | dy = iy - iris_landmarks[:, 1] 364 | out = np.mean(dx ** 2 + dy ** 2) 365 | 366 | # In addition, match estimated and actual iris centre 367 | iris_dx = e_x0 + eyeball_radius * -cos_theta * sin_phi - i_x0 368 | iris_dy = e_y0 + eyeball_radius * sin_theta - i_y0 369 | out += iris_dx ** 2 + iris_dy ** 2 370 | 371 | # sin_alphas_shifted = sin_alphas * cos_phase + cos_alphas * sin_phase 372 | # cos_alphas_shifted = cos_alphas * cos_phase - sin_alphas * sin_phase 373 | dsin_alphas_shifted_dphase = -sin_alphas * sin_phase + cos_alphas * cos_phase 374 | dcos_alphas_shifted_dphase = -cos_alphas * sin_phase - sin_alphas * cos_phase 375 | 376 | # sin_delta_sin = np.sin(delta * sin_alphas_shifted) 377 | # sin_delta_cos = np.sin(delta * cos_alphas_shifted) 378 | # cos_delta_sin = np.cos(delta * sin_alphas_shifted) 379 | # cos_delta_cos = np.cos(delta * cos_alphas_shifted) 380 | dsin_delta_sin_ddelta = cos_delta_sin * sin_alphas_shifted 381 | dsin_delta_cos_ddelta = cos_delta_cos * cos_alphas_shifted 382 | dcos_delta_sin_ddelta = -sin_delta_sin * sin_alphas_shifted 383 | dcos_delta_cos_ddelta = -sin_delta_cos * cos_alphas_shifted 384 | dsin_delta_sin_dphase = cos_delta_sin * delta * dsin_alphas_shifted_dphase 385 | dsin_delta_cos_dphase = cos_delta_cos * delta * dcos_alphas_shifted_dphase 386 | dcos_delta_sin_dphase = -sin_delta_sin * delta * dsin_alphas_shifted_dphase 387 | dcos_delta_cos_dphase = -sin_delta_cos * delta * dcos_alphas_shifted_dphase 388 | 389 | # x1 = -cos_theta * cos_delta_sin + sin_theta * sin_delta_sin 390 | # x2 = sin_phi * cos_delta_cos + cos_phi * sin_delta_cos 391 
| dx1_dtheta = sin_theta * cos_delta_sin + cos_theta * sin_delta_sin 392 | dx2_dtheta = 0.0 393 | dx1_dphi = 0.0 394 | dx2_dphi = cos_phi * cos_delta_cos - sin_phi * sin_delta_cos 395 | dx1_ddelta = -cos_theta * dcos_delta_sin_ddelta + sin_theta * dsin_delta_sin_ddelta 396 | dx2_ddelta = sin_phi * dcos_delta_cos_ddelta + cos_phi * dsin_delta_cos_ddelta 397 | dx1_dphase = -cos_theta * dcos_delta_sin_dphase + sin_theta * dsin_delta_sin_dphase 398 | dx2_dphase = sin_phi * dcos_delta_cos_dphase + cos_phi * dsin_delta_cos_dphase 399 | 400 | # y1 = sin_theta * cos_delta_sin 401 | # y2 = cos_theta * sin_delta_sin 402 | dy1_dtheta = cos_theta * cos_delta_sin 403 | dy2_dtheta = -sin_theta * sin_delta_sin 404 | dy1_dphi = 0.0 405 | dy2_dphi = 0.0 406 | dy1_ddelta = sin_theta * dcos_delta_sin_ddelta 407 | dy2_ddelta = cos_theta * dsin_delta_sin_ddelta 408 | dy1_dphase = sin_theta * dcos_delta_sin_dphase 409 | dy2_dphase = cos_theta * dsin_delta_sin_dphase 410 | 411 | # x = x1 * x2 412 | # y = y1 + y2 413 | dx_dtheta = dx1_dtheta * x2 + x1 * dx2_dtheta 414 | dx_dphi = dx1_dphi * x2 + x1 * dx2_dphi 415 | dx_ddelta = dx1_ddelta * x2 + x1 * dx2_ddelta 416 | dx_dphase = dx1_dphase * x2 + x1 * dx2_dphase 417 | dy_dtheta = dy1_dtheta + dy2_dtheta 418 | dy_dphi = dy1_dphi + dy2_dphi 419 | dy_ddelta = dy1_ddelta + dy2_ddelta 420 | dy_dphase = dy1_dphase + dy2_dphase 421 | 422 | # ix = w_2 + eyeball_radius * x 423 | # iy = h_2 + eyeball_radius * y 424 | dix_dtheta = eyeball_radius * dx_dtheta 425 | dix_dphi = eyeball_radius * dx_dphi 426 | dix_ddelta = eyeball_radius * dx_ddelta 427 | dix_dphase = eyeball_radius * dx_dphase 428 | diy_dtheta = eyeball_radius * dy_dtheta 429 | diy_dphi = eyeball_radius * dy_dphi 430 | diy_ddelta = eyeball_radius * dy_ddelta 431 | diy_dphase = eyeball_radius * dy_dphase 432 | 433 | # dx = ix - iris_landmarks[:, 0] 434 | # dy = iy - iris_landmarks[:, 1] 435 | ddx_dtheta = dix_dtheta 436 | ddx_dphi = dix_dphi 437 | ddx_ddelta = dix_ddelta 438 | ddx_dphase = dix_dphase 439 | ddy_dtheta = diy_dtheta 440 | ddy_dphi = diy_dphi 441 | ddy_ddelta = diy_ddelta 442 | ddy_dphase = diy_dphase 443 | 444 | # out = dx ** 2 + dy ** 2 445 | dout_dtheta = np.mean(2 * (dx * ddx_dtheta + dy * ddy_dtheta)) 446 | dout_dphi = np.mean(2 * (dx * ddx_dphi + dy * ddy_dphi)) 447 | dout_ddelta = np.mean(2 * (dx * ddx_ddelta + dy * ddy_ddelta)) 448 | dout_dphase = np.mean(2 * (dx * ddx_dphase + dy * ddy_dphase)) 449 | 450 | # iris_dx = e_x0 + eyeball_radius * -cos_theta * sin_phi - i_x0 451 | # iris_dy = e_y0 + eyeball_radius * sin_theta - i_y0 452 | # out += iris_dx ** 2 + iris_dy ** 2 453 | dout_dtheta += 2 * eyeball_radius * (sin_theta * sin_phi * iris_dx + cos_theta * iris_dy) 454 | dout_dphi += 2 * eyeball_radius * (-cos_theta * cos_phi * iris_dx) 455 | 456 | return out, np.array([dout_dtheta, dout_dphi, dout_ddelta, dout_dphase]) 457 | 458 | phase = 0.02 459 | result = scipy.optimize.minimize(gaze_fit_loss_func, x0=[theta, phi, delta, phase], 460 | bounds=( 461 | (-0.4*np.pi, 0.4*np.pi), 462 | (-0.4*np.pi, 0.4*np.pi), 463 | (0.01*np.pi, 0.5*np.pi), 464 | (-np.pi, np.pi), 465 | ), 466 | jac=True, 467 | tol=1e-6, 468 | method='TNC', 469 | options={ 470 | # 'disp': True, 471 | 'gtol': 1e-6, 472 | 'maxiter': 100, 473 | }) 474 | if result.success: 475 | theta, phi, delta, phase = result.x 476 | 477 | return np.array([-theta, phi]) 478 | -------------------------------------------------------------------------------- /src/util/gaze.py: 
-------------------------------------------------------------------------------- 1 | """Utility methods for gaze angle and error calculations.""" 2 | import cv2 as cv 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | def pitchyaw_to_vector(pitchyaws): 8 | r"""Convert given yaw (:math:`\theta`) and pitch (:math:`\phi`) angles to unit gaze vectors. 9 | 10 | Args: 11 | pitchyaws (:obj:`numpy.array`): yaw and pitch angles :math:`(n\times 2)` in radians. 12 | 13 | Returns: 14 | :obj:`numpy.array` of shape :math:`(n\times 3)` with 3D vectors per row. 15 | """ 16 | n = pitchyaws.shape[0] 17 | sin = np.sin(pitchyaws) 18 | cos = np.cos(pitchyaws) 19 | out = np.empty((n, 3)) 20 | out[:, 0] = np.multiply(cos[:, 0], sin[:, 1]) 21 | out[:, 1] = sin[:, 0] 22 | out[:, 2] = np.multiply(cos[:, 0], cos[:, 1]) 23 | return out 24 | 25 | 26 | def vector_to_pitchyaw(vectors): 27 | r"""Convert given gaze vectors to yaw (:math:`\theta`) and pitch (:math:`\phi`) angles. 28 | 29 | Args: 30 | vectors (:obj:`numpy.array`): gaze vectors in 3D :math:`(n\times 3)`. 31 | 32 | Returns: 33 | :obj:`numpy.array` of shape :math:`(n\times 2)` with values in radians. 34 | """ 35 | n = vectors.shape[0] 36 | out = np.empty((n, 2)) 37 | vectors = np.divide(vectors, np.linalg.norm(vectors, axis=1).reshape(n, 1)) 38 | out[:, 0] = np.arcsin(vectors[:, 1]) # theta 39 | out[:, 1] = np.arctan2(vectors[:, 0], vectors[:, 2]) # phi 40 | return out 41 | 42 | radians_to_degrees = 180.0 / np.pi 43 | 44 | 45 | def angular_error(a, b): 46 | """Calculate angular error (via cosine similarity).""" 47 | a = pitchyaw_to_vector(a) if a.shape[1] == 2 else a 48 | b = pitchyaw_to_vector(b) if b.shape[1] == 2 else b 49 | 50 | ab = np.sum(np.multiply(a, b), axis=1) 51 | a_norm = np.linalg.norm(a, axis=1) 52 | b_norm = np.linalg.norm(b, axis=1) 53 | 54 | # Avoid zero-values (to avoid NaNs) 55 | a_norm = np.clip(a_norm, a_min=1e-7, a_max=None) 56 | b_norm = np.clip(b_norm, a_min=1e-7, a_max=None) 57 | 58 | similarity = np.divide(ab, np.multiply(a_norm, b_norm)) 59 | 60 | return np.arccos(similarity) * radians_to_degrees 61 | 62 | 63 | def mean_angular_error(a, b): 64 | """Calculate mean angular error (via cosine similarity).""" 65 | return np.mean(angular_error(a, b)) 66 | 67 | 68 | def tensorflow_angular_error_from_pitchyaw(y_true, y_pred): 69 | """Tensorflow method to calculate angular loss from head angles.""" 70 | def angles_to_unit_vectors(y): 71 | sin = tf.sin(y) 72 | cos = tf.cos(y) 73 | return tf.stack([ 74 | tf.multiply(cos[:, 0], sin[:, 1]), 75 | sin[:, 0], 76 | tf.multiply(cos[:, 0], cos[:, 1]), 77 | ], axis=1) 78 | 79 | with tf.name_scope('mean_angular_error'): 80 | v_true = angles_to_unit_vectors(y_true) 81 | v_pred = angles_to_unit_vectors(y_pred) 82 | return tensorflow_angular_error_from_vector(v_true, v_pred) 83 | 84 | 85 | def tensorflow_angular_error_from_vector(v_true, v_pred): 86 | """Tensorflow method to calculate angular loss from 3D vector.""" 87 | with tf.name_scope('mean_angular_error'): 88 | v_true_norm = tf.sqrt(tf.reduce_sum(tf.square(v_true), axis=1)) 89 | v_pred_norm = tf.sqrt(tf.reduce_sum(tf.square(v_pred), axis=1)) 90 | 91 | sim = tf.div(tf.reduce_sum(tf.multiply(v_true, v_pred), axis=1), 92 | tf.multiply(v_true_norm, v_pred_norm)) 93 | 94 | # Floating point precision can cause sim values to be slightly outside of 95 | # [-1, 1] so we clip values 96 | sim = tf.clip_by_value(sim, -1.0 + 1e-6, 1.0 - 1e-6) 97 | 98 | ang = tf.scalar_mul(radians_to_degrees, tf.acos(sim)) 99 | return tf.reduce_mean(ang) 100 | 101 | 
102 | def draw_gaze(image_in, eye_pos, pitchyaw, length=40.0, thickness=2, color=(0, 0, 255)): 103 | """Draw gaze angle on given image with a given eye positions.""" 104 | image_out = image_in 105 | if len(image_out.shape) == 2 or image_out.shape[2] == 1: 106 | image_out = cv.cvtColor(image_out, cv.COLOR_GRAY2BGR) 107 | dx = -length * np.sin(pitchyaw[1]) 108 | dy = -length * np.sin(pitchyaw[0]) 109 | cv.arrowedLine(image_out, tuple(np.round(eye_pos).astype(np.int32)), 110 | tuple(np.round([eye_pos[0] + dx, eye_pos[1] + dy]).astype(int)), color, 111 | thickness, cv.LINE_AA, tipLength=0.2) 112 | return image_out 113 | -------------------------------------------------------------------------------- /src/util/gazemap.py: -------------------------------------------------------------------------------- 1 | """Utility methods for generating gazemaps.""" 2 | import cv2 as cv 3 | import numpy as np 4 | 5 | height_to_eyeball_radius_ratio = 1.1 6 | eyeball_radius_to_iris_diameter_ratio = 1.0 7 | 8 | def from_gaze2d(gaze, output_size, scale=1.0): 9 | """Generate a normalized pictorial representation of 3D gaze direction.""" 10 | gazemaps = [] 11 | oh, ow = np.round(scale * np.asarray(output_size)).astype(np.int32) 12 | oh_2 = int(np.round(0.5 * oh)) 13 | ow_2 = int(np.round(0.5 * ow)) 14 | r = int(height_to_eyeball_radius_ratio * oh_2) 15 | theta, phi = gaze 16 | theta = -theta 17 | sin_theta = np.sin(theta) 18 | cos_theta = np.cos(theta) 19 | sin_phi = np.sin(phi) 20 | cos_phi = np.cos(phi) 21 | 22 | # Draw iris 23 | eyeball_radius = int(height_to_eyeball_radius_ratio * oh_2) 24 | iris_radius_angle = np.arcsin(0.5 * eyeball_radius_to_iris_diameter_ratio) 25 | iris_radius = eyeball_radius_to_iris_diameter_ratio * eyeball_radius 26 | iris_distance = float(eyeball_radius) * np.cos(iris_radius_angle) 27 | iris_offset = np.asarray([ 28 | -iris_distance * sin_phi * cos_theta, 29 | iris_distance * sin_theta, 30 | ]) 31 | iris_centre = np.asarray([ow_2, oh_2]) + iris_offset 32 | angle = np.degrees(np.arctan2(iris_offset[1], iris_offset[0])) 33 | ellipse_max = eyeball_radius_to_iris_diameter_ratio * iris_radius 34 | ellipse_min = np.abs(ellipse_max * cos_phi * cos_theta) 35 | gazemap = np.zeros((oh, ow), dtype=np.float32) 36 | gazemap = cv.ellipse(gazemap, box=(iris_centre, (ellipse_min, ellipse_max), angle), 37 | color=1.0, thickness=-1, lineType=cv.LINE_AA) 38 | gazemaps.append(gazemap) 39 | 40 | # Draw eyeball 41 | gazemap = np.zeros((oh, ow), dtype=np.float32) 42 | gazemap = cv.circle(gazemap, (ow_2, oh_2), r, color=1, thickness=-1) 43 | gazemaps.append(gazemap) 44 | 45 | return np.asarray(gazemaps) 46 | -------------------------------------------------------------------------------- /src/util/heatmap.py: -------------------------------------------------------------------------------- 1 | """Utility methods for generating and visualizing heatmaps.""" 2 | import numpy as np 3 | 4 | 5 | def gaussian_2d(shape, centre, sigma=1.0): 6 | """Generate heatmap with single 2D gaussian.""" 7 | xs = np.arange(0.5, shape[1] + 0.5, step=1.0, dtype=np.float32) 8 | ys = np.expand_dims(np.arange(0.5, shape[0] + 0.5, step=1.0, dtype=np.float32), -1) 9 | alpha = -0.5 / (sigma**2) 10 | heatmap = np.exp(alpha * ((xs - centre[0])**2 + (ys - centre[1])**2)) 11 | return heatmap 12 | --------------------------------------------------------------------------------
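
The utility modules above (`util/gaze.py`, `util/gazemap.py`, `util/heatmap.py`) are plain numpy/OpenCV/Tensorflow helpers and can be exercised in isolation. The following is a minimal, illustrative sketch — not part of the repository — which assumes the working directory is `src/` (as when running the provided training and demo scripts) so that the `util` package is importable:

```
# Minimal usage sketch (assumption: run from within src/ so that `util` is importable).
import numpy as np

from util.gaze import angular_error, pitchyaw_to_vector
from util.gazemap import from_gaze2d
from util.heatmap import gaussian_2d

# Pitch/yaw gaze angles in radians, shaped (n, 2).
y_true = np.array([[0.10, -0.20], [0.00, 0.30]])
y_pred = np.array([[0.12, -0.18], [-0.05, 0.25]])
print(pitchyaw_to_vector(y_true).shape)  # (2, 3) unit gaze vectors
print(angular_error(y_true, y_pred))     # per-sample angular error in degrees

# Two-channel pictorial gaze representation (iris ellipse + eyeball circle),
# here drawn at half the resolution of a 36x60 eye image.
gazemaps = from_gaze2d((0.10, -0.20), output_size=(36, 60), scale=0.5)
print(gazemaps.shape)                    # (2, 18, 30)

# Single 2D Gaussian heatmap of the kind used as a per-landmark target.
hmap = gaussian_2d(shape=(36, 60), centre=(20.0, 10.0), sigma=3.0)
print(hmap.shape, float(hmap.max()))
```

The two-channel gazemaps correspond to the pictorial representation supervised via the `gazemaps_ce` term in the DPG model, while the Gaussian heatmaps correspond to the per-landmark targets supervised via the `heatmaps_mse` term in the ELG model; the exact sizes and scales used during training are determined by the data sources, so the values above are placeholders.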