├── models ├── NTURGBD │ ├── __init__.py │ ├── nturgbd_rnn.py │ └── bnlstm.py ├── __init__.py ├── HybridModel │ ├── __init__.py │ ├── hybrid.py │ ├── bnlstm.py │ ├── vgg.py │ └── inception_resnet_v2.py └── models.py ├── README.md ├── scripts ├── __init__.py ├── do_test.sh ├── do_train.sh ├── preprocess ├── nturgbd.py └── preprocess.py ├── data ├── __init__.py └── loaders.py ├── utils ├── __init__.py ├── losses.py ├── export_model.py ├── eval_util.py └── utils.py ├── run.sh ├── LICENSE ├── .gitignore ├── test.py └── train.py /models/NTURGBD/__init__.py: -------------------------------------------------------------------------------- 1 | from nturgbd_rnn import * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 3DActionRecognition 2 | 3D skeleton-based human action recognition (for WACV) 3 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | import nturgbd, preprocess 2 | 3 | __all__ = ['nturgbd', 'preprocess'] 4 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from models import NTURGBD_RNN, Hybrid 2 | 3 | __all__ = ['NTURGBD_RNN', 'Hybrid'] 4 | -------------------------------------------------------------------------------- /models/HybridModel/__init__.py: -------------------------------------------------------------------------------- 1 | from inception_resnet_v2 import * 2 | from bnlstm import * 3 | from hybrid import * 4 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from loaders import NTURGBD, HybridModelReader 2 | 3 | __all__ = ['NTURGBD', 'HybridModelReader'] 4 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | import utils, eval_util, export_model, losses 2 | 3 | __all__ = ['utils', 'eval_util', 'export_model', 'losses'] 4 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python train.py --train_dir=/home/procastinator/nturgbd_twostreamcnn --dataset_dir=/home/procastinator/nturgb+d_images_new \ 4 | --splits_dir=/home/procastinator/NTU_data --split_num 2 --checkpoint_file=/home/procastinator/pretrainedCheckpoints/vgg_19.ckpt 5 | -------------------------------------------------------------------------------- /scripts/do_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./preprocess "/home/procastinator/NTU_data/nturgb+d_skeletons" "/home/procastinator/NTU_data" "/home/procastinator/nturgb+d_images" 1 False 4 | ./preprocess "/home/procastinator/NTU_data/nturgb+d_skeletons" "/home/procastinator/NTU_data" "/home/procastinator/nturgb+d_images" 2 False 5 | -------------------------------------------------------------------------------- /scripts/do_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./preprocess 
"/users/kalpit.t/NTU_data/nturgb+d_skeletons" "/users/kalpit.t/NTU_data" "/Pulsar2/mohit.jain/kalpit_data/nturgb+d_images_0" 1 True 4 | ./preprocess "/users/kalpit.t/NTU_data/nturgb+d_skeletons" "/users/kalpit.t/NTU_data" "/Pulsa2/mohit.jain/kalpit_data/nturgb+d_images_0" 2 True 5 | -------------------------------------------------------------------------------- /scripts/preprocess: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Preprocess the data 4 | if [ $# -lt 3 ] 5 | then 6 | echo "Usage: ./preprocess " 7 | exit 1 8 | fi 9 | 10 | if [ $# -eq 3 ] 11 | then 12 | `python preprocess.py --dataset_dir=$1 --splits_dir=$2 --output_dir=$3` 13 | elif [ $# -eq 4 ] 14 | then 15 | `python preprocess.py --dataset_dir=$1 --splits_dir=$2 --output_dir=$3 --split_num=$4` 16 | elif [ $# -eq 5 ] 17 | then 18 | `python preprocess.py --dataset_dir=$1 --splits_dir=$2 --output_dir=$3 --split_num=$4 --is_training=$5` 19 | fi 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Kalpit Thakkar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /models/models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | slim = tf.contrib.slim 3 | framework = tf.contrib.framework 4 | 5 | from NTURGBD import * 6 | from HybridModel import * 7 | 8 | class NTURGBD_RNN(object): 9 | 10 | def __init__(self): 11 | self.name = "NTURGBD_RNN" 12 | 13 | def create_model(self, inputs, num_classes, labels, **unused_params): 14 | model = nturgbd_rnn.SkeletonHRNNet() 15 | output = model.create_model(inputs, num_classes, labels) 16 | 17 | return output 18 | 19 | class Hybrid(object): 20 | 21 | def __init__(self): 22 | self.name = "HybridModel" 23 | 24 | def create_feature_model(self, inputs, scope='', is_training=True, **unused_params): 25 | feature, restore_vars, tvars = hybrid.get_pretrained_model_feats(inputs, scope, is_training) 26 | return feature, restore_vars, tvars 27 | 28 | def create_aux_model(self, inputs, is_training=True, **unused_params): 29 | outputs, tvars = hybrid.get_temporal_mean_pooled_feats(inputs, is_training) 30 | return outputs, tvars 31 | 32 | def create_logits_model(self, inputs, num_classes, is_training=True, scope="logits", reuse=None): 33 | outputs, tvars = hybrid.get_classifier_logits(inputs, num_classes, is_training=is_training, lscope=scope, reuse=reuse) 34 | return outputs, tvars 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # Run script 104 | run.sh 105 | 106 | # checkpoints 107 | *.ckpt* 108 | -------------------------------------------------------------------------------- /models/HybridModel/hybrid.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import tensorflow.contrib.framework as framework 4 | import tensorflow.contrib.rnn as rnn 5 | 6 | from inception_resnet_v2 import * 7 | from vgg import * 8 | from bnlstm import * 9 | 10 | def length(sequence): 11 | used = tf.sign(tf.reduce_max(tf.abs(sequence), 2)) 12 | length = tf.reduce_sum(used, 1) 13 | length = tf.cast(length, tf.int32) 14 | return length 15 | 16 | def common_arg_scope(weight_decay=0.00004, 17 | batch_norm_decay=0.9997, 18 | batch_norm_epsilon=0.001): 19 | # Set weight_decay for weights in conv2d and fully_connected layers. 20 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 21 | weights_regularizer=slim.l2_regularizer(weight_decay), 22 | biases_regularizer=slim.l2_regularizer(weight_decay)): 23 | 24 | batch_norm_params = { 25 | 'decay': batch_norm_decay, 26 | 'epsilon': batch_norm_epsilon, 27 | } 28 | # Set activation_fn and parameters for batch_norm. 
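# Under the returned scope, every slim.conv2d defaults to a ReLU activation followed by
# batch normalization with the decay/epsilon set above, while fully_connected layers only
# inherit the L2 weight/bias regularizers; get_classifier_logits() below reuses this scope.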
29 | with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, 30 | normalizer_fn=slim.batch_norm, 31 | normalizer_params=batch_norm_params) as scope: 32 | return scope 33 | 34 | def last_relevant(output, length): 35 | batch_size = tf.shape(output)[0] 36 | max_length = tf.shape(output)[1] 37 | out_size = int(output.get_shape()[2]) 38 | index = tf.range(0, batch_size) * max_length + (length - 1) 39 | flat = tf.reshape(output, [-1, out_size]) 40 | relevant = tf.gather(flat, index) 41 | return relevant 42 | 43 | def get_pretrained_model_feats(inputs, scopename='', is_training=True): 44 | # VGG 19 for feature extraction 45 | scope = vgg_arg_scope() 46 | with slim.arg_scope(scope): 47 | with tf.variable_scope(scopename): 48 | _, end_points = vgg_19(inputs) 49 | features = end_points[scopename+'/vgg_19/conv5/conv5_1'] # 14 x 14 x 512 50 | restore_vars = framework.get_variables(scopename) 51 | 52 | tvars = [] 53 | 54 | return features, restore_vars, tvars 55 | 56 | def get_temporal_mean_pooled_feats(inputs, is_training=True): 57 | # Temporal Average pooling 58 | with tf.variable_scope('temporal_mean_pool'): 59 | pooled_features = slim.avg_pool2d(inputs, (14, 1), stride=1, padding='VALID', scope='AvgPool_8x1') 60 | features = slim.flatten(pooled_features) 61 | tvars = framework.get_variables('temporal_mean_pool') 62 | 63 | return features, tvars 64 | 65 | def get_classifier_logits(inputs, num_classes, is_training=True, lscope='', reuse=None): 66 | # Primary Classifier 67 | scope = common_arg_scope() 68 | with slim.arg_scope(scope): 69 | with tf.variable_scope(lscope, reuse=reuse): 70 | plogits = slim.fully_connected(inputs, 2048, activation_fn=tf.nn.relu, scope='PreLogits') 71 | dropout = slim.dropout(plogits, 0.8, is_training=is_training, scope='Logits_dropout') 72 | logits = slim.fully_connected(dropout, num_classes, activation_fn=None, scope='Final_Logits') 73 | 74 | tvars = framework.get_variables(lscope) 75 | return logits, tvars 76 | -------------------------------------------------------------------------------- /utils/losses.py: -------------------------------------------------------------------------------- 1 | """Provides definitions for non-regularized training or test losses.""" 2 | 3 | import tensorflow as tf 4 | 5 | class BaseLoss(object): 6 | """Inherit from this class when implementing new losses.""" 7 | 8 | def calculate_loss(self, unused_predictions, unused_labels, **unused_params): 9 | """Calculates the average loss of the examples in a mini-batch. 10 | 11 | Args: 12 | unused_predictions: a 2-d tensor storing the prediction scores, in which 13 | each row represents a sample in the mini-batch and each column 14 | represents a class. 15 | unused_labels: a 2-d tensor storing the labels, which has the same shape 16 | as the unused_predictions. The labels must be in the range of 0 and 1. 17 | unused_params: loss specific parameters. 18 | 19 | Returns: 20 | A scalar loss tensor. 21 | """ 22 | raise NotImplementedError() 23 | 24 | 25 | class CrossEntropyLoss(BaseLoss): 26 | """Calculate the cross entropy loss between the predictions and labels. 
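The per-class term is -(y * log(p + eps) + (1 - y) * log(1 - p + eps)); the terms are
summed over classes and averaged over the mini-batch, as implemented in calculate_loss below.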
27 | """ 28 | 29 | def calculate_loss(self, predictions, labels, **unused_params): 30 | with tf.name_scope("loss_xent"): 31 | epsilon = 10e-6 32 | float_labels = tf.cast(labels, tf.float32) 33 | cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + ( 34 | 1 - float_labels) * tf.log(1 - predictions + epsilon) 35 | cross_entropy_loss = tf.negative(cross_entropy_loss) 36 | return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1)) 37 | 38 | 39 | class HingeLoss(BaseLoss): 40 | """Calculate the hinge loss between the predictions and labels. 41 | 42 | Note the subgradient is used in the backpropagation, and thus the optimization 43 | may converge slower. The predictions trained by the hinge loss are between -1 44 | and +1. 45 | """ 46 | 47 | def calculate_loss(self, predictions, labels, b=1.0, **unused_params): 48 | with tf.name_scope("loss_hinge"): 49 | float_labels = tf.cast(labels, tf.float32) 50 | all_zeros = tf.zeros(tf.shape(float_labels), dtype=tf.float32) 51 | all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32) 52 | sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones) 53 | hinge_loss = tf.maximum( 54 | all_zeros, tf.scalar_mul(b, all_ones) - sign_labels * predictions) 55 | return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1)) 56 | 57 | 58 | class SoftmaxLoss(BaseLoss): 59 | """Calculate the softmax loss between the predictions and labels. 60 | 61 | The function calculates the loss in the following way: first we feed the 62 | predictions to the softmax activation function and then we calculate 63 | the minus linear dot product between the logged softmax activations and the 64 | normalized ground truth label. 65 | 66 | It is an extension to the one-hot label. It allows for more than one positive 67 | labels for each sample. 
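Concretely, each label row is L1-normalized to a distribution q and the loss is
-sum_j q_j * log(softmax(predictions)_j), averaged over the mini-batch, as implemented below.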
68 | """ 69 | 70 | def calculate_loss(self, predictions, labels, **unused_params): 71 | with tf.name_scope("loss_softmax"): 72 | epsilon = 10e-8 73 | float_labels = tf.cast(labels, tf.float32) 74 | # l1 normalization (labels are no less than 0) 75 | label_rowsum = tf.maximum( 76 | tf.reduce_sum(float_labels, 1, keep_dims=True), 77 | epsilon) 78 | norm_float_labels = tf.div(float_labels, label_rowsum) 79 | softmax_outputs = tf.nn.softmax(predictions) 80 | softmax_loss = tf.negative(tf.reduce_sum( 81 | tf.multiply(norm_float_labels, tf.log(softmax_outputs)), 1)) 82 | return tf.reduce_mean(softmax_loss) 83 | -------------------------------------------------------------------------------- /data/loaders.py: -------------------------------------------------------------------------------- 1 | import os, argparse 2 | import threading 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.python.framework import ops 6 | from tensorflow.python.framework import dtypes 7 | 8 | gfile = tf.gfile 9 | slim = tf.contrib.slim 10 | 11 | class NTURGBD(object): 12 | 13 | def __init__(self, dataset_dir, split_dir, num_epochs, batch_size, split=1): 14 | self.dataset_dir = dataset_dir 15 | self.splits_dir = split_dir 16 | self.num_splits = 2 17 | self.num_classes = 60 18 | self.num_epochs = num_epochs 19 | self.batch_size = batch_size 20 | self.present_split = split 21 | 22 | self.train_split_files = {'1': 'train1', '2': 'train2'} 23 | self.val_split_files = {'1': 'val1', '2': 'val2'} 24 | self.test_split_files = {'1': 'test1', '2': 'test2'} 25 | f = open(os.path.join(split_dir, 'faulty_skeletons'), 'r') 26 | self.faulty_samples = f.readlines() 27 | f.close() 28 | 29 | def _read_filelist(self, split='1', train=True, **kwargs): 30 | if train: 31 | files = gfile.ListDirectory(os.path.join(self.dataset_dir, self.train_split_files[split])) 32 | files = [os.path.join(self.dataset_dir, self.train_split_files[split], x) for x in files] 33 | else: 34 | files = gfile.ListDirectory(os.path.join(self.dataset_dir, self.test_split_files[split])) 35 | files = [os.path.join(self.dataset_dir, self.test_split_files[split], x) for x in files] 36 | return files 37 | 38 | def _read_samples(self, input_queue): 39 | reader = tf.TFRecordReader() 40 | _, serialized_example = reader.read(input_queue) 41 | 42 | features = tf.parse_single_example( 43 | serialized_example, 44 | features={ 45 | 'feature': tf.FixedLenFeature([172800], tf.float32), 46 | 'label': tf.FixedLenFeature([], tf.int64)}) 47 | 48 | label = tf.cast(features['label'], tf.int64) 49 | feat_vec = tf.reshape(features['feature'], [300, 576]) 50 | 51 | return feat_vec, label 52 | 53 | class HybridModelReader(object): 54 | 55 | def __init__(self, dataset_dir, split_dir, num_epochs, batch_size, split=1): 56 | self.dataset_dir = dataset_dir 57 | self.splits_dir = split_dir 58 | self.num_splits = 2 59 | self.num_classes = 60 60 | self.num_epochs = num_epochs 61 | self.batch_size = batch_size 62 | self.present_split = split 63 | 64 | self.train_split_files = {'1': 'train1', '2': 'train2'} 65 | self.test_split_files = {'1': 'test1', '2': 'test2'} 66 | 67 | def _read_filelist(self, split='1', train=True, **kwargs): 68 | files = []; labels = [] 69 | if train: 70 | dirname = self.train_split_files[split] 71 | else: 72 | dirname = self.test_split_files[split] 73 | 74 | dirs = gfile.ListDirectory(os.path.join(self.dataset_dir, dirname)) 75 | for d in dirs: 76 | label = long(d.split('.')[0][-3:]) 77 | dpath = os.path.join(self.dataset_dir, dirname, d) 78 | dfiles = 
sorted(os.listdir(dpath)) 79 | flist = [] 80 | for fn in dfiles: 81 | flist.append(os.path.join(dpath, fn)) 82 | files.append(flist) 83 | labels.append(label) 84 | return files, labels 85 | 86 | def _read_samples(self, input_queue): 87 | file_list = tf.split(input_queue[0], [1]*12) 88 | images = [] 89 | for fn in file_list: 90 | file_content = tf.read_file(fn[0]) 91 | image = tf.image.decode_jpeg(file_content, channels=3) 92 | image = tf.image.resize_images(image, (224, 224)) 93 | image = tf.cast(image, tf.float32) 94 | images.append(image) 95 | 96 | label = input_queue[1] 97 | label = tf.cast(label - 1, tf.int64) 98 | image = ops.convert_to_tensor(images, dtypes.float32) 99 | 100 | return image, label 101 | -------------------------------------------------------------------------------- /utils/export_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | from tensorflow.python.saved_model import builder as saved_model_builder 7 | from tensorflow.python.saved_model import signature_constants 8 | from tensorflow.python.saved_model import signature_def_utils 9 | from tensorflow.python.saved_model import tag_constants 10 | from tensorflow.python.saved_model import utils as saved_model_utils 11 | from tensorflow.python.framework import ops 12 | from tensorflow.python.framework import dtypes 13 | 14 | _TOP_PREDICTIONS_IN_OUTPUT = 20 15 | 16 | class ModelExporter(object): 17 | 18 | def __init__(self, model, reader): 19 | self.model = model 20 | self.reader = reader 21 | 22 | with tf.Graph().as_default() as graph: 23 | self.inputs, self.outputs = self.build_inputs_and_outputs() 24 | self.graph = graph 25 | self.saver = tf.train.Saver(tf.trainable_variables(), sharded=True) 26 | 27 | def export_model(self, model_dir, global_step_val, last_checkpoint): 28 | """ Exports the model so that it can used for batch predictions """ 29 | 30 | with self.graph.as_default(): 31 | with tf.Session() as session: 32 | session.run(tf.global_variables_initializer()) 33 | self.saver.restore(session, last_checkpoint) 34 | 35 | signature = signature_def_utils.build_signature_def( 36 | inputs=self.inputs, 37 | outputs=self.outputs, 38 | method_name=signature_constants.PREDICT_METHOD_NAME) 39 | 40 | signature_map = {signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: 41 | signature} 42 | 43 | model_builder = saved_model_builder.SavedModelBuilder(model_dir) 44 | model_builder.add_meta_graph_and_variables(session, 45 | tags=[tag_constants.SERVING], 46 | signature_def_map=signature_map, 47 | clear_devices=True) 48 | model_builder.save() 49 | 50 | def build_inputs_and_outputs(self): 51 | split_num = self.reader.present_split 52 | 53 | top_indices_output, top_predictions_output = ( 54 | self.build_prediction_graph(split_num)) 55 | 56 | inputs = {"example_bytes": 57 | saved_model_utils.build_tensor_info(tf.constant(split_num))} 58 | 59 | outputs = { 60 | "class_indexes": saved_model_utils.build_tensor_info(top_indices_output), 61 | "predictions": saved_model_utils.build_tensor_info(top_predictions_output)} 62 | 63 | return inputs, outputs 64 | 65 | def build_prediction_graph(self, split): 66 | files, labels = self.reader._read_filelist(split=split) 67 | 68 | files = ops.convert_to_tensor(files, dtypes.string) 69 | labels = ops.convert_to_tensor(labels, dtypes.int64) 70 | 71 | input_queue = tf.train.slice_input_producer( 72 | [files, labels], 73 | num_epochs = self.reader.num_epochs, 74 | 
shuffle = True) 75 | image, label = self.reader._read_samples(input_queue) 76 | image = tf.image.resize_images(image, (299, 299)) 77 | 78 | image_loader, label_loader = tf.train.shuffle_batch( 79 | [image, label], 80 | batch_size = self.reader.batch_size, 81 | capacity = 5 * self.reader.batch_size, 82 | min_after_dequeue = self.reader.batch_size) 83 | 84 | #with tf.variable_scope("tower"): 85 | result = self.model.create_model( 86 | image_loader, 87 | self.reader.num_classes, 88 | label_loader, 89 | is_training=False) 90 | 91 | for variable in slim.get_model_variables(): 92 | tf.summary.histogram(variable.op.name, variable) 93 | 94 | predictions = result["predictions"] 95 | 96 | top_predictions, top_indices = tf.nn.top_k(predictions, 97 | _TOP_PREDICTIONS_IN_OUTPUT) 98 | return top_indices, top_predictions 99 | -------------------------------------------------------------------------------- /utils/eval_util.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import numpy 3 | 4 | from tensorflow.python.platform import gfile 5 | 6 | def flatten(l): 7 | """ Merges a list of lists into a single list. """ 8 | return [item for sublist in l for item in sublist] 9 | 10 | def calculate_hit_at_one(predictions, actuals): 11 | """Performs a local (numpy) calculation of the hit at one. 12 | 13 | Args: 14 | predictions: Matrix containing the outputs of the model. 15 | Dimensions are 'batch' x 'num_classes'. 16 | actuals: Matrix containing the ground truth labels. 17 | Dimensions are 'batch'. 18 | 19 | Returns: 20 | float: The average hit at one across the entire batch. 21 | """ 22 | top_prediction = numpy.argmax(predictions, 1) 23 | hits = [1 if (x == y) else 0 for x,y in zip(actuals, top_prediction)] 24 | return numpy.average(hits) 25 | 26 | def calculate_hit_at_five(predictions, actuals): 27 | """Performs a local (numpy) calculation of the hit at one. 28 | 29 | Args: 30 | predictions: Matrix containing the outputs of the model. 31 | Dimensions are 'batch' x 'num_classes'. 32 | actuals: Matrix containing the ground truth labels. 33 | Dimensions are 'batch'. 34 | 35 | Returns: 36 | float: The average hit at five across the entire batch. 37 | """ 38 | top_five_prediction = numpy.stack([numpy.argsort(predictions)[x][-5:] for x in range(predictions.shape[0])], 0) 39 | hits = [1 if (x in y) else 0 for x,y in zip(actuals, top_five_prediction)] 40 | return numpy.average(hits) 41 | 42 | class EvaluationMetrics(object): 43 | """A class to store the evaluation metrics.""" 44 | 45 | def __init__(self, num_class, top_k): 46 | """Construct an EvaluationMetrics object to store the evaluation metrics. 47 | 48 | Args: 49 | num_class: A positive integer specifying the number of classes. 50 | top_k: A positive integer specifying how many predictions are considered per video. 51 | 52 | Raises: 53 | ValueError: An error occurred when MeanAveragePrecisionCalculator cannot 54 | not be constructed. 55 | """ 56 | self.sum_hit_at_one = 0.0 57 | self.sum_loss = 0.0 58 | self.top_k = top_k 59 | self.num_examples = 0 60 | 61 | def accumulate(self, predictions, labels, loss): 62 | """Accumulate the metrics calculated locally for this mini-batch. 63 | 64 | Args: 65 | predictions: A numpy matrix containing the outputs of the model. 66 | Dimensions are 'batch' x 'num_classes'. 67 | labels: A numpy matrix containing the ground truth labels. 68 | Dimensions are 'batch' x 'num_classes'. 69 | loss: A numpy array containing the loss for each sample. 
70 | 71 | Returns: 72 | dictionary: A dictionary storing the metrics for the mini-batch. 73 | 74 | Raises: 75 | ValueError: An error occurred when the shape of predictions and actuals 76 | does not match. 77 | """ 78 | batch_size = labels.shape[0] 79 | mean_hit_at_one = calculate_hit_at_one(predictions, labels) 80 | mean_loss = numpy.mean(loss) 81 | 82 | self.num_examples += batch_size 83 | self.sum_hit_at_one += mean_hit_at_one * batch_size 84 | self.sum_loss += mean_loss * batch_size 85 | 86 | return {"hit_at_one": mean_hit_at_one, "loss": mean_loss} 87 | 88 | def get(self): 89 | """Calculate the evaluation metrics for the whole epoch. 90 | 91 | Raises: 92 | ValueError: If no examples were accumulated. 93 | 94 | Returns: 95 | dictionary: a dictionary storing the evaluation metrics for the epoch. The 96 | dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, and 97 | aps (default nan). 98 | """ 99 | if self.num_examples <= 0: 100 | raise ValueError("total_sample must be positive.") 101 | avg_hit_at_one = self.sum_hit_at_one / self.num_examples 102 | avg_loss = self.sum_loss / self.num_examples 103 | 104 | epoch_info_dict = {} 105 | return {"avg_hit_at_one": avg_hit_at_one, "avg_loss": avg_loss} 106 | 107 | def clear(self): 108 | """Clear the evaluation metrics and reset the EvaluationMetrics object.""" 109 | self.sum_hit_at_one = 0.0 110 | self.sum_loss = 0.0 111 | self.num_examples = 0 112 | -------------------------------------------------------------------------------- /models/NTURGBD/nturgbd_rnn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from six.moves import xrange 7 | 8 | slim = tf.contrib.slim 9 | framework = tf.contrib.framework 10 | rnn = tf.contrib.rnn 11 | layers = tf.contrib.layers 12 | 13 | import bnlstm 14 | 15 | from tensorflow.python.ops import variable_scope as vs 16 | 17 | from tensorflow.contrib.rnn import RNNCell 18 | from tensorflow.python.ops.math_ops import sigmoid 19 | from tensorflow.python.ops.math_ops import tanh 20 | 21 | def get_state_variables(batch_size, cell): 22 | # For each layer, get the initial state and make a variable out of it 23 | # to enable updating its value. 24 | state_variables = [] 25 | for state_c, state_h in cell.zero_state(batch_size, tf.float32): 26 | state_variables.append(tf.contrib.rnn.LSTMStateTuple( 27 | tf.Variable(state_c, trainable=False), 28 | tf.Variable(state_h, trainable=False))) 29 | # Return as a tuple, so that it can be fed to dynamic_rnn as an initial state 30 | return tuple(state_variables) 31 | 32 | 33 | def get_state_update_op(state_variables, new_states): 34 | # Add an operation to update the train states with the last state tensors 35 | update_ops = [] 36 | for state_variable, new_state in zip(state_variables, new_states): 37 | # Assign the new state to the state variables on this layer 38 | update_ops.extend([state_variable[0].assign(new_state[0]), 39 | state_variable[1].assign(new_state[1])]) 40 | # Return a tuple in order to combine all update_ops into a single operation. 41 | # The tuple's actual value should not be used. 42 | return tf.tuple(update_ops) 43 | 44 | def length(sequence): 45 | used = tf.sign(tf.reduce_max(tf.abs(sequence), 2)) 46 | length = tf.reduce_sum(used, 1) 47 | length = tf.cast(length, tf.int32) 48 | return length 49 | 50 | def cost(output, target): 51 | # Compute cross entropy for each frame. 
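# Padded frames have an all-zero target row, so the mask below removes their contribution;
# each sequence's summed loss is divided by its true (unpadded) length before the batch mean.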
52 | cross_entropy = target * tf.log(output) 53 | cross_entropy = -tf.reduce_sum(cross_entropy, 2) 54 | mask = tf.sign(tf.reduce_max(tf.abs(target), 2)) 55 | cross_entropy *= mask 56 | # Average over actual sequence lengths. 57 | cross_entropy = tf.reduce_sum(cross_entropy, 1) 58 | cross_entropy /= tf.reduce_sum(mask, 1) 59 | return tf.reduce_mean(cross_entropy) 60 | 61 | def last_relevant(output, length): 62 | batch_size = tf.shape(output)[0] 63 | max_length = tf.shape(output)[1] 64 | out_size = int(output.get_shape()[2]) 65 | index = tf.range(0, batch_size) * max_length + (length - 1) 66 | flat = tf.reshape(output, [-1, out_size]) 67 | relevant = tf.gather(flat, index) 68 | return relevant 69 | 70 | class SkeletonHRNNet(object): 71 | 72 | def __init__(self): 73 | 74 | self._num_layers_spatial = 3 75 | 76 | def create_model(self, inputs, num_classes, labels, is_training=True, **unused_params): 77 | outputs = {} 78 | 79 | is_training = tf.constant(is_training, dtype=tf.bool) 80 | with tf.variable_scope('spatial'): 81 | cells = [] 82 | num_hidden = [256, 256, 256] 83 | for i in range(self._num_layers_spatial): 84 | cell = bnlstm.BNLSTMCell(num_hidden[i], training=is_training) 85 | cell = rnn.DropoutWrapper(cell, input_keep_prob=0.5, output_keep_prob=0.5) 86 | cells.append(cell) 87 | spatial = rnn.MultiRNNCell(cells) 88 | output, new_states = tf.nn.dynamic_rnn(spatial, inputs, 89 | dtype=tf.float32, sequence_length=length(inputs)) 90 | 91 | last = last_relevant(output, length(output)) 92 | fc4 = layers.fully_connected(last, 128, activation_fn=tf.nn.relu) 93 | fc5 = layers.fully_connected(fc4, 64, activation_fn=tf.nn.relu) 94 | logits = layers.fully_connected(fc5, num_classes, activation_fn=None) 95 | 96 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 97 | labels=labels, logits=logits, name='xentropy') 98 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()]) 99 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') 100 | 101 | outputs['predictions'] = logits 102 | outputs['loss'] = loss 103 | outputs['reg_loss'] = l2_loss 104 | return outputs 105 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import tensorflow as tf 3 | from tensorflow import logging 4 | 5 | def MakeSummary(name, value): 6 | """Creates a tf.Summary proto with the given name and value.""" 7 | summary = tf.Summary() 8 | val = summary.value.add() 9 | val.tag = str(name) 10 | val.simple_value = float(value) 11 | return summary 12 | 13 | 14 | def AddGlobalStepSummary(summary_writer, 15 | global_step_val, 16 | global_step_info_dict, 17 | summary_scope="Eval"): 18 | """Add the global_step summary to the Tensorboard. 19 | 20 | Args: 21 | summary_writer: Tensorflow summary_writer. 22 | global_step_val: a int value of the global step. 23 | global_step_info_dict: a dictionary of the evaluation metrics calculated for 24 | a mini-batch. 25 | summary_scope: Train or Eval. 
26 | 27 | Returns: 28 | A string of this global_step summary 29 | """ 30 | this_hit_at_one = global_step_info_dict["hit_at_one"] 31 | this_loss = global_step_info_dict["loss"] 32 | examples_per_second = global_step_info_dict.get("examples_per_second", -1) 33 | 34 | summary_writer.add_summary( 35 | MakeSummary("GlobalStep/" + summary_scope + "_Hit@1", this_hit_at_one), 36 | global_step_val) 37 | summary_writer.add_summary( 38 | MakeSummary("GlobalStep/" + summary_scope + "_Loss", this_loss), 39 | global_step_val) 40 | 41 | if examples_per_second != -1: 42 | summary_writer.add_summary( 43 | MakeSummary("GlobalStep/" + summary_scope + "_Example_Second", 44 | examples_per_second), global_step_val) 45 | 46 | summary_writer.flush() 47 | info = ("global_step {0} | Batch Hit@1: {1:.3f} | Batch Loss: {2:.3f} " 48 | "| Examples_per_sec: {3:.3f}").format( 49 | global_step_val, this_hit_at_one, this_loss, 50 | examples_per_second) 51 | return info 52 | 53 | 54 | def AddEpochSummary(summary_writer, 55 | global_step_val, 56 | epoch_info_dict, 57 | summary_scope="Eval"): 58 | """Add the epoch summary to the Tensorboard. 59 | 60 | Args: 61 | summary_writer: Tensorflow summary_writer. 62 | global_step_val: a int value of the global step. 63 | epoch_info_dict: a dictionary of the evaluation metrics calculated for the 64 | whole epoch. 65 | summary_scope: Train or Eval. 66 | 67 | Returns: 68 | A string of this global_step summary 69 | """ 70 | epoch_id = epoch_info_dict["epoch_id"] 71 | avg_hit_at_one = epoch_info_dict["avg_hit_at_one"] 72 | avg_loss = epoch_info_dict["avg_loss"] 73 | 74 | summary_writer.add_summary( 75 | MakeSummary("Epoch/" + summary_scope + "_Avg_Hit@1", avg_hit_at_one), 76 | global_step_val) 77 | summary_writer.add_summary( 78 | MakeSummary("Epoch/" + summary_scope + "_Avg_Loss", avg_loss), 79 | global_step_val) 80 | summary_writer.flush() 81 | 82 | info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_Loss: {2:3f}").format( 83 | epoch_id, avg_hit_at_one, avg_loss) 84 | return info 85 | 86 | def clip_gradient_norms(gradients_to_variables, max_norm): 87 | """Clips the gradients by the given value. 88 | 89 | Args: 90 | gradients_to_variables: A list of gradient to variable pairs (tuples). 91 | max_norm: the maximum norm value. 92 | 93 | Returns: 94 | A list of clipped gradient to variable pairs. 95 | """ 96 | clipped_grads_and_vars = [] 97 | for grad, var in gradients_to_variables: 98 | if grad is not None: 99 | if isinstance(grad, tf.IndexedSlices): 100 | tmp = tf.clip_by_norm(grad.values, max_norm) 101 | grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape) 102 | else: 103 | grad = tf.clip_by_norm(grad, max_norm) 104 | clipped_grads_and_vars.append((grad, var)) 105 | return clipped_grads_and_vars 106 | 107 | def combine_gradients(tower_grads): 108 | """Calculate the combined gradient for each shared variable across all towers. 109 | 110 | Note that this function provides a synchronization point across all towers. 111 | 112 | Args: 113 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 114 | is over individual gradients. The inner list is over the gradient 115 | calculation for each tower. 116 | Returns: 117 | List of pairs of (gradient, variable) where the gradient has been summed 118 | across all towers. 
119 | """ 120 | filtered_grads = [[x for x in grad_list if x[0] is not None] for grad_list in tower_grads] 121 | final_grads = [] 122 | for i in xrange(len(filtered_grads[0])): 123 | grads = [filtered_grads[t][i] for t in xrange(len(filtered_grads))] 124 | grad = tf.stack([x[0] for x in grads], 0) 125 | grad = tf.reduce_sum(grad, 0) 126 | final_grads.append((grad, filtered_grads[0][i][1],)) 127 | 128 | return final_grads 129 | -------------------------------------------------------------------------------- /models/NTURGBD/bnlstm.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.contrib.rnn import RNNCell 5 | 6 | class LSTMCell(RNNCell): 7 | '''Vanilla LSTM implemented with same initializations as BN-LSTM''' 8 | def __init__(self, num_units): 9 | self.num_units = num_units 10 | 11 | @property 12 | def state_size(self): 13 | return (self.num_units, self.num_units) 14 | 15 | @property 16 | def output_size(self): 17 | return self.num_units 18 | 19 | def __call__(self, x, state, scope=None): 20 | with tf.variable_scope(scope or type(self).__name__): 21 | c, h = state 22 | 23 | # Keep W_xh and W_hh separate here as well to reuse initialization methods 24 | x_size = x.get_shape().as_list()[1] 25 | W_xh = tf.get_variable('W_xh', 26 | [x_size, 4 * self.num_units], 27 | initializer=orthogonal_initializer()) 28 | W_hh = tf.get_variable('W_hh', 29 | [self.num_units, 4 * self.num_units], 30 | initializer=bn_lstm_identity_initializer(0.95)) 31 | bias = tf.get_variable('bias', [4 * self.num_units]) 32 | 33 | # hidden = tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias 34 | # improve speed by concat. 35 | concat = tf.concat(1, [x, h]) 36 | W_both = tf.concat(0, [W_xh, W_hh]) 37 | hidden = tf.matmul(concat, W_both) + bias 38 | 39 | i, j, f, o = tf.split(1, 4, hidden) 40 | 41 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 42 | new_h = tf.tanh(new_c) * tf.sigmoid(o) 43 | 44 | return new_h, (new_c, new_h) 45 | 46 | class BNLSTMCell(RNNCell): 47 | '''Batch normalized LSTM as described in arxiv.org/abs/1603.09025''' 48 | def __init__(self, num_units, training): 49 | self.num_units = num_units 50 | self.training = training 51 | 52 | @property 53 | def state_size(self): 54 | return (self.num_units, self.num_units) 55 | 56 | @property 57 | def output_size(self): 58 | return self.num_units 59 | 60 | def __call__(self, x, state, scope=None): 61 | with tf.variable_scope(scope or type(self).__name__): 62 | c, h = state 63 | 64 | x_size = x.get_shape().as_list()[1] 65 | W_xh = tf.get_variable('W_xh', 66 | [x_size, 4 * self.num_units], 67 | initializer=orthogonal_initializer()) 68 | W_hh = tf.get_variable('W_hh', 69 | [self.num_units, 4 * self.num_units], 70 | initializer=bn_lstm_identity_initializer(0.95)) 71 | bias = tf.get_variable('bias', [4 * self.num_units]) 72 | 73 | xh = tf.matmul(x, W_xh) 74 | hh = tf.matmul(h, W_hh) 75 | 76 | bn_xh = batch_norm(xh, 'xh', self.training) 77 | bn_hh = batch_norm(hh, 'hh', self.training) 78 | 79 | hidden = bn_xh + bn_hh + bias 80 | 81 | i, j, f, o = tf.split(hidden, 4, 1) 82 | 83 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 84 | bn_new_c = batch_norm(new_c, 'c', self.training) 85 | 86 | new_h = tf.tanh(bn_new_c) * tf.sigmoid(o) 87 | 88 | return new_h, (new_c, new_h) 89 | 90 | def orthogonal(shape): 91 | flat_shape = (shape[0], np.prod(shape[1:])) 92 | a = np.random.normal(0.0, 1.0, flat_shape) 93 | u, _, v = np.linalg.svd(a, 
full_matrices=False) 94 | q = u if u.shape == flat_shape else v 95 | return q.reshape(shape) 96 | 97 | def bn_lstm_identity_initializer(scale): 98 | def _initializer(shape, dtype=tf.float32, partition_info=None): 99 | '''Ugly cause LSTM params calculated in one matrix multiply''' 100 | size = shape[0] 101 | # gate (j) is identity 102 | t = np.zeros(shape) 103 | t[:, size:size * 2] = np.identity(size) * scale 104 | t[:, :size] = orthogonal([size, size]) 105 | t[:, size * 2:size * 3] = orthogonal([size, size]) 106 | t[:, size * 3:] = orthogonal([size, size]) 107 | return tf.constant(t, dtype) 108 | 109 | return _initializer 110 | 111 | def orthogonal_initializer(): 112 | def _initializer(shape, dtype=tf.float32, partition_info=None): 113 | return tf.constant(orthogonal(shape), dtype) 114 | return _initializer 115 | 116 | def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999): 117 | '''Assume 2d [batch, values] tensor''' 118 | 119 | with tf.variable_scope(name_scope): 120 | size = x.get_shape().as_list()[1] 121 | 122 | scale = tf.get_variable('scale', [size], initializer=tf.constant_initializer(0.1)) 123 | offset = tf.get_variable('offset', [size]) 124 | 125 | pop_mean = tf.get_variable('pop_mean', [size], initializer=tf.zeros_initializer, trainable=False) 126 | pop_var = tf.get_variable('pop_var', [size], initializer=tf.ones_initializer, trainable=False) 127 | batch_mean, batch_var = tf.nn.moments(x, [0]) 128 | 129 | train_mean_op = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) 130 | train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 131 | 132 | def batch_statistics(): 133 | with tf.control_dependencies([train_mean_op, train_var_op]): 134 | return tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon) 135 | 136 | def population_statistics(): 137 | return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon) 138 | 139 | return tf.cond(training, batch_statistics, population_statistics) 140 | -------------------------------------------------------------------------------- /models/HybridModel/bnlstm.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.contrib.rnn import RNNCell 5 | 6 | class LSTMCell(RNNCell): 7 | '''Vanilla LSTM implemented with same initializations as BN-LSTM''' 8 | def __init__(self, num_units): 9 | self.num_units = num_units 10 | 11 | @property 12 | def state_size(self): 13 | return (self.num_units, self.num_units) 14 | 15 | @property 16 | def output_size(self): 17 | return self.num_units 18 | 19 | def __call__(self, x, state, scope=None): 20 | with tf.variable_scope(scope or type(self).__name__): 21 | c, h = state 22 | 23 | # Keep W_xh and W_hh separate here as well to reuse initialization methods 24 | x_size = x.get_shape().as_list()[1] 25 | W_xh = tf.get_variable('W_xh', 26 | [x_size, 4 * self.num_units], 27 | initializer=orthogonal_initializer()) 28 | W_hh = tf.get_variable('W_hh', 29 | [self.num_units, 4 * self.num_units], 30 | initializer=bn_lstm_identity_initializer(0.95)) 31 | bias = tf.get_variable('bias', [4 * self.num_units]) 32 | 33 | # hidden = tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias 34 | # improve speed by concat. 
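# Caution: this vanilla LSTMCell still uses the pre-TF-1.0 argument order
# (tf.concat(axis, values) / tf.split(axis, num, value)), whereas BNLSTMCell below already
# uses the TF 1.x order tf.split(value, num, axis). On TF >= 1.0 the calls in this block
# would need to become tf.concat([x, h], 1), tf.concat([W_xh, W_hh], 0) and
# tf.split(hidden, 4, 1) for this cell to run.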
35 | concat = tf.concat(1, [x, h]) 36 | W_both = tf.concat(0, [W_xh, W_hh]) 37 | hidden = tf.matmul(concat, W_both) + bias 38 | 39 | i, j, f, o = tf.split(1, 4, hidden) 40 | 41 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 42 | new_h = tf.tanh(new_c) * tf.sigmoid(o) 43 | 44 | return new_h, (new_c, new_h) 45 | 46 | class BNLSTMCell(RNNCell): 47 | '''Batch normalized LSTM as described in arxiv.org/abs/1603.09025''' 48 | def __init__(self, num_units, training): 49 | self.num_units = num_units 50 | self.training = training 51 | 52 | @property 53 | def state_size(self): 54 | return (self.num_units, self.num_units) 55 | 56 | @property 57 | def output_size(self): 58 | return self.num_units 59 | 60 | def __call__(self, x, state, scope=None): 61 | with tf.variable_scope(scope or type(self).__name__): 62 | c, h = state 63 | 64 | x_size = x.get_shape().as_list()[1] 65 | W_xh = tf.get_variable('W_xh', 66 | [x_size, 4 * self.num_units], 67 | initializer=orthogonal_initializer()) 68 | W_hh = tf.get_variable('W_hh', 69 | [self.num_units, 4 * self.num_units], 70 | initializer=bn_lstm_identity_initializer(0.95)) 71 | bias = tf.get_variable('bias', [4 * self.num_units]) 72 | 73 | xh = tf.matmul(x, W_xh) 74 | hh = tf.matmul(h, W_hh) 75 | 76 | bn_xh = batch_norm(xh, 'xh', self.training) 77 | bn_hh = batch_norm(hh, 'hh', self.training) 78 | 79 | hidden = bn_xh + bn_hh + bias 80 | 81 | i, j, f, o = tf.split(hidden, 4, 1) 82 | 83 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 84 | bn_new_c = batch_norm(new_c, 'c', self.training) 85 | 86 | new_h = tf.tanh(bn_new_c) * tf.sigmoid(o) 87 | 88 | return new_h, (new_c, new_h) 89 | 90 | def orthogonal(shape): 91 | flat_shape = (shape[0], np.prod(shape[1:])) 92 | a = np.random.normal(0.0, 1.0, flat_shape) 93 | u, _, v = np.linalg.svd(a, full_matrices=False) 94 | q = u if u.shape == flat_shape else v 95 | return q.reshape(shape) 96 | 97 | def bn_lstm_identity_initializer(scale): 98 | def _initializer(shape, dtype=tf.float32, partition_info=None): 99 | '''Ugly cause LSTM params calculated in one matrix multiply''' 100 | size = shape[0] 101 | # gate (j) is identity 102 | t = np.zeros(shape) 103 | t[:, size:size * 2] = np.identity(size) * scale 104 | t[:, :size] = orthogonal([size, size]) 105 | t[:, size * 2:size * 3] = orthogonal([size, size]) 106 | t[:, size * 3:] = orthogonal([size, size]) 107 | return tf.constant(t, dtype) 108 | 109 | return _initializer 110 | 111 | def orthogonal_initializer(): 112 | def _initializer(shape, dtype=tf.float32, partition_info=None): 113 | return tf.constant(orthogonal(shape), dtype) 114 | return _initializer 115 | 116 | def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999): 117 | '''Assume 2d [batch, values] tensor''' 118 | 119 | with tf.variable_scope(name_scope): 120 | size = x.get_shape().as_list()[1] 121 | 122 | scale = tf.get_variable('scale', [size], initializer=tf.constant_initializer(0.1)) 123 | offset = tf.get_variable('offset', [size]) 124 | 125 | pop_mean = tf.get_variable('pop_mean', [size], initializer=tf.zeros_initializer, trainable=False) 126 | pop_var = tf.get_variable('pop_var', [size], initializer=tf.ones_initializer, trainable=False) 127 | batch_mean, batch_var = tf.nn.moments(x, [0]) 128 | 129 | train_mean_op = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) 130 | train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 131 | 132 | def batch_statistics(): 133 | with tf.control_dependencies([train_mean_op, train_var_op]): 134 | return 
tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon) 135 | 136 | def population_statistics(): 137 | return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon) 138 | 139 | return tf.cond(training, batch_statistics, population_statistics) 140 | -------------------------------------------------------------------------------- /models/HybridModel/vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains model definitions for versions of the Oxford VGG network. 16 | 17 | These model definitions were introduced in the following technical report: 18 | 19 | Very Deep Convolutional Networks For Large-Scale Image Recognition 20 | Karen Simonyan and Andrew Zisserman 21 | arXiv technical report, 2015 22 | PDF: http://arxiv.org/pdf/1409.1556.pdf 23 | ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf 24 | CC-BY-4.0 25 | 26 | More information can be obtained from the VGG website: 27 | www.robots.ox.ac.uk/~vgg/research/very_deep/ 28 | 29 | Usage: 30 | with slim.arg_scope(vgg.vgg_arg_scope()): 31 | outputs, end_points = vgg.vgg_a(inputs) 32 | 33 | with slim.arg_scope(vgg.vgg_arg_scope()): 34 | outputs, end_points = vgg.vgg_16(inputs) 35 | 36 | @@vgg_a 37 | @@vgg_16 38 | @@vgg_19 39 | """ 40 | 41 | from __future__ import absolute_import 42 | from __future__ import division 43 | from __future__ import print_function 44 | 45 | from tensorflow.contrib import layers 46 | from tensorflow.contrib.framework.python.ops import arg_scope 47 | from tensorflow.contrib.layers.python.layers import layers as layers_lib 48 | from tensorflow.contrib.layers.python.layers import regularizers 49 | from tensorflow.contrib.layers.python.layers import utils 50 | from tensorflow.python.ops import array_ops 51 | from tensorflow.python.ops import init_ops 52 | from tensorflow.python.ops import nn_ops 53 | from tensorflow.python.ops import variable_scope 54 | 55 | 56 | def vgg_arg_scope(weight_decay=0.0005): 57 | """Defines the VGG arg scope. 58 | 59 | Args: 60 | weight_decay: The l2 regularization coefficient. 61 | 62 | Returns: 63 | An arg_scope. 64 | """ 65 | with arg_scope( 66 | [layers.conv2d, layers_lib.fully_connected], 67 | activation_fn=nn_ops.relu, 68 | weights_regularizer=regularizers.l2_regularizer(weight_decay), 69 | biases_initializer=init_ops.zeros_initializer()): 70 | with arg_scope([layers.conv2d], padding='SAME') as arg_sc: 71 | return arg_sc 72 | 73 | 74 | def vgg_a(inputs, 75 | num_classes=1000, 76 | is_training=True, 77 | dropout_keep_prob=0.5, 78 | spatial_squeeze=True, 79 | scope='vgg_a'): 80 | """Oxford Net VGG 11-Layers version A Example. 81 | 82 | Note: All the fully_connected layers have been transformed to conv2d layers. 
83 | To use in classification mode, resize input to 224x224. 84 | 85 | Args: 86 | inputs: a tensor of size [batch_size, height, width, channels]. 87 | num_classes: number of predicted classes. 88 | is_training: whether or not the model is being trained. 89 | dropout_keep_prob: the probability that activations are kept in the dropout 90 | layers during training. 91 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 92 | outputs. Useful to remove unnecessary dimensions for classification. 93 | scope: Optional scope for the variables. 94 | 95 | Returns: 96 | the last op containing the log predictions and end_points dict. 97 | """ 98 | with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc: 99 | end_points_collection = sc.original_name_scope + '_end_points' 100 | # Collect outputs for conv2d, fully_connected and max_pool2d. 101 | with arg_scope( 102 | [layers.conv2d, layers_lib.max_pool2d], 103 | outputs_collections=end_points_collection): 104 | net = layers_lib.repeat( 105 | inputs, 1, layers.conv2d, 64, [3, 3], scope='conv1') 106 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool1') 107 | net = layers_lib.repeat(net, 1, layers.conv2d, 128, [3, 3], scope='conv2') 108 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool2') 109 | net = layers_lib.repeat(net, 2, layers.conv2d, 256, [3, 3], scope='conv3') 110 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool3') 111 | net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv4') 112 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool4') 113 | net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv5') 114 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool5') 115 | # Use conv2d instead of fully_connected layers. 116 | net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 117 | net = layers_lib.dropout( 118 | net, dropout_keep_prob, is_training=is_training, scope='dropout6') 119 | net = layers.conv2d(net, 4096, [1, 1], scope='fc7') 120 | net = layers_lib.dropout( 121 | net, dropout_keep_prob, is_training=is_training, scope='dropout7') 122 | net = layers.conv2d( 123 | net, 124 | num_classes, [1, 1], 125 | activation_fn=None, 126 | normalizer_fn=None, 127 | scope='fc8') 128 | # Convert end_points_collection into a end_point dict. 129 | end_points = utils.convert_collection_to_dict(end_points_collection) 130 | if spatial_squeeze: 131 | net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed') 132 | end_points[sc.name + '/fc8'] = net 133 | return net, end_points 134 | 135 | 136 | vgg_a.default_image_size = 224 137 | 138 | 139 | def vgg_16(inputs, 140 | num_classes=1000, 141 | is_training=True, 142 | dropout_keep_prob=0.5, 143 | spatial_squeeze=True, 144 | scope='vgg_16'): 145 | """Oxford Net VGG 16-Layers version D Example. 146 | 147 | Note: All the fully_connected layers have been transformed to conv2d layers. 148 | To use in classification mode, resize input to 224x224. 149 | 150 | Args: 151 | inputs: a tensor of size [batch_size, height, width, channels]. 152 | num_classes: number of predicted classes. 153 | is_training: whether or not the model is being trained. 154 | dropout_keep_prob: the probability that activations are kept in the dropout 155 | layers during training. 156 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 157 | outputs. Useful to remove unnecessary dimensions for classification. 158 | scope: Optional scope for the variables. 
159 | 160 | Returns: 161 | the last op containing the log predictions and end_points dict. 162 | """ 163 | with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc: 164 | end_points_collection = sc.original_name_scope + '_end_points' 165 | # Collect outputs for conv2d, fully_connected and max_pool2d. 166 | with arg_scope( 167 | [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d], 168 | outputs_collections=end_points_collection): 169 | net = layers_lib.repeat( 170 | inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1') 171 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool1') 172 | net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') 173 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool2') 174 | net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3') 175 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool3') 176 | net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4') 177 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool4') 178 | net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5') 179 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool5') 180 | # Use conv2d instead of fully_connected layers. 181 | net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 182 | net = layers_lib.dropout( 183 | net, dropout_keep_prob, is_training=is_training, scope='dropout6') 184 | net = layers.conv2d(net, 4096, [1, 1], scope='fc7') 185 | net = layers_lib.dropout( 186 | net, dropout_keep_prob, is_training=is_training, scope='dropout7') 187 | net = layers.conv2d( 188 | net, 189 | num_classes, [1, 1], 190 | activation_fn=None, 191 | normalizer_fn=None, 192 | scope='fc8') 193 | # Convert end_points_collection into a end_point dict. 194 | end_points = utils.convert_collection_to_dict(end_points_collection) 195 | if spatial_squeeze: 196 | net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed') 197 | end_points[sc.name + '/fc8'] = net 198 | return net, end_points 199 | 200 | 201 | vgg_16.default_image_size = 224 202 | 203 | 204 | def vgg_19(inputs, 205 | num_classes=1000, 206 | is_training=True, 207 | dropout_keep_prob=0.5, 208 | spatial_squeeze=True, 209 | scope='vgg_19'): 210 | """Oxford Net VGG 19-Layers version E Example. 211 | 212 | Note: All the fully_connected layers have been transformed to conv2d layers. 213 | To use in classification mode, resize input to 224x224. 214 | 215 | Args: 216 | inputs: a tensor of size [batch_size, height, width, channels]. 217 | num_classes: number of predicted classes. 218 | is_training: whether or not the model is being trained. 219 | dropout_keep_prob: the probability that activations are kept in the dropout 220 | layers during training. 221 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 222 | outputs. Useful to remove unnecessary dimensions for classification. 223 | scope: Optional scope for the variables. 224 | 225 | Returns: 226 | the last op containing the log predictions and end_points dict. 227 | """ 228 | with variable_scope.variable_scope(scope, 'vgg_19', [inputs]) as sc: 229 | end_points_collection = sc.name + '_end_points' 230 | # Collect outputs for conv2d, fully_connected and max_pool2d. 
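# hybrid.get_pretrained_model_feats() later reads the 14x14x512 'conv5/conv5_1' activation
# out of this end-points collection, so the layers below double as the pre-trained feature
# extractor of the hybrid model (its variables are restored from a vgg_19 checkpoint and an
# empty list of trainable variables is returned for it).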
231 | with arg_scope( 232 | [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d], 233 | outputs_collections=end_points_collection): 234 | net = layers_lib.repeat( 235 | inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1') 236 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool1') 237 | net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') 238 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool2') 239 | net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3], scope='conv3') 240 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool3') 241 | net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv4') 242 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool4') 243 | net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv5') 244 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool5') 245 | # Use conv2d instead of fully_connected layers. 246 | net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 247 | net = layers_lib.dropout( 248 | net, dropout_keep_prob, is_training=is_training, scope='dropout6') 249 | net = layers.conv2d(net, 4096, [1, 1], scope='fc7') 250 | net = layers_lib.dropout( 251 | net, dropout_keep_prob, is_training=is_training, scope='dropout7') 252 | net = layers.conv2d( 253 | net, 254 | num_classes, [1, 1], 255 | activation_fn=None, 256 | normalizer_fn=None, 257 | scope='fc8') 258 | # Convert end_points_collection into a end_point dict. 259 | end_points = utils.convert_collection_to_dict(end_points_collection) 260 | if spatial_squeeze: 261 | net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed') 262 | end_points[sc.name + '/fc8'] = net 263 | return net, end_points 264 | 265 | 266 | vgg_19.default_image_size = 224 267 | 268 | # Alias 269 | vgg_d = vgg_16 270 | vgg_e = vgg_19 271 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | import tensorflow as tf 5 | from tensorflow import app 6 | from tensorflow import flags 7 | from tensorflow import gfile 8 | from tensorflow import logging 9 | from tensorflow.python.framework import ops 10 | from tensorflow.python.framework import dtypes 11 | from utils import * 12 | 13 | import data, models 14 | 15 | FLAGS = flags.FLAGS 16 | 17 | def find_class_by_name(name, modules): 18 | """Searches the provided modules for the named class and returns it.""" 19 | modules = [getattr(module, name, None) for module in modules] 20 | return next(a for a in modules if a) 21 | 22 | 23 | def get_input_evaluation_tensors(reader, 24 | batch_size=1024, 25 | num_readers=1): 26 | """Creates the section of the graph which reads the evaluation data. 27 | 28 | Args: 29 | reader: A class which parses the training data. 30 | data_pattern: A 'glob' style path to the data files. 31 | batch_size: How many examples to process at a time. 32 | num_readers: How many I/O threads to use. 33 | 34 | Returns: 35 | A tuple containing the features tensor, labels tensor, and optionally a 36 | tensor containing the number of frames per video. The exact dimensions 37 | depend on the reader being used. 38 | 39 | Raises: 40 | IOError: If no files matching the given pattern were found. 
41 | """ 42 | logging.info("Using batch size of " + str(batch_size) + " for evaluation.") 43 | with tf.name_scope("eval_input"): 44 | files, labels = reader._read_filelist(split=reader.present_split, train=False) 45 | if not files: 46 | raise IOError("Unable to find the evaluation files.") 47 | logging.info("number of evaluation files: " + str(len(files))) 48 | files = ops.convert_to_tensor(files, dtypes.string) 49 | labels = ops.convert_to_tensor(labels, dtypes.int64) 50 | 51 | input_queue = tf.train.slice_input_producer( 52 | [files, labels], 53 | num_epochs = 1, 54 | shuffle = False) 55 | image, label = reader._read_samples(input_queue) 56 | 57 | test_image_loader, test_label_loader = tf.train.batch( 58 | [image, label], 59 | batch_size = batch_size, 60 | capacity = 5 * batch_size) 61 | return test_image_loader, test_label_loader 62 | 63 | def build_graph(reader, 64 | model, 65 | label_loss_fn, 66 | batch_size=1024, 67 | num_readers=1): 68 | """Creates the Tensorflow graph for evaluation. 69 | 70 | Args: 71 | reader: The data file reader. It should inherit from BaseReader. 72 | model: The core model (e.g. logistic or neural net). It should inherit 73 | from BaseModel. 74 | eval_data_pattern: glob path to the evaluation data files. 75 | label_loss_fn: What kind of loss to apply to the model. It should inherit 76 | from BaseLoss. 77 | batch_size: How many examples to process at a time. 78 | num_readers: How many threads to use for I/O operations. 79 | """ 80 | global_step = tf.Variable(0, trainable=False, name="global_step") 81 | images_loader, labels_loader = get_input_evaluation_tensors( # pylint: disable=g-line-too-long 82 | reader, 83 | batch_size=batch_size, 84 | num_readers=num_readers) 85 | images_batch = tf.placeholder(tf.float32, (None, 224, 224, 3)) 86 | labels_batch = tf.placeholder(tf.int64, (None,)) 87 | # (224, 224, 3) -> (14, 14, 512) 88 | feature_0, restore_vars_0, train_v0 = model.create_feature_model( 89 | images_batch, scope="rgb", is_training=False) 90 | # (224, 224, 3) -> (14, 14, 512) 91 | feature_1, restore_vars_1, train_v1 = model.create_feature_model( 92 | images_batch, scope="rgbdiff", is_training=False) 93 | # (14, 14, 512) -> (7168,) 94 | aux_feat_batch = tf.placeholder(tf.float32, (None, 14, 14, 512)) 95 | aux_output, train_v2 = model.create_aux_model( 96 | aux_feat_batch) 97 | # (21504,) -> (60,) 98 | aux_fc_batch_0 = tf.placeholder(tf.float32, (None, 21504)) 99 | logits_aux_0, train_v3 = model.create_logits_model( 100 | aux_fc_batch_0, 60, is_training=False, scope="auxlogs", reuse=None) 101 | # (21504,) -> (60,) 102 | aux_fc_batch_1 = tf.placeholder(tf.float32, (None, 21504)) 103 | logits_aux_1, train_v4 = model.create_logits_model( 104 | aux_fc_batch_1, 60, is_training=False, scope="auxlogs", reuse=True) 105 | # (21504,) -> (60,) 106 | aux_fc_batch_2 = tf.placeholder(tf.float32, (None, 21504)) 107 | logits_aux_2, train_v5 = model.create_logits_model( 108 | aux_fc_batch_2, 60, is_training=False, scope="auxlogs", reuse=True) 109 | # (21504,) -> (60,) 110 | aux_fc_batch_3 = tf.placeholder(tf.float32, (None, 21504)) 111 | logits_aux_3, train_v6 = model.create_logits_model( 112 | aux_fc_batch_3, 60, is_training=False, scope="auxlogs", reuse=True) 113 | 114 | loss_0 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_0, labels=labels_batch)) 115 | loss_1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_1, labels=labels_batch)) 116 | loss_2 = 
tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_2, labels=labels_batch)) 117 | loss_3 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_3, labels=labels_batch)) 118 | 119 | loss = loss_0 + loss_1 + loss_2 + loss_3 120 | predictions = ( logits_aux_0 + logits_aux_1 + logits_aux_2 + logits_aux_3 ) 121 | 122 | tf.add_to_collection("global_step", global_step) 123 | tf.add_to_collection("loss", loss) 124 | tf.add_to_collection("feature_0", feature_0) 125 | tf.add_to_collection("feature_1", feature_1) 126 | tf.add_to_collection("aux_feat_batch", aux_feat_batch) 127 | tf.add_to_collection("aux_output", aux_output) 128 | tf.add_to_collection("aux_fc_batch_0", aux_fc_batch_0) 129 | tf.add_to_collection("logits_aux_0", logits_aux_0) 130 | tf.add_to_collection("aux_fc_batch_1", aux_fc_batch_1) 131 | tf.add_to_collection("logits_aux_1", logits_aux_1) 132 | tf.add_to_collection("aux_fc_batch_2", aux_fc_batch_2) 133 | tf.add_to_collection("logits_aux_2", logits_aux_2) 134 | tf.add_to_collection("aux_fc_batch_3", aux_fc_batch_3) 135 | tf.add_to_collection("logits_aux_3", logits_aux_3) 136 | tf.add_to_collection("input_batch", images_batch) 137 | tf.add_to_collection("labels", labels_batch) 138 | tf.add_to_collection("predictions", predictions) 139 | tf.add_to_collection("images_loader", images_loader) 140 | tf.add_to_collection("labels_loader", labels_loader) 141 | 142 | return restore_vars_0.extend(restore_vars_1) 143 | 144 | def evaluation_loop(predictions, labels, loss, 145 | inputs, aux_feat_batch, aux_output, aux_fc_batch_0, logits_aux_0, 146 | aux_fc_batch_1, logits_aux_1, aux_fc_batch_2, logits_aux_2, 147 | aux_fc_batch_3, logits_aux_3, inputs_loader, feature_0, feature_1, 148 | labels_loader, saver, summary_writer, train_dir, evl_metrics, last_global_step_val): 149 | 150 | """Run the evaluation loop once. 151 | 152 | Args: 153 | video_id_batch: a tensor of video ids mini-batch. 154 | prediction_batch: a tensor of predictions mini-batch. 155 | label_batch: a tensor of label_batch mini-batch. 156 | loss: a tensor of loss for the examples in the mini-batch. 157 | summary_op: a tensor which runs the tensorboard summary operations. 158 | saver: a tensorflow saver to restore the model. 159 | summary_writer: a tensorflow summary_writer 160 | evl_metrics: an EvaluationMetrics object. 161 | last_global_step_val: the global step used in the previous evaluation. 162 | 163 | Returns: 164 | The global_step used in the latest model. 165 | """ 166 | 167 | global_step_val = -1 168 | with tf.Session() as sess: 169 | latest_checkpoint = tf.train.latest_checkpoint(train_dir) 170 | if latest_checkpoint: 171 | logging.info("Loading checkpoint for eval: " + latest_checkpoint) 172 | # Restores from checkpoint 173 | saver.restore(sess, latest_checkpoint) 174 | 175 | # Assuming model_checkpoint_path looks something like: 176 | # /my-favorite-path/train_dir/model.ckpt-0, extract global_step from it. 177 | global_step_val = latest_checkpoint.split("/")[-1].split("-")[-1] 178 | else: 179 | logging.info("No checkpoint file found.") 180 | return global_step_val 181 | 182 | if global_step_val == last_global_step_val: 183 | logging.info("skip this checkpoint global_step_val=%s " 184 | "(same as the previous one).", global_step_val) 185 | return global_step_val 186 | 187 | sess.run([tf.local_variables_initializer()]) 188 | 189 | # Start the queue runners. 
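# Per-batch evaluation flow (mirrors the training loop in train.py):
#   1. A (batch, 12, 224, 224, 3) batch is loaded and split into 12 frames:
#      the first 6 feed the "rgb" VGG trunk, the last 6 the "rgbdiff" trunk.
#   2. Each frame yields a (14, 14, 512) feature map, which the aux model maps
#      to a 7168-d vector.
#   3. Vectors from frames (i, i+2, i+4) are concatenated into 21504-d inputs
#      for the four weight-shared "auxlogs" heads; their 60-way logits are
#      summed to form `predictions`, and their cross-entropy losses are summed
#      to form `loss`.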
190 | fetches = [predictions, labels, loss] 191 | coord = tf.train.Coordinator() 192 | try: 193 | threads = [] 194 | for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): 195 | threads.extend(qr.create_threads( 196 | sess, coord=coord, daemon=True, 197 | start=True)) 198 | logging.info("enter eval_once loop global_step_val = %s. ", 199 | global_step_val) 200 | 201 | evl_metrics.clear() 202 | 203 | examples_processed = 0 204 | while not coord.should_stop(): 205 | batch_start_time = time.time() 206 | 207 | input_batch, label_batch = sess.run([inputs_loader, labels_loader]) 208 | 209 | # (batch_size, 12, 224, 224, 3) 210 | input_batch = np.transpose(input_batch, [1, 0, 2, 3, 4]) 211 | 212 | # list of (batch_size, 224, 224, 3) of size 6 213 | tw_inputs = np.split(input_batch, 12) 214 | tw_inputs = [np.reshape(x, [-1, 224, 224, 3]) for x in tw_inputs] 215 | s0_inputs = tw_inputs[:6] 216 | s1_inputs = tw_inputs[6:] 217 | 218 | # [(224, 224, 3), ..] -> [(14, 14, 512), ..] 219 | features_0 = [] 220 | for inp in s0_inputs: 221 | feat_vec = sess.run(feature_0, feed_dict={inputs: inp}) 222 | features_0.append(feat_vec) 223 | 224 | # [(224, 224, 3), ..] -> [(14, 14, 512), ..] 225 | features_1 = [] 226 | for inp in s1_inputs: 227 | feat_vec = sess.run(feature_1, feed_dict={inputs: inp}) 228 | features_1.append(feat_vec) 229 | 230 | # [(14, 14, 512), ..] -> [(7168,), ..] 231 | feats_for_aux = [] 232 | for feat in features_0: 233 | out = sess.run(aux_output, feed_dict={aux_feat_batch: feat}) 234 | feats_for_aux.append(out) 235 | 236 | # [(7168,), ..] -> [(21504,), (21504,)] (RGB stream) 237 | aux_fcs_0 = [np.concatenate([feats_for_aux[i], feats_for_aux[i+2], 238 | feats_for_aux[i+4]], axis=1) for i in range(2)] 239 | 240 | # [(14, 14, 512), ..] -> [(7168,), ..] 241 | feats_for_aux = [] 242 | for feat in features_1: 243 | out = sess.run(aux_output, feed_dict={aux_feat_batch: feat}) 244 | feats_for_aux.append(out) 245 | 246 | # [(7168,), ..] -> [(21504,), (21504,)] (RGB difference stream) 247 | aux_fcs_1 = [np.concatenate([feats_for_aux[i], feats_for_aux[i+2], 248 | feats_for_aux[i+4]], axis=1) for i in range(2)] 249 | 250 | predictions_val, labels_val, loss_val = sess.run( 251 | fetches, feed_dict={labels: label_batch, 252 | aux_fc_batch_0: aux_fcs_0[0], aux_fc_batch_1: aux_fcs_0[1], 253 | aux_fc_batch_2: aux_fcs_1[0], aux_fc_batch_3: aux_fcs_1[1]}) 254 | seconds_per_batch = time.time() - batch_start_time 255 | example_per_second = labels_val.shape[0] / seconds_per_batch 256 | examples_processed += labels_val.shape[0] 257 | 258 | iteration_info_dict = evl_metrics.accumulate(predictions_val, 259 | labels_val, loss_val) 260 | iteration_info_dict["examples_per_second"] = example_per_second 261 | 262 | iterinfo = utils.AddGlobalStepSummary( 263 | summary_writer, 264 | global_step_val, 265 | iteration_info_dict, 266 | summary_scope="Eval") 267 | logging.info("examples_processed: %d | %s", examples_processed, 268 | iterinfo) 269 | 270 | except tf.errors.OutOfRangeError as e: 271 | logging.info( 272 | "Done with batched inference. 
Now calculating global performance " 273 | "metrics.") 274 | # calculate the metrics for the entire epoch 275 | epoch_info_dict = evl_metrics.get() 276 | epoch_info_dict["epoch_id"] = global_step_val 277 | 278 | #summary_writer.add_summary(summary_val, global_step_val) 279 | epochinfo = utils.AddEpochSummary( 280 | summary_writer, 281 | global_step_val, 282 | epoch_info_dict, 283 | summary_scope="Eval") 284 | logging.info(epochinfo) 285 | evl_metrics.clear() 286 | except Exception as e: # pylint: disable=broad-except 287 | logging.info("Unexpected exception: " + str(e)) 288 | coord.request_stop(e) 289 | 290 | coord.request_stop() 291 | coord.join(threads, stop_grace_period_secs=10) 292 | 293 | return global_step_val, epoch_info_dict['avg_hit_at_one'] 294 | 295 | 296 | def evaluate(dataset, 297 | model, 298 | train_dir, 299 | dataset_dir, 300 | splits_dir, 301 | num_epochs, 302 | batch_size, 303 | split_num, 304 | label_loss='CrossEntropyLoss', 305 | run_once=True): 306 | tf.set_random_seed(0) # for reproducibility 307 | with tf.Graph().as_default(): 308 | reader = getattr(data, dataset)(dataset_dir, splits_dir, 309 | num_epochs, batch_size, split_num) 310 | 311 | model = find_class_by_name(model, 312 | [models])() 313 | label_loss_fn = find_class_by_name(label_loss, [losses])() 314 | 315 | restore_vars = \ 316 | build_graph( 317 | reader=reader, 318 | model=model, 319 | label_loss_fn=label_loss_fn, 320 | num_readers=1, 321 | batch_size=batch_size) 322 | logging.info("built evaluation graph") 323 | loss = tf.get_collection("loss")[0] 324 | predictions = tf.get_collection("predictions")[0] 325 | labels = tf.get_collection("labels")[0] 326 | inputs = tf.get_collection("input_batch")[0] 327 | feature_0 = tf.get_collection("feature_0")[0] 328 | feature_1 = tf.get_collection("feature_1")[0] 329 | aux_feat_batch = tf.get_collection("aux_feat_batch")[0] 330 | aux_output = tf.get_collection("aux_output")[0] 331 | aux_fc_batch_0 = tf.get_collection("aux_fc_batch_0")[0] 332 | logits_aux_0 = tf.get_collection("logits_aux_0")[0] 333 | aux_fc_batch_1 = tf.get_collection("aux_fc_batch_1")[0] 334 | logits_aux_1 = tf.get_collection("logits_aux_1")[0] 335 | aux_fc_batch_2 = tf.get_collection("aux_fc_batch_2")[0] 336 | logits_aux_2 = tf.get_collection("logits_aux_2")[0] 337 | aux_fc_batch_3 = tf.get_collection("aux_fc_batch_3")[0] 338 | logits_aux_3 = tf.get_collection("logits_aux_3")[0] 339 | inputs_loader = tf.get_collection("images_loader")[0] 340 | labels_loader = tf.get_collection("labels_loader")[0] 341 | 342 | saver = tf.train.Saver(tf.global_variables()) 343 | summary_writer = tf.summary.FileWriter(train_dir, graph=tf.get_default_graph()) 344 | 345 | evl_metrics = eval_util.EvaluationMetrics(reader.num_classes, 20) 346 | 347 | last_global_step_val = -1 348 | while True: 349 | last_global_step_val, h1 = evaluation_loop(predictions, labels, loss, 350 | inputs, aux_feat_batch, aux_output, aux_fc_batch_0, logits_aux_0, 351 | aux_fc_batch_1, logits_aux_1, aux_fc_batch_2, logits_aux_2, 352 | aux_fc_batch_3, logits_aux_3, inputs_loader, feature_0, feature_1, 353 | labels_loader, saver, summary_writer, train_dir, evl_metrics, last_global_step_val) 354 | 355 | if run_once: 356 | break 357 | return h1 358 | -------------------------------------------------------------------------------- /scripts/nturgbd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | #import matplotlib.pyplot as plt 5 | #from 
mpl_toolkits.mplot3d import Axes3D 6 | #from sklearn import preprocessing 7 | 8 | import tensorflow as tf 9 | from tensorflow import logging 10 | logging.set_verbosity(tf.logging.INFO) 11 | 12 | class Joint(object): 13 | def __init__(self, 14 | x, y, z, 15 | dX=None, dY=None, 16 | cX=None, cY=None, 17 | orX=None, orY=None, orZ=None, orW=None, 18 | tState=None): 19 | self.x = x; self.y = y; self.z = z 20 | self.depthX = dX; self.depthY = dY 21 | self.colorX = cX; self.colorY = cY 22 | self.orientX = orX; self.orientY = orY 23 | self.orientZ = orZ; self.orientW = orW 24 | self.tracker = tState 25 | 26 | def _calculate_cylindrical_coordinates(self): 27 | rho = np.sqrt(self.x**2 + self.y**2) 28 | if self.x == 0 and self.y == 0: 29 | phi = 0 30 | elif self.x >= 0: 31 | phi = np.arcsin(self.y/rho) 32 | elif self.x > 0: 33 | phi = np.arctan2(self.y, self.x) 34 | elif self.x < 0: 35 | phi = -np.arcsin(self.y/rho) + np.pi 36 | return rho, phi, self.z 37 | 38 | def _calculate_spherical_coordinates(self): 39 | xy = self.x**2 + self.y**2 40 | r = np.sqrt(xy + self.z**2) 41 | theta = np.arctan2(self.z, xy) 42 | if self.x == 0 and self.y == 0: 43 | phi = 0 44 | elif self.x >= 0: 45 | phi = np.arcsin(self.y/np.sqrt(xy)) 46 | elif self.x > 0: 47 | phi = np.arctan2(self.y, self.x) 48 | elif self.x < 0: 49 | phi = -np.arcsin(self.y/np.sqrt(xy)) + np.pi 50 | return r, theta, phi 51 | 52 | def _get_cartesian_coordinates(self): 53 | return self.x, self.y, self.z 54 | 55 | def _set_cartesian_coordinates(self, x, y, z): 56 | self.x = x; self.y = y; self.z = z 57 | 58 | def _get_cylindrical_coordinates(self): 59 | return self._calculate_cylindrical_coordinates() 60 | 61 | def _get_spherical_coordinates(self): 62 | return self._calculate_spherical_coordinates() 63 | 64 | def _get_depth_coordinates(self): 65 | return self.depthX, self.depthY 66 | 67 | def _get_rgb_coordinates(self): 68 | return self.colorX, self.colorY 69 | 70 | def _get_orientation_coordinates(self): 71 | return self.orientX, self.orientY, self.orientZ, self.orientW 72 | 73 | class Skeleton(object): 74 | def __init__(self, 75 | skelid=None, 76 | njoints=None, 77 | clip=None, 78 | lconfidence=None, lstate=None, 79 | rconfidence=None, rstate=None, 80 | restrict=None, 81 | lX=None, lY=None, 82 | tracker=None, 83 | joints=None): 84 | self.skeletonID = skelid 85 | self.num_joints = njoints 86 | if joints is None: 87 | self.joints = [] 88 | else: 89 | self.joints = joints 90 | self.clip_edges = clip 91 | self.left_hand_confidence = lconfidence 92 | self.left_hand_state = lstate 93 | self.right_hand_confidence = rconfidence 94 | self.right_hand_state = rstate 95 | self.tracker = tracker 96 | self._is_zero_skeleton = False 97 | 98 | def _get_skeleton_id(self): 99 | return self.skeletonID 100 | 101 | def _get_num_joints(self): 102 | return self.num_joints 103 | 104 | def _get_joint_objects(self): 105 | return self.joints 106 | 107 | def _get_clip_edges_val(self): 108 | return self.clip_edges 109 | 110 | def _get_left_hand(self): 111 | return self.left_hand_confidence, self.left_hand_state 112 | 113 | def _get_right_hand(self): 114 | return self.right_hand_confidence, self.right_hand_state 115 | 116 | def _set_joint_objects(self, joints): 117 | self.joints = joints 118 | 119 | def _add_joint_object(self, joint): 120 | self.joints.append(joint) 121 | 122 | 123 | class Frame(object): 124 | def __init__(self, 125 | nskels=None, 126 | skels=None): 127 | self.num_skeletons = nskels 128 | if skels is None: 129 | self.skeletons = [] 130 | 131 | def 
_get_num_skeletons(self): 132 | return self.num_skeletons 133 | 134 | def _get_skeleton_objects(self): 135 | return self.skeletons 136 | 137 | def _set_skeleton_objects(self, skeletons): 138 | self.skeletons = skeletons 139 | 140 | def _add_skeleton_object(self, skeleton): 141 | self.skeletons.append(skeleton) 142 | 143 | class SkeletonVideo(object): 144 | def __init__(self, 145 | nframes, 146 | frames=None): 147 | self.num_frames = nframes 148 | if frames is None: 149 | self.frames = [] 150 | 151 | def _get_num_frames(self): 152 | return self.num_frames 153 | 154 | def _get_frame_objects(self): 155 | return self.frames 156 | 157 | def _set_frame_objects(self, frames): 158 | self.frames = frames 159 | 160 | def _add_frame_object(self, frame): 161 | self.frames.append(frame) 162 | 163 | def _get_main_actor_skeletons(self): 164 | 165 | def _get_motion_for_skeletons(skeletons): 166 | total_dist = 0 167 | for i in range(len(skeletons)-1): 168 | joints_1 = skeletons[i]._get_joint_objects() 169 | joints_2 = skeletons[i+1]._get_joint_objects() 170 | for j, k in zip(joints_1, joints_2): 171 | p1 = np.array(j._get_cartesian_coordinates()) 172 | p2 = np.array(k._get_cartesian_coordinates()) 173 | dist = np.sqrt(np.sum(np.square(p1 - p2))) 174 | total_dist += dist 175 | return total_dist 176 | 177 | def _is_noisy_skeleton(skeleton): 178 | joints = skeleton._get_joint_objects() 179 | X = []; Y = [] 180 | for joint in joints: 181 | x, y, z = joint._get_cartesian_coordinates() 182 | X.append(x); Y.append(y) 183 | X = np.array(X); Y = np.array(Y) 184 | xspread = np.max(X) - np.min(X); yspread = np.max(Y) - np.min(Y) 185 | return (yspread / xspread) 186 | 187 | def _create_zero_skeleton(): 188 | joints = [] 189 | for i in range(25): 190 | joint = Joint(0.0, 0.0, 0.0) 191 | joints.append(joint) 192 | skeleton = Skeleton(njoints=len(joints), joints=joints) 193 | skeleton._is_zero_skeleton = True 194 | return skeleton 195 | 196 | skeletons_0 = []; skeletons_1 = [] 197 | for i in range(len(self.frames)): 198 | frame = self.frames[i] 199 | if frame._get_num_skeletons() == 1: 200 | skeletons_0.append(frame._get_skeleton_objects()[0]) 201 | skeletons_1.append(_create_zero_skeleton()) 202 | elif frame._get_num_skeletons() == 2: 203 | skeletons_0.append(frame._get_skeleton_objects()[0]) 204 | skeletons_1.append(frame._get_skeleton_objects()[1]) 205 | elif frame._get_num_skeletons() > 2: 206 | ratios = [] 207 | for i in range(frame._get_num_skeletons()): 208 | skeleton = frame._get_skeleton_objects()[i] 209 | ratios.append((_is_noisy_skeleton(skeleton), i)) 210 | ratios = sorted(ratios) 211 | idx = [x for (val, x) in ratios[-2:]] 212 | skeletons_0.append(frame._get_skeleton_objects()[idx[0]]) 213 | skeletons_1.append(frame._get_skeleton_objects()[idx[1]]) 214 | dist_0 = _get_motion_for_skeletons(skeletons_0) 215 | dist_1 = _get_motion_for_skeletons(skeletons_1) 216 | if dist_0 > dist_1: 217 | return skeletons_0, skeletons_1 218 | return skeletons_1, skeletons_0 219 | 220 | 221 | # Reads the data for a complete frame set from the NTU RGB+D Action Recognition Dataset 222 | # Included joints are: 223 | # -------------------------------------------------------------------------------------------------------------- 224 | # 0 - base of the spine 225 | # 1 - middle of the spine 226 | # 2 - neck 227 | # 3 - head 228 | # 4 - left shoulder 229 | # 5 - left elbow 230 | # 6 - left wrist 231 | # 7 - left hand 232 | # 8 - right shoulder 233 | # 9 - right elbow 234 | # 10 - right wrist 235 | # 11 - right hand 236 | # 12 - left 
hip 237 | # 13 - left knee 238 | # 14 - left ankle 239 | # 15 - left foot 240 | # 16 - right hip 241 | # 17 - right knee 242 | # 18 - right ankle 243 | # 19 - right foot 244 | # 20 - spine 245 | # 21 - tip of the left hand 246 | # 22 - left thumb 247 | # 23 - tip of the right hand 248 | # 24 - right thumb 249 | # -------------------------------------------------------------------------------------------------------------- 250 | class Reader(object): 251 | def __init__(self, 252 | dataset_dir, 253 | splits_dir): 254 | self.data = dataset_dir 255 | self.splits = splits_dir 256 | 257 | self.train_splits = {1: os.path.join(splits_dir, 'train_cs.txt'), 258 | 2: os.path.join(splits_dir, 'train_cv.txt')} 259 | self.test_splits = {1: os.path.join(splits_dir, 'test_cs.txt'), 260 | 2: os.path.join(splits_dir, 'test_cv.txt')} 261 | 262 | def _normalize_skeleton(self, skeleton): 263 | joints = skeleton._get_joint_objects() 264 | if not (len(joints) == 25): 265 | return None 266 | 267 | ''' Translation Matrix 268 | - T_x: neg(X coordinate of middle of spine) 269 | - T_y: neg(Y coordinate of middle of spine) 270 | - T_z: neg(Z coordinate of middle of spine) 271 | ''' 272 | origin = joints[1] 273 | transmat = np.zeros((4, 4)) 274 | transmat[0][0] = transmat[1][1] = transmat[2][2] = transmat[3][3] = 1.0 275 | transmat[3][0] = -origin.x; transmat[3][1] = -origin.y; transmat[3][2] = -origin.z 276 | 277 | ''' Rotation Matrix 278 | - New X axis: Right shoulder (rs) to Left shoulder (ls) joint 279 | - New Y axis: Base of spine (bsp) to Spine (sp) joint 280 | - New Z axis: In direction of X cross Y 281 | - Using arctan2 is always better than using arcsin/arccos, as they can be 282 | numerically unstable for certain values of the angles. 283 | ''' 284 | rs = joints[8]; ls = joints[4]; bsp = joints[0]; sp = joints[20] 285 | rs = np.array(rs._get_cartesian_coordinates()) 286 | ls = np.array(ls._get_cartesian_coordinates()) 287 | bsp = np.array(bsp._get_cartesian_coordinates()) 288 | sp = np.array(sp._get_cartesian_coordinates()) 289 | curr_x = np.array([1.0, 0.0, 0.0]); new_x = np.add(rs, -ls) 290 | curr_y = np.array([0.0, 1.0, 0.0]); new_y = np.add(bsp, -sp) 291 | curr_z = np.array([0.0, 0.0, 1.0]); new_z = np.cross(new_x, new_y) 292 | # Dot and cross product both needed for arctan2 293 | x_dot = np.dot(new_x, curr_x); y_dot = np.dot(new_y, curr_y); z_dot = np.dot(new_z, curr_z) 294 | x_cross = np.cross(new_x, curr_x); y_cross = np.cross(new_y, curr_y); z_cross = np.cross(new_z, curr_z) 295 | # arccos is numerically unstable when angles are near zero 296 | theta_x = np.arctan2(np.linalg.norm(x_cross), x_dot) 297 | theta_y = np.arctan2(np.linalg.norm(y_cross), y_dot) 298 | theta_z = np.arctan2(np.linalg.norm(z_cross), z_dot) 299 | rot_x = np.zeros((4, 4)); rot_x[0][0] = 1.0; rot_x[3][3] = 1.0 300 | rot_x[1][1] = np.cos(theta_x); rot_x[1][2] = -np.sin(theta_x) 301 | rot_x[2][1] = np.sin(theta_x); rot_x[2][2] = np.cos(theta_x) 302 | rot_y = np.zeros((4, 4)); rot_y[1][1] = 1.0; rot_y[3][3] = 1.0 303 | rot_y[0][0] = np.cos(theta_y); rot_y[0][2] = np.sin(theta_y) 304 | rot_y[2][0] = -np.sin(theta_y); rot_y[2][2] = np.cos(theta_y) 305 | rot_z = np.zeros((4, 4)); rot_z[2][2] = 1.0; rot_z[3][3] = 1.0 306 | rot_z[0][0] = np.cos(theta_z); rot_z[0][1] = -np.sin(theta_z) 307 | rot_z[1][0] = np.sin(theta_z); rot_z[1][1] = np.cos(theta_z) 308 | 309 | ''' Scaling Matrix 310 | - S_x, S_y, S_z: Inverse of distance between Base of spine (bsp) and 311 | spine (sp) joint; add a small delta to avoid division by zero (in case 312 | of 
very small values of distance). 313 | ''' 314 | scale = np.zeros((4, 4)); scale[3][3] = 1.0 315 | dist = np.linalg.norm(new_y) + 1e-4 316 | scale[0][0] = 1.0/(dist); scale[1][1] = 1.0/(dist); scale[2][2] = 1.0/(dist) 317 | 318 | new_joints = [] 319 | for joint in joints: 320 | x, y, z = joint._get_cartesian_coordinates() 321 | # (4,) : Homogeneous coordinates 322 | j = np.array([x, y, z, 1.0]) 323 | # (4,) x (4, 4) = (4,) for all matrix multiplications 324 | j = np.matmul(j, transmat) 325 | j = np.matmul(j, rot_x); j = np.matmul(j, rot_y); j = np.matmul(j, rot_z) 326 | j = np.matmul(j, scale) 327 | # Get (x, y, z) from Homogeneous coordinates 328 | joint._set_cartesian_coordinates(j[0], j[1], j[2]) 329 | new_joints.append(joint) 330 | skeleton._set_joint_objects(new_joints) 331 | 332 | return skeleton 333 | 334 | def _read_skeleton_file(self, f): 335 | fpath = os.path.join(self.data, f) 336 | with open(fpath, 'r') as f: 337 | out = f.read().replace('\n', '').split() 338 | 339 | index = 0 340 | nframes = int(out[index]); index += 1 341 | video = SkeletonVideo(nframes) 342 | for i in range(nframes): 343 | bods = int(out[index]); index += 1 344 | frame = Frame() 345 | for j in range(bods): 346 | skelid = long(out[index]); index += 1 347 | 348 | cedges = int(out[index]); index += 1 349 | lconfidence = int(out[index]); index += 1 350 | lstate = int(out[index]); index += 1 351 | rconfidence = int(out[index]); index += 1 352 | rstate = int(out[index]); index += 1 353 | restrict = int(out[index]); index += 1 354 | 355 | lX = float(out[index]); index += 1 356 | lY = float(out[index]); index += 1 357 | 358 | track = int(out[index]); index += 1 359 | 360 | num_joints = int(out[index]); index += 1 361 | skeleton = Skeleton(skelid, num_joints, 362 | cedges, lconfidence, lstate, 363 | rconfidence, rstate, restrict, 364 | lX, lY, track) 365 | 366 | for k in range(num_joints): 367 | x = float(out[index]); index += 1 368 | y = float(out[index]); index += 1 369 | z = float(out[index]); index += 1 370 | 371 | dX = float(out[index]); index += 1 372 | dY = float(out[index]); index += 1 373 | 374 | cX = float(out[index]); index += 1 375 | cY = float(out[index]); index += 1 376 | 377 | orW = float(out[index]); index += 1 378 | orX = float(out[index]); index += 1 379 | orY = float(out[index]); index += 1 380 | orZ = float(out[index]); index += 1 381 | 382 | track = int(out[index]); index += 1 383 | 384 | joint = Joint(x, y, z, 385 | dX, dY, 386 | cX, cY, 387 | orX, orY, orZ, orW, 388 | track) 389 | skeleton._add_joint_object(joint) 390 | skeleton = self._normalize_skeleton(skeleton) 391 | if skeleton: 392 | frame._add_skeleton_object(skeleton) 393 | frame.num_skeletons = len(frame._get_skeleton_objects()) 394 | video._add_frame_object(frame) 395 | return video 396 | 397 | def _generate_image_representation_no_features(self, skeleton): 398 | joints = skeleton._get_joint_objects() 399 | xlist = []; ylist = []; zlist = [] 400 | for joint in joints: 401 | x, y, z = joint._get_cartesian_coordinates() 402 | xlist.append(x); ylist.append(y); zlist.append(z) 403 | jointlist = [[0, 1, 2, 3], [0, 16, 17, 18, 19], [0, 12, 13, 14, 15], 404 | [20, 4, 5, 6, 7, 21], [20, 8, 9, 10, 11, 23], [11, 24], [7, 22]] 405 | fig = plt.figure() 406 | ax = fig.add_subplot(111, projection='3d') 407 | for i in range(len(jointlist)): 408 | x_plot = []; y_plot = []; z_plot = [] 409 | for j in jointlist[i]: 410 | x_plot.append(xlist[j]); y_plot.append(ylist[j]); z_plot.append(zlist[j]) 411 | ax.scatter(x_plot, y_plot, z_plot, c = 'b') 412 | 
ax.plot(x_plot, y_plot, z_plot, c = 'b') 413 | plt.show() 414 | -------------------------------------------------------------------------------- /models/HybridModel/inception_resnet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the definition of the Inception Resnet V2 architecture. 16 | 17 | As described in http://arxiv.org/abs/1602.07261. 18 | 19 | Inception-v4, Inception-ResNet and the Impact of Residual Connections 20 | on Learning 21 | Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | slim = tf.contrib.slim 31 | 32 | 33 | def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 34 | """Builds the 35x35 resnet block.""" 35 | with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): 36 | with tf.variable_scope('Branch_0'): 37 | tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') 38 | with tf.variable_scope('Branch_1'): 39 | tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') 40 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') 41 | with tf.variable_scope('Branch_2'): 42 | tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') 43 | tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') 44 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') 45 | mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2]) 46 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 47 | activation_fn=None, scope='Conv2d_1x1') 48 | net += scale * up 49 | if activation_fn: 50 | net = activation_fn(net) 51 | return net 52 | 53 | 54 | def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 55 | """Builds the 17x17 resnet block.""" 56 | with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): 57 | with tf.variable_scope('Branch_0'): 58 | tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') 59 | with tf.variable_scope('Branch_1'): 60 | tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') 61 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], 62 | scope='Conv2d_0b_1x7') 63 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], 64 | scope='Conv2d_0c_7x1') 65 | mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) 66 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 67 | activation_fn=None, scope='Conv2d_1x1') 68 | net += scale * up 69 | if activation_fn: 70 | net = activation_fn(net) 71 | return net 72 | 73 | 74 | def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 75 | """Builds the 8x8 
resnet block.""" 76 | with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): 77 | with tf.variable_scope('Branch_0'): 78 | tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') 79 | with tf.variable_scope('Branch_1'): 80 | tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') 81 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], 82 | scope='Conv2d_0b_1x3') 83 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], 84 | scope='Conv2d_0c_3x1') 85 | mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) 86 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 87 | activation_fn=None, scope='Conv2d_1x1') 88 | net += scale * up 89 | if activation_fn: 90 | net = activation_fn(net) 91 | return net 92 | 93 | 94 | def inception_resnet_v2_base(inputs, 95 | final_endpoint='Conv2d_7b_1x1', 96 | output_stride=16, 97 | align_feature_maps=False, 98 | scope=None): 99 | """Inception model from http://arxiv.org/abs/1602.07261. 100 | 101 | Constructs an Inception Resnet v2 network from inputs to the given final 102 | endpoint. This method can construct the network up to the final inception 103 | block Conv2d_7b_1x1. 104 | 105 | Args: 106 | inputs: a tensor of size [batch_size, height, width, channels]. 107 | final_endpoint: specifies the endpoint to construct the network up to. It 108 | can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 109 | 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 110 | 'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1'] 111 | output_stride: A scalar that specifies the requested ratio of input to 112 | output spatial resolution. Only supports 8 and 16. 113 | align_feature_maps: When true, changes all the VALID paddings in the network 114 | to SAME padding so that the feature maps are aligned. 115 | scope: Optional variable_scope. 116 | 117 | Returns: 118 | tensor_out: output tensor corresponding to the final_endpoint. 119 | end_points: a set of activations for external use, for example summaries or 120 | losses. 121 | 122 | Raises: 123 | ValueError: if final_endpoint is not set to one of the predefined values, 124 | or if the output_stride is not 8 or 16, or if the output_stride is 8 and 125 | we request an end point after 'PreAuxLogits'. 
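  Example (illustrative sketch, assuming the defaults from
  inception_resnet_v2_arg_scope defined later in this file):
    images = tf.placeholder(tf.float32, [None, 299, 299, 3])
    with slim.arg_scope(inception_resnet_v2_arg_scope()):
      net, end_points = inception_resnet_v2_base(
          images, final_endpoint='PreAuxLogits')
    # `net` is the PreAuxLogits feature map; earlier activations such as
    # end_points['Mixed_6a'] are also available in `end_points`.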
126 | """ 127 | if output_stride != 8 and output_stride != 16: 128 | raise ValueError('output_stride must be 8 or 16.') 129 | 130 | padding = 'SAME' if align_feature_maps else 'VALID' 131 | 132 | end_points = {} 133 | 134 | def add_and_check_final(name, net): 135 | end_points[name] = net 136 | return name == final_endpoint 137 | 138 | with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]): 139 | with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], 140 | stride=1, padding='SAME'): 141 | # 149 x 149 x 32 142 | net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding, 143 | scope='Conv2d_1a_3x3') 144 | if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points 145 | 146 | # 147 x 147 x 32 147 | net = slim.conv2d(net, 32, 3, padding=padding, 148 | scope='Conv2d_2a_3x3') 149 | if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points 150 | # 147 x 147 x 64 151 | net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') 152 | if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points 153 | # 73 x 73 x 64 154 | net = slim.max_pool2d(net, 3, stride=2, padding=padding, 155 | scope='MaxPool_3a_3x3') 156 | if add_and_check_final('MaxPool_3a_3x3', net): return net, end_points 157 | # 73 x 73 x 80 158 | net = slim.conv2d(net, 80, 1, padding=padding, 159 | scope='Conv2d_3b_1x1') 160 | if add_and_check_final('Conv2d_3b_1x1', net): return net, end_points 161 | # 71 x 71 x 192 162 | net = slim.conv2d(net, 192, 3, padding=padding, 163 | scope='Conv2d_4a_3x3') 164 | if add_and_check_final('Conv2d_4a_3x3', net): return net, end_points 165 | # 35 x 35 x 192 166 | net = slim.max_pool2d(net, 3, stride=2, padding=padding, 167 | scope='MaxPool_5a_3x3') 168 | if add_and_check_final('MaxPool_5a_3x3', net): return net, end_points 169 | 170 | # 35 x 35 x 320 171 | with tf.variable_scope('Mixed_5b'): 172 | with tf.variable_scope('Branch_0'): 173 | tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') 174 | with tf.variable_scope('Branch_1'): 175 | tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') 176 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, 177 | scope='Conv2d_0b_5x5') 178 | with tf.variable_scope('Branch_2'): 179 | tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') 180 | tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, 181 | scope='Conv2d_0b_3x3') 182 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, 183 | scope='Conv2d_0c_3x3') 184 | with tf.variable_scope('Branch_3'): 185 | tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', 186 | scope='AvgPool_0a_3x3') 187 | tower_pool_1 = slim.conv2d(tower_pool, 64, 1, 188 | scope='Conv2d_0b_1x1') 189 | net = tf.concat( 190 | [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3) 191 | 192 | if add_and_check_final('Mixed_5b', net): return net, end_points 193 | # TODO(alemi): Register intermediate endpoints 194 | net = slim.repeat(net, 10, block35, scale=0.17) 195 | 196 | # 17 x 17 x 1088 if output_stride == 8, 197 | # 33 x 33 x 1088 if output_stride == 16 198 | use_atrous = output_stride == 8 199 | 200 | with tf.variable_scope('Mixed_6a'): 201 | with tf.variable_scope('Branch_0'): 202 | tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2, 203 | padding=padding, 204 | scope='Conv2d_1a_3x3') 205 | with tf.variable_scope('Branch_1'): 206 | tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 207 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, 208 | scope='Conv2d_0b_3x3') 209 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, 210 | stride=1 if 
use_atrous else 2, 211 | padding=padding, 212 | scope='Conv2d_1a_3x3') 213 | with tf.variable_scope('Branch_2'): 214 | tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2, 215 | padding=padding, 216 | scope='MaxPool_1a_3x3') 217 | net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) 218 | 219 | if add_and_check_final('Mixed_6a', net): return net, end_points 220 | 221 | # TODO(alemi): register intermediate endpoints 222 | with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1): 223 | net = slim.repeat(net, 20, block17, scale=0.10) 224 | if add_and_check_final('PreAuxLogits', net): return net, end_points 225 | 226 | if output_stride == 8: 227 | # TODO(gpapan): Properly support output_stride for the rest of the net. 228 | raise ValueError('output_stride==8 is only supported up to the ' 229 | 'PreAuxlogits end_point for now.') 230 | 231 | # 8 x 8 x 2080 232 | with tf.variable_scope('Mixed_7a'): 233 | with tf.variable_scope('Branch_0'): 234 | tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 235 | tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, 236 | padding=padding, 237 | scope='Conv2d_1a_3x3') 238 | with tf.variable_scope('Branch_1'): 239 | tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 240 | tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, 241 | padding=padding, 242 | scope='Conv2d_1a_3x3') 243 | with tf.variable_scope('Branch_2'): 244 | tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 245 | tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, 246 | scope='Conv2d_0b_3x3') 247 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, 248 | padding=padding, 249 | scope='Conv2d_1a_3x3') 250 | with tf.variable_scope('Branch_3'): 251 | tower_pool = slim.max_pool2d(net, 3, stride=2, 252 | padding=padding, 253 | scope='MaxPool_1a_3x3') 254 | net = tf.concat( 255 | [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) 256 | 257 | if add_and_check_final('Mixed_7a', net): return net, end_points 258 | 259 | # TODO(alemi): register intermediate endpoints 260 | net = slim.repeat(net, 9, block8, scale=0.20) 261 | net = block8(net, activation_fn=None) 262 | 263 | # 8 x 8 x 1536 264 | net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') 265 | if add_and_check_final('Conv2d_7b_1x1', net): return net, end_points 266 | 267 | raise ValueError('final_endpoint (%s) not recognized', final_endpoint) 268 | 269 | 270 | def inception_resnet_v2(inputs, num_classes=1001, is_training=True, 271 | dropout_keep_prob=0.8, 272 | reuse=None, 273 | scope='InceptionResnetV2', 274 | create_aux_logits=True): 275 | """Creates the Inception Resnet V2 model. 276 | 277 | Args: 278 | inputs: a 4-D tensor of size [batch_size, height, width, 3]. 279 | num_classes: number of predicted classes. 280 | is_training: whether is training or not. 281 | dropout_keep_prob: float, the fraction to keep before final layer. 282 | reuse: whether or not the network and its variables should be reused. To be 283 | able to reuse 'scope' must be given. 284 | scope: Optional variable_scope. 285 | create_aux_logits: Whether to include the auxilliary logits. 286 | 287 | Returns: 288 | logits: the logits outputs of the model. 289 | end_points: the set of end_points from the inception model. 
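  Example (illustrative sketch; `images` is assumed to be a
  [batch, 299, 299, 3] float tensor and 60 is the NTU RGB+D class count used
  elsewhere in this repository):
    with slim.arg_scope(inception_resnet_v2_arg_scope()):
      logits, end_points = inception_resnet_v2(images, num_classes=60,
                                               is_training=True)
    probabilities = end_points['Predictions']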
290 | """ 291 | end_points = {} 292 | 293 | with tf.variable_scope(scope, 'InceptionResnetV2', [inputs, num_classes], 294 | reuse=reuse) as scope: 295 | with slim.arg_scope([slim.batch_norm, slim.dropout], 296 | is_training=is_training): 297 | 298 | net, end_points = inception_resnet_v2_base(inputs, scope=scope) 299 | 300 | if create_aux_logits: 301 | with tf.variable_scope('AuxLogits'): 302 | aux = end_points['PreAuxLogits'] 303 | aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID', 304 | scope='Conv2d_1a_3x3') 305 | aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') 306 | aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], 307 | padding='VALID', scope='Conv2d_2a_5x5') 308 | aux = slim.flatten(aux) 309 | aux = slim.fully_connected(aux, num_classes, activation_fn=None, 310 | scope='Logits') 311 | end_points['AuxLogits'] = aux 312 | 313 | with tf.variable_scope('Logits'): 314 | net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', 315 | scope='AvgPool_1a_8x8') 316 | net = slim.flatten(net) 317 | 318 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 319 | scope='Dropout') 320 | 321 | end_points['PreLogitsFlatten'] = net 322 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 323 | scope='Logits') 324 | end_points['Logits'] = logits 325 | end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') 326 | 327 | return logits, end_points 328 | inception_resnet_v2.default_image_size = 299 329 | 330 | 331 | def inception_resnet_v2_arg_scope(weight_decay=0.00004, 332 | batch_norm_decay=0.9997, 333 | batch_norm_epsilon=0.001): 334 | """Yields the scope with the default parameters for inception_resnet_v2. 335 | 336 | Args: 337 | weight_decay: the weight decay for weights variables. 338 | batch_norm_decay: decay for the moving average of batch_norm momentums. 339 | batch_norm_epsilon: small float added to variance to avoid dividing by zero. 340 | 341 | Returns: 342 | a arg_scope with the parameters needed for inception_resnet_v2. 343 | """ 344 | # Set weight_decay for weights in conv2d and fully_connected layers. 345 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 346 | weights_regularizer=slim.l2_regularizer(weight_decay), 347 | biases_regularizer=slim.l2_regularizer(weight_decay)): 348 | 349 | batch_norm_params = { 350 | 'decay': batch_norm_decay, 351 | 'epsilon': batch_norm_epsilon, 352 | } 353 | # Set activation_fn and parameters for batch_norm. 
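    # `batch_norm_params` above is passed as normalizer_params below, so every
    # conv2d built under the returned scope uses ReLU followed by batch
    # normalization with the decay/epsilon chosen by the caller.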
354 | with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, 355 | normalizer_fn=slim.batch_norm, 356 | normalizer_params=batch_norm_params) as scope: 357 | return scope 358 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os, argparse, shutil, time, glob 2 | 3 | import tensorflow as tf 4 | from tensorflow import app 5 | from tensorflow import flags 6 | from tensorflow import logging 7 | from tensorflow import gfile 8 | from tensorflow.python.client import device_lib 9 | from tensorflow.python.framework import ops 10 | from tensorflow.python.framework import dtypes 11 | from tensorflow.python.tools.inspect_checkpoint import * 12 | 13 | from tensorflow.contrib.tensorboard.plugins import projector 14 | 15 | import data, models 16 | from utils import * 17 | from test import * 18 | 19 | slim = tf.contrib.slim 20 | layers = tf.contrib.layers 21 | framework = tf.contrib.framework 22 | 23 | FLAGS = flags.FLAGS 24 | 25 | if __name__ == '__main__': 26 | flags.DEFINE_string("train_dir", "", 27 | "Directory to save the model files in") 28 | flags.DEFINE_string("dataset", "HybridModelReader", "Which dataset to load \ 29 | for Action Recognition") 30 | flags.DEFINE_string("dataset_dir", "", \ 31 | "Path to base directory for video frames (rgb / rgb+flow)") 32 | flags.DEFINE_string("splits_dir", "", \ 33 | "Directory where train and test splits are stored") 34 | flags.DEFINE_string("checkpoint_file", "", \ 35 | "Checkpoint file to restore variables") 36 | flags.DEFINE_string("model", "Hybrid", "Which architecture to use for the model") 37 | flags.DEFINE_string("label_loss", "CrossEntropyLoss", "Which loss function to use \ 38 | for training the model") 39 | flags.DEFINE_string("optimizer", "AdamOptimizer", "What optimizer class to use") 40 | flags.DEFINE_string("split_num", "1", "The train/test split to run the model on") 41 | 42 | flags.DEFINE_integer("batch_size", 24, "Number of examples to process per batch \ 43 | for training") 44 | flags.DEFINE_integer("num_epochs", 50, "How many passes to make over the dataset \ 45 | before halting training") 46 | flags.DEFINE_integer("export_model_steps", 10000, "The period, in number of steps, \ 47 | with which the model is exported for batch prediction") 48 | flags.DEFINE_integer("max_steps", None, "The maximum number of iterations of the \ 49 | training loop") 50 | flags.DEFINE_integer("learning_rate_decay_examples", 10000000, "Multiply current learning \ 51 | rate by learning_rate_decay every learning_rate_decay_examples") 52 | 53 | flags.DEFINE_float("base_learning_rate", 0.01, "Which learning rate to start with") 54 | flags.DEFINE_float("learning_rate_decay", 0.95, "Learning rate decay factor to be \ 55 | applied every learning_rate_decay_examples") 56 | flags.DEFINE_float("clip_gradient_norm", 1.0, "Norm to clip gradients to") 57 | flags.DEFINE_float("regularization_penalty", 0.00005, "How much weight to give to the \ 58 | regularization loss (the label loss has a weight of 1).") 59 | 60 | flags.DEFINE_bool("start_new_model", False, "If set, this will not resume from a checkpoint \ 61 | and will instead create a new model instance") 62 | flags.DEFINE_bool("log_device_placement", False, "Whether to write the device on which every \ 63 | op will run into the logs on startup.") 64 | 65 | 66 | def get_input_data_tensors(reader, 67 | data_pattern, 68 | batch_size=2, 69 | num_epochs=None, 70 | num_readers=1): 71 | 
logging.info("Using batch size of " + str(batch_size) + " for training.") 72 | files, labels = reader._read_filelist(split=reader.present_split) 73 | 74 | with tf.name_scope("train_input"): 75 | logging.info("Number of training files: %s", str(len(files))) 76 | 77 | files = ops.convert_to_tensor(files, dtypes.string) 78 | labels = ops.convert_to_tensor(labels, dtypes.int64) 79 | 80 | input_queue = tf.train.slice_input_producer( 81 | [files, labels], 82 | num_epochs = num_epochs, 83 | shuffle = True) 84 | image, label = reader._read_samples(input_queue) 85 | 86 | train_image_loader, train_label_loader = tf.train.shuffle_batch( 87 | [image, label], 88 | batch_size = batch_size, 89 | capacity = 5 * batch_size, 90 | min_after_dequeue = batch_size) 91 | 92 | return train_image_loader, train_label_loader 93 | 94 | def build_graph(reader, 95 | model, 96 | split_num, 97 | label_loss_fn=losses.CrossEntropyLoss(), 98 | batch_size=1000, 99 | base_learning_rate=0.01, 100 | learning_rate_decay_examples=1000000, 101 | learning_rate_decay=0.95, 102 | optimizer_class=tf.train.AdamOptimizer, 103 | clip_gradient_norm=1.0, 104 | regularization_penalty=1, 105 | num_readers=1, 106 | num_epochs=None): 107 | 108 | global_step = tf.Variable(0, name="global_step", trainable=False) 109 | 110 | learning_rate = tf.train.exponential_decay( 111 | base_learning_rate, 112 | global_step * batch_size, 113 | learning_rate_decay_examples, 114 | learning_rate_decay, 115 | staircase=True) 116 | tf.summary.scalar('learning_rate', learning_rate) 117 | 118 | optimizer = optimizer_class(learning_rate) 119 | images_loader, labels_loader = ( 120 | get_input_data_tensors( 121 | reader, 122 | reader.train_split_files[split_num], 123 | batch_size=batch_size, 124 | num_readers=num_readers, 125 | num_epochs=num_epochs)) 126 | images_batch = tf.placeholder(tf.float32, (None, 224, 224, 3)) 127 | labels_batch = tf.placeholder(tf.int64, (None,)) 128 | # (224, 224, 3) -> (14, 14, 512) 129 | feature_0, restore_vars_0, train_v0 = model.create_feature_model( 130 | images_batch, scope="rgb") 131 | feature_1, restore_vars_1, train_v1 = model.create_feature_model( 132 | images_batch, scope="rgbdiff") 133 | # (14, 14, 512) -> (7168,) 134 | aux_feat_batch = tf.placeholder(tf.float32, (None, 14, 14, 512)) 135 | aux_output, train_v2 = model.create_aux_model( 136 | aux_feat_batch) 137 | # (21504,) -> (60,) 138 | aux_fc_batch_0 = tf.placeholder(tf.float32, (None, 21504)) 139 | logits_aux_0, train_v3 = model.create_logits_model( 140 | aux_fc_batch_0, 60, scope="auxlogs", reuse=None) 141 | # (21504,) -> (60,) 142 | aux_fc_batch_1 = tf.placeholder(tf.float32, (None, 21504)) 143 | logits_aux_1, train_v4 = model.create_logits_model( 144 | aux_fc_batch_1, 60, scope="auxlogs", reuse=True) 145 | # (21504,) -> (60,) 146 | aux_fc_batch_2 = tf.placeholder(tf.float32, (None, 21504)) 147 | logits_aux_2, train_v5 = model.create_logits_model( 148 | aux_fc_batch_2, 60, scope="auxlogs", reuse=True) 149 | # (21504,) -> (60,) 150 | aux_fc_batch_3 = tf.placeholder(tf.float32, (None, 21504)) 151 | logits_aux_3, train_v6 = model.create_logits_model( 152 | aux_fc_batch_3, 60, scope="auxlogs", reuse=True) 153 | 154 | loss_0 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_0, labels=labels_batch)) 155 | loss_1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_1, labels=labels_batch)) 156 | loss_2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_2, labels=labels_batch)) 157 | loss_3 
= tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_3, labels=labels_batch)) 158 | 159 | loss = loss_0 + loss_1 + loss_2 + loss_3 160 | predictions = ( logits_aux_0 + logits_aux_1 + logits_aux_2 + logits_aux_3 ) 161 | 162 | train_vars = train_v0 163 | train_vars.extend(train_v1) 164 | train_vars.extend(train_v2) 165 | train_vars.extend(train_v3) 166 | train_vars.extend(train_v4) 167 | train_vars.extend(train_v5) 168 | train_vars.extend(train_v6) 169 | train_op = optimizer.minimize(loss, global_step=global_step, var_list=train_vars) 170 | 171 | tf.add_to_collection("global_step", global_step) 172 | tf.add_to_collection("loss", loss) 173 | tf.add_to_collection("feature_0", feature_0) 174 | tf.add_to_collection("feature_1", feature_1) 175 | tf.add_to_collection("aux_feat_batch", aux_feat_batch) 176 | tf.add_to_collection("aux_output", aux_output) 177 | tf.add_to_collection("aux_fc_batch_0", aux_fc_batch_0) 178 | tf.add_to_collection("logits_aux_0", logits_aux_0) 179 | tf.add_to_collection("aux_fc_batch_1", aux_fc_batch_1) 180 | tf.add_to_collection("logits_aux_1", logits_aux_1) 181 | tf.add_to_collection("aux_fc_batch_2", aux_fc_batch_2) 182 | tf.add_to_collection("logits_aux_2", logits_aux_2) 183 | tf.add_to_collection("aux_fc_batch_3", aux_fc_batch_3) 184 | tf.add_to_collection("logits_aux_3", logits_aux_3) 185 | tf.add_to_collection("input_batch", images_batch) 186 | tf.add_to_collection("labels", labels_batch) 187 | tf.add_to_collection("predictions", predictions) 188 | tf.add_to_collection("train_op", train_op) 189 | tf.add_to_collection("images_loader", images_loader) 190 | tf.add_to_collection("labels_loader", labels_loader) 191 | 192 | restore_vars_0_dict = {v.name[4:][:-2]: v 193 | for v in restore_vars_0} 194 | restore_vars_1_dict = {v.name[8:][:-2]: v 195 | for v in restore_vars_1} 196 | return restore_vars_0_dict, restore_vars_1_dict 197 | 198 | def find_class_by_name(name, modules): 199 | modules = [getattr(module, name, None) for module in modules] 200 | return next(a for a in modules if a) 201 | 202 | def task_as_string(task): 203 | return "/job:%s/task:%s" % (task.type, task.index) 204 | 205 | class Trainer(object): 206 | def __init__(self, cluster, task, train_dir, model, reader, 207 | log_device_placement=True, max_steps=None, 208 | export_model_steps=1000): 209 | 210 | self.cluster = cluster 211 | self.task = task 212 | self.is_master = (task.type == "master" and task.index == 0) 213 | self.train_dir = train_dir 214 | self.config = tf.ConfigProto( 215 | allow_soft_placement=True, log_device_placement=log_device_placement) 216 | self.model = model 217 | self.reader = reader 218 | self.max_steps = max_steps 219 | self.max_steps_reached = False 220 | self.export_model_steps = export_model_steps 221 | self.last_model_export_step = 0 222 | 223 | def remove_training_directory(self, train_dir): 224 | """Removes the training directory.""" 225 | try: 226 | logging.info( 227 | "%s: Removing existing train directory.", 228 | task_as_string(self.task)) 229 | gfile.DeleteRecursively(train_dir) 230 | except: 231 | logging.error( 232 | "%s: Failed to delete directory " + train_dir + 233 | " when starting a new model. 
Please delete it manually and" + 234 | " try again.", task_as_string(self.task)) 235 | 236 | def start_server_if_distributed(self): 237 | """ Starts a server if the execution is distributed """ 238 | 239 | if self.cluster: 240 | logging.info("%s: Starting trainer within cluster %s.", 241 | task_as_string(self.task), self.cluster.as_dict()) 242 | server = start_server(self.cluster, self.task) 243 | target = server.target 244 | device_fn = tf.train.replica_device_setter( 245 | ps_device="/job:ps", 246 | worker_device="/job:%s/task:%d" % (self.task.type, self.task.index), 247 | cluster=self.cluster) 248 | else: 249 | target = "" 250 | device_fn = "" 251 | return (target, device_fn) 252 | 253 | def get_meta_filename(self, start_new_model, train_dir): 254 | if start_new_model: 255 | logging.info("%s: Flag 'start_new_model' is set. Building a new model.", 256 | task_as_string(self.task)) 257 | return None 258 | 259 | latest_checkpoint = tf.train.latest_checkpoint(train_dir) 260 | if not latest_checkpoint: 261 | logging.info("%s: No checkpoint file found. Building a new model.", 262 | task_as_string(self.task)) 263 | return None 264 | 265 | meta_filename = latest_checkpoint + ".meta" 266 | if not gfile.Exists(meta_filename): 267 | logging.info("%s: No meta graph file found. Building a new model.", 268 | task_as_string(self.task)) 269 | return None 270 | else: 271 | return meta_filename 272 | 273 | def recover_model(self, meta_filename): 274 | logging.info("%s: Restoring from meta graph file %s", 275 | task_as_string(self.task), meta_filename) 276 | return tf.train.import_meta_graph(meta_filename) 277 | 278 | def build_model(self, model, reader): 279 | """ Find the model and build the graph """ 280 | 281 | label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])() 282 | optimizer_class = find_class_by_name(FLAGS.optimizer, [tf.train]) 283 | 284 | restore_vars = \ 285 | build_graph(reader=reader, 286 | model=model, 287 | optimizer_class=optimizer_class, 288 | clip_gradient_norm=FLAGS.clip_gradient_norm, 289 | split_num=FLAGS.split_num, 290 | label_loss_fn=label_loss_fn, 291 | base_learning_rate=FLAGS.base_learning_rate, 292 | learning_rate_decay=FLAGS.learning_rate_decay, 293 | learning_rate_decay_examples=FLAGS.learning_rate_decay_examples, 294 | regularization_penalty=FLAGS.regularization_penalty, 295 | num_readers=1, 296 | batch_size=FLAGS.batch_size, 297 | num_epochs=FLAGS.num_epochs) 298 | 299 | saver_0 = tf.train.Saver(var_list=restore_vars[0]) 300 | saver_1 = tf.train.Saver(var_list=restore_vars[1]) 301 | 302 | return saver_0, saver_1, tf.train.Saver(max_to_keep=2) 303 | 304 | def run(self, start_new_model=0): 305 | if self.is_master and start_new_model: 306 | self.remove_training_directory(self.train_dir) 307 | 308 | target, device_fn = self.start_server_if_distributed() 309 | 310 | meta_filename = self.get_meta_filename(start_new_model, self.train_dir) 311 | 312 | with tf.Graph().as_default() as graph: 313 | if meta_filename: 314 | saver = self.recover_model(meta_filename) 315 | 316 | with tf.device(device_fn): 317 | if not meta_filename: 318 | saver_0, saver_1, saver = self.build_model(self.model, self.reader) 319 | 320 | global_step = tf.get_collection("global_step")[0] 321 | loss = tf.get_collection("loss")[0] 322 | predictions = tf.get_collection("predictions")[0] 323 | labels = tf.get_collection("labels")[0] 324 | inputs = tf.get_collection("input_batch")[0] 325 | train_op = tf.get_collection("train_op")[0] 326 | feature_0 = tf.get_collection("feature_0")[0] 327 | feature_1 = 
tf.get_collection("feature_1")[0] 328 | aux_feat_batch = tf.get_collection("aux_feat_batch")[0] 329 | aux_output = tf.get_collection("aux_output")[0] 330 | aux_fc_batch_0 = tf.get_collection("aux_fc_batch_0")[0] 331 | logits_aux_0 = tf.get_collection("logits_aux_0")[0] 332 | aux_fc_batch_1 = tf.get_collection("aux_fc_batch_1")[0] 333 | logits_aux_1 = tf.get_collection("logits_aux_1")[0] 334 | aux_fc_batch_2 = tf.get_collection("aux_fc_batch_2")[0] 335 | logits_aux_2 = tf.get_collection("logits_aux_2")[0] 336 | aux_fc_batch_3 = tf.get_collection("aux_fc_batch_3")[0] 337 | logits_aux_3 = tf.get_collection("logits_aux_3")[0] 338 | inputs_loader = tf.get_collection("images_loader")[0] 339 | labels_loader = tf.get_collection("labels_loader")[0] 340 | if not meta_filename: 341 | init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 342 | else: 343 | init_op = tf.global_variables_initializer() 344 | 345 | sv = tf.train.Supervisor( 346 | graph, 347 | logdir=self.train_dir, 348 | init_op=init_op, 349 | is_chief=self.is_master, 350 | global_step=global_step, 351 | save_model_secs=60 * 60, 352 | save_summaries_secs=7200, 353 | saver=saver) 354 | 355 | with tf.Session(graph=graph) as sess: 356 | if not meta_filename: 357 | saver_0.restore(sess, FLAGS.checkpoint_file) 358 | saver_1.restore(sess, FLAGS.checkpoint_file) 359 | 360 | pp = os.path.join(FLAGS.train_dir, 'best_checkpoint_path') 361 | if os.path.exists(pp): 362 | with open(pp, 'r') as f: 363 | lines = f.readlines() 364 | if len(lines): 365 | line = lines[-1].strip().split(',')[1].strip() 366 | global_h1 = float(line) 367 | else: 368 | global_h1 = -1 369 | else: 370 | global_h1 = -1 371 | validation_steps = 10001 372 | logging.info("%s: Starting managed session.", task_as_string(self.task)) 373 | with sv.managed_session(config=self.config) as sess: 374 | try: 375 | logging.info("%s: Entering training loop.", task_as_string(self.task)) 376 | while (not sv.should_stop()) and (not self.max_steps_reached): 377 | 378 | batch_start_time = time.time() 379 | input_batch, label_batch = sess.run([inputs_loader, labels_loader]) 380 | 381 | # (batch_size, 12, 224, 224, 3) 382 | input_batch = np.transpose(input_batch, [1, 0, 2, 3, 4]) 383 | 384 | # list of (batch_size, 224, 224, 3) of size 12 385 | tw_inputs = np.split(input_batch, 12) 386 | tw_inputs = [np.reshape(x, [-1, 224, 224, 3]) for x in tw_inputs] 387 | s0_inputs = tw_inputs[:6] 388 | s1_inputs = tw_inputs[6:] 389 | 390 | # [(224, 224, 3), ..] -> [(14, 14, 512), ..] 391 | features_0 = [] 392 | for inp in s0_inputs: 393 | feat_vec = sess.run(feature_0, feed_dict={inputs: inp}) 394 | features_0.append(feat_vec) 395 | 396 | # [(224, 224, 3), ..] -> [(14, 14, 512), ..] 397 | features_1 = [] 398 | for inp in s1_inputs: 399 | feat_vec = sess.run(feature_1, feed_dict={inputs: inp}) 400 | features_1.append(feat_vec) 401 | 402 | # [(14, 14, 512), ..] -> [(7168,), ..] 403 | feats_for_aux = [] 404 | for feat in features_0: 405 | out = sess.run(aux_output, feed_dict={aux_feat_batch: feat}) 406 | feats_for_aux.append(out) 407 | 408 | # [(7168,), ..] -> [(21504,), (21504,)] (RGB stream) 409 | aux_fcs_0 = [np.concatenate([feats_for_aux[i], feats_for_aux[i+2], 410 | feats_for_aux[i+4]], axis=1) for i in range(2)] 411 | 412 | # [(14, 14, 512), ..] -> [(7168,), ..] 413 | feats_for_aux = [] 414 | for feat in features_1: 415 | out = sess.run(aux_output, feed_dict={aux_feat_batch: feat}) 416 | feats_for_aux.append(out) 417 | 418 | # [(7168,), ..] 
-> [(21504,), (21504,)] (RGB difference stream) 419 | aux_fcs_1 = [np.concatenate([feats_for_aux[i], feats_for_aux[i+2], 420 | feats_for_aux[i+4]], axis=1) for i in range(2)] 421 | 422 | _, global_step_val, predictions_val, labels_val, loss_val = sess.run([train_op, 423 | global_step, predictions, labels, loss], feed_dict={labels: label_batch, 424 | aux_fc_batch_0: aux_fcs_0[0], aux_fc_batch_1: aux_fcs_0[1], 425 | aux_fc_batch_2: aux_fcs_1[0], aux_fc_batch_3: aux_fcs_1[1]}) 426 | 427 | seconds_per_batch = time.time() - batch_start_time 428 | examples_per_second = labels_val.shape[0] / seconds_per_batch # TODO 429 | 430 | if self.max_steps and self.max_steps <= global_step_val: 431 | self.max_steps_reached = True 432 | 433 | if self.is_master and global_step_val % 10 == 0 and self.train_dir: 434 | eval_start_time = time.time() 435 | hit_at_one = eval_util.calculate_hit_at_one(predictions_val, labels_val) 436 | hit_at_five = eval_util.calculate_hit_at_five(predictions_val, labels_val) 437 | 438 | eval_end_time = time.time() 439 | eval_time = eval_end_time - eval_start_time 440 | 441 | logging.info("training step " + str(global_step_val) + " | Loss: " + ("%.2f" % loss_val) + 442 | " Examples/sec: " + ("%.2f" % examples_per_second) + " | Hit@1: " + ("%.2f" % hit_at_one) + 443 | " Hit@5: " + ("%.2f" % hit_at_five)) 444 | 445 | sv.summary_writer.add_summary( 446 | utils.MakeSummary("model/Training_Loss", loss_val), 447 | global_step_val) 448 | sv.summary_writer.add_summary( 449 | utils.MakeSummary("model/Training_Hit@1", hit_at_one), 450 | global_step_val) 451 | sv.summary_writer.add_summary( 452 | utils.MakeSummary("model/Training_Hit@5", hit_at_five), 453 | global_step_val) 454 | sv.summary_writer.add_summary( 455 | utils.MakeSummary("global_step/Examples/Second", 456 | examples_per_second), global_step_val) 457 | sv.summary_writer.flush() 458 | 459 | else: 460 | logging.info("training step " + str(global_step_val) + " | Loss: " + ("%.2f" % loss_val) + 461 | " Examples/sec: " + ("%.2f" % examples_per_second)) 462 | 463 | if global_step_val and not (global_step_val % validation_steps): 464 | f = open(pp, 'a+') 465 | p = os.path.join(FLAGS.train_dir, 'best_checkpoint') 466 | if not os.path.exists(p): 467 | os.makedirs(p) 468 | avg_h1 = evaluate(FLAGS.dataset, FLAGS.model, FLAGS.train_dir, 469 | FLAGS.dataset_dir, FLAGS.splits_dir, 1, FLAGS.batch_size, FLAGS.split_num) 470 | if avg_h1 > global_h1: 471 | global_h1 = avg_h1 472 | latest_checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir) 473 | f.write("%s, %f\n" % (latest_checkpoint, global_h1)) 474 | del_files = glob.glob(p + '/*') 475 | for d in del_files: 476 | os.remove(d) 477 | files = glob.glob(latest_checkpoint + '.*') 478 | for fn in files: 479 | shutil.copy(fn, p) 480 | f.close() 481 | 482 | except tf.errors.OutOfRangeError: 483 | logging.info("%s: Done training -- epoch limit reached.", 484 | task_as_string(self.task)) 485 | logging.info("%s: Exited training loop.", task_as_string(self.task)) 486 | sv.Stop() 487 | 488 | def main(unused_argv): 489 | 490 | cluster = None 491 | task_data = {"type": "master", "index": 0} 492 | task = type("TaskSpec", (object,), task_data) 493 | 494 | logging.set_verbosity(tf.logging.INFO) 495 | logging.info("%s: Tensorflow version: %s", 496 | task_as_string(task), tf.__version__) 497 | 498 | if not cluster or task.type == "master" or task.type == "worker": 499 | model = find_class_by_name(FLAGS.model, 500 | [models])() 501 | reader = getattr(data, FLAGS.dataset)(FLAGS.dataset_dir, FLAGS.splits_dir, 502 | 
FLAGS.num_epochs, FLAGS.batch_size, FLAGS.split_num) 503 | 504 | #model_exporter = export_model.ModelExporter( 505 | # model=model, 506 | # reader=reader) 507 | 508 | Trainer(cluster, task, FLAGS.train_dir, model, reader, 509 | FLAGS.log_device_placement, FLAGS.max_steps, 510 | FLAGS.export_model_steps).run(start_new_model=FLAGS.start_new_model) 511 | 512 | elif task.type == "ps": 513 | # Distributed server 514 | raise NotImplementedError() 515 | else: 516 | raise ValueError("%s: Invalid task_type: %s." % 517 | (task_as_string(task), task.type)) 518 | 519 | if __name__ == '__main__': 520 | app.run() 521 | -------------------------------------------------------------------------------- /scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | from PIL import Image 5 | 6 | from tensorflow import app 7 | from tensorflow import flags 8 | from tensorflow import logging 9 | 10 | from nturgbd import Joint, Reader 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | if __name__ == '__main__': 15 | flags.DEFINE_string("dataset_dir", "", \ 16 | "Path to base directory for skeleton files") 17 | flags.DEFINE_string("splits_dir", "", \ 18 | "Directory where train and test splits are stored") 19 | flags.DEFINE_string("output_dir", "", \ 20 | "Directory where TFRecord files are to be stored") 21 | 22 | flags.DEFINE_integer("split_num", 1, \ 23 | "The present train / test split to preprocess") 24 | 25 | flags.DEFINE_bool("is_training", True, \ 26 | "Whether the present split is for train or test") 27 | flags.DEFINE_bool("tfrecords", False, \ 28 | "Whether to create TFRecords or create images") 29 | 30 | def task_as_string(task): 31 | return "/job:%s/task:%s" % (task.type, task.index) 32 | 33 | def _write_to_tfrecords(task, 34 | reader, 35 | split, 36 | outdir='', 37 | train=True): 38 | def _float32_feature(value): 39 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 40 | 41 | def _int64_feature(value): 42 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 43 | 44 | if train: 45 | fpath = reader.train_splits[split] 46 | splitname = "train" 47 | else: 48 | fpath = reader.test_splits[split] 49 | splitname = "test" 50 | logging.info("%s: Converting %s split%d files to TFRecords", task_as_string(task), splitname, split) 51 | 52 | with open(os.path.join(reader.splits, 'faulty_skeletons'), 'r') as f: 53 | remove = f.readlines() 54 | remove = [x.strip() for x in remove] 55 | 56 | with open(fpath, 'r') as f: 57 | lines = f.readlines() 58 | files = [x.strip().split()[0] for x in lines] 59 | labels = [int(x.strip().split()[1]) for x in lines] 60 | n = 0; l = len(files); two_person = 0 61 | for fname, label in zip(files, labels): 62 | if not any(fname.split('.')[0] in x for x in remove): 63 | n += 1 64 | if not os.path.exists(outdir): 65 | os.makedirs(outdir) 66 | os.makedirs(os.path.join(outdir, splitname + str(split))) 67 | elif not os.path.exists(os.path.join(outdir, splitname + str(split))): 68 | os.makedirs(os.path.join(outdir, splitname + str(split))) 69 | 70 | tfrecord_file = os.path.join(outdir, splitname + str(split), fname + '.tfrecord') 71 | if not os.path.exists(tfrecord_file): 72 | writer = tf.python_io.TFRecordWriter(tfrecord_file) 73 | else: 74 | continue 75 | video = reader._read_skeleton_file(fname) 76 | skeletons_0, skeletons_1 = video._get_main_actor_skeletons() 77 | # logging.info("%s: Number of skeletons: %d", task_as_string(task), len(skeletons)) 78 | 79 
| # Spatial Feature: Relative coordinates for each joint in the frame [300 x 900] 80 | features = np.zeros((300, 576), dtype=np.float32) 81 | two_person_action = True 82 | for i in skeletons_1: 83 | if i._is_zero_skeleton: 84 | two_person_action = False 85 | one_person_feat_len = 288 86 | for nn, skeleton in enumerate(skeletons_0): 87 | feat_len = 0 88 | joints = skeleton._get_joint_objects() 89 | assert len(joints) == 25 90 | for i in [5, 8, 12, 16]: 91 | joint = joints[i] 92 | x, y, z = joint._get_cartesian_coordinates() 93 | for j in range(len(joints)): 94 | if not j == i: 95 | joint_ = joints[j] 96 | x_, y_, z_ = joint_._get_cartesian_coordinates() 97 | r = x - x_; theta = y - y_; phi = z - z_ 98 | j = Joint(r, theta, phi) 99 | r, theta, phi = j._get_spherical_coordinates() 100 | features[nn, feat_len] = r; feat_len += 1 101 | features[nn, feat_len] = theta; feat_len += 1 102 | features[nn, feat_len] = phi; feat_len += 1 103 | if two_person_action: 104 | two_person += 1 105 | logging.info("%s: Two person Action", task_as_string(task)) 106 | for nn, skeleton in enumerate(skeletons_0): 107 | feat_len = 0 108 | joints_0 = skeleton._get_joint_objects() 109 | joints_1 = skeletons_1[nn]._get_joint_objects() 110 | assert len(joints_0) == 25; assert len(joints_1) == 25 111 | for i in [5, 8, 12, 16]: 112 | joint = joints_0[i] 113 | x, y, z = joint._get_cartesian_coordinates() 114 | for j in range(len(joints_1)): 115 | if not j == i: 116 | joint_ = joints_1[j] 117 | x_, y_, z_ = joint_._get_cartesian_coordinates() 118 | r = x - x_; theta = y - y_; phi = z - z_ 119 | j = Joint(r, theta, phi) 120 | r, theta, phi = j._get_spherical_coordinates() 121 | features[nn, one_person_feat_len+feat_len] = r; feat_len += 1 122 | features[nn, one_person_feat_len+feat_len] = theta; feat_len += 1 123 | features[nn, one_person_feat_len+feat_len] = phi; feat_len += 1 124 | feats_spatial = features 125 | feats_spatial = np.pad(feats_spatial, [[0, (300 - feats_spatial.shape[0])], [0, 0]], 'constant', constant_values=0) 126 | feats_spatial = np.hstack(feats_spatial) 127 | 128 | example = tf.train.Example(features=tf.train.Features(feature={ 129 | 'feature': _float32_feature(feats_spatial), 130 | 'label': _int64_feature(label)})) 131 | writer.write(example.SerializeToString()) 132 | writer.close() 133 | logging.info("%s: status: %d of %d done", task_as_string(task), n, l) 134 | return n, two_person 135 | 136 | def _write_to_images(task, 137 | reader, 138 | split, 139 | outdir='', 140 | train=True): 141 | if train: 142 | fpath = reader.train_splits[split] 143 | splitname = "train" 144 | else: 145 | fpath = reader.test_splits[split] 146 | splitname = "test" 147 | logging.info("%s: Converting %s split%d files to TFRecords", task_as_string(task), splitname, split) 148 | 149 | with open(os.path.join(reader.splits, 'faulty_skeletons'), 'r') as f: 150 | remove = f.readlines() 151 | remove = [x.strip() for x in remove] 152 | 153 | with open(fpath, 'r') as f: 154 | lines = f.readlines() 155 | files = [x.strip().split()[0] for x in lines] 156 | labels = [int(x.strip().split()[1]) for x in lines] 157 | n = 0; l = len(files); two_person = 0 158 | for fname, label in zip(files, labels): 159 | if not any(fname.split('.')[0] in x for x in remove): 160 | n += 1 161 | if not os.path.exists(outdir): 162 | os.makedirs(outdir) 163 | os.makedirs(os.path.join(outdir, splitname + str(split))) 164 | elif not os.path.exists(os.path.join(outdir, splitname + str(split))): 165 | os.makedirs(os.path.join(outdir, splitname + str(split))) 166 | 167 
| image_dir = os.path.join(outdir, splitname + str(split), fname) 168 | if not os.path.exists(image_dir): 169 | os.makedirs(image_dir) 170 | else: 171 | if len(os.listdir(image_dir)) == 6: 172 | continue 173 | video = reader._read_skeleton_file(fname) 174 | skeletons_0, skeletons_1 = video._get_main_actor_skeletons() 175 | 176 | two_person_action = True 177 | for i in skeletons_1: 178 | if i._is_zero_skeleton: 179 | two_person_action = False 180 | 181 | if two_person_action: 182 | im_size = (2, len(skeletons_0), 48) 183 | else: 184 | im_size = (2, len(skeletons_0), 24) 185 | 186 | im_r = np.zeros(im_size); im_theta = np.zeros(im_size); im_phi = np.zeros(im_size) 187 | for nn, skeleton in enumerate(skeletons_0): 188 | joints = skeleton._get_joint_objects() 189 | assert len(joints) == 25 190 | im_num = 0 191 | for i in [0, 20]: 192 | feat_len = 0 193 | joint = joints[i] 194 | x, y, z = joint._get_cartesian_coordinates() 195 | for j in range(len(joints)): 196 | if not j == i: 197 | joint_ = joints[j] 198 | x_, y_, z_ = joint_._get_cartesian_coordinates() 199 | r = x - x_; theta = y - y_; phi = z - z_ 200 | im_r[im_num, nn, feat_len] = r; im_theta[im_num, nn, feat_len] = theta; im_phi[im_num, nn, feat_len] = phi 201 | feat_len += 1 202 | im_num += 1 203 | if two_person_action: 204 | processed = 24 205 | two_person += 1 206 | for nn, skeleton in enumerate(skeletons_0): 207 | joints_0 = skeleton._get_joint_objects() 208 | joints_1 = skeletons_1[nn]._get_joint_objects() 209 | assert len(joints_0) == 25; assert len(joints_1) == 25 210 | im_num = 0 211 | for i in [0, 20]: 212 | feat_len = processed 213 | joint = joints_0[i] 214 | x, y, z = joint._get_cartesian_coordinates() 215 | for j in range(len(joints_1)): 216 | if not j == i: 217 | joint_ = joints_1[j] 218 | x_, y_, z_ = joint_._get_cartesian_coordinates() 219 | r = x - x_; theta = y - y_; phi = z - z_ 220 | im_r[im_num, nn, feat_len] = r; im_theta[im_num, nn, feat_len] = theta; im_phi[im_num, nn, feat_len] = phi 221 | feat_len += 1 222 | im_num += 1 223 | 224 | count = 0 225 | for im in im_r: 226 | im += np.amin(im) 227 | im *= 255.0 / np.amax(im) 228 | im = np.repeat(im[:, :, np.newaxis], 3, axis=2) 229 | image = Image.fromarray(im.astype(np.uint8), 'RGB') 230 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 231 | image.save(path) 232 | count += 1 233 | for im in im_theta: 234 | im += np.amin(im) 235 | im *= 255.0 / np.amax(im) 236 | im = np.repeat(im[:, :, np.newaxis], 3, axis=2) 237 | image = Image.fromarray(im.astype(np.uint8), 'RGB') 238 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 239 | image.save(path) 240 | count += 1 241 | for im in im_phi: 242 | im += np.amin(im) 243 | im *= 255.0 / np.amax(im) 244 | im = np.repeat(im[:, :, np.newaxis], 3, axis=2) 245 | image = Image.fromarray(im.astype(np.uint8), 'RGB') 246 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 247 | image.save(path) 248 | count += 1 249 | logging.info("%s: status: %d of %d done", task_as_string(task), n, l) 250 | return n, two_person 251 | 252 | def _write_to_images_new(task, 253 | reader, 254 | split, 255 | outdir='', 256 | train=True): 257 | if train: 258 | fpath = reader.train_splits[split] 259 | splitname = "train" 260 | else: 261 | fpath = reader.test_splits[split] 262 | splitname = "test" 263 | logging.info("%s: Converting %s split%d files to TFRecords", task_as_string(task), splitname, split) 264 | 265 | with open(os.path.join(reader.splits, 'faulty_skeletons'), 'r') as f: 266 | remove = f.readlines() 267 | remove = [x.strip() for x in 
remove] 268 | 269 | with open(fpath, 'r') as f: 270 | lines = f.readlines() 271 | files = [x.strip().split()[0] for x in lines] 272 | labels = [int(x.strip().split()[1]) for x in lines] 273 | n = 0; l = len(files); two_person = 0 274 | for fname, label in zip(files, labels): 275 | if not any(fname.split('.')[0] in x for x in remove): 276 | n += 1 277 | if not os.path.exists(outdir): 278 | os.makedirs(outdir) 279 | os.makedirs(os.path.join(outdir, splitname + str(split))) 280 | elif not os.path.exists(os.path.join(outdir, splitname + str(split))): 281 | os.makedirs(os.path.join(outdir, splitname + str(split))) 282 | 283 | image_dir = os.path.join(outdir, splitname + str(split), fname) 284 | if not os.path.exists(image_dir): 285 | os.makedirs(image_dir) 286 | else: 287 | if len(os.listdir(image_dir)) == 6: 288 | continue 289 | video = reader._read_skeleton_file(fname) 290 | skeletons_0, skeletons_1 = video._get_main_actor_skeletons() 291 | 292 | two_person_action = True 293 | for i in skeletons_1: 294 | if i._is_zero_skeleton: 295 | two_person_action = False 296 | 297 | L = len(skeletons_0) 298 | if two_person_action: 299 | im_size = (2, L * 3, 48 * 3) 300 | im_size_d = (2, (L-1) * 3, 48 * 3) 301 | else: 302 | im_size = (2, L * 3, 24 * 3) 303 | im_size_d = (2, (L-1) * 3, 24 * 3) 304 | 305 | im_r = np.zeros(im_size); im_theta = np.zeros(im_size); im_phi = np.zeros(im_size) 306 | im_r_d = np.zeros(im_size_d); im_theta_d = np.zeros(im_size_d); im_phi_d = np.zeros(im_size_d) 307 | for nn, skeleton in enumerate(skeletons_0): 308 | joints = skeleton._get_joint_objects() 309 | assert len(joints) == 25 310 | im_num = 0 311 | for i in [0, 20]: 312 | feat_len = 0 313 | joint = joints[i] 314 | x, y, z = joint._get_cartesian_coordinates() 315 | for j in range(len(joints)): 316 | if not j == i: 317 | joint_ = joints[j] 318 | x_, y_, z_ = joint_._get_cartesian_coordinates() 319 | r = np.full((3,3), (x - x_)) 320 | theta = np.full((3,3), (y - y_)) 321 | phi = np.full((3,3), (z - z_)) 322 | im_r[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = r 323 | im_theta[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = theta 324 | im_phi[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = phi 325 | feat_len += 1 326 | im_num += 1 327 | if two_person_action: 328 | processed = 24 329 | two_person += 1 330 | for nn, skeleton in enumerate(skeletons_0): 331 | joints_0 = skeleton._get_joint_objects() 332 | joints_1 = skeletons_1[nn]._get_joint_objects() 333 | assert len(joints_0) == 25; assert len(joints_1) == 25 334 | im_num = 0 335 | for i in [0, 20]: 336 | feat_len = processed 337 | joint = joints_0[i] 338 | x, y, z = joint._get_cartesian_coordinates() 339 | for j in range(len(joints_1)): 340 | if not j == i: 341 | joint_ = joints_1[j] 342 | x_, y_, z_ = joint_._get_cartesian_coordinates() 343 | r = np.full((3,3), (x - x_)) 344 | theta = np.full((3,3), (y - y_)) 345 | phi = np.full((3,3), (z - z_)) 346 | im_r[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = r 347 | im_theta[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = theta 348 | im_phi[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = phi 349 | feat_len += 1 350 | im_num += 1 351 | for i in range(2): 352 | im1 = im_r[i] 353 | im2 = im_theta[i] 354 | im3 = im_phi[i] 355 | for nn in range(1, len(im1)/3): 356 | f1 = im1[(nn-1)*3:nn*3]; f2 = im1[nn*3:(nn+1)*3] 357 | im_r_d[i, (nn-1)*3:nn*3] = (f1 - f2) 358 | f1 = im2[(nn-1)*3:nn*3]; f2 = im2[nn*3:(nn+1)*3] 359 | im_theta_d[i, (nn-1)*3:nn*3] = (f1 - f2) 360 | f1 = im3[(nn-1)*3:nn*3]; f2 = im3[nn*3:(nn+1)*3] 
361 | im_phi_d[i, (nn-1)*3:nn*3] = (f1 - f2) 362 | 363 | count = 0 364 | for im in im_r: 365 | im += np.amin(im) 366 | im *= 255.0 / np.amax(im) 367 | image = Image.fromarray(im.astype(np.uint8), 'L') 368 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 369 | image.save(path) 370 | count += 1 371 | for im in im_theta: 372 | im += np.amin(im) 373 | im *= 255.0 / np.amax(im) 374 | image = Image.fromarray(im.astype(np.uint8), 'L') 375 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 376 | image.save(path) 377 | count += 1 378 | for im in im_phi: 379 | im += np.amin(im) 380 | im *= 255.0 / np.amax(im) 381 | image = Image.fromarray(im.astype(np.uint8), 'L') 382 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 383 | image.save(path) 384 | count += 1 385 | for im in im_r_d: 386 | im += np.amin(im) 387 | im *= 255.0 / np.amax(im) 388 | image = Image.fromarray(im.astype(np.uint8), 'L') 389 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 390 | image.save(path) 391 | count += 1 392 | for im in im_theta_d: 393 | im += np.amin(im) 394 | im *= 255.0 / np.amax(im) 395 | image = Image.fromarray(im.astype(np.uint8), 'L') 396 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 397 | image.save(path) 398 | count += 1 399 | for im in im_phi_d: 400 | im += np.amin(im) 401 | im *= 255.0 / np.amax(im) 402 | image = Image.fromarray(im.astype(np.uint8), 'L') 403 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 404 | image.save(path) 405 | count += 1 406 | logging.info("%s: status: %d of %d done", task_as_string(task), n, l) 407 | return n, two_person 408 | 409 | def _write_to_frames(task, 410 | reader, 411 | split, 412 | outdir='', 413 | train=True): 414 | if train: 415 | fpath = reader.train_splits[split] 416 | splitname = "train" 417 | else: 418 | fpath = reader.test_splits[split] 419 | splitname = "test" 420 | logging.info("%s: Converting %s split%d files to TFRecords", task_as_string(task), splitname, split) 421 | 422 | with open(os.path.join(reader.splits, 'faulty_skeletons'), 'r') as f: 423 | remove = f.readlines() 424 | remove = [x.strip() for x in remove] 425 | 426 | with open(fpath, 'r') as f: 427 | lines = f.readlines() 428 | files = [x.strip().split()[0] for x in lines] 429 | labels = [int(x.strip().split()[1]) for x in lines] 430 | n = 0; l = len(files); two_person = 0 431 | for fname, label in zip(files, labels): 432 | if not any(fname.split('.')[0] in x for x in remove): 433 | n += 1 434 | if not os.path.exists(outdir): 435 | os.makedirs(outdir) 436 | os.makedirs(os.path.join(outdir, splitname + str(split))) 437 | elif not os.path.exists(os.path.join(outdir, splitname + str(split))): 438 | os.makedirs(os.path.join(outdir, splitname + str(split))) 439 | 440 | image_dir = os.path.join(outdir, splitname + str(split), fname) 441 | if not os.path.exists(image_dir): 442 | os.makedirs(image_dir) 443 | else: 444 | if len(os.listdir(image_dir)) == 6: 445 | continue 446 | video = reader._read_skeleton_file(fname) 447 | skeletons_0, skeletons_1 = video._get_main_actor_skeletons() 448 | 449 | two_person_action = True 450 | for i in skeletons_1: 451 | if i._is_zero_skeleton: 452 | two_person_action = False 453 | 454 | if two_person_action: 455 | im_size = (len(skeletons_0), 25, 48, 3) 456 | else: 457 | im_size = (len(skeletons_0), 25, 24, 3) 458 | 459 | im = np.zeros(im_size) 460 | im_num = 0 461 | for nn, skeleton in enumerate(skeletons_0): 462 | joints = skeleton._get_joint_objects() 463 | assert len(joints) == 25 464 | for i in range(len(joints)): 465 | feat_len = 
0 466 | joint = joints[i] 467 | x, y, z = joint._get_cartesian_coordinates() 468 | for j in range(len(joints)): 469 | if not j == i: 470 | joint_ = joints[j] 471 | x_, y_, z_ = joint_._get_cartesian_coordinates() 472 | r = x - x_; theta = y - y_; phi = z - z_ 473 | j = Joint(r, theta, phi) 474 | r, theta, phi = j._get_spherical_coordinates() 475 | im[im_num, i, feat_len, 0] = r 476 | im[im_num, i, feat_len, 1] = theta 477 | im[im_num, i, feat_len, 2] = phi 478 | feat_len += 1 479 | im_num += 1 480 | if two_person_action: 481 | processed = 24 482 | two_person += 1 483 | im_num = 0 484 | for nn, skeleton in enumerate(skeletons_0): 485 | joints_0 = skeleton._get_joint_objects() 486 | joints_1 = skeletons_1[nn]._get_joint_objects() 487 | assert len(joints_0) == 25; assert len(joints_1) == 25 488 | for i in range(len(joints_0)): 489 | # relative coordinates of the second actor's joints w.r.t. joint i of the main actor 490 | feat_len = processed 491 | joint = joints_0[i] 492 | x, y, z = joint._get_cartesian_coordinates() 493 | for j in range(len(joints_1)): 494 | if not j == i: 495 | joint_ = joints_1[j] 496 | x_, y_, z_ = joint_._get_cartesian_coordinates() 497 | r = x - x_; theta = y - y_; phi = z - z_ 498 | j = Joint(r, theta, phi) 499 | r, theta, phi = j._get_spherical_coordinates() 500 | im[im_num, i, feat_len, 0] = r 501 | im[im_num, i, feat_len, 1] = theta 502 | im[im_num, i, feat_len, 2] = phi 503 | feat_len += 1 504 | im_num += 1 505 | count = 0 506 | for img in im: 507 | img += np.amin(img) 508 | img *= 255.0 / np.amax(img) 509 | image = Image.fromarray(img.astype(np.uint8), 'RGB') 510 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 511 | image.save(path) 512 | count += 1 513 | logging.info("%s: status: %d of %d done", task_as_string(task), n, l) 514 | 515 | return n, two_person 516 | 517 | def main(unused_argv): 518 | task_data = {"type": "master", "index": 0} 519 | task = type("TaskSpec", (object,), task_data) 520 | 521 | logging.set_verbosity(tf.logging.INFO) 522 | logging.info("%s: Tensorflow version: %s", 523 | task_as_string(task), tf.__version__) 524 | 525 | if FLAGS.dataset_dir == '': 526 | logging.info("%s: No dataset directory provided. " 527 | "Please set the --dataset_dir flag when running the script.", task_as_string(task)) 528 | return 1 529 | if FLAGS.splits_dir == '': 530 | logging.info("%s: No split files directory provided. " 531 | "Please set the --splits_dir flag when running the script.", task_as_string(task)) 532 | return 1 533 | if FLAGS.output_dir == '': 534 | logging.info("%s: No output directory provided. "
" 535 | "Please set the --tfrecords_dir flag when running the script.", task_as_string(task)) 536 | return EXIT_ERROR 537 | 538 | logging.info("%s: Using >\n" 539 | " Dataset directory: %s\n" 540 | " Split files directory: %s\n" 541 | " Directory to store the output: %s\n" 542 | " Split number: %d\n" 543 | " Train/Test split: %s\n", task_as_string(task), 544 | FLAGS.dataset_dir, FLAGS.splits_dir, FLAGS.output_dir, 545 | FLAGS.split_num, "train" if FLAGS.is_training else "test") 546 | 547 | data_dir = FLAGS.dataset_dir 548 | split_dir = FLAGS.splits_dir 549 | reader = Reader(dataset_dir=data_dir, splits_dir=split_dir) 550 | 551 | if FLAGS.tfrecords: 552 | n, two_person = _write_to_tfrecords(task, 553 | reader=reader, 554 | split=FLAGS.split_num, 555 | outdir=FLAGS.output_dir, 556 | train=FLAGS.is_training) 557 | else: 558 | n, two_person = _write_to_images_new(task, 559 | reader=reader, 560 | split=FLAGS.split_num, 561 | outdir=FLAGS.output_dir, 562 | train=FLAGS.is_training) 563 | 564 | logging.info("%s: Converting to output format done! Total files: %d, Two person actions: %d. Exiting.", task_as_string(task), n, two_person) 565 | 566 | if __name__ == '__main__': 567 | app.run() 568 | --------------------------------------------------------------------------------