├── models ├── NTURGBD │ ├── __init__.py │ ├── nturgbd_rnn.py │ └── bnlstm.py ├── __init__.py ├── HybridModel │ ├── __init__.py │ ├── hybrid.py │ ├── bnlstm.py │ ├── vgg.py │ └── inception_resnet_v2.py └── models.py ├── README.md ├── scripts ├── __init__.py ├── do_test.sh ├── do_train.sh ├── preprocess ├── nturgbd.py └── preprocess.py ├── data ├── __init__.py └── loaders.py ├── utils ├── __init__.py ├── losses.py ├── export_model.py ├── eval_util.py └── utils.py ├── run.sh ├── LICENSE ├── .gitignore ├── test.py └── train.py /models/NTURGBD/__init__.py: -------------------------------------------------------------------------------- 1 | from nturgbd_rnn import * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 3DActionRecognition 2 | 3D skeleton-based human action recognition (for WACV) 3 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | import nturgbd, preprocess 2 | 3 | __all__ = ['nturgbd', 'preprocess'] 4 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from models import NTURGBD_RNN, Hybrid 2 | 3 | __all__ = ['NTURGBD_RNN', 'Hybrid'] 4 | -------------------------------------------------------------------------------- /models/HybridModel/__init__.py: -------------------------------------------------------------------------------- 1 | from inception_resnet_v2 import * 2 | from bnlstm import * 3 | from hybrid import * 4 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from loaders import NTURGBD, HybridModelReader 2 | 3 | __all__ = ['NTURGBD', 'HybridModelReader'] 4 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | import utils, eval_util, export_model, losses 2 | 3 | __all__ = ['utils', 'eval_util', 'export_model', 'losses'] 4 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python train.py --train_dir=/home/procastinator/nturgbd_twostreamcnn --dataset_dir=/home/procastinator/nturgb+d_images_new \ 4 | --splits_dir=/home/procastinator/NTU_data --split_num 2 --checkpoint_file=/home/procastinator/pretrainedCheckpoints/vgg_19.ckpt 5 | -------------------------------------------------------------------------------- /scripts/do_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./preprocess "/home/procastinator/NTU_data/nturgb+d_skeletons" "/home/procastinator/NTU_data" "/home/procastinator/nturgb+d_images" 1 False 4 | ./preprocess "/home/procastinator/NTU_data/nturgb+d_skeletons" "/home/procastinator/NTU_data" "/home/procastinator/nturgb+d_images" 2 False 5 | -------------------------------------------------------------------------------- /scripts/do_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./preprocess 
"/users/kalpit.t/NTU_data/nturgb+d_skeletons" "/users/kalpit.t/NTU_data" "/Pulsar2/mohit.jain/kalpit_data/nturgb+d_images_0" 1 True 4 | ./preprocess "/users/kalpit.t/NTU_data/nturgb+d_skeletons" "/users/kalpit.t/NTU_data" "/Pulsa2/mohit.jain/kalpit_data/nturgb+d_images_0" 2 True 5 | -------------------------------------------------------------------------------- /scripts/preprocess: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Preprocess the data 4 | if [ $# -lt 3 ] 5 | then 6 | echo "Usage: ./preprocess " 7 | exit 1 8 | fi 9 | 10 | if [ $# -eq 3 ] 11 | then 12 | `python preprocess.py --dataset_dir=$1 --splits_dir=$2 --output_dir=$3` 13 | elif [ $# -eq 4 ] 14 | then 15 | `python preprocess.py --dataset_dir=$1 --splits_dir=$2 --output_dir=$3 --split_num=$4` 16 | elif [ $# -eq 5 ] 17 | then 18 | `python preprocess.py --dataset_dir=$1 --splits_dir=$2 --output_dir=$3 --split_num=$4 --is_training=$5` 19 | fi 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Kalpit Thakkar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /models/models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | slim = tf.contrib.slim 3 | framework = tf.contrib.framework 4 | 5 | from NTURGBD import * 6 | from HybridModel import * 7 | 8 | class NTURGBD_RNN(object): 9 | 10 | def __init__(self): 11 | self.name = "NTURGBD_RNN" 12 | 13 | def create_model(self, inputs, num_classes, labels, **unused_params): 14 | model = nturgbd_rnn.SkeletonHRNNet() 15 | output = model.create_model(inputs, num_classes, labels) 16 | 17 | return output 18 | 19 | class Hybrid(object): 20 | 21 | def __init__(self): 22 | self.name = "HybridModel" 23 | 24 | def create_feature_model(self, inputs, scope='', is_training=True, **unused_params): 25 | feature, restore_vars, tvars = hybrid.get_pretrained_model_feats(inputs, scope, is_training) 26 | return feature, restore_vars, tvars 27 | 28 | def create_aux_model(self, inputs, is_training=True, **unused_params): 29 | outputs, tvars = hybrid.get_temporal_mean_pooled_feats(inputs, is_training) 30 | return outputs, tvars 31 | 32 | def create_logits_model(self, inputs, num_classes, is_training=True, scope="logits", reuse=None): 33 | outputs, tvars = hybrid.get_classifier_logits(inputs, num_classes, is_training=is_training, lscope=scope, reuse=reuse) 34 | return outputs, tvars 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # Run script 104 | run.sh 105 | 106 | # checkpoints 107 | *.ckpt* 108 | -------------------------------------------------------------------------------- /models/HybridModel/hybrid.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | import tensorflow.contrib.framework as framework 4 | import tensorflow.contrib.rnn as rnn 5 | 6 | from inception_resnet_v2 import * 7 | from vgg import * 8 | from bnlstm import * 9 | 10 | def length(sequence): 11 | used = tf.sign(tf.reduce_max(tf.abs(sequence), 2)) 12 | length = tf.reduce_sum(used, 1) 13 | length = tf.cast(length, tf.int32) 14 | return length 15 | 16 | def common_arg_scope(weight_decay=0.00004, 17 | batch_norm_decay=0.9997, 18 | batch_norm_epsilon=0.001): 19 | # Set weight_decay for weights in conv2d and fully_connected layers. 20 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 21 | weights_regularizer=slim.l2_regularizer(weight_decay), 22 | biases_regularizer=slim.l2_regularizer(weight_decay)): 23 | 24 | batch_norm_params = { 25 | 'decay': batch_norm_decay, 26 | 'epsilon': batch_norm_epsilon, 27 | } 28 | # Set activation_fn and parameters for batch_norm. 
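# Under the returned scope, every slim.conv2d defaults to a ReLU activation followed by
# batch normalization with the decay/epsilon set above, while fully_connected layers only
# inherit the L2 weight/bias regularizers; get_classifier_logits() below reuses this scope.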
29 | with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, 30 | normalizer_fn=slim.batch_norm, 31 | normalizer_params=batch_norm_params) as scope: 32 | return scope 33 | 34 | def last_relevant(output, length): 35 | batch_size = tf.shape(output)[0] 36 | max_length = tf.shape(output)[1] 37 | out_size = int(output.get_shape()[2]) 38 | index = tf.range(0, batch_size) * max_length + (length - 1) 39 | flat = tf.reshape(output, [-1, out_size]) 40 | relevant = tf.gather(flat, index) 41 | return relevant 42 | 43 | def get_pretrained_model_feats(inputs, scopename='', is_training=True): 44 | # VGG 19 for feature extraction 45 | scope = vgg_arg_scope() 46 | with slim.arg_scope(scope): 47 | with tf.variable_scope(scopename): 48 | _, end_points = vgg_19(inputs) 49 | features = end_points[scopename+'/vgg_19/conv5/conv5_1'] # 14 x 14 x 512 50 | restore_vars = framework.get_variables(scopename) 51 | 52 | tvars = [] 53 | 54 | return features, restore_vars, tvars 55 | 56 | def get_temporal_mean_pooled_feats(inputs, is_training=True): 57 | # Temporal Average pooling 58 | with tf.variable_scope('temporal_mean_pool'): 59 | pooled_features = slim.avg_pool2d(inputs, (14, 1), stride=1, padding='VALID', scope='AvgPool_8x1') 60 | features = slim.flatten(pooled_features) 61 | tvars = framework.get_variables('temporal_mean_pool') 62 | 63 | return features, tvars 64 | 65 | def get_classifier_logits(inputs, num_classes, is_training=True, lscope='', reuse=None): 66 | # Primary Classifier 67 | scope = common_arg_scope() 68 | with slim.arg_scope(scope): 69 | with tf.variable_scope(lscope, reuse=reuse): 70 | plogits = slim.fully_connected(inputs, 2048, activation_fn=tf.nn.relu, scope='PreLogits') 71 | dropout = slim.dropout(plogits, 0.8, is_training=is_training, scope='Logits_dropout') 72 | logits = slim.fully_connected(dropout, num_classes, activation_fn=None, scope='Final_Logits') 73 | 74 | tvars = framework.get_variables(lscope) 75 | return logits, tvars 76 | -------------------------------------------------------------------------------- /utils/losses.py: -------------------------------------------------------------------------------- 1 | """Provides definitions for non-regularized training or test losses.""" 2 | 3 | import tensorflow as tf 4 | 5 | class BaseLoss(object): 6 | """Inherit from this class when implementing new losses.""" 7 | 8 | def calculate_loss(self, unused_predictions, unused_labels, **unused_params): 9 | """Calculates the average loss of the examples in a mini-batch. 10 | 11 | Args: 12 | unused_predictions: a 2-d tensor storing the prediction scores, in which 13 | each row represents a sample in the mini-batch and each column 14 | represents a class. 15 | unused_labels: a 2-d tensor storing the labels, which has the same shape 16 | as the unused_predictions. The labels must be in the range of 0 and 1. 17 | unused_params: loss specific parameters. 18 | 19 | Returns: 20 | A scalar loss tensor. 21 | """ 22 | raise NotImplementedError() 23 | 24 | 25 | class CrossEntropyLoss(BaseLoss): 26 | """Calculate the cross entropy loss between the predictions and labels. 
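The per-class term is -(y * log(p + eps) + (1 - y) * log(1 - p + eps)); the terms are
summed over classes and averaged over the mini-batch, as implemented in calculate_loss below.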
27 | """ 28 | 29 | def calculate_loss(self, predictions, labels, **unused_params): 30 | with tf.name_scope("loss_xent"): 31 | epsilon = 10e-6 32 | float_labels = tf.cast(labels, tf.float32) 33 | cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + ( 34 | 1 - float_labels) * tf.log(1 - predictions + epsilon) 35 | cross_entropy_loss = tf.negative(cross_entropy_loss) 36 | return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1)) 37 | 38 | 39 | class HingeLoss(BaseLoss): 40 | """Calculate the hinge loss between the predictions and labels. 41 | 42 | Note the subgradient is used in the backpropagation, and thus the optimization 43 | may converge slower. The predictions trained by the hinge loss are between -1 44 | and +1. 45 | """ 46 | 47 | def calculate_loss(self, predictions, labels, b=1.0, **unused_params): 48 | with tf.name_scope("loss_hinge"): 49 | float_labels = tf.cast(labels, tf.float32) 50 | all_zeros = tf.zeros(tf.shape(float_labels), dtype=tf.float32) 51 | all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32) 52 | sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones) 53 | hinge_loss = tf.maximum( 54 | all_zeros, tf.scalar_mul(b, all_ones) - sign_labels * predictions) 55 | return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1)) 56 | 57 | 58 | class SoftmaxLoss(BaseLoss): 59 | """Calculate the softmax loss between the predictions and labels. 60 | 61 | The function calculates the loss in the following way: first we feed the 62 | predictions to the softmax activation function and then we calculate 63 | the minus linear dot product between the logged softmax activations and the 64 | normalized ground truth label. 65 | 66 | It is an extension to the one-hot label. It allows for more than one positive 67 | labels for each sample. 
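Concretely, each label row is L1-normalized to a distribution q and the loss is
-sum_j q_j * log(softmax(predictions)_j), averaged over the mini-batch, as implemented below.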
68 | """ 69 | 70 | def calculate_loss(self, predictions, labels, **unused_params): 71 | with tf.name_scope("loss_softmax"): 72 | epsilon = 10e-8 73 | float_labels = tf.cast(labels, tf.float32) 74 | # l1 normalization (labels are no less than 0) 75 | label_rowsum = tf.maximum( 76 | tf.reduce_sum(float_labels, 1, keep_dims=True), 77 | epsilon) 78 | norm_float_labels = tf.div(float_labels, label_rowsum) 79 | softmax_outputs = tf.nn.softmax(predictions) 80 | softmax_loss = tf.negative(tf.reduce_sum( 81 | tf.multiply(norm_float_labels, tf.log(softmax_outputs)), 1)) 82 | return tf.reduce_mean(softmax_loss) 83 | -------------------------------------------------------------------------------- /data/loaders.py: -------------------------------------------------------------------------------- 1 | import os, argparse 2 | import threading 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.python.framework import ops 6 | from tensorflow.python.framework import dtypes 7 | 8 | gfile = tf.gfile 9 | slim = tf.contrib.slim 10 | 11 | class NTURGBD(object): 12 | 13 | def __init__(self, dataset_dir, split_dir, num_epochs, batch_size, split=1): 14 | self.dataset_dir = dataset_dir 15 | self.splits_dir = split_dir 16 | self.num_splits = 2 17 | self.num_classes = 60 18 | self.num_epochs = num_epochs 19 | self.batch_size = batch_size 20 | self.present_split = split 21 | 22 | self.train_split_files = {'1': 'train1', '2': 'train2'} 23 | self.val_split_files = {'1': 'val1', '2': 'val2'} 24 | self.test_split_files = {'1': 'test1', '2': 'test2'} 25 | f = open(os.path.join(split_dir, 'faulty_skeletons'), 'r') 26 | self.faulty_samples = f.readlines() 27 | f.close() 28 | 29 | def _read_filelist(self, split='1', train=True, **kwargs): 30 | if train: 31 | files = gfile.ListDirectory(os.path.join(self.dataset_dir, self.train_split_files[split])) 32 | files = [os.path.join(self.dataset_dir, self.train_split_files[split], x) for x in files] 33 | else: 34 | files = gfile.ListDirectory(os.path.join(self.dataset_dir, self.test_split_files[split])) 35 | files = [os.path.join(self.dataset_dir, self.test_split_files[split], x) for x in files] 36 | return files 37 | 38 | def _read_samples(self, input_queue): 39 | reader = tf.TFRecordReader() 40 | _, serialized_example = reader.read(input_queue) 41 | 42 | features = tf.parse_single_example( 43 | serialized_example, 44 | features={ 45 | 'feature': tf.FixedLenFeature([172800], tf.float32), 46 | 'label': tf.FixedLenFeature([], tf.int64)}) 47 | 48 | label = tf.cast(features['label'], tf.int64) 49 | feat_vec = tf.reshape(features['feature'], [300, 576]) 50 | 51 | return feat_vec, label 52 | 53 | class HybridModelReader(object): 54 | 55 | def __init__(self, dataset_dir, split_dir, num_epochs, batch_size, split=1): 56 | self.dataset_dir = dataset_dir 57 | self.splits_dir = split_dir 58 | self.num_splits = 2 59 | self.num_classes = 60 60 | self.num_epochs = num_epochs 61 | self.batch_size = batch_size 62 | self.present_split = split 63 | 64 | self.train_split_files = {'1': 'train1', '2': 'train2'} 65 | self.test_split_files = {'1': 'test1', '2': 'test2'} 66 | 67 | def _read_filelist(self, split='1', train=True, **kwargs): 68 | files = []; labels = [] 69 | if train: 70 | dirname = self.train_split_files[split] 71 | else: 72 | dirname = self.test_split_files[split] 73 | 74 | dirs = gfile.ListDirectory(os.path.join(self.dataset_dir, dirname)) 75 | for d in dirs: 76 | label = long(d.split('.')[0][-3:]) 77 | dpath = os.path.join(self.dataset_dir, dirname, d) 78 | dfiles = 
sorted(os.listdir(dpath)) 79 | flist = [] 80 | for fn in dfiles: 81 | flist.append(os.path.join(dpath, fn)) 82 | files.append(flist) 83 | labels.append(label) 84 | return files, labels 85 | 86 | def _read_samples(self, input_queue): 87 | file_list = tf.split(input_queue[0], [1]*12) 88 | images = [] 89 | for fn in file_list: 90 | file_content = tf.read_file(fn[0]) 91 | image = tf.image.decode_jpeg(file_content, channels=3) 92 | image = tf.image.resize_images(image, (224, 224)) 93 | image = tf.cast(image, tf.float32) 94 | images.append(image) 95 | 96 | label = input_queue[1] 97 | label = tf.cast(label - 1, tf.int64) 98 | image = ops.convert_to_tensor(images, dtypes.float32) 99 | 100 | return image, label 101 | -------------------------------------------------------------------------------- /utils/export_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | from tensorflow.python.saved_model import builder as saved_model_builder 7 | from tensorflow.python.saved_model import signature_constants 8 | from tensorflow.python.saved_model import signature_def_utils 9 | from tensorflow.python.saved_model import tag_constants 10 | from tensorflow.python.saved_model import utils as saved_model_utils 11 | from tensorflow.python.framework import ops 12 | from tensorflow.python.framework import dtypes 13 | 14 | _TOP_PREDICTIONS_IN_OUTPUT = 20 15 | 16 | class ModelExporter(object): 17 | 18 | def __init__(self, model, reader): 19 | self.model = model 20 | self.reader = reader 21 | 22 | with tf.Graph().as_default() as graph: 23 | self.inputs, self.outputs = self.build_inputs_and_outputs() 24 | self.graph = graph 25 | self.saver = tf.train.Saver(tf.trainable_variables(), sharded=True) 26 | 27 | def export_model(self, model_dir, global_step_val, last_checkpoint): 28 | """ Exports the model so that it can used for batch predictions """ 29 | 30 | with self.graph.as_default(): 31 | with tf.Session() as session: 32 | session.run(tf.global_variables_initializer()) 33 | self.saver.restore(session, last_checkpoint) 34 | 35 | signature = signature_def_utils.build_signature_def( 36 | inputs=self.inputs, 37 | outputs=self.outputs, 38 | method_name=signature_constants.PREDICT_METHOD_NAME) 39 | 40 | signature_map = {signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: 41 | signature} 42 | 43 | model_builder = saved_model_builder.SavedModelBuilder(model_dir) 44 | model_builder.add_meta_graph_and_variables(session, 45 | tags=[tag_constants.SERVING], 46 | signature_def_map=signature_map, 47 | clear_devices=True) 48 | model_builder.save() 49 | 50 | def build_inputs_and_outputs(self): 51 | split_num = self.reader.present_split 52 | 53 | top_indices_output, top_predictions_output = ( 54 | self.build_prediction_graph(split_num)) 55 | 56 | inputs = {"example_bytes": 57 | saved_model_utils.build_tensor_info(tf.constant(split_num))} 58 | 59 | outputs = { 60 | "class_indexes": saved_model_utils.build_tensor_info(top_indices_output), 61 | "predictions": saved_model_utils.build_tensor_info(top_predictions_output)} 62 | 63 | return inputs, outputs 64 | 65 | def build_prediction_graph(self, split): 66 | files, labels = self.reader._read_filelist(split=split) 67 | 68 | files = ops.convert_to_tensor(files, dtypes.string) 69 | labels = ops.convert_to_tensor(labels, dtypes.int64) 70 | 71 | input_queue = tf.train.slice_input_producer( 72 | [files, labels], 73 | num_epochs = self.reader.num_epochs, 74 | 
shuffle = True) 75 | image, label = self.reader._read_samples(input_queue) 76 | image = tf.image.resize_images(image, (299, 299)) 77 | 78 | image_loader, label_loader = tf.train.shuffle_batch( 79 | [image, label], 80 | batch_size = self.reader.batch_size, 81 | capacity = 5 * self.reader.batch_size, 82 | min_after_dequeue = self.reader.batch_size) 83 | 84 | #with tf.variable_scope("tower"): 85 | result = self.model.create_model( 86 | image_loader, 87 | self.reader.num_classes, 88 | label_loader, 89 | is_training=False) 90 | 91 | for variable in slim.get_model_variables(): 92 | tf.summary.histogram(variable.op.name, variable) 93 | 94 | predictions = result["predictions"] 95 | 96 | top_predictions, top_indices = tf.nn.top_k(predictions, 97 | _TOP_PREDICTIONS_IN_OUTPUT) 98 | return top_indices, top_predictions 99 | -------------------------------------------------------------------------------- /utils/eval_util.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import numpy 3 | 4 | from tensorflow.python.platform import gfile 5 | 6 | def flatten(l): 7 | """ Merges a list of lists into a single list. """ 8 | return [item for sublist in l for item in sublist] 9 | 10 | def calculate_hit_at_one(predictions, actuals): 11 | """Performs a local (numpy) calculation of the hit at one. 12 | 13 | Args: 14 | predictions: Matrix containing the outputs of the model. 15 | Dimensions are 'batch' x 'num_classes'. 16 | actuals: Matrix containing the ground truth labels. 17 | Dimensions are 'batch'. 18 | 19 | Returns: 20 | float: The average hit at one across the entire batch. 21 | """ 22 | top_prediction = numpy.argmax(predictions, 1) 23 | hits = [1 if (x == y) else 0 for x,y in zip(actuals, top_prediction)] 24 | return numpy.average(hits) 25 | 26 | def calculate_hit_at_five(predictions, actuals): 27 | """Performs a local (numpy) calculation of the hit at one. 28 | 29 | Args: 30 | predictions: Matrix containing the outputs of the model. 31 | Dimensions are 'batch' x 'num_classes'. 32 | actuals: Matrix containing the ground truth labels. 33 | Dimensions are 'batch'. 34 | 35 | Returns: 36 | float: The average hit at five across the entire batch. 37 | """ 38 | top_five_prediction = numpy.stack([numpy.argsort(predictions)[x][-5:] for x in range(predictions.shape[0])], 0) 39 | hits = [1 if (x in y) else 0 for x,y in zip(actuals, top_five_prediction)] 40 | return numpy.average(hits) 41 | 42 | class EvaluationMetrics(object): 43 | """A class to store the evaluation metrics.""" 44 | 45 | def __init__(self, num_class, top_k): 46 | """Construct an EvaluationMetrics object to store the evaluation metrics. 47 | 48 | Args: 49 | num_class: A positive integer specifying the number of classes. 50 | top_k: A positive integer specifying how many predictions are considered per video. 51 | 52 | Raises: 53 | ValueError: An error occurred when MeanAveragePrecisionCalculator cannot 54 | not be constructed. 55 | """ 56 | self.sum_hit_at_one = 0.0 57 | self.sum_loss = 0.0 58 | self.top_k = top_k 59 | self.num_examples = 0 60 | 61 | def accumulate(self, predictions, labels, loss): 62 | """Accumulate the metrics calculated locally for this mini-batch. 63 | 64 | Args: 65 | predictions: A numpy matrix containing the outputs of the model. 66 | Dimensions are 'batch' x 'num_classes'. 67 | labels: A numpy matrix containing the ground truth labels. 68 | Dimensions are 'batch' x 'num_classes'. 69 | loss: A numpy array containing the loss for each sample. 
70 | 71 | Returns: 72 | dictionary: A dictionary storing the metrics for the mini-batch. 73 | 74 | Raises: 75 | ValueError: An error occurred when the shape of predictions and actuals 76 | does not match. 77 | """ 78 | batch_size = labels.shape[0] 79 | mean_hit_at_one = calculate_hit_at_one(predictions, labels) 80 | mean_loss = numpy.mean(loss) 81 | 82 | self.num_examples += batch_size 83 | self.sum_hit_at_one += mean_hit_at_one * batch_size 84 | self.sum_loss += mean_loss * batch_size 85 | 86 | return {"hit_at_one": mean_hit_at_one, "loss": mean_loss} 87 | 88 | def get(self): 89 | """Calculate the evaluation metrics for the whole epoch. 90 | 91 | Raises: 92 | ValueError: If no examples were accumulated. 93 | 94 | Returns: 95 | dictionary: a dictionary storing the evaluation metrics for the epoch. The 96 | dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, and 97 | aps (default nan). 98 | """ 99 | if self.num_examples <= 0: 100 | raise ValueError("total_sample must be positive.") 101 | avg_hit_at_one = self.sum_hit_at_one / self.num_examples 102 | avg_loss = self.sum_loss / self.num_examples 103 | 104 | epoch_info_dict = {} 105 | return {"avg_hit_at_one": avg_hit_at_one, "avg_loss": avg_loss} 106 | 107 | def clear(self): 108 | """Clear the evaluation metrics and reset the EvaluationMetrics object.""" 109 | self.sum_hit_at_one = 0.0 110 | self.sum_loss = 0.0 111 | self.num_examples = 0 112 | -------------------------------------------------------------------------------- /models/NTURGBD/nturgbd_rnn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from six.moves import xrange 7 | 8 | slim = tf.contrib.slim 9 | framework = tf.contrib.framework 10 | rnn = tf.contrib.rnn 11 | layers = tf.contrib.layers 12 | 13 | import bnlstm 14 | 15 | from tensorflow.python.ops import variable_scope as vs 16 | 17 | from tensorflow.contrib.rnn import RNNCell 18 | from tensorflow.python.ops.math_ops import sigmoid 19 | from tensorflow.python.ops.math_ops import tanh 20 | 21 | def get_state_variables(batch_size, cell): 22 | # For each layer, get the initial state and make a variable out of it 23 | # to enable updating its value. 24 | state_variables = [] 25 | for state_c, state_h in cell.zero_state(batch_size, tf.float32): 26 | state_variables.append(tf.contrib.rnn.LSTMStateTuple( 27 | tf.Variable(state_c, trainable=False), 28 | tf.Variable(state_h, trainable=False))) 29 | # Return as a tuple, so that it can be fed to dynamic_rnn as an initial state 30 | return tuple(state_variables) 31 | 32 | 33 | def get_state_update_op(state_variables, new_states): 34 | # Add an operation to update the train states with the last state tensors 35 | update_ops = [] 36 | for state_variable, new_state in zip(state_variables, new_states): 37 | # Assign the new state to the state variables on this layer 38 | update_ops.extend([state_variable[0].assign(new_state[0]), 39 | state_variable[1].assign(new_state[1])]) 40 | # Return a tuple in order to combine all update_ops into a single operation. 41 | # The tuple's actual value should not be used. 42 | return tf.tuple(update_ops) 43 | 44 | def length(sequence): 45 | used = tf.sign(tf.reduce_max(tf.abs(sequence), 2)) 46 | length = tf.reduce_sum(used, 1) 47 | length = tf.cast(length, tf.int32) 48 | return length 49 | 50 | def cost(output, target): 51 | # Compute cross entropy for each frame. 
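# Padded frames have an all-zero target row, so the mask below removes their contribution;
# each sequence's summed loss is divided by its true (unpadded) length before the batch mean.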
52 | cross_entropy = target * tf.log(output) 53 | cross_entropy = -tf.reduce_sum(cross_entropy, 2) 54 | mask = tf.sign(tf.reduce_max(tf.abs(target), 2)) 55 | cross_entropy *= mask 56 | # Average over actual sequence lengths. 57 | cross_entropy = tf.reduce_sum(cross_entropy, 1) 58 | cross_entropy /= tf.reduce_sum(mask, 1) 59 | return tf.reduce_mean(cross_entropy) 60 | 61 | def last_relevant(output, length): 62 | batch_size = tf.shape(output)[0] 63 | max_length = tf.shape(output)[1] 64 | out_size = int(output.get_shape()[2]) 65 | index = tf.range(0, batch_size) * max_length + (length - 1) 66 | flat = tf.reshape(output, [-1, out_size]) 67 | relevant = tf.gather(flat, index) 68 | return relevant 69 | 70 | class SkeletonHRNNet(object): 71 | 72 | def __init__(self): 73 | 74 | self._num_layers_spatial = 3 75 | 76 | def create_model(self, inputs, num_classes, labels, is_training=True, **unused_params): 77 | outputs = {} 78 | 79 | is_training = tf.constant(is_training, dtype=tf.bool) 80 | with tf.variable_scope('spatial'): 81 | cells = [] 82 | num_hidden = [256, 256, 256] 83 | for i in range(self._num_layers_spatial): 84 | cell = bnlstm.BNLSTMCell(num_hidden[i], training=is_training) 85 | cell = rnn.DropoutWrapper(cell, input_keep_prob=0.5, output_keep_prob=0.5) 86 | cells.append(cell) 87 | spatial = rnn.MultiRNNCell(cells) 88 | output, new_states = tf.nn.dynamic_rnn(spatial, inputs, 89 | dtype=tf.float32, sequence_length=length(inputs)) 90 | 91 | last = last_relevant(output, length(output)) 92 | fc4 = layers.fully_connected(last, 128, activation_fn=tf.nn.relu) 93 | fc5 = layers.fully_connected(fc4, 64, activation_fn=tf.nn.relu) 94 | logits = layers.fully_connected(fc5, num_classes, activation_fn=None) 95 | 96 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 97 | labels=labels, logits=logits, name='xentropy') 98 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()]) 99 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') 100 | 101 | outputs['predictions'] = logits 102 | outputs['loss'] = loss 103 | outputs['reg_loss'] = l2_loss 104 | return outputs 105 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import tensorflow as tf 3 | from tensorflow import logging 4 | 5 | def MakeSummary(name, value): 6 | """Creates a tf.Summary proto with the given name and value.""" 7 | summary = tf.Summary() 8 | val = summary.value.add() 9 | val.tag = str(name) 10 | val.simple_value = float(value) 11 | return summary 12 | 13 | 14 | def AddGlobalStepSummary(summary_writer, 15 | global_step_val, 16 | global_step_info_dict, 17 | summary_scope="Eval"): 18 | """Add the global_step summary to the Tensorboard. 19 | 20 | Args: 21 | summary_writer: Tensorflow summary_writer. 22 | global_step_val: a int value of the global step. 23 | global_step_info_dict: a dictionary of the evaluation metrics calculated for 24 | a mini-batch. 25 | summary_scope: Train or Eval. 
26 | 27 | Returns: 28 | A string of this global_step summary 29 | """ 30 | this_hit_at_one = global_step_info_dict["hit_at_one"] 31 | this_loss = global_step_info_dict["loss"] 32 | examples_per_second = global_step_info_dict.get("examples_per_second", -1) 33 | 34 | summary_writer.add_summary( 35 | MakeSummary("GlobalStep/" + summary_scope + "_Hit@1", this_hit_at_one), 36 | global_step_val) 37 | summary_writer.add_summary( 38 | MakeSummary("GlobalStep/" + summary_scope + "_Loss", this_loss), 39 | global_step_val) 40 | 41 | if examples_per_second != -1: 42 | summary_writer.add_summary( 43 | MakeSummary("GlobalStep/" + summary_scope + "_Example_Second", 44 | examples_per_second), global_step_val) 45 | 46 | summary_writer.flush() 47 | info = ("global_step {0} | Batch Hit@1: {1:.3f} | Batch Loss: {2:.3f} " 48 | "| Examples_per_sec: {3:.3f}").format( 49 | global_step_val, this_hit_at_one, this_loss, 50 | examples_per_second) 51 | return info 52 | 53 | 54 | def AddEpochSummary(summary_writer, 55 | global_step_val, 56 | epoch_info_dict, 57 | summary_scope="Eval"): 58 | """Add the epoch summary to the Tensorboard. 59 | 60 | Args: 61 | summary_writer: Tensorflow summary_writer. 62 | global_step_val: a int value of the global step. 63 | epoch_info_dict: a dictionary of the evaluation metrics calculated for the 64 | whole epoch. 65 | summary_scope: Train or Eval. 66 | 67 | Returns: 68 | A string of this global_step summary 69 | """ 70 | epoch_id = epoch_info_dict["epoch_id"] 71 | avg_hit_at_one = epoch_info_dict["avg_hit_at_one"] 72 | avg_loss = epoch_info_dict["avg_loss"] 73 | 74 | summary_writer.add_summary( 75 | MakeSummary("Epoch/" + summary_scope + "_Avg_Hit@1", avg_hit_at_one), 76 | global_step_val) 77 | summary_writer.add_summary( 78 | MakeSummary("Epoch/" + summary_scope + "_Avg_Loss", avg_loss), 79 | global_step_val) 80 | summary_writer.flush() 81 | 82 | info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_Loss: {2:3f}").format( 83 | epoch_id, avg_hit_at_one, avg_loss) 84 | return info 85 | 86 | def clip_gradient_norms(gradients_to_variables, max_norm): 87 | """Clips the gradients by the given value. 88 | 89 | Args: 90 | gradients_to_variables: A list of gradient to variable pairs (tuples). 91 | max_norm: the maximum norm value. 92 | 93 | Returns: 94 | A list of clipped gradient to variable pairs. 95 | """ 96 | clipped_grads_and_vars = [] 97 | for grad, var in gradients_to_variables: 98 | if grad is not None: 99 | if isinstance(grad, tf.IndexedSlices): 100 | tmp = tf.clip_by_norm(grad.values, max_norm) 101 | grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape) 102 | else: 103 | grad = tf.clip_by_norm(grad, max_norm) 104 | clipped_grads_and_vars.append((grad, var)) 105 | return clipped_grads_and_vars 106 | 107 | def combine_gradients(tower_grads): 108 | """Calculate the combined gradient for each shared variable across all towers. 109 | 110 | Note that this function provides a synchronization point across all towers. 111 | 112 | Args: 113 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 114 | is over individual gradients. The inner list is over the gradient 115 | calculation for each tower. 116 | Returns: 117 | List of pairs of (gradient, variable) where the gradient has been summed 118 | across all towers. 
119 | """ 120 | filtered_grads = [[x for x in grad_list if x[0] is not None] for grad_list in tower_grads] 121 | final_grads = [] 122 | for i in xrange(len(filtered_grads[0])): 123 | grads = [filtered_grads[t][i] for t in xrange(len(filtered_grads))] 124 | grad = tf.stack([x[0] for x in grads], 0) 125 | grad = tf.reduce_sum(grad, 0) 126 | final_grads.append((grad, filtered_grads[0][i][1],)) 127 | 128 | return final_grads 129 | -------------------------------------------------------------------------------- /models/NTURGBD/bnlstm.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.contrib.rnn import RNNCell 5 | 6 | class LSTMCell(RNNCell): 7 | '''Vanilla LSTM implemented with same initializations as BN-LSTM''' 8 | def __init__(self, num_units): 9 | self.num_units = num_units 10 | 11 | @property 12 | def state_size(self): 13 | return (self.num_units, self.num_units) 14 | 15 | @property 16 | def output_size(self): 17 | return self.num_units 18 | 19 | def __call__(self, x, state, scope=None): 20 | with tf.variable_scope(scope or type(self).__name__): 21 | c, h = state 22 | 23 | # Keep W_xh and W_hh separate here as well to reuse initialization methods 24 | x_size = x.get_shape().as_list()[1] 25 | W_xh = tf.get_variable('W_xh', 26 | [x_size, 4 * self.num_units], 27 | initializer=orthogonal_initializer()) 28 | W_hh = tf.get_variable('W_hh', 29 | [self.num_units, 4 * self.num_units], 30 | initializer=bn_lstm_identity_initializer(0.95)) 31 | bias = tf.get_variable('bias', [4 * self.num_units]) 32 | 33 | # hidden = tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias 34 | # improve speed by concat. 35 | concat = tf.concat(1, [x, h]) 36 | W_both = tf.concat(0, [W_xh, W_hh]) 37 | hidden = tf.matmul(concat, W_both) + bias 38 | 39 | i, j, f, o = tf.split(1, 4, hidden) 40 | 41 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 42 | new_h = tf.tanh(new_c) * tf.sigmoid(o) 43 | 44 | return new_h, (new_c, new_h) 45 | 46 | class BNLSTMCell(RNNCell): 47 | '''Batch normalized LSTM as described in arxiv.org/abs/1603.09025''' 48 | def __init__(self, num_units, training): 49 | self.num_units = num_units 50 | self.training = training 51 | 52 | @property 53 | def state_size(self): 54 | return (self.num_units, self.num_units) 55 | 56 | @property 57 | def output_size(self): 58 | return self.num_units 59 | 60 | def __call__(self, x, state, scope=None): 61 | with tf.variable_scope(scope or type(self).__name__): 62 | c, h = state 63 | 64 | x_size = x.get_shape().as_list()[1] 65 | W_xh = tf.get_variable('W_xh', 66 | [x_size, 4 * self.num_units], 67 | initializer=orthogonal_initializer()) 68 | W_hh = tf.get_variable('W_hh', 69 | [self.num_units, 4 * self.num_units], 70 | initializer=bn_lstm_identity_initializer(0.95)) 71 | bias = tf.get_variable('bias', [4 * self.num_units]) 72 | 73 | xh = tf.matmul(x, W_xh) 74 | hh = tf.matmul(h, W_hh) 75 | 76 | bn_xh = batch_norm(xh, 'xh', self.training) 77 | bn_hh = batch_norm(hh, 'hh', self.training) 78 | 79 | hidden = bn_xh + bn_hh + bias 80 | 81 | i, j, f, o = tf.split(hidden, 4, 1) 82 | 83 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 84 | bn_new_c = batch_norm(new_c, 'c', self.training) 85 | 86 | new_h = tf.tanh(bn_new_c) * tf.sigmoid(o) 87 | 88 | return new_h, (new_c, new_h) 89 | 90 | def orthogonal(shape): 91 | flat_shape = (shape[0], np.prod(shape[1:])) 92 | a = np.random.normal(0.0, 1.0, flat_shape) 93 | u, _, v = np.linalg.svd(a, 
full_matrices=False) 94 | q = u if u.shape == flat_shape else v 95 | return q.reshape(shape) 96 | 97 | def bn_lstm_identity_initializer(scale): 98 | def _initializer(shape, dtype=tf.float32, partition_info=None): 99 | '''Ugly cause LSTM params calculated in one matrix multiply''' 100 | size = shape[0] 101 | # gate (j) is identity 102 | t = np.zeros(shape) 103 | t[:, size:size * 2] = np.identity(size) * scale 104 | t[:, :size] = orthogonal([size, size]) 105 | t[:, size * 2:size * 3] = orthogonal([size, size]) 106 | t[:, size * 3:] = orthogonal([size, size]) 107 | return tf.constant(t, dtype) 108 | 109 | return _initializer 110 | 111 | def orthogonal_initializer(): 112 | def _initializer(shape, dtype=tf.float32, partition_info=None): 113 | return tf.constant(orthogonal(shape), dtype) 114 | return _initializer 115 | 116 | def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999): 117 | '''Assume 2d [batch, values] tensor''' 118 | 119 | with tf.variable_scope(name_scope): 120 | size = x.get_shape().as_list()[1] 121 | 122 | scale = tf.get_variable('scale', [size], initializer=tf.constant_initializer(0.1)) 123 | offset = tf.get_variable('offset', [size]) 124 | 125 | pop_mean = tf.get_variable('pop_mean', [size], initializer=tf.zeros_initializer, trainable=False) 126 | pop_var = tf.get_variable('pop_var', [size], initializer=tf.ones_initializer, trainable=False) 127 | batch_mean, batch_var = tf.nn.moments(x, [0]) 128 | 129 | train_mean_op = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) 130 | train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 131 | 132 | def batch_statistics(): 133 | with tf.control_dependencies([train_mean_op, train_var_op]): 134 | return tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon) 135 | 136 | def population_statistics(): 137 | return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon) 138 | 139 | return tf.cond(training, batch_statistics, population_statistics) 140 | -------------------------------------------------------------------------------- /models/HybridModel/bnlstm.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.contrib.rnn import RNNCell 5 | 6 | class LSTMCell(RNNCell): 7 | '''Vanilla LSTM implemented with same initializations as BN-LSTM''' 8 | def __init__(self, num_units): 9 | self.num_units = num_units 10 | 11 | @property 12 | def state_size(self): 13 | return (self.num_units, self.num_units) 14 | 15 | @property 16 | def output_size(self): 17 | return self.num_units 18 | 19 | def __call__(self, x, state, scope=None): 20 | with tf.variable_scope(scope or type(self).__name__): 21 | c, h = state 22 | 23 | # Keep W_xh and W_hh separate here as well to reuse initialization methods 24 | x_size = x.get_shape().as_list()[1] 25 | W_xh = tf.get_variable('W_xh', 26 | [x_size, 4 * self.num_units], 27 | initializer=orthogonal_initializer()) 28 | W_hh = tf.get_variable('W_hh', 29 | [self.num_units, 4 * self.num_units], 30 | initializer=bn_lstm_identity_initializer(0.95)) 31 | bias = tf.get_variable('bias', [4 * self.num_units]) 32 | 33 | # hidden = tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias 34 | # improve speed by concat. 
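# Caution: this vanilla LSTMCell still uses the pre-TF-1.0 argument order
# (tf.concat(axis, values) / tf.split(axis, num, value)), whereas BNLSTMCell below already
# uses the TF 1.x order tf.split(value, num, axis). On TF >= 1.0 the calls in this block
# would need to become tf.concat([x, h], 1), tf.concat([W_xh, W_hh], 0) and
# tf.split(hidden, 4, 1) for this cell to run.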
35 | concat = tf.concat(1, [x, h]) 36 | W_both = tf.concat(0, [W_xh, W_hh]) 37 | hidden = tf.matmul(concat, W_both) + bias 38 | 39 | i, j, f, o = tf.split(1, 4, hidden) 40 | 41 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 42 | new_h = tf.tanh(new_c) * tf.sigmoid(o) 43 | 44 | return new_h, (new_c, new_h) 45 | 46 | class BNLSTMCell(RNNCell): 47 | '''Batch normalized LSTM as described in arxiv.org/abs/1603.09025''' 48 | def __init__(self, num_units, training): 49 | self.num_units = num_units 50 | self.training = training 51 | 52 | @property 53 | def state_size(self): 54 | return (self.num_units, self.num_units) 55 | 56 | @property 57 | def output_size(self): 58 | return self.num_units 59 | 60 | def __call__(self, x, state, scope=None): 61 | with tf.variable_scope(scope or type(self).__name__): 62 | c, h = state 63 | 64 | x_size = x.get_shape().as_list()[1] 65 | W_xh = tf.get_variable('W_xh', 66 | [x_size, 4 * self.num_units], 67 | initializer=orthogonal_initializer()) 68 | W_hh = tf.get_variable('W_hh', 69 | [self.num_units, 4 * self.num_units], 70 | initializer=bn_lstm_identity_initializer(0.95)) 71 | bias = tf.get_variable('bias', [4 * self.num_units]) 72 | 73 | xh = tf.matmul(x, W_xh) 74 | hh = tf.matmul(h, W_hh) 75 | 76 | bn_xh = batch_norm(xh, 'xh', self.training) 77 | bn_hh = batch_norm(hh, 'hh', self.training) 78 | 79 | hidden = bn_xh + bn_hh + bias 80 | 81 | i, j, f, o = tf.split(hidden, 4, 1) 82 | 83 | new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j) 84 | bn_new_c = batch_norm(new_c, 'c', self.training) 85 | 86 | new_h = tf.tanh(bn_new_c) * tf.sigmoid(o) 87 | 88 | return new_h, (new_c, new_h) 89 | 90 | def orthogonal(shape): 91 | flat_shape = (shape[0], np.prod(shape[1:])) 92 | a = np.random.normal(0.0, 1.0, flat_shape) 93 | u, _, v = np.linalg.svd(a, full_matrices=False) 94 | q = u if u.shape == flat_shape else v 95 | return q.reshape(shape) 96 | 97 | def bn_lstm_identity_initializer(scale): 98 | def _initializer(shape, dtype=tf.float32, partition_info=None): 99 | '''Ugly cause LSTM params calculated in one matrix multiply''' 100 | size = shape[0] 101 | # gate (j) is identity 102 | t = np.zeros(shape) 103 | t[:, size:size * 2] = np.identity(size) * scale 104 | t[:, :size] = orthogonal([size, size]) 105 | t[:, size * 2:size * 3] = orthogonal([size, size]) 106 | t[:, size * 3:] = orthogonal([size, size]) 107 | return tf.constant(t, dtype) 108 | 109 | return _initializer 110 | 111 | def orthogonal_initializer(): 112 | def _initializer(shape, dtype=tf.float32, partition_info=None): 113 | return tf.constant(orthogonal(shape), dtype) 114 | return _initializer 115 | 116 | def batch_norm(x, name_scope, training, epsilon=1e-3, decay=0.999): 117 | '''Assume 2d [batch, values] tensor''' 118 | 119 | with tf.variable_scope(name_scope): 120 | size = x.get_shape().as_list()[1] 121 | 122 | scale = tf.get_variable('scale', [size], initializer=tf.constant_initializer(0.1)) 123 | offset = tf.get_variable('offset', [size]) 124 | 125 | pop_mean = tf.get_variable('pop_mean', [size], initializer=tf.zeros_initializer, trainable=False) 126 | pop_var = tf.get_variable('pop_var', [size], initializer=tf.ones_initializer, trainable=False) 127 | batch_mean, batch_var = tf.nn.moments(x, [0]) 128 | 129 | train_mean_op = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay)) 130 | train_var_op = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 131 | 132 | def batch_statistics(): 133 | with tf.control_dependencies([train_mean_op, train_var_op]): 134 | return 
tf.nn.batch_normalization(x, batch_mean, batch_var, offset, scale, epsilon) 135 | 136 | def population_statistics(): 137 | return tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon) 138 | 139 | return tf.cond(training, batch_statistics, population_statistics) 140 | -------------------------------------------------------------------------------- /models/HybridModel/vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains model definitions for versions of the Oxford VGG network. 16 | 17 | These model definitions were introduced in the following technical report: 18 | 19 | Very Deep Convolutional Networks For Large-Scale Image Recognition 20 | Karen Simonyan and Andrew Zisserman 21 | arXiv technical report, 2015 22 | PDF: http://arxiv.org/pdf/1409.1556.pdf 23 | ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf 24 | CC-BY-4.0 25 | 26 | More information can be obtained from the VGG website: 27 | www.robots.ox.ac.uk/~vgg/research/very_deep/ 28 | 29 | Usage: 30 | with slim.arg_scope(vgg.vgg_arg_scope()): 31 | outputs, end_points = vgg.vgg_a(inputs) 32 | 33 | with slim.arg_scope(vgg.vgg_arg_scope()): 34 | outputs, end_points = vgg.vgg_16(inputs) 35 | 36 | @@vgg_a 37 | @@vgg_16 38 | @@vgg_19 39 | """ 40 | 41 | from __future__ import absolute_import 42 | from __future__ import division 43 | from __future__ import print_function 44 | 45 | from tensorflow.contrib import layers 46 | from tensorflow.contrib.framework.python.ops import arg_scope 47 | from tensorflow.contrib.layers.python.layers import layers as layers_lib 48 | from tensorflow.contrib.layers.python.layers import regularizers 49 | from tensorflow.contrib.layers.python.layers import utils 50 | from tensorflow.python.ops import array_ops 51 | from tensorflow.python.ops import init_ops 52 | from tensorflow.python.ops import nn_ops 53 | from tensorflow.python.ops import variable_scope 54 | 55 | 56 | def vgg_arg_scope(weight_decay=0.0005): 57 | """Defines the VGG arg scope. 58 | 59 | Args: 60 | weight_decay: The l2 regularization coefficient. 61 | 62 | Returns: 63 | An arg_scope. 64 | """ 65 | with arg_scope( 66 | [layers.conv2d, layers_lib.fully_connected], 67 | activation_fn=nn_ops.relu, 68 | weights_regularizer=regularizers.l2_regularizer(weight_decay), 69 | biases_initializer=init_ops.zeros_initializer()): 70 | with arg_scope([layers.conv2d], padding='SAME') as arg_sc: 71 | return arg_sc 72 | 73 | 74 | def vgg_a(inputs, 75 | num_classes=1000, 76 | is_training=True, 77 | dropout_keep_prob=0.5, 78 | spatial_squeeze=True, 79 | scope='vgg_a'): 80 | """Oxford Net VGG 11-Layers version A Example. 81 | 82 | Note: All the fully_connected layers have been transformed to conv2d layers. 
83 | To use in classification mode, resize input to 224x224. 84 | 85 | Args: 86 | inputs: a tensor of size [batch_size, height, width, channels]. 87 | num_classes: number of predicted classes. 88 | is_training: whether or not the model is being trained. 89 | dropout_keep_prob: the probability that activations are kept in the dropout 90 | layers during training. 91 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 92 | outputs. Useful to remove unnecessary dimensions for classification. 93 | scope: Optional scope for the variables. 94 | 95 | Returns: 96 | the last op containing the log predictions and end_points dict. 97 | """ 98 | with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc: 99 | end_points_collection = sc.original_name_scope + '_end_points' 100 | # Collect outputs for conv2d, fully_connected and max_pool2d. 101 | with arg_scope( 102 | [layers.conv2d, layers_lib.max_pool2d], 103 | outputs_collections=end_points_collection): 104 | net = layers_lib.repeat( 105 | inputs, 1, layers.conv2d, 64, [3, 3], scope='conv1') 106 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool1') 107 | net = layers_lib.repeat(net, 1, layers.conv2d, 128, [3, 3], scope='conv2') 108 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool2') 109 | net = layers_lib.repeat(net, 2, layers.conv2d, 256, [3, 3], scope='conv3') 110 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool3') 111 | net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv4') 112 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool4') 113 | net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv5') 114 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool5') 115 | # Use conv2d instead of fully_connected layers. 116 | net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 117 | net = layers_lib.dropout( 118 | net, dropout_keep_prob, is_training=is_training, scope='dropout6') 119 | net = layers.conv2d(net, 4096, [1, 1], scope='fc7') 120 | net = layers_lib.dropout( 121 | net, dropout_keep_prob, is_training=is_training, scope='dropout7') 122 | net = layers.conv2d( 123 | net, 124 | num_classes, [1, 1], 125 | activation_fn=None, 126 | normalizer_fn=None, 127 | scope='fc8') 128 | # Convert end_points_collection into a end_point dict. 129 | end_points = utils.convert_collection_to_dict(end_points_collection) 130 | if spatial_squeeze: 131 | net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed') 132 | end_points[sc.name + '/fc8'] = net 133 | return net, end_points 134 | 135 | 136 | vgg_a.default_image_size = 224 137 | 138 | 139 | def vgg_16(inputs, 140 | num_classes=1000, 141 | is_training=True, 142 | dropout_keep_prob=0.5, 143 | spatial_squeeze=True, 144 | scope='vgg_16'): 145 | """Oxford Net VGG 16-Layers version D Example. 146 | 147 | Note: All the fully_connected layers have been transformed to conv2d layers. 148 | To use in classification mode, resize input to 224x224. 149 | 150 | Args: 151 | inputs: a tensor of size [batch_size, height, width, channels]. 152 | num_classes: number of predicted classes. 153 | is_training: whether or not the model is being trained. 154 | dropout_keep_prob: the probability that activations are kept in the dropout 155 | layers during training. 156 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 157 | outputs. Useful to remove unnecessary dimensions for classification. 158 | scope: Optional scope for the variables. 
159 | 160 | Returns: 161 | the last op containing the log predictions and end_points dict. 162 | """ 163 | with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc: 164 | end_points_collection = sc.original_name_scope + '_end_points' 165 | # Collect outputs for conv2d, fully_connected and max_pool2d. 166 | with arg_scope( 167 | [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d], 168 | outputs_collections=end_points_collection): 169 | net = layers_lib.repeat( 170 | inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1') 171 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool1') 172 | net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') 173 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool2') 174 | net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3') 175 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool3') 176 | net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4') 177 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool4') 178 | net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5') 179 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool5') 180 | # Use conv2d instead of fully_connected layers. 181 | net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 182 | net = layers_lib.dropout( 183 | net, dropout_keep_prob, is_training=is_training, scope='dropout6') 184 | net = layers.conv2d(net, 4096, [1, 1], scope='fc7') 185 | net = layers_lib.dropout( 186 | net, dropout_keep_prob, is_training=is_training, scope='dropout7') 187 | net = layers.conv2d( 188 | net, 189 | num_classes, [1, 1], 190 | activation_fn=None, 191 | normalizer_fn=None, 192 | scope='fc8') 193 | # Convert end_points_collection into a end_point dict. 194 | end_points = utils.convert_collection_to_dict(end_points_collection) 195 | if spatial_squeeze: 196 | net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed') 197 | end_points[sc.name + '/fc8'] = net 198 | return net, end_points 199 | 200 | 201 | vgg_16.default_image_size = 224 202 | 203 | 204 | def vgg_19(inputs, 205 | num_classes=1000, 206 | is_training=True, 207 | dropout_keep_prob=0.5, 208 | spatial_squeeze=True, 209 | scope='vgg_19'): 210 | """Oxford Net VGG 19-Layers version E Example. 211 | 212 | Note: All the fully_connected layers have been transformed to conv2d layers. 213 | To use in classification mode, resize input to 224x224. 214 | 215 | Args: 216 | inputs: a tensor of size [batch_size, height, width, channels]. 217 | num_classes: number of predicted classes. 218 | is_training: whether or not the model is being trained. 219 | dropout_keep_prob: the probability that activations are kept in the dropout 220 | layers during training. 221 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 222 | outputs. Useful to remove unnecessary dimensions for classification. 223 | scope: Optional scope for the variables. 224 | 225 | Returns: 226 | the last op containing the log predictions and end_points dict. 227 | """ 228 | with variable_scope.variable_scope(scope, 'vgg_19', [inputs]) as sc: 229 | end_points_collection = sc.name + '_end_points' 230 | # Collect outputs for conv2d, fully_connected and max_pool2d. 
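# hybrid.get_pretrained_model_feats() later reads the 14x14x512 'conv5/conv5_1' activation
# out of this end-points collection, so the layers below double as the pre-trained feature
# extractor of the hybrid model (its variables are restored from a vgg_19 checkpoint and an
# empty list of trainable variables is returned for it).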
231 | with arg_scope( 232 | [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d], 233 | outputs_collections=end_points_collection): 234 | net = layers_lib.repeat( 235 | inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1') 236 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool1') 237 | net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') 238 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool2') 239 | net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3], scope='conv3') 240 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool3') 241 | net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv4') 242 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool4') 243 | net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv5') 244 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool5') 245 | # Use conv2d instead of fully_connected layers. 246 | net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 247 | net = layers_lib.dropout( 248 | net, dropout_keep_prob, is_training=is_training, scope='dropout6') 249 | net = layers.conv2d(net, 4096, [1, 1], scope='fc7') 250 | net = layers_lib.dropout( 251 | net, dropout_keep_prob, is_training=is_training, scope='dropout7') 252 | net = layers.conv2d( 253 | net, 254 | num_classes, [1, 1], 255 | activation_fn=None, 256 | normalizer_fn=None, 257 | scope='fc8') 258 | # Convert end_points_collection into a end_point dict. 259 | end_points = utils.convert_collection_to_dict(end_points_collection) 260 | if spatial_squeeze: 261 | net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed') 262 | end_points[sc.name + '/fc8'] = net 263 | return net, end_points 264 | 265 | 266 | vgg_19.default_image_size = 224 267 | 268 | # Alias 269 | vgg_d = vgg_16 270 | vgg_e = vgg_19 271 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | import tensorflow as tf 5 | from tensorflow import app 6 | from tensorflow import flags 7 | from tensorflow import gfile 8 | from tensorflow import logging 9 | from tensorflow.python.framework import ops 10 | from tensorflow.python.framework import dtypes 11 | from utils import * 12 | 13 | import data, models 14 | 15 | FLAGS = flags.FLAGS 16 | 17 | def find_class_by_name(name, modules): 18 | """Searches the provided modules for the named class and returns it.""" 19 | modules = [getattr(module, name, None) for module in modules] 20 | return next(a for a in modules if a) 21 | 22 | 23 | def get_input_evaluation_tensors(reader, 24 | batch_size=1024, 25 | num_readers=1): 26 | """Creates the section of the graph which reads the evaluation data. 27 | 28 | Args: 29 | reader: A class which parses the training data. 30 | data_pattern: A 'glob' style path to the data files. 31 | batch_size: How many examples to process at a time. 32 | num_readers: How many I/O threads to use. 33 | 34 | Returns: 35 | A tuple containing the features tensor, labels tensor, and optionally a 36 | tensor containing the number of frames per video. The exact dimensions 37 | depend on the reader being used. 38 | 39 | Raises: 40 | IOError: If no files matching the given pattern were found. 
41 | """ 42 | logging.info("Using batch size of " + str(batch_size) + " for evaluation.") 43 | with tf.name_scope("eval_input"): 44 | files, labels = reader._read_filelist(split=reader.present_split, train=False) 45 | if not files: 46 | raise IOError("Unable to find the evaluation files.") 47 | logging.info("number of evaluation files: " + str(len(files))) 48 | files = ops.convert_to_tensor(files, dtypes.string) 49 | labels = ops.convert_to_tensor(labels, dtypes.int64) 50 | 51 | input_queue = tf.train.slice_input_producer( 52 | [files, labels], 53 | num_epochs = 1, 54 | shuffle = False) 55 | image, label = reader._read_samples(input_queue) 56 | 57 | test_image_loader, test_label_loader = tf.train.batch( 58 | [image, label], 59 | batch_size = batch_size, 60 | capacity = 5 * batch_size) 61 | return test_image_loader, test_label_loader 62 | 63 | def build_graph(reader, 64 | model, 65 | label_loss_fn, 66 | batch_size=1024, 67 | num_readers=1): 68 | """Creates the Tensorflow graph for evaluation. 69 | 70 | Args: 71 | reader: The data file reader. It should inherit from BaseReader. 72 | model: The core model (e.g. logistic or neural net). It should inherit 73 | from BaseModel. 74 | eval_data_pattern: glob path to the evaluation data files. 75 | label_loss_fn: What kind of loss to apply to the model. It should inherit 76 | from BaseLoss. 77 | batch_size: How many examples to process at a time. 78 | num_readers: How many threads to use for I/O operations. 79 | """ 80 | global_step = tf.Variable(0, trainable=False, name="global_step") 81 | images_loader, labels_loader = get_input_evaluation_tensors( # pylint: disable=g-line-too-long 82 | reader, 83 | batch_size=batch_size, 84 | num_readers=num_readers) 85 | images_batch = tf.placeholder(tf.float32, (None, 224, 224, 3)) 86 | labels_batch = tf.placeholder(tf.int64, (None,)) 87 | # (224, 224, 3) -> (14, 14, 512) 88 | feature_0, restore_vars_0, train_v0 = model.create_feature_model( 89 | images_batch, scope="rgb", is_training=False) 90 | # (224, 224, 3) -> (14, 14, 512) 91 | feature_1, restore_vars_1, train_v1 = model.create_feature_model( 92 | images_batch, scope="rgbdiff", is_training=False) 93 | # (14, 14, 512) -> (7168,) 94 | aux_feat_batch = tf.placeholder(tf.float32, (None, 14, 14, 512)) 95 | aux_output, train_v2 = model.create_aux_model( 96 | aux_feat_batch) 97 | # (21504,) -> (60,) 98 | aux_fc_batch_0 = tf.placeholder(tf.float32, (None, 21504)) 99 | logits_aux_0, train_v3 = model.create_logits_model( 100 | aux_fc_batch_0, 60, is_training=False, scope="auxlogs", reuse=None) 101 | # (21504,) -> (60,) 102 | aux_fc_batch_1 = tf.placeholder(tf.float32, (None, 21504)) 103 | logits_aux_1, train_v4 = model.create_logits_model( 104 | aux_fc_batch_1, 60, is_training=False, scope="auxlogs", reuse=True) 105 | # (21504,) -> (60,) 106 | aux_fc_batch_2 = tf.placeholder(tf.float32, (None, 21504)) 107 | logits_aux_2, train_v5 = model.create_logits_model( 108 | aux_fc_batch_2, 60, is_training=False, scope="auxlogs", reuse=True) 109 | # (21504,) -> (60,) 110 | aux_fc_batch_3 = tf.placeholder(tf.float32, (None, 21504)) 111 | logits_aux_3, train_v6 = model.create_logits_model( 112 | aux_fc_batch_3, 60, is_training=False, scope="auxlogs", reuse=True) 113 | 114 | loss_0 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_0, labels=labels_batch)) 115 | loss_1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_1, labels=labels_batch)) 116 | loss_2 = 
tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_2, labels=labels_batch)) 117 | loss_3 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_3, labels=labels_batch)) 118 | 119 | loss = loss_0 + loss_1 + loss_2 + loss_3 120 | predictions = ( logits_aux_0 + logits_aux_1 + logits_aux_2 + logits_aux_3 ) 121 | 122 | tf.add_to_collection("global_step", global_step) 123 | tf.add_to_collection("loss", loss) 124 | tf.add_to_collection("feature_0", feature_0) 125 | tf.add_to_collection("feature_1", feature_1) 126 | tf.add_to_collection("aux_feat_batch", aux_feat_batch) 127 | tf.add_to_collection("aux_output", aux_output) 128 | tf.add_to_collection("aux_fc_batch_0", aux_fc_batch_0) 129 | tf.add_to_collection("logits_aux_0", logits_aux_0) 130 | tf.add_to_collection("aux_fc_batch_1", aux_fc_batch_1) 131 | tf.add_to_collection("logits_aux_1", logits_aux_1) 132 | tf.add_to_collection("aux_fc_batch_2", aux_fc_batch_2) 133 | tf.add_to_collection("logits_aux_2", logits_aux_2) 134 | tf.add_to_collection("aux_fc_batch_3", aux_fc_batch_3) 135 | tf.add_to_collection("logits_aux_3", logits_aux_3) 136 | tf.add_to_collection("input_batch", images_batch) 137 | tf.add_to_collection("labels", labels_batch) 138 | tf.add_to_collection("predictions", predictions) 139 | tf.add_to_collection("images_loader", images_loader) 140 | tf.add_to_collection("labels_loader", labels_loader) 141 | 142 | return restore_vars_0.extend(restore_vars_1) 143 | 144 | def evaluation_loop(predictions, labels, loss, 145 | inputs, aux_feat_batch, aux_output, aux_fc_batch_0, logits_aux_0, 146 | aux_fc_batch_1, logits_aux_1, aux_fc_batch_2, logits_aux_2, 147 | aux_fc_batch_3, logits_aux_3, inputs_loader, feature_0, feature_1, 148 | labels_loader, saver, summary_writer, train_dir, evl_metrics, last_global_step_val): 149 | 150 | """Run the evaluation loop once. 151 | 152 | Args: 153 | video_id_batch: a tensor of video ids mini-batch. 154 | prediction_batch: a tensor of predictions mini-batch. 155 | label_batch: a tensor of label_batch mini-batch. 156 | loss: a tensor of loss for the examples in the mini-batch. 157 | summary_op: a tensor which runs the tensorboard summary operations. 158 | saver: a tensorflow saver to restore the model. 159 | summary_writer: a tensorflow summary_writer 160 | evl_metrics: an EvaluationMetrics object. 161 | last_global_step_val: the global step used in the previous evaluation. 162 | 163 | Returns: 164 | The global_step used in the latest model. 165 | """ 166 | 167 | global_step_val = -1 168 | with tf.Session() as sess: 169 | latest_checkpoint = tf.train.latest_checkpoint(train_dir) 170 | if latest_checkpoint: 171 | logging.info("Loading checkpoint for eval: " + latest_checkpoint) 172 | # Restores from checkpoint 173 | saver.restore(sess, latest_checkpoint) 174 | 175 | # Assuming model_checkpoint_path looks something like: 176 | # /my-favorite-path/train_dir/model.ckpt-0, extract global_step from it. 177 | global_step_val = latest_checkpoint.split("/")[-1].split("-")[-1] 178 | else: 179 | logging.info("No checkpoint file found.") 180 | return global_step_val 181 | 182 | if global_step_val == last_global_step_val: 183 | logging.info("skip this checkpoint global_step_val=%s " 184 | "(same as the previous one).", global_step_val) 185 | return global_step_val 186 | 187 | sess.run([tf.local_variables_initializer()]) 188 | 189 | # Start the queue runners. 
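# Per-batch evaluation flow (mirrors the training loop in train.py):
#   1. A (batch, 12, 224, 224, 3) batch is loaded and split into 12 frames:
#      the first 6 feed the "rgb" VGG trunk, the last 6 the "rgbdiff" trunk.
#   2. Each frame yields a (14, 14, 512) feature map, which the aux model maps
#      to a 7168-d vector.
#   3. Vectors from frames (i, i+2, i+4) are concatenated into 21504-d inputs
#      for the four weight-shared "auxlogs" heads; their 60-way logits are
#      summed to form `predictions`, and their cross-entropy losses are summed
#      to form `loss`.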
190 | fetches = [predictions, labels, loss] 191 | coord = tf.train.Coordinator() 192 | try: 193 | threads = [] 194 | for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): 195 | threads.extend(qr.create_threads( 196 | sess, coord=coord, daemon=True, 197 | start=True)) 198 | logging.info("enter eval_once loop global_step_val = %s. ", 199 | global_step_val) 200 | 201 | evl_metrics.clear() 202 | 203 | examples_processed = 0 204 | while not coord.should_stop(): 205 | batch_start_time = time.time() 206 | 207 | input_batch, label_batch = sess.run([inputs_loader, labels_loader]) 208 | 209 | # (batch_size, 12, 224, 224, 3) 210 | input_batch = np.transpose(input_batch, [1, 0, 2, 3, 4]) 211 | 212 | # list of (batch_size, 224, 224, 3) of size 6 213 | tw_inputs = np.split(input_batch, 12) 214 | tw_inputs = [np.reshape(x, [-1, 224, 224, 3]) for x in tw_inputs] 215 | s0_inputs = tw_inputs[:6] 216 | s1_inputs = tw_inputs[6:] 217 | 218 | # [(224, 224, 3), ..] -> [(14, 14, 512), ..] 219 | features_0 = [] 220 | for inp in s0_inputs: 221 | feat_vec = sess.run(feature_0, feed_dict={inputs: inp}) 222 | features_0.append(feat_vec) 223 | 224 | # [(224, 224, 3), ..] -> [(14, 14, 512), ..] 225 | features_1 = [] 226 | for inp in s1_inputs: 227 | feat_vec = sess.run(feature_1, feed_dict={inputs: inp}) 228 | features_1.append(feat_vec) 229 | 230 | # [(14, 14, 512), ..] -> [(7168,), ..] 231 | feats_for_aux = [] 232 | for feat in features_0: 233 | out = sess.run(aux_output, feed_dict={aux_feat_batch: feat}) 234 | feats_for_aux.append(out) 235 | 236 | # [(7168,), ..] -> [(21504,), (21504,)] (RGB stream) 237 | aux_fcs_0 = [np.concatenate([feats_for_aux[i], feats_for_aux[i+2], 238 | feats_for_aux[i+4]], axis=1) for i in range(2)] 239 | 240 | # [(14, 14, 512), ..] -> [(7168,), ..] 241 | feats_for_aux = [] 242 | for feat in features_1: 243 | out = sess.run(aux_output, feed_dict={aux_feat_batch: feat}) 244 | feats_for_aux.append(out) 245 | 246 | # [(7168,), ..] -> [(21504,), (21504,)] (RGB difference stream) 247 | aux_fcs_1 = [np.concatenate([feats_for_aux[i], feats_for_aux[i+2], 248 | feats_for_aux[i+4]], axis=1) for i in range(2)] 249 | 250 | predictions_val, labels_val, loss_val = sess.run( 251 | fetches, feed_dict={labels: label_batch, 252 | aux_fc_batch_0: aux_fcs_0[0], aux_fc_batch_1: aux_fcs_0[1], 253 | aux_fc_batch_2: aux_fcs_1[0], aux_fc_batch_3: aux_fcs_1[1]}) 254 | seconds_per_batch = time.time() - batch_start_time 255 | example_per_second = labels_val.shape[0] / seconds_per_batch 256 | examples_processed += labels_val.shape[0] 257 | 258 | iteration_info_dict = evl_metrics.accumulate(predictions_val, 259 | labels_val, loss_val) 260 | iteration_info_dict["examples_per_second"] = example_per_second 261 | 262 | iterinfo = utils.AddGlobalStepSummary( 263 | summary_writer, 264 | global_step_val, 265 | iteration_info_dict, 266 | summary_scope="Eval") 267 | logging.info("examples_processed: %d | %s", examples_processed, 268 | iterinfo) 269 | 270 | except tf.errors.OutOfRangeError as e: 271 | logging.info( 272 | "Done with batched inference. 
Now calculating global performance " 273 | "metrics.") 274 | # calculate the metrics for the entire epoch 275 | epoch_info_dict = evl_metrics.get() 276 | epoch_info_dict["epoch_id"] = global_step_val 277 | 278 | #summary_writer.add_summary(summary_val, global_step_val) 279 | epochinfo = utils.AddEpochSummary( 280 | summary_writer, 281 | global_step_val, 282 | epoch_info_dict, 283 | summary_scope="Eval") 284 | logging.info(epochinfo) 285 | evl_metrics.clear() 286 | except Exception as e: # pylint: disable=broad-except 287 | logging.info("Unexpected exception: " + str(e)) 288 | coord.request_stop(e) 289 | 290 | coord.request_stop() 291 | coord.join(threads, stop_grace_period_secs=10) 292 | 293 | return global_step_val, epoch_info_dict['avg_hit_at_one'] 294 | 295 | 296 | def evaluate(dataset, 297 | model, 298 | train_dir, 299 | dataset_dir, 300 | splits_dir, 301 | num_epochs, 302 | batch_size, 303 | split_num, 304 | label_loss='CrossEntropyLoss', 305 | run_once=True): 306 | tf.set_random_seed(0) # for reproducibility 307 | with tf.Graph().as_default(): 308 | reader = getattr(data, dataset)(dataset_dir, splits_dir, 309 | num_epochs, batch_size, split_num) 310 | 311 | model = find_class_by_name(model, 312 | [models])() 313 | label_loss_fn = find_class_by_name(label_loss, [losses])() 314 | 315 | restore_vars = \ 316 | build_graph( 317 | reader=reader, 318 | model=model, 319 | label_loss_fn=label_loss_fn, 320 | num_readers=1, 321 | batch_size=batch_size) 322 | logging.info("built evaluation graph") 323 | loss = tf.get_collection("loss")[0] 324 | predictions = tf.get_collection("predictions")[0] 325 | labels = tf.get_collection("labels")[0] 326 | inputs = tf.get_collection("input_batch")[0] 327 | feature_0 = tf.get_collection("feature_0")[0] 328 | feature_1 = tf.get_collection("feature_1")[0] 329 | aux_feat_batch = tf.get_collection("aux_feat_batch")[0] 330 | aux_output = tf.get_collection("aux_output")[0] 331 | aux_fc_batch_0 = tf.get_collection("aux_fc_batch_0")[0] 332 | logits_aux_0 = tf.get_collection("logits_aux_0")[0] 333 | aux_fc_batch_1 = tf.get_collection("aux_fc_batch_1")[0] 334 | logits_aux_1 = tf.get_collection("logits_aux_1")[0] 335 | aux_fc_batch_2 = tf.get_collection("aux_fc_batch_2")[0] 336 | logits_aux_2 = tf.get_collection("logits_aux_2")[0] 337 | aux_fc_batch_3 = tf.get_collection("aux_fc_batch_3")[0] 338 | logits_aux_3 = tf.get_collection("logits_aux_3")[0] 339 | inputs_loader = tf.get_collection("images_loader")[0] 340 | labels_loader = tf.get_collection("labels_loader")[0] 341 | 342 | saver = tf.train.Saver(tf.global_variables()) 343 | summary_writer = tf.summary.FileWriter(train_dir, graph=tf.get_default_graph()) 344 | 345 | evl_metrics = eval_util.EvaluationMetrics(reader.num_classes, 20) 346 | 347 | last_global_step_val = -1 348 | while True: 349 | last_global_step_val, h1 = evaluation_loop(predictions, labels, loss, 350 | inputs, aux_feat_batch, aux_output, aux_fc_batch_0, logits_aux_0, 351 | aux_fc_batch_1, logits_aux_1, aux_fc_batch_2, logits_aux_2, 352 | aux_fc_batch_3, logits_aux_3, inputs_loader, feature_0, feature_1, 353 | labels_loader, saver, summary_writer, train_dir, evl_metrics, last_global_step_val) 354 | 355 | if run_once: 356 | break 357 | return h1 358 | -------------------------------------------------------------------------------- /scripts/nturgbd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | #import matplotlib.pyplot as plt 5 | #from 
mpl_toolkits.mplot3d import Axes3D 6 | #from sklearn import preprocessing 7 | 8 | import tensorflow as tf 9 | from tensorflow import logging 10 | logging.set_verbosity(tf.logging.INFO) 11 | 12 | class Joint(object): 13 | def __init__(self, 14 | x, y, z, 15 | dX=None, dY=None, 16 | cX=None, cY=None, 17 | orX=None, orY=None, orZ=None, orW=None, 18 | tState=None): 19 | self.x = x; self.y = y; self.z = z 20 | self.depthX = dX; self.depthY = dY 21 | self.colorX = cX; self.colorY = cY 22 | self.orientX = orX; self.orientY = orY 23 | self.orientZ = orZ; self.orientW = orW 24 | self.tracker = tState 25 | 26 | def _calculate_cylindrical_coordinates(self): 27 | rho = np.sqrt(self.x**2 + self.y**2) 28 | if self.x == 0 and self.y == 0: 29 | phi = 0 30 | elif self.x >= 0: 31 | phi = np.arcsin(self.y/rho) 32 | elif self.x > 0: 33 | phi = np.arctan2(self.y, self.x) 34 | elif self.x < 0: 35 | phi = -np.arcsin(self.y/rho) + np.pi 36 | return rho, phi, self.z 37 | 38 | def _calculate_spherical_coordinates(self): 39 | xy = self.x**2 + self.y**2 40 | r = np.sqrt(xy + self.z**2) 41 | theta = np.arctan2(self.z, xy) 42 | if self.x == 0 and self.y == 0: 43 | phi = 0 44 | elif self.x >= 0: 45 | phi = np.arcsin(self.y/np.sqrt(xy)) 46 | elif self.x > 0: 47 | phi = np.arctan2(self.y, self.x) 48 | elif self.x < 0: 49 | phi = -np.arcsin(self.y/np.sqrt(xy)) + np.pi 50 | return r, theta, phi 51 | 52 | def _get_cartesian_coordinates(self): 53 | return self.x, self.y, self.z 54 | 55 | def _set_cartesian_coordinates(self, x, y, z): 56 | self.x = x; self.y = y; self.z = z 57 | 58 | def _get_cylindrical_coordinates(self): 59 | return self._calculate_cylindrical_coordinates() 60 | 61 | def _get_spherical_coordinates(self): 62 | return self._calculate_spherical_coordinates() 63 | 64 | def _get_depth_coordinates(self): 65 | return self.depthX, self.depthY 66 | 67 | def _get_rgb_coordinates(self): 68 | return self.colorX, self.colorY 69 | 70 | def _get_orientation_coordinates(self): 71 | return self.orientX, self.orientY, self.orientZ, self.orientW 72 | 73 | class Skeleton(object): 74 | def __init__(self, 75 | skelid=None, 76 | njoints=None, 77 | clip=None, 78 | lconfidence=None, lstate=None, 79 | rconfidence=None, rstate=None, 80 | restrict=None, 81 | lX=None, lY=None, 82 | tracker=None, 83 | joints=None): 84 | self.skeletonID = skelid 85 | self.num_joints = njoints 86 | if joints is None: 87 | self.joints = [] 88 | else: 89 | self.joints = joints 90 | self.clip_edges = clip 91 | self.left_hand_confidence = lconfidence 92 | self.left_hand_state = lstate 93 | self.right_hand_confidence = rconfidence 94 | self.right_hand_state = rstate 95 | self.tracker = tracker 96 | self._is_zero_skeleton = False 97 | 98 | def _get_skeleton_id(self): 99 | return self.skeletonID 100 | 101 | def _get_num_joints(self): 102 | return self.num_joints 103 | 104 | def _get_joint_objects(self): 105 | return self.joints 106 | 107 | def _get_clip_edges_val(self): 108 | return self.clip_edges 109 | 110 | def _get_left_hand(self): 111 | return self.left_hand_confidence, self.left_hand_state 112 | 113 | def _get_right_hand(self): 114 | return self.right_hand_confidence, self.right_hand_state 115 | 116 | def _set_joint_objects(self, joints): 117 | self.joints = joints 118 | 119 | def _add_joint_object(self, joint): 120 | self.joints.append(joint) 121 | 122 | 123 | class Frame(object): 124 | def __init__(self, 125 | nskels=None, 126 | skels=None): 127 | self.num_skeletons = nskels 128 | if skels is None: 129 | self.skeletons = [] 130 | 131 | def 
_get_num_skeletons(self): 132 | return self.num_skeletons 133 | 134 | def _get_skeleton_objects(self): 135 | return self.skeletons 136 | 137 | def _set_skeleton_objects(self, skeletons): 138 | self.skeletons = skeletons 139 | 140 | def _add_skeleton_object(self, skeleton): 141 | self.skeletons.append(skeleton) 142 | 143 | class SkeletonVideo(object): 144 | def __init__(self, 145 | nframes, 146 | frames=None): 147 | self.num_frames = nframes 148 | if frames is None: 149 | self.frames = [] 150 | 151 | def _get_num_frames(self): 152 | return self.num_frames 153 | 154 | def _get_frame_objects(self): 155 | return self.frames 156 | 157 | def _set_frame_objects(self, frames): 158 | self.frames = frames 159 | 160 | def _add_frame_object(self, frame): 161 | self.frames.append(frame) 162 | 163 | def _get_main_actor_skeletons(self): 164 | 165 | def _get_motion_for_skeletons(skeletons): 166 | total_dist = 0 167 | for i in range(len(skeletons)-1): 168 | joints_1 = skeletons[i]._get_joint_objects() 169 | joints_2 = skeletons[i+1]._get_joint_objects() 170 | for j, k in zip(joints_1, joints_2): 171 | p1 = np.array(j._get_cartesian_coordinates()) 172 | p2 = np.array(k._get_cartesian_coordinates()) 173 | dist = np.sqrt(np.sum(np.square(p1 - p2))) 174 | total_dist += dist 175 | return total_dist 176 | 177 | def _is_noisy_skeleton(skeleton): 178 | joints = skeleton._get_joint_objects() 179 | X = []; Y = [] 180 | for joint in joints: 181 | x, y, z = joint._get_cartesian_coordinates() 182 | X.append(x); Y.append(y) 183 | X = np.array(X); Y = np.array(Y) 184 | xspread = np.max(X) - np.min(X); yspread = np.max(Y) - np.min(Y) 185 | return (yspread / xspread) 186 | 187 | def _create_zero_skeleton(): 188 | joints = [] 189 | for i in range(25): 190 | joint = Joint(0.0, 0.0, 0.0) 191 | joints.append(joint) 192 | skeleton = Skeleton(njoints=len(joints), joints=joints) 193 | skeleton._is_zero_skeleton = True 194 | return skeleton 195 | 196 | skeletons_0 = []; skeletons_1 = [] 197 | for i in range(len(self.frames)): 198 | frame = self.frames[i] 199 | if frame._get_num_skeletons() == 1: 200 | skeletons_0.append(frame._get_skeleton_objects()[0]) 201 | skeletons_1.append(_create_zero_skeleton()) 202 | elif frame._get_num_skeletons() == 2: 203 | skeletons_0.append(frame._get_skeleton_objects()[0]) 204 | skeletons_1.append(frame._get_skeleton_objects()[1]) 205 | elif frame._get_num_skeletons() > 2: 206 | ratios = [] 207 | for i in range(frame._get_num_skeletons()): 208 | skeleton = frame._get_skeleton_objects()[i] 209 | ratios.append((_is_noisy_skeleton(skeleton), i)) 210 | ratios = sorted(ratios) 211 | idx = [x for (val, x) in ratios[-2:]] 212 | skeletons_0.append(frame._get_skeleton_objects()[idx[0]]) 213 | skeletons_1.append(frame._get_skeleton_objects()[idx[1]]) 214 | dist_0 = _get_motion_for_skeletons(skeletons_0) 215 | dist_1 = _get_motion_for_skeletons(skeletons_1) 216 | if dist_0 > dist_1: 217 | return skeletons_0, skeletons_1 218 | return skeletons_1, skeletons_0 219 | 220 | 221 | # Reads the data for a complete frame set from the NTU RGB+D Action Recognition Dataset 222 | # Included joints are: 223 | # -------------------------------------------------------------------------------------------------------------- 224 | # 0 - base of the spine 225 | # 1 - middle of the spine 226 | # 2 - neck 227 | # 3 - head 228 | # 4 - left shoulder 229 | # 5 - left elbow 230 | # 6 - left wrist 231 | # 7 - left hand 232 | # 8 - right shoulder 233 | # 9 - right elbow 234 | # 10 - right wrist 235 | # 11 - right hand 236 | # 12 - left 
hip 237 | # 13 - left knee 238 | # 14 - left ankle 239 | # 15 - left foot 240 | # 16 - right hip 241 | # 17 - right knee 242 | # 18 - right ankle 243 | # 19 - right foot 244 | # 20 - spine 245 | # 21 - tip of the left hand 246 | # 22 - left thumb 247 | # 23 - tip of the right hand 248 | # 24 - right thumb 249 | # -------------------------------------------------------------------------------------------------------------- 250 | class Reader(object): 251 | def __init__(self, 252 | dataset_dir, 253 | splits_dir): 254 | self.data = dataset_dir 255 | self.splits = splits_dir 256 | 257 | self.train_splits = {1: os.path.join(splits_dir, 'train_cs.txt'), 258 | 2: os.path.join(splits_dir, 'train_cv.txt')} 259 | self.test_splits = {1: os.path.join(splits_dir, 'test_cs.txt'), 260 | 2: os.path.join(splits_dir, 'test_cv.txt')} 261 | 262 | def _normalize_skeleton(self, skeleton): 263 | joints = skeleton._get_joint_objects() 264 | if not (len(joints) == 25): 265 | return None 266 | 267 | ''' Translation Matrix 268 | - T_x: neg(X coordinate of middle of spine) 269 | - T_y: neg(Y coordinate of middle of spine) 270 | - T_z: neg(Z coordinate of middle of spine) 271 | ''' 272 | origin = joints[1] 273 | transmat = np.zeros((4, 4)) 274 | transmat[0][0] = transmat[1][1] = transmat[2][2] = transmat[3][3] = 1.0 275 | transmat[3][0] = -origin.x; transmat[3][1] = -origin.y; transmat[3][2] = -origin.z 276 | 277 | ''' Rotation Matrix 278 | - New X axis: Right shoulder (rs) to Left shoulder (ls) joint 279 | - New Y axis: Base of spine (bsp) to Spine (sp) joint 280 | - New Z axis: In direction of X cross Y 281 | - Using arctan2 is always better than using arcsin/arccos, as they can be 282 | numerically unstable for certain values of the angles. 283 | ''' 284 | rs = joints[8]; ls = joints[4]; bsp = joints[0]; sp = joints[20] 285 | rs = np.array(rs._get_cartesian_coordinates()) 286 | ls = np.array(ls._get_cartesian_coordinates()) 287 | bsp = np.array(bsp._get_cartesian_coordinates()) 288 | sp = np.array(sp._get_cartesian_coordinates()) 289 | curr_x = np.array([1.0, 0.0, 0.0]); new_x = np.add(rs, -ls) 290 | curr_y = np.array([0.0, 1.0, 0.0]); new_y = np.add(bsp, -sp) 291 | curr_z = np.array([0.0, 0.0, 1.0]); new_z = np.cross(new_x, new_y) 292 | # Dot and cross product both needed for arctan2 293 | x_dot = np.dot(new_x, curr_x); y_dot = np.dot(new_y, curr_y); z_dot = np.dot(new_z, curr_z) 294 | x_cross = np.cross(new_x, curr_x); y_cross = np.cross(new_y, curr_y); z_cross = np.cross(new_z, curr_z) 295 | # arccos is numerically unstable when angles are near zero 296 | theta_x = np.arctan2(np.linalg.norm(x_cross), x_dot) 297 | theta_y = np.arctan2(np.linalg.norm(y_cross), y_dot) 298 | theta_z = np.arctan2(np.linalg.norm(z_cross), z_dot) 299 | rot_x = np.zeros((4, 4)); rot_x[0][0] = 1.0; rot_x[3][3] = 1.0 300 | rot_x[1][1] = np.cos(theta_x); rot_x[1][2] = -np.sin(theta_x) 301 | rot_x[2][1] = np.sin(theta_x); rot_x[2][2] = np.cos(theta_x) 302 | rot_y = np.zeros((4, 4)); rot_y[1][1] = 1.0; rot_y[3][3] = 1.0 303 | rot_y[0][0] = np.cos(theta_y); rot_y[0][2] = np.sin(theta_y) 304 | rot_y[2][0] = -np.sin(theta_y); rot_y[2][2] = np.cos(theta_y) 305 | rot_z = np.zeros((4, 4)); rot_z[2][2] = 1.0; rot_z[3][3] = 1.0 306 | rot_z[0][0] = np.cos(theta_z); rot_z[0][1] = -np.sin(theta_z) 307 | rot_z[1][0] = np.sin(theta_z); rot_z[1][1] = np.cos(theta_z) 308 | 309 | ''' Scaling Matrix 310 | - S_x, S_y, S_z: Inverse of distance between Base of spine (bsp) and 311 | spine (sp) joint; add a small delta to avoid division by zero (in case 312 | of 
very small values of distance). 313 | ''' 314 | scale = np.zeros((4, 4)); scale[3][3] = 1.0 315 | dist = np.linalg.norm(new_y) + 1e-4 316 | scale[0][0] = 1.0/(dist); scale[1][1] = 1.0/(dist); scale[2][2] = 1.0/(dist) 317 | 318 | new_joints = [] 319 | for joint in joints: 320 | x, y, z = joint._get_cartesian_coordinates() 321 | # (4,) : Homogeneous coordinates 322 | j = np.array([x, y, z, 1.0]) 323 | # (4,) x (4, 4) = (4,) for all matrix multiplications 324 | j = np.matmul(j, transmat) 325 | j = np.matmul(j, rot_x); j = np.matmul(j, rot_y); j = np.matmul(j, rot_z) 326 | j = np.matmul(j, scale) 327 | # Get (x, y, z) from Homogeneous coordinates 328 | joint._set_cartesian_coordinates(j[0], j[1], j[2]) 329 | new_joints.append(joint) 330 | skeleton._set_joint_objects(new_joints) 331 | 332 | return skeleton 333 | 334 | def _read_skeleton_file(self, f): 335 | fpath = os.path.join(self.data, f) 336 | with open(fpath, 'r') as f: 337 | out = f.read().replace('\n', '').split() 338 | 339 | index = 0 340 | nframes = int(out[index]); index += 1 341 | video = SkeletonVideo(nframes) 342 | for i in range(nframes): 343 | bods = int(out[index]); index += 1 344 | frame = Frame() 345 | for j in range(bods): 346 | skelid = long(out[index]); index += 1 347 | 348 | cedges = int(out[index]); index += 1 349 | lconfidence = int(out[index]); index += 1 350 | lstate = int(out[index]); index += 1 351 | rconfidence = int(out[index]); index += 1 352 | rstate = int(out[index]); index += 1 353 | restrict = int(out[index]); index += 1 354 | 355 | lX = float(out[index]); index += 1 356 | lY = float(out[index]); index += 1 357 | 358 | track = int(out[index]); index += 1 359 | 360 | num_joints = int(out[index]); index += 1 361 | skeleton = Skeleton(skelid, num_joints, 362 | cedges, lconfidence, lstate, 363 | rconfidence, rstate, restrict, 364 | lX, lY, track) 365 | 366 | for k in range(num_joints): 367 | x = float(out[index]); index += 1 368 | y = float(out[index]); index += 1 369 | z = float(out[index]); index += 1 370 | 371 | dX = float(out[index]); index += 1 372 | dY = float(out[index]); index += 1 373 | 374 | cX = float(out[index]); index += 1 375 | cY = float(out[index]); index += 1 376 | 377 | orW = float(out[index]); index += 1 378 | orX = float(out[index]); index += 1 379 | orY = float(out[index]); index += 1 380 | orZ = float(out[index]); index += 1 381 | 382 | track = int(out[index]); index += 1 383 | 384 | joint = Joint(x, y, z, 385 | dX, dY, 386 | cX, cY, 387 | orX, orY, orZ, orW, 388 | track) 389 | skeleton._add_joint_object(joint) 390 | skeleton = self._normalize_skeleton(skeleton) 391 | if skeleton: 392 | frame._add_skeleton_object(skeleton) 393 | frame.num_skeletons = len(frame._get_skeleton_objects()) 394 | video._add_frame_object(frame) 395 | return video 396 | 397 | def _generate_image_representation_no_features(self, skeleton): 398 | joints = skeleton._get_joint_objects() 399 | xlist = []; ylist = []; zlist = [] 400 | for joint in joints: 401 | x, y, z = joint._get_cartesian_coordinates() 402 | xlist.append(x); ylist.append(y); zlist.append(z) 403 | jointlist = [[0, 1, 2, 3], [0, 16, 17, 18, 19], [0, 12, 13, 14, 15], 404 | [20, 4, 5, 6, 7, 21], [20, 8, 9, 10, 11, 23], [11, 24], [7, 22]] 405 | fig = plt.figure() 406 | ax = fig.add_subplot(111, projection='3d') 407 | for i in range(len(jointlist)): 408 | x_plot = []; y_plot = []; z_plot = [] 409 | for j in jointlist[i]: 410 | x_plot.append(xlist[j]); y_plot.append(ylist[j]); z_plot.append(zlist[j]) 411 | ax.scatter(x_plot, y_plot, z_plot, c = 'b') 412 | 
ax.plot(x_plot, y_plot, z_plot, c = 'b') 413 | plt.show() 414 | -------------------------------------------------------------------------------- /models/HybridModel/inception_resnet_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the definition of the Inception Resnet V2 architecture. 16 | 17 | As described in http://arxiv.org/abs/1602.07261. 18 | 19 | Inception-v4, Inception-ResNet and the Impact of Residual Connections 20 | on Learning 21 | Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | slim = tf.contrib.slim 31 | 32 | 33 | def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 34 | """Builds the 35x35 resnet block.""" 35 | with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): 36 | with tf.variable_scope('Branch_0'): 37 | tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') 38 | with tf.variable_scope('Branch_1'): 39 | tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') 40 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') 41 | with tf.variable_scope('Branch_2'): 42 | tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') 43 | tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') 44 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') 45 | mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2]) 46 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 47 | activation_fn=None, scope='Conv2d_1x1') 48 | net += scale * up 49 | if activation_fn: 50 | net = activation_fn(net) 51 | return net 52 | 53 | 54 | def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 55 | """Builds the 17x17 resnet block.""" 56 | with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): 57 | with tf.variable_scope('Branch_0'): 58 | tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') 59 | with tf.variable_scope('Branch_1'): 60 | tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') 61 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], 62 | scope='Conv2d_0b_1x7') 63 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], 64 | scope='Conv2d_0c_7x1') 65 | mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) 66 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 67 | activation_fn=None, scope='Conv2d_1x1') 68 | net += scale * up 69 | if activation_fn: 70 | net = activation_fn(net) 71 | return net 72 | 73 | 74 | def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): 75 | """Builds the 8x8 
resnet block.""" 76 | with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): 77 | with tf.variable_scope('Branch_0'): 78 | tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') 79 | with tf.variable_scope('Branch_1'): 80 | tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') 81 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], 82 | scope='Conv2d_0b_1x3') 83 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], 84 | scope='Conv2d_0c_3x1') 85 | mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) 86 | up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, 87 | activation_fn=None, scope='Conv2d_1x1') 88 | net += scale * up 89 | if activation_fn: 90 | net = activation_fn(net) 91 | return net 92 | 93 | 94 | def inception_resnet_v2_base(inputs, 95 | final_endpoint='Conv2d_7b_1x1', 96 | output_stride=16, 97 | align_feature_maps=False, 98 | scope=None): 99 | """Inception model from http://arxiv.org/abs/1602.07261. 100 | 101 | Constructs an Inception Resnet v2 network from inputs to the given final 102 | endpoint. This method can construct the network up to the final inception 103 | block Conv2d_7b_1x1. 104 | 105 | Args: 106 | inputs: a tensor of size [batch_size, height, width, channels]. 107 | final_endpoint: specifies the endpoint to construct the network up to. It 108 | can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 109 | 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 110 | 'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1'] 111 | output_stride: A scalar that specifies the requested ratio of input to 112 | output spatial resolution. Only supports 8 and 16. 113 | align_feature_maps: When true, changes all the VALID paddings in the network 114 | to SAME padding so that the feature maps are aligned. 115 | scope: Optional variable_scope. 116 | 117 | Returns: 118 | tensor_out: output tensor corresponding to the final_endpoint. 119 | end_points: a set of activations for external use, for example summaries or 120 | losses. 121 | 122 | Raises: 123 | ValueError: if final_endpoint is not set to one of the predefined values, 124 | or if the output_stride is not 8 or 16, or if the output_stride is 8 and 125 | we request an end point after 'PreAuxLogits'. 
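  Example (illustrative sketch, assuming the defaults from
  inception_resnet_v2_arg_scope defined later in this file):
    images = tf.placeholder(tf.float32, [None, 299, 299, 3])
    with slim.arg_scope(inception_resnet_v2_arg_scope()):
      net, end_points = inception_resnet_v2_base(
          images, final_endpoint='PreAuxLogits')
    # `net` is the PreAuxLogits feature map; earlier activations such as
    # end_points['Mixed_6a'] are also available in `end_points`.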
126 | """ 127 | if output_stride != 8 and output_stride != 16: 128 | raise ValueError('output_stride must be 8 or 16.') 129 | 130 | padding = 'SAME' if align_feature_maps else 'VALID' 131 | 132 | end_points = {} 133 | 134 | def add_and_check_final(name, net): 135 | end_points[name] = net 136 | return name == final_endpoint 137 | 138 | with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]): 139 | with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], 140 | stride=1, padding='SAME'): 141 | # 149 x 149 x 32 142 | net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding, 143 | scope='Conv2d_1a_3x3') 144 | if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points 145 | 146 | # 147 x 147 x 32 147 | net = slim.conv2d(net, 32, 3, padding=padding, 148 | scope='Conv2d_2a_3x3') 149 | if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points 150 | # 147 x 147 x 64 151 | net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') 152 | if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points 153 | # 73 x 73 x 64 154 | net = slim.max_pool2d(net, 3, stride=2, padding=padding, 155 | scope='MaxPool_3a_3x3') 156 | if add_and_check_final('MaxPool_3a_3x3', net): return net, end_points 157 | # 73 x 73 x 80 158 | net = slim.conv2d(net, 80, 1, padding=padding, 159 | scope='Conv2d_3b_1x1') 160 | if add_and_check_final('Conv2d_3b_1x1', net): return net, end_points 161 | # 71 x 71 x 192 162 | net = slim.conv2d(net, 192, 3, padding=padding, 163 | scope='Conv2d_4a_3x3') 164 | if add_and_check_final('Conv2d_4a_3x3', net): return net, end_points 165 | # 35 x 35 x 192 166 | net = slim.max_pool2d(net, 3, stride=2, padding=padding, 167 | scope='MaxPool_5a_3x3') 168 | if add_and_check_final('MaxPool_5a_3x3', net): return net, end_points 169 | 170 | # 35 x 35 x 320 171 | with tf.variable_scope('Mixed_5b'): 172 | with tf.variable_scope('Branch_0'): 173 | tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') 174 | with tf.variable_scope('Branch_1'): 175 | tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') 176 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, 177 | scope='Conv2d_0b_5x5') 178 | with tf.variable_scope('Branch_2'): 179 | tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') 180 | tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, 181 | scope='Conv2d_0b_3x3') 182 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, 183 | scope='Conv2d_0c_3x3') 184 | with tf.variable_scope('Branch_3'): 185 | tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', 186 | scope='AvgPool_0a_3x3') 187 | tower_pool_1 = slim.conv2d(tower_pool, 64, 1, 188 | scope='Conv2d_0b_1x1') 189 | net = tf.concat( 190 | [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3) 191 | 192 | if add_and_check_final('Mixed_5b', net): return net, end_points 193 | # TODO(alemi): Register intermediate endpoints 194 | net = slim.repeat(net, 10, block35, scale=0.17) 195 | 196 | # 17 x 17 x 1088 if output_stride == 8, 197 | # 33 x 33 x 1088 if output_stride == 16 198 | use_atrous = output_stride == 8 199 | 200 | with tf.variable_scope('Mixed_6a'): 201 | with tf.variable_scope('Branch_0'): 202 | tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2, 203 | padding=padding, 204 | scope='Conv2d_1a_3x3') 205 | with tf.variable_scope('Branch_1'): 206 | tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 207 | tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, 208 | scope='Conv2d_0b_3x3') 209 | tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, 210 | stride=1 if 
use_atrous else 2, 211 | padding=padding, 212 | scope='Conv2d_1a_3x3') 213 | with tf.variable_scope('Branch_2'): 214 | tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2, 215 | padding=padding, 216 | scope='MaxPool_1a_3x3') 217 | net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) 218 | 219 | if add_and_check_final('Mixed_6a', net): return net, end_points 220 | 221 | # TODO(alemi): register intermediate endpoints 222 | with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1): 223 | net = slim.repeat(net, 20, block17, scale=0.10) 224 | if add_and_check_final('PreAuxLogits', net): return net, end_points 225 | 226 | if output_stride == 8: 227 | # TODO(gpapan): Properly support output_stride for the rest of the net. 228 | raise ValueError('output_stride==8 is only supported up to the ' 229 | 'PreAuxlogits end_point for now.') 230 | 231 | # 8 x 8 x 2080 232 | with tf.variable_scope('Mixed_7a'): 233 | with tf.variable_scope('Branch_0'): 234 | tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 235 | tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, 236 | padding=padding, 237 | scope='Conv2d_1a_3x3') 238 | with tf.variable_scope('Branch_1'): 239 | tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 240 | tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, 241 | padding=padding, 242 | scope='Conv2d_1a_3x3') 243 | with tf.variable_scope('Branch_2'): 244 | tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') 245 | tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, 246 | scope='Conv2d_0b_3x3') 247 | tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, 248 | padding=padding, 249 | scope='Conv2d_1a_3x3') 250 | with tf.variable_scope('Branch_3'): 251 | tower_pool = slim.max_pool2d(net, 3, stride=2, 252 | padding=padding, 253 | scope='MaxPool_1a_3x3') 254 | net = tf.concat( 255 | [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) 256 | 257 | if add_and_check_final('Mixed_7a', net): return net, end_points 258 | 259 | # TODO(alemi): register intermediate endpoints 260 | net = slim.repeat(net, 9, block8, scale=0.20) 261 | net = block8(net, activation_fn=None) 262 | 263 | # 8 x 8 x 1536 264 | net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') 265 | if add_and_check_final('Conv2d_7b_1x1', net): return net, end_points 266 | 267 | raise ValueError('final_endpoint (%s) not recognized', final_endpoint) 268 | 269 | 270 | def inception_resnet_v2(inputs, num_classes=1001, is_training=True, 271 | dropout_keep_prob=0.8, 272 | reuse=None, 273 | scope='InceptionResnetV2', 274 | create_aux_logits=True): 275 | """Creates the Inception Resnet V2 model. 276 | 277 | Args: 278 | inputs: a 4-D tensor of size [batch_size, height, width, 3]. 279 | num_classes: number of predicted classes. 280 | is_training: whether is training or not. 281 | dropout_keep_prob: float, the fraction to keep before final layer. 282 | reuse: whether or not the network and its variables should be reused. To be 283 | able to reuse 'scope' must be given. 284 | scope: Optional variable_scope. 285 | create_aux_logits: Whether to include the auxilliary logits. 286 | 287 | Returns: 288 | logits: the logits outputs of the model. 289 | end_points: the set of end_points from the inception model. 
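  Example (illustrative sketch; `images` is assumed to be a
  [batch, 299, 299, 3] float tensor and 60 is the NTU RGB+D class count used
  elsewhere in this repository):
    with slim.arg_scope(inception_resnet_v2_arg_scope()):
      logits, end_points = inception_resnet_v2(images, num_classes=60,
                                               is_training=True)
    probabilities = end_points['Predictions']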
290 | """ 291 | end_points = {} 292 | 293 | with tf.variable_scope(scope, 'InceptionResnetV2', [inputs, num_classes], 294 | reuse=reuse) as scope: 295 | with slim.arg_scope([slim.batch_norm, slim.dropout], 296 | is_training=is_training): 297 | 298 | net, end_points = inception_resnet_v2_base(inputs, scope=scope) 299 | 300 | if create_aux_logits: 301 | with tf.variable_scope('AuxLogits'): 302 | aux = end_points['PreAuxLogits'] 303 | aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID', 304 | scope='Conv2d_1a_3x3') 305 | aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') 306 | aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], 307 | padding='VALID', scope='Conv2d_2a_5x5') 308 | aux = slim.flatten(aux) 309 | aux = slim.fully_connected(aux, num_classes, activation_fn=None, 310 | scope='Logits') 311 | end_points['AuxLogits'] = aux 312 | 313 | with tf.variable_scope('Logits'): 314 | net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', 315 | scope='AvgPool_1a_8x8') 316 | net = slim.flatten(net) 317 | 318 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 319 | scope='Dropout') 320 | 321 | end_points['PreLogitsFlatten'] = net 322 | logits = slim.fully_connected(net, num_classes, activation_fn=None, 323 | scope='Logits') 324 | end_points['Logits'] = logits 325 | end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') 326 | 327 | return logits, end_points 328 | inception_resnet_v2.default_image_size = 299 329 | 330 | 331 | def inception_resnet_v2_arg_scope(weight_decay=0.00004, 332 | batch_norm_decay=0.9997, 333 | batch_norm_epsilon=0.001): 334 | """Yields the scope with the default parameters for inception_resnet_v2. 335 | 336 | Args: 337 | weight_decay: the weight decay for weights variables. 338 | batch_norm_decay: decay for the moving average of batch_norm momentums. 339 | batch_norm_epsilon: small float added to variance to avoid dividing by zero. 340 | 341 | Returns: 342 | a arg_scope with the parameters needed for inception_resnet_v2. 343 | """ 344 | # Set weight_decay for weights in conv2d and fully_connected layers. 345 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 346 | weights_regularizer=slim.l2_regularizer(weight_decay), 347 | biases_regularizer=slim.l2_regularizer(weight_decay)): 348 | 349 | batch_norm_params = { 350 | 'decay': batch_norm_decay, 351 | 'epsilon': batch_norm_epsilon, 352 | } 353 | # Set activation_fn and parameters for batch_norm. 
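    # `batch_norm_params` above is passed as normalizer_params below, so every
    # conv2d built under the returned scope uses ReLU followed by batch
    # normalization with the decay/epsilon chosen by the caller.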
354 | with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, 355 | normalizer_fn=slim.batch_norm, 356 | normalizer_params=batch_norm_params) as scope: 357 | return scope 358 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os, argparse, shutil, time, glob 2 | 3 | import tensorflow as tf 4 | from tensorflow import app 5 | from tensorflow import flags 6 | from tensorflow import logging 7 | from tensorflow import gfile 8 | from tensorflow.python.client import device_lib 9 | from tensorflow.python.framework import ops 10 | from tensorflow.python.framework import dtypes 11 | from tensorflow.python.tools.inspect_checkpoint import * 12 | 13 | from tensorflow.contrib.tensorboard.plugins import projector 14 | 15 | import data, models 16 | from utils import * 17 | from test import * 18 | 19 | slim = tf.contrib.slim 20 | layers = tf.contrib.layers 21 | framework = tf.contrib.framework 22 | 23 | FLAGS = flags.FLAGS 24 | 25 | if __name__ == '__main__': 26 | flags.DEFINE_string("train_dir", "", 27 | "Directory to save the model files in") 28 | flags.DEFINE_string("dataset", "HybridModelReader", "Which dataset to load \ 29 | for Action Recognition") 30 | flags.DEFINE_string("dataset_dir", "", \ 31 | "Path to base directory for video frames (rgb / rgb+flow)") 32 | flags.DEFINE_string("splits_dir", "", \ 33 | "Directory where train and test splits are stored") 34 | flags.DEFINE_string("checkpoint_file", "", \ 35 | "Checkpoint file to restore variables") 36 | flags.DEFINE_string("model", "Hybrid", "Which architecture to use for the model") 37 | flags.DEFINE_string("label_loss", "CrossEntropyLoss", "Which loss function to use \ 38 | for training the model") 39 | flags.DEFINE_string("optimizer", "AdamOptimizer", "What optimizer class to use") 40 | flags.DEFINE_string("split_num", "1", "The train/test split to run the model on") 41 | 42 | flags.DEFINE_integer("batch_size", 24, "Number of examples to process per batch \ 43 | for training") 44 | flags.DEFINE_integer("num_epochs", 50, "How many passes to make over the dataset \ 45 | before halting training") 46 | flags.DEFINE_integer("export_model_steps", 10000, "The period, in number of steps, \ 47 | with which the model is exported for batch prediction") 48 | flags.DEFINE_integer("max_steps", None, "The maximum number of iterations of the \ 49 | training loop") 50 | flags.DEFINE_integer("learning_rate_decay_examples", 10000000, "Multiply current learning \ 51 | rate by learning_rate_decay every learning_rate_decay_examples") 52 | 53 | flags.DEFINE_float("base_learning_rate", 0.01, "Which learning rate to start with") 54 | flags.DEFINE_float("learning_rate_decay", 0.95, "Learning rate decay factor to be \ 55 | applied every learning_rate_decay_examples") 56 | flags.DEFINE_float("clip_gradient_norm", 1.0, "Norm to clip gradients to") 57 | flags.DEFINE_float("regularization_penalty", 0.00005, "How much weight to give to the \ 58 | regularization loss (the label loss has a weight of 1).") 59 | 60 | flags.DEFINE_bool("start_new_model", False, "If set, this will not resume from a checkpoint \ 61 | and will instead create a new model instance") 62 | flags.DEFINE_bool("log_device_placement", False, "Whether to write the device on which every \ 63 | op will run into the logs on startup.") 64 | 65 | 66 | def get_input_data_tensors(reader, 67 | data_pattern, 68 | batch_size=2, 69 | num_epochs=None, 70 | num_readers=1): 71 | 
logging.info("Using batch size of " + str(batch_size) + " for training.") 72 | files, labels = reader._read_filelist(split=reader.present_split) 73 | 74 | with tf.name_scope("train_input"): 75 | logging.info("Number of training files: %s", str(len(files))) 76 | 77 | files = ops.convert_to_tensor(files, dtypes.string) 78 | labels = ops.convert_to_tensor(labels, dtypes.int64) 79 | 80 | input_queue = tf.train.slice_input_producer( 81 | [files, labels], 82 | num_epochs = num_epochs, 83 | shuffle = True) 84 | image, label = reader._read_samples(input_queue) 85 | 86 | train_image_loader, train_label_loader = tf.train.shuffle_batch( 87 | [image, label], 88 | batch_size = batch_size, 89 | capacity = 5 * batch_size, 90 | min_after_dequeue = batch_size) 91 | 92 | return train_image_loader, train_label_loader 93 | 94 | def build_graph(reader, 95 | model, 96 | split_num, 97 | label_loss_fn=losses.CrossEntropyLoss(), 98 | batch_size=1000, 99 | base_learning_rate=0.01, 100 | learning_rate_decay_examples=1000000, 101 | learning_rate_decay=0.95, 102 | optimizer_class=tf.train.AdamOptimizer, 103 | clip_gradient_norm=1.0, 104 | regularization_penalty=1, 105 | num_readers=1, 106 | num_epochs=None): 107 | 108 | global_step = tf.Variable(0, name="global_step", trainable=False) 109 | 110 | learning_rate = tf.train.exponential_decay( 111 | base_learning_rate, 112 | global_step * batch_size, 113 | learning_rate_decay_examples, 114 | learning_rate_decay, 115 | staircase=True) 116 | tf.summary.scalar('learning_rate', learning_rate) 117 | 118 | optimizer = optimizer_class(learning_rate) 119 | images_loader, labels_loader = ( 120 | get_input_data_tensors( 121 | reader, 122 | reader.train_split_files[split_num], 123 | batch_size=batch_size, 124 | num_readers=num_readers, 125 | num_epochs=num_epochs)) 126 | images_batch = tf.placeholder(tf.float32, (None, 224, 224, 3)) 127 | labels_batch = tf.placeholder(tf.int64, (None,)) 128 | # (224, 224, 3) -> (14, 14, 512) 129 | feature_0, restore_vars_0, train_v0 = model.create_feature_model( 130 | images_batch, scope="rgb") 131 | feature_1, restore_vars_1, train_v1 = model.create_feature_model( 132 | images_batch, scope="rgbdiff") 133 | # (14, 14, 512) -> (7168,) 134 | aux_feat_batch = tf.placeholder(tf.float32, (None, 14, 14, 512)) 135 | aux_output, train_v2 = model.create_aux_model( 136 | aux_feat_batch) 137 | # (21504,) -> (60,) 138 | aux_fc_batch_0 = tf.placeholder(tf.float32, (None, 21504)) 139 | logits_aux_0, train_v3 = model.create_logits_model( 140 | aux_fc_batch_0, 60, scope="auxlogs", reuse=None) 141 | # (21504,) -> (60,) 142 | aux_fc_batch_1 = tf.placeholder(tf.float32, (None, 21504)) 143 | logits_aux_1, train_v4 = model.create_logits_model( 144 | aux_fc_batch_1, 60, scope="auxlogs", reuse=True) 145 | # (21504,) -> (60,) 146 | aux_fc_batch_2 = tf.placeholder(tf.float32, (None, 21504)) 147 | logits_aux_2, train_v5 = model.create_logits_model( 148 | aux_fc_batch_2, 60, scope="auxlogs", reuse=True) 149 | # (21504,) -> (60,) 150 | aux_fc_batch_3 = tf.placeholder(tf.float32, (None, 21504)) 151 | logits_aux_3, train_v6 = model.create_logits_model( 152 | aux_fc_batch_3, 60, scope="auxlogs", reuse=True) 153 | 154 | loss_0 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_0, labels=labels_batch)) 155 | loss_1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_1, labels=labels_batch)) 156 | loss_2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_2, labels=labels_batch)) 157 | loss_3 
= tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_aux_3, labels=labels_batch)) 158 | 159 | loss = loss_0 + loss_1 + loss_2 + loss_3 160 | predictions = ( logits_aux_0 + logits_aux_1 + logits_aux_2 + logits_aux_3 ) 161 | 162 | train_vars = train_v0 163 | train_vars.extend(train_v1) 164 | train_vars.extend(train_v2) 165 | train_vars.extend(train_v3) 166 | train_vars.extend(train_v4) 167 | train_vars.extend(train_v5) 168 | train_vars.extend(train_v6) 169 | train_op = optimizer.minimize(loss, global_step=global_step, var_list=train_vars) 170 | 171 | tf.add_to_collection("global_step", global_step) 172 | tf.add_to_collection("loss", loss) 173 | tf.add_to_collection("feature_0", feature_0) 174 | tf.add_to_collection("feature_1", feature_1) 175 | tf.add_to_collection("aux_feat_batch", aux_feat_batch) 176 | tf.add_to_collection("aux_output", aux_output) 177 | tf.add_to_collection("aux_fc_batch_0", aux_fc_batch_0) 178 | tf.add_to_collection("logits_aux_0", logits_aux_0) 179 | tf.add_to_collection("aux_fc_batch_1", aux_fc_batch_1) 180 | tf.add_to_collection("logits_aux_1", logits_aux_1) 181 | tf.add_to_collection("aux_fc_batch_2", aux_fc_batch_2) 182 | tf.add_to_collection("logits_aux_2", logits_aux_2) 183 | tf.add_to_collection("aux_fc_batch_3", aux_fc_batch_3) 184 | tf.add_to_collection("logits_aux_3", logits_aux_3) 185 | tf.add_to_collection("input_batch", images_batch) 186 | tf.add_to_collection("labels", labels_batch) 187 | tf.add_to_collection("predictions", predictions) 188 | tf.add_to_collection("train_op", train_op) 189 | tf.add_to_collection("images_loader", images_loader) 190 | tf.add_to_collection("labels_loader", labels_loader) 191 | 192 | restore_vars_0_dict = {v.name[4:][:-2]: v 193 | for v in restore_vars_0} 194 | restore_vars_1_dict = {v.name[8:][:-2]: v 195 | for v in restore_vars_1} 196 | return restore_vars_0_dict, restore_vars_1_dict 197 | 198 | def find_class_by_name(name, modules): 199 | modules = [getattr(module, name, None) for module in modules] 200 | return next(a for a in modules if a) 201 | 202 | def task_as_string(task): 203 | return "/job:%s/task:%s" % (task.type, task.index) 204 | 205 | class Trainer(object): 206 | def __init__(self, cluster, task, train_dir, model, reader, 207 | log_device_placement=True, max_steps=None, 208 | export_model_steps=1000): 209 | 210 | self.cluster = cluster 211 | self.task = task 212 | self.is_master = (task.type == "master" and task.index == 0) 213 | self.train_dir = train_dir 214 | self.config = tf.ConfigProto( 215 | allow_soft_placement=True, log_device_placement=log_device_placement) 216 | self.model = model 217 | self.reader = reader 218 | self.max_steps = max_steps 219 | self.max_steps_reached = False 220 | self.export_model_steps = export_model_steps 221 | self.last_model_export_step = 0 222 | 223 | def remove_training_directory(self, train_dir): 224 | """Removes the training directory.""" 225 | try: 226 | logging.info( 227 | "%s: Removing existing train directory.", 228 | task_as_string(self.task)) 229 | gfile.DeleteRecursively(train_dir) 230 | except: 231 | logging.error( 232 | "%s: Failed to delete directory " + train_dir + 233 | " when starting a new model. 
Please delete it manually and" + 234 | " try again.", task_as_string(self.task)) 235 | 236 | def start_server_if_distributed(self): 237 | """ Starts a server if the execution is distributed """ 238 | 239 | if self.cluster: 240 | logging.info("%s: Starting trainer within cluster %s.", 241 | task_as_string(self.task), self.cluster.as_dict()) 242 | server = start_server(self.cluster, self.task) 243 | target = server.target 244 | device_fn = tf.train.replica_device_setter( 245 | ps_device="/job:ps", 246 | worker_device="/job:%s/task:%d" % (self.task.type, self.task.index), 247 | cluster=self.cluster) 248 | else: 249 | target = "" 250 | device_fn = "" 251 | return (target, device_fn) 252 | 253 | def get_meta_filename(self, start_new_model, train_dir): 254 | if start_new_model: 255 | logging.info("%s: Flag 'start_new_model' is set. Building a new model.", 256 | task_as_string(self.task)) 257 | return None 258 | 259 | latest_checkpoint = tf.train.latest_checkpoint(train_dir) 260 | if not latest_checkpoint: 261 | logging.info("%s: No checkpoint file found. Building a new model.", 262 | task_as_string(self.task)) 263 | return None 264 | 265 | meta_filename = latest_checkpoint + ".meta" 266 | if not gfile.Exists(meta_filename): 267 | logging.info("%s: No meta graph file found. Building a new model.", 268 | task_as_string(self.task)) 269 | return None 270 | else: 271 | return meta_filename 272 | 273 | def recover_model(self, meta_filename): 274 | logging.info("%s: Restoring from meta graph file %s", 275 | task_as_string(self.task), meta_filename) 276 | return tf.train.import_meta_graph(meta_filename) 277 | 278 | def build_model(self, model, reader): 279 | """ Find the model and build the graph """ 280 | 281 | label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])() 282 | optimizer_class = find_class_by_name(FLAGS.optimizer, [tf.train]) 283 | 284 | restore_vars = \ 285 | build_graph(reader=reader, 286 | model=model, 287 | optimizer_class=optimizer_class, 288 | clip_gradient_norm=FLAGS.clip_gradient_norm, 289 | split_num=FLAGS.split_num, 290 | label_loss_fn=label_loss_fn, 291 | base_learning_rate=FLAGS.base_learning_rate, 292 | learning_rate_decay=FLAGS.learning_rate_decay, 293 | learning_rate_decay_examples=FLAGS.learning_rate_decay_examples, 294 | regularization_penalty=FLAGS.regularization_penalty, 295 | num_readers=1, 296 | batch_size=FLAGS.batch_size, 297 | num_epochs=FLAGS.num_epochs) 298 | 299 | saver_0 = tf.train.Saver(var_list=restore_vars[0]) 300 | saver_1 = tf.train.Saver(var_list=restore_vars[1]) 301 | 302 | return saver_0, saver_1, tf.train.Saver(max_to_keep=2) 303 | 304 | def run(self, start_new_model=0): 305 | if self.is_master and start_new_model: 306 | self.remove_training_directory(self.train_dir) 307 | 308 | target, device_fn = self.start_server_if_distributed() 309 | 310 | meta_filename = self.get_meta_filename(start_new_model, self.train_dir) 311 | 312 | with tf.Graph().as_default() as graph: 313 | if meta_filename: 314 | saver = self.recover_model(meta_filename) 315 | 316 | with tf.device(device_fn): 317 | if not meta_filename: 318 | saver_0, saver_1, saver = self.build_model(self.model, self.reader) 319 | 320 | global_step = tf.get_collection("global_step")[0] 321 | loss = tf.get_collection("loss")[0] 322 | predictions = tf.get_collection("predictions")[0] 323 | labels = tf.get_collection("labels")[0] 324 | inputs = tf.get_collection("input_batch")[0] 325 | train_op = tf.get_collection("train_op")[0] 326 | feature_0 = tf.get_collection("feature_0")[0] 327 | feature_1 = 
tf.get_collection("feature_1")[0] 328 | aux_feat_batch = tf.get_collection("aux_feat_batch")[0] 329 | aux_output = tf.get_collection("aux_output")[0] 330 | aux_fc_batch_0 = tf.get_collection("aux_fc_batch_0")[0] 331 | logits_aux_0 = tf.get_collection("logits_aux_0")[0] 332 | aux_fc_batch_1 = tf.get_collection("aux_fc_batch_1")[0] 333 | logits_aux_1 = tf.get_collection("logits_aux_1")[0] 334 | aux_fc_batch_2 = tf.get_collection("aux_fc_batch_2")[0] 335 | logits_aux_2 = tf.get_collection("logits_aux_2")[0] 336 | aux_fc_batch_3 = tf.get_collection("aux_fc_batch_3")[0] 337 | logits_aux_3 = tf.get_collection("logits_aux_3")[0] 338 | inputs_loader = tf.get_collection("images_loader")[0] 339 | labels_loader = tf.get_collection("labels_loader")[0] 340 | if not meta_filename: 341 | init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 342 | else: 343 | init_op = tf.global_variables_initializer() 344 | 345 | sv = tf.train.Supervisor( 346 | graph, 347 | logdir=self.train_dir, 348 | init_op=init_op, 349 | is_chief=self.is_master, 350 | global_step=global_step, 351 | save_model_secs=60 * 60, 352 | save_summaries_secs=7200, 353 | saver=saver) 354 | 355 | with tf.Session(graph=graph) as sess: 356 | if not meta_filename: 357 | saver_0.restore(sess, FLAGS.checkpoint_file) 358 | saver_1.restore(sess, FLAGS.checkpoint_file) 359 | 360 | pp = os.path.join(FLAGS.train_dir, 'best_checkpoint_path') 361 | if os.path.exists(pp): 362 | with open(pp, 'r') as f: 363 | lines = f.readlines() 364 | if len(lines): 365 | line = lines[-1].strip().split(',')[1].strip() 366 | global_h1 = float(line) 367 | else: 368 | global_h1 = -1 369 | else: 370 | global_h1 = -1 371 | validation_steps = 10001 372 | logging.info("%s: Starting managed session.", task_as_string(self.task)) 373 | with sv.managed_session(config=self.config) as sess: 374 | try: 375 | logging.info("%s: Entering training loop.", task_as_string(self.task)) 376 | while (not sv.should_stop()) and (not self.max_steps_reached): 377 | 378 | batch_start_time = time.time() 379 | input_batch, label_batch = sess.run([inputs_loader, labels_loader]) 380 | 381 | # (batch_size, 12, 224, 224, 3) 382 | input_batch = np.transpose(input_batch, [1, 0, 2, 3, 4]) 383 | 384 | # list of (batch_size, 224, 224, 3) of size 12 385 | tw_inputs = np.split(input_batch, 12) 386 | tw_inputs = [np.reshape(x, [-1, 224, 224, 3]) for x in tw_inputs] 387 | s0_inputs = tw_inputs[:6] 388 | s1_inputs = tw_inputs[6:] 389 | 390 | # [(224, 224, 3), ..] -> [(14, 14, 512), ..] 391 | features_0 = [] 392 | for inp in s0_inputs: 393 | feat_vec = sess.run(feature_0, feed_dict={inputs: inp}) 394 | features_0.append(feat_vec) 395 | 396 | # [(224, 224, 3), ..] -> [(14, 14, 512), ..] 397 | features_1 = [] 398 | for inp in s1_inputs: 399 | feat_vec = sess.run(feature_1, feed_dict={inputs: inp}) 400 | features_1.append(feat_vec) 401 | 402 | # [(14, 14, 512), ..] -> [(7168,), ..] 403 | feats_for_aux = [] 404 | for feat in features_0: 405 | out = sess.run(aux_output, feed_dict={aux_feat_batch: feat}) 406 | feats_for_aux.append(out) 407 | 408 | # [(7168,), ..] -> [(21504,), (21504,)] (RGB stream) 409 | aux_fcs_0 = [np.concatenate([feats_for_aux[i], feats_for_aux[i+2], 410 | feats_for_aux[i+4]], axis=1) for i in range(2)] 411 | 412 | # [(14, 14, 512), ..] -> [(7168,), ..] 413 | feats_for_aux = [] 414 | for feat in features_1: 415 | out = sess.run(aux_output, feed_dict={aux_feat_batch: feat}) 416 | feats_for_aux.append(out) 417 | 418 | # [(7168,), ..] 
-> [(21504,), (21504,)] (RGB difference stream) 419 | aux_fcs_1 = [np.concatenate([feats_for_aux[i], feats_for_aux[i+2], 420 | feats_for_aux[i+4]], axis=1) for i in range(2)] 421 | 422 | _, global_step_val, predictions_val, labels_val, loss_val = sess.run([train_op, 423 | global_step, predictions, labels, loss], feed_dict={labels: label_batch, 424 | aux_fc_batch_0: aux_fcs_0[0], aux_fc_batch_1: aux_fcs_0[1], 425 | aux_fc_batch_2: aux_fcs_1[0], aux_fc_batch_3: aux_fcs_1[1]}) 426 | 427 | seconds_per_batch = time.time() - batch_start_time 428 | examples_per_second = labels_val.shape[0] / seconds_per_batch # TODO 429 | 430 | if self.max_steps and self.max_steps <= global_step_val: 431 | self.max_steps_reached = True 432 | 433 | if self.is_master and global_step_val % 10 == 0 and self.train_dir: 434 | eval_start_time = time.time() 435 | hit_at_one = eval_util.calculate_hit_at_one(predictions_val, labels_val) 436 | hit_at_five = eval_util.calculate_hit_at_five(predictions_val, labels_val) 437 | 438 | eval_end_time = time.time() 439 | eval_time = eval_end_time - eval_start_time 440 | 441 | logging.info("training step " + str(global_step_val) + " | Loss: " + ("%.2f" % loss_val) + 442 | " Examples/sec: " + ("%.2f" % examples_per_second) + " | Hit@1: " + ("%.2f" % hit_at_one) + 443 | " Hit@5: " + ("%.2f" % hit_at_five)) 444 | 445 | sv.summary_writer.add_summary( 446 | utils.MakeSummary("model/Training_Loss", loss_val), 447 | global_step_val) 448 | sv.summary_writer.add_summary( 449 | utils.MakeSummary("model/Training_Hit@1", hit_at_one), 450 | global_step_val) 451 | sv.summary_writer.add_summary( 452 | utils.MakeSummary("model/Training_Hit@5", hit_at_five), 453 | global_step_val) 454 | sv.summary_writer.add_summary( 455 | utils.MakeSummary("global_step/Examples/Second", 456 | examples_per_second), global_step_val) 457 | sv.summary_writer.flush() 458 | 459 | else: 460 | logging.info("training step " + str(global_step_val) + " | Loss: " + ("%.2f" % loss_val) + 461 | " Examples/sec: " + ("%.2f" % examples_per_second)) 462 | 463 | if global_step_val and not (global_step_val % validation_steps): 464 | f = open(pp, 'a+') 465 | p = os.path.join(FLAGS.train_dir, 'best_checkpoint') 466 | if not os.path.exists(p): 467 | os.makedirs(p) 468 | avg_h1 = evaluate(FLAGS.dataset, FLAGS.model, FLAGS.train_dir, 469 | FLAGS.dataset_dir, FLAGS.splits_dir, 1, FLAGS.batch_size, FLAGS.split_num) 470 | if avg_h1 > global_h1: 471 | global_h1 = avg_h1 472 | latest_checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir) 473 | f.write("%s, %f\n" % (latest_checkpoint, global_h1)) 474 | del_files = glob.glob(p + '/*') 475 | for d in del_files: 476 | os.remove(d) 477 | files = glob.glob(latest_checkpoint + '.*') 478 | for fn in files: 479 | shutil.copy(fn, p) 480 | f.close() 481 | 482 | except tf.errors.OutOfRangeError: 483 | logging.info("%s: Done training -- epoch limit reached.", 484 | task_as_string(self.task)) 485 | logging.info("%s: Exited training loop.", task_as_string(self.task)) 486 | sv.Stop() 487 | 488 | def main(unused_argv): 489 | 490 | cluster = None 491 | task_data = {"type": "master", "index": 0} 492 | task = type("TaskSpec", (object,), task_data) 493 | 494 | logging.set_verbosity(tf.logging.INFO) 495 | logging.info("%s: Tensorflow version: %s", 496 | task_as_string(task), tf.__version__) 497 | 498 | if not cluster or task.type == "master" or task.type == "worker": 499 | model = find_class_by_name(FLAGS.model, 500 | [models])() 501 | reader = getattr(data, FLAGS.dataset)(FLAGS.dataset_dir, FLAGS.splits_dir, 502 | 
FLAGS.num_epochs, FLAGS.batch_size, FLAGS.split_num) 503 | 504 | #model_exporter = export_model.ModelExporter( 505 | # model=model, 506 | # reader=reader) 507 | 508 | Trainer(cluster, task, FLAGS.train_dir, model, reader, 509 | FLAGS.log_device_placement, FLAGS.max_steps, 510 | FLAGS.export_model_steps).run(start_new_model=FLAGS.start_new_model) 511 | 512 | elif task.type == "ps": 513 | # Distributed server 514 | raise NotImplementedError() 515 | else: 516 | raise ValueError("%s: Invalid task_type: %s." % 517 | (task_as_string(task), task.type)) 518 | 519 | if __name__ == '__main__': 520 | app.run() 521 | -------------------------------------------------------------------------------- /scripts/preprocess.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | from PIL import Image 5 | 6 | from tensorflow import app 7 | from tensorflow import flags 8 | from tensorflow import logging 9 | 10 | from nturgbd import Joint, Reader 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | if __name__ == '__main__': 15 | flags.DEFINE_string("dataset_dir", "", \ 16 | "Path to base directory for skeleton files") 17 | flags.DEFINE_string("splits_dir", "", \ 18 | "Directory where train and test splits are stored") 19 | flags.DEFINE_string("output_dir", "", \ 20 | "Directory where TFRecord files are to be stored") 21 | 22 | flags.DEFINE_integer("split_num", 1, \ 23 | "The present train / test split to preprocess") 24 | 25 | flags.DEFINE_bool("is_training", True, \ 26 | "Whether the present split is for train or test") 27 | flags.DEFINE_bool("tfrecords", False, \ 28 | "Whether to create TFRecords or create images") 29 | 30 | def task_as_string(task): 31 | return "/job:%s/task:%s" % (task.type, task.index) 32 | 33 | def _write_to_tfrecords(task, 34 | reader, 35 | split, 36 | outdir='', 37 | train=True): 38 | def _float32_feature(value): 39 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 40 | 41 | def _int64_feature(value): 42 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 43 | 44 | if train: 45 | fpath = reader.train_splits[split] 46 | splitname = "train" 47 | else: 48 | fpath = reader.test_splits[split] 49 | splitname = "test" 50 | logging.info("%s: Converting %s split%d files to TFRecords", task_as_string(task), splitname, split) 51 | 52 | with open(os.path.join(reader.splits, 'faulty_skeletons'), 'r') as f: 53 | remove = f.readlines() 54 | remove = [x.strip() for x in remove] 55 | 56 | with open(fpath, 'r') as f: 57 | lines = f.readlines() 58 | files = [x.strip().split()[0] for x in lines] 59 | labels = [int(x.strip().split()[1]) for x in lines] 60 | n = 0; l = len(files); two_person = 0 61 | for fname, label in zip(files, labels): 62 | if not any(fname.split('.')[0] in x for x in remove): 63 | n += 1 64 | if not os.path.exists(outdir): 65 | os.makedirs(outdir) 66 | os.makedirs(os.path.join(outdir, splitname + str(split))) 67 | elif not os.path.exists(os.path.join(outdir, splitname + str(split))): 68 | os.makedirs(os.path.join(outdir, splitname + str(split))) 69 | 70 | tfrecord_file = os.path.join(outdir, splitname + str(split), fname + '.tfrecord') 71 | if not os.path.exists(tfrecord_file): 72 | writer = tf.python_io.TFRecordWriter(tfrecord_file) 73 | else: 74 | continue 75 | video = reader._read_skeleton_file(fname) 76 | skeletons_0, skeletons_1 = video._get_main_actor_skeletons() 77 | # logging.info("%s: Number of skeletons: %d", task_as_string(task), len(skeletons)) 78 | 79 
| # Spatial Feature: Relative coordinates for each joint in the frame [300 x 900] 80 | features = np.zeros((300, 576), dtype=np.float32) 81 | two_person_action = True 82 | for i in skeletons_1: 83 | if i._is_zero_skeleton: 84 | two_person_action = False 85 | one_person_feat_len = 288 86 | for nn, skeleton in enumerate(skeletons_0): 87 | feat_len = 0 88 | joints = skeleton._get_joint_objects() 89 | assert len(joints) == 25 90 | for i in [5, 8, 12, 16]: 91 | joint = joints[i] 92 | x, y, z = joint._get_cartesian_coordinates() 93 | for j in range(len(joints)): 94 | if not j == i: 95 | joint_ = joints[j] 96 | x_, y_, z_ = joint_._get_cartesian_coordinates() 97 | r = x - x_; theta = y - y_; phi = z - z_ 98 | j = Joint(r, theta, phi) 99 | r, theta, phi = j._get_spherical_coordinates() 100 | features[nn, feat_len] = r; feat_len += 1 101 | features[nn, feat_len] = theta; feat_len += 1 102 | features[nn, feat_len] = phi; feat_len += 1 103 | if two_person_action: 104 | two_person += 1 105 | logging.info("%s: Two person Action", task_as_string(task)) 106 | for nn, skeleton in enumerate(skeletons_0): 107 | feat_len = 0 108 | joints_0 = skeleton._get_joint_objects() 109 | joints_1 = skeletons_1[nn]._get_joint_objects() 110 | assert len(joints_0) == 25; assert len(joints_1) == 25 111 | for i in [5, 8, 12, 16]: 112 | joint = joints_0[i] 113 | x, y, z = joint._get_cartesian_coordinates() 114 | for j in range(len(joints_1)): 115 | if not j == i: 116 | joint_ = joints_1[j] 117 | x_, y_, z_ = joint_._get_cartesian_coordinates() 118 | r = x - x_; theta = y - y_; phi = z - z_ 119 | j = Joint(r, theta, phi) 120 | r, theta, phi = j._get_spherical_coordinates() 121 | features[nn, one_person_feat_len+feat_len] = r; feat_len += 1 122 | features[nn, one_person_feat_len+feat_len] = theta; feat_len += 1 123 | features[nn, one_person_feat_len+feat_len] = phi; feat_len += 1 124 | feats_spatial = features 125 | feats_spatial = np.pad(feats_spatial, [[0, (300 - feats_spatial.shape[0])], [0, 0]], 'constant', constant_values=0) 126 | feats_spatial = np.hstack(feats_spatial) 127 | 128 | example = tf.train.Example(features=tf.train.Features(feature={ 129 | 'feature': _float32_feature(feats_spatial), 130 | 'label': _int64_feature(label)})) 131 | writer.write(example.SerializeToString()) 132 | writer.close() 133 | logging.info("%s: status: %d of %d done", task_as_string(task), n, l) 134 | return n, two_person 135 | 136 | def _write_to_images(task, 137 | reader, 138 | split, 139 | outdir='', 140 | train=True): 141 | if train: 142 | fpath = reader.train_splits[split] 143 | splitname = "train" 144 | else: 145 | fpath = reader.test_splits[split] 146 | splitname = "test" 147 | logging.info("%s: Converting %s split%d files to TFRecords", task_as_string(task), splitname, split) 148 | 149 | with open(os.path.join(reader.splits, 'faulty_skeletons'), 'r') as f: 150 | remove = f.readlines() 151 | remove = [x.strip() for x in remove] 152 | 153 | with open(fpath, 'r') as f: 154 | lines = f.readlines() 155 | files = [x.strip().split()[0] for x in lines] 156 | labels = [int(x.strip().split()[1]) for x in lines] 157 | n = 0; l = len(files); two_person = 0 158 | for fname, label in zip(files, labels): 159 | if not any(fname.split('.')[0] in x for x in remove): 160 | n += 1 161 | if not os.path.exists(outdir): 162 | os.makedirs(outdir) 163 | os.makedirs(os.path.join(outdir, splitname + str(split))) 164 | elif not os.path.exists(os.path.join(outdir, splitname + str(split))): 165 | os.makedirs(os.path.join(outdir, splitname + str(split))) 166 | 167 
| image_dir = os.path.join(outdir, splitname + str(split), fname) 168 | if not os.path.exists(image_dir): 169 | os.makedirs(image_dir) 170 | else: 171 | if len(os.listdir(image_dir)) == 6: 172 | continue 173 | video = reader._read_skeleton_file(fname) 174 | skeletons_0, skeletons_1 = video._get_main_actor_skeletons() 175 | 176 | two_person_action = True 177 | for i in skeletons_1: 178 | if i._is_zero_skeleton: 179 | two_person_action = False 180 | 181 | if two_person_action: 182 | im_size = (2, len(skeletons_0), 48) 183 | else: 184 | im_size = (2, len(skeletons_0), 24) 185 | 186 | im_r = np.zeros(im_size); im_theta = np.zeros(im_size); im_phi = np.zeros(im_size) 187 | for nn, skeleton in enumerate(skeletons_0): 188 | joints = skeleton._get_joint_objects() 189 | assert len(joints) == 25 190 | im_num = 0 191 | for i in [0, 20]: 192 | feat_len = 0 193 | joint = joints[i] 194 | x, y, z = joint._get_cartesian_coordinates() 195 | for j in range(len(joints)): 196 | if not j == i: 197 | joint_ = joints[j] 198 | x_, y_, z_ = joint_._get_cartesian_coordinates() 199 | r = x - x_; theta = y - y_; phi = z - z_ 200 | im_r[im_num, nn, feat_len] = r; im_theta[im_num, nn, feat_len] = theta; im_phi[im_num, nn, feat_len] = phi 201 | feat_len += 1 202 | im_num += 1 203 | if two_person_action: 204 | processed = 24 205 | two_person += 1 206 | for nn, skeleton in enumerate(skeletons_0): 207 | joints_0 = skeleton._get_joint_objects() 208 | joints_1 = skeletons_1[nn]._get_joint_objects() 209 | assert len(joints_0) == 25; assert len(joints_1) == 25 210 | im_num = 0 211 | for i in [0, 20]: 212 | feat_len = processed 213 | joint = joints_0[i] 214 | x, y, z = joint._get_cartesian_coordinates() 215 | for j in range(len(joints_1)): 216 | if not j == i: 217 | joint_ = joints_1[j] 218 | x_, y_, z_ = joint_._get_cartesian_coordinates() 219 | r = x - x_; theta = y - y_; phi = z - z_ 220 | im_r[im_num, nn, feat_len] = r; im_theta[im_num, nn, feat_len] = theta; im_phi[im_num, nn, feat_len] = phi 221 | feat_len += 1 222 | im_num += 1 223 | 224 | count = 0 225 | for im in im_r: 226 | im += np.amin(im) 227 | im *= 255.0 / np.amax(im) 228 | im = np.repeat(im[:, :, np.newaxis], 3, axis=2) 229 | image = Image.fromarray(im.astype(np.uint8), 'RGB') 230 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 231 | image.save(path) 232 | count += 1 233 | for im in im_theta: 234 | im += np.amin(im) 235 | im *= 255.0 / np.amax(im) 236 | im = np.repeat(im[:, :, np.newaxis], 3, axis=2) 237 | image = Image.fromarray(im.astype(np.uint8), 'RGB') 238 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 239 | image.save(path) 240 | count += 1 241 | for im in im_phi: 242 | im += np.amin(im) 243 | im *= 255.0 / np.amax(im) 244 | im = np.repeat(im[:, :, np.newaxis], 3, axis=2) 245 | image = Image.fromarray(im.astype(np.uint8), 'RGB') 246 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 247 | image.save(path) 248 | count += 1 249 | logging.info("%s: status: %d of %d done", task_as_string(task), n, l) 250 | return n, two_person 251 | 252 | def _write_to_images_new(task, 253 | reader, 254 | split, 255 | outdir='', 256 | train=True): 257 | if train: 258 | fpath = reader.train_splits[split] 259 | splitname = "train" 260 | else: 261 | fpath = reader.test_splits[split] 262 | splitname = "test" 263 | logging.info("%s: Converting %s split%d files to TFRecords", task_as_string(task), splitname, split) 264 | 265 | with open(os.path.join(reader.splits, 'faulty_skeletons'), 'r') as f: 266 | remove = f.readlines() 267 | remove = [x.strip() for x in 
remove] 268 | 269 | with open(fpath, 'r') as f: 270 | lines = f.readlines() 271 | files = [x.strip().split()[0] for x in lines] 272 | labels = [int(x.strip().split()[1]) for x in lines] 273 | n = 0; l = len(files); two_person = 0 274 | for fname, label in zip(files, labels): 275 | if not any(fname.split('.')[0] in x for x in remove): 276 | n += 1 277 | if not os.path.exists(outdir): 278 | os.makedirs(outdir) 279 | os.makedirs(os.path.join(outdir, splitname + str(split))) 280 | elif not os.path.exists(os.path.join(outdir, splitname + str(split))): 281 | os.makedirs(os.path.join(outdir, splitname + str(split))) 282 | 283 | image_dir = os.path.join(outdir, splitname + str(split), fname) 284 | if not os.path.exists(image_dir): 285 | os.makedirs(image_dir) 286 | else: 287 | if len(os.listdir(image_dir)) == 6: 288 | continue 289 | video = reader._read_skeleton_file(fname) 290 | skeletons_0, skeletons_1 = video._get_main_actor_skeletons() 291 | 292 | two_person_action = True 293 | for i in skeletons_1: 294 | if i._is_zero_skeleton: 295 | two_person_action = False 296 | 297 | L = len(skeletons_0) 298 | if two_person_action: 299 | im_size = (2, L * 3, 48 * 3) 300 | im_size_d = (2, (L-1) * 3, 48 * 3) 301 | else: 302 | im_size = (2, L * 3, 24 * 3) 303 | im_size_d = (2, (L-1) * 3, 24 * 3) 304 | 305 | im_r = np.zeros(im_size); im_theta = np.zeros(im_size); im_phi = np.zeros(im_size) 306 | im_r_d = np.zeros(im_size_d); im_theta_d = np.zeros(im_size_d); im_phi_d = np.zeros(im_size_d) 307 | for nn, skeleton in enumerate(skeletons_0): 308 | joints = skeleton._get_joint_objects() 309 | assert len(joints) == 25 310 | im_num = 0 311 | for i in [0, 20]: 312 | feat_len = 0 313 | joint = joints[i] 314 | x, y, z = joint._get_cartesian_coordinates() 315 | for j in range(len(joints)): 316 | if not j == i: 317 | joint_ = joints[j] 318 | x_, y_, z_ = joint_._get_cartesian_coordinates() 319 | r = np.full((3,3), (x - x_)) 320 | theta = np.full((3,3), (y - y_)) 321 | phi = np.full((3,3), (z - z_)) 322 | im_r[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = r 323 | im_theta[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = theta 324 | im_phi[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = phi 325 | feat_len += 1 326 | im_num += 1 327 | if two_person_action: 328 | processed = 24 329 | two_person += 1 330 | for nn, skeleton in enumerate(skeletons_0): 331 | joints_0 = skeleton._get_joint_objects() 332 | joints_1 = skeletons_1[nn]._get_joint_objects() 333 | assert len(joints_0) == 25; assert len(joints_1) == 25 334 | im_num = 0 335 | for i in [0, 20]: 336 | feat_len = processed 337 | joint = joints_0[i] 338 | x, y, z = joint._get_cartesian_coordinates() 339 | for j in range(len(joints_1)): 340 | if not j == i: 341 | joint_ = joints_1[j] 342 | x_, y_, z_ = joint_._get_cartesian_coordinates() 343 | r = np.full((3,3), (x - x_)) 344 | theta = np.full((3,3), (y - y_)) 345 | phi = np.full((3,3), (z - z_)) 346 | im_r[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = r 347 | im_theta[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = theta 348 | im_phi[im_num, nn*3:(nn+1)*3, feat_len*3:(feat_len+1)*3] = phi 349 | feat_len += 1 350 | im_num += 1 351 | for i in range(2): 352 | im1 = im_r[i] 353 | im2 = im_theta[i] 354 | im3 = im_phi[i] 355 | for nn in range(1, len(im1)/3): 356 | f1 = im1[(nn-1)*3:nn*3]; f2 = im1[nn*3:(nn+1)*3] 357 | im_r_d[i, (nn-1)*3:nn*3] = (f1 - f2) 358 | f1 = im2[(nn-1)*3:nn*3]; f2 = im2[nn*3:(nn+1)*3] 359 | im_theta_d[i, (nn-1)*3:nn*3] = (f1 - f2) 360 | f1 = im3[(nn-1)*3:nn*3]; f2 = im3[nn*3:(nn+1)*3] 
361 | im_phi_d[i, (nn-1)*3:nn*3] = (f1 - f2) 362 | 363 | count = 0 364 | for im in im_r: 365 | im += np.amin(im) 366 | im *= 255.0 / np.amax(im) 367 | image = Image.fromarray(im.astype(np.uint8), 'L') 368 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 369 | image.save(path) 370 | count += 1 371 | for im in im_theta: 372 | im += np.amin(im) 373 | im *= 255.0 / np.amax(im) 374 | image = Image.fromarray(im.astype(np.uint8), 'L') 375 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 376 | image.save(path) 377 | count += 1 378 | for im in im_phi: 379 | im += np.amin(im) 380 | im *= 255.0 / np.amax(im) 381 | image = Image.fromarray(im.astype(np.uint8), 'L') 382 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 383 | image.save(path) 384 | count += 1 385 | for im in im_r_d: 386 | im += np.amin(im) 387 | im *= 255.0 / np.amax(im) 388 | image = Image.fromarray(im.astype(np.uint8), 'L') 389 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 390 | image.save(path) 391 | count += 1 392 | for im in im_theta_d: 393 | im += np.amin(im) 394 | im *= 255.0 / np.amax(im) 395 | image = Image.fromarray(im.astype(np.uint8), 'L') 396 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 397 | image.save(path) 398 | count += 1 399 | for im in im_phi_d: 400 | im += np.amin(im) 401 | im *= 255.0 / np.amax(im) 402 | image = Image.fromarray(im.astype(np.uint8), 'L') 403 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 404 | image.save(path) 405 | count += 1 406 | logging.info("%s: status: %d of %d done", task_as_string(task), n, l) 407 | return n, two_person 408 | 409 | def _write_to_frames(task, 410 | reader, 411 | split, 412 | outdir='', 413 | train=True): 414 | if train: 415 | fpath = reader.train_splits[split] 416 | splitname = "train" 417 | else: 418 | fpath = reader.test_splits[split] 419 | splitname = "test" 420 | logging.info("%s: Converting %s split%d files to TFRecords", task_as_string(task), splitname, split) 421 | 422 | with open(os.path.join(reader.splits, 'faulty_skeletons'), 'r') as f: 423 | remove = f.readlines() 424 | remove = [x.strip() for x in remove] 425 | 426 | with open(fpath, 'r') as f: 427 | lines = f.readlines() 428 | files = [x.strip().split()[0] for x in lines] 429 | labels = [int(x.strip().split()[1]) for x in lines] 430 | n = 0; l = len(files); two_person = 0 431 | for fname, label in zip(files, labels): 432 | if not any(fname.split('.')[0] in x for x in remove): 433 | n += 1 434 | if not os.path.exists(outdir): 435 | os.makedirs(outdir) 436 | os.makedirs(os.path.join(outdir, splitname + str(split))) 437 | elif not os.path.exists(os.path.join(outdir, splitname + str(split))): 438 | os.makedirs(os.path.join(outdir, splitname + str(split))) 439 | 440 | image_dir = os.path.join(outdir, splitname + str(split), fname) 441 | if not os.path.exists(image_dir): 442 | os.makedirs(image_dir) 443 | else: 444 | if len(os.listdir(image_dir)) == 6: 445 | continue 446 | video = reader._read_skeleton_file(fname) 447 | skeletons_0, skeletons_1 = video._get_main_actor_skeletons() 448 | 449 | two_person_action = True 450 | for i in skeletons_1: 451 | if i._is_zero_skeleton: 452 | two_person_action = False 453 | 454 | if two_person_action: 455 | im_size = (len(skeletons_0), 25, 48, 3) 456 | else: 457 | im_size = (len(skeletons_0), 25, 24, 3) 458 | 459 | im = np.zeros(im_size) 460 | im_num = 0 461 | for nn, skeleton in enumerate(skeletons_0): 462 | joints = skeleton._get_joint_objects() 463 | assert len(joints) == 25 464 | for i in range(len(joints)): 465 | feat_len = 
0 466 | joint = joints[i] 467 | x, y, z = joint._get_cartesian_coordinates() 468 | for j in range(len(joints)): 469 | if not j == i: 470 | joint_ = joints[j] 471 | x_, y_, z_ = joint_._get_cartesian_coordinates() 472 | r = x - x_; theta = y - y_; phi = z - z_ 473 | j = Joint(r, theta, phi) 474 | r, theta, phi = j._get_spherical_coordinates() 475 | im[im_num, i, feat_len, 0] = r 476 | im[im_num, i, feat_len, 1] = theta 477 | im[im_num, i, feat_len, 2] = phi 478 | feat_len += 1 479 | im_num += 1 480 | if two_person_action: 481 | processed = 24 482 | two_person += 1 483 | im_num = 0 484 | for nn, skeleton in enumerate(skeletons_0): 485 | joints_0 = skeleton._get_joint_objects() 486 | joints_1 = skeletons_1[nn]._get_joint_objects() 487 | assert len(joints_0) == 25; assert len(joints_1) == 25 488 | for i in range(len(joints_0)): 489 | # relative coordinates of the second actor's joints w.r.t. joint i of the main actor 490 | feat_len = processed 491 | joint = joints_0[i] 492 | x, y, z = joint._get_cartesian_coordinates() 493 | for j in range(len(joints_1)): 494 | if not j == i: 495 | joint_ = joints_1[j] 496 | x_, y_, z_ = joint_._get_cartesian_coordinates() 497 | r = x - x_; theta = y - y_; phi = z - z_ 498 | j = Joint(r, theta, phi) 499 | r, theta, phi = j._get_spherical_coordinates() 500 | im[im_num, i, feat_len, 0] = r 501 | im[im_num, i, feat_len, 1] = theta 502 | im[im_num, i, feat_len, 2] = phi 503 | feat_len += 1 504 | im_num += 1 505 | count = 0 506 | for img in im: 507 | img += np.amin(img) 508 | img *= 255.0 / np.amax(img) 509 | image = Image.fromarray(img.astype(np.uint8), 'RGB') 510 | path = os.path.join(image_dir, 'img_%.4d.jpg' % count) 511 | image.save(path) 512 | count += 1 513 | logging.info("%s: status: %d of %d done", task_as_string(task), n, l) 514 | 515 | return n, two_person 516 | 517 | def main(unused_argv): 518 | task_data = {"type": "master", "index": 0} 519 | task = type("TaskSpec", (object,), task_data) 520 | 521 | logging.set_verbosity(tf.logging.INFO) 522 | logging.info("%s: Tensorflow version: %s", 523 | task_as_string(task), tf.__version__) 524 | 525 | if FLAGS.dataset_dir == '': 526 | logging.info("%s: No dataset directory provided. " 527 | "Please set the --dataset_dir flag when running the script.", task_as_string(task)) 528 | return 1 529 | if FLAGS.splits_dir == '': 530 | logging.info("%s: No split files directory provided. " 531 | "Please set the --splits_dir flag when running the script.", task_as_string(task)) 532 | return 1 533 | if FLAGS.output_dir == '': 534 | logging.info("%s: No output directory provided. "
" 535 | "Please set the --tfrecords_dir flag when running the script.", task_as_string(task)) 536 | return EXIT_ERROR 537 | 538 | logging.info("%s: Using >\n" 539 | " Dataset directory: %s\n" 540 | " Split files directory: %s\n" 541 | " Directory to store the output: %s\n" 542 | " Split number: %d\n" 543 | " Train/Test split: %s\n", task_as_string(task), 544 | FLAGS.dataset_dir, FLAGS.splits_dir, FLAGS.output_dir, 545 | FLAGS.split_num, "train" if FLAGS.is_training else "test") 546 | 547 | data_dir = FLAGS.dataset_dir 548 | split_dir = FLAGS.splits_dir 549 | reader = Reader(dataset_dir=data_dir, splits_dir=split_dir) 550 | 551 | if FLAGS.tfrecords: 552 | n, two_person = _write_to_tfrecords(task, 553 | reader=reader, 554 | split=FLAGS.split_num, 555 | outdir=FLAGS.output_dir, 556 | train=FLAGS.is_training) 557 | else: 558 | n, two_person = _write_to_images_new(task, 559 | reader=reader, 560 | split=FLAGS.split_num, 561 | outdir=FLAGS.output_dir, 562 | train=FLAGS.is_training) 563 | 564 | logging.info("%s: Converting to output format done! Total files: %d, Two person actions: %d. Exiting.", task_as_string(task), n, two_person) 565 | 566 | if __name__ == '__main__': 567 | app.run() 568 | --------------------------------------------------------------------------------