├── .gitignore
├── README.md
├── __init__.py
├── scripts
    ├── README.md
    ├── __init__.py
    ├── draw-visualizations.ipynb
    ├── evaluate_hmdb51gln.py
    ├── evaluate_mAP.py
    └── evaluate_ucf11.py
├── src
    ├── __init__.py
    ├── actrec.py
    └── actrec_mAP.py
└── util
    ├── README.md
    ├── __init__.py
    ├── data_handler.py
    ├── gpu_lock.py
    ├── gpu_util.py
    └── run_on_me_or_pid_quit


/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | *.txt
4 | *.npz
5 | *.pkl
6 | *.bak
7 | *.nfs*
8 | *.ipynb_checkpoints*
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Action Recognition using Visual Attention
 2 | 
 3 | We propose a soft attention based model for the task of action recognition in videos. 
 4 | We use multi-layered Recurrent Neural Networks (RNNs) with Long Short-Term Memory 
 5 | (LSTM) units which are deep both spatially and temporally. Our model learns to focus 
 6 | selectively on parts of the video frames and classifies videos after taking a few 
 7 | glimpses. The model essentially learns which parts in the frames are relevant for the 
 8 | task at hand and attaches higher importance to them. We evaluate the model on UCF-11 
 9 | (YouTube Action), HMDB-51 and Hollywood2 datasets and analyze how the model focuses its 
10 | attention depending on the scene and the action being performed.
11 | 
12 | ## Dependencies
13 | 
14 | * Python 2.7
15 | * [NumPy](http://www.numpy.org/)
16 | * [scikit learn](http://scikit-learn.org/stable/index.html)
17 | * [skimage](http://scikit-image.org/docs/dev/api/skimage.html)
18 | * [Theano](http://www.deeplearning.net/software/theano/)
19 | * [h5py](http://docs.h5py.org/en/latest/)
20 | 
21 | ## Input data format
22 | 
23 | This is provided in [util/README.md](https://github.com/kracwarlock/action-recognition-visual-attention/blob/master/util/README.md)
24 | 
25 | ## Reference
26 | 
27 | If you use this code as part of any published research, please acknowledge the
28 | following papers:
29 | 
30 | **"Action Recognition using Visual Attention."**  
31 | Shikhar Sharma, Ryan Kiros, Ruslan Salakhutdinov. *[arXiv](http://arxiv.org/abs/1511.04119)*
32 | 
33 |     @article{sharma2015attention,
34 |         title={Action Recognition using Visual Attention},
35 |         author={Sharma, Shikhar and Kiros, Ryan and Salakhutdinov, Ruslan},
36 |         journal={arXiv preprint arXiv:1511.04119},
37 |         year={2015}
38 |     } 
39 | 
40 | **"Show, Attend and Tell: Neural Image Caption Generation with Visual Attention."**  
41 | Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan
42 | Salakhutdinov, Richard Zemel, Yoshua Bengio. *To appear ICML (2015)*
43 | 
44 |     @article{Xu2015show,
45 |         title={Show, Attend and Tell: Neural Image Caption Generation with Visual Attention},
46 |         author={Xu, Kelvin and Ba, Jimmy and Kiros, Ryan and Cho, Kyunghyun and Courville, Aaron and Salakhutdinov, Ruslan and Zemel, Richard and Bengio, Yoshua},
47 |         journal={arXiv preprint arXiv:1502.03044},
48 |         year={2015}
49 |     }
50 | 
51 | ## License
52 | This repository is released under a [revised (3-clause) BSD License](http://directory.fsf.org/wiki/License:BSD_3Clause). It 
53 | is the implementation for our paper [Action Recognition using Visual Attention](http://arxiv.org/abs/1511.04119). The repository uses some code from the project 
54 | [arctic-captions](https://github.com/kelvinxu/arctic-captions) which is originally the implementation for the paper 
55 | [Show, Attend and Tell: Neural Image Caption Generation with Visual Attention](http://arxiv.org/abs/1502.03044) and is also licensed 
56 | under a [revised (3-clause) BSD License](http://directory.fsf.org/wiki/License:BSD_3Clause).
57 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kracwarlock/action-recognition-visual-attention/6738a0e2240df45ba79e87d24a174f53adb4f29b/__init__.py


--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
 1 | ### Running the code
 2 | In the action-recognition-visual-attention directory you can use the following commands to run the script files:
 3 | ```
 4 | THEANO_FLAGS='floatX=float32,device=gpu0,mode=FAST_RUN,nvcc.fastmath=True' python -m scripts.evaluate_ucf11
 5 | THEANO_FLAGS='floatX=float32,device=gpu1,mode=FAST_RUN,nvcc.fastmath=True' python -m scripts.evaluate_mAP
 6 | THEANO_FLAGS='floatX=float32,device=gpu2,mode=FAST_RUN,nvcc.fastmath=True' python -m scripts.evaluate_hmdb51gln
 7 | ```
 8 | 
 9 | ### Visualizations
10 | The file `draw-visualizations.ipynb` is a sample IPython notebook for drawing visualizations.
11 | 


--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kracwarlock/action-recognition-visual-attention/6738a0e2240df45ba79e87d24a174f53adb4f29b/scripts/__init__.py


--------------------------------------------------------------------------------
/scripts/evaluate_hmdb51gln.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import sys
 3 | import argparse
 4 | 
 5 | import util.gpu_util
 6 | board = util.gpu_util.LockGPU()
 7 | print 'GPU Lock Acquired'
 8 | 
 9 | from src.actrec import train
10 | 
11 | def main(job_id, params):
12 |     print 'Anything printed here will end up in the output directory for job #%d' % job_id
13 |     print params
14 | 
15 |     trainerr, validerr, testerr = train(dim_out=params['dim_out'][0],
16 |                                         ctx_dim=params['ctx_dim'][0],
17 |                                         dim=params['dim'][0],
18 |                                         n_actions=params['n_actions'][0],
19 |                                         n_layers_att=params['n_layers_att'][0],
20 |                                         n_layers_out=params['n_layers_out'][0],
21 |                                         n_layers_init=params['n_layers_init'][0],
22 |                                         ctx2out=params['ctx2out'][0],
23 |                                         patience=params['patience'][0],
24 |                                         max_epochs=params['max_epochs'][0],
25 |                                         dispFreq=params['dispFreq'][0],
26 |                                         decay_c=params['decay_c'][0],
27 |                                         alpha_c=params['alpha_c'][0],
28 |                                         temperature_inverse=params['temperature_inverse'][0],
29 |                                         lrate=params['learning_rate'][0],
30 |                                         selector=params['selector'][0],
31 |                                         maxlen=params['maxlen'][0],
32 |                                         optimizer=params['optimizer'][0], 
33 |                                         batch_size=params['batch_size'][0],
34 |                                         valid_batch_size=params['valid_batch_size'][0],
35 |                                         saveto=params['model'][0],
36 |                                         validFreq=params['validFreq'][0],
37 |                                         saveFreq=params['saveFreq'][0],
38 |                                         dataset=params['dataset'][0], 
39 |                                         dictionary=params['dictionary'][0],
40 |                                         use_dropout=params['use_dropout'][0],
41 |                                         reload_=params['reload'][0],
42 | 					training_stride=params['training_stride'][0],
43 | 					testing_stride=params['testing_stride'][0],
44 |                                         last_n=params['last_n'][0],
45 |                                         fps=params['fps'][0]
46 |                              )
47 |     return validerr
48 | 
49 | if __name__ == '__main__':
50 |     options = {
51 |         'dim_out': [1024],		# hidden layer dim for outputs
52 |         'ctx_dim': [1024],		# context vector dimensionality
53 |         'dim': [1024],			# the number of LSTM units
54 |         'n_actions': [51],		# number of digits to predict
55 |         'n_layers_att': [3],
56 |         'n_layers_out': [1],
57 |         'n_layers_init': [1],
58 |         'ctx2out': [False],
59 |         'patience': [10],
60 |         'max_epochs': [15],
61 |         'dispFreq': [100],
62 |         'decay_c': [0.00001], 
63 |         'alpha_c': [0.0], 
64 |         'temperature_inverse': [1],
65 |         'learning_rate': [0.00001],
66 |         'selector': [False],
67 |         'maxlen': [30],
68 |         'optimizer': ['sgd'],
69 |         'batch_size': [128],
70 |         'valid_batch_size': [256],
71 |         'model': ['model_hmdb51.npz'],
72 |         'validFreq': [500],
73 |         'saveFreq': [500],		# save the parameters after every saveFreq updates
74 |         'dataset': ['hmdb51gln'],
75 |         'dictionary': [None],
76 |         'use_dropout': [True],
77 |         'reload': [True],
78 | 	'training_stride': [1],
79 | 	'testing_stride': [1],
80 |         'last_n': [16],			# timesteps from the end used for computing prediction
81 |         'fps': [30]
82 |     }
83 | 
84 |     if len(sys.argv) > 1:
85 |         options.update(eval("{%s}"%sys.argv[1]))
86 | 
87 |     main(0, options)
88 |     util.gpu_util.FreeGPU(board)
89 |     print 'GPU freed'
90 | 
91 | 


--------------------------------------------------------------------------------
/scripts/evaluate_mAP.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import sys
 3 | import argparse
 4 | 
 5 | import util.gpu_util
 6 | board = util.gpu_util.LockGPU()
 7 | print 'GPU Lock Acquired'
 8 | 
 9 | from src.actrec_mAP import train
10 | 
11 | def main(job_id, params):
12 |     print 'Anything printed here will end up in the output directory for job #%d' % job_id
13 |     print params
14 | 
15 |     trainerr, validerr, testerr = train(dim_out=params['dim_out'][0],
16 |                                         ctx_dim=params['ctx_dim'][0],
17 |                                         dim=params['dim'][0],
18 |                                         n_actions=params['n_actions'][0],
19 |                                         n_layers_att=params['n_layers_att'][0],
20 |                                         n_layers_out=params['n_layers_out'][0],
21 |                                         n_layers_init=params['n_layers_init'][0],
22 |                                         ctx2out=params['ctx2out'][0],
23 |                                         patience=params['patience'][0],
24 |                                         max_epochs=params['max_epochs'][0],
25 |                                         dispFreq=params['dispFreq'][0],
26 |                                         decay_c=params['decay_c'][0],
27 |                                         alpha_c=params['alpha_c'][0],
28 |                                         temperature_inverse=params['temperature_inverse'][0],
29 |                                         lrate=params['learning_rate'][0],
30 |                                         selector=params['selector'][0],
31 |                                         maxlen=params['maxlen'][0],
32 |                                         optimizer=params['optimizer'][0], 
33 |                                         batch_size=params['batch_size'][0],
34 |                                         valid_batch_size=params['valid_batch_size'][0],
35 |                                         saveto=params['model'][0],
36 |                                         validFreq=params['validFreq'][0],
37 |                                         saveFreq=params['saveFreq'][0],
38 |                                         dataset=params['dataset'][0], 
39 |                                         dictionary=params['dictionary'][0],
40 |                                         use_dropout=params['use_dropout'][0],
41 |                                         reload_=params['reload'][0],
42 | 					training_stride=params['training_stride'][0],
43 | 					testing_stride=params['testing_stride'][0],
44 |                                         last_n=params['last_n'][0],
45 |                                         fps=params['fps'][0]
46 |                              )
47 |     return validerr
48 | 
49 | if __name__ == '__main__':
50 |     options = {
51 |         'dim_out': [512],		# hidden layer dim for outputs
52 |         'ctx_dim': [1024],		# context vector dimensionality
53 |         'dim': [512],			# the number of LSTM units
54 |         'n_actions': [12],		# number of digits to predict
55 |         'n_layers_att':[1],
56 |         'n_layers_out': [1],
57 |         'n_layers_init': [1],
58 |         'ctx2out': [False],
59 |         'patience': [10],
60 |         'max_epochs': [15],
61 |         'dispFreq': [20],
62 |         'decay_c': [0.00001], 
63 |         'alpha_c': [0.0], 
64 |         'temperature_inverse': [1],
65 |         'learning_rate': [0.0001],
66 |         'selector': [False],
67 |         'maxlen': [30],
68 |         'optimizer': ['sgd'],
69 |         'batch_size': [128],
70 |         'valid_batch_size': [512],
71 |         'model': ['model_h2mAP.npz'],
72 |         'validFreq': [100],
73 |         'saveFreq': [100],		# save the parameters after every saveFreq updates
74 |         'dataset': ['h2mAP'],
75 |         'dictionary': [None],
76 |         'use_dropout': [True],
77 |         'reload': [False],
78 | 	'training_stride': [1],
79 | 	'testing_stride': [1],
80 |         'last_n': [16],			# timesteps from the end used for computing prediction
81 |         'fps': [30]
82 |     }
83 | 
84 |     if len(sys.argv) > 1:
85 |         options.update(eval("{%s}"%sys.argv[1]))
86 | 
87 |     main(0, options)
88 |     util.gpu_util.FreeGPU(board)
89 |     print 'GPU freed'
90 | 
91 | 


--------------------------------------------------------------------------------
/scripts/evaluate_ucf11.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import sys
 3 | import argparse
 4 | 
 5 | import util.gpu_util
 6 | board = util.gpu_util.LockGPU()
 7 | print 'GPU Lock Acquired'
 8 | 
 9 | from src.actrec import train
10 | 
11 | def main(job_id, params):
12 |     print 'Anything printed here will end up in the output directory for job #%d' % job_id
13 |     print params
14 | 
15 |     trainerr, validerr, testerr = train(dim_out=params['dim_out'][0],
16 |                                         ctx_dim=params['ctx_dim'][0],
17 |                                         dim=params['dim'][0],
18 |                                         n_actions=params['n_actions'][0],
19 |                                         n_layers_att=params['n_layers_att'][0],
20 |                                         n_layers_out=params['n_layers_out'][0],
21 |                                         n_layers_init=params['n_layers_init'][0],
22 |                                         ctx2out=params['ctx2out'][0],
23 |                                         patience=params['patience'][0],
24 |                                         max_epochs=params['max_epochs'][0],
25 |                                         dispFreq=params['dispFreq'][0],
26 |                                         decay_c=params['decay_c'][0],
27 |                                         alpha_c=params['alpha_c'][0],
28 |                                         temperature_inverse=params['temperature_inverse'][0],
29 |                                         lrate=params['learning_rate'][0],
30 |                                         selector=params['selector'][0],
31 |                                         maxlen=params['maxlen'][0],
32 |                                         optimizer=params['optimizer'][0], 
33 |                                         batch_size=params['batch_size'][0],
34 |                                         valid_batch_size=params['valid_batch_size'][0],
35 |                                         saveto=params['model'][0],
36 |                                         validFreq=params['validFreq'][0],
37 |                                         saveFreq=params['saveFreq'][0],
38 |                                         dataset=params['dataset'][0], 
39 |                                         dictionary=params['dictionary'][0],
40 |                                         use_dropout=params['use_dropout'][0],
41 |                                         reload_=params['reload'][0],
42 | 					training_stride=params['training_stride'][0],
43 | 					testing_stride=params['testing_stride'][0],
44 |                                         last_n=params['last_n'][0],
45 |                                         fps=params['fps'][0]
46 |                              )
47 |     return validerr
48 | 
49 | if __name__ == '__main__':
50 |     options = {
51 |         'dim_out': [512],		# hidden layer dim for outputs
52 |         'ctx_dim': [1024],		# context vector dimensionality
53 |         'dim': [512],			# the number of LSTM units
54 |         'n_actions': [11],		# number of digits to predict
55 |         'n_layers_att':[1],
56 |         'n_layers_out': [1],
57 |         'n_layers_init': [1],
58 |         'ctx2out': [False],
59 |         'patience': [10],
60 |         'max_epochs': [15],
61 |         'dispFreq': [20],
62 |         'decay_c': [0.00001], 
63 |         'alpha_c': [0.0], 
64 |         'temperature_inverse': [1],
65 |         'learning_rate': [0.0001],
66 |         'selector': [False],
67 |         'maxlen': [30],
68 |         'optimizer': ['adam'],
69 |         'batch_size': [128],
70 |         'valid_batch_size': [256],
71 |         'model': ['model_ucf11.npz'],
72 |         'validFreq': [100],
73 |         'saveFreq': [100],		# save the parameters after every saveFreq updates
74 |         'dataset': ['ucf11'],
75 |         'dictionary': [None],
76 |         'use_dropout': [True],
77 |         'reload': [False],
78 | 	'training_stride': [1],
79 | 	'testing_stride': [1],
80 |         'last_n': [30],			# timesteps from the end used for computing prediction
81 |         'fps': [30]
82 |     }
83 | 
84 |     if len(sys.argv) > 1:
85 |         options.update(eval("{%s}"%sys.argv[1]))
86 | 
87 |     main(0, options)
88 |     util.gpu_util.FreeGPU(board)
89 |     print 'GPU freed'
90 | 
91 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kracwarlock/action-recognition-visual-attention/6738a0e2240df45ba79e87d24a174f53adb4f29b/src/__init__.py


--------------------------------------------------------------------------------
/src/actrec.py:
--------------------------------------------------------------------------------
  1 | # Attention-based action recognition
  2 | 
  3 | import theano
  4 | import theano.tensor as tensor
  5 | theano.config.floatX = 'float32'
  6 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
  7 | 
  8 | import cPickle as pkl
  9 | import numpy
 10 | import copy
 11 | import os
 12 | import time
 13 | 
 14 | from scipy import optimize, stats
 15 | from collections import OrderedDict
 16 | 
 17 | import warnings
 18 | 
 19 | from util.data_handler import DataHandler
 20 | from util.data_handler import TrainProto
 21 | from util.data_handler import TestTrainProto
 22 | from util.data_handler import TestValidProto
 23 | from util.data_handler import TestTestProto
 24 | 
 25 | '''
 26 | Theano shared variables require GPUs, so to
 27 | make this code more portable, these two functions
 28 | push and pull variables between a shared
 29 | variable dictionary and a regular numpy 
 30 | dictionary
 31 | '''
 32 | # push parameters to Theano shared variables
 33 | def zipp(params, tparams):
 34 |     for kk, vv in params.iteritems():
 35 |         tparams[kk].set_value(vv)
 36 | 
 37 | # pull parameters from Theano shared variables
 38 | def unzip(zipped):
 39 |     new_params = OrderedDict()
 40 |     for kk, vv in zipped.iteritems():
 41 |         new_params[kk] = vv.get_value()
 42 |     return new_params
 43 | 
 44 | # get the list of parameters: Note that tparams must be OrderedDict
 45 | def itemlist(tparams):
 46 |     return [vv for kk, vv in tparams.iteritems()]
 47 | 
 48 | # dropout
 49 | def dropout_layer(state_before, use_noise, trng):
 50 |     proj = tensor.switch(use_noise,
 51 |                          state_before *
 52 |                          trng.binomial(state_before.shape, p=0.5, n=1, dtype=state_before.dtype),
 53 |                          state_before * 0.5)
 54 |     return proj
 55 | 
 56 | # make prefix-appended name
 57 | def _p(pp, name):
 58 |     return '%s_%s'%(pp, name)
 59 | 
 60 | # all parameters
 61 | def init_params(options):
 62 |     """
 63 |     Initialize all parameters
 64 |     """
 65 |     params = OrderedDict()
 66 |     ctx_dim = options['ctx_dim']
 67 | 
 68 |     # init_state, init_cell
 69 |     for lidx in xrange(1, options['n_layers_init']):
 70 |         params = get_layer('ff')[0](options, params, prefix='ff_init_%d'%lidx, nin=ctx_dim, nout=ctx_dim)
 71 |     params = get_layer('ff')[0](options, params, prefix='ff_state', nin=ctx_dim, nout=options['dim'])
 72 |     params = get_layer('ff')[0](options, params, prefix='ff_memory', nin=ctx_dim, nout=options['dim'])
 73 | 
 74 |     # decoder: LSTM - only 1 layer
 75 |     params = get_layer('lstm_cond')[0](options, params, prefix='decoder',
 76 |                                        nin=options['ctx_dim'], dim=options['dim'],
 77 |                                        dimctx=ctx_dim)
 78 | 
 79 |     # Prediction
 80 |     params = get_layer('ff')[0](options, params, prefix='ff_logit_lstm', nin=options['dim'], nout=options['dim_out'])
 81 |     if options['ctx2out']:
 82 |         params = get_layer('ff')[0](options, params, prefix='ff_logit_ctx', nin=ctx_dim, nout=options['dim_out'])
 83 |     if options['n_layers_out'] > 1:
 84 |         for lidx in xrange(1, options['n_layers_out']):
 85 |             params = get_layer('ff')[0](options, params, prefix='ff_logit_h%d'%lidx, nin=options['dim_out'], nout=options['dim_out'])
 86 |     params = get_layer('ff')[0](options, params, prefix='ff_logit', nin=options['dim_out'], nout=options['n_actions'])
 87 | 
 88 |     return params
 89 | 
 90 | # initialize Theano shared variables according to the initial parameters
 91 | def init_tparams(params):
 92 |     tparams = OrderedDict()
 93 |     for kk, pp in params.iteritems():
 94 |         tparams[kk] = theano.shared(params[kk], name=kk)
 95 |     return tparams
 96 | 
 97 | # load parameters
 98 | def load_params(path, params):
 99 |     pp = numpy.load(path)
100 |     for kk, vv in params.iteritems():
101 |         if kk not in pp:
102 |             raise Warning('%s is not in the archive'%kk)
103 |         params[kk] = pp[kk]
104 |     return params
105 | 
106 | # layers: 'name': ('parameter initializer', 'feedforward')
layers = dict(
    ff=('param_init_fflayer', 'fflayer'),
    lstm_cond=('param_init_lstm_cond', 'lstm_cond_layer'),
)

def get_layer(name):
    """
    Look up a layer type by name.

    Returns a 2-tuple (parameter initializer, layer function); both names
    stored in `layers` are resolved with eval() at call time. An unknown
    `name` raises KeyError.
    """
    init_name, apply_name = layers[name]
    return (eval(init_name), eval(apply_name))
118 | 
119 | # some utilities
def ortho_weight(ndim):
    """
    Random orthogonal matrix of shape (ndim, ndim), as float32.

    A square matrix of standard-normal samples is decomposed with SVD and
    the first factor returned by numpy.linalg.svd (the left singular
    vectors, `u`) is kept. For a square input that factor is an orthogonal
    matrix, so both its rows and its columns are orthonormal.
    """
    gaussian = numpy.random.randn(ndim, ndim)
    left_vectors = numpy.linalg.svd(gaussian)[0]
    return left_vectors.astype('float32')
133 | 
def norm_weight(nin,nout=None, scale=0.01, ortho=True):
    """
    Random float32 weight matrix of shape (nin, nout).

    nin   -- number of rows
    nout  -- number of columns; defaults to `nin` when None
    scale -- standard deviation of the Gaussian used for the non-orthogonal
             case
    ortho -- when True and the matrix is square, an orthogonal matrix from
             ortho_weight() is returned instead of scaled Gaussian samples
    """
    # identity test: `== None` would be evaluated element-wise if an array
    # were ever passed
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        W = ortho_weight(nin)
    else:
        W = scale * numpy.random.randn(nin, nout)
    return W.astype('float32')
145 | 
def tanh(x):
    """Hyperbolic tangent activation, delegated to tensor.tanh."""
    activated = tensor.tanh(x)
    return activated
148 | 
def rectifier(x):
    """Rectified linear activation: tensor.maximum of 0. and `x`."""
    floor = 0.
    return tensor.maximum(floor, x)
151 | 
def linear(x):
    """Identity activation: hand the argument back untouched."""
    unchanged = x
    return unchanged
154 | 
155 | # feedforward layer: affine transformation + point-wise nonlinearity
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None):
    """
    Add the parameters of one feed-forward layer to `params`.

    options -- model options; options['dim_proj'] is read only when `nin`
               or `nout` is left as None
    params  -- dict that receives '<prefix>_W' (from norm_weight with
               scale 0.01) and '<prefix>_b' (float32 zeros of length nout)
    prefix  -- name prefix of the two entries
    nin     -- input dimensionality
    nout    -- output dimensionality

    Returns `params` (the same object that was passed in).
    """
    # identity tests instead of `== None`
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix,'W')] = norm_weight(nin, nout, scale=0.01)
    params[_p(prefix,'b')] = numpy.zeros((nout,)).astype('float32')

    return params
165 | 
def fflayer(tparams, state_below, options, prefix='rconv', activ='lambda x: tensor.tanh(x)', **kwargs):
    """
    Affine transformation followed by a point-wise activation.

    tparams     -- mapping holding '<prefix>_W' and '<prefix>_b'
    state_below -- input that is multiplied with the weight matrix
    options     -- unused here; kept for a uniform layer signature
    activ       -- Python source of the activation; it is turned into a
                   callable with eval(), so only trusted strings may be used
    """
    activation = eval(activ)
    pre_activation = tensor.dot(state_below, tparams[_p(prefix,'W')]) + tparams[_p(prefix,'b')]
    return activation(pre_activation)
168 | 
169 | # Conditional LSTM layer with Attention
def param_init_lstm_cond(options, params, prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    """
    Add the parameters of the attention-conditioned LSTM to `params`.

    options -- model options; 'dim', 'n_layers_att' and 'selector' are read
    params  -- dict that receives the new entries, all named '<prefix>_...'
    nin     -- size of the input fed to the LSTM gates (default options['dim'])
    dim     -- number of LSTM units (default options['dim'])
    dimctx  -- size of the attention hidden layer (default options['dim'])

    Returns `params` (the same object that was passed in).
    """
    # identity tests instead of `== None`
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    # input to LSTM: four (nin, dim) blocks side by side, one per slice that
    # the layer function cuts out of the pre-activation
    W = numpy.concatenate([norm_weight(nin,dim) for _ in range(4)], axis=1)
    params[_p(prefix,'W')] = W

    # LSTM to LSTM: four orthogonal (dim, dim) blocks side by side
    U = numpy.concatenate([ortho_weight(dim) for _ in range(4)], axis=1)
    params[_p(prefix,'U')] = U

    # bias to LSTM
    params[_p(prefix,'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix,'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim,dimctx)
    params[_p(prefix,'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix,'b_att')] = b_att

    # deeper attention
    if options['n_layers_att'] > 1:
        for lidx in xrange(1, options['n_layers_att']):
            params[_p(prefix,'W_att_%d'%lidx)] = ortho_weight(dimctx)
            params[_p(prefix,'b_att_%d'%lidx)] = numpy.zeros((dimctx,)).astype('float32')

    # attention: projection of the hidden layer to one score per annotation
    U_att = norm_weight(dimctx,1)
    params[_p(prefix,'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    # NOTE: the key really is spelled 'c_tt'. lstm_cond_layer reads the same
    # key, so it must not be renamed here alone.
    params[_p(prefix, 'c_tt')] = c_att

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = numpy.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel

    return params
227 | 
def lstm_cond_layer(tparams, state_below, options, prefix='lstm',
                    mask=None, init_memory=None, init_state=None,
                    trng=None, use_noise=None,
                    **kwargs):
    """
    Computation graph for the LSTM with soft attention.
    Note that we removed 'context' and put this into 'state_below'
    Video frames need to be part of scan, since it changes each step

    tparams     -- mapping from '<prefix>_<name>' to the layer parameters
    state_below -- input sequence; its first three dimensions are read as
                   (nsteps, n_samples, n_annotations)
                   NOTE(review): presumably 4-D with the feature dimension
                   last -- confirm against the caller
    options     -- model options; 'n_layers_att', 'temperature_inverse',
                   'selector' and 'ctx_dim' are read
    mask        -- per-step mask; when None a tensor of ones with shape
                   (nsteps, 1) is allocated
    init_memory -- initial cell; zeros of shape (n_samples, dim) when None
    init_state  -- initial hidden state; zeros of shape (n_samples, dim)
                   when None
    trng, use_noise, kwargs -- accepted but not used in this function

    Returns the list of scan outputs in this order: h, c, alpha, ctx_,
    [sel_ only when options['selector']], pstate_, pctx_, i, f, o, preact,
    alpha_pre, and the pre-tanh pctx_. The updates dictionary returned by
    theano.scan is discarded.
    """
    nsteps = state_below.shape[0]
    n_samples = state_below.shape[1]
    n_annotations = state_below.shape[2]

    # mask
    if mask == None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    # number of LSTM units, taken from the rows of the recurrent matrix U
    dim = tparams[_p(prefix, 'U')].shape[0]

    # initial/previous state
    if init_state == None:
        init_state = tensor.alloc(0., n_samples, dim)
    # initial/previous memory 
    if init_memory == None:
        init_memory = tensor.alloc(0., n_samples, dim)

    # cut the n-th block of width `dim` out of the last axis
    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # One scan step. a_ and ct_ (previous alpha and context) as well as dp_
    # and dp_att_ are accepted but never read in the body.
    def _step(m_, x_, h_, c_, a_, ct_, dp_=None, dp_att_=None):
        # mask, xt, ht-1, ct-1, alpha, ctx
        # attention
        # print '\n\ncheck\n\n'
        pstate_ = tensor.dot(h_, tparams[_p(prefix,'Wd_att')]) # pstate_
        pctx_ = tensor.dot(x_, tparams[_p(prefix,'Wc_att')]) + tparams[_p(prefix, 'b_att')]
        # optional deeper attention MLP; tanh is applied between the extra
        # layers but not after the last one
        if options['n_layers_att'] > 1:
            for lidx in xrange(1, options['n_layers_att']):
                pctx_ = tensor.dot(pctx_, tparams[_p(prefix,'W_att_%d'%lidx)])+tparams[_p(prefix, 'b_att_%d'%lidx)]
                if lidx < options['n_layers_att'] - 1:
                    pctx_ = tanh(pctx_)
        # add the projected previous state to every annotation
        pctx_ = pctx_ + pstate_[:,None,:]
        # keep the pre-tanh value so it can be returned as an extra output
        pctx_list = []
        pctx_list.append(pctx_)
        pctx_ = tanh(pctx_)
        alpha = tensor.dot(pctx_, tparams[_p(prefix,'U_att')])+tparams[_p(prefix, 'c_tt')]
        alpha_pre = alpha
        alpha_shp = alpha.shape
        # drop the trailing axis and normalise the scores over annotations
        alpha = tensor.nnet.softmax(options['temperature_inverse']*alpha.reshape([alpha_shp[0],alpha_shp[1]])) # softmax
        ctx_ = (x_ * alpha[:,:,None]).sum(1) # current context
        # print '\n\ncheck\n\n'
        if options['selector']:
            # scalar gate per sample, computed from the previous state
            sel_ = tensor.nnet.sigmoid(tensor.dot(h_, tparams[_p(prefix, 'W_sel')])+tparams[_p(prefix,'b_sel')])
            sel_ = sel_.reshape([sel_.shape[0]])
            ctx_ = sel_[:,None] * ctx_

        preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
        preact += tensor.dot(ctx_, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]

        # blocks of the pre-activation: 0 = input gate, 1 = forget gate,
        # 2 = output gate, 3 = candidate cell value
        i = _slice(preact, 0, dim)              # z_it
        f = _slice(preact, 1, dim)              # z_ft
        o = _slice(preact, 2, dim)              # z_ot
        i = tensor.nnet.sigmoid(i)              # it = sigmoid(z_it)
        f = tensor.nnet.sigmoid(f)              # ft = sigmoid(z_ft)
        o = tensor.nnet.sigmoid(o)              # ot = sigmoid(z_ot)
        c = tensor.tanh(_slice(preact, 3, dim)) # at = tanh(z_at)

        c = f * c_ + i * c                      # ct = ft * ct-1 + it * at
        # where the mask is 0 the previous cell is carried over unchanged
        c = m_[:,None] * c + (1. - m_)[:,None] * c_

        h = o * tensor.tanh(c)                  # ht = ot * tanh(ct)
        # same carry-over for the hidden state
        h = m_[:,None] * h + (1. - m_)[:,None] * h_

        # the order of this list must match outputs_info below
        rval = [h, c, alpha, ctx_]
        if options['selector']:
            rval += [sel_]
        rval += [pstate_, pctx_, i, f, o, preact, alpha_pre]+pctx_list
        # print '\n\ncheck\n\n'
        return rval

    # scan passes one extra recurrent argument (sel_) when the selector is
    # enabled; the wrapper drops it before calling _step
    if options['selector']:
        _step0 = lambda m_, x_, h_, c_, a_, ct_, sel_: _step(m_, x_, h_, c_, a_, ct_)
    else:
        _step0 = lambda m_, x_, h_, c_, a_, ct_: _step(m_, x_, h_, c_, a_, ct_)

    seqs = [mask, state_below]
    # recurrent outputs get an initial value: state, memory, alpha, context
    outputs_info = [init_state,
                    init_memory,
                    tensor.alloc(0., n_samples, n_annotations),
                    tensor.alloc(0., n_samples, options['ctx_dim'])]
    if options['selector']:
        outputs_info += [tensor.alloc(0., n_samples)]
    # eight outputs without initial value: pstate_, pctx_, i, f, o, preact,
    # alpha_pre and the single pre-tanh pctx_ entry
    outputs_info += [None,
                     None,
                     None,
                     None,
                     None,
                     None,
                     None] + [None]#*options['n_layers_att']
    rval, updates = theano.scan(_step0,
                                sequences=seqs,
                                outputs_info=outputs_info,
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps, profile=False)
    return rval
334 | 
# build a training model
def build_model(tparams, options):
    """
    Build up the whole computation graph

    tparams maps parameter names to Theano shared variables and options is
    the hyper-parameter dict.  Returns the tuple
    (trng, use_noise, [x, mask, y], alphas, cost, opt_outs, preds) where
    cost is the mask-weighted negative log-likelihood summed over both
    leading axes and preds is, per sample, the argmax of the class
    probabilities summed over the last options['last_n'] time steps.
    opt_outs holds the selector outputs under 'selector' when
    options['selector'] is set and is empty otherwise.
    """
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))
    last_n = options['last_n']
    apply_ff = get_layer('ff')[1]

    # video blocks. (n_timesteps, n_samples, n_annotations, ctxdim)
    x = tensor.tensor4('x', dtype='float32')
    mask = tensor.matrix('mask', dtype='float32')
    # action labels
    y = tensor.matrix('y', dtype='int64')
    n_timesteps, n_samples = x.shape[0], x.shape[1]

    # initial state/cell: average over time, then over annotations, which
    # leaves one (n_samples, ctxdim) summary per video block
    summary = x.mean(0).mean(1)
    for lidx in xrange(1, options['n_layers_init']):
        summary = apply_ff(tparams, summary, options,
                           prefix='ff_init_%d'%lidx, activ='rectifier')
        if options['use_dropout']:
            summary = dropout_layer(summary, use_noise, trng)

    init_state = apply_ff(tparams, summary, options, prefix='ff_state', activ='tanh')
    init_memory = apply_ff(tparams, summary, options, prefix='ff_memory', activ='tanh')

    # decoder
    decoder_out = get_layer('lstm_cond')[1](tparams, x, options,
                                            prefix='decoder',
                                            mask=mask,
                                            init_state=init_state,
                                            init_memory=init_memory,
                                            trng=trng,
                                            use_noise=use_noise)
    # collection
    hidden, alphas, ctxs = decoder_out[0], decoder_out[2], decoder_out[3]
    opt_outs = dict()
    if options['selector']:
        opt_outs['selector'] = decoder_out[4]
    if options['use_dropout']:
        hidden = dropout_layer(hidden, use_noise, trng)

    # outputs
    logit = apply_ff(tparams, hidden, options, prefix='ff_logit_lstm', activ='linear')
    if options['ctx2out']:
        logit = logit + apply_ff(tparams, ctxs, options, prefix='ff_logit_ctx', activ='linear')
    logit = tanh(logit)
    if options['use_dropout']:
        logit = dropout_layer(logit, use_noise, trng)
    if options['n_layers_out'] > 1:
        for lidx in xrange(1, options['n_layers_out']):
            logit = apply_ff(tparams, logit, options, prefix='ff_logit_h%d'%lidx, activ='rectifier')
            if options['use_dropout']:
                logit = dropout_layer(logit, use_noise, trng)

    logit = apply_ff(tparams, logit, options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape #(TS, BS, #actions)

    # softmax works on matrices, so flatten (TS,BS) first and restore after
    flat_shape = [logit_shp[0]*logit_shp[1], logit_shp[2]]             #(TSxBS,#actions)
    probs = tensor.nnet.softmax(logit.reshape(flat_shape))             #(TSxBS,#actions)
    probs = probs.reshape([logit_shp[0], logit_shp[1], logit_shp[2]])  #(TS,BS,#actions)

    # Predictions: vote with the probabilities of the last `last_n` steps
    votes = tensor.sum(probs[-last_n:, :, :],axis=0) #(BS,#actions)
    preds = tensor.argmax(votes,axis=1) # computed y; true y is in 'y'

    # Cost function: negative log-probability of the true label at every
    # (time step, sample) position, weighted by the mask
    probs = probs.reshape(flat_shape)
    flat_labels = tensor.reshape(y, [x.shape[0]*x.shape[1],])
    picked = probs[tensor.arange(n_timesteps*n_samples), flat_labels]
    cost = -tensor.log(picked + 1e-8)
    cost = cost.reshape([x.shape[0], x.shape[1]])
    cost = (cost * mask).sum(0).sum(0)

    return trng, use_noise, [x, mask, y], alphas, cost, opt_outs, preds
424 | 
def pred_acc(modelname, batch_size, f_preds, maxlen, data_test_pb, dh_test, test_dataset_size, num_test_batches, last_n, test=True, verbose=False):
    """
    Make predictions for new data and return the per-video accuracy.

    Every example is classified with ``f_preds``; the examples are grouped
    into videos through ``dh_test.video_ind_`` / ``dh_test.frame_indices_``,
    each video is assigned the most frequent class among its examples (the
    smallest class wins a tie) and the result is compared with the labels in
    ``data_test_pb.labels_file`` (one integer per line).

    Parameters
    ----------
    modelname : path whose base name (without extension) names the result file
    batch_size : number of columns of the mask handed to ``f_preds``
    f_preds : callable ``f_preds(x, mask, y)`` returning one class per sample
    maxlen : number of rows of the mask
    data_test_pb : object passed to ``dh_test.GetBatch``; provides ``labels_file``
    dh_test : data handler providing ``Reset``, ``GetBatch``, ``video_ind_``
        and ``frame_indices_``
    test_dataset_size : total number of examples
    num_test_batches : number of batches to request
    last_n : only used in the name of the result file
    test : selects the ``test_results_`` or ``train_results_`` file prefix
    verbose : print progress after every batch

    Returns
    -------
    Fraction of videos whose voted class equals the label.

    Side effect: writes ``<prefix>last<last_n>_<model>.txt`` in the current
    directory, one ``"<video index> <pred>,<pred>,..."`` line per video.
    """
    dh_test.Reset()
    n_examples = test_dataset_size
    preds = numpy.zeros((n_examples,)).astype('float32')
    n_done = 0
    mask = numpy.ones((maxlen, batch_size)).astype('float32')

    for tbidx in range(num_test_batches):
        x, y, n_ex = dh_test.GetBatch(data_test_pb)
        n_done += n_ex
        partial = n_ex != batch_size
        if partial:
            # blank out the padding columns of a short batch
            mask[:, n_ex:] = 0.
        pred_ = f_preds(x, mask, y)
        start = tbidx * batch_size
        preds[start:start + n_ex] = pred_[0:n_ex]
        if partial:
            mask[:, n_ex:] = 1.

        if verbose:
            print('%d/%d examples computed' % (n_done, n_examples))

    if test:
        fileprefix = 'test_results_last{}_'.format(last_n)
    else:
        fileprefix = 'train_results_last{}_'.format(last_n)
    tempfilename = fileprefix + modelname.split('/')[-1].split('.')[0] + '.txt'

    # Dump the per-example predictions, one line per video.  The video
    # counter only ever advances by one when the video of an example differs
    # from the current one, exactly as before.
    with open(tempfilename, 'w') as f:
        vid_idx = 0
        resultstr = '{} '.format(vid_idx)
        for i in range(n_examples):
            if dh_test.video_ind_[dh_test.frame_indices_[i]] != vid_idx:
                vid_idx = vid_idx + 1
                f.write(resultstr[:-1] + '\n')
                resultstr = '{} '.format(vid_idx)
            resultstr = resultstr + '{},'.format(int(preds[i]))
        f.write(resultstr[:-1] + '\n')

    with open(tempfilename, 'r') as f:
        lines = f.readlines()

    pred = numpy.zeros(len(lines)).astype('int64')
    for i, line in enumerate(lines):
        fields = line.split(' ')
        if len(fields) < 2:
            # a video line without any prediction
            print('One blank index skipped')
            pred[i] = -1
            continue
        votes = numpy.array([int(v) for v in fields[1].rstrip('\n').split(',')])
        # Majority vote.  numpy.unique returns sorted values, so argmax picks
        # the smallest of the most frequent classes.  This avoids indexing
        # the result of scipy.stats.mode, whose shape depends on the SciPy
        # version.
        values, counts = numpy.unique(votes, return_counts=True)
        pred[i] = int(values[counts.argmax()])

    # strip() instead of dropping the last character: the final label must
    # not be truncated when the file has no trailing newline
    with open(data_test_pb.labels_file, 'r') as f:
        truth = numpy.array([int(label.strip()) for label in f.readlines()], dtype='int64')

    return (truth==pred).mean()
503 | 
# optimizers
# name(hyperp, tparams, grads, inputs (list), cost) = f_grad_shared, f_update
def adam(lr, tparams, grads, inp, cost):
    """
    Adam: A Method for Stochastic Optimization (Diederik Kingma, Jimmy Ba)

    Returns (f_grad_shared, f_update).  f_grad_shared(*inp) evaluates cost
    and stores the gradients in shared buffers; f_update(lr) applies one
    step computed from those buffers.

    NOTE: the value passed to f_update is not used -- the step size comes
    from the constant lr0 below.  b1 and b2 are the weights of the *new*
    gradient terms in the running averages.
    """
    grad_buffers = []
    for name, param in tparams.iteritems():
        grad_buffers.append(theano.shared(param.get_value() * numpy.float32(0.), name='%s_grad'%name))
    store_grads = list(zip(grad_buffers, grads))
    f_grad_shared = theano.function(inp, cost, updates=store_grads, allow_input_downcast=True)

    # Magic numbers
    lr0 = 0.0002
    b1 = 0.1
    b2 = 0.001
    e = 1e-8

    # step counter and the correction factors derived from it
    step = theano.shared(numpy.float32(0.))
    next_step = step + 1.
    fix1 = 1. - b1**(next_step)
    fix2 = 1. - b2**(next_step)
    lr_t = lr0 * (tensor.sqrt(fix2) / fix1)

    updates = []
    for param, grad in zip(tparams.values(), grad_buffers):
        zeros = param.get_value() * numpy.float32(0.)
        mean = theano.shared(zeros)
        var = theano.shared(param.get_value() * numpy.float32(0.))
        new_mean = (b1 * grad) + ((1. - b1) * mean)
        new_var = (b2 * tensor.sqr(grad)) + ((1. - b2) * var)
        direction = new_mean / (tensor.sqrt(new_var) + e)
        updates.extend([(mean, new_mean),
                        (var, new_var),
                        (param, param - (lr_t * direction))])
    updates.append((step, next_step))

    f_update = theano.function([lr], [], updates=updates, on_unused_input='ignore')

    return f_grad_shared, f_update
544 | 
def adadelta(lr, tparams, grads, inp, cost):
    """
    Build the two Theano functions of an Adadelta-style optimizer.

    f_grad_shared(*inp) returns cost, stores the gradients and updates the
    running average of the squared gradients (decay 0.95).  f_update(lr)
    scales the stored gradients by sqrt(running squared update + 1e-6) /
    sqrt(running squared gradient + 1e-6) and applies the step.  The lr
    argument of f_update is accepted but not used.
    """
    def _zero_buffers(name_template):
        # one zero-filled shared variable per parameter
        buffers = []
        for k, p in tparams.iteritems():
            buffers.append(theano.shared(p.get_value() * numpy.float32(0.), name=name_template%k))
        return buffers

    zipped_grads = _zero_buffers('%s_grad')
    running_up2 = _zero_buffers('%s_rup2')
    running_grads2 = _zero_buffers('%s_rgrad2')

    store_grads = list(zip(zipped_grads, grads))
    decay_sq_grads = []
    for rg2, g in zip(running_grads2, grads):
        decay_sq_grads.append((rg2, 0.95 * rg2 + 0.05 * (g ** 2)))

    f_grad_shared = theano.function(inp, cost, updates=store_grads+decay_sq_grads, profile=False)

    decay_sq_steps = []
    apply_steps = []
    for p, zg, ru2, rg2 in zip(itemlist(tparams), zipped_grads, running_up2, running_grads2):
        step = -tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
        decay_sq_steps.append((ru2, 0.95 * ru2 + 0.05 * (step ** 2)))
        apply_steps.append((p, p + step))

    f_update = theano.function([lr], [], updates=decay_sq_steps+apply_steps, on_unused_input='ignore', profile=False)

    return f_grad_shared, f_update
562 | 
def rmsprop(lr, tparams, grads, inp, cost):
    """
    Build the two Theano functions of an RMSProp-style optimizer with a
    0.9 momentum term.

    f_grad_shared(*inp) returns cost, stores the gradients and updates the
    running averages of the gradients and of their squares (decay 0.95).
    f_update(lr) updates the per-parameter step buffer and adds the new step
    to the parameter.  The lr argument of f_update is accepted but not used;
    the step scale is the constant 1e-4.
    """
    def _zero_buffers(name_template):
        # one zero-filled shared variable per parameter
        buffers = []
        for k, p in tparams.iteritems():
            buffers.append(theano.shared(p.get_value() * numpy.float32(0.), name=name_template%k))
        return buffers

    zipped_grads = _zero_buffers('%s_grad')
    running_grads = _zero_buffers('%s_rgrad')
    running_grads2 = _zero_buffers('%s_rgrad2')

    store_grads = list(zip(zipped_grads, grads))
    decay_grads = []
    for rg, g in zip(running_grads, grads):
        decay_grads.append((rg, 0.95 * rg + 0.05 * g))
    decay_sq_grads = []
    for rg2, g in zip(running_grads2, grads):
        decay_sq_grads.append((rg2, 0.95 * rg2 + 0.05 * (g ** 2)))

    f_grad_shared = theano.function(inp, cost, updates=store_grads+decay_grads+decay_sq_grads, profile=False)

    updir = _zero_buffers('%s_updir')
    step_updates = []
    apply_steps = []
    for p, ud, zg, rg, rg2 in zip(itemlist(tparams), updir, zipped_grads, running_grads, running_grads2):
        new_step = 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)
        step_updates.append((ud, new_step))
        apply_steps.append((p, p + new_step))
    f_update = theano.function([lr], [], updates=step_updates+apply_steps, on_unused_input='ignore', profile=False)

    return f_grad_shared, f_update
580 | 
def sgd(lr, tparams, grads, inp, cost):
    """
    Build the two Theano functions of plain stochastic gradient descent.

    f_grad_shared(*inp) returns cost and stores the gradients in shared
    buffers; f_update(lr) subtracts lr times the stored gradient from every
    parameter.
    """
    grad_buffers = []
    for name, param in tparams.iteritems():
        grad_buffers.append(theano.shared(param.get_value() * numpy.float32(0.), name='%s_grad'%name))
    store_grads = list(zip(grad_buffers, grads))

    f_grad_shared = theano.function(inp, cost, updates=store_grads, profile=False)

    descend = []
    for param, grad in zip(itemlist(tparams), grad_buffers):
        descend.append((param, param - lr * grad))
    f_update = theano.function([lr], [], updates=descend, profile=False)

    return f_grad_shared, f_update
591 | 
# validate options
def validate_options(options):
    """
    Placeholder for hyper-parameter sanity checks.

    No check is implemented: options is not inspected and the function
    returns None.
    """
    return None
597 | 
598 | def train(dim_out=100, # hidden layer dim for outputs
599 |           ctx_dim=512, # context vector dimensionality
600 |           dim=1000, # the number of LSTM units
601 |           n_actions=3, # number of actions to predict
602 |           n_layers_att=1,
603 |           n_layers_out=1,
604 |           n_layers_init=1,
605 |           ctx2out=False,
606 |           patience=10,
607 |           max_epochs=5000,
608 |           dispFreq=100,
609 |           decay_c=0.,
610 |           alpha_c=0.,
611 |           temperature_inverse=1.0,
612 |           lrate=0.01,
613 |           selector=False,
614 |           maxlen=5, # maximum length of the video
615 |           optimizer='adam',
616 |           batch_size = 16,
617 |           valid_batch_size = 16,
618 |           saveto='model.npz',
619 |           validFreq=1000,
620 |           saveFreq=1000, # save the parameters after every saveFreq updates
621 |           dataset='flickr8k', # dummy dataset, replace with video ones
622 |           dictionary=None, # word dictionary
623 |           use_dropout=False,
624 |           reload_=False,
625 |           training_stride=1,
626 |           testing_stride=8,
627 |           last_n=16,
628 |           fps=30):
629 | 
630 |     # Model options
631 |     model_options = locals().copy()
632 |     #model_options = validate_options(model_options)
633 | 
634 |     # reload options
635 |     if reload_ and os.path.exists(saveto):
636 |         print "Reloading options"
637 |         with open('%s.pkl'%saveto, 'rb') as f:
638 |             model_options = pkl.load(f)
639 | 
640 |     print '-----'
641 |     print 'Booting up all data handlers' 
642 |     data_pb = TrainProto(batch_size,maxlen,training_stride,dataset,fps)
643 |     dh = DataHandler(data_pb)
644 |     dataset_size = dh.GetDatasetSize()
645 |     num_train_batches = dataset_size / batch_size
646 |     if dataset_size % batch_size != 0:
647 |         num_train_batches += 1
648 | 
649 |     valid = True # not None
650 |     test  = True # not None
651 | 
652 |     data_test_train_pb = TestTrainProto(valid_batch_size,maxlen,testing_stride,dataset,fps)
653 |     dh_test_train = DataHandler(data_test_train_pb)
654 |     test_train_dataset_size = dh_test_train.GetDatasetSize()
655 |     num_test_train_batches = test_train_dataset_size / valid_batch_size
656 |     if test_train_dataset_size % valid_batch_size != 0:
657 |         num_test_train_batches += 1
658 | 
659 |     data_test_valid_pb = TestValidProto(valid_batch_size,maxlen,testing_stride,dataset,fps)
660 |     dh_test_valid = DataHandler(data_test_valid_pb)
661 |     test_valid_dataset_size = dh_test_valid.GetDatasetSize()
662 |     num_test_valid_batches = test_valid_dataset_size / valid_batch_size
663 |     if test_valid_dataset_size % valid_batch_size != 0:
664 |         num_test_valid_batches += 1
665 | 
666 |     data_test_test_pb = TestTestProto(valid_batch_size,maxlen,testing_stride,dataset,fps)
667 |     dh_test_test = DataHandler(data_test_test_pb)
668 |     test_test_dataset_size = dh_test_test.GetDatasetSize()
669 |     num_test_test_batches = test_test_dataset_size / valid_batch_size
670 |     if test_test_dataset_size % valid_batch_size != 0:
671 |         num_test_test_batches += 1
672 |     print 'Data handlers ready'
673 |     print '-----'
674 | 
675 |     print 'Building model'
676 |     params = init_params(model_options)
677 |     # reload parameters
678 |     if reload_ and os.path.exists(saveto):
679 |         print "Reloading model"
680 |         params = load_params(saveto, params)
681 | 
682 |     tparams = init_tparams(params)
683 | 
684 |     trng, use_noise, \
685 |           inps, alphas, \
686 |           cost, \
687 |           opts_out, preds = \
688 |           build_model(tparams, model_options)
689 | 
690 |     # before any regularizer
691 |     f_log_probs = theano.function(inps, -cost, profile=False)
692 |     f_preds = theano.function(inps, preds, profile=False, on_unused_input='ignore')
693 | 
694 |     cost = cost.mean()
695 |     if decay_c > 0.:
696 |         decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
697 |         weight_decay = 0.
698 |         for kk, vv in tparams.iteritems():
699 |             weight_decay += (vv ** 2).sum()
700 |         weight_decay *= decay_c
701 |         cost += weight_decay
702 | 
703 |     if alpha_c > 0.:
704 |         alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
705 |         alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean()
706 |         cost += alpha_reg
707 | 
708 |     # gradient computation
709 |     grads = tensor.grad(cost, wrt=itemlist(tparams))
710 |     lr = tensor.scalar(name='lr')
711 |     f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
712 | 
713 |     print 'Optimization'
714 | 
715 |     history_errs = []
716 |     # reload history
717 |     if reload_ and os.path.exists(saveto):
718 |         history_errs = numpy.load(saveto)['history_errs'].tolist()
719 |     best_p = None
720 |     bad_count = 0
721 | 
722 |     uidx = 0
723 | 
724 |     for epochidx in xrange(max_epochs):
725 |         # If the input sequences are of variable length get mask from the data loader instead of setting them all to one
726 |         mask = numpy.ones((maxlen, batch_size)).astype('float32')
727 |         print 'Epoch ', epochidx
728 |         n_examples_seen = 0
729 |         estop = False
730 |         if epochidx > 0:
731 |             dh.Reset()
732 | 
733 |         for tbidx in xrange(num_train_batches):
734 |             n_examples_seen += batch_size
735 |             uidx += 1
736 |             use_noise.set_value(1.)
737 | 
738 |             pd_start = time.time()
739 |             x, y, n_ex = dh.GetBatch(data_pb)
740 |             if n_ex != batch_size:
741 |                 mask[:,n_ex:] = numpy.zeros((maxlen, batch_size-n_ex)).astype('float32')
742 |             pd_duration = time.time() - pd_start
743 | 
744 |             if x == None:
745 |                 print 'Minibatch with zero sample under length ', maxlen
746 |                 continue
747 |             ud_start = time.time()
748 | 
749 |             cost = f_grad_shared(x, mask, y)
750 |             f_update(lrate)
751 |             ud_duration = time.time() - ud_start
752 | 
753 |             if n_ex != batch_size:
754 |                 mask[:,n_ex:] = numpy.ones((maxlen, batch_size-n_ex)).astype('float32')
755 | 
756 |             if numpy.isnan(cost):
757 |                 print 'NaN detected in cost'
758 |                 return 1., 1., 1.
759 |             if numpy.isinf(cost):
760 |                 print 'INF detected in cost'
761 |                 return 1., 1., 1.
762 | 
763 |             if numpy.mod(uidx, dispFreq) == 0:
764 |                 print 'Epoch ', epochidx, 'Update ', uidx, 'Cost ', cost, 'PD ', pd_duration, 'UD ', ud_duration
765 | 
766 |             if numpy.mod(uidx, saveFreq) == 0:
767 |                 print 'Saving...',
768 | 
769 |                 if best_p != None:
770 |                     params = copy.copy(best_p)
771 |                 else:
772 |                     params = unzip(tparams)
773 |                 numpy.savez(saveto, history_errs=history_errs, **params)
774 |                 pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
775 |                 print 'Done'
776 | 
777 |             if numpy.mod(uidx, validFreq) == 0:
778 |                 use_noise.set_value(0.)
779 |                 train_err = 0
780 |                 valid_err = 0
781 |                 test_err = 0
782 |                 print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
783 |                 train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
784 |                 if valid is not None:
785 |                     valid_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True)
786 |                 if test is not None:
787 |                     test_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True)
788 | 
789 |                 history_errs.append([valid_err, test_err])
790 | 
791 |                 if uidx == 0 or valid_err >= numpy.array(history_errs)[:,0].max():
792 |                     best_p = unzip(tparams) # p for min valid err / max valid acc
793 | 
794 |                 print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err
795 |         if n_ex == batch_size:
796 |             print 'Seen %d training examples'% (n_examples_seen)
797 |         else:
798 |             print 'Seen %d training examples'% (n_examples_seen-batch_size+n_ex)
799 |         use_noise.set_value(0.)
800 |         train_err = 0
801 |         valid_err = 0
802 |         test_err = 0
803 |         print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'    
804 |         train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
805 |         if valid is not None:
806 |             valid_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True)
807 |         if test is not None:
808 |             test_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True)
809 |             
810 |         history_errs.append([valid_err, test_err])
811 | 
812 |         if epochidx == 0 or valid_err >= numpy.array(history_errs)[:,0].max():
813 |             best_p = unzip(tparams) # p for min valid err / max valid acc
814 | 
815 |         print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err
816 | 
817 |     if best_p is not None:
818 |         zipp(best_p, tparams)
819 | 
820 |     use_noise.set_value(0.)
821 |     train_err = 0
822 |     valid_err = 0
823 |     test_err = 0
824 |     print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
825 |     train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
826 |     if valid is not None:
827 |         valid_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True)
828 |     if test is not None:
829 |         test_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True)
830 | 
831 |     print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err
832 |     params = copy.copy(best_p)
833 |     numpy.savez(saveto, zipped_params=best_p, train_err=train_err,
834 |                 valid_err=valid_err, test_err=test_err, history_errs=history_errs,
835 |                 **params)
836 | 
837 |     print model_options
838 | 
839 |     return train_err, valid_err, test_err
840 | 
# Executing this file as a script is a no-op.
if __name__ == '__main__':
    pass
843 | 
844 | 


--------------------------------------------------------------------------------
/src/actrec_mAP.py:
--------------------------------------------------------------------------------
  1 | # Attention-based action recognition
  2 | 
  3 | import theano
  4 | import theano.tensor as tensor
  5 | theano.config.floatX = 'float32'
  6 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
  7 | 
  8 | import cPickle as pkl
  9 | import numpy
 10 | import copy
 11 | import os
 12 | import time
 13 | 
 14 | from scipy import optimize, stats
 15 | from collections import OrderedDict
 16 | 
 17 | import warnings
 18 | 
 19 | from util.data_handler import DataHandler
 20 | from util.data_handler import TrainProto
 21 | from util.data_handler import TestTrainProto
 22 | from util.data_handler import TestValidProto
 23 | from util.data_handler import TestTestProto
 24 | 
 25 | '''
 26 | Theano shared variables require GPUs, so to
 27 | make this code more portable, these two functions
 28 | push and pull variables between a shared
 29 | variable dictionary and a regular numpy 
 30 | dictionary
 31 | '''
 32 | # push parameters to Theano shared variables
 33 | def zipp(params, tparams):
 34 |     for kk, vv in params.iteritems():
 35 |         tparams[kk].set_value(vv)
 36 | 
 37 | # pull parameters from Theano shared variables
 38 | def unzip(zipped):
 39 |     new_params = OrderedDict()
 40 |     for kk, vv in zipped.iteritems():
 41 |         new_params[kk] = vv.get_value()
 42 |     return new_params
 43 | 
 44 | # get the list of parameters: Note that tparams must be OrderedDict
 45 | def itemlist(tparams):
 46 |     return [vv for kk, vv in tparams.iteritems()]
 47 | 
 48 | # dropout
 49 | def dropout_layer(state_before, use_noise, trng):
 50 |     proj = tensor.switch(use_noise,
 51 |                          state_before *
 52 |                          trng.binomial(state_before.shape, p=0.5, n=1, dtype=state_before.dtype),
 53 |                          state_before * 0.5)
 54 |     return proj
 55 | 
 56 | # make prefix-appended name
 57 | def _p(pp, name):
 58 |     return '%s_%s'%(pp, name)
 59 | 
 60 | # all parameters
 61 | def init_params(options):
 62 |     """
 63 |     Initialize all parameters
 64 |     """
 65 |     params = OrderedDict()
 66 |     ctx_dim = options['ctx_dim']
 67 | 
 68 |     # init_state, init_cell
 69 |     for lidx in xrange(1, options['n_layers_init']):
 70 |         params = get_layer('ff')[0](options, params, prefix='ff_init_%d'%lidx, nin=ctx_dim, nout=ctx_dim)
 71 |     params = get_layer('ff')[0](options, params, prefix='ff_state', nin=ctx_dim, nout=options['dim'])
 72 |     params = get_layer('ff')[0](options, params, prefix='ff_memory', nin=ctx_dim, nout=options['dim'])
 73 | 
 74 |     # decoder: LSTM - only 1 layer
 75 |     params = get_layer('lstm_cond')[0](options, params, prefix='decoder',
 76 |                                        nin=options['ctx_dim'], dim=options['dim'],
 77 |                                        dimctx=ctx_dim)
 78 | 
 79 |     # Prediction
 80 |     params = get_layer('ff')[0](options, params, prefix='ff_logit_lstm', nin=options['dim'], nout=options['dim_out'])
 81 |     if options['ctx2out']:
 82 |         params = get_layer('ff')[0](options, params, prefix='ff_logit_ctx', nin=ctx_dim, nout=options['dim_out'])
 83 |     if options['n_layers_out'] > 1:
 84 |         for lidx in xrange(1, options['n_layers_out']):
 85 |             params = get_layer('ff')[0](options, params, prefix='ff_logit_h%d'%lidx, nin=options['dim_out'], nout=options['dim_out'])
 86 |     params = get_layer('ff')[0](options, params, prefix='ff_logit', nin=options['dim_out'], nout=options['n_actions'])
 87 | 
 88 |     return params
 89 | 
 90 | # initialize Theano shared variables according to the initial parameters
 91 | def init_tparams(params):
 92 |     tparams = OrderedDict()
 93 |     for kk, pp in params.iteritems():
 94 |         tparams[kk] = theano.shared(params[kk], name=kk)
 95 |     return tparams
 96 | 
 97 | # load parameters
 98 | def load_params(path, params):
 99 |     pp = numpy.load(path)
100 |     for kk, vv in params.iteritems():
101 |         if kk not in pp:
102 |             raise Warning('%s is not in the archive'%kk)
103 |         params[kk] = pp[kk]
104 |     return params
105 | 
# layers: 'name': ('parameter initializer', 'feedforward')
# The function names are kept as strings and resolved later by get_layer.
layers = dict(
    ff=('param_init_fflayer', 'fflayer'),
    lstm_cond=('param_init_lstm_cond', 'lstm_cond_layer'),
)
110 | 
def get_layer(name):
    """
    Look up layer `name` in `layers` and return the pair
    (parameter initializer, feedforward function).

    The table stores function names; both are resolved with eval() on every
    call.
    """
    init_name, apply_name = layers[name][0], layers[name][1]
    return (eval(init_name), eval(apply_name))
118 | 
# some utilities
def ortho_weight(ndim):
    """
    Random orthogonal weights.

    A (ndim, ndim) matrix is drawn from a standard normal distribution and
    the first factor returned by numpy.linalg.svd (the left singular
    vectors, u) is returned as float32.
    """
    gaussian = numpy.random.randn(ndim, ndim)
    left_vectors = numpy.linalg.svd(gaussian)[0]
    return left_vectors.astype('float32')
133 | 
def norm_weight(nin,nout=None, scale=0.01, ortho=True):
    """
    Return a float32 (nin, nout) weight matrix.

    nout defaults to nin.  A square matrix with ortho=True is produced by
    ortho_weight; every other case is scale times a standard-normal draw.
    """
    if nout is None:
        nout = nin
    if nout == nin and ortho:
        W = ortho_weight(nin)
    else:
        W = scale * numpy.random.randn(nin, nout)
    return W.astype('float32')
145 | 
def sigmoid(x):
    """Apply tensor.nnet.sigmoid to x."""
    squashed = tensor.nnet.sigmoid(x)
    return squashed
148 | 
def tanh(x):
    """Apply tensor.tanh to x."""
    squashed = tensor.tanh(x)
    return squashed
151 | 
def rectifier(x):
    """Rectified linear unit: element-wise tensor.maximum(0., x)."""
    clipped = tensor.maximum(0., x)
    return clipped
154 | 
def linear(x):
    """Identity activation: return x unchanged."""
    unchanged = x
    return unchanged
157 | 
# feedforward layer: affine transformation + point-wise nonlinearity
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None):
    """
    Add the weight '<prefix>_W' (nin, nout) and the zero bias '<prefix>_b'
    (nout,) of a feedforward layer to params and return params.

    nin and nout fall back to options['dim_proj'] when they are None.
    """
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix,'W')] = norm_weight(nin, nout, scale=0.01)
    params[_p(prefix,'b')] = numpy.zeros((nout,)).astype('float32')

    return params
168 | 
def fflayer(tparams, state_below, options, prefix='rconv', activ='lambda x: tensor.tanh(x)', **kwargs):
    """
    Apply the feedforward layer stored under prefix: the activation named by
    activ (a Python expression resolved with eval) of
    state_below . '<prefix>_W' + '<prefix>_b'.
    """
    nonlinearity = eval(activ)
    weights = tparams[_p(prefix,'W')]
    bias = tparams[_p(prefix,'b')]
    return nonlinearity(tensor.dot(state_below, weights)+bias)
171 | 
# Conditional LSTM layer with Attention
def param_init_lstm_cond(options, params, prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    """
    Add the parameters of the attention-conditioned LSTM to params under
    prefix and return params.

    nin, dim and dimctx each fall back to options['dim'] when None.  The
    entries are created in a fixed order (W, U, b, Wc_att, Wd_att, b_att,
    optional W_att_%d / b_att_%d, U_att, c_tt, optional W_sel / b_sel); the
    order also fixes the sequence of random draws.
    """
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    # input to LSTM: four (nin, dim) blocks side by side
    W = numpy.concatenate([norm_weight(nin,dim) for _ in xrange(4)], axis=1)
    params[_p(prefix,'W')] = W

    # LSTM to LSTM: four orthogonal (dim, dim) blocks side by side
    U = numpy.concatenate([ortho_weight(dim) for _ in xrange(4)], axis=1)
    params[_p(prefix,'U')] = U

    # bias to LSTM
    params[_p(prefix,'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix,'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim,dimctx)
    params[_p(prefix,'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix,'b_att')] = b_att

    # deeper attention
    if options['n_layers_att'] > 1:
        for lidx in xrange(1, options['n_layers_att']):
            params[_p(prefix,'W_att_%d'%lidx)] = ortho_weight(dimctx)
            params[_p(prefix,'b_att_%d'%lidx)] = numpy.zeros((dimctx,)).astype('float32')

    # attention: projection to one score and its bias
    U_att = norm_weight(dimctx,1)
    params[_p(prefix,'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    # NOTE(review): the key really is 'c_tt'; presumably the layer code looks
    # it up under this exact name, so it must not be "corrected" here alone.
    params[_p(prefix, 'c_tt')] = c_att

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = numpy.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel

    return params
230 | 
def lstm_cond_layer(tparams, state_below, options, prefix='lstm',
                    mask=None, init_memory=None, init_state=None,
                    trng=None, use_noise=None,
                    **kwargs):
    """
    Computation graph for the attention-conditioned LSTM.

    Note that we removed 'context' and put this into 'state_below':
    video frames need to be part of scan, since the context changes at
    each step.

    state_below : features scanned over their first axis; axes 0, 1 and 2
                  are read as (nsteps, n_samples, n_annotations).
    mask        : per-step mask scanned alongside state_below; defaults to
                  an all-ones tensor of shape (nsteps, 1).
    init_state, init_memory : initial hidden state and cell; each defaults
                  to zeros of shape (n_samples, dim).
    trng, use_noise, kwargs : accepted but not used inside this function.

    Returns the per-step outputs of theano.scan as a list, in the order
    [h, c, alpha, ctx_, (sel_ if options['selector']), pstate_, pctx_,
     i, f, o, preact, alpha_pre, pctx_ before its final tanh].
    The update dictionary returned by scan is discarded.
    """
    nsteps = state_below.shape[0]
    n_samples = state_below.shape[1]
    n_annotations = state_below.shape[2]

    # mask
    if mask == None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    # U is built as (dim, 4*dim) by param_init_lstm_cond, so its row count
    # is the number of LSTM units.
    dim = tparams[_p(prefix, 'U')].shape[0]

    # initial/previous state
    if init_state == None:
        init_state = tensor.alloc(0., n_samples, dim)
    # initial/previous memory
    if init_memory == None:
        init_memory = tensor.alloc(0., n_samples, dim)

    # Pick the n-th block of width `dim` along the last axis.
    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # One scan step.  a_, ct_, dp_ and dp_att_ are accepted but never read
    # in the body; a_ and ct_ are the previous alpha/context that scan feeds
    # back because they are recurrent outputs.
    def _step(m_, x_, h_, c_, a_, ct_, dp_=None, dp_att_=None):
        # mask, xt, ht-1, ct-1, alpha, ctx
        # attention
        pstate_ = tensor.dot(h_, tparams[_p(prefix,'Wd_att')]) # pstate_
        pctx_ = tensor.dot(x_, tparams[_p(prefix,'Wc_att')]) + tparams[_p(prefix, 'b_att')]
        if options['n_layers_att'] > 1:
            for lidx in xrange(1, options['n_layers_att']):
                pctx_ = tensor.dot(pctx_, tparams[_p(prefix,'W_att_%d'%lidx)])+tparams[_p(prefix, 'b_att_%d'%lidx)]
                # tanh between attention layers, but not after the last one
                # (the shared tanh below follows the addition of pstate_)
                if lidx < options['n_layers_att'] - 1:
                    pctx_ = tanh(pctx_)
        # add the projected hidden state to every annotation
        pctx_ = pctx_ + pstate_[:,None,:]
        # keep the pre-tanh value; it is returned as the last scan output
        pctx_list = []
        pctx_list.append(pctx_)
        pctx_ = tanh(pctx_)
        # 'c_tt' is the key param_init_lstm_cond stores the score bias under
        alpha = tensor.dot(pctx_, tparams[_p(prefix,'U_att')])+tparams[_p(prefix, 'c_tt')]
        alpha_pre = alpha
        alpha_shp = alpha.shape
        # drop the trailing axis and normalise the scores, scaled by
        # options['temperature_inverse']
        alpha = tensor.nnet.softmax(options['temperature_inverse']*alpha.reshape([alpha_shp[0],alpha_shp[1]])) # softmax
        # attention-weighted sum of the annotations (axis 1)
        ctx_ = (x_ * alpha[:,:,None]).sum(1) # current context
        if options['selector']:
            # scalar gate per sample, computed from the previous hidden state
            sel_ = tensor.nnet.sigmoid(tensor.dot(h_, tparams[_p(prefix, 'W_sel')])+tparams[_p(prefix,'b_sel')])
            sel_ = sel_.reshape([sel_.shape[0]])
            ctx_ = sel_[:,None] * ctx_

        preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
        preact += tensor.dot(ctx_, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]

        i = _slice(preact, 0, dim)              # z_it
        f = _slice(preact, 1, dim)              # z_ft
        o = _slice(preact, 2, dim)              # z_ot
        i = tensor.nnet.sigmoid(i)              # it = sigmoid(z_it)
        f = tensor.nnet.sigmoid(f)              # ft = sigmoid(z_ft)
        o = tensor.nnet.sigmoid(o)              # ot = sigmoid(z_ot)
        c = tensor.tanh(_slice(preact, 3, dim)) # at = tanh(z_at)

        c = f * c_ + i * c                      # ct = ft * ct-1 + it * at
        # where the mask is 0 the previous cell is carried over unchanged
        c = m_[:,None] * c + (1. - m_)[:,None] * c_

        h = o * tensor.tanh(c)                  # ht = ot * tanh(ct)
        # where the mask is 0 the previous hidden state is carried over
        h = m_[:,None] * h + (1. - m_)[:,None] * h_

        rval = [h, c, alpha, ctx_]
        if options['selector']:
            rval += [sel_]
        rval += [pstate_, pctx_, i, f, o, preact, alpha_pre]+pctx_list
        return rval

    # With the selector enabled scan feeds back one extra recurrent value
    # (the previous sel_), which _step does not need; the wrapper drops it.
    if options['selector']:
        _step0 = lambda m_, x_, h_, c_, a_, ct_, sel_: _step(m_, x_, h_, c_, a_, ct_)
    else:
        _step0 = lambda m_, x_, h_, c_, a_, ct_: _step(m_, x_, h_, c_, a_, ct_)

    seqs = [mask, state_below]
    # Recurrent outputs get an initial value; the order must match rval.
    outputs_info = [init_state,
                    init_memory,
                    tensor.alloc(0., n_samples, n_annotations),
                    tensor.alloc(0., n_samples, options['ctx_dim'])]
    if options['selector']:
        outputs_info += [tensor.alloc(0., n_samples)]
    # Seven non-recurrent outputs (pstate_ .. alpha_pre) plus the single
    # entry of pctx_list.
    outputs_info += [None,
                     None,
                     None,
                     None,
                     None,
                     None,
                     None] + [None]#*options['n_layers_att']
    rval, updates = theano.scan(_step0,
                                sequences=seqs,
                                outputs_info=outputs_info,
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps, profile=False)
    return rval
337 | 
338 | # build a training model
def build_model(tparams, options):
    """
    Build up the whole computation graph

    tparams : mapping of parameter name -> shared variable, read by the
              layer functions through their prefixes.
    options : model options; this function reads 'last_n', 'n_layers_init',
              'use_dropout', 'selector', 'ctx2out' and 'n_layers_out'.

    Returns (trng, use_noise, [x, mask, y], alphas, cost, opt_outs, preds):
      trng      : the RandomStreams instance handed to dropout_layer
      use_noise : shared float32 flag handed to dropout_layer
      x, mask, y: symbolic inputs
      alphas    : per-step attention weights from the decoder
      cost      : masked binary cross-entropy summed over steps, samples
                  and outputs (a scalar)
      opt_outs  : dict; holds the selector values under 'selector' when
                  options['selector'] is set
      preds     : output probabilities averaged over the last `last_n`
                  time steps
    """
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))
    last_n = options['last_n']

    # video blocks. (n_timesteps, n_samples, n_annotations, ctxdim)
    x = tensor.tensor4('x', dtype='float32')
    mask = tensor.matrix('mask', dtype='float32')
    # NOTE(review): the four shape locals below are not used again in this
    # function.
    n_timesteps = x.shape[0]
    n_samples = x.shape[1]
    n_annotations = x.shape[2]
    ctxdim = x.shape[3]

    # action labels
    y = tensor.tensor3('y', dtype='int64')

    ctx = x

    # initial state/cell
    ctx_mean = ctx.mean(0) ### ctx_mean is now (n_samples, n_annotations, ctxdim)
    ctx_mean = ctx_mean.mean(1) ### you want ctx_mean to be n_samples x ctxdim

    # optional extra layers on the mean context (none when n_layers_init is 1)
    for lidx in xrange(1, options['n_layers_init']):
        ctx_mean = get_layer('ff')[1](tparams, ctx_mean, options,
                                      prefix='ff_init_%d'%lidx, activ='rectifier')
        if options['use_dropout']:
            ctx_mean = dropout_layer(ctx_mean, use_noise, trng)

    init_state = get_layer('ff')[1](tparams, ctx_mean, options, prefix='ff_state', activ='tanh')
    init_memory = get_layer('ff')[1](tparams, ctx_mean, options, prefix='ff_memory', activ='tanh')

    # decoder
    proj = get_layer('lstm_cond')[1](tparams, ctx, options,
                                     prefix='decoder',
                                     mask=mask,
                                     init_state=init_state,
                                     init_memory=init_memory,
                                     trng=trng,
                                     use_noise=use_noise)
    # collection: indices follow the output order of lstm_cond_layer
    # (h, c, alpha, ctx_, then sel_ when the selector is on)
    proj_h = proj[0]
    alphas = proj[2]
    ctxs = proj[3]
    if options['selector']:
        sels = proj[4]
    if options['use_dropout']:
        proj_h = dropout_layer(proj_h, use_noise, trng)

    # outputs
    logit = get_layer('ff')[1](tparams, proj_h, options, prefix='ff_logit_lstm', activ='linear')
    if options['ctx2out']:
        logit += get_layer('ff')[1](tparams, ctxs, options, prefix='ff_logit_ctx', activ='linear')
    logit = tanh(logit)
    if options['use_dropout']:
        logit = dropout_layer(logit, use_noise, trng)
    if options['n_layers_out'] > 1:
        for lidx in xrange(1, options['n_layers_out']):
            logit = get_layer('ff')[1](tparams, logit, options, prefix='ff_logit_h%d'%lidx, activ='rectifier')
            if options['use_dropout']:
                logit = dropout_layer(logit, use_noise, trng)

    # sigmoid output: every output unit is an independent probability
    logit = get_layer('ff')[1](tparams, logit, options, prefix='ff_logit', activ='sigmoid')
    # NOTE(review): logit_shp is not used again in this function.
    logit_shp = logit.shape #(TS, BS, o/p)

    probs = logit
    probs = probs.reshape([probs.shape[0]*probs.shape[1],probs.shape[2]]) #(TSxBS, o/p)

    # Cost Function: binary cross-entropy per output; the 1e-8 keeps log()
    # away from zero
    tmp   = tensor.reshape(y,[y.shape[0]*y.shape[1],y.shape[2]])           # (TSxBS, 12)
    cost  = -tmp*tensor.log(probs+1e-8) - (1-tmp)*tensor.log(1-probs+1e-8) # (TSxBS, 12)
    cost  = cost.sum(1)                                                    # (TSxBS,)
    cost  = cost.reshape([x.shape[0],x.shape[1]])                          # (TS, BS)
    cost  = (cost*mask).sum(0).sum(0)                                      # float32
    # Predictions
    probs = probs.reshape([x.shape[0],x.shape[1],probs.shape[1]])          # (TS, BS, 12)
    preds = tensor.mean(probs[-last_n:,:,:],axis=0)                        # (BS, 12)

    opt_outs = dict()
    if options['selector']:
        opt_outs['selector'] = sels

    return trng, use_noise, [x, mask, y], alphas, cost, opt_outs, preds
425 | 
def _average_precision(ranked_truth):
    """
    Average precision of one ranking.

    ranked_truth : 1-D integer array of relevance flags, best-scored item
                   first.
    Returns the sum of precision@k over the positions k that hold a
    positive, divided by the number of positives; 0.0 when there are no
    positives (instead of dividing by zero).
    """
    n_pos = ranked_truth.sum()
    if n_pos == 0:
        return 0.0
    # Running count of positives replaces re-summing the prefix per rank.
    hits = numpy.cumsum(ranked_truth)
    ap = 0.0
    for rank, (is_pos, n_hits) in enumerate(zip(ranked_truth, hits), 1):
        if is_pos:
            ap = ap + n_hits * 1.0 / rank
    return ap * 1.0 / n_pos

def pred_mAP(modelname, batch_size, f_preds, maxlen, data_test_pb, dh_test, test_dataset_size, num_test_batches, last_n, test=True, verbose=False):
    """
    Make predictions for new data and return the mean average precision.

    modelname         : path of the model file; its base name (without
                        extension) is used in the names of the score files
    batch_size        : number of examples per batch
    f_preds           : callable f_preds(x, mask, y) returning one row of
                        class scores per example in the batch
    maxlen            : number of time steps (rows of the mask)
    data_test_pb      : object passed to dh_test.GetBatch; its
                        `labels_file` attribute names a text file with one
                        comma-separated 0/1 row per video
    dh_test           : data handler providing Reset(), GetBatch(),
                        `frame_indices_` and `video_ind_`
    test_dataset_size : total number of examples
    num_test_batches  : number of batches to fetch
    last_n            : unused here; kept for interface compatibility
    test              : selects the 'test_mAP_' (True) or 'train_mAP_'
                        (False) file prefix
    verbose           : print progress after every batch

    Side effect: for every class one text file
    '<prefix><class number><model name>.txt' is written to the current
    directory, one line per video: '<video index> <score>,<score>,...'.

    A class without any positive label gets AP 0.0.
    """
    classes    = ['AnswerPhone','DriveCar','Eat','FightPerson','GetOutCar','HandShake','HugPerson','Kiss','Run','SitDown','SitUp','StandUp']
    n_classes  = len(classes)
    n_examples = test_dataset_size

    # ---- 1. per-example scores ------------------------------------------
    dh_test.Reset()
    preds     = numpy.zeros((n_examples, n_classes)).astype('float32')
    full_mask = numpy.ones((maxlen, batch_size)).astype('float32')
    n_done    = 0

    for tbidx in range(num_test_batches):
        x, y, n_ex = dh_test.GetBatch(data_test_pb)
        if n_ex == batch_size:
            mask = full_mask
        else:
            # partial batch: switch off the padding columns
            mask = full_mask.copy()
            mask[:, n_ex:] = 0.
        pred_ = f_preds(x, mask, y)
        start = tbidx * batch_size
        preds[start:start+n_ex, :] = pred_[0:n_ex, :]

        # count real examples so the progress never exceeds the total
        n_done += n_ex
        if verbose:
            print('%d/%d examples computed' % (n_done, n_examples))

    # ---- 2. group consecutive examples by video --------------------------
    # groups[v] lists the example indices written on line v of the score
    # files.  A new line starts whenever an example's video index differs
    # from the current line number.
    groups = [[]]
    for i in range(n_examples):
        if dh_test.video_ind_[dh_test.frame_indices_[i]] != len(groups) - 1:
            groups.append([])
        groups[-1].append(i)

    # ---- 3. ground truth (identical for every class, so read it once) ----
    # int() ignores surrounding whitespace, so the last line is parsed
    # correctly whether or not it ends with a newline.
    with open(data_test_pb.labels_file, 'r') as f:
        label_rows = [[int(tok) for tok in line.split(',')]
                      for line in f if line.strip()]

    fileprefix = 'test_mAP_' if test else 'train_mAP_'
    modelstem  = modelname.split('/')[-1].split('.')[0]

    # ---- 4. per-class score file and average precision -------------------
    AP = numpy.zeros((n_classes,))
    for mapidx in range(n_classes):
        tempfilename = fileprefix + '{}'.format(mapidx+1) + modelstem + '.txt'

        pred      = numpy.zeros(len(groups)).astype('float32')
        out_lines = []
        for vid_idx, members in enumerate(groups):
            texts = ['{}'.format(preds[i, mapidx]) for i in members]
            if texts:
                out_lines.append('{} {}\n'.format(vid_idx, ','.join(texts)))
                # The video score is the mean of the values as written to
                # the file, i.e. parsed back from their text form.
                pred[vid_idx] = numpy.array([float(t) for t in texts]).mean()
            else:
                out_lines.append('{}\n'.format(vid_idx))
                print('One blank index skipped')
                pred[vid_idx] = -1
        with open(tempfilename, 'w') as f:
            f.writelines(out_lines)

        truth = numpy.array([row[mapidx] for row in label_rows]).astype('int64')

        # rank the videos by decreasing confidence
        sortind = (-pred).argsort()
        truth   = truth[sortind]

        AP[mapidx] = _average_precision(truth)
        print('AP of class ' + classes[mapidx] + ': ' + str(AP[mapidx]))

    return AP.mean()
524 | 
525 | # optimizers
526 | # name(hyperp, tparams, grads, inputs (list), cost) = f_grad_shared, f_update
def adam(lr, tparams, grads, inp, cost):
    """
    Adam: A Method for Stochastic Optimization (Diederik Kingma, Jimmy Ba)

    Returns (f_grad_shared, f_update):
      f_grad_shared(*inp) evaluates `cost` and stores the gradients in
                          shared variables;
      f_update(lr)        applies one Adam step from the stored gradients.

    The step size comes from the constant base_lr below.  `lr` is an input
    of f_update but does not appear in any update expression, which is why
    the function is compiled with on_unused_input='ignore'.
    """
    # one zero-initialised gradient buffer per parameter
    grad_store = []
    for name, param in tparams.iteritems():
        grad_store.append(theano.shared(param.get_value() * numpy.float32(0.), name='%s_grad'%name))
    f_grad_shared = theano.function(inp, cost, updates=list(zip(grad_store, grads)),
                                    allow_input_downcast=True)

    # Magic numbers.  beta1/beta2 weight the *new* gradient terms in the
    # moving averages below.
    base_lr = 0.0002
    beta1 = 0.1
    beta2 = 0.001
    eps = 1e-8

    # step counter and bias-corrected learning rate
    step = theano.shared(numpy.float32(0.))
    step_next = step + 1.
    corr1 = 1. - beta1**(step_next)
    corr2 = 1. - beta2**(step_next)
    lr_now = base_lr * (tensor.sqrt(corr2) / corr1)

    updates = []
    for param, grad in zip(tparams.values(), grad_store):
        first = theano.shared(param.get_value() * numpy.float32(0.))
        second = theano.shared(param.get_value() * numpy.float32(0.))
        first_next = (beta1 * grad) + ((1. - beta1) * first)
        second_next = (beta2 * tensor.sqr(grad)) + ((1. - beta2) * second)
        direction = first_next / (tensor.sqrt(second_next) + eps)
        updates.extend([(first, first_next),
                        (second, second_next),
                        (param, param - (lr_now * direction))])
    updates.append((step, step_next))

    f_update = theano.function([lr], [], updates=updates, on_unused_input='ignore')

    return f_grad_shared, f_update
565 | 
def adadelta(lr, tparams, grads, inp, cost):
    """
    Adadelta optimizer.

    Returns (f_grad_shared, f_update):
      f_grad_shared(*inp) evaluates `cost`, stores the gradients and
                          updates the running average of squared gradients;
      f_update(lr)        applies the parameter step and updates the
                          running average of squared steps.

    `lr` is an input of f_update but is not used by any update expression.
    """
    def zero_state(suffix):
        # one zero-initialised shared variable per parameter
        return [theano.shared(p.get_value() * numpy.float32(0.), name='%s_%s' % (k, suffix))
                for k, p in tparams.iteritems()]

    grad_store = zero_state('grad')
    sq_step_avg = zero_state('rup2')
    sq_grad_avg = zero_state('rgrad2')

    store_grads = list(zip(grad_store, grads))
    track_sq_grads = []
    for avg, g in zip(sq_grad_avg, grads):
        track_sq_grads.append((avg, 0.95 * avg + 0.05 * (g ** 2)))

    f_grad_shared = theano.function(inp, cost, updates=store_grads+track_sq_grads, profile=False)

    steps = []
    for stored, step_avg, grad_avg in zip(grad_store, sq_step_avg, sq_grad_avg):
        steps.append(-tensor.sqrt(step_avg + 1e-6) / tensor.sqrt(grad_avg + 1e-6) * stored)

    track_sq_steps = []
    for step_avg, step in zip(sq_step_avg, steps):
        track_sq_steps.append((step_avg, 0.95 * step_avg + 0.05 * (step ** 2)))

    move_params = []
    for p, step in zip(itemlist(tparams), steps):
        move_params.append((p, p + step))

    f_update = theano.function([lr], [], updates=track_sq_steps+move_params,
                               on_unused_input='ignore', profile=False)

    return f_grad_shared, f_update
583 | 
def rmsprop(lr, tparams, grads, inp, cost):
    """
    RMSprop-style optimizer with a momentum term on the update direction.

    Returns (f_grad_shared, f_update):
      f_grad_shared(*inp) evaluates `cost`, stores the gradients and
                          updates the running averages of the gradient and
                          of its square;
      f_update(lr)        updates the direction buffers and moves the
                          parameters by the new direction.

    `lr` is an input of f_update but is not used by any update expression;
    the step scale is the constant 1e-4 below.
    """
    def zero_state(suffix):
        # one zero-initialised shared variable per parameter
        return [theano.shared(p.get_value() * numpy.float32(0.), name='%s_%s' % (k, suffix))
                for k, p in tparams.iteritems()]

    grad_store = zero_state('grad')
    grad_avg = zero_state('rgrad')
    sq_grad_avg = zero_state('rgrad2')

    store_grads = list(zip(grad_store, grads))
    track_grads = []
    for avg, g in zip(grad_avg, grads):
        track_grads.append((avg, 0.95 * avg + 0.05 * g))
    track_sq_grads = []
    for avg, g in zip(sq_grad_avg, grads):
        track_sq_grads.append((avg, 0.95 * avg + 0.05 * (g ** 2)))

    f_grad_shared = theano.function(inp, cost, updates=store_grads+track_grads+track_sq_grads, profile=False)

    directions = zero_state('updir')
    move_dirs = []
    for direction, stored, mean, mean_sq in zip(directions, grad_store, grad_avg, sq_grad_avg):
        spread = tensor.sqrt(mean_sq - mean ** 2 + 1e-4)
        move_dirs.append((direction, 0.9 * direction - 1e-4 * stored / spread))

    # parameters move by the *new* direction expression, not the old buffer
    move_params = []
    for p, (_, new_direction) in zip(itemlist(tparams), move_dirs):
        move_params.append((p, p + new_direction))

    f_update = theano.function([lr], [], updates=move_dirs+move_params,
                               on_unused_input='ignore', profile=False)

    return f_grad_shared, f_update
601 | 
def sgd(lr, tparams, grads, inp, cost):
    """
    Plain stochastic gradient descent.

    Returns (f_grad_shared, f_update):
      f_grad_shared(*inp) evaluates `cost` and stores the gradients in
                          shared variables;
      f_update(lr)        subtracts lr times the stored gradient from
                          every parameter.
    """
    grad_store = []
    for name, param in tparams.iteritems():
        grad_store.append(theano.shared(param.get_value() * numpy.float32(0.), name='%s_grad'%name))

    f_grad_shared = theano.function(inp, cost, updates=list(zip(grad_store, grads)), profile=False)

    descend = []
    for param, stored in zip(itemlist(tparams), grad_store):
        descend.append((param, param - lr * stored))
    f_update = theano.function([lr], [], updates=descend, profile=False)

    return f_grad_shared, f_update
612 | 
613 | # validate options
def validate_options(options):
    """
    Placeholder for hyper-parameter validation.

    No checks are performed; `options` is not inspected or modified and
    the function returns None.
    """
    return None
618 | 
619 | def train(dim_out=100, # hidden layer dim for outputs
620 |           ctx_dim=512, # context vector dimensionality
621 |           dim=1000, # the number of LSTM units
622 |           n_actions=3, # number of actions to predict
623 |           n_layers_att=1,
624 |           n_layers_out=1,
625 |           n_layers_init=1,
626 |           ctx2out=False,
627 |           patience=10,
628 |           max_epochs=5000,
629 |           dispFreq=100,
630 |           decay_c=0.,
631 |           alpha_c=0.,
632 |           temperature_inverse=1.0,
633 |           lrate=0.01,
634 |           selector=False,
635 |           maxlen=5, # maximum length of the video
636 |           optimizer='adam',
637 |           batch_size = 16,
638 |           valid_batch_size = 16,
639 |           saveto='model.npz',
640 |           validFreq=1000,
641 |           saveFreq=1000, # save the parameters after every saveFreq updates
642 |           dataset='flickr8k', # dummy dataset, replace with video ones
643 |           dictionary=None, # word dictionary
644 |           use_dropout=False,
645 |           reload_=False,
646 |           training_stride=1,
647 |           testing_stride=8,
648 |           last_n=16,
649 |           fps=30):
650 | 
651 |     # Model options
652 |     model_options = locals().copy()
653 |     #model_options = validate_options(model_options)
654 | 
655 |     # reload options
656 |     if reload_ and os.path.exists(saveto):
657 |         print "Reloading options"
658 |         with open('%s.pkl'%saveto, 'rb') as f:
659 |             model_options = pkl.load(f)
660 | 
661 |     print '-----'
662 |     print 'Booting up all data handlers' 
663 |     data_pb = TrainProto(batch_size,maxlen,training_stride,dataset,fps)
664 |     dh = DataHandler(data_pb)
665 |     dataset_size = dh.GetDatasetSize()
666 |     num_train_batches = dataset_size / batch_size
667 |     if dataset_size % batch_size != 0:
668 |         num_train_batches += 1
669 | 
670 |     valid = True # not None
671 |     test  = True # not None
672 | 
673 |     data_test_train_pb = TestTrainProto(valid_batch_size,maxlen,testing_stride,dataset,fps)
674 |     dh_test_train = DataHandler(data_test_train_pb)
675 |     test_train_dataset_size = dh_test_train.GetDatasetSize()
676 |     num_test_train_batches = test_train_dataset_size / valid_batch_size
677 |     if test_train_dataset_size % valid_batch_size != 0:
678 |         num_test_train_batches += 1
679 | 
680 |     data_test_valid_pb = TestValidProto(valid_batch_size,maxlen,testing_stride,dataset,fps)
681 |     dh_test_valid = DataHandler(data_test_valid_pb)
682 |     test_valid_dataset_size = dh_test_valid.GetDatasetSize()
683 |     num_test_valid_batches = test_valid_dataset_size / valid_batch_size
684 |     if test_valid_dataset_size % valid_batch_size != 0:
685 |         num_test_valid_batches += 1
686 | 
687 |     data_test_test_pb = TestTestProto(valid_batch_size,maxlen,testing_stride,dataset,fps)
688 |     dh_test_test = DataHandler(data_test_test_pb)
689 |     test_test_dataset_size = dh_test_test.GetDatasetSize()
690 |     num_test_test_batches = test_test_dataset_size / valid_batch_size
691 |     if test_test_dataset_size % valid_batch_size != 0:
692 |         num_test_test_batches += 1
693 |     print 'Data handlers ready'
694 |     print '-----'
695 | 
696 |     print 'Building model'
697 |     params = init_params(model_options)
698 |     # reload parameters
699 |     if reload_ and os.path.exists(saveto):
700 |         print "Reloading model"
701 |         params = load_params(saveto, params)
702 | 
703 |     tparams = init_tparams(params)
704 | 
705 |     trng, use_noise, \
706 |           inps, alphas, \
707 |           cost, \
708 |           opts_out, preds = \
709 |           build_model(tparams, model_options)
710 | 
711 |     # before any regularizer
712 |     f_log_probs = theano.function(inps, -cost, profile=False)
713 |     f_preds = theano.function(inps, preds, profile=False, on_unused_input='ignore')
714 | 
715 |     cost = cost.mean()
716 |     if decay_c > 0.:
717 |         decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
718 |         weight_decay = 0.
719 |         for kk, vv in tparams.iteritems():
720 |             weight_decay += (vv ** 2).sum()
721 |         weight_decay *= decay_c
722 |         cost += weight_decay
723 | 
724 |     if alpha_c > 0.:
725 |         alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
726 |         alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean()
727 |         cost += alpha_reg
728 | 
729 |     # gradient computation
730 |     grads = tensor.grad(cost, wrt=itemlist(tparams))
731 |     lr = tensor.scalar(name='lr')
732 |     f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
733 | 
734 |     print 'Optimization'
735 | 
736 |     history_errs = []
737 |     # reload history
738 |     if reload_ and os.path.exists(saveto):
739 |         history_errs = numpy.load(saveto)['history_errs'].tolist()
740 |     best_p = None
741 |     bad_count = 0
742 | 
743 |     uidx = 0
744 | 
745 |     for epochidx in xrange(max_epochs):
746 |         # If the input sequences are of variable length get mask from the data loader instead of setting them all to one
747 |         mask = numpy.ones((maxlen, batch_size)).astype('float32')
748 |         print 'Epoch ', epochidx
749 |         n_examples_seen = 0
750 |         estop = False
751 |         if epochidx > 0:
752 |             dh.Reset()
753 | 
754 |         for tbidx in xrange(num_train_batches):
755 |             n_examples_seen += batch_size
756 |             uidx += 1
757 |             use_noise.set_value(1.)
758 | 
759 |             pd_start = time.time()
760 |             x, y, n_ex = dh.GetBatch(data_pb)
761 |             if n_ex != batch_size:
762 |                 mask[:,n_ex:] = numpy.zeros((maxlen, batch_size-n_ex)).astype('float32')
763 |             pd_duration = time.time() - pd_start
764 | 
765 |             if x == None:
766 |                 print 'Minibatch with zero sample under length ', maxlen
767 |                 continue
768 |             ud_start = time.time()
769 | 
770 |             cost = f_grad_shared(x, mask, y)
771 |             f_update(lrate)
772 |             ud_duration = time.time() - ud_start
773 | 
774 |             if n_ex != batch_size:
775 |                 mask[:,n_ex:] = numpy.ones((maxlen, batch_size-n_ex)).astype('float32')
776 | 
777 |             if numpy.isnan(cost):
778 |                 print 'NaN detected in cost'
779 |                 return 1., 1., 1.
780 |             if numpy.isinf(cost):
781 |                 print 'INF detected in cost'
782 |                 return 1., 1., 1.
783 | 
784 |             if numpy.mod(uidx, dispFreq) == 0:
785 |                 print 'Epoch ', epochidx, 'Update ', uidx, 'Cost ', cost, 'PD ', pd_duration, 'UD ', ud_duration
786 | 
787 |             if numpy.mod(uidx, saveFreq) == 0:
788 |                 print 'Saving...',
789 | 
790 |                 if best_p != None:
791 |                     params = copy.copy(best_p)
792 |                 else:
793 |                     params = unzip(tparams)
794 |                 numpy.savez(saveto, history_errs=history_errs, **params)
795 |                 pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
796 |                 print 'Done'
797 | 
798 |             if numpy.mod(uidx, validFreq) == 0:
799 |                 use_noise.set_value(0.)
800 |                 train_err = 0
801 |                 valid_err = 0
802 |                 test_err = 0
803 |                 print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
804 |                 train_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
805 |                 if valid is not None:
806 |                     valid_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True)
807 |                 if test is not None:
808 |                     test_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True)
809 | 
810 |                 history_errs.append([valid_err, test_err])
811 | 
812 |                 if uidx == 0 or valid_err >= numpy.array(history_errs)[:,0].max():
813 |                     best_p = unzip(tparams) # p for min valid err / max valid acc
814 | 
815 |                 print 'mAP: Train', train_err, 'Valid', valid_err, 'Test', test_err
816 | 
817 |             if numpy.mod(uidx, saveFreq) == 0:
818 |                 print 'Saving...',
819 | 
820 |                 if best_p != None:
821 |                     params = copy.copy(best_p)
822 |                 else:
823 |                     params = unzip(tparams)
824 |                 numpy.savez(saveto, history_errs=history_errs, **params)
825 |                 pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
826 |                 print 'Done'
827 | 
828 |         if n_ex == batch_size:
829 |             print 'Seen %d training examples'% (n_examples_seen)
830 |         else:
831 |             print 'Seen %d training examples'% (n_examples_seen-batch_size+n_ex)
832 |         use_noise.set_value(0.)
833 |         train_err = 0
834 |         valid_err = 0
835 |         test_err = 0
836 |         print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'    
837 |         train_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
838 |         if valid is not None:
839 |             valid_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True)
840 |         if test is not None:
841 |             test_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True)
842 |             
843 |         history_errs.append([valid_err, test_err])
844 | 
845 |         if epochidx == 0 or valid_err >= numpy.array(history_errs)[:,0].max():
846 |             best_p = unzip(tparams) # p for min valid err / max valid acc
847 | 
848 |         print 'mAP: Train', train_err, 'Valid', valid_err, 'Test', test_err
849 | 
850 |     if best_p is not None:
851 |         zipp(best_p, tparams)
852 | 
853 |     use_noise.set_value(0.)
854 |     train_err = 0
855 |     valid_err = 0
856 |     test_err = 0
857 |     print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
858 |     train_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
859 |     if valid is not None:
860 |         valid_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True)
861 |     if test is not None:
862 |         test_err = pred_mAP(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True)
863 | 
864 |     print 'mAP: Train', train_err, 'Valid', valid_err, 'Test', test_err
865 |     params = copy.copy(best_p)
866 |     numpy.savez(saveto, zipped_params=best_p, train_err=train_err,
867 |                 valid_err=valid_err, test_err=test_err, history_errs=history_errs,
868 |                 **params)
869 | 
870 |     print model_options
871 | 
872 |     return train_err, valid_err, test_err
873 | 
if __name__ == '__main__':
    # Running this module directly is a deliberate no-op: the guard body is
    # empty, so the file has no command-line behaviour of its own.
    pass
876 | 
877 | 


--------------------------------------------------------------------------------
/util/README.md:
--------------------------------------------------------------------------------
 1 | ### Data Format
 2 | The `train_features.h5` file is an HDF5 file with `DATATYPE H5T_IEEE_F32LE` and its `DATASPACE` is `SIMPLE { ( #frames, 7*7*1024 ) / ( H5S_UNLIMITED, H5S_UNLIMITED ) }` and `DATASET "features"`.
 3 | 
 4 | The `train_framenum.txt` file contains #frames for each video:
 5 | ```
 6 | 89
 7 | 123
 8 | 22
 9 | 136
10 | ```
11 | 
12 | The `train_filename.txt` file contains the video filenames relative to the root video directory:
13 | ```
14 | cartwheel/lea_kann_radschlag_cartwheel_f_cm_np1_ri_med_0.avi
15 | cartwheel/park_cartwheel_f_cm_np1_ba_med_0.avi
16 | catch/96-_Torwarttraining_1_catch_f_cm_np1_le_bad_0.avi
17 | catch/Ball_hochwerfen_-_Rolle_-_Ball_fangen_(Timo_3)_catch_f_cm_np1_le_goo_0.avi
18 | ```
19 | 
 20 | The `train_labels.txt` file for uni-label datasets looks like:
21 | ```
22 | 0
23 | 7
24 | 43
25 | ```
26 | and for multi-label datasets:
27 | ```
28 | 0,0,0,0,0,0,0,1,0,0,0,0
29 | 0,0,0,0,0,0,0,1,0,0,0,0
30 | 0,0,0,0,0,0,1,1,0,0,0,0
31 | 0,0,0,0,0,0,0,0,0,0,0,1
32 | ```
33 | The same format is required for the validation and test files too.
34 | 
35 | ### data_handler.py
 36 | We pass `order='F'` to all of our `numpy.reshape()` calls because we created our data files with Matlab, which uses the Fortran (column-major) indexing order.
 37 | You will have to remove this parameter if your data files were not written in that order.
38 | 
39 | ### GPU locking
40 | Toronto ML users need not make any modifications. The script locks a free GPU automatically.
41 | Non-Toronto users can adapt the GPU locking scripts or remove the following lines from the `scripts/evaluate_*` files:
42 | ```
43 | import util.gpu_util
44 | board = util.gpu_util.LockGPU()
45 | print 'GPU Lock Acquired'
46 | 
47 | util.gpu_util.FreeGPU(board)
48 | print 'GPU freed'
49 | ```
50 | 


--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kracwarlock/action-recognition-visual-attention/6738a0e2240df45ba79e87d24a174f53adb4f29b/util/__init__.py


--------------------------------------------------------------------------------
/util/data_handler.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import h5py
  3 | import numpy as np
  4 | import time
  5 | 
  6 | class DataHandler(object):
  7 | 
  8 |   def __init__(self, data_pb):
  9 |     self.seq_length_ = data_pb.num_frames		# no of timesteps
 10 |     self.seq_stride_ = data_pb.stride			# stride for overlap
 11 |     self.randomize_ = data_pb.randomize			# randomize their order for training
 12 |     self.batch_size_ = data_pb.batch_size		# batch size
 13 |     self.fps_ = data_pb.fps
 14 |     skip = int(30.0/self.fps_)
 15 | 
 16 |     if data_pb.dataset != 'h2mAP':
 17 |       labels = self.GetLabels(data_pb.labels_file)	# labels
 18 |     else:
 19 |       labels = self.GetMAPLabels(data_pb.labels_file)	# multi class labels for mAP
 20 | 
 21 |     self.num_frames_ = []
 22 |     init_labels_ = []
 23 | 
 24 |     num_f = []						# number of frames in each example
 25 |     for line in open(data_pb.num_frames_file):
 26 |       num_f.append(int(line.strip()))
 27 |     assert len(num_f) == len(labels)
 28 | 
 29 |     for i in xrange(len(num_f)):
 30 |       self.num_frames_.append(num_f[i])
 31 |       init_labels_.append(labels[i])
 32 | 
 33 |     self.num_videos_ = len(init_labels_)
 34 |  
 35 |     data = h5py.File(data_pb.data_file,'r')[data_pb.dataset_name]	# load dataset
 36 |     self.frame_size_ = data.shape[1]					# 3D cube
 37 |     self.dataset_name_ = data_pb.dataset_name
 38 | 
 39 |     frame_indices = []
 40 |     labels_ = []
 41 |     lengths_ = []
 42 |     self.dataset_size_ = 0
 43 |     start = 0
 44 |     self.video_ind_ = {}
 45 |     for v, f in enumerate(self.num_frames_):
 46 |       end = start + f - self.seq_length_*skip + 1
 47 |       if end <= start:					# short length sequences also selected
 48 |         end = start+1
 49 |       frame_indices.extend(range(start, end, self.seq_stride_))
 50 |       for i in xrange(start, end, self.seq_stride_):
 51 |         self.video_ind_[i] = v
 52 |         labels_.append(init_labels_[v])
 53 |         lengths_.append(self.num_frames_[v])
 54 |       start += f
 55 |     self.dataset_size_ = len(frame_indices)
 56 |     print 'Dataset size', self.dataset_size_
 57 | 
 58 |     self.frame_indices_ = np.array(frame_indices)	# indices of sequence beginnings
 59 |     self.labels_ = np.array(labels_)
 60 |     self.lengths_ = np.array(lengths_)
 61 |     assert len(self.frame_indices_) == len(self.labels_)
 62 |     self.vid_boundary_ = np.array(self.num_frames_).cumsum()
 63 |     self.Reset()
 64 |     self.batch_data_  = np.zeros((self.seq_length_, self.batch_size_, self.frame_size_), dtype=np.float32)
 65 |     if data_pb.dataset != 'h2mAP':
 66 |       self.batch_label_ = np.zeros((self.seq_length_, self.batch_size_), dtype=np.int64)
 67 |     else:
 68 |       self.batch_label_ = np.zeros((self.seq_length_, self.batch_size_, 12), dtype=np.int64)
 69 |     self.handler = data
 70 | 
 71 |   def GetBatch(self, data_pb, verbose=False):
 72 |     skip = int(30.0/self.fps_)
 73 |     self.batch_data_  = np.zeros((self.seq_length_, self.batch_size_, self.frame_size_), dtype=np.float32)
 74 |     batch_size = self.batch_size_
 75 |     n_examples = 0
 76 |     for j in xrange(batch_size):
 77 |       n_examples += 1
 78 |       if verbose:
 79 |         sys.stdout.write('\r%d of %d' % (j+1, batch_size))
 80 |         sys.stdout.flush()
 81 |       start = self.frame_indices_[self.frame_row_]
 82 |       label = self.labels_[self.frame_row_]
 83 |       length= self.lengths_[self.frame_row_]
 84 |       vid_ind = self.video_ind_[start]
 85 | 
 86 |       self.frame_row_ += 1
 87 |       end = start + self.seq_length_ * skip
 88 |       if length >= self.seq_length_*skip:
 89 |           self.batch_data_[:,j, :] = self.handler[start:end:skip, :]
 90 |       else:
 91 |           n = 1 + int((length-1)/skip)
 92 |           self.batch_data_[:n,j, :] = self.handler[start:start+length:skip, :]
 93 |           self.batch_data_[n:,j, :] = np.tile(self.batch_data_[n-1,j, :],(self.seq_length_-n,1))
 94 |       if data_pb.dataset != 'h2mAP':
 95 |         self.batch_label_[:,j] = np.tile(label,(1,self.seq_length_))
 96 |       else:
 97 |         self.batch_label_[:,j,:] = np.tile(label,(self.seq_length_,1))
 98 |       if self.frame_row_ == self.dataset_size_:
 99 |         #self.Reset()
100 |         break
101 | 
102 |     if data_pb.dataset=='ucf11':
103 |       self.batch_data_ = self.batch_data_.reshape([self.batch_data_.shape[0],self.batch_data_.shape[1],49,1024],order='F').astype('float32')
104 |     elif data_pb.dataset=='h2mAP':
105 |       self.batch_data_ = self.batch_data_.reshape([self.batch_data_.shape[0],self.batch_data_.shape[1],49,1024],order='F').astype('float32')
106 |     elif data_pb.dataset=='hmdb51gln':
107 |       self.batch_data_ = self.batch_data_.reshape([self.batch_data_.shape[0],self.batch_data_.shape[1],49,1024],order='F').astype('float32')
108 | 
109 |     self.batch_label_ = self.batch_label_.astype('int64')
110 |     return self.batch_data_, self.batch_label_, n_examples
111 | 
112 |   def GetSingleExample(self, data_pb, idx, offset=0):
113 |     ### length validation
114 |     num_f = []
115 |     for line in open(data_pb.num_frames_file):
116 |       num_f.append(int(line.strip()))
117 | 
118 |     #if num_f[idx] < self.seq_length_:
119 |     #    print 'Example is too short'
120 |     #    exit()
121 | 
122 |     ### data_
123 |     try:
124 |       frames_before = np.cumsum(num_f[:idx],0)[-1]
125 |     except IndexError:
126 |       if idx==0:
127 |         frames_before = 0
128 |       else:
129 |         frames_before = np.cumsum(num_f[:idx],0)[-1]
130 |     start = frames_before + offset                 # inclusive
131 |     end   = frames_before + num_f[idx] - 1         # inclusive
132 |     length= num_f[idx] - offset
133 |     skip = int(30.0/self.fps_)
134 | 
135 |     data_ = np.zeros((self.seq_length_, 1, self.frame_size_), dtype=np.float32)
136 |     f = h5py.File(data_pb.data_file,'r')
137 | 
138 |     if length >= self.seq_length_*skip:
139 |       data_[:,0,:] = f[self.dataset_name_][start:start+self.seq_length_*skip:skip, :]
140 |     else:
141 |       n = 1 + int((length-1)/skip)
142 |       self.batch_data_[:n,0, :] = f[self.dataset_name_][start:start+length:skip, :]
143 |       self.batch_data_[n:,0, :] = np.tile(self.batch_data_[n-1,0, :],(self.seq_length_-n,1))
144 | 
145 |     if data_pb.dataset=='ucf11':
146 |       data_ = data_.reshape([data_.shape[0],data_.shape[1],49,1024],order='F').astype('float32')
147 |     elif data_pb.dataset=='h2mAP':
148 |       data_ = data_.reshape([data_.shape[0],data_.shape[1],49,1024],order='F').astype('float32')
149 |     elif data_pb.dataset=='hmdb51gln':
150 |       data_ = data_.reshape([data_.shape[0],data_.shape[1],49,1024],order='F').astype('float32')
151 | 
152 |     f.close()
153 | 
154 |     ### label_
155 |     if data_pb.dataset!='h2mAP':
156 |       labels = self.GetLabels(data_pb.labels_file)
157 |       label  = labels[idx]
158 |       label_ = np.zeros((self.seq_length_, 1), dtype=np.int64)
159 |       label_[:,0] = np.tile(label,(1,self.seq_length_))
160 |     else:
161 |       labels = np.array(self.GetMAPLabels(data_pb.labels_file))
162 |       label  = labels[idx,:]                                     # (12,)
163 |       label_ = np.zeros((self.seq_length_,1,12), dtype=np.int64) # (TS, 1, 12) # 12 classes in hollywood2
164 |       label_[:,0,:] = np.tile(label,(self.seq_length_,1))
165 |     assert len(num_f) == len(labels)
166 | 
167 |     ### fidx_
168 |     fnames = []
169 |     for line in open(data_pb.vid_name_file):
170 |       fnames.append(line.strip())
171 |     fidx_ = fnames[idx]
172 | 
173 |     return data_, label_, fidx_
174 | 
175 |   def GetBatchSize(self):
176 |     return self.batch_size_
177 | 
178 |   def GetLabels(self, filename):
179 |     labels = []
180 |     if filename != '':
181 |       for line in open(filename,'r'):
182 |         labels.append(int(line.strip()))
183 |     return labels
184 | 
185 |   def GetMAPLabels(self, filename):
186 |     labels = []
187 |     if filename != '':
188 |       for line in open(filename,'r'):
189 |         labels.append([int(x) for x in line.split(',')])
190 |     return labels
191 | 
192 |   def GetDatasetSize(self):
193 |     return self.dataset_size_
194 | 
195 |   def Reset(self):
196 |     self.frame_row_ = 0
197 |     if self.randomize_:
198 |       assert len(self.frame_indices_) == len(self.labels_)
199 |       rng_state = np.random.get_state()
200 |       np.random.shuffle(self.frame_indices_)
201 |       np.random.set_state(rng_state)
202 |       np.random.shuffle(self.labels_)
203 | 
class TrainProto(object):
  """Settings for iterating over the training split in shuffled order.

  Stores the batching parameters and, for the datasets listed below, the
  locations of the training feature, frame-count, label and file-name files.
  For any other dataset name no file attributes are set.
  """
  def __init__(self, bs, maxlen, stride, dataset, fps=30):
    self.num_frames = maxlen
    self.stride = stride
    self.randomize = True
    self.batch_size = bs
    self.dataset = dataset
    self.fps = fps
    # dataset -> (data_file, num_frames_file, labels_file, vid_name_file)
    known_files = {
      'ucf11': (
        '/ais/gobi3/u/shikhar/ucf11/dataset/train_features.h5',
        '/ais/gobi3/u/shikhar/ucf11/dataset/train_framenum.txt',
        '/ais/gobi3/u/shikhar/ucf11/dataset/train_labels.txt',
        '/ais/gobi3/u/shikhar/ucf11/dataset/train_filename.txt',
      ),
      'h2mAP': (
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/train_features.h5',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/train_framenum.txt',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/train_labels.txt',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/train_filename.txt',
      ),
      'hmdb51gln': (
        '/ais/gobi3/u/shikhar/hmdb/dataset/train_features.h5',
        '/ais/gobi3/u/shikhar/hmdb/dataset/train_framenum.txt',
        '/ais/gobi3/u/shikhar/hmdb/dataset/train_labels.txt',
        '/ais/gobi3/u/shikhar/hmdb/dataset/train_filename.txt',
      ),
    }
    files = known_files.get(dataset)
    if files is not None:
      (self.data_file, self.num_frames_file,
       self.labels_file, self.vid_name_file) = files
      self.dataset_name = 'features'
230 | 
class TestTrainProto(object):
  """Settings for evaluating on the training split in its stored order.

  Points at the same training files as TrainProto but with `randomize`
  switched off.  For an unrecognised dataset name no file attributes are set.
  """
  def __init__(self, bs, maxlen, stride, dataset, fps=30):
    self.num_frames = maxlen
    self.stride = stride
    self.randomize = False
    self.batch_size = bs
    self.dataset = dataset
    self.fps = fps
    # dataset -> (data_file, num_frames_file, labels_file, vid_name_file)
    known_files = {
      'ucf11': (
        '/ais/gobi3/u/shikhar/ucf11/dataset/train_features.h5',
        '/ais/gobi3/u/shikhar/ucf11/dataset/train_framenum.txt',
        '/ais/gobi3/u/shikhar/ucf11/dataset/train_labels.txt',
        '/ais/gobi3/u/shikhar/ucf11/dataset/train_filename.txt',
      ),
      'h2mAP': (
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/train_features.h5',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/train_framenum.txt',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/train_labels.txt',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/train_filename.txt',
      ),
      'hmdb51gln': (
        '/ais/gobi3/u/shikhar/hmdb/dataset/train_features.h5',
        '/ais/gobi3/u/shikhar/hmdb/dataset/train_framenum.txt',
        '/ais/gobi3/u/shikhar/hmdb/dataset/train_labels.txt',
        '/ais/gobi3/u/shikhar/hmdb/dataset/train_filename.txt',
      ),
    }
    files = known_files.get(dataset)
    if files is not None:
      (self.data_file, self.num_frames_file,
       self.labels_file, self.vid_name_file) = files
      self.dataset_name = 'features'
257 | 
class TestValidProto(object):
  """Settings for evaluating on the validation split in its stored order.

  `randomize` is off.  Note that for 'hmdb51gln' the entries point at the
  test_* files rather than valid_* files.  For an unrecognised dataset name
  no file attributes are set.
  """
  def __init__(self, bs, maxlen, stride, dataset, fps=30):
    self.num_frames = maxlen
    self.stride = stride
    self.randomize = False
    self.batch_size = bs
    self.dataset = dataset
    self.fps = fps
    # dataset -> (data_file, num_frames_file, labels_file, vid_name_file)
    known_files = {
      'ucf11': (
        '/ais/gobi3/u/shikhar/ucf11/dataset/valid_features.h5',
        '/ais/gobi3/u/shikhar/ucf11/dataset/valid_framenum.txt',
        '/ais/gobi3/u/shikhar/ucf11/dataset/valid_labels.txt',
        '/ais/gobi3/u/shikhar/ucf11/dataset/valid_filename.txt',
      ),
      'h2mAP': (
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/valid_features.h5',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/valid_framenum.txt',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/valid_labels.txt',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/valid_filename.txt',
      ),
      'hmdb51gln': (
        '/ais/gobi3/u/shikhar/hmdb/dataset/test_features.h5',
        '/ais/gobi3/u/shikhar/hmdb/dataset/test_framenum.txt',
        '/ais/gobi3/u/shikhar/hmdb/dataset/test_labels.txt',
        '/ais/gobi3/u/shikhar/hmdb/dataset/test_filename.txt',
      ),
    }
    files = known_files.get(dataset)
    if files is not None:
      (self.data_file, self.num_frames_file,
       self.labels_file, self.vid_name_file) = files
      self.dataset_name = 'features'
284 | 
class TestTestProto(object):
  """Settings for evaluating on the test split in its stored order.

  `randomize` is off.  For an unrecognised dataset name no file attributes
  are set.
  """
  def __init__(self, bs, maxlen, stride, dataset, fps=30):
    self.num_frames = maxlen
    self.stride = stride
    self.randomize = False
    self.batch_size = bs
    self.dataset = dataset
    self.fps = fps
    # dataset -> (data_file, num_frames_file, labels_file, vid_name_file)
    known_files = {
      'ucf11': (
        '/ais/gobi3/u/shikhar/ucf11/dataset/test_features.h5',
        '/ais/gobi3/u/shikhar/ucf11/dataset/test_framenum.txt',
        '/ais/gobi3/u/shikhar/ucf11/dataset/test_labels.txt',
        '/ais/gobi3/u/shikhar/ucf11/dataset/test_filename.txt',
      ),
      'h2mAP': (
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/test_features.h5',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/test_framenum.txt',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/test_labels.txt',
        '/ais/gobi3/u/shikhar/hollywood2/mAPdataset/test_filename.txt',
      ),
      'hmdb51gln': (
        '/ais/gobi3/u/shikhar/hmdb/dataset/test_features.h5',
        '/ais/gobi3/u/shikhar/hmdb/dataset/test_framenum.txt',
        '/ais/gobi3/u/shikhar/hmdb/dataset/test_labels.txt',
        '/ais/gobi3/u/shikhar/hmdb/dataset/test_filename.txt',
      ),
    }
    files = known_files.get(dataset)
    if files is not None:
      (self.data_file, self.num_frames_file,
       self.labels_file, self.vid_name_file) = files
      self.dataset_name = 'features'
311 | 
def main():
  """Time how long it takes to pull every full training batch of 'h2mAP'.

  Builds a shuffled training handler (batch size 128, 30 timesteps, stride 1,
  30 fps), fetches dataset_size/batch_size batches, prints the elapsed
  wall-clock time, requests one more batch and exits.
  """
  proto = TrainProto(128, 30, 1, 'h2mAP', 30)
  handler = DataHandler(proto)
  full_batches = handler.dataset_size_/handler.batch_size_
  tic = time.time()
  for _ in xrange(full_batches):
    _data, _labels, _count = handler.GetBatch(proto)
  toc = time.time()
  print('Duration %s' % (toc-tic))
  # One further request after the timed loop, as a final fetch.
  _data, _labels, _count = handler.GetBatch(proto)
  exit()

if __name__ == '__main__':
  main()
330 | 
331 | 


--------------------------------------------------------------------------------
/util/gpu_lock.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | """
  4 | A simple discretionary locking system for /dev/nvidia devices.
  5 | 
  6 | Iain Murray, November 2009, January 2010.
  7 | 
  8 | -- Additions -- Charlie Tang, Jan, 2011: 
  9 | added display of GPU usages
 10 | 
 11 | -- Charlie Tang, July, 2011:
 12 | improved statistics displaying
 13 | """
 14 | 
 15 | import os
 16 | import os.path
 17 | from xml.dom import Node
 18 | from xml.dom.minidom import parseString
 19 | from subprocess import Popen, PIPE, STDOUT
 20 | 
 21 | _dev_prefix = '/dev/nvidia'
 22 | 
 23 | # Get ID's of NVIDIA boards. Should do this through a CUDA call, but this is
 24 | # a quick and dirty way that works for now:
 25 | def board_ids():
 26 |     """Returns integer board ids available on this machine."""
 27 |     from glob import glob
 28 |     board_devs = glob(_dev_prefix + '[0-9]*')
 29 |     return range(len(board_devs))
 30 |     #p = Popen(['/u/tang/bin/get_num_gpu_boards'], stdout=PIPE)    
 31 |     #nBoards = int(p.stdout.read())
 32 |     #return range(nBoards)
 33 | 
 34 | def _lock_file(id):
 35 |     """lock file from integer id"""
 36 |     # /tmp is cleared on reboot on many systems, but it doesn't have to be
 37 |     if os.path.exists('/dev/shm'):
 38 |         # /dev/shm on linux machines is a RAM disk, so is definitely cleared
 39 |         return '/dev/shm/gpu_lock_%d' % id
 40 |     else:
 41 |         return '/tmp/gpu_lock_%d' % id
 42 | 
 43 | def owner_of_lock(id):
 44 |     """Username that has locked the device id. (Empty string if no lock)."""
 45 |     import pwd
 46 |     try:
 47 |         statinfo = os.lstat(_lock_file(id))
 48 |         return pwd.getpwuid(statinfo.st_uid).pw_name
 49 |     except:
 50 |         return ""
 51 | 
 52 | def _obtain_lock(id):
 53 |     """Attempts to lock id, returning success as True/False."""
 54 |     try:
 55 |         # On POSIX systems symlink creation is atomic, so this should be a
 56 |         # robust locking operation:
 57 |         os.symlink('/dev/null', _lock_file(id))
 58 |         return True
 59 |     except:
 60 |         return False
 61 | 
 62 | def _launch_reaper(id, pid):
 63 |     """Start a process that will free a lock when process pid terminates"""
 64 |     from subprocess import Popen, PIPE
 65 |     me = __file__
 66 |     if me.endswith('.pyc'):
 67 |         me = me[:-1]
 68 |     myloc = os.path.dirname(me)
 69 |     if not myloc:
 70 |         myloc = os.getcwd()
 71 |     reaper_cmd = os.path.join(myloc, 'run_on_me_or_pid_quit')
 72 |     Popen([reaper_cmd, str(pid), me, '--free', str(id)],
 73 |         stdout=open('/dev/null', 'w'))
 74 | 
 75 | def obtain_lock_id(pid=None):
 76 |     """
 77 |     Finds a free id, locks it and returns integer id, or -1 if none free.
 78 | 
 79 |     A process is spawned that will free the lock automatically when the
 80 |     process pid (by default the current python process) terminates.
 81 |     """
 82 |     id = -1
 83 |     id = obtain_lock_id_to_hog()
 84 |     try:
 85 |         if id >= 0:
 86 |             if pid is None:
 87 |                 pid = os.getpid()
 88 |             _launch_reaper(id, pid)
 89 |     except:
 90 |         free_lock(id)
 91 |         id = -1
 92 |     return id
 93 | 
 94 | def obtain_lock_id_to_hog():
 95 |     """
 96 |     Finds a free id, locks it and returns integer id, or -1 if none free.
 97 | 
 98 |     * Lock must be freed manually *
 99 |     """
100 |     for id in board_ids():
101 |         if _obtain_lock(id):
102 |             return id
103 |     return -1
104 | 
def free_lock(id):
    """Attempts to free lock id, returning success as True/False."""
    filename = _lock_file(id)
    try:
        # On POSIX systems os.rename is an atomic operation, so this is the safe
        # way to delete a lock:
        os.rename(filename, filename + '.redundant')
        os.remove(filename + '.redundant')
    except OSError:
        # Lock missing or not removable.  Only OS-level failures count as
        # "not freed"; interrupts are no longer swallowed.
        return False
    return True
116 | 
def nvidia_gpu_stats():
    """Query `nvidia-smi -x -a` and return per-GPU statistics.

    Returns a list of four lists, [uselist, memlist, fanlist, templist], each
    holding one string per GPU found in the XML report:
      uselist   text of the gpu_util element
      memlist   'used+free=totalMb' built from the memory_usage element
      fanlist   text of the fan_speed element
      templist  text of the gpu_temp element

    If nvidia-smi cannot be run or its output cannot be parsed, every list is
    filled with -9999, one entry per board reported by board_ids().
    """
    def first_text(parent, tag):
        # Text content of the first descendant element named `tag`.
        return str(parent.getElementsByTagName(tag)[0].firstChild.toxml())

    try:
        # Started inside the try so that a missing nvidia-smi binary takes the
        # fallback path too; communicate() also reaps the child process.
        p = Popen(['nvidia-smi', '-x', '-a'], stdout=PIPE)
        output = p.communicate()[0].lstrip()
        doc = parseString(output)
        templist = []
        memlist = []
        uselist = []
        fanlist = []
        log = doc.getElementsByTagName("nvidia_smi_log")[0]
        for gpu in log.getElementsByTagName("gpu"):
            temp = gpu.getElementsByTagName('temperature')[0]
            templist.append(first_text(temp, 'gpu_temp'))
            mem = gpu.getElementsByTagName('memory_usage')[0]
            # The last three characters of each value are dropped before the
            # float conversion (presumably a unit suffix such as ' MB').
            memtot_float = float(first_text(mem, 'total')[:-3])
            memused_float = float(first_text(mem, 'used')[:-3])
            memfree_float = float(first_text(mem, 'free')[:-3])
            memlist.append('%03.f' % memused_float + '+%03.f' % memfree_float + '=%03.f' % memtot_float + 'Mb')
            uselist.append(first_text(gpu, 'gpu_util'))
            fanlist.append(first_text(gpu, 'fan_speed'))

        return [uselist, memlist, fanlist, templist]
    except Exception:
        # Four independent placeholder lists (not four references to one).
        n_boards = len(board_ids())
        return [[-9999] * n_boards for _ in range(4)]
153 |        
154 |          
155 | # If run as a program:
if __name__ == "__main__":
    # Command-line front end.  Modes (selected by a flag anywhere in argv,
    # with the flag's argument read from argv[2]):
    #   --id [pid]    lock a free board, auto-freed when pid (default: parent) exits
    #   --ids <id>    lock one specific board
    #   --id-to-hog   lock a free board that must be freed manually
    #   --free <id>   release a lock
    #   --noverbose   short status table
    #   (none)        usage text followed by the full status table

    div = '  ' + "-" * 90
    import sys
    me = sys.argv[0]
    # Report
    if '--id' in sys.argv:
        if len(sys.argv) > 2:
            try:
                pid = int(sys.argv[2])
                # Explicit check instead of an assert, which -O would strip.
                if not os.path.exists('/proc/%d' % pid):
                    raise ValueError(pid)
            except ValueError:
                print('Usage: %s --id [pid_to_wait_on]' % me)
                print('The optional process id must exist if specified.')
                print('Otherwise the id of the parent process is used.')
                sys.exit(1)
        else:
            pid = os.getppid()
        print(obtain_lock_id(pid))
    elif '--ids' in sys.argv:
        try:
            id = int(sys.argv[2])
        except (IndexError, ValueError):
            print('Usage: %s --ids [specific gpu id]' % me)
            sys.exit(1)
        if _obtain_lock(id):
            print(id)
        else:
            print(-1)
    elif '--id-to-hog' in sys.argv:
        print(obtain_lock_id_to_hog())
    elif '--free' in sys.argv:
        try:
            id = int(sys.argv[2])
        except (IndexError, ValueError):
            print('Usage: %s --free <id>' % me)
            sys.exit(1)
        if free_lock(id):
            print("Lock freed")
        else:
            owner = owner_of_lock(id)
            if owner:
                print("Failed to free lock id=%d owned by %s" % (id, owner))
            else:
                print("Failed to free lock, but it wasn't actually set?")
    elif '--noverbose' in sys.argv:
        # stats is [use, mem, fan, temp]
        stats = nvidia_gpu_stats()
        print(div)
        print("%s board users:" % 'abc')
        print(div)
        for id in board_ids():
            # Temp is stats[3]; stats[2] (fan speed) used to be shown here.
            print("      Board %d {Use:%s; Mem:%s; Temp:%s}: %s" % (id, stats[0][id], stats[1][id], stats[3][id], owner_of_lock(id)))
        print(div + '\n')
    else:
        stats = nvidia_gpu_stats()
        print(div)
        print('  Usage instructions:\n')
        print('  To obtain and lock an id: %s --id' % me)
        print('  The lock is automatically freed when the parent terminates')
        print('')
        print("  To get an id that won't be freed: %s --id-to-hog <id>" % me)
        print("  To get a specific id: %s --ids <id>" % me)
        print('')
        print("  You *must* manually free these ids: %s --free <id>\n" % me)
        print('  More info: http://www.cs.toronto.edu/~murray/code/gpu_monitoring/')
        print('  Report any problems to: tang@cs.toronto.edu')
        print('\n' + div)
        print("  NVIDIA board users:")
        print(div)
        for id in board_ids():
            print("  Board %d {Use:%s; Mem(used+free=total): %s; Fan:%s; Temp:%s}: %s" % (id, stats[0][id], stats[1][id], stats[2][id], stats[3][id], owner_of_lock(id)))
        print(div + '\n')
228 | 
229 | 
230 | 


--------------------------------------------------------------------------------
/util/gpu_util.py:
--------------------------------------------------------------------------------
 1 | import gpu_lock
 2 | import time
 3 | import sys
 4 | 
 5 | def LockGPU(max_retries=10):
 6 |   """ Locks a free GPU board and returns its id. """
 7 |   for retry_count in range(max_retries):
 8 |     board = gpu_lock.obtain_lock_id()
 9 |     if board != -1:
10 |       break
11 |     time.sleep(1)
12 |   if board == -1:
13 |     print 'No GPU board available.'
14 |     sys.exit(1)
15 |   else:
16 |     import theano.sandbox.cuda
17 |     theano.sandbox.cuda.use('gpu'+str(board))
18 |   return board
19 | 
def FreeGPU(board):
  """ Frees the board.

  Releases the lock on `board` by calling gpu_lock.free_lock; the result of
  that call is discarded and nothing is returned.
  """
  gpu_lock.free_lock(board)
23 | 
24 | 


--------------------------------------------------------------------------------
/util/run_on_me_or_pid_quit:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | """
 4 |   run_on_me_or_pid_quit PID cmd arg1 arg2
 5 | 
 6 | Runs a command after the process PID has completed, or if this process is
 7 | interrupted.
 8 | 
 9 | Iain Murray, November 2009, January 2010
10 | """
11 | 
12 | # "Daemonize" this job to stop it getting killed by KeyboardInterrupt when
13 | # pressing Ctrl-c in an interactive python session.
14 | import os
15 | if os.fork() != 0:
16 |     os._exit(0)
17 | os.setsid()
18 | if os.fork() != 0:
19 |     os._exit(0)
20 | 
21 | import sys, os.path, time, signal
22 | 
23 | pid = sys.argv[1]
24 | proc_file = '/proc/' + pid
25 | 
26 | def final():
27 |     os.execv(sys.argv[2], sys.argv[2:])
28 | signal.signal(signal.SIGTERM, final)
29 | 
30 | try:
31 |     while os.path.exists(proc_file):
32 |         time.sleep(2)
33 | finally:
34 |     final()
35 | 


--------------------------------------------------------------------------------