├── datasets_preprocessing
│   ├── __init__.py
│   ├── imgnormalization.py
│   └── datasets.py
├── .gitignore
├── predict_script.py
├── environment.yml
├── evaluation.py
├── splitdatasets.py
├── saveloadweights.py
├── README.md
├── trials.py
├── randomsearch.py
├── earlystopping.py
├── training_script.py
├── batchgenerators.py
├── networks.py
└── trainingtesting.py

--------------------------------------------------------------------------------
/datasets_preprocessing/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.err*
*.o*
.DS_Store
/train_test_splits/

--------------------------------------------------------------------------------
/predict_script.py:
--------------------------------------------------------------------------------
from trainingtesting import Testing

net_specs_dict = {'num_conv_layers': 9, 'num_conv_filters':
                  (32, 32, 64, 64, 128, 128, 128, 128, 128),
                  'conv_filter_size': (3,)*9,
                  'conv_pad': (1,)*9,
                  'num_fc_units': (4096, 4096)}

model_hp_dict = {'p': 0.05}

test = Testing(net_specs_dict, model_hp_dict, 14, 'NYU', 'test',
               'score_fusing', input_channels=5, fusion_level=7,
               fusion_type='local')

predictions =\
    test.predict('/home/mvrigkas/hand_pose_estimation/'
                 + 'models/NYU/score_fusing/local/0.050000/weights.npz')

--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
name: fusenet
channels:
  - defaults
dependencies:
  - binutils_impl_linux-64=2.31.1=h6176602_1
  - binutils_linux-64=2.31.1=h6176602_6
  - blas=1.0=mkl
  - ca-certificates=2019.1.23=0
  - certifi=2019.3.9=py27_0
  - gcc_impl_linux-64=7.3.0=habb00fd_1
  - gcc_linux-64=7.3.0=h553295d_6
  - gxx_impl_linux-64=7.3.0=hdf63c60_1
  - gxx_linux-64=7.3.0=h553295d_6
  - h5py=2.9.0=py27h7918eee_0
  - hdf5=1.10.4=hb1b8bf9_0
  - intel-openmp=2019.3=199
  - libedit=3.1.20181209=hc058e9b_0
  - libffi=3.2.1=hd88cf55_4
  - libgcc-ng=8.2.0=hdf63c60_1
  - libgfortran-ng=7.3.0=hdf63c60_0
  - libgpuarray=0.7.6=h14c3975_0
  - libstdcxx-ng=8.2.0=hdf63c60_1
  - linecache2=1.0.0=py27_0
  - markupsafe=1.1.1=py27h7b6447c_0
  - mkl=2017.0.4=h4c4d0af_0
  - mkl-service=1.1.2=py27_3
  - mkl_fft=1.0.10=py27ha843d7b_0
  - ncurses=6.1=he6710b0_1
  - numpy=1.12.0=py27_0
  - openssl=1.1.1b=h7b6447c_1
  - pip=19.0.3=py27_0
  - pygpu=0.7.6=py27h3010b51_0
  - python=2.7.16=h9bab390_0
  - readline=7.0=h7b6447c_5
  - scikit-learn=0.20.3=py27hd81dba3_0
  - scipy=0.14.0=np19py27_0
  - setuptools=40.8.0=py27_0
  - six=1.12.0=py27_0
  - sqlite=3.27.2=h7b6447c_0
  - theano=1.0.3=py27hfd86e86_0
  - tk=8.6.8=hbc83047_0
  - traceback2=1.4.0=py27_0
  - unittest2=1.1.0=py27_0
  - wheel=0.33.1=py27_0
  - zlib=1.2.11=h7b6447c_3
  - pip:
    - lasagne==0.1
    - mako==1.0.7
prefix: /jmain01/home/JAD026/dxd01/wwp62-dxd01/home-shared/miniconda3/envs/fusenet
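A quick sanity check after creating the environment (not part of the repository; the expected output assumes the `.theanorc` described in the README below):

```
conda activate fusenet
python -c "import theano; print(theano.config.device)"   # should print: cuda0
```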
--------------------------------------------------------------------------------
/evaluation.py:
--------------------------------------------------------------------------------
"""
This module implements different evaluation metrics for our hand pose
estimation algorithm, given the predictions on the test set. The two metrics
implemented here are:
    1) Accuracy, defined as the fraction of test images whose maximum joint
    error is below a threshold.

    2) Mean joint error, i.e. the mean error over the whole test sequence,
    computed for each joint separately.
"""

import numpy as np


def accuracy(test_predictions, gt, threshold):
    """
    Computes the accuracy of the test predictions.

    Keyword arguments:

    test_predictions -- numpy array with predictions of joint positions in the
    test set
    gt -- ground truth joint positions in the test set
    threshold -- threshold on the maximum joint error

    Return:

    acc -- accuracy
    """
    max_error = np.asarray([np.amax(np.linalg.norm(
        gt[i]-test_predictions[i], axis=0)) for i in range(gt.shape[0])])
    # A frame counts as correct when its *maximum* joint error is below the
    # threshold; cast to float so that the division is not truncated under
    # Python 2 integer semantics.
    acc = np.sum((max_error < threshold).astype(dtype=np.float))/gt.shape[0]
    return acc


def mean_joint_error(test_predictions, gt):
    """
    Computes the mean joint error of the test predictions.

    Keyword arguments:

    test_predictions -- numpy array with predictions of joint positions in the
    test set
    gt -- ground truth joint positions in the test set

    Return:

    mean_error -- mean error per joint (a numpy array with size equal to the
    number of joints)
    """
    mean_error = np.mean(np.asarray(
        [np.linalg.norm(gt[i]-test_predictions[i], axis=0)
         for i in range(gt.shape[0])]), axis=0)

    return mean_error
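A minimal sketch of how these metrics are typically called, assuming predictions and ground truth are arrays of shape (num_frames, 3, num_joints) so that the norm over axis 0 yields one error per joint (shapes and names here are illustrative, not taken from the repository):

```python
import numpy as np
from evaluation import accuracy, mean_joint_error

gt = np.random.randn(100, 3, 14)                 # 100 frames, 14 joints in 3D
preds = gt + 0.01 * np.random.randn(100, 3, 14)  # dummy predictions

print(accuracy(preds, gt, threshold=0.1))  # fraction of frames within 0.1
print(mean_joint_error(preds, gt))         # per-joint mean error, shape (14,)
```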
--------------------------------------------------------------------------------
/splitdatasets.py:
--------------------------------------------------------------------------------
"""
This module contains two functions for splitting a dataset into
train/validation sets for later hyper-parameter selection:
    1) split_dsets_trainval: splits the dataset into training/validation sets
    and saves the indices to the selected path
    2) load_dsets_trainval: loads the train/validation indices of the dataset
"""

import h5py
import numpy as np
from sklearn.model_selection import train_test_split
import argparse
import os


def split_dsets_trainval(hdf5_file, save_dir):
    """
    This function saves the train and validation indices of the hdf5_file as
    numpy arrays (binary .npz format).

    Keyword arguments:

    hdf5_file -- hdf5 dataset file (already open)
    save_dir -- path of the .npz file to save the splits to

    Return:
    --
    """
    idx = range(hdf5_file['train']['depth_normalized'].shape[0])
    idx_train, idx_val = train_test_split(idx, random_state=10, test_size=0.5)

    np.savez(save_dir, idx_train, idx_val)


def load_dsets_trainval(train_val_dir):
    """
    This function loads the train/validation indices of a dataset.

    Keyword arguments:

    train_val_dir -- path of the saved train/validation indices (.npz format)

    Return:

    idx_train -- indices of the training set
    idx_val -- indices of the validation set
    """
    npzfile = np.load(train_val_dir)
    idx_train = npzfile['arr_0']
    idx_val = npzfile['arr_1']

    return idx_train, idx_val


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='''Provides a split of the dataset's indices into
        training/validation, to be used by the batch generator''')
    parser.add_argument('dataset', choices=['nyu', 'msra', 'icvl'])
    parser.add_argument('dataset_dir', help='Dataset\'s (in HDF5 format) directory')

    args = parser.parse_args()

    if not os.path.exists('./train_test_splits/'):
        os.mkdir('./train_test_splits/')
    dataset_hdf5 = h5py.File(args.dataset_dir, 'r')
    split_dsets_trainval(dataset_hdf5, './train_test_splits/'+args.dataset+'_split.npz')
    dataset_hdf5.close()

--------------------------------------------------------------------------------
/saveloadweights.py:
--------------------------------------------------------------------------------
import numpy as np
from lasagne.layers import set_all_param_values
import os
import cPickle as pickle
from earlystopping import EarlyStopping


class SaveWeights(EarlyStopping):

    def __init__(self, weights_dir, net, patience, loss_or_acc, times=5):
        super(SaveWeights, self).__init__(net, patience,
                                          loss_or_acc, times)
        self.weights_dir = weights_dir

    def _print_best(self):
        # Report the metric that was actually monitored (best_loss stays at
        # Inf when tracking accuracy, and vice versa; the original message
        # always said "accuracy" but printed best_loss).
        best_val = (self.best_acc if self.loss_or_acc == self.ACCURACY
                    else self.best_loss)
        print 'The best {} was {} at epoch {}'.format(
            self.loss_or_acc, best_val, self.best_epoch)
        print 'Model parameters were saved to '+self.weights_dir

    def save_weights_numpy(self):
        if not os.path.exists(self.weights_dir):
            os.makedirs(self.weights_dir)
        np.savez(os.path.join(self.weights_dir, 'weights.npz'),
                 *self.best_weights)
        self._print_best()

    def save_weights_pickle(self):
        if not os.path.exists(self.weights_dir):
            os.makedirs(self.weights_dir)
        # Use a .pkl extension: this file is a pickle, not a numpy archive.
        with open(os.path.join(self.weights_dir, 'weights.pkl'), 'wb') as f:
            pickle.dump(self.best_weights, f, protocol=pickle.HIGHEST_PROTOCOL)
        self._print_best()


class LoadWeights(object):

    def __init__(self, weights_dir, net):
        if not (os.path.exists(weights_dir)):
            raise OSError("Directory doesn't exist")
        self.weights_dir = weights_dir
        self.net = net

    def load_weights_numpy(self):
        print 'Loading weights from {0:s}...\n'.format(self.weights_dir)
        with np.load(self.weights_dir) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        print 'Setting the weights to the model...\n'
        set_all_param_values(self.net['output'], param_values, trainable=True)

    def load_weights_pickle(self):

        with open(self.weights_dir, 'rb') as f:
            print 'Loading weights from {0:s}...\n'.format(self.weights_dir)
            param_values = pickle.load(f)
        print 'Setting the weights to the model...\n'
        set_all_param_values(self.net['output'], param_values, trainable=True)
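A rough sketch of how these helpers are meant to slot into a training loop (`net` is the layer dictionary returned by the builders in networks.py, which all expose an 'output' key; `run_one_epoch` is a hypothetical stand-in for the actual training code):

```python
from saveloadweights import SaveWeights, LoadWeights

sw = SaveWeights('models/NYU/simple/', net, patience=5, loss_or_acc='loss')
for epoch in range(50):
    val_loss = run_one_epoch()              # hypothetical training step
    if sw.early_stopping(val_loss, epoch):  # inherited from EarlyStopping
        break
sw.save_weights_numpy()

# Later, restore the best parameters into a freshly built network:
LoadWeights('models/NYU/simple/weights.npz', net).load_weights_numpy()
```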
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Two-stream convolutional networks that fuse RGB and depth images for hand pose estimation, implemented in Lasagne

This is an implementation of the paper [On the Fusion of RGB and Depth Information for Hand Pose Estimation](https://ieeexplore.ieee.org/document/8451022). The code is written in Python
using the [Lasagne](https://lasagne.readthedocs.io/en/latest/) deep learning framework.

## Requirements

* CUDA 9.0
* Create a conda environment from the environment.yml file using the following command in a terminal:
`$ conda env create -f environment.yml`
* In your home directory, create a `.theanorc` file containing:
```
[global]
floatX = float32
device = cuda0
```
## Dataset

Download the [NYU dataset](https://cims.nyu.edu/~tompson/NYU_Hand_Pose_Dataset.htm#download) and unzip it.
The code is designed to process the data in HDF5 format using [h5py](https://www.h5py.org). To convert
the dataset to HDF5 format, run the following in a Python shell:

```python
from datasets_preprocessing.datasets import NYU_Dataset
nyu = NYU_Dataset('/path/NYU/dataset', '/path/NYU/hdf5')
nyu.convert_to_hdf5()

```
where */path/NYU/* should be replaced with the location of the unzipped data from above. In ```datasets_preprocessing.datasets```, there are also classes for converting the [ICVL]() and [MSRA]() datasets to HDF5. Only NYU contains RGB-D images, while
ICVL and MSRA contain only depth images, so experiments have been carried out only on NYU. Nevertheless, you may
want to train just the depth stream on ICVL and MSRA.

## Training

Example:

```
python training_script.py 5 conv_fusing 0.5 9 concat --dataset_dir ~/data-private/NYU/hdf5/ --predef_hp --shuffle --validate
```

For a full description of the input arguments, run `python training_script.py -h`.


## Publication

Please reference this publication if you find this code useful:

```
@inproceedings{kazakos_fusion_icip2018,
  author={E. Kazakos and C. Nikou and I. A. Kakadiaris},
  booktitle={25th IEEE International Conference on Image Processing (ICIP)},
  title={On the Fusion of RGB and Depth Information for Hand Pose Estimation},
  year={2018},
  pages={868-872},
  month={Oct},
}
```

## Citations

* J. Tompson, M. Stein, Y. LeCun, and K. Perlin, "Real-Time Continuous Pose Recovery of Human Hands Using Convolutional Networks," ACM Transactions on Graphics, vol. 33, pp. 169:1–169:10, 2014.
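## Prediction

`predict_script.py` shows how to run a trained score-fusion model on the NYU test set. A trimmed version (the weights path is a placeholder for wherever your trained `weights.npz` was saved; `net_specs_dict` and `model_hp_dict` are the dictionaries defined at the top of that script):

```python
from trainingtesting import Testing

test = Testing(net_specs_dict, model_hp_dict, 14, 'NYU', 'test',
               'score_fusing', input_channels=5, fusion_level=7,
               fusion_type='local')
predictions = test.predict('/path/to/models/NYU/score_fusing/local/0.050000/weights.npz')
```

The resulting predictions can then be scored with the `accuracy` and `mean_joint_error` functions from `evaluation.py`.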
--------------------------------------------------------------------------------
/trials.py:
--------------------------------------------------------------------------------
import trainingtesting


import sys

if sys.argv[1] == 'conv':
    net_specs_dict = {'num_conv_layers': 9, 'num_conv_filters':
                      (32, 32, 64, 64, 128, 128, 128, 128, 128),
                      'conv_filter_size': (3,)*9,
                      'conv_pad': (1,)*9,
                      'num_fc_units': (4096, 4096)}
    opt_hp_dict = {'lr': 0.009, 'mom': 0.98}
    model_hp_dict = {'p': 0.03}
    tr = trainingtesting.Training(14, 'NYU', 'train', 'simple', 100, 3,
                                  net_specs_dict, model_hp_dict=model_hp_dict,
                                  opt_hp_dict=opt_hp_dict, input_channels=3)

    training_inf = tr.train_fused(early_stopping=True, shuffle=True)
elif sys.argv[1] == 'rec':
    net_specs_dict = {'num_conv_layers': 3, 'num_conv_filters':
                      (32, 64, 128), 'conv_filter_size': (3, 3, 3),
                      'conv_pad': (1, 1, 1), 'num_fc_units': (1024, 128)}
    hp_specs_dict = {'lr': 0.01, 'mom': 0.9, 'lambda_con': 0.001,
                     'lambda_rec': 0.01}
    # NOTE: this branch (and 'fuse' below) still passes arguments in an older
    # positional order of Training; compare with training_script.py before
    # reusing.
    tr = trainingtesting.Training(net_specs_dict, hp_specs_dict, 14, 'NYU',
                                  'train', 'autoencoding', 100, 20)

    training_inf = tr.train(early_stopping=False, updates_mode='double')
elif sys.argv[1] == 'fuse':
    net_specs_dict = {'num_conv_layers': 4, 'num_conv_filters':
                      (32, 64, 128, 128),
                      'conv_filter_size': (3,)*4,
                      'conv_pad': (1,)*4,
                      'num_fc_units': (2048, 2048)}
    hp_specs_dict = {'lr': 0.01, 'mom': 0.9}
    tr = trainingtesting.Training(net_specs_dict, hp_specs_dict, 14, 'NYU',
                                  'train', 'fusing', 100, 20, input_channels=4,
                                  fusion_level=4, fusion_type='concatconv')

    training_inf = tr.train_fused(early_stopping=False)
elif sys.argv[1] == 'dense_fuse':
    net_specs_dict = {'num_conv_layers': 4, 'num_conv_filters':
                      (32, 64, 128, 128),
                      'conv_filter_size': (3,)*4,
                      'conv_pad': (1,)*4,
                      'num_fc_units': (4096, 4096)}
    opt_hp_dict = {'lr': 0.01, 'mom': 0.9}
    model_hp_dict = {'p': 0.03}
    tr = trainingtesting.Training(14, 'NYU', 'train', 'dense_fusing', 100, 20,
                                  net_specs_dict, model_hp_dict=model_hp_dict,
                                  opt_hp_dict=opt_hp_dict, input_channels=4,
                                  fusion_type='concat')

    training_inf = tr.train_fused(early_stopping=False)
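The script selects one of its hard-coded configurations from the first command-line argument:

```
python trials.py conv        # single-stream ConvNet
python trials.py rec         # autoencoding variant
python trials.py fuse        # convolutional fusion
python trials.py dense_fuse  # dense-layer fusion
```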
--------------------------------------------------------------------------------
/randomsearch.py:
--------------------------------------------------------------------------------
import cPickle as pickle
import numpy as np
import os


def sample_hyperparams(lr_range, mom_range, p_range):
    opt_hp_dict = {}
    model_hp_dict = {}
    # Sample learning rate and momentum log-uniformly within their ranges
    # (the momentum formula originally mixed np.log with np.log10).
    opt_hp_dict['lr'] = 10**(np.random.random() *
                             (np.log10(lr_range[1])-np.log10(lr_range[0])) +
                             np.log10(lr_range[0]))
    opt_hp_dict['mom'] = 10**(np.random.random() *
                              (np.log10(mom_range[1])-np.log10(mom_range[0])) +
                              np.log10(mom_range[0]))
    model_hp_dict['p'] = np.random.random() *\
        (p_range[1]-p_range[0]) + p_range[0]

    return opt_hp_dict, model_hp_dict


def save_hyperparams(save_dir, opt_hp_dict, model_hp_dict, best_loss):

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # num_files = len([f for f in os.listdir(save_dir) if
    #                  os.path.isfile(os.path.join(save_dir, f))])
    hyperparams_file = os.path.join(save_dir,
                                    'setting_lr{0:f}_mom{1:f}_p{2:f}.pkl'.
                                    format(opt_hp_dict['lr'],
                                           opt_hp_dict['mom'],
                                           model_hp_dict['p']))
    with open(hyperparams_file, 'wb') as f:
        pickle.dump(opt_hp_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
        pickle.dump(model_hp_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
        pickle.dump(best_loss, f, protocol=pickle.HIGHEST_PROTOCOL)


def find_best_hyperparams(hyperparams_dir):

    loss = []
    hyperparams_file = []

    for f in os.listdir(hyperparams_dir):
        if os.path.isfile(os.path.join(hyperparams_dir, f)):
            with open(os.path.join(hyperparams_dir, f), 'rb') as pf:
                opt_hp_dict = pickle.load(pf)
                model_hp_dict = pickle.load(pf)
                best_loss = pickle.load(pf)
            loss.append(best_loss)
            hyperparams_file.append({'file': f, 'opt_hp': opt_hp_dict,
                                     'model_hp': model_hp_dict})
    # Print all sampled settings, best (lowest loss) first.
    ind = np.argsort(np.array(loss))
    for i in ind:
        print 'Loss: {0:f}\tLr: {1:f}\tMom: {2:f}\tP: {3:f}'.format(
            loss[i],
            hyperparams_file[i]['opt_hp']['lr'],
            hyperparams_file[i]['opt_hp']['mom'],
            hyperparams_file[i]['model_hp']['p'])
    '''
    ind = np.argmin(np.array(loss))
    with open(os.path.join(hyperparams_dir, 'best_setting.txt'), 'w') as f:
        f.write('File: {0:s}\n'.format(hyperparams_file[ind]['file']))
        f.write('Loss: {0:f}\n'.format(loss[ind]))
        f.write('Learning rate: {0:f}\tMomentum: {1:f}\tDropout prob:\
                {2:f}\n'.format(hyperparams_file[ind]['opt_hp']['lr'],
                                hyperparams_file[ind]['opt_hp']['mom'],
                                hyperparams_file[ind]['model_hp']['p']))
    '''
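These helpers are meant to be wired together by a driver loop along the following lines (a sketch; `train_with` stands in for whatever builds and trains the model, e.g. via trainingtesting.Training, and the ranges are illustrative):

```python
from randomsearch import (sample_hyperparams, save_hyperparams,
                          find_best_hyperparams)

for trial in range(30):
    opt_hp, model_hp = sample_hyperparams(lr_range=(1e-4, 1e-1),
                                          mom_range=(0.8, 0.99),
                                          p_range=(0.0, 0.5))
    best_loss = train_with(opt_hp, model_hp)  # hypothetical training call
    save_hyperparams('hyperparams/NYU/', opt_hp, model_hp, best_loss)

find_best_hyperparams('hyperparams/NYU/')  # prints settings sorted by loss
```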
--------------------------------------------------------------------------------
/earlystopping.py:
--------------------------------------------------------------------------------
from lasagne.layers import get_all_param_values, set_all_param_values


class EarlyStopping(object):

    ACCURACY = 'acc'
    LOSS = 'loss'

    def __init__(self, net, patience, loss_or_acc, times=5):
        self.patience = patience
        if loss_or_acc not in [self.ACCURACY, self.LOSS]:
            raise ValueError('loss_or_acc should take one of the following\
                             values: \'loss\', \'acc\'')
        self.loss_or_acc = loss_or_acc
        self.best_acc = 0
        self.best_loss = float('Inf')
        self.best_epoch = 0
        self.best_weights = None
        self.net = net
        self.times = times

    def early_stopping(self, current_val, current_epoch):
        if self.loss_or_acc == self.ACCURACY:
            if current_val > self.best_acc:
                self.best_acc = current_val
                self.best_epoch = current_epoch
                self.best_weights = get_all_param_values(self.net['output'],
                                                         trainable=True)
                return False
            elif self.best_epoch + self.patience < current_epoch:
                print 'Early Stopping...'
                return True
            else:
                # No improvement yet, but patience has not run out.
                return False
        else:
            if current_val < self.best_loss:
                self.best_loss = current_val
                self.best_epoch = current_epoch
                self.best_weights = get_all_param_values(self.net['output'],
                                                         trainable=True)
                return False
            elif self.best_epoch + self.patience < current_epoch:
                print 'Early Stopping...'
                return True
            else:
                # No improvement yet, but patience has not run out.
                return False

    def early_stopping_with_lr_decay(self, current_val, current_epoch, lr,
                                     time):
        if self.loss_or_acc == self.ACCURACY:
            if current_val > self.best_acc:
                self.best_acc = current_val
                self.best_epoch = current_epoch
                self.best_weights = get_all_param_values(self.net['output'],
                                                         trainable=True)
                return False, False
            elif self.best_epoch + self.patience < current_epoch:
                if time < self.times:
                    lr.set_value(lr.get_value()*0.5)
                    set_all_param_values(self.net['output'], self.best_weights,
                                         trainable=True)
                    return True, True
                else:
                    print 'Early Stopping...'
                    return True, False
            else:
                # Mirror the loss branch below: this branch used to fall
                # through and implicitly return None, which crashes on tuple
                # unpacking at the call site.
                return False, True
        else:
            if current_val < self.best_loss:
                self.best_loss = current_val
                self.best_epoch = current_epoch
                self.best_weights = get_all_param_values(self.net['output'],
                                                         trainable=True)
                return False, False
            elif self.best_epoch + self.patience < current_epoch:
                if time < self.times:
                    lr.set_value(lr.get_value()*0.5)
                    set_all_param_values(self.net['output'], self.best_weights,
                                         trainable=True)
                    return True, True
                else:
                    print 'Early Stopping...'
                    return True, False
            else:
                return False, True
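Reading the return pairs above: (False, ·) means keep training, (True, True) means patience ran out but the learning rate was halved and the best weights restored, and (True, False) means the decay budget (`times`) is also exhausted. A hedged sketch of a compatible driver loop (`run_one_epoch` is hypothetical, and resetting `best_epoch` after a decay is an assumption — the original caller in trainingtesting.py may handle this differently):

```python
import numpy as np
import theano
from earlystopping import EarlyStopping

lr = theano.shared(np.float32(0.01))  # shared variable used by the updates
es = EarlyStopping(net, patience=5, loss_or_acc='loss', times=5)

time = 0
for epoch in range(100):
    val_loss = run_one_epoch()                  # hypothetical training epoch
    stop, decayed = es.early_stopping_with_lr_decay(val_loss, epoch, lr, time)
    if stop and decayed:         # lr halved, best weights restored
        time += 1
        es.best_epoch = epoch    # assumption: give the new lr a fresh window
    elif stop and not decayed:
        break
```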
--------------------------------------------------------------------------------
/training_script.py:
--------------------------------------------------------------------------------
import argparse
import trainingtesting


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='''Runs training of the ConvNet of your choice. You can
        train a classical ConvNet on depth, RGB or RGB-D data or you
        can train an architecture that fuses ConvNet towers on different
        inputs (RGB and depth).''')
    parser.add_argument('input_channels', choices=[1, 4, 5], type=int,
                        help='number of input channels: 1 for depth, 4 for '
                        'rgb, 5 for rgbd or fusion')
    parser.add_argument('net_type',
                        choices=['simple', 'conv_fusing', 'dense_fusing',
                                 'score_fusing', 'input_fusing'],
                        help='type of network')
    parser.add_argument('p', type=float, help='dropout probability')
    parser.add_argument('fusion_level', type=int, nargs='?',
                        help='integer that specifies in which convolutional '
                        'layer to fuse')
    parser.add_argument('fusion_type', nargs='?',
                        choices=['sum', 'max', 'concat', 'concatconv',
                                 'local'],
                        help='fusion functions; use \'local\' only with '
                        'score fusion')
    parser.add_argument('--dataset_dir',
                        help='directory of the dataset in HDF5 format')
    parser.add_argument('--predef_hp', action='store_true',
                        help='whether or not to use predefined hyperparams')
    parser.add_argument('--validate', action='store_true',
                        help='whether to run in validation mode')
    parser.add_argument('--save_model', action='store_true',
                        help='whether to save model params')
    parser.add_argument('--save_loss', action='store_true',
                        help='whether to save loss curves')
    parser.add_argument('--early_stopping', action='store_true',
                        help='whether to perform early stopping')
    parser.add_argument('--shuffle', action='store_true',
                        help='whether to shuffle training data at each epoch')
    parser.add_argument('--weights_dir', help='directory of saved weights '
                        'for resuming training')
    args = parser.parse_args()
    # Depth-Net
    net_specs_dict = {'num_conv_layers': 9, 'num_conv_filters':
                      (32, 32, 64, 64, 128, 128, 128, 128, 128),
                      'conv_filter_size': (3,)*9,
                      'conv_pad': (1,)*9,
                      'num_fc_units': (4096, 4096)}

    if args.predef_hp:
        opt_hp_dict = {'lr': 0.009, 'mom': 0.98}
        model_hp_dict = {'p': args.p}
    else:
        opt_hp_dict = None
        model_hp_dict = None
    tr = trainingtesting.Training(args.dataset_dir, 14, 'NYU', 'train',
                                  args.net_type, 50, 5,
                                  net_specs_dict, model_hp_dict=model_hp_dict,
                                  opt_hp_dict=opt_hp_dict,
                                  validate=args.validate,
                                  input_channels=args.input_channels,
                                  fusion_level=args.fusion_level,
                                  fusion_type=args.fusion_type,
                                  weights_dir=args.weights_dir)
    training_inf = tr.train(save_model=args.save_model,
                            save_loss=args.save_loss,
                            early_stopping=args.early_stopping,
                            shuffle=args.shuffle)
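For example, a score-fusion run resumed from previously saved weights might look like this (paths are placeholders):

```
python training_script.py 5 score_fusing 0.05 7 local --dataset_dir ~/data/NYU/hdf5/ --predef_hp --shuffle --validate --save_model --weights_dir models/NYU/score_fusing/local/0.050000/weights.npz
```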
--------------------------------------------------------------------------------
/batchgenerators.py:
--------------------------------------------------------------------------------
import numpy as np
"""
This module implements different minibatch generators, depending on the
dataset. It contains a single class, BatchGenerator, whose methods generate
minibatches for each dataset.
"""


class BatchGenerator(object):
    """
    This class handles the minibatch generators for each dataset. It contains
    the following functions:
        1) __init__: class constructor
        2) generate_batches: batch generator for the NYU and ICVL datasets
        3) generate_batches_msra_train: training batch generator for the MSRA
        dataset
        4) generate_batches_msra_test: test batch generator for the MSRA
        dataset
    """
    # Datasets
    MSRA = 'MSRA'
    NYU = 'NYU'
    ICVL = 'ICVL'

    def __init__(self, hdf5_file, dataset, group, iterable=None,
                 shuffle=False):
        """
        Class constructor. It contains the following fields:
            1) _hdf5_file: hdf5 file of the dataset
            2) _dataset: the name of the dataset (available: "MSRA", "ICVL",
            "NYU")
            3) _group: which group of the _hdf5_file will be iterated. For
            ICVL and NYU, if group='train' you also have to specify
            _iterable (see below). For MSRA, _group defines the subject that
            will be kept as the test set.
            4) _dataset_size: the size of the dataset
            5) _iterable: iterable with ids that specify the part of the
            group to be iterated (if you split the training set into
            train/validation sets, provide one iterable with the ids of the
            training data and one with the ids of the validation data; when
            group='test', leave it None)
        """
        self._hdf5_file = hdf5_file
        if dataset not in [self.MSRA, self.NYU, self.ICVL]:
            raise ValueError('dataset can take one of the following values:\
                             \'MSRA\', \'ICVL\', \'NYU\'')
        self._dataset = dataset
        self._iterable = iterable
        if group not in self._hdf5_file.keys():
            raise ValueError('group should take one of the following values:\
                             {0:s}'.format(self._hdf5_file.keys()))
        self._group = group
        if self._iterable is not None:
            self._dataset_size = self._iterable.shape[0]
        else:
            self._dataset_size = self._hdf5_file[
                self._group]["depth_normalized"].shape[0]
        self._shuffle = shuffle

    def generate_batches(self, input_channels, batch_size=64):
        start_id = 0
        if self._iterable is None:
            indices = range(self._dataset_size)
        if self._shuffle:
            if self._iterable is not None:
                np.random.shuffle(self._iterable)
            else:
                np.random.shuffle(indices)
        while(start_id < self._dataset_size):
            if self._iterable is not None:
                chunk = slice(start_id, start_id+batch_size)
                chunk = self._iterable[chunk].tolist()
                # h5py requires fancy indices in increasing order.
                chunk.sort()
            else:
                chunk = slice(start_id, start_id+batch_size)
                chunk = indices[chunk]
            start_id += batch_size
            if input_channels == 1:
                yield self._hdf5_file[self._group]["depth_normalized"][chunk],\
                    self._hdf5_file[self._group]["joints3D_normalized"][chunk]
            elif input_channels == 4:
                yield self._hdf5_file[self._group]["rgb_normalized"][chunk],\
                    self._hdf5_file[self._group]["joints3D_normalized"][chunk]
            elif input_channels == 5:
                yield self._hdf5_file[self._group]["rgb_normalized"][chunk],\
                    self._hdf5_file[self._group]["depth_normalized"][chunk],\
                    self._hdf5_file[self._group]["joints3D_normalized"][chunk]

    def generate_batches_msra_train(self, batch_size=64):

        # list.remove() mutates in place and returns None, so it cannot be
        # chained onto the list literal as in the original code.
        groups = ['P0', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8']
        groups.remove(self._group)
        for grp in groups:
            dsize = self._hdf5_file[grp]["depth_normalized"].shape[0]
            start_id = 0
            while(start_id < dsize):
                # Clip the last chunk so the indices stay in range.
                chunk = range(start_id, min(start_id+batch_size, dsize))
                start_id += batch_size
                yield self._hdf5_file[grp]["depth_normalized"][chunk],\
                    self._hdf5_file[grp]["joints3D_normalized"][chunk]

    def generate_batches_msra_test(self, batch_size=1):
        start_id = 0
        # Iterate over the whole held-out subject (the original condition
        # compared start_id against batch_size, which yields a single batch).
        while(start_id < self._dataset_size):
            chunk = range(start_id, min(start_id+batch_size,
                                        self._dataset_size))
            start_id += batch_size
            yield self._hdf5_file[self._group]["depth_normalized"][chunk],\
                self._hdf5_file[self._group]["joints3D_normalized"][chunk]

--------------------------------------------------------------------------------
/datasets_preprocessing/imgnormalization.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | from scipy.ndimage.measurements import center_of_mass
4 | import cv2
5 | 
6 | 
7 | class ImgNormalization(object):
8 |     """Preprocess hand images to center the hand and normalize"""
9 |     def __init__(self, fx, fy, px, py, cube_size):
10 | 
11 |         self.fx = fx
12 |         self.fy = fy
13 |         self.px = px
14 |         self.py = py
15 |         self.cube_size =
cube_size 16 | 17 | 18 | #TODO Move the following 3 functions to a different class if necessary 19 | def depth_to_uvd(self, depth): 20 | 21 | uvd = np.zeros((3, depth.shape[0], depth.shape[1])) 22 | uv = np.mgrid[0:depth.shape[0], 0:depth.shape[1]] 23 | uvd[0] = uv[1] 24 | uvd[1] = uv[0] 25 | uvd[2] = depth 26 | return uvd 27 | 28 | def uvd_to_xyz(self, uvd): 29 | 30 | xyz = np.zeros(uvd.shape) 31 | xyz[0] = (uvd[0] - self.px)*uvd[2]/self.fx 32 | xyz[1] = (self.py - uvd[1])*uvd[2]/self.fy 33 | xyz[2] = uvd[2] 34 | 35 | return xyz 36 | 37 | def xyz_to_uvd(self, xyz): 38 | 39 | uvd = np.zeros(xyz.shape) 40 | uvd[0] = xyz[0]*self.fx/xyz[2] + self.px 41 | uvd[1] = self.py - xyz[1]*self.fy/xyz[2] 42 | uvd[2] = xyz[2] 43 | 44 | return uvd 45 | 46 | @staticmethod 47 | def calculate_com(depth_hand): 48 | """ 49 | Calculate the center of mass 50 | :param dpt: depth image 51 | :return: (x,y,z) center of mass 52 | """ 53 | 54 | dc = depth_hand.copy() 55 | cc = center_of_mass(dc > 0) 56 | num = np.count_nonzero(dc) 57 | com = np.array((cc[1]*num, cc[0]*num, dc.sum()), np.float) 58 | 59 | if num == 0: 60 | return np.array((0, 0, 0), np.float) 61 | else: 62 | return com/num 63 | 64 | @staticmethod 65 | def transform_point_2D(pt, M): 66 | """ 67 | Transform point in 2D coordinates 68 | :param pt: point coordinates 69 | :param M: transformation matrix 70 | :return: transformed point 71 | """ 72 | pt2 = np.asmatrix(M.reshape((3, 3))) * np.matrix([pt[0], pt[1], 1]).T 73 | return np.array([pt2[0] / pt2[2], pt2[1] / pt2[2]]) 74 | 75 | def ptcl_normalization(self, depth, com3D): 76 | """ 77 | Center point cloud to 0 and normalize it to [-1, 1] 78 | 79 | Keyword arguments: 80 | depth -- depth image (the initial before cropping) 81 | com3D -- center of mass in 3D 82 | cube_size -- size of the cube that used to crop hand area (default 250) 83 | 84 | Return: 85 | ptcl_normalized -- point cloud centered to 0 and normalized to [-1, 1] 86 | """ 87 | 88 | pcl_uvd = self.depth_to_uvd(depth) 89 | pcl_xyz = self.uvd_to_xyz(pcl_uvd) 90 | indr,indc = np.nonzero(pcl_xyz[2]) 91 | 92 | ptcl_normalized = np.vstack((pcl_xyz[0,indr,indc],pcl_xyz[1,indr,indc],pcl_xyz[2,indr,indc])) 93 | 94 | ptcl_normalized[0]-=com3D[0] 95 | ptcl_normalized[1]-=com3D[1] 96 | ptcl_normalized[2]-=com3D[2] 97 | ptcl_normalized /= self.cube_size / 2 98 | 99 | return ptcl_normalized 100 | 101 | def joints3D_depth_normalization(self, joints3D, depth, com3D): 102 | """ 103 | Center depth and joints in 3D to 0 and normalize it to [-1, 1]. 104 | 105 | Keyword arguments: 106 | joints3D -- joints in 3D 107 | com3D -- center of mass in 3D 108 | depth -- depth image that has been croped and scaled 109 | 110 | Return: 111 | joints3D_normalized -- joints in 3D centered to 0 and normalized to [-1, 1] 112 | depth_normalized -- depth centered to 0 and normalized to [-1, 1] 113 | """ 114 | 115 | joints3D_normalized = np.clip((joints3D - com3D[:,None]) / (self.cube_size / 2), -1, 1) 116 | depth[depth == 0.] = com3D[2] + self.cube_size / 2. 
117 | depth -= com3D[2] 118 | depth_normalized = depth / (self.cube_size / 2) 119 | 120 | return joints3D_normalized, depth_normalized 121 | 122 | # def getNDValue(self): 123 | # """ 124 | # Get value of not defined depth value distances 125 | # :return:value of not defined depth value 126 | # """ 127 | # if self.depth[self.depth < self.minDepth].shape[0] > self.depth[self.depth > self.maxDepth].shape[0]: 128 | # return stats.mode(self.depth[self.depth < self.minDepth])[0][0] 129 | # else: 130 | # return stats.mode(self.depth[self.depth > self.maxDepth])[0][0] 131 | 132 | def crop_scale_depth(self, depth, com, dsize=(128, 128)): 133 | """ 134 | Crops depth image using 3D bounding box centered at the CoM of hand 135 | and then resize it to a 128x128 image 136 | :param depth: depth image 137 | :param com: center of mass of hand 138 | :param size: size of 3D bounding box 139 | :param dsize: size of the scaled image 140 | :return: cropped and resized image and transformation matrix for joints 141 | 142 | """ 143 | maxDepth = min(1500, depth.max()) 144 | minDepth = max(10, depth.min()) 145 | # set values out of range to 0 146 | depth[depth > maxDepth] = 0. 147 | depth[depth < minDepth] = 0. 148 | 149 | # calculate boundaries 150 | zstart = com[2] - self.cube_size / 2. 151 | zend = com[2] + self.cube_size / 2. 152 | xstart = int(math.floor((com[0] * com[2] / self.fx - self.cube_size / 2.) / com[2]*self.fx)) 153 | xend = int(math.floor((com[0] * com[2] / self.fx + self.cube_size / 2.) / com[2]*self.fx)) 154 | ystart = int(math.floor((com[1] * com[2] / self.fy - self.cube_size / 2.) / com[2]*self.fy)) 155 | yend = int(math.floor((com[1] * com[2] / self.fy + self.cube_size / 2.) / com[2]*self.fy)) 156 | 157 | # crop patch from source 158 | cropped = depth[max(ystart, 0):min(yend, depth.shape[0]), max(xstart, 0):min(xend, depth.shape[1])].copy() 159 | # add pixels that are out of the image in order to keep aspect ratio 160 | cropped = np.pad(cropped, ((abs(ystart)-max(ystart, 0), abs(yend)-min(yend, depth.shape[0])), 161 | (abs(xstart)-max(xstart, 0),abs(xend)-min(xend, depth.shape[1]))), mode='constant', constant_values=0) 162 | msk1 = np.bitwise_and(cropped < zstart, cropped != 0) 163 | msk2 = np.bitwise_and(cropped > zend, cropped != 0) 164 | cropped[msk1] = zstart 165 | cropped[msk2] = 0. 
166 | 167 | wb = (xend - xstart) 168 | hb = (yend - ystart) 169 | 170 | trans = np.asmatrix(np.eye(3, dtype=float)) 171 | trans[0, 2] = -xstart 172 | trans[1, 2] = -ystart 173 | 174 | if wb > hb: 175 | sz = (dsize[0], hb * dsize[0] / wb) 176 | else: 177 | sz = (wb * dsize[1] / hb, dsize[1]) 178 | 179 | roi = cropped 180 | 181 | if roi.shape[0] > roi.shape[1]: 182 | scale = np.asmatrix(np.eye(3, dtype=float) * sz[1] / float(roi.shape[0])) 183 | else: 184 | scale = np.asmatrix(np.eye(3, dtype=float) * sz[0] / float(roi.shape[1])) 185 | scale[2, 2] = 1 186 | 187 | rz = cv2.resize(roi, sz, interpolation=cv2.INTER_NEAREST) 188 | 189 | ret = np.ones(dsize, np.float) * zend # use background as filler 190 | xstart = int(math.floor(dsize[0] / 2 - rz.shape[1] / 2)) 191 | xend = int(xstart + rz.shape[1]) 192 | ystart = int(math.floor(dsize[1] / 2 - rz.shape[0] / 2)) 193 | yend = int(ystart + rz.shape[0]) 194 | ret[ystart:yend, xstart:xend] = rz 195 | 196 | off = np.asmatrix(np.eye(3, dtype=float)) 197 | off[0, 2] = xstart 198 | off[1, 2] = ystart 199 | 200 | return ret, off * scale * trans 201 | 202 | def crop_scale_rgb(self, rgb, depth, com, dsize=(128, 128, 3)): 203 | """ 204 | Crops depth image using 3D bounding box centered at the CoM of hand 205 | and then resize it to a 128x128 image 206 | :param depth: depth image 207 | :param com: center of mass of hand 208 | :param size: size of 3D bounding box 209 | :param dsize: size of the scaled image 210 | :return: cropped and resized image and transformation matrix for joints 211 | 212 | """ 213 | 214 | # calculate boundaries 215 | xstart = int(math.floor((com[0] * com[2] / self.fx - self.cube_size / 2.) / com[2]*self.fx)) 216 | xend = int(math.floor((com[0] * com[2] / self.fx + self.cube_size / 2.) / com[2]*self.fx)) 217 | ystart = int(math.floor((com[1] * com[2] / self.fy - self.cube_size / 2.) / com[2]*self.fy)) 218 | yend = int(math.floor((com[1] * com[2] / self.fy + self.cube_size / 2.) / com[2]*self.fy)) 219 | 220 | # crop patch from source 221 | cropped = rgb[max(ystart, 0):min(yend, rgb.shape[0]), max(xstart, 0):min(xend, rgb.shape[1])].copy() 222 | 223 | # add pixels that are out of the image in order to keep aspect ratio 224 | cropped = np.pad(cropped, ((abs(ystart)-max(ystart, 0), abs(yend)-min(yend, rgb.shape[0])), 225 | (abs(xstart)-max(xstart, 0),abs(xend)-min(xend, rgb.shape[1])), (0,0)), mode='constant', constant_values=0) 226 | 227 | 228 | wb = (xend - xstart) 229 | hb = (yend - ystart) 230 | 231 | if wb > hb: 232 | sz = (dsize[0], hb * dsize[0] / wb) 233 | else: 234 | sz = (wb * dsize[1] / hb, dsize[1]) 235 | 236 | roi = cropped 237 | rz = cv2.resize(roi, sz) 238 | 239 | ret = np.zeros(dsize, np.uint8) 240 | xstart = int(math.floor(dsize[0] / 2 - rz.shape[1] / 2)) 241 | xend = int(xstart + rz.shape[1]) 242 | ystart = int(math.floor(dsize[1] / 2 - rz.shape[0] / 2)) 243 | yend = int(ystart + rz.shape[0]) 244 | ret[ystart:yend, xstart:xend, :] = rz 245 | msk = np.bitwise_not(np.bitwise_or(depth==1., depth==-1.)) 246 | return ret, msk -------------------------------------------------------------------------------- /networks.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains definitions for different network architectures. 
3 | """ 4 | from collections import OrderedDict 5 | from lasagne.layers import InputLayer, Conv2DLayer, MaxPool2DLayer, DenseLayer, dropout, ElemwiseMergeLayer, concat, reshape, Conv1DLayer, ElemwiseSumLayer 6 | # from lasagne.layers import LocallyConnected2DLayer 7 | import lasagne.nonlinearities 8 | import lasagne 9 | import theano.tensor as T 10 | from saveloadweights import LoadWeights 11 | 12 | 13 | class ConvNet(object): 14 | """ 15 | This class contains all the necessary information for creating a 16 | network(such as number of layers and number of filters per layer), as well 17 | as functions that define different networks. 18 | """ 19 | CONCAT = 'concat' 20 | CONCATCONV = 'concatconv' 21 | SUM = 'sum' 22 | MAX = 'max' 23 | LOCAL = 'local' 24 | 25 | def __init__(self, net_specs_dict, model_hp_dict, num_joints): 26 | 27 | self._net_specs_dict = net_specs_dict 28 | self._model_hp_dict = model_hp_dict 29 | self._num_joints = num_joints 30 | 31 | def simple_convnet(self, input_channels, input_var=None, 32 | bottleneck_W=None): 33 | """ 34 | This is a classical convnet. It contains convolution and 35 | fully-connected(fc) layers. 36 | 37 | Keyword arguments: 38 | input_var -- theano variable that specifies the type and dimension of 39 | the input(default None) 40 | 41 | Return: 42 | net -- dictionary that contains all the network layers 43 | """ 44 | net = OrderedDict() 45 | net['input'] = InputLayer((None, input_channels, 128, 128), 46 | input_var=input_var) 47 | layer = 0 48 | for i in range(self._net_specs_dict['num_conv_layers']): 49 | # Add convolution layers 50 | net['conv{0:d}'.format(i+1)] = Conv2DLayer( 51 | net.values()[layer], 52 | num_filters=self._net_specs_dict['num_conv_filters'][i], 53 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 54 | pad='same') 55 | layer += 1 56 | if self._net_specs_dict['num_conv_layers'] <= 2: 57 | # Add pooling layers 58 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 59 | net.values()[layer], pool_size=(3, 3)) 60 | layer += 1 61 | else: 62 | if i < 4: 63 | if (i+1) % 2 == 0: 64 | # Add pooling layers 65 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 66 | net.values()[layer], pool_size=(3, 3)) 67 | layer += 1 68 | else: 69 | if (i+1) == 7: 70 | # Add pooling layers 71 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 72 | net.values()[layer], pool_size=(3, 3)) 73 | layer += 1 74 | 75 | # Add fc-layers 76 | net['fc1'] = DenseLayer( 77 | net.values()[layer], 78 | self._net_specs_dict['num_fc_units'][0]) 79 | # Add dropout layer 80 | net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p']) 81 | net['fc2'] = DenseLayer( 82 | net['dropout1'], self._net_specs_dict['num_fc_units'][1]) 83 | # Add dropout layer 84 | net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) 85 | if bottleneck_W is not None: 86 | # Add bottleneck layer 87 | net['bottleneck'] = DenseLayer(net['dropout2'], 30) 88 | # Add output layer(linear activation because it's regression) 89 | net['output'] = DenseLayer( 90 | net['bottleneck'], 3*self._num_joints, 91 | W=bottleneck_W[0:30], 92 | nonlinearity=lasagne.nonlinearities.tanh) 93 | else: 94 | # Add output layer(linear activation because it's regression) 95 | net['output'] = DenseLayer( 96 | net['dropout2'], 3*self._num_joints, 97 | nonlinearity=lasagne.nonlinearities.tanh) 98 | return net 99 | 100 | def input_fused_convnets(self, fusion_type, input_var1=None, 101 | input_var2=None, bottleneck_W=None): 102 | net = OrderedDict() 103 | net['input_rgb'] = InputLayer((None, 4, 128, 128), 104 | 
input_var=input_var1) 105 | layer = 0 106 | net['input_depth'] = InputLayer((None, 1, 128, 128), 107 | input_var=input_var2) 108 | layer += 1 109 | 110 | if fusion_type == self.CONCAT: 111 | net['merge'] = concat([net['input_rgb'], 112 | net['input_depth']] 113 | ) 114 | layer += 1 115 | elif fusion_type == self.CONCATCONV: 116 | net['concat'] = concat( 117 | [net['input_rgb'], net['input_depth']]) 118 | layer += 1 119 | net['merge'] = Conv2DLayer(net['concat'], 120 | num_filters=1, 121 | filter_size=(1, 1), nonlinearity=None) 122 | layer += 1 123 | 124 | for i in range(self._net_specs_dict['num_conv_layers']): 125 | # Add convolution layers 126 | net['conv{0:d}'.format(i+1)] = Conv2DLayer( 127 | net.values()[layer], 128 | num_filters=self._net_specs_dict['num_conv_filters'][i], 129 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 130 | pad='same') 131 | layer += 1 132 | if self._net_specs_dict['num_conv_layers'] <= 2: 133 | # Add pooling layers 134 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 135 | net.values()[layer], pool_size=(3, 3)) 136 | layer += 1 137 | else: 138 | if i < 4: 139 | if (i+1) % 2 == 0: 140 | # Add pooling layers 141 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 142 | net.values()[layer], pool_size=(3, 3)) 143 | layer += 1 144 | else: 145 | if (i+1) == 7: 146 | # Add pooling layers 147 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 148 | net.values()[layer], pool_size=(3, 3)) 149 | layer += 1 150 | 151 | # Add fc-layers 152 | net['fc1'] = DenseLayer( 153 | net.values()[layer], 154 | self._net_specs_dict['num_fc_units'][0]) 155 | # Add dropout layer 156 | net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p']) 157 | net['fc2'] = DenseLayer( 158 | net['dropout1'], self._net_specs_dict['num_fc_units'][1]) 159 | # Add dropout layer 160 | net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) 161 | if bottleneck_W is not None: 162 | # Add bottleneck layer 163 | net['bottleneck'] = DenseLayer(net['dropout2'], 30) 164 | # Add output layer(linear activation because it's regression) 165 | net['output'] = DenseLayer( 166 | net['bottleneck'], 3*self._num_joints, 167 | W=bottleneck_W[0:30], 168 | nonlinearity=lasagne.nonlinearities.tanh) 169 | else: 170 | # Add output layer(linear activation because it's regression) 171 | net['output'] = DenseLayer( 172 | net['dropout2'], 3*self._num_joints, 173 | nonlinearity=lasagne.nonlinearities.tanh) 174 | return net 175 | 176 | def dense_fused_convnets(self, fusion_level, fusion_type, input_var1=None, 177 | input_var2=None, bottleneck_W=None, 178 | weights_dir=None): 179 | 180 | net = OrderedDict() 181 | net['input_rgb'] = InputLayer((None, 4, 128, 128), 182 | input_var=input_var1) 183 | layer = 0 184 | for i in range(self._net_specs_dict['num_conv_layers']): 185 | # Add convolution layers 186 | net['conv_rgb{0:d}'.format(i+1)] = Conv2DLayer( 187 | net.values()[layer], 188 | num_filters=self._net_specs_dict['num_conv_filters'][i], 189 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 190 | pad='same') 191 | layer += 1 192 | if self._net_specs_dict['num_conv_layers'] <= 2: 193 | # Add pooling layers 194 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 195 | net.values()[layer], pool_size=(3, 3)) 196 | layer += 1 197 | else: 198 | if i < 4: 199 | if (i+1) % 2 == 0: 200 | # Add pooling layers 201 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 202 | net.values()[layer], pool_size=(3, 3)) 203 | layer += 1 204 | else: 205 | if (i+1) == 7: 206 | # Add pooling layers 207 | 
net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 208 | net.values()[layer], pool_size=(3, 3)) 209 | layer += 1 210 | # Fc-layers 211 | net['fc1_rgb'] = DenseLayer( 212 | net.values()[layer], 213 | self._net_specs_dict['num_fc_units'][0]) 214 | layer += 1 215 | if fusion_level == 2: 216 | # Add dropout layer 217 | net['dropout1_rgb'] = dropout(net['fc1_rgb'], 218 | p=self._model_hp_dict['p']) 219 | layer += 1 220 | net['fc2_rgb'] = DenseLayer( 221 | net['dropout1_rgb'], self._net_specs_dict['num_fc_units'][1]) 222 | layer += 1 223 | 224 | net['input_depth'] = InputLayer((None, 1, 128, 128), 225 | input_var=input_var2) 226 | layer += 1 227 | for i in range(self._net_specs_dict['num_conv_layers']): 228 | # Add convolution layers 229 | net['conv_depth{0:d}'.format(i+1)] = Conv2DLayer( 230 | net.values()[layer], 231 | num_filters=self._net_specs_dict['num_conv_filters'][i], 232 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 233 | pad='same') 234 | layer += 1 235 | if self._net_specs_dict['num_conv_layers'] <= 2: 236 | # Add pooling layers 237 | net['pool_depth{0:d}'.format(i+1)] = MaxPool2DLayer( 238 | net.values()[layer], pool_size=(3, 3)) 239 | layer += 1 240 | else: 241 | if i < 4: 242 | if (i+1) % 2 == 0: 243 | # Add pooling layers 244 | net['pool_depth{0:d}'.format(i+1)] =\ 245 | MaxPool2DLayer(net.values()[layer], 246 | pool_size=(3, 3)) 247 | layer += 1 248 | else: 249 | if (i+1) == 7: 250 | # Add pooling layers 251 | net['pool_depth{0:d}'.format(i+1)] =\ 252 | MaxPool2DLayer(net.values()[layer], 253 | pool_size=(3, 3)) 254 | layer += 1 255 | # Fc-layers 256 | net['fc1_depth'] = DenseLayer( 257 | net.values()[layer], 258 | self._net_specs_dict['num_fc_units'][0]) 259 | layer += 1 260 | if fusion_level == 2: 261 | # Add dropout layer 262 | net['dropout1_depth'] = dropout(net['fc1_depth'], 263 | p=self._model_hp_dict['p']) 264 | layer += 1 265 | net['fc2_depth'] = DenseLayer( 266 | net['dropout1_depth'], self._net_specs_dict['num_fc_units'][1]) 267 | layer += 1 268 | 269 | # Fuse ConvNets by fusion_level and fusion_type 270 | if fusion_type == self.MAX: 271 | net['merge'] =\ 272 | ElemwiseMergeLayer([net['fc%i_rgb' % fusion_level], 273 | net['fc%i_depth' % fusion_level]], 274 | T.maximum) 275 | layer += 1 276 | elif fusion_type == self.SUM: 277 | net['merge'] =\ 278 | ElemwiseMergeLayer([net['fc%i_rgb' % fusion_level], 279 | net['fc%i_depth' % fusion_level]], 280 | T.add) 281 | layer += 1 282 | elif fusion_type == self.CONCAT: 283 | net['merge'] = concat([net['fc%i_rgb' % fusion_level], 284 | net['fc%i_depth' % fusion_level]]) 285 | layer += 1 286 | elif fusion_type == self.CONCATCONV: 287 | net['fc%i_rgb_res' % fusion_level] =\ 288 | reshape(net['fc%i_rgb' % fusion_level], ([0], 1, [1])) 289 | layer += 1 290 | net['fc%i_depth_res' % fusion_level] =\ 291 | reshape(net['fc%i_depth' % fusion_level], ([0], 1, [1])) 292 | layer += 1 293 | net['concat'] = concat([net['fc%i_rgb_res' % fusion_level], 294 | net['fc%i_depth_res' % fusion_level]]) 295 | layer += 1 296 | net['merge_con'] = Conv1DLayer(net['concat'], 297 | num_filters=1, 298 | filter_size=(1,), 299 | nonlinearity=None) 300 | layer += 1 301 | net['merge'] = reshape(net['merge_con'], ([0], [2])) 302 | layer += 1 303 | 304 | if fusion_level == 1: 305 | # Add dropout layer 306 | net['dropout1'] = dropout(net['merge'], 307 | p=self._model_hp_dict['p']) 308 | layer += 1 309 | net['fc2'] = DenseLayer( 310 | net['dropout1'], self._net_specs_dict['num_fc_units'][1]) 311 | layer += 1 312 | # Add dropout layer 313 | 
net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) 314 | layer += 1 315 | else: 316 | # Add dropout layer 317 | net['dropout2'] = dropout(net['merge'], p=self._model_hp_dict['p']) 318 | layer += 1 319 | # Add output layer(linear activation because it's regression) 320 | if bottleneck_W is not None: 321 | # Add bottleneck layer 322 | net['bottleneck'] = DenseLayer(net['dropout2'], 30) 323 | # Add output layer(linear activation because it's regression) 324 | net['output'] = DenseLayer( 325 | net['bottleneck'], 3*self._num_joints, 326 | W=bottleneck_W[0:30], 327 | nonlinearity=lasagne.nonlinearities.tanh) 328 | else: 329 | # Add output layer(linear activation because it's regression) 330 | net['output'] = DenseLayer( 331 | net['dropout2'], 3*self._num_joints, 332 | nonlinearity=lasagne.nonlinearities.tanh) 333 | if weights_dir is not None: 334 | lw = LoadWeights(weights_dir, net) 335 | lw.load_weights_numpy() 336 | return net 337 | 338 | def fused_convnets(self, fusion_level, fusion_type, input_var1=None, 339 | input_var2=None, bottleneck_W=None, weights_dir=None): 340 | 341 | net = OrderedDict() 342 | net['input_rgb'] = InputLayer((None, 4, 128, 128), 343 | input_var=input_var1) 344 | layer = 0 345 | for i in range(fusion_level): 346 | # Add convolution layers 347 | net['conv_rgb{0:d}'.format(i+1)] = Conv2DLayer( 348 | net.values()[layer], 349 | num_filters=self._net_specs_dict['num_conv_filters'][i], 350 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 351 | pad='same') 352 | layer += 1 353 | if self._net_specs_dict['num_conv_layers'] <= 2 and\ 354 | i != fusion_level - 1: 355 | # Add pooling layers 356 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 357 | net.values()[layer], pool_size=(3, 3)) 358 | layer += 1 359 | else: 360 | if i < 4: 361 | if (i+1) % 2 == 0 and i != fusion_level-1: 362 | # Add pooling layers 363 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 364 | net.values()[layer], pool_size=(3, 3)) 365 | layer += 1 366 | else: 367 | if (i+1) == 7 and i != fusion_level-1: 368 | # Add pooling layers 369 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 370 | net.values()[layer], pool_size=(3, 3)) 371 | layer += 1 372 | 373 | net['input_depth'] = InputLayer((None, 1, 128, 128), 374 | input_var=input_var2) 375 | layer += 1 376 | for i in range(fusion_level): 377 | # Add convolution layers 378 | net['conv_depth{0:d}'.format(i+1)] = Conv2DLayer( 379 | net.values()[layer], 380 | num_filters=self._net_specs_dict['num_conv_filters'][i], 381 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 382 | pad='same') 383 | layer += 1 384 | if self._net_specs_dict['num_conv_layers'] <= 2 and\ 385 | i != fusion_level - 1: 386 | # Add pooling layers 387 | net['pool_depth{0:d}'.format(i+1)] = MaxPool2DLayer( 388 | net.values()[layer], pool_size=(3, 3)) 389 | layer += 1 390 | else: 391 | if i < 4: 392 | if (i+1) % 2 == 0 and i != fusion_level-1: 393 | # Add pooling layers 394 | net['pool_depth{0:d}'.format(i+1)] =\ 395 | MaxPool2DLayer(net.values()[layer], 396 | pool_size=(3, 3)) 397 | layer += 1 398 | else: 399 | if (i+1) == 7 and i != fusion_level-1: 400 | # Add pooling layers 401 | net['pool_depth{0:d}'.format(i+1)] =\ 402 | MaxPool2DLayer(net.values()[layer], 403 | pool_size=(3, 3)) 404 | layer += 1 405 | # Fuse ConvNets by fusion_level and fusion_type 406 | if fusion_type == self.MAX: 407 | net['merge'] =\ 408 | ElemwiseMergeLayer([net['conv_rgb{0:d}'.format(fusion_level)], 409 | net['conv_depth{0:d}'.format(fusion_level)] 410 | ], T.maximum) 
411 | layer += 1 412 | elif fusion_type == self.SUM: 413 | net['merge'] =\ 414 | ElemwiseMergeLayer([net['conv_rgb{0:d}'.format(fusion_level)], 415 | net['conv_depth{0:d}'.format(fusion_level)] 416 | ], T.add) 417 | layer += 1 418 | elif fusion_type == self.CONCAT: 419 | net['merge'] = concat([net['conv_rgb{0:d}'.format(fusion_level)], 420 | net['conv_depth{0:d}'.format(fusion_level)]] 421 | ) 422 | layer += 1 423 | elif fusion_type == self.CONCATCONV: 424 | net['concat'] = concat( 425 | [net['conv_rgb{0:d}'.format(fusion_level)], 426 | net['conv_depth{0:d}'.format(fusion_level)]]) 427 | layer += 1 428 | net['merge'] = Conv2DLayer(net['concat'], 429 | num_filters=self._net_specs_dict[ 430 | 'num_conv_filters'][fusion_level-1], 431 | filter_size=(1, 1), nonlinearity=None) 432 | layer += 1 433 | # Max-pooling to the merged 434 | if fusion_level in [2, 4, 7]: 435 | net['pool_merged'] = MaxPool2DLayer(net['merge'], pool_size=(3, 3)) 436 | layer += 1 437 | # Continue the rest of the convolutional part of the network, 438 | # if the fusion took place before the last convolutional layer, 439 | # else just connect the convolutional part with the fully connected 440 | # part 441 | if self._net_specs_dict['num_conv_layers'] > fusion_level: 442 | for i in range(fusion_level, 443 | self._net_specs_dict['num_conv_layers']): 444 | # Add convolution layers 445 | net['conv_merged{0:d}'.format(i+1)] = Conv2DLayer( 446 | net.values()[layer], 447 | num_filters=self._net_specs_dict['num_conv_filters'][i], 448 | filter_size=(self._net_specs_dict['conv_filter_size'][i],) 449 | * 2, pad='same') 450 | layer += 1 451 | if self._net_specs_dict['num_conv_layers'] <= 2: 452 | # Add pooling layers 453 | net['pool_merged{0:d}'.format(i+1)] = MaxPool2DLayer( 454 | net.values()[layer], pool_size=(3, 3)) 455 | layer += 1 456 | else: 457 | if i < 4: 458 | if (i+1) % 2 == 0: 459 | # Add pooling layers 460 | net['pool_merged{0:d}'.format(i+1)] =\ 461 | MaxPool2DLayer(net.values()[layer], 462 | pool_size=(3, 3)) 463 | layer += 1 464 | else: 465 | if (i+1) == 7: 466 | # Add pooling layers 467 | net['pool_merged{0:d}'.format(i+1)] =\ 468 | MaxPool2DLayer(net.values()[layer], 469 | pool_size=(3, 3)) 470 | layer += 1 471 | # Fc-layers 472 | net['fc1'] = DenseLayer( 473 | net.values()[layer], 474 | self._net_specs_dict['num_fc_units'][0]) 475 | # Add dropout layer 476 | net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p']) 477 | net['fc2'] = DenseLayer( 478 | net['dropout1'], self._net_specs_dict['num_fc_units'][1]) 479 | # Add dropout layer 480 | net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) 481 | if bottleneck_W is not None: 482 | # Add bottleneck layer 483 | net['bottleneck'] = DenseLayer(net['dropout2'], 30) 484 | # Add output layer(linear activation because it's regression) 485 | net['output'] = DenseLayer( 486 | net['bottleneck'], 3*self._num_joints, 487 | W=bottleneck_W[0:30], 488 | nonlinearity=lasagne.nonlinearities.tanh) 489 | else: 490 | # Add output layer(linear activation because it's regression) 491 | net['output'] = DenseLayer( 492 | net['dropout2'], 3*self._num_joints, 493 | nonlinearity=lasagne.nonlinearities.tanh) 494 | if weights_dir is not None: 495 | lw = LoadWeights(weights_dir, net) 496 | lw.load_weights_numpy() 497 | return net 498 | 499 | def score_fused_convnets(self, fusion_type, input_var1=None, 500 | input_var2=None, weights_dir_depth=None, 501 | weights_dir_rgb=None, bottleneck_W=None, 502 | weights_dir=None): 503 | 504 | net = OrderedDict() 505 | rgb_net = 
self.simple_convnet(4, input_var=input_var1,
506 |                                        bottleneck_W=bottleneck_W)
507 |         depth_net = self.simple_convnet(1, input_var=input_var2,
508 |                                         bottleneck_W=bottleneck_W)
509 |         if weights_dir_depth is not None and weights_dir_rgb is not None:
510 |             lw_depth = LoadWeights(weights_dir_depth, depth_net)
511 |             lw_depth.load_weights_numpy()
512 |             lw_rgb = LoadWeights(weights_dir_rgb, rgb_net)
513 |             lw_rgb.load_weights_numpy()
514 |         if fusion_type == self.LOCAL:
515 |             # NOTE: LocallyConnected2DLayer is used here, but its import
516 |             # at the top of this module is commented out; uncomment it
517 |             # (it requires a Lasagne version that ships this layer)
518 |             # before using fusion_type='local'.
519 |             net['reshape_depth'] = reshape(depth_net['output'],
520 |                                            ([0], 1, 1, [1]))
521 |             net['reshape_rgb'] = reshape(rgb_net['output'],
522 |                                          ([0], 1, 1, [1]))
523 |             net['concat'] = concat([net['reshape_depth'], net['reshape_rgb']])
524 |             net['lcl'] = LocallyConnected2DLayer(net['concat'], 1, (1, 1),
525 |                                                  untie_biases=True,
526 |                                                  nonlinearity=None)
527 |             net['output'] = reshape(net['lcl'], ([0], [3]))
528 |         elif fusion_type == self.SUM:
529 |             net['output'] = ElemwiseSumLayer([depth_net['output'],
530 |                                               rgb_net['output']], coeffs=0.5)
531 | 
532 |         if weights_dir is not None:
533 |             lw = LoadWeights(weights_dir, net)
534 |             lw.load_weights_numpy()
535 |         return net
--------------------------------------------------------------------------------
/datasets_preprocessing/datasets.py:
--------------------------------------------------------------------------------
1 | """
2 | Reads datasets and saves normalized images and annotations to HDF5
3 | """
4 | 
5 | from collections import OrderedDict
6 | import os
7 | import struct
8 | import abc
9 | import numpy as np
10 | from scipy import misc
11 | import imageio
12 | import scipy.io as sio
13 | import h5py
14 | import cv2
15 | from imgnormalization import ImgNormalization
16 | 
17 | 
18 | """
19 | superclass: Dataset
20 | subclasses: one for each dataset
21 | 
22 | functions:
23 |     1. save to hdf5 (superclass)
24 |     2. read from files and folders and load into numpy arrays (return img) (subclass)
25 |     3. normalize img and joints in both uvd and xyz (subclass) (returns everything that will be saved, e.g. depth, com, joints)
26 |     4. script that combines everything and finally saves to hdf5 (subclass)
27 | """
28 | # maybe move xyz_to_uvd etc. here
29 | # in the subclasses you will put the configurations of the datasets (e.g. number of joints, which joints, focal lengths etc)
30 | 
31 | 
32 | class Dataset(object):
33 | 
34 |     __metaclass__ = abc.ABCMeta
35 | 
36 |     def __init__(self, fx, fy, px, py, joints_num, groups_list):
37 | 
38 |         self.fx = fx
39 |         self.fy = fy
40 |         self.px = px
41 |         self.py = py
42 |         self.joints_num = joints_num
43 |         self.groups_list = groups_list
44 |         self.dataset_size = self._get_dataset_size()
45 |         self._in = ImgNormalization(self.fx, self.fy, self.px, self.py, 250.)
46 | 
47 |     @abc.abstractmethod
48 |     def _get_dataset_size(self):
49 |         """
50 |         Abstract method for computing the dataset size.
51 |         Different implementation in each dataset.
52 |         """
53 | 
54 |     def initialize_hdf5(self, f):
55 |         """
56 |         Initializes the dataset structure in HDF5 format
57 | 
58 |         Keyword arguments:
59 |         f -- HDF5 file (already open)
60 | 
61 |         Return:
62 |         dset -- object for accessing dataset attributes
63 |         """
64 |         grp = {}
65 |         dset = {}
66 |         for g in self.groups_list:
67 |             grp[g] = f.create_group(g)
68 |             dset[g] = {}
69 | 
70 |         for group in grp.keys():
71 |             dset[group]["depth_normalized"] = grp[group].create_dataset("depth_normalized", (self.dataset_size[group], 1, 128, 128), dtype = np.float32)
72 |             dset[group]["com3D"] = grp[group].create_dataset("com3D", (self.dataset_size[group], 3), dtype = np.float32)
73 |             dset[group]["T"] = grp[group].create_dataset("T", (self.dataset_size[group], 3, 3), dtype = np.float32)
74 |             dset[group]["joints"] = grp[group].create_dataset("joints", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
75 |             dset[group]["joints_normalized"] = grp[group].create_dataset("joints_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
76 |             dset[group]["joints3D"] = grp[group].create_dataset("joints3D", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
77 |             dset[group]["joints3D_normalized"] = grp[group].create_dataset("joints3D_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
78 |             dset[group]["path"] = grp[group].create_dataset("path", (self.dataset_size[group],), dtype = "S72")
79 | 
80 |         return dset
81 |     @staticmethod
82 |     def save_hdf5(dset, group, index, depth_normalized, com3D, joints, joints_normalized, joints3D, joints3D_normalized, M, path):
83 | 
84 |         dset[group]["depth_normalized"][index] = depth_normalized
85 |         dset[group]["com3D"][index] = com3D
86 |         dset[group]["T"][index] = M
87 |         dset[group]["joints"][index] = joints
88 |         dset[group]["joints_normalized"][index] = joints_normalized
89 |         dset[group]["joints3D"][index] = joints3D
90 |         dset[group]["joints3D_normalized"][index] = joints3D_normalized
91 |         dset[group]["path"][index] = path
92 | 
93 |     def transform_joints(self, joints, M):
94 |         joints_normalized = np.zeros(joints.shape)
95 | 
96 |         for joint in range(joints.shape[1]):
97 |             t = self._in.transform_point_2D(joints[:, joint], M)
98 |             joints_normalized[0, joint] = t[0]
99 |             joints_normalized[1, joint] = t[1]
100 |             joints_normalized[2, joint] = joints[2, joint]
101 | 
102 |         return joints_normalized
103 | 
104 | class MSRA_Dataset(Dataset):
105 | 
106 |     def __init__(self, path, save_dir, group_subjects):
107 | 
108 |         if not (os.path.exists(path)):
109 |             raise OSError("Directory doesn't exist")
110 |         self.path = path
111 |         self.save_dir = save_dir
112 |         if not isinstance(group_subjects, bool):
113 |             raise TypeError('group_subjects should be a boolean')
114 |         self.group_subjects = group_subjects
115 |         if self.group_subjects:
116 |             groups_list = ['P0', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8']
117 |         else:
118 |             groups_list = ['train']
119 |         super(MSRA_Dataset, self).__init__(241.42, 241.42, 160., 120., 21, groups_list)
120 | 
121 |     def load_image(self, img_dir):
122 |         """
123 |         Loads a depth image from a binary file. The file stores only the
124 |         bounding box of the hand, together with a header giving the image size and the box coordinates (img_width, img_height, left, top, right, bottom).
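        The layout parsed below is: a 24-byte header of six uint32 values in
        the order just listed, followed by (right - left) * (bottom - top)
        float32 depth values for the box region, in row-major order.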
125 | 
126 |         Keyword arguments:
127 |         img_dir -- the path of the binary file containing the depth image
128 | 
129 |         Return:
130 |         depth -- the depth image (missing pixels filled with a far background value); depth_copy -- the depth image before the background fill
131 |         """
132 |         with open(img_dir,'rb') as f:
133 | 
134 |             bbox_bin = f.read(24)  # read the first 6 uint32 values; uint32 = 4 bytes, so 4*6 = 24 bytes
135 |             bbox = struct.unpack('IIIIII',bbox_bin)
136 | 
137 |             f.seek(24)  # move past the 24-byte header to the first depth value
138 |             img_bin = f.read((bbox[4] - bbox[2])*(bbox[5] - bbox[3])*4)
139 |             img = struct.unpack('f'*(bbox[4] - bbox[2])*(bbox[5] - bbox[3]), img_bin)
140 |             img = np.array(img)
141 |             img = np.reshape(img, (bbox[5] - bbox[3], bbox[4] - bbox[2]))
142 |             depth = np.zeros((240,320)) # create an image filled with background
143 |             depth[bbox[3]:bbox[5],bbox[2]:bbox[4]] = img
144 |             depth_copy = depth.copy()
145 |             depth[depth==0.] = 2000.  # pixels with no reading are pushed to a far background value
146 |             return depth, depth_copy
147 | 
148 |     def _get_dataset_size(self):
149 | 
150 |         if self.group_subjects:
151 |             dataset_size = OrderedDict()
152 |             for g in self.groups_list:
153 |                 dataset_size[g] = 0
154 |             for path, dirs, files in os.walk(self.path):
155 |                 if (not dirs):
156 |                     joints_dir = os.path.join(path, 'joint.txt')
157 |                     group = os.path.basename(os.path.dirname(os.path.dirname(joints_dir)))
158 |                     with open(joints_dir, 'r') as f:
159 |                         num = f.readline()
160 |                         num = num.rstrip()
161 |                         num = int(num)
162 |                     dataset_size[group]+=num
163 |         else:
164 |             dataset_size = OrderedDict()
165 |             for g in self.groups_list:
166 |                 dataset_size[g] = 0
167 |             for path, dirs, files in os.walk(self.path):
168 |                 if (not dirs):
169 |                     joints_dir = os.path.join(path, 'joint.txt')
170 |                     with open(joints_dir, 'r') as f:
171 |                         num = f.readline()
172 |                         num = num.rstrip()
173 |                         num = int(num)
174 |                     dataset_size['train']+=num
175 |         return dataset_size
176 | 
177 |     def initialize_hdf5(self, f):
178 |         """
179 |         Initializes the dataset structure in HDF5 format
180 | 
181 |         Keyword arguments:
182 |         f -- HDF5 file (already open)
183 | 
184 |         Return:
185 |         dset -- object for accessing dataset attributes
186 |         """
187 |         grp = {}
188 |         dset = {}
189 |         for g in self.groups_list:
190 |             grp[g] = f.create_group(g)
191 |             dset[g] = {}
192 | 
193 |         for group in grp.keys():
194 |             dset[group]["depth_normalized"] = grp[group].create_dataset("depth_normalized", (self.dataset_size[group], 1, 128, 128), dtype = np.float32)
195 |             dset[group]["com3D"] = grp[group].create_dataset("com3D", (self.dataset_size[group], 3), dtype = np.float32)
196 |             dset[group]["T"] = grp[group].create_dataset("T", (self.dataset_size[group], 3, 3), dtype = np.float32)
197 |             dset[group]["joints"] = grp[group].create_dataset("joints", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
198 |             dset[group]["joints_normalized"] = grp[group].create_dataset("joints_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
199 |             dset[group]["joints3D"] = grp[group].create_dataset("joints3D", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
200 |             dset[group]["joints3D_normalized"] = grp[group].create_dataset("joints3D_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
201 |             dset[group]["path"] = grp[group].create_dataset("path", (self.dataset_size[group],), dtype = "S72")
202 |             dset[group]["subject"] = grp[group].create_dataset("subject", (self.dataset_size[group],), dtype = "S2")
203 |         return dset
204 | 
205 |     @staticmethod
206 |     def save_hdf5(dset, group, index, depth_normalized, com3D, joints, joints_normalized, joints3D, joints3D_normalized, M, path, subject):
207 | 
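        # Write one preprocessed sample into the preallocated HDF5 datasets;
        # this MSRA variant additionally records the subject id.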
208 |         dset[group]["depth_normalized"][index] = depth_normalized
209 |         dset[group]["com3D"][index] = com3D
210 |         dset[group]["T"][index] = M
211 |         dset[group]["joints"][index] = joints
212 |         dset[group]["joints_normalized"][index] = joints_normalized
213 |         dset[group]["joints3D"][index] = joints3D
214 |         dset[group]["joints3D_normalized"][index] = joints3D_normalized
215 |         dset[group]["path"][index] = path
216 |         dset[group]["subject"][index] = subject
217 | 
218 |     def convert_to_hdf5(self):
219 |         """
220 |         Walks the dataset directories, reads the depth images and joints and,
221 |         after centering and normalizing both, saves them in hdf5.
222 |         It also saves other useful information in hdf5, i.e. the 3D center of
223 |         mass of the hand and the transformation applied to the joints in UVD. Joints are saved in both UVD and XYZ
224 |         (the initial and the normalized versions).
225 | 
226 |         Keyword arguments:
227 |         --
228 |         Return:
229 |         --
230 |         """
231 |         if (not os.path.exists(self.save_dir)):
232 |             os.makedirs(self.save_dir)
233 | 
234 |         f = h5py.File(os.path.join(self.save_dir, self.__class__.__name__.split('_')[0]+'.hdf5'), 'w')
235 |         if self.group_subjects:
236 |             dset = super(MSRA_Dataset, self).initialize_hdf5(f)
237 |         else:
238 |             dset = self.initialize_hdf5(f)
239 |         index = OrderedDict()
240 |         for g in self.groups_list:
241 |             index[g] = 0
242 |         for path, dirs, files in os.walk(self.path):
243 |             if (not dirs):
244 |                 bins = [fname for fname in files if fname.split('.')[1] == 'bin']
245 |                 bins = sorted(bins, key = lambda fname: fname.split('_')[0])
246 |                 joints_dir = os.path.join(path, 'joint.txt')
247 |                 joints_list=[]
248 |                 with open(joints_dir,'r') as jf:  # separate handle: 'f' is the open HDF5 file
249 |                     for joints_txt in jf:
250 |                         joints = joints_txt.split(' ')
251 |                         joints = [float(j.rstrip()) for j in joints]
252 |                         joints_list.append(joints)
253 |                 joints3D_array = np.array(joints_list[1:len(joints_list)])
254 |                 for i, bin in enumerate(bins):
255 |                     bin_dir = os.path.join(path, bin)
256 |                     depth, depth_copy = self.load_image(bin_dir)
257 |                     com = self._in.calculate_com(depth_copy)
258 |                     com3D = self._in.uvd_to_xyz(com)
259 |                     depth_crop_scaled, M = self._in.crop_scale_depth(depth, com)
260 |                     joints3D = np.reshape(joints3D_array[i], (self.joints_num, 3))
261 |                     joints3D = np.swapaxes(joints3D, 0, 1)
262 |                     joints3D[2]*=-1
263 |                     joints3D_normalized, depth_normalized = self._in.joints3D_depth_normalization(joints3D, depth_crop_scaled, com3D)
264 |                     joints = self._in.xyz_to_uvd(joints3D)
265 |                     joints_normalized = self.transform_joints(joints, M)
266 | 
267 |                     # Reshape to 3*joints_num
268 |                     joints_res = np.swapaxes(joints, 0, 1)
269 |                     joints_res = np.reshape(joints_res, (3*self.joints_num,))
270 | 
271 |                     joints3D_res = np.swapaxes(joints3D, 0, 1)
272 |                     joints3D_res = np.reshape(joints3D_res, (3*self.joints_num,))
273 | 
274 |                     joints_norm_res = np.swapaxes(joints_normalized, 0, 1)
275 |                     joints_norm_res = np.reshape(joints_norm_res, (3*self.joints_num,))
276 | 
277 |                     joints3D_norm_res = np.swapaxes(joints3D_normalized, 0, 1)
278 |                     joints3D_norm_res = np.reshape(joints3D_norm_res, (3*self.joints_num,))
279 | 
280 |                     group = os.path.basename(os.path.dirname(path))
281 |                     dpt = np.reshape(depth_normalized, (1, 128, 128))
282 |                     if self.group_subjects:
283 |                         super(MSRA_Dataset, self).save_hdf5(dset, group, index[group], dpt.astype(np.float32), com3D.astype(np.float32), joints_res.astype(np.float32), joints_norm_res.astype(np.float32), joints3D_res.astype(np.float32), joints3D_norm_res.astype(np.float32), M.astype(np.float32), bin_dir)
284 |                         index[group]+=1
285 |                     else:
286 | 
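                        # Not grouping by subject: every sample goes into the
                        # single 'train' group, with the subject folder name
                        # stored through the trailing 'subject' argument.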
                        self.save_hdf5(dset, 'train', index['train'], dpt.astype(np.float32), com3D.astype(np.float32), joints_res.astype(np.float32), joints_norm_res.astype(np.float32), joints3D_res.astype(np.float32), joints3D_norm_res.astype(np.float32), M.astype(np.float32), bin_dir, group)
287 |                         index['train']+=1
288 |         f.close()
289 | 
290 | 
291 | class ICVL_Dataset(Dataset):
292 |     # TODO: Add a function for selecting only the original images
293 |     def __init__(self, path, save_dir):
294 | 
295 |         if not (os.path.exists(path)):
296 |             raise OSError("Directory doesn't exist")
297 |         self.path = path
298 |         self.save_dir = save_dir
299 |         groups_list = ['train', 'test1', 'test2']
300 |         super(ICVL_Dataset, self).__init__(241.42, 241.42, 160., 120., 16, groups_list)
301 | 
302 |     def _get_dataset_size(self):
303 | 
304 |         dataset_size = OrderedDict()
305 |         for g in self.groups_list:
306 |             dataset_size[g] = 0
307 |         subdirs = ['Training/labels.txt', 'Testing/test_seq_1.txt', 'Testing/test_seq_2.txt']
308 |         for grp, dir_ in zip(dataset_size, subdirs):
309 |             labels_dir = os.path.join(self.path, dir_)
310 |             with open(labels_dir, 'r') as f:
311 |                 i=0
312 |                 for line in f:
313 |                     line_split = line.split(' ', 1)
314 |                     if not os.path.exists(os.path.join(self.path, os.path.dirname(dir_), 'Depth', line_split[0])):
315 |                         continue
316 |                     i+=1
317 |             dataset_size[grp] = i
318 | 
319 |         return dataset_size
320 | 
321 |     def convert_to_hdf5(self):
322 | 
323 |         if (not os.path.exists(self.save_dir)):
324 |             os.makedirs(self.save_dir)
325 | 
326 |         f = h5py.File(os.path.join(self.save_dir, self.__class__.__name__.split('_')[0]+'.hdf5'), 'w')
327 |         dset = self.initialize_hdf5(f)
328 |         subdirs = ['Training/labels.txt', 'Testing/test_seq_1.txt', 'Testing/test_seq_2.txt']
329 |         for group, dir_ in zip(self.groups_list, subdirs):
330 |             depth_dir = os.path.join(self.path, os.path.dirname(dir_), 'Depth')
331 |             labels_dir = os.path.join(self.path, dir_)
332 | 
333 |             with open(labels_dir, 'r') as lf:  # keep 'f' for the open HDF5 file
334 |                 index=0
335 |                 for line in lf:
336 |                     line_split = line.split(' ', 1)
337 |                     img_dir = os.path.join(depth_dir, line_split[0])
338 |                     if not os.path.exists(img_dir):
339 |                         continue
340 |                     img = misc.imread(img_dir)
341 |                     img = img.astype(np.float32)
342 |                     joints = line_split[1].rstrip()
343 |                     joints = joints.split(' ')
344 |                     joints = np.asarray(joints, dtype = np.float32)
345 |                     joints = np.reshape(joints, (self.joints_num, 3))
346 |                     joints = np.swapaxes(joints,0,1)
347 |                     joints3D = self._in.uvd_to_xyz(joints)
348 |                     depth, M = self._in.crop_scale_depth(img, joints[:,0])
349 |                     joints3D_normalized, depth_normalized = self._in.joints3D_depth_normalization(joints3D, depth, joints3D[:,0])
350 |                     joints_normalized = self.transform_joints(joints, M)
351 |                     dpt = np.reshape(depth_normalized, (1, 128, 128))
352 |                     # Reshape to 3*joints_num
353 |                     joints_res = np.swapaxes(joints, 0, 1)
354 |                     joints_res = np.reshape(joints_res, (3*self.joints_num,))
355 | 
356 |                     joints3D_res = np.swapaxes(joints3D, 0, 1)
357 |                     joints3D_res = np.reshape(joints3D_res, (3*self.joints_num,))
358 | 
359 |                     joints_norm_res = np.swapaxes(joints_normalized, 0, 1)
360 |                     joints_norm_res = np.reshape(joints_norm_res, (3*self.joints_num,))
361 | 
362 |                     joints3D_norm_res = np.swapaxes(joints3D_normalized, 0, 1)
363 |                     joints3D_norm_res = np.reshape(joints3D_norm_res, (3*self.joints_num,))
364 | 
365 |                     self.save_hdf5(dset, group, index, dpt.astype(np.float32), joints3D[:,0].astype(np.float32), joints_res.astype(np.float32), joints_norm_res.astype(np.float32),
                                   joints3D_res.astype(np.float32), joints3D_norm_res.astype(np.float32), M.astype(np.float32), img_dir)
366 |                     index+=1
367 |         f.close()
368 | 
369 | class NYU_Dataset(Dataset):
370 | 
371 |     def __init__(self, path, save_dir):
372 |         if not (os.path.exists(path)):
373 |             raise OSError("Directory doesn't exist")
374 |         self.path = path
375 |         self.save_dir = save_dir
376 |         groups_list = ['train', 'test']
377 |         super(NYU_Dataset, self).__init__(588.036865, 587.075073, 320., 240., 14, groups_list)
378 |         self.selected_joints = [32, 3, 0, 9, 6, 15, 12, 21, 18, 27, 25, 24, 30, 31]
379 |         self._in = ImgNormalization(self.fx, self.fy, self.px, self.py, 300.)  # NYU uses a larger crop cube than the 250 default
380 |         self._in1 = ImgNormalization(self.fx, self.fy, self.px, self.py, 300.*0.87)  # smaller cube used for the later test-set subject
381 |         self.subject_change = 2440
382 | 
383 |     def _get_dataset_size(self):
384 | 
385 |         dataset_size = OrderedDict()
386 |         for g in self.groups_list:
387 |             dataset_size[g] = 0
388 |         for grp in dataset_size:
389 |             dir_ = '{0:s}/{1:s}/{2:s}'.format(self.path, grp, 'joint_data.mat')
390 |             joint_data = sio.loadmat(dir_)
391 |             joints = joint_data['joint_uvd'][0]
392 |             dataset_size[grp] = joints.shape[0]
393 |         return dataset_size
394 | 
395 |     def initialize_hdf5(self, f):
396 |         """
397 |         Initializes the dataset structure in HDF5 format
398 | 
399 |         Keyword arguments:
400 |         f -- HDF5 file (already open)
401 | 
402 |         Return:
403 |         dset -- object for accessing dataset attributes
404 |         """
405 |         grp = {}
406 |         dset = {}
407 |         for g in self.groups_list:
408 |             grp[g] = f.create_group(g)
409 |             dset[g] = {}
410 | 
411 |         for group in grp.keys():
412 |             dset[group]["depth_normalized"] = grp[group].create_dataset("depth_normalized", (self.dataset_size[group], 1, 128, 128), dtype = np.float32)
413 |             dset[group]["rgb_normalized"] = grp[group].create_dataset("rgb_normalized", (self.dataset_size[group], 4, 128, 128), dtype = np.float32)
414 |             dset[group]["com3D"] = grp[group].create_dataset("com3D", (self.dataset_size[group], 3), dtype = np.float32)
415 |             dset[group]["T"] = grp[group].create_dataset("T", (self.dataset_size[group], 3, 3), dtype = np.float32)
416 |             dset[group]["joints"] = grp[group].create_dataset("joints", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
417 |             dset[group]["joints_normalized"] = grp[group].create_dataset("joints_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
418 |             dset[group]["joints3D"] = grp[group].create_dataset("joints3D", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
419 |             dset[group]["joints3D_normalized"] = grp[group].create_dataset("joints3D_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
420 | 
421 |         return dset
422 | 
423 |     @staticmethod
424 |     def save_hdf5(dset, group, index, depth_normalized, rgb_normalized, com3D, joints, joints_normalized, joints3D, joints3D_normalized, M):
425 | 
426 |         dset[group]["depth_normalized"][index] = depth_normalized
427 |         dset[group]["rgb_normalized"][index] = rgb_normalized
428 |         dset[group]["com3D"][index] = com3D
429 |         dset[group]["T"][index] = M
430 |         dset[group]["joints"][index] = joints
431 |         dset[group]["joints_normalized"][index] = joints_normalized
432 |         dset[group]["joints3D"][index] = joints3D
433 |         dset[group]["joints3D_normalized"][index] = joints3D_normalized
434 | 
435 |     def load_image(self, img_dir):
436 |         """
437 |         Loads a depth image from a PNG file. The 16-bit depth value of each
438 |         pixel is packed into the PNG's green (top 8 bits) and blue (bottom 8 bits) channels.
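        For example, a pixel with 8-bit channel values g and b decodes to
        depth = (g << 8) | b, which is exactly what the bit-shifting below
        computes before the cast to float32.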
439 | 
440 |         Keyword arguments:
441 |         img_dir -- the path of the PNG file containing the packed depth image
442 | 
443 |         Return:
444 |         depth -- the decoded depth image (float32)
445 |         """
446 | 
447 |         img = imageio.imread(img_dir)
448 |         _, g, b = np.split(img, 3, axis=2)
449 |         g = np.squeeze(g)
450 |         b = np.squeeze(b)
451 |         g = g.astype(np.int32)
452 |         b = b.astype(np.int32)
453 |         depth = np.bitwise_or(np.left_shift(g,8), b)
454 |         depth = depth.astype(np.float32)
455 | 
456 |         return depth
457 | 
458 |     def convert_to_hdf5(self):
459 | 
460 | 
461 |         if (not os.path.exists(self.save_dir)):
462 |             os.makedirs(self.save_dir)
463 | 
464 |         f = h5py.File(os.path.join(self.save_dir, self.__class__.__name__.split('_')[0]+'.hdf5'), 'w')
465 |         dset = self.initialize_hdf5(f)
466 | 
467 |         for group in self.groups_list:
468 | 
469 |             labels_dir = '{0:s}/{1:s}/joint_data.mat'.format(self.path, group)
470 |             joint_data = sio.loadmat(labels_dir)
471 |             joints = joint_data['joint_uvd'][0]
472 |             joints3D = joint_data['joint_xyz'][0]
473 |             joints = joints[:, self.selected_joints, :]
474 |             joints3D = joints3D[:, self.selected_joints, :]
475 |             joints = np.swapaxes(joints, 1, 2)
476 |             joints3D = np.swapaxes(joints3D, 1, 2)
477 | 
478 |             for index in range(joints.shape[0]):
479 |                 depth_dir = '{0:s}/{1:s}/depth_1_{2:07d}.png'.format(self.path, group, index+1)
480 |                 rgb_dir = '{0:s}/{1:s}/rgb_1_{2:07d}.png'.format(self.path, group, index+1)
481 |                 depth = self.load_image(depth_dir)
482 |                 rgb = cv2.imread(rgb_dir)
483 |                 rgb = rgb[:,:,::-1]  # cv2 loads BGR; flip to RGB
484 |                 if group == 'test' and index >= self.subject_change:
485 |                     depth, M = self._in1.crop_scale_depth(depth, joints[index, :, 0])
486 |                     joints3D_normalized, depth_normalized = self._in1.joints3D_depth_normalization(joints3D[index, :, :], depth, joints3D[index, :, 0])
487 |                     rgb, msk = self._in1.crop_scale_rgb(rgb, depth_normalized, joints[index, :, 0])
488 |                     rgb = np.rollaxis(rgb, 2)
489 |                     rgb = rgb.astype(np.float32)
490 |                     msk = msk.astype(np.float32)
491 |                     rgb/=255.
492 |                     msk = np.reshape(msk, (1, 128, 128))
493 |                     rgb = np.vstack((rgb, msk))
494 |                 else:
495 |                     depth, M = self._in.crop_scale_depth(depth, joints[index, :, 0])
496 |                     joints3D_normalized, depth_normalized = self._in.joints3D_depth_normalization(joints3D[index, :, :], depth, joints3D[index, :, 0])
497 |                     rgb, msk = self._in.crop_scale_rgb(rgb, depth_normalized, joints[index, :, 0])
498 |                     rgb = np.rollaxis(rgb, 2)
499 |                     rgb = rgb.astype(np.float32)
500 |                     msk = msk.astype(np.float32)
501 |                     rgb/=255.
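                    # Append the binary hand mask as a fourth channel, giving
                    # the 4x128x128 tensor stored in 'rgb_normalized'.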
502 |                     msk = np.reshape(msk, (1, 128, 128))
503 |                     rgb = np.vstack((rgb, msk))
504 |                 joints_normalized = self.transform_joints(joints[index, :, :], M)
505 |                 dpt = np.reshape(depth_normalized, (1, 128, 128))
506 |                 # Reshape to 3*joints_num
507 |                 joints_res = np.swapaxes(joints[index, :, :], 0, 1)
508 |                 joints_res = np.reshape(joints_res, (3*self.joints_num,))
509 | 
510 |                 joints3D_res = np.swapaxes(joints3D[index, :, :], 0, 1)
511 |                 joints3D_res = np.reshape(joints3D_res, (3*self.joints_num,))
512 | 
513 |                 joints_norm_res = np.swapaxes(joints_normalized, 0, 1)
514 |                 joints_norm_res = np.reshape(joints_norm_res, (3*self.joints_num,))
515 | 
516 |                 joints3D_norm_res = np.swapaxes(joints3D_normalized, 0, 1)
517 |                 joints3D_norm_res = np.reshape(joints3D_norm_res, (3*self.joints_num,))
518 | 
519 |                 self.save_hdf5(dset, group, index, dpt.astype(np.float32), rgb.astype(np.float32), joints3D[index, :, 0].astype(np.float32), joints_res.astype(np.float32), joints_norm_res.astype(np.float32), joints3D_res.astype(np.float32), joints3D_norm_res.astype(np.float32), M.astype(np.float32))
520 | 
521 |         f.close()
522 | 
523 |     @staticmethod
524 |     def compute_mean_dataset(dataset_dir):
525 |         nyu_dir = os.path.join(dataset_dir, 'NYU.hdf5')
526 |         if not os.path.exists(nyu_dir):
527 |             raise IOError('{0:s} could not be found. Please enter a valid hdf5 file for NYU dataset.'.format(nyu_dir))
528 |         with h5py.File(nyu_dir, 'r') as f:
529 |             mean = np.zeros((3,))
530 |             std = np.zeros((3,))
531 |             N = 0
532 |             # Compute dataset mean
533 |             for i in xrange(f["train/rgb_normalized"].shape[0]):
534 |                 r, g, b, m = f["train/rgb_normalized"][i]
535 |                 m=m.astype(np.int)
536 |                 m=np.bitwise_not(m.astype(np.bool))
537 |                 mean[0] += np.sum(r[m])
538 |                 mean[1] += np.sum(g[m])
539 |                 mean[2] += np.sum(b[m])
540 |                 N += np.sum(m.astype(np.int))
541 | 
542 |             mean/=N
543 |             # Compute dataset standard deviation
544 |             for i in xrange(f["train/rgb_normalized"].shape[0]):
545 |                 r, g, b, m = f["train/rgb_normalized"][i]
546 |                 m=m.astype(np.int)
547 |                 m=np.bitwise_not(m.astype(np.bool))
548 |                 std[0] += np.sum(np.square(r[m]-mean[0]))
549 |                 std[1] += np.sum(np.square(g[m]-mean[1]))
550 |                 std[2] += np.sum(np.square(b[m]-mean[2]))
551 | 
552 |             std/=N-1  # note: 'std' actually holds the unbiased variance; np.sqrt is applied at normalization time
553 | 
554 |             np.savez(os.path.join(dataset_dir, 'mean_std.npz'), mean, std)
555 | 
556 |     @staticmethod
557 |     def normalize_dataset(dataset_dir):
558 |         mean_std_hand_dir = os.path.join(dataset_dir, 'mean_std_hand.npz')
559 |         mean_std_bg_dir = os.path.join(dataset_dir, 'mean_std_bg.npz')
560 |         if not (os.path.exists(mean_std_hand_dir)):
561 |             raise IOError('{0:s} could not be found. Please enter a valid file with the mean and the standard deviation of the dataset.'.format(mean_std_hand_dir))
562 |         if not (os.path.exists(mean_std_bg_dir)):
563 |             raise IOError('{0:s} could not be found. Please enter a valid file with the mean and the standard deviation of the dataset.'.format(mean_std_bg_dir))
564 |         mean_std_hand = np.load(os.path.join(mean_std_hand_dir))
565 |         mean_std_bg = np.load(os.path.join(mean_std_bg_dir))
566 |         mean_hand = mean_std_hand['arr_0']
567 |         std_hand = mean_std_hand['arr_1']
568 |         mean_bg = mean_std_bg['arr_0']
569 |         std_bg = mean_std_bg['arr_1']
570 |         nyu_dir = os.path.join(dataset_dir, 'NYU.hdf5')
571 |         if not os.path.exists(nyu_dir):
572 |             raise IOError('{0:s} could not be found. Please enter a valid hdf5 file for NYU dataset.'.format(nyu_dir))
573 |         with h5py.File(nyu_dir, 'r+') as f:
574 |             for i in xrange(f["train/rgb_normalized"].shape[0]):
575 |                 rgb = np.rollaxis(f["train/rgb_normalized"][i], 0, start=3)
576 |                 rgb[rgb[:,:,3].astype(np.int).astype(np.bool), 0:3] = (rgb[rgb[:,:,3].astype(np.int).astype(np.bool), 0:3] - mean_hand.astype(np.float32)) / np.sqrt(std_hand.astype(np.float32))
577 |                 rgb[np.bitwise_not(rgb[:,:,3].astype(np.int).astype(np.bool)), 0:3] = (rgb[np.bitwise_not(rgb[:,:,3].astype(np.int).astype(np.bool)), 0:3] - mean_bg.astype(np.float32)) / np.sqrt(std_bg.astype(np.float32))
578 |                 f["train/rgb_normalized"][i] = np.rollaxis(rgb, 2)
579 |             for i in xrange(f["test/rgb_normalized"].shape[0]):
580 |                 rgb = np.rollaxis(f["test/rgb_normalized"][i], 0, start=3)
581 |                 rgb[rgb[:,:,3].astype(np.int).astype(np.bool), 0:3] = (rgb[rgb[:,:,3].astype(np.int).astype(np.bool), 0:3] - mean_hand.astype(np.float32)) / np.sqrt(std_hand.astype(np.float32))
582 |                 rgb[np.bitwise_not(rgb[:,:,3].astype(np.int).astype(np.bool)), 0:3] = (rgb[np.bitwise_not(rgb[:,:,3].astype(np.int).astype(np.bool)), 0:3] - mean_bg.astype(np.float32)) / np.sqrt(std_bg.astype(np.float32))
583 |                 f["test/rgb_normalized"][i] = np.rollaxis(rgb, 2)
--------------------------------------------------------------------------------
/trainingtesting.py:
--------------------------------------------------------------------------------
1 | """
2 | This module implements the training and testing procedures of the ConvNet. They
3 | are implemented in two different classes: Training and Testing. Each class
4 | implements all the necessary tools that are needed, such as batch generators,
5 | theano functions compilation etc. A base class, TrainingTesting, provides some
6 | general tools that the other two classes use.
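
A minimal usage sketch (the dataset directory and the hyper-parameter values
are illustrative assumptions, not fixed defaults; it also assumes the
train/validation split file './train_test_splits/nyu_split.npz' created with
the splitdatasets module):

    from trainingtesting import Training

    net_specs = {'num_conv_layers': 9,
                 'num_conv_filters': (32, 32, 64, 64, 128, 128, 128, 128, 128),
                 'conv_filter_size': (3,)*9,
                 'conv_pad': (1,)*9,
                 'num_fc_units': (4096, 4096)}
    training = Training('./datasets', 14, 'NYU', 'train', 'simple',
                        num_epochs=100, patience=5,
                        net_specs_dict=net_specs,
                        model_hp_dict={'p': 0.1},
                        opt_hp_dict={'lr': 0.01, 'mom': 0.9},
                        input_channels=1)
    training_information = training.train(save_model=True)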
7 | """
8 | import time
9 | import os
10 | import cPickle as pickle
11 | import h5py
12 | import numpy as np
13 | import theano.tensor as T
14 | import theano
15 | import lasagne
16 | from networks import ConvNet
17 | from saveloadweights import SaveWeights, LoadWeights
18 | from earlystopping import EarlyStopping
19 | from batchgenerators import BatchGenerator
20 | from splitdatasets import load_dsets_trainval
21 | from randomsearch import sample_hyperparams, save_hyperparams
22 | 
23 | 
24 | class TrainingTesting(object):
25 |     SIMPLE = 'simple'
26 |     AUTOENCODING = 'autoencoding'
27 |     CONV_FUSING = 'conv_fusing'
28 |     DENSE_FUSING = 'dense_fusing'
29 |     SCORE_FUSING = 'score_fusing'
30 |     INPUT_FUSING = 'input_fusing'
31 |     MSRA = 'MSRA'
32 |     NYU = 'NYU'
33 |     ICVL = 'ICVL'
34 | 
35 |     def __init__(self, dataset_dir, net_specs_dict, model_hp_dict, num_joints, dataset,
36 |                  group, network_type, input_channels=None, fusion_level=None,
37 |                  fusion_type=None):
38 |         self.convnet = ConvNet(net_specs_dict, model_hp_dict, num_joints)
39 |         self._datasets_dir = dataset_dir
40 |         if dataset not in [self.ICVL, self.MSRA, self.NYU]:
41 |             raise ValueError("dataset can take one of the following values:"
42 |                              + " 'MSRA', 'NYU', 'ICVL'")
43 |         self._network_type = network_type
44 |         self._dataset = dataset
45 |         self._group = group
46 |         self._input_channels = input_channels
47 |         self._fusion_level = fusion_level
48 |         self._fusion_type = fusion_type
49 | 
50 | 
51 | class Training(TrainingTesting):
52 |     """
53 |     This class implements the training procedure of the convnet
54 |     """
55 | 
56 |     def __init__(self, dataset_dir, num_joints, dataset, group, network_type, num_epochs,
57 |                  patience, net_specs_dict, model_hp_dict=None,
58 |                  opt_hp_dict=None, validate=True, input_channels=None,
59 |                  fusion_level=None, fusion_type=None, weights_dir=None):
60 |         if model_hp_dict is None and opt_hp_dict is None:
61 |             opt_hp_dict, model_hp_dict = sample_hyperparams([0.001, 0.1],
62 |                                                             [0.5, 1],
63 |                                                             [0.0, 0.1])
64 |             self._save_settings = True
65 |         else:
66 |             self._save_settings = False
67 |         super(Training, self).__init__(
68 |             dataset_dir, net_specs_dict, model_hp_dict, num_joints, dataset, group,
69 |             network_type, input_channels=input_channels,
70 |             fusion_level=fusion_level, fusion_type=fusion_type)
71 |         if network_type not in [self.SIMPLE, self.AUTOENCODING, self.CONV_FUSING,
72 |                                 self.DENSE_FUSING, self.SCORE_FUSING,
73 |                                 self.INPUT_FUSING]:
74 |             raise ValueError("Network types can take one of the following"
75 |                              + " values: 'simple', 'autoencoding', 'conv_fusing',"
76 |                              + " 'dense_fusing', 'score_fusing',"
77 |                              + " 'input_fusing'")
78 |         self._model_hp_dict = model_hp_dict
79 |         self._opt_hp_dict = opt_hp_dict
80 |         self._num_epochs = num_epochs
81 |         self._patience = patience
82 |         if not isinstance(validate, bool):
83 |             raise TypeError('validate should be boolean')
84 |         self._validate = validate
85 |         self._weights_dir = weights_dir
86 |         return
87 | 
88 |     def _compile_functions(self):
89 |         if self._network_type == self.SIMPLE:
90 |             input_var = T.tensor4('inputs')
91 |         else:
92 |             input_var1 = T.tensor4('inputs_rgb')
93 |             input_var2 = T.tensor4('inputs_depth')
94 |         target_var = T.matrix('targets')
95 |         # bottleneck_W = np.load('nyu_princ_comp_pose.npz')
96 |         # bottleneck_W = bottleneck_W['arr_0']
97 |         lr = theano.shared(np.array(self._opt_hp_dict['lr'],
98 |                                     dtype=theano.config.floatX))
99 |         lr_decay = np.array(0.1, dtype=theano.config.floatX)
100 |         mom = theano.shared(np.array(self._opt_hp_dict['mom'],
101 |                                      dtype=theano.config.floatX))
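        # lr and mom are Theano shared variables so that their values can be
        # adjusted in place during training (e.g. the learning-rate decay
        # driven by the early-stopping helpers) without recompiling anything.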
102 |         print 'Building the ConvNet...\n'
103 |         if self._network_type == self.SIMPLE:
104 |             net = self.convnet.simple_convnet(self._input_channels,
105 |                                               input_var=input_var)
106 |         elif self._network_type == self.CONV_FUSING:
107 |             net = self.convnet.fused_convnets(self._fusion_level,
108 |                                               self._fusion_type,
109 |                                               input_var1=input_var1,
110 |                                               input_var2=input_var2,
111 |                                               weights_dir=self._weights_dir)
112 |         elif self._network_type == self.INPUT_FUSING:
113 |             net = self.convnet.input_fused_convnets(self._fusion_type,
114 |                                                     input_var1=input_var1,
115 |                                                     input_var2=input_var2)
116 |         elif self._network_type == self.DENSE_FUSING:
117 |             net = self.convnet.dense_fused_convnets(
118 |                 self._fusion_level, self._fusion_type,
119 |                 input_var1=input_var1, input_var2=input_var2,
120 |                 weights_dir=self._weights_dir)
121 |         elif self._network_type == self.SCORE_FUSING:
122 |             net = self.convnet.score_fused_convnets(
123 |                 self._fusion_type, input_var1=input_var1,
124 |                 input_var2=input_var2,
125 |                 weights_dir=self._weights_dir)
126 |         print 'Compiling theano functions...\n'
127 |         train_pred = lasagne.layers.get_output(net['output'],
128 |                                                deterministic=False)
129 |         val_pred = lasagne.layers.get_output(net['output'], deterministic=True)
130 |         train_loss = lasagne.objectives.squared_error(train_pred, target_var)
131 |         train_loss = 1 / 2.0 * T.mean(T.sum(train_loss, axis=1))  # batch-averaged sum-of-squared-errors over the 3*num_joints outputs
132 |         val_loss = lasagne.objectives.squared_error(val_pred, target_var)
133 |         val_loss = 1 / 2.0 * T.mean(T.sum(val_loss, axis=1))
134 |         params = lasagne.layers.get_all_params(net['output'], trainable=True)
135 |         updates = lasagne.updates.nesterov_momentum(train_loss, params,
136 |                                                     learning_rate=lr,
137 |                                                     momentum=mom)
138 |         if self._network_type == self.SIMPLE:
139 |             fn_train = theano.function([input_var, target_var], [
140 |                 train_loss], updates=updates)
141 |             fn_val = theano.function([input_var, target_var], [val_loss])
142 |         else:
143 |             fn_train = theano.function([input_var1, input_var2, target_var], [
144 |                 train_loss], updates=updates)
145 |             fn_val = theano.function([input_var1, input_var2, target_var], [
146 |                 val_loss])
147 |         return (fn_train, fn_val, net, lr, lr_decay)
148 | 
149 |     def _training_loop(self, bg_train, bg_val, fn_train, fn_val, lr,
150 |                        lr_decay, sw=None, es=None):
151 |         """
152 |         This function performs the training loop shared by the simple and the
153 |         fused convnets: the parameters are updated through backprop and the
154 |         training/validation losses are reported after every epoch.
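        When the early-stopping helper signals a rollback, the loop rewinds
        epoch to the best epoch seen so far and keeps training from there;
        time_back counts how many times this rewinding has happened.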
155 | 
156 |         Keyword arguments:
157 | 
158 |         bg_train -- batch generator for the training set
159 |         bg_val -- batch generator for the validation set
160 |         fn_train -- theano function that performs the parameter updates and
161 |                     computes the training loss
162 |         fn_val -- theano function that computes the validation loss
163 |         lr -- learning rate (theano shared variable)
164 |         lr_decay -- learning rate decay constant (we use a constant decay policy)
165 |         sw -- instance of the SaveWeights class (default: None)
166 |         es -- instance of the EarlyStopping class (default: None)
167 |         """
168 |         training_information = {}
169 |         train_loss_d = []
170 |         val_loss_d = []
171 |         epoch = 0
172 |         if es is not None or sw is not None:
173 |             time_back = 0
174 |         while epoch < self._num_epochs:
175 |             train_loss = 0
176 |             train_batches = 0
177 |             start_time = time.time()
178 |             for batch in bg_train.generate_batches(self._input_channels):
179 |                 if self._network_type == self.SIMPLE:
180 |                     X_batch, y_batch = batch
181 |                     loss = fn_train(X_batch, y_batch)
182 |                 else:
183 |                     X_batch_rgb, X_batch_depth, y_batch = batch
184 |                     loss = fn_train(X_batch_rgb, X_batch_depth, y_batch)
185 |                 train_loss += loss[0]
186 |                 train_batches += 1
187 | 
188 |             train_loss /= train_batches
189 |             train_loss_d.append(train_loss)
190 |             val_loss = 0
191 |             val_batches = 0
192 |             for batch in bg_val.generate_batches(self._input_channels,
193 |                                                  batch_size=1):
194 |                 if self._network_type == self.SIMPLE:
195 |                     X_batch, y_batch = batch
196 |                     loss = fn_val(X_batch, y_batch)
197 |                 else:
198 |                     X_batch_rgb, X_batch_depth, y_batch = batch
199 |                     loss = fn_val(X_batch_rgb, X_batch_depth, y_batch)
200 |                 val_loss += loss[0]
201 |                 val_batches += 1
202 | 
203 |             val_loss /= val_batches
204 |             val_loss_d.append(val_loss)
205 |             print 'Epoch: {0:d}. Completion time: {1:.3f}s'.format(
206 |                 epoch + 1, time.time() - start_time)
207 |             print 'Train loss: {0:.5f}\t\tValidation loss: {1:.5f}\t\t'\
208 |                 'Ratio (Val/Train): {2:.5f}'.format(train_loss, val_loss,
209 |                                                     val_loss / train_loss)
210 |             print '--------------------------------------------------------'\
211 |                 + '-----------------------------------'
212 |             if sw is not None:
213 |                 stop, go_back = sw.early_stopping_with_lr_decay(val_loss,
214 |                                                                 epoch, lr,
215 |                                                                 time_back)
216 |                 if stop and not go_back or epoch == self._num_epochs - 1:
217 |                     sw.save_weights_numpy()
218 |                     break
219 |                 if stop and go_back:
220 |                     time_back += 1
221 |                     epoch = sw.best_epoch - 1
222 |             elif es is not None:
223 |                 stop, go_back = es.early_stopping_with_lr_decay(val_loss,
224 |                                                                 epoch, lr,
225 |                                                                 time_back)
226 |                 if stop and not go_back or epoch == self._num_epochs - 1:
227 |                     break
228 |                 if stop and go_back:
229 |                     time_back += 1
230 |                     epoch = es.best_epoch - 1
231 |             epoch += 1
232 | 
233 |         training_information['train_loss'] = train_loss_d
234 |         training_information['val_loss'] = val_loss_d
235 |         return training_information
236 | 
237 |     def train(self, save_model=False, save_loss=False,
238 |               early_stopping=True, shuffle=False):
239 |         """
240 |         This function performs the training of our ConvNets. It compiles the
241 |         theano functions and performs the parameter updates
242 |         (by calling _compile_functions), saves several useful pieces of
243 |         information during training, and stops via early stopping, at which
244 |         point the model parameters are also saved. All the basic components
245 |         are described below together with their respective modules/functions:
246 |         1) functions compilation: Training._compile_functions (module:
247 |            trainingtesting). Here you can also find optimization details
248 |            such as the regularization term in the loss for the autoencoder
249 |         2) load/save weights, early stopping: SaveWeights,
250 |            LoadWeights (module: saveloadweights)
251 |         3) networks definitions: module: networks.py. Here you can find
252 |            details related to network design choices as well as
253 |            regularization layers (e.g. dropout) or other techniques such as
254 |            tied weights in the autoencoder.
255 |         """
256 |         dataset = os.path.join(self._datasets_dir, self._dataset)
257 |         dataset += '.hdf5'
258 |         dset = h5py.File(dataset, 'r')
259 | 
260 |         fn_train, fn_val, net, lr, lr_decay = self._compile_functions()
261 | 
262 |         if not isinstance(save_model, bool):
263 |             raise TypeError('save_model should be boolean')
264 |         if save_model:
265 |             models_dir = './models'
266 |             if not os.path.exists(models_dir):
267 |                 os.mkdir(models_dir)
268 | 
269 |             if self._network_type == self.SIMPLE:
270 |                 if self._input_channels == 1:
271 |                     input_type = 'depth'
272 |                 elif self._input_channels == 4:
273 |                     input_type = 'rgb'
274 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
275 |                     self._dataset, self._network_type, input_type,
276 |                     self.convnet._model_hp_dict['p'])
277 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
278 |                                  self._patience, 'loss')
279 |             elif self._network_type == self.CONV_FUSING:
280 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
281 |                     self._dataset, self._network_type, self._fusion_type,
282 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
283 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
284 |                                  self._patience, 'loss')
285 |             elif self._network_type == self.DENSE_FUSING:
286 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
287 |                     self._dataset, self._network_type, self._fusion_type,
288 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
289 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
290 |                                  self._patience, 'loss')
291 |             elif self._network_type == self.SCORE_FUSING:
292 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
293 |                     self._dataset, self._network_type, self._fusion_type,
294 |                     self.convnet._model_hp_dict['p'])
295 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
296 |                                  self._patience, 'loss')
297 |             elif self._network_type == self.INPUT_FUSING:
298 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
299 |                     self._dataset, self._network_type, self._fusion_type,
300 |                     self.convnet._model_hp_dict['p'])
301 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
302 |                                  self._patience, 'loss')
303 |         elif early_stopping:
304 |             es = EarlyStopping(net, self._patience, 'loss')
305 |         if self._validate:
306 |             idx_train, idx_val = load_dsets_trainval(
307 |                 './train_test_splits/nyu_split.npz')
308 |             bg_train = BatchGenerator(dset, self._dataset, self._group,
309 |                                       iterable=idx_train, shuffle=shuffle)
310 |             bg_val = BatchGenerator(dset, self._dataset, self._group,
311 |                                     iterable=idx_val, shuffle=shuffle)
312 |         else:
313 |             bg_train = BatchGenerator(dset, self._dataset, self._group,
314 |                                       shuffle=shuffle)
            bg_val = bg_train  # _training_loop always expects a validation generator; fall back to the training set when no split is used
315 |         print 'Training started...\n'
316 |         if save_model:
317 |             training_information = self._training_loop(
318 |                 bg_train, bg_val, fn_train, fn_val, lr, lr_decay, sw=sw)
319 |         elif early_stopping:
320 |             training_information = self._training_loop(
321 |                 bg_train, bg_val, fn_train, fn_val, lr, lr_decay, es=es)
322 |         else:
323 |             training_information = self._training_loop(
324 |                 bg_train, bg_val, fn_train, fn_val, lr, lr_decay)
325 |         if self._save_settings:
326 |             settings_dir = './settings'
327 |             if not 
os.path.exists(settings_dir):
328 |                 os.mkdir(settings_dir)
329 |             val_loss_array = np.array(training_information['val_loss'])
330 |             best_loss = np.amin(val_loss_array)
331 |             if self._network_type == self.SIMPLE:
332 |                 if self._input_channels == 1:
333 |                     input_type = 'depth'
334 |                 elif self._input_channels == 4:
335 |                     input_type = 'rgb'
336 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
337 |                     self._dataset, self._network_type, input_type,
338 |                     self.convnet._model_hp_dict['p'])
339 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
340 |                                  self._opt_hp_dict, self._model_hp_dict,
341 |                                  best_loss)
342 |             elif self._network_type == self.CONV_FUSING:
343 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
344 |                     self._dataset, self._network_type, self._fusion_type,
345 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
346 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
347 |                                  self._opt_hp_dict, self._model_hp_dict,
348 |                                  best_loss)
349 |             elif self._network_type == self.DENSE_FUSING:
350 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
351 |                     self._dataset, self._network_type, self._fusion_type,
352 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
353 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
354 |                                  self._opt_hp_dict, self._model_hp_dict,
355 |                                  best_loss)
356 |             elif self._network_type == self.SCORE_FUSING:
357 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
358 |                     self._dataset, self._network_type, self._fusion_type,
359 |                     self.convnet._model_hp_dict['p'])
360 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
361 |                                  self._opt_hp_dict, self._model_hp_dict,
362 |                                  best_loss)
363 |             elif self._network_type == self.INPUT_FUSING:
364 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
365 |                     self._dataset, self._network_type, self._fusion_type,
366 |                     self.convnet._model_hp_dict['p'])
367 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
368 |                                  self._opt_hp_dict, self._model_hp_dict,
369 |                                  best_loss)
370 |         if save_loss:
371 |             train_val_loss_dir = './train_val_loss'
372 |             if not os.path.exists(train_val_loss_dir):
373 |                 os.mkdir(train_val_loss_dir)
374 |             if self._network_type == self.SIMPLE:
375 |                 if self._input_channels == 1:
376 |                     input_type = 'depth'
377 |                 elif self._input_channels == 4:
378 |                     input_type = 'rgb'
379 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
380 |                     self._dataset, self._network_type, input_type,
381 |                     self.convnet._model_hp_dict['p'])
382 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
383 |                 if not os.path.exists(save_dir):
384 |                     os.makedirs(save_dir)
385 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
386 |                         as f:
387 |                     pickle.dump(training_information, f,
388 |                                 protocol=pickle.HIGHEST_PROTOCOL)
389 |             elif self._network_type == self.CONV_FUSING:
390 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
391 |                     self._dataset, self._network_type, self._fusion_type,
392 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
393 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
394 |                 if not os.path.exists(save_dir):
395 |                     os.makedirs(save_dir)
396 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
397 |                         as f:
398 |                     pickle.dump(training_information, f,
399 |                                 protocol=pickle.HIGHEST_PROTOCOL)
400 |             elif self._network_type == self.DENSE_FUSING:
401 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
402 |                     self._dataset, self._network_type, self._fusion_type,
403 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
404 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
405 |                 if not 
os.path.exists(save_dir):
406 |                     os.makedirs(save_dir)
407 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
408 |                         as f:
409 |                     pickle.dump(training_information, f,
410 |                                 protocol=pickle.HIGHEST_PROTOCOL)
411 |             elif self._network_type == self.SCORE_FUSING:
412 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
413 |                     self._dataset, self._network_type, self._fusion_type,
414 |                     self.convnet._model_hp_dict['p'])
415 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
416 |                 if not os.path.exists(save_dir):
417 |                     os.makedirs(save_dir)
418 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
419 |                         as f:
420 |                     pickle.dump(training_information, f,
421 |                                 protocol=pickle.HIGHEST_PROTOCOL)
422 |             elif self._network_type == self.INPUT_FUSING:
423 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
424 |                     self._dataset, self._network_type, self._fusion_type,
425 |                     self.convnet._model_hp_dict['p'])
426 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
427 |                 if not os.path.exists(save_dir):
428 |                     os.makedirs(save_dir)
429 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
430 |                         as f:
431 |                     pickle.dump(training_information, f,
432 |                                 protocol=pickle.HIGHEST_PROTOCOL)
433 |         return training_information
434 | 
435 | 
436 | class Testing(TrainingTesting):
437 | 
438 |     def __init__(self, net_specs_dict, model_hp_dict, num_joints, dataset,
439 |                  group, network_type, input_channels=None, fusion_level=None,
440 |                  fusion_type=None, score_fusion=None, dataset_dir='.'):
        # The parent class also expects a dataset_dir; it is accepted here as
        # a keyword (defaulting, by assumption, to the current directory) and
        # forwarded, so existing call sites that omit it keep working.
441 |         super(Testing, self).__init__(dataset_dir, net_specs_dict,
442 |                                       model_hp_dict, num_joints, dataset,
443 |                                       group, network_type,
444 |                                       input_channels=input_channels,
445 |                                       fusion_level=fusion_level, fusion_type=fusion_type)
446 |         self._score_fusion = score_fusion
447 | 
448 |     def _compile_functions(self, weights_dir):
449 |         if self._network_type == self.SIMPLE:
450 |             input_var = T.tensor4('inputs')
451 |         else:
452 |             input_var1 = T.tensor4('inputs_rgb')
453 |             input_var2 = T.tensor4('inputs_depth')
454 |         # bottleneck_W = np.load('nyu_princ_comp_pose.npz')
455 |         # bottleneck_W = bottleneck_W['arr_0']
456 |         print 'Building the ConvNet...\n'
457 |         if self._network_type == self.SIMPLE:
458 |             net = self.convnet.simple_convnet(self._input_channels,
459 |                                               input_var=input_var)
460 |         elif self._network_type == self.CONV_FUSING:
461 |             net = self.convnet.fused_convnets(self._fusion_level,
462 |                                               self._fusion_type,
463 |                                               input_var1=input_var1,
464 |                                               input_var2=input_var2)
465 |         elif self._network_type == self.INPUT_FUSING:
466 |             net = self.convnet.input_fused_convnets(self._fusion_type,
467 |                                                     input_var1=input_var1,
468 |                                                     input_var2=input_var2)
469 |         elif self._network_type == self.DENSE_FUSING:
470 |             net = self.convnet.dense_fused_convnets(
471 |                 self._fusion_level, self._fusion_type,
472 |                 input_var1=input_var1, input_var2=input_var2)
473 |         elif self._network_type == self.SCORE_FUSING:
474 |             net = self.convnet.score_fused_convnets(
475 |                 self._fusion_type, input_var1=input_var1,
476 |                 input_var2=input_var2)
477 |         lw = LoadWeights(weights_dir, net)
478 |         lw.load_weights_numpy()
479 |         pred = lasagne.layers.get_output(net['output'], deterministic=True)
480 |         if self._network_type == self.SIMPLE:
481 |             fn_pred = theano.function([input_var], pred)
482 |         else:
483 |             fn_pred = theano.function([input_var1, input_var2], pred)
484 |         return fn_pred
485 | 
486 |     def predict(self, weights_dir, save_preds=True):
487 |         dataset = os.path.join(self._datasets_dir, self._dataset)
488 |         dataset += '.hdf5'
489 |         dset = h5py.File(dataset, 'r')
490 |         fn_pred = self._compile_functions(weights_dir)
491 |         bg_test = BatchGenerator(dset, self._dataset, self._group)
492 |         predictions = []
493 |         for batch in bg_test.generate_batches(self._input_channels,
494 |                                               batch_size=1):
495 |             if self._network_type == self.SIMPLE:
496 |                 X_batch, y_batch = batch
497 |                 pred = fn_pred(X_batch)
498 |                 pred = np.squeeze(pred)
499 |             else:
500 |                 X_batch_rgb, X_batch_depth, y_batch = batch
501 |                 pred = fn_pred(X_batch_rgb, X_batch_depth)
502 |                 pred = np.squeeze(pred)
503 | 
504 |             predictions.append(pred)
505 | 
506 |         predictions = np.array(predictions)
507 |         if save_preds:
508 |             predictions_dir = './predictions'
509 |             if not os.path.exists(predictions_dir):
510 |                 os.mkdir(predictions_dir)
511 |             if self._network_type == self.SIMPLE:
512 |                 if self._input_channels == 1:
513 |                     input_type = 'depth'
514 |                 elif self._input_channels == 4:
515 |                     input_type = 'rgb'
516 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
517 |                     self._dataset, self._network_type, input_type,
518 |                     self.convnet._model_hp_dict['p'])
519 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
520 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
521 |                 np.savez(os.path.join(predictions_dir, save_dir,
522 |                                       'predictions.npz'), predictions)
523 |             elif self._network_type == self.CONV_FUSING:
524 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
525 |                     self._dataset, self._network_type, self._fusion_type,
526 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
527 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
528 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
529 |                 np.savez(os.path.join(predictions_dir, save_dir,
530 |                                       'predictions.npz'), predictions)
531 |             elif self._network_type == self.DENSE_FUSING:
532 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
533 |                     self._dataset, self._network_type, self._fusion_type,
534 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
535 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
536 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
537 |                 np.savez(os.path.join(predictions_dir, save_dir,
538 |                                       'predictions.npz'), predictions)
539 |             elif self._network_type == self.SCORE_FUSING:
540 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
541 |                     self._dataset, self._network_type, self._fusion_type,
542 |                     self.convnet._model_hp_dict['p'])
543 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
544 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
545 |                 np.savez(os.path.join(predictions_dir, save_dir,
546 |                                       'predictions.npz'), predictions)
547 |             elif self._network_type == self.INPUT_FUSING:
548 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
549 |                     self._dataset, self._network_type, self._fusion_type,
550 |                     self.convnet._model_hp_dict['p'])
551 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
552 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
553 |                 np.savez(os.path.join(predictions_dir, save_dir,
554 |                                       'predictions.npz'), predictions)
555 |         # return the predictions whether or not they were also saved to disk
556 |         return predictions
557 | 
558 |     def extract_kernels(self, layer, weights_dir):
559 | 
560 |         # bottleneck_W = np.load('nyu_princ_comp_pose.npz')
561 |         # bottleneck_W = bottleneck_W['arr_0']
562 |         print 'Building the ConvNet...\n'
563 |         if self._network_type == self.SIMPLE:
564 |             net = self.convnet.simple_convnet(self._input_channels)
565 |         elif self._network_type == self.CONV_FUSING:
566 |             net = self.convnet.fused_convnets(self._fusion_level,
567 |                                               self._fusion_type)
568 |         lw = LoadWeights(weights_dir, net)
569 |         lw.load_weights_numpy()
570 |         return net[layer].W
571 | 
--------------------------------------------------------------------------------