├── datasets_preprocessing
│   ├── __init__.py
│   ├── imgnormalization.py
│   └── datasets.py
├── .gitignore
├── predict_script.py
├── environment.yml
├── evaluation.py
├── splitdatasets.py
├── saveloadweights.py
├── README.md
├── trials.py
├── randomsearch.py
├── earlystopping.py
├── training_script.py
├── batchgenerators.py
├── networks.py
└── trainingtesting.py

--------------------------------------------------------------------------------
/datasets_preprocessing/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*.err*
*.o*
.DS_Store
/train_test_splits/

--------------------------------------------------------------------------------
/predict_script.py:
--------------------------------------------------------------------------------
from trainingtesting import Testing

net_specs_dict = {'num_conv_layers': 9, 'num_conv_filters':
                  (32, 32, 64, 64, 128, 128, 128, 128, 128),
                  'conv_filter_size': (3,)*9,
                  'conv_pad': (1,)*9,
                  'num_fc_units': (4096, 4096)}

model_hp_dict = {'p': 0.05}

test = Testing(net_specs_dict, model_hp_dict, 14, 'NYU', 'test',
               'score_fusing', input_channels=5, fusion_level=7,
               fusion_type='local')

predictions =\
    test.predict('/home/mvrigkas/hand_pose_estimation/'
                 + 'models/NYU/score_fusing/local/0.050000/weights.npz')

--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
name: fusenet
channels:
  - defaults
dependencies:
  - binutils_impl_linux-64=2.31.1=h6176602_1
  - binutils_linux-64=2.31.1=h6176602_6
  - blas=1.0=mkl
  - ca-certificates=2019.1.23=0
  - certifi=2019.3.9=py27_0
  - gcc_impl_linux-64=7.3.0=habb00fd_1
  - gcc_linux-64=7.3.0=h553295d_6
  - gxx_impl_linux-64=7.3.0=hdf63c60_1
  - gxx_linux-64=7.3.0=h553295d_6
  - h5py=2.9.0=py27h7918eee_0
  - hdf5=1.10.4=hb1b8bf9_0
  - intel-openmp=2019.3=199
  - libedit=3.1.20181209=hc058e9b_0
  - libffi=3.2.1=hd88cf55_4
  - libgcc-ng=8.2.0=hdf63c60_1
  - libgfortran-ng=7.3.0=hdf63c60_0
  - libgpuarray=0.7.6=h14c3975_0
  - libstdcxx-ng=8.2.0=hdf63c60_1
  - linecache2=1.0.0=py27_0
  - markupsafe=1.1.1=py27h7b6447c_0
  - mkl=2017.0.4=h4c4d0af_0
  - mkl-service=1.1.2=py27_3
  - mkl_fft=1.0.10=py27ha843d7b_0
  - ncurses=6.1=he6710b0_1
  - numpy=1.12.0=py27_0
  - openssl=1.1.1b=h7b6447c_1
  - pip=19.0.3=py27_0
  - pygpu=0.7.6=py27h3010b51_0
  - python=2.7.16=h9bab390_0
  - readline=7.0=h7b6447c_5
  - scikit-learn=0.20.3=py27hd81dba3_0
  - scipy=0.14.0=np19py27_0
  - setuptools=40.8.0=py27_0
  - six=1.12.0=py27_0
  - sqlite=3.27.2=h7b6447c_0
  - theano=1.0.3=py27hfd86e86_0
  - tk=8.6.8=hbc83047_0
  - traceback2=1.4.0=py27_0
  - unittest2=1.1.0=py27_0
  - wheel=0.33.1=py27_0
  - zlib=1.2.11=h7b6447c_3
  - pip:
    - lasagne==0.1
    - mako==1.0.7
prefix: /jmain01/home/JAD026/dxd01/wwp62-dxd01/home-shared/miniconda3/envs/fusenet
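A quick sanity check after creating the environment (not part of the repository; the expected output assumes the `.theanorc` described in the README below):

```
conda activate fusenet
python -c "import theano; print(theano.config.device)"   # should print: cuda0
```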
--------------------------------------------------------------------------------
/evaluation.py:
--------------------------------------------------------------------------------
"""
This module implements different evaluation metrics for our hand pose
estimation algorithm, given the predictions on the test set. The two metrics
implemented here are:
    1) Accuracy, defined as the fraction of test images whose maximum joint
    error is below a threshold.

    2) Mean joint error, i.e. the mean error over the whole test sequence,
    computed for each joint separately.
"""

import numpy as np


def accuracy(test_predictions, gt, threshold):
    """
    Computes the accuracy of the test predictions.

    Keyword arguments:

    test_predictions -- numpy array with predictions of joint positions in the
    test set
    gt -- ground truth joint positions in the test set
    threshold -- threshold on the maximum joint error

    Return:

    acc -- accuracy
    """
    max_error = np.asarray([np.amax(np.linalg.norm(
        gt[i]-test_predictions[i], axis=0)) for i in range(gt.shape[0])])
    # A frame counts as correct when its *maximum* joint error is below the
    # threshold; cast to float so that the division is not truncated under
    # Python 2 integer semantics.
    acc = np.sum((max_error < threshold).astype(dtype=np.float))/gt.shape[0]
    return acc


def mean_joint_error(test_predictions, gt):
    """
    Computes the mean joint error of the test predictions.

    Keyword arguments:

    test_predictions -- numpy array with predictions of joint positions in the
    test set
    gt -- ground truth joint positions in the test set

    Return:

    mean_error -- mean error per joint (a numpy array with size equal to the
    number of joints)
    """
    mean_error = np.mean(np.asarray(
        [np.linalg.norm(gt[i]-test_predictions[i], axis=0)
         for i in range(gt.shape[0])]), axis=0)

    return mean_error
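A minimal sketch of how these metrics are typically called, assuming predictions and ground truth are arrays of shape (num_frames, 3, num_joints) so that the norm over axis 0 yields one error per joint (shapes and names here are illustrative, not taken from the repository):

```python
import numpy as np
from evaluation import accuracy, mean_joint_error

gt = np.random.randn(100, 3, 14)                 # 100 frames, 14 joints in 3D
preds = gt + 0.01 * np.random.randn(100, 3, 14)  # dummy predictions

print(accuracy(preds, gt, threshold=0.1))  # fraction of frames within 0.1
print(mean_joint_error(preds, gt))         # per-joint mean error, shape (14,)
```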
--------------------------------------------------------------------------------
/splitdatasets.py:
--------------------------------------------------------------------------------
"""
This module contains two functions for splitting a dataset into
train/validation sets for later hyper-parameter selection:
    1) split_dsets_trainval: splits the dataset into training/validation sets
    and saves the indices to the selected path
    2) load_dsets_trainval: loads the train/validation indices of the dataset
"""

import h5py
import numpy as np
from sklearn.model_selection import train_test_split
import argparse
import os


def split_dsets_trainval(hdf5_file, save_dir):
    """
    This function saves the train and validation indices of the hdf5_file as
    numpy arrays (binary .npz format).

    Keyword arguments:

    hdf5_file -- hdf5 dataset file (already open)
    save_dir -- path of the .npz file to save the splits to

    Return:
    --
    """
    idx = range(hdf5_file['train']['depth_normalized'].shape[0])
    idx_train, idx_val = train_test_split(idx, random_state=10, test_size=0.5)

    np.savez(save_dir, idx_train, idx_val)


def load_dsets_trainval(train_val_dir):
    """
    This function loads the train/validation indices of a dataset.

    Keyword arguments:

    train_val_dir -- path of the saved train/validation indices (.npz format)

    Return:

    idx_train -- indices of the training set
    idx_val -- indices of the validation set
    """
    npzfile = np.load(train_val_dir)
    idx_train = npzfile['arr_0']
    idx_val = npzfile['arr_1']

    return idx_train, idx_val


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='''Provides a split of the dataset's indices into
        training/validation, to be used by the batch generator''')
    parser.add_argument('dataset', choices=['nyu', 'msra', 'icvl'])
    parser.add_argument('dataset_dir', help='Dataset\'s (in HDF5 format) directory')

    args = parser.parse_args()

    if not os.path.exists('./train_test_splits/'):
        os.mkdir('./train_test_splits/')
    dataset_hdf5 = h5py.File(args.dataset_dir, 'r')
    split_dsets_trainval(dataset_hdf5, './train_test_splits/'+args.dataset+'_split.npz')
    dataset_hdf5.close()

--------------------------------------------------------------------------------
/saveloadweights.py:
--------------------------------------------------------------------------------
import numpy as np
from lasagne.layers import set_all_param_values
import os
import cPickle as pickle
from earlystopping import EarlyStopping


class SaveWeights(EarlyStopping):

    def __init__(self, weights_dir, net, patience, loss_or_acc, times=5):
        super(SaveWeights, self).__init__(net, patience,
                                          loss_or_acc, times)
        self.weights_dir = weights_dir

    def _print_best(self):
        # Report the metric that was actually monitored (best_loss stays at
        # Inf when tracking accuracy, and vice versa; the original message
        # always said "accuracy" but printed best_loss).
        best_val = (self.best_acc if self.loss_or_acc == self.ACCURACY
                    else self.best_loss)
        print 'The best {} was {} at epoch {}'.format(
            self.loss_or_acc, best_val, self.best_epoch)
        print 'Model parameters were saved to '+self.weights_dir

    def save_weights_numpy(self):
        if not os.path.exists(self.weights_dir):
            os.makedirs(self.weights_dir)
        np.savez(os.path.join(self.weights_dir, 'weights.npz'),
                 *self.best_weights)
        self._print_best()

    def save_weights_pickle(self):
        if not os.path.exists(self.weights_dir):
            os.makedirs(self.weights_dir)
        # Use a .pkl extension: this file is a pickle, not a numpy archive.
        with open(os.path.join(self.weights_dir, 'weights.pkl'), 'wb') as f:
            pickle.dump(self.best_weights, f, protocol=pickle.HIGHEST_PROTOCOL)
        self._print_best()


class LoadWeights(object):

    def __init__(self, weights_dir, net):
        if not (os.path.exists(weights_dir)):
            raise OSError("Directory doesn't exist")
        self.weights_dir = weights_dir
        self.net = net

    def load_weights_numpy(self):
        print 'Loading weights from {0:s}...\n'.format(self.weights_dir)
        with np.load(self.weights_dir) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        print 'Setting the weights to the model...\n'
        set_all_param_values(self.net['output'], param_values, trainable=True)

    def load_weights_pickle(self):

        with open(self.weights_dir, 'rb') as f:
            print 'Loading weights from {0:s}...\n'.format(self.weights_dir)
            param_values = pickle.load(f)
        print 'Setting the weights to the model...\n'
        set_all_param_values(self.net['output'], param_values, trainable=True)
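A rough sketch of how these helpers are meant to slot into a training loop (`net` is the layer dictionary returned by the builders in networks.py, which all expose an 'output' key; `run_one_epoch` is a hypothetical stand-in for the actual training code):

```python
from saveloadweights import SaveWeights, LoadWeights

sw = SaveWeights('models/NYU/simple/', net, patience=5, loss_or_acc='loss')
for epoch in range(50):
    val_loss = run_one_epoch()              # hypothetical training step
    if sw.early_stopping(val_loss, epoch):  # inherited from EarlyStopping
        break
sw.save_weights_numpy()

# Later, restore the best parameters into a freshly built network:
LoadWeights('models/NYU/simple/weights.npz', net).load_weights_numpy()
```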
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Two-stream convolutional networks that fuse RGB and depth images for hand pose estimation, implemented in Lasagne

This is an implementation of the paper [On the Fusion of RGB and Depth Information for Hand Pose Estimation](https://ieeexplore.ieee.org/document/8451022). The code is written in Python
using the [Lasagne](https://lasagne.readthedocs.io/en/latest/) deep learning framework.

## Requirements

* CUDA 9.0
* Create a conda environment from the environment.yml file using the following command in a terminal:
`$ conda env create -f environment.yml`
* In your home directory, create a `.theanorc` file containing:
```
[global]
floatX = float32
device = cuda0
```
## Dataset

Download the [NYU dataset](https://cims.nyu.edu/~tompson/NYU_Hand_Pose_Dataset.htm#download) and unzip it.
The code is designed to process the data in HDF5 format using [h5py](https://www.h5py.org). To convert
the dataset to HDF5 format, run the following in a Python shell:

```python
from datasets_preprocessing.datasets import NYU_Dataset
nyu = NYU_Dataset('/path/NYU/dataset', '/path/NYU/hdf5')
nyu.convert_to_hdf5()

```
where */path/NYU/* should be replaced with the location of the unzipped data from above. In ```datasets_preprocessing.datasets```, there are also classes for converting the [ICVL]() and [MSRA]() datasets to HDF5. Only NYU contains RGB-D images, while
ICVL and MSRA contain only depth images, so experiments have been carried out only on NYU. Nevertheless, you may
want to train just the depth stream on ICVL and MSRA.

## Training

Example:

```
python training_script.py 5 conv_fusing 0.5 9 concat --dataset_dir ~/data-private/NYU/hdf5/ --predef_hp --shuffle --validate
```

For a full description of the input arguments, run `python training_script.py -h`.


## Publication

Please reference this publication if you find this code useful:

```
@inproceedings{kazakos_fusion_icip2018,
  author={E. Kazakos and C. Nikou and I. A. Kakadiaris},
  booktitle={25th IEEE International Conference on Image Processing (ICIP)},
  title={On the Fusion of RGB and Depth Information for Hand Pose Estimation},
  year={2018},
  pages={868-872},
  month={Oct},
}
```

## Citations

* J. Tompson, M. Stein, Y. LeCun, and K. Perlin, "Real-Time Continuous Pose Recovery of Human Hands Using Convolutional Networks," ACM Transactions on Graphics, vol. 33, pp. 169:1–169:10, 2014.
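## Prediction

`predict_script.py` shows how to run a trained score-fusion model on the NYU test set. A trimmed version (the weights path is a placeholder for wherever your trained `weights.npz` was saved; `net_specs_dict` and `model_hp_dict` are the dictionaries defined at the top of that script):

```python
from trainingtesting import Testing

test = Testing(net_specs_dict, model_hp_dict, 14, 'NYU', 'test',
               'score_fusing', input_channels=5, fusion_level=7,
               fusion_type='local')
predictions = test.predict('/path/to/models/NYU/score_fusing/local/0.050000/weights.npz')
```

The resulting predictions can then be scored with the `accuracy` and `mean_joint_error` functions from `evaluation.py`.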
--------------------------------------------------------------------------------
/trials.py:
--------------------------------------------------------------------------------
import trainingtesting


import sys

if sys.argv[1] == 'conv':
    net_specs_dict = {'num_conv_layers': 9, 'num_conv_filters':
                      (32, 32, 64, 64, 128, 128, 128, 128, 128),
                      'conv_filter_size': (3,)*9,
                      'conv_pad': (1,)*9,
                      'num_fc_units': (4096, 4096)}
    opt_hp_dict = {'lr': 0.009, 'mom': 0.98}
    model_hp_dict = {'p': 0.03}
    tr = trainingtesting.Training(14, 'NYU', 'train', 'simple', 100, 3,
                                  net_specs_dict, model_hp_dict=model_hp_dict,
                                  opt_hp_dict=opt_hp_dict, input_channels=3)

    training_inf = tr.train_fused(early_stopping=True, shuffle=True)
elif sys.argv[1] == 'rec':
    net_specs_dict = {'num_conv_layers': 3, 'num_conv_filters':
                      (32, 64, 128), 'conv_filter_size': (3, 3, 3),
                      'conv_pad': (1, 1, 1), 'num_fc_units': (1024, 128)}
    hp_specs_dict = {'lr': 0.01, 'mom': 0.9, 'lambda_con': 0.001,
                     'lambda_rec': 0.01}
    # NOTE: this branch (and 'fuse' below) still passes arguments in an older
    # positional order of Training; compare with training_script.py before
    # reusing.
    tr = trainingtesting.Training(net_specs_dict, hp_specs_dict, 14, 'NYU',
                                  'train', 'autoencoding', 100, 20)

    training_inf = tr.train(early_stopping=False, updates_mode='double')
elif sys.argv[1] == 'fuse':
    net_specs_dict = {'num_conv_layers': 4, 'num_conv_filters':
                      (32, 64, 128, 128),
                      'conv_filter_size': (3,)*4,
                      'conv_pad': (1,)*4,
                      'num_fc_units': (2048, 2048)}
    hp_specs_dict = {'lr': 0.01, 'mom': 0.9}
    tr = trainingtesting.Training(net_specs_dict, hp_specs_dict, 14, 'NYU',
                                  'train', 'fusing', 100, 20, input_channels=4,
                                  fusion_level=4, fusion_type='concatconv')

    training_inf = tr.train_fused(early_stopping=False)
elif sys.argv[1] == 'dense_fuse':
    net_specs_dict = {'num_conv_layers': 4, 'num_conv_filters':
                      (32, 64, 128, 128),
                      'conv_filter_size': (3,)*4,
                      'conv_pad': (1,)*4,
                      'num_fc_units': (4096, 4096)}
    opt_hp_dict = {'lr': 0.01, 'mom': 0.9}
    model_hp_dict = {'p': 0.03}
    tr = trainingtesting.Training(14, 'NYU', 'train', 'dense_fusing', 100, 20,
                                  net_specs_dict, model_hp_dict=model_hp_dict,
                                  opt_hp_dict=opt_hp_dict, input_channels=4,
                                  fusion_type='concat')

    training_inf = tr.train_fused(early_stopping=False)
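The script selects one of its hard-coded configurations from the first command-line argument:

```
python trials.py conv        # single-stream ConvNet
python trials.py rec         # autoencoding variant
python trials.py fuse        # convolutional fusion
python trials.py dense_fuse  # dense-layer fusion
```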
--------------------------------------------------------------------------------
/randomsearch.py:
--------------------------------------------------------------------------------
import cPickle as pickle
import numpy as np
import os


def sample_hyperparams(lr_range, mom_range, p_range):
    opt_hp_dict = {}
    model_hp_dict = {}
    # Sample learning rate and momentum log-uniformly within their ranges
    # (the momentum formula originally mixed np.log with np.log10).
    opt_hp_dict['lr'] = 10**(np.random.random() *
                             (np.log10(lr_range[1])-np.log10(lr_range[0])) +
                             np.log10(lr_range[0]))
    opt_hp_dict['mom'] = 10**(np.random.random() *
                              (np.log10(mom_range[1])-np.log10(mom_range[0])) +
                              np.log10(mom_range[0]))
    model_hp_dict['p'] = np.random.random() *\
        (p_range[1]-p_range[0]) + p_range[0]

    return opt_hp_dict, model_hp_dict


def save_hyperparams(save_dir, opt_hp_dict, model_hp_dict, best_loss):

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # num_files = len([f for f in os.listdir(save_dir) if
    #                  os.path.isfile(os.path.join(save_dir, f))])
    hyperparams_file = os.path.join(save_dir,
                                    'setting_lr{0:f}_mom{1:f}_p{2:f}.pkl'.
                                    format(opt_hp_dict['lr'],
                                           opt_hp_dict['mom'],
                                           model_hp_dict['p']))
    with open(hyperparams_file, 'wb') as f:
        pickle.dump(opt_hp_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
        pickle.dump(model_hp_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
        pickle.dump(best_loss, f, protocol=pickle.HIGHEST_PROTOCOL)


def find_best_hyperparams(hyperparams_dir):

    loss = []
    hyperparams_file = []

    for f in os.listdir(hyperparams_dir):
        if os.path.isfile(os.path.join(hyperparams_dir, f)):
            with open(os.path.join(hyperparams_dir, f), 'rb') as pf:
                opt_hp_dict = pickle.load(pf)
                model_hp_dict = pickle.load(pf)
                best_loss = pickle.load(pf)
            loss.append(best_loss)
            hyperparams_file.append({'file': f, 'opt_hp': opt_hp_dict,
                                     'model_hp': model_hp_dict})
    # Print all sampled settings, best (lowest loss) first.
    ind = np.argsort(np.array(loss))
    for i in ind:
        print 'Loss: {0:f}\tLr: {1:f}\tMom: {2:f}\tP: {3:f}'.format(
            loss[i],
            hyperparams_file[i]['opt_hp']['lr'],
            hyperparams_file[i]['opt_hp']['mom'],
            hyperparams_file[i]['model_hp']['p'])
    '''
    ind = np.argmin(np.array(loss))
    with open(os.path.join(hyperparams_dir, 'best_setting.txt'), 'w') as f:
        f.write('File: {0:s}\n'.format(hyperparams_file[ind]['file']))
        f.write('Loss: {0:f}\n'.format(loss[ind]))
        f.write('Learning rate: {0:f}\tMomentum: {1:f}\tDropout prob:\
                {2:f}\n'.format(hyperparams_file[ind]['opt_hp']['lr'],
                                hyperparams_file[ind]['opt_hp']['mom'],
                                hyperparams_file[ind]['model_hp']['p']))
    '''
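These helpers are meant to be wired together by a driver loop along the following lines (a sketch; `train_with` stands in for whatever builds and trains the model, e.g. via trainingtesting.Training, and the ranges are illustrative):

```python
from randomsearch import (sample_hyperparams, save_hyperparams,
                          find_best_hyperparams)

for trial in range(30):
    opt_hp, model_hp = sample_hyperparams(lr_range=(1e-4, 1e-1),
                                          mom_range=(0.8, 0.99),
                                          p_range=(0.0, 0.5))
    best_loss = train_with(opt_hp, model_hp)  # hypothetical training call
    save_hyperparams('hyperparams/NYU/', opt_hp, model_hp, best_loss)

find_best_hyperparams('hyperparams/NYU/')  # prints settings sorted by loss
```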
--------------------------------------------------------------------------------
/earlystopping.py:
--------------------------------------------------------------------------------
from lasagne.layers import get_all_param_values, set_all_param_values


class EarlyStopping(object):

    ACCURACY = 'acc'
    LOSS = 'loss'

    def __init__(self, net, patience, loss_or_acc, times=5):
        self.patience = patience
        if loss_or_acc not in [self.ACCURACY, self.LOSS]:
            raise ValueError('loss_or_acc should take one of the following\
                             values: \'loss\', \'acc\'')
        self.loss_or_acc = loss_or_acc
        self.best_acc = 0
        self.best_loss = float('Inf')
        self.best_epoch = 0
        self.best_weights = None
        self.net = net
        self.times = times

    def early_stopping(self, current_val, current_epoch):
        if self.loss_or_acc == self.ACCURACY:
            if current_val > self.best_acc:
                self.best_acc = current_val
                self.best_epoch = current_epoch
                self.best_weights = get_all_param_values(self.net['output'],
                                                         trainable=True)
                return False
            elif self.best_epoch + self.patience < current_epoch:
                print 'Early Stopping...'
                return True
            else:
                # No improvement yet, but patience has not run out.
                return False
        else:
            if current_val < self.best_loss:
                self.best_loss = current_val
                self.best_epoch = current_epoch
                self.best_weights = get_all_param_values(self.net['output'],
                                                         trainable=True)
                return False
            elif self.best_epoch + self.patience < current_epoch:
                print 'Early Stopping...'
                return True
            else:
                # No improvement yet, but patience has not run out.
                return False

    def early_stopping_with_lr_decay(self, current_val, current_epoch, lr,
                                     time):
        if self.loss_or_acc == self.ACCURACY:
            if current_val > self.best_acc:
                self.best_acc = current_val
                self.best_epoch = current_epoch
                self.best_weights = get_all_param_values(self.net['output'],
                                                         trainable=True)
                return False, False
            elif self.best_epoch + self.patience < current_epoch:
                if time < self.times:
                    lr.set_value(lr.get_value()*0.5)
                    set_all_param_values(self.net['output'], self.best_weights,
                                         trainable=True)
                    return True, True
                else:
                    print 'Early Stopping...'
                    return True, False
            else:
                # Mirror the loss branch below: this branch used to fall
                # through and implicitly return None, which crashes on tuple
                # unpacking at the call site.
                return False, True
        else:
            if current_val < self.best_loss:
                self.best_loss = current_val
                self.best_epoch = current_epoch
                self.best_weights = get_all_param_values(self.net['output'],
                                                         trainable=True)
                return False, False
            elif self.best_epoch + self.patience < current_epoch:
                if time < self.times:
                    lr.set_value(lr.get_value()*0.5)
                    set_all_param_values(self.net['output'], self.best_weights,
                                         trainable=True)
                    return True, True
                else:
                    print 'Early Stopping...'
                    return True, False
            else:
                return False, True
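Reading the return pairs above: (False, ·) means keep training, (True, True) means patience ran out but the learning rate was halved and the best weights restored, and (True, False) means the decay budget (`times`) is also exhausted. A hedged sketch of a compatible driver loop (`run_one_epoch` is hypothetical, and resetting `best_epoch` after a decay is an assumption — the original caller in trainingtesting.py may handle this differently):

```python
import numpy as np
import theano
from earlystopping import EarlyStopping

lr = theano.shared(np.float32(0.01))  # shared variable used by the updates
es = EarlyStopping(net, patience=5, loss_or_acc='loss', times=5)

time = 0
for epoch in range(100):
    val_loss = run_one_epoch()                  # hypothetical training epoch
    stop, decayed = es.early_stopping_with_lr_decay(val_loss, epoch, lr, time)
    if stop and decayed:         # lr halved, best weights restored
        time += 1
        es.best_epoch = epoch    # assumption: give the new lr a fresh window
    elif stop and not decayed:
        break
```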
--------------------------------------------------------------------------------
/training_script.py:
--------------------------------------------------------------------------------
import argparse
import trainingtesting


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='''Runs training of the ConvNet of your choice. You can
        train a classical ConvNet on depth, RGB or RGB-D data or you
        can train an architecture that fuses ConvNet towers on different
        inputs (RGB and depth).''')
    parser.add_argument('input_channels', choices=[1, 4, 5], type=int,
                        help='number of input channels: 1 for depth, 4 for '
                        'rgb, 5 for rgbd or fusion')
    parser.add_argument('net_type',
                        choices=['simple', 'conv_fusing', 'dense_fusing',
                                 'score_fusing', 'input_fusing'],
                        help='type of network')
    parser.add_argument('p', type=float, help='dropout probability')
    parser.add_argument('fusion_level', type=int, nargs='?',
                        help='integer that specifies in which convolutional '
                        'layer to fuse')
    parser.add_argument('fusion_type', nargs='?',
                        choices=['sum', 'max', 'concat', 'concatconv',
                                 'local'],
                        help='fusion functions; use \'local\' only with '
                        'score fusion')
    parser.add_argument('--dataset_dir',
                        help='directory of the dataset in HDF5 format')
    parser.add_argument('--predef_hp', action='store_true',
                        help='whether or not to use predefined hyperparams')
    parser.add_argument('--validate', action='store_true',
                        help='whether to run in validation mode')
    parser.add_argument('--save_model', action='store_true',
                        help='whether to save model params')
    parser.add_argument('--save_loss', action='store_true',
                        help='whether to save loss curves')
    parser.add_argument('--early_stopping', action='store_true',
                        help='whether to perform early stopping')
    parser.add_argument('--shuffle', action='store_true',
                        help='whether to shuffle training data at each epoch')
    parser.add_argument('--weights_dir', help='directory of saved weights '
                        'for resuming training')
    args = parser.parse_args()
    # Depth-Net
    net_specs_dict = {'num_conv_layers': 9, 'num_conv_filters':
                      (32, 32, 64, 64, 128, 128, 128, 128, 128),
                      'conv_filter_size': (3,)*9,
                      'conv_pad': (1,)*9,
                      'num_fc_units': (4096, 4096)}

    if args.predef_hp:
        opt_hp_dict = {'lr': 0.009, 'mom': 0.98}
        model_hp_dict = {'p': args.p}
    else:
        opt_hp_dict = None
        model_hp_dict = None
    tr = trainingtesting.Training(args.dataset_dir, 14, 'NYU', 'train',
                                  args.net_type, 50, 5,
                                  net_specs_dict, model_hp_dict=model_hp_dict,
                                  opt_hp_dict=opt_hp_dict,
                                  validate=args.validate,
                                  input_channels=args.input_channels,
                                  fusion_level=args.fusion_level,
                                  fusion_type=args.fusion_type,
                                  weights_dir=args.weights_dir)
    training_inf = tr.train(save_model=args.save_model,
                            save_loss=args.save_loss,
                            early_stopping=args.early_stopping,
                            shuffle=args.shuffle)
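For example, a score-fusion run resumed from previously saved weights might look like this (paths are placeholders):

```
python training_script.py 5 score_fusing 0.05 7 local --dataset_dir ~/data/NYU/hdf5/ --predef_hp --shuffle --validate --save_model --weights_dir models/NYU/score_fusing/local/0.050000/weights.npz
```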
--------------------------------------------------------------------------------
/batchgenerators.py:
--------------------------------------------------------------------------------
import numpy as np
"""
This module implements different minibatch generators, depending on the
dataset. It contains a single class, BatchGenerator, whose methods generate
minibatches for each dataset.
"""


class BatchGenerator(object):
    """
    This class handles the minibatch generators for each dataset. It contains
    the following functions:
        1) __init__: class constructor
        2) generate_batches: batch generator for the NYU and ICVL datasets
        3) generate_batches_msra_train: training batch generator for the MSRA
        dataset
        4) generate_batches_msra_test: test batch generator for the MSRA
        dataset
    """
    # Datasets
    MSRA = 'MSRA'
    NYU = 'NYU'
    ICVL = 'ICVL'

    def __init__(self, hdf5_file, dataset, group, iterable=None,
                 shuffle=False):
        """
        Class constructor. It contains the following fields:
            1) _hdf5_file: hdf5 file of the dataset
            2) _dataset: the name of the dataset (available: "MSRA", "ICVL",
            "NYU")
            3) _group: which group of the _hdf5_file will be iterated. For
            ICVL and NYU, if group='train' you also have to specify
            _iterable (see below). For MSRA, _group defines the subject that
            will be kept as the test set.
            4) _dataset_size: the size of the dataset
            5) _iterable: iterable with ids that specify the part of the
            group to be iterated (if you split the training set into
            train/validation sets, provide one iterable with the ids of the
            training data and one with the ids of the validation data; when
            group='test', leave it None)
        """
        self._hdf5_file = hdf5_file
        if dataset not in [self.MSRA, self.NYU, self.ICVL]:
            raise ValueError('dataset can take one of the following values:\
                             \'MSRA\', \'ICVL\', \'NYU\'')
        self._dataset = dataset
        self._iterable = iterable
        if group not in self._hdf5_file.keys():
            raise ValueError('group should take one of the following values:\
                             {0:s}'.format(self._hdf5_file.keys()))
        self._group = group
        if self._iterable is not None:
            self._dataset_size = self._iterable.shape[0]
        else:
            self._dataset_size = self._hdf5_file[
                self._group]["depth_normalized"].shape[0]
        self._shuffle = shuffle

    def generate_batches(self, input_channels, batch_size=64):
        start_id = 0
        if self._iterable is None:
            indices = range(self._dataset_size)
        if self._shuffle:
            if self._iterable is not None:
                np.random.shuffle(self._iterable)
            else:
                np.random.shuffle(indices)
        while(start_id < self._dataset_size):
            if self._iterable is not None:
                chunk = slice(start_id, start_id+batch_size)
                chunk = self._iterable[chunk].tolist()
                # h5py requires fancy indices in increasing order.
                chunk.sort()
            else:
                chunk = slice(start_id, start_id+batch_size)
                chunk = indices[chunk]
            start_id += batch_size
            if input_channels == 1:
                yield self._hdf5_file[self._group]["depth_normalized"][chunk],\
                    self._hdf5_file[self._group]["joints3D_normalized"][chunk]
            elif input_channels == 4:
                yield self._hdf5_file[self._group]["rgb_normalized"][chunk],\
                    self._hdf5_file[self._group]["joints3D_normalized"][chunk]
            elif input_channels == 5:
                yield self._hdf5_file[self._group]["rgb_normalized"][chunk],\
                    self._hdf5_file[self._group]["depth_normalized"][chunk],\
                    self._hdf5_file[self._group]["joints3D_normalized"][chunk]

    def generate_batches_msra_train(self, batch_size=64):

        # list.remove() mutates in place and returns None, so it cannot be
        # chained onto the list literal as in the original code.
        groups = ['P0', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8']
        groups.remove(self._group)
        for grp in groups:
            dsize = self._hdf5_file[grp]["depth_normalized"].shape[0]
            start_id = 0
            while(start_id < dsize):
                # Clip the last chunk so the indices stay in range.
                chunk = range(start_id, min(start_id+batch_size, dsize))
                start_id += batch_size
                yield self._hdf5_file[grp]["depth_normalized"][chunk],\
                    self._hdf5_file[grp]["joints3D_normalized"][chunk]

    def generate_batches_msra_test(self, batch_size=1):
        start_id = 0
        # Iterate over the whole held-out subject (the original condition
        # compared start_id against batch_size, which yields a single batch).
        while(start_id < self._dataset_size):
            chunk = range(start_id, min(start_id+batch_size,
                                        self._dataset_size))
            start_id += batch_size
            yield self._hdf5_file[self._group]["depth_normalized"][chunk],\
                self._hdf5_file[self._group]["joints3D_normalized"][chunk]

--------------------------------------------------------------------------------
/datasets_preprocessing/imgnormalization.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | from scipy.ndimage.measurements import center_of_mass
4 | import cv2
5 | 
6 | 
7 | class ImgNormalization(object):
8 |     """Preprocess hand images to center the hand and normalize"""
9 |     def __init__(self, fx, fy, px, py, cube_size):
10 | 
11 |         self.fx = fx
12 |         self.fy = fy
13 |         self.px = px
14 |         self.py = py
15 |         self.cube_size =
cube_size 16 | 17 | 18 | #TODO Move the following 3 functions to a different class if necessary 19 | def depth_to_uvd(self, depth): 20 | 21 | uvd = np.zeros((3, depth.shape[0], depth.shape[1])) 22 | uv = np.mgrid[0:depth.shape[0], 0:depth.shape[1]] 23 | uvd[0] = uv[1] 24 | uvd[1] = uv[0] 25 | uvd[2] = depth 26 | return uvd 27 | 28 | def uvd_to_xyz(self, uvd): 29 | 30 | xyz = np.zeros(uvd.shape) 31 | xyz[0] = (uvd[0] - self.px)*uvd[2]/self.fx 32 | xyz[1] = (self.py - uvd[1])*uvd[2]/self.fy 33 | xyz[2] = uvd[2] 34 | 35 | return xyz 36 | 37 | def xyz_to_uvd(self, xyz): 38 | 39 | uvd = np.zeros(xyz.shape) 40 | uvd[0] = xyz[0]*self.fx/xyz[2] + self.px 41 | uvd[1] = self.py - xyz[1]*self.fy/xyz[2] 42 | uvd[2] = xyz[2] 43 | 44 | return uvd 45 | 46 | @staticmethod 47 | def calculate_com(depth_hand): 48 | """ 49 | Calculate the center of mass 50 | :param dpt: depth image 51 | :return: (x,y,z) center of mass 52 | """ 53 | 54 | dc = depth_hand.copy() 55 | cc = center_of_mass(dc > 0) 56 | num = np.count_nonzero(dc) 57 | com = np.array((cc[1]*num, cc[0]*num, dc.sum()), np.float) 58 | 59 | if num == 0: 60 | return np.array((0, 0, 0), np.float) 61 | else: 62 | return com/num 63 | 64 | @staticmethod 65 | def transform_point_2D(pt, M): 66 | """ 67 | Transform point in 2D coordinates 68 | :param pt: point coordinates 69 | :param M: transformation matrix 70 | :return: transformed point 71 | """ 72 | pt2 = np.asmatrix(M.reshape((3, 3))) * np.matrix([pt[0], pt[1], 1]).T 73 | return np.array([pt2[0] / pt2[2], pt2[1] / pt2[2]]) 74 | 75 | def ptcl_normalization(self, depth, com3D): 76 | """ 77 | Center point cloud to 0 and normalize it to [-1, 1] 78 | 79 | Keyword arguments: 80 | depth -- depth image (the initial before cropping) 81 | com3D -- center of mass in 3D 82 | cube_size -- size of the cube that used to crop hand area (default 250) 83 | 84 | Return: 85 | ptcl_normalized -- point cloud centered to 0 and normalized to [-1, 1] 86 | """ 87 | 88 | pcl_uvd = self.depth_to_uvd(depth) 89 | pcl_xyz = self.uvd_to_xyz(pcl_uvd) 90 | indr,indc = np.nonzero(pcl_xyz[2]) 91 | 92 | ptcl_normalized = np.vstack((pcl_xyz[0,indr,indc],pcl_xyz[1,indr,indc],pcl_xyz[2,indr,indc])) 93 | 94 | ptcl_normalized[0]-=com3D[0] 95 | ptcl_normalized[1]-=com3D[1] 96 | ptcl_normalized[2]-=com3D[2] 97 | ptcl_normalized /= self.cube_size / 2 98 | 99 | return ptcl_normalized 100 | 101 | def joints3D_depth_normalization(self, joints3D, depth, com3D): 102 | """ 103 | Center depth and joints in 3D to 0 and normalize it to [-1, 1]. 104 | 105 | Keyword arguments: 106 | joints3D -- joints in 3D 107 | com3D -- center of mass in 3D 108 | depth -- depth image that has been croped and scaled 109 | 110 | Return: 111 | joints3D_normalized -- joints in 3D centered to 0 and normalized to [-1, 1] 112 | depth_normalized -- depth centered to 0 and normalized to [-1, 1] 113 | """ 114 | 115 | joints3D_normalized = np.clip((joints3D - com3D[:,None]) / (self.cube_size / 2), -1, 1) 116 | depth[depth == 0.] = com3D[2] + self.cube_size / 2. 
117 | depth -= com3D[2] 118 | depth_normalized = depth / (self.cube_size / 2) 119 | 120 | return joints3D_normalized, depth_normalized 121 | 122 | # def getNDValue(self): 123 | # """ 124 | # Get value of not defined depth value distances 125 | # :return:value of not defined depth value 126 | # """ 127 | # if self.depth[self.depth < self.minDepth].shape[0] > self.depth[self.depth > self.maxDepth].shape[0]: 128 | # return stats.mode(self.depth[self.depth < self.minDepth])[0][0] 129 | # else: 130 | # return stats.mode(self.depth[self.depth > self.maxDepth])[0][0] 131 | 132 | def crop_scale_depth(self, depth, com, dsize=(128, 128)): 133 | """ 134 | Crops depth image using 3D bounding box centered at the CoM of hand 135 | and then resize it to a 128x128 image 136 | :param depth: depth image 137 | :param com: center of mass of hand 138 | :param size: size of 3D bounding box 139 | :param dsize: size of the scaled image 140 | :return: cropped and resized image and transformation matrix for joints 141 | 142 | """ 143 | maxDepth = min(1500, depth.max()) 144 | minDepth = max(10, depth.min()) 145 | # set values out of range to 0 146 | depth[depth > maxDepth] = 0. 147 | depth[depth < minDepth] = 0. 148 | 149 | # calculate boundaries 150 | zstart = com[2] - self.cube_size / 2. 151 | zend = com[2] + self.cube_size / 2. 152 | xstart = int(math.floor((com[0] * com[2] / self.fx - self.cube_size / 2.) / com[2]*self.fx)) 153 | xend = int(math.floor((com[0] * com[2] / self.fx + self.cube_size / 2.) / com[2]*self.fx)) 154 | ystart = int(math.floor((com[1] * com[2] / self.fy - self.cube_size / 2.) / com[2]*self.fy)) 155 | yend = int(math.floor((com[1] * com[2] / self.fy + self.cube_size / 2.) / com[2]*self.fy)) 156 | 157 | # crop patch from source 158 | cropped = depth[max(ystart, 0):min(yend, depth.shape[0]), max(xstart, 0):min(xend, depth.shape[1])].copy() 159 | # add pixels that are out of the image in order to keep aspect ratio 160 | cropped = np.pad(cropped, ((abs(ystart)-max(ystart, 0), abs(yend)-min(yend, depth.shape[0])), 161 | (abs(xstart)-max(xstart, 0),abs(xend)-min(xend, depth.shape[1]))), mode='constant', constant_values=0) 162 | msk1 = np.bitwise_and(cropped < zstart, cropped != 0) 163 | msk2 = np.bitwise_and(cropped > zend, cropped != 0) 164 | cropped[msk1] = zstart 165 | cropped[msk2] = 0. 
166 | 167 | wb = (xend - xstart) 168 | hb = (yend - ystart) 169 | 170 | trans = np.asmatrix(np.eye(3, dtype=float)) 171 | trans[0, 2] = -xstart 172 | trans[1, 2] = -ystart 173 | 174 | if wb > hb: 175 | sz = (dsize[0], hb * dsize[0] / wb) 176 | else: 177 | sz = (wb * dsize[1] / hb, dsize[1]) 178 | 179 | roi = cropped 180 | 181 | if roi.shape[0] > roi.shape[1]: 182 | scale = np.asmatrix(np.eye(3, dtype=float) * sz[1] / float(roi.shape[0])) 183 | else: 184 | scale = np.asmatrix(np.eye(3, dtype=float) * sz[0] / float(roi.shape[1])) 185 | scale[2, 2] = 1 186 | 187 | rz = cv2.resize(roi, sz, interpolation=cv2.INTER_NEAREST) 188 | 189 | ret = np.ones(dsize, np.float) * zend # use background as filler 190 | xstart = int(math.floor(dsize[0] / 2 - rz.shape[1] / 2)) 191 | xend = int(xstart + rz.shape[1]) 192 | ystart = int(math.floor(dsize[1] / 2 - rz.shape[0] / 2)) 193 | yend = int(ystart + rz.shape[0]) 194 | ret[ystart:yend, xstart:xend] = rz 195 | 196 | off = np.asmatrix(np.eye(3, dtype=float)) 197 | off[0, 2] = xstart 198 | off[1, 2] = ystart 199 | 200 | return ret, off * scale * trans 201 | 202 | def crop_scale_rgb(self, rgb, depth, com, dsize=(128, 128, 3)): 203 | """ 204 | Crops depth image using 3D bounding box centered at the CoM of hand 205 | and then resize it to a 128x128 image 206 | :param depth: depth image 207 | :param com: center of mass of hand 208 | :param size: size of 3D bounding box 209 | :param dsize: size of the scaled image 210 | :return: cropped and resized image and transformation matrix for joints 211 | 212 | """ 213 | 214 | # calculate boundaries 215 | xstart = int(math.floor((com[0] * com[2] / self.fx - self.cube_size / 2.) / com[2]*self.fx)) 216 | xend = int(math.floor((com[0] * com[2] / self.fx + self.cube_size / 2.) / com[2]*self.fx)) 217 | ystart = int(math.floor((com[1] * com[2] / self.fy - self.cube_size / 2.) / com[2]*self.fy)) 218 | yend = int(math.floor((com[1] * com[2] / self.fy + self.cube_size / 2.) / com[2]*self.fy)) 219 | 220 | # crop patch from source 221 | cropped = rgb[max(ystart, 0):min(yend, rgb.shape[0]), max(xstart, 0):min(xend, rgb.shape[1])].copy() 222 | 223 | # add pixels that are out of the image in order to keep aspect ratio 224 | cropped = np.pad(cropped, ((abs(ystart)-max(ystart, 0), abs(yend)-min(yend, rgb.shape[0])), 225 | (abs(xstart)-max(xstart, 0),abs(xend)-min(xend, rgb.shape[1])), (0,0)), mode='constant', constant_values=0) 226 | 227 | 228 | wb = (xend - xstart) 229 | hb = (yend - ystart) 230 | 231 | if wb > hb: 232 | sz = (dsize[0], hb * dsize[0] / wb) 233 | else: 234 | sz = (wb * dsize[1] / hb, dsize[1]) 235 | 236 | roi = cropped 237 | rz = cv2.resize(roi, sz) 238 | 239 | ret = np.zeros(dsize, np.uint8) 240 | xstart = int(math.floor(dsize[0] / 2 - rz.shape[1] / 2)) 241 | xend = int(xstart + rz.shape[1]) 242 | ystart = int(math.floor(dsize[1] / 2 - rz.shape[0] / 2)) 243 | yend = int(ystart + rz.shape[0]) 244 | ret[ystart:yend, xstart:xend, :] = rz 245 | msk = np.bitwise_not(np.bitwise_or(depth==1., depth==-1.)) 246 | return ret, msk -------------------------------------------------------------------------------- /networks.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains definitions for different network architectures. 
3 | """ 4 | from collections import OrderedDict 5 | from lasagne.layers import InputLayer, Conv2DLayer, MaxPool2DLayer, DenseLayer, dropout, ElemwiseMergeLayer, concat, reshape, Conv1DLayer, ElemwiseSumLayer 6 | # from lasagne.layers import LocallyConnected2DLayer 7 | import lasagne.nonlinearities 8 | import lasagne 9 | import theano.tensor as T 10 | from saveloadweights import LoadWeights 11 | 12 | 13 | class ConvNet(object): 14 | """ 15 | This class contains all the necessary information for creating a 16 | network(such as number of layers and number of filters per layer), as well 17 | as functions that define different networks. 18 | """ 19 | CONCAT = 'concat' 20 | CONCATCONV = 'concatconv' 21 | SUM = 'sum' 22 | MAX = 'max' 23 | LOCAL = 'local' 24 | 25 | def __init__(self, net_specs_dict, model_hp_dict, num_joints): 26 | 27 | self._net_specs_dict = net_specs_dict 28 | self._model_hp_dict = model_hp_dict 29 | self._num_joints = num_joints 30 | 31 | def simple_convnet(self, input_channels, input_var=None, 32 | bottleneck_W=None): 33 | """ 34 | This is a classical convnet. It contains convolution and 35 | fully-connected(fc) layers. 36 | 37 | Keyword arguments: 38 | input_var -- theano variable that specifies the type and dimension of 39 | the input(default None) 40 | 41 | Return: 42 | net -- dictionary that contains all the network layers 43 | """ 44 | net = OrderedDict() 45 | net['input'] = InputLayer((None, input_channels, 128, 128), 46 | input_var=input_var) 47 | layer = 0 48 | for i in range(self._net_specs_dict['num_conv_layers']): 49 | # Add convolution layers 50 | net['conv{0:d}'.format(i+1)] = Conv2DLayer( 51 | net.values()[layer], 52 | num_filters=self._net_specs_dict['num_conv_filters'][i], 53 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 54 | pad='same') 55 | layer += 1 56 | if self._net_specs_dict['num_conv_layers'] <= 2: 57 | # Add pooling layers 58 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 59 | net.values()[layer], pool_size=(3, 3)) 60 | layer += 1 61 | else: 62 | if i < 4: 63 | if (i+1) % 2 == 0: 64 | # Add pooling layers 65 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 66 | net.values()[layer], pool_size=(3, 3)) 67 | layer += 1 68 | else: 69 | if (i+1) == 7: 70 | # Add pooling layers 71 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 72 | net.values()[layer], pool_size=(3, 3)) 73 | layer += 1 74 | 75 | # Add fc-layers 76 | net['fc1'] = DenseLayer( 77 | net.values()[layer], 78 | self._net_specs_dict['num_fc_units'][0]) 79 | # Add dropout layer 80 | net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p']) 81 | net['fc2'] = DenseLayer( 82 | net['dropout1'], self._net_specs_dict['num_fc_units'][1]) 83 | # Add dropout layer 84 | net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) 85 | if bottleneck_W is not None: 86 | # Add bottleneck layer 87 | net['bottleneck'] = DenseLayer(net['dropout2'], 30) 88 | # Add output layer(linear activation because it's regression) 89 | net['output'] = DenseLayer( 90 | net['bottleneck'], 3*self._num_joints, 91 | W=bottleneck_W[0:30], 92 | nonlinearity=lasagne.nonlinearities.tanh) 93 | else: 94 | # Add output layer(linear activation because it's regression) 95 | net['output'] = DenseLayer( 96 | net['dropout2'], 3*self._num_joints, 97 | nonlinearity=lasagne.nonlinearities.tanh) 98 | return net 99 | 100 | def input_fused_convnets(self, fusion_type, input_var1=None, 101 | input_var2=None, bottleneck_W=None): 102 | net = OrderedDict() 103 | net['input_rgb'] = InputLayer((None, 4, 128, 128), 104 | 
input_var=input_var1) 105 | layer = 0 106 | net['input_depth'] = InputLayer((None, 1, 128, 128), 107 | input_var=input_var2) 108 | layer += 1 109 | 110 | if fusion_type == self.CONCAT: 111 | net['merge'] = concat([net['input_rgb'], 112 | net['input_depth']] 113 | ) 114 | layer += 1 115 | elif fusion_type == self.CONCATCONV: 116 | net['concat'] = concat( 117 | [net['input_rgb'], net['input_depth']]) 118 | layer += 1 119 | net['merge'] = Conv2DLayer(net['concat'], 120 | num_filters=1, 121 | filter_size=(1, 1), nonlinearity=None) 122 | layer += 1 123 | 124 | for i in range(self._net_specs_dict['num_conv_layers']): 125 | # Add convolution layers 126 | net['conv{0:d}'.format(i+1)] = Conv2DLayer( 127 | net.values()[layer], 128 | num_filters=self._net_specs_dict['num_conv_filters'][i], 129 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 130 | pad='same') 131 | layer += 1 132 | if self._net_specs_dict['num_conv_layers'] <= 2: 133 | # Add pooling layers 134 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 135 | net.values()[layer], pool_size=(3, 3)) 136 | layer += 1 137 | else: 138 | if i < 4: 139 | if (i+1) % 2 == 0: 140 | # Add pooling layers 141 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 142 | net.values()[layer], pool_size=(3, 3)) 143 | layer += 1 144 | else: 145 | if (i+1) == 7: 146 | # Add pooling layers 147 | net['pool{0:d}'.format(i+1)] = MaxPool2DLayer( 148 | net.values()[layer], pool_size=(3, 3)) 149 | layer += 1 150 | 151 | # Add fc-layers 152 | net['fc1'] = DenseLayer( 153 | net.values()[layer], 154 | self._net_specs_dict['num_fc_units'][0]) 155 | # Add dropout layer 156 | net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p']) 157 | net['fc2'] = DenseLayer( 158 | net['dropout1'], self._net_specs_dict['num_fc_units'][1]) 159 | # Add dropout layer 160 | net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) 161 | if bottleneck_W is not None: 162 | # Add bottleneck layer 163 | net['bottleneck'] = DenseLayer(net['dropout2'], 30) 164 | # Add output layer(linear activation because it's regression) 165 | net['output'] = DenseLayer( 166 | net['bottleneck'], 3*self._num_joints, 167 | W=bottleneck_W[0:30], 168 | nonlinearity=lasagne.nonlinearities.tanh) 169 | else: 170 | # Add output layer(linear activation because it's regression) 171 | net['output'] = DenseLayer( 172 | net['dropout2'], 3*self._num_joints, 173 | nonlinearity=lasagne.nonlinearities.tanh) 174 | return net 175 | 176 | def dense_fused_convnets(self, fusion_level, fusion_type, input_var1=None, 177 | input_var2=None, bottleneck_W=None, 178 | weights_dir=None): 179 | 180 | net = OrderedDict() 181 | net['input_rgb'] = InputLayer((None, 4, 128, 128), 182 | input_var=input_var1) 183 | layer = 0 184 | for i in range(self._net_specs_dict['num_conv_layers']): 185 | # Add convolution layers 186 | net['conv_rgb{0:d}'.format(i+1)] = Conv2DLayer( 187 | net.values()[layer], 188 | num_filters=self._net_specs_dict['num_conv_filters'][i], 189 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 190 | pad='same') 191 | layer += 1 192 | if self._net_specs_dict['num_conv_layers'] <= 2: 193 | # Add pooling layers 194 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 195 | net.values()[layer], pool_size=(3, 3)) 196 | layer += 1 197 | else: 198 | if i < 4: 199 | if (i+1) % 2 == 0: 200 | # Add pooling layers 201 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 202 | net.values()[layer], pool_size=(3, 3)) 203 | layer += 1 204 | else: 205 | if (i+1) == 7: 206 | # Add pooling layers 207 | 
net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 208 | net.values()[layer], pool_size=(3, 3)) 209 | layer += 1 210 | # Fc-layers 211 | net['fc1_rgb'] = DenseLayer( 212 | net.values()[layer], 213 | self._net_specs_dict['num_fc_units'][0]) 214 | layer += 1 215 | if fusion_level == 2: 216 | # Add dropout layer 217 | net['dropout1_rgb'] = dropout(net['fc1_rgb'], 218 | p=self._model_hp_dict['p']) 219 | layer += 1 220 | net['fc2_rgb'] = DenseLayer( 221 | net['dropout1_rgb'], self._net_specs_dict['num_fc_units'][1]) 222 | layer += 1 223 | 224 | net['input_depth'] = InputLayer((None, 1, 128, 128), 225 | input_var=input_var2) 226 | layer += 1 227 | for i in range(self._net_specs_dict['num_conv_layers']): 228 | # Add convolution layers 229 | net['conv_depth{0:d}'.format(i+1)] = Conv2DLayer( 230 | net.values()[layer], 231 | num_filters=self._net_specs_dict['num_conv_filters'][i], 232 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 233 | pad='same') 234 | layer += 1 235 | if self._net_specs_dict['num_conv_layers'] <= 2: 236 | # Add pooling layers 237 | net['pool_depth{0:d}'.format(i+1)] = MaxPool2DLayer( 238 | net.values()[layer], pool_size=(3, 3)) 239 | layer += 1 240 | else: 241 | if i < 4: 242 | if (i+1) % 2 == 0: 243 | # Add pooling layers 244 | net['pool_depth{0:d}'.format(i+1)] =\ 245 | MaxPool2DLayer(net.values()[layer], 246 | pool_size=(3, 3)) 247 | layer += 1 248 | else: 249 | if (i+1) == 7: 250 | # Add pooling layers 251 | net['pool_depth{0:d}'.format(i+1)] =\ 252 | MaxPool2DLayer(net.values()[layer], 253 | pool_size=(3, 3)) 254 | layer += 1 255 | # Fc-layers 256 | net['fc1_depth'] = DenseLayer( 257 | net.values()[layer], 258 | self._net_specs_dict['num_fc_units'][0]) 259 | layer += 1 260 | if fusion_level == 2: 261 | # Add dropout layer 262 | net['dropout1_depth'] = dropout(net['fc1_depth'], 263 | p=self._model_hp_dict['p']) 264 | layer += 1 265 | net['fc2_depth'] = DenseLayer( 266 | net['dropout1_depth'], self._net_specs_dict['num_fc_units'][1]) 267 | layer += 1 268 | 269 | # Fuse ConvNets by fusion_level and fusion_type 270 | if fusion_type == self.MAX: 271 | net['merge'] =\ 272 | ElemwiseMergeLayer([net['fc%i_rgb' % fusion_level], 273 | net['fc%i_depth' % fusion_level]], 274 | T.maximum) 275 | layer += 1 276 | elif fusion_type == self.SUM: 277 | net['merge'] =\ 278 | ElemwiseMergeLayer([net['fc%i_rgb' % fusion_level], 279 | net['fc%i_depth' % fusion_level]], 280 | T.add) 281 | layer += 1 282 | elif fusion_type == self.CONCAT: 283 | net['merge'] = concat([net['fc%i_rgb' % fusion_level], 284 | net['fc%i_depth' % fusion_level]]) 285 | layer += 1 286 | elif fusion_type == self.CONCATCONV: 287 | net['fc%i_rgb_res' % fusion_level] =\ 288 | reshape(net['fc%i_rgb' % fusion_level], ([0], 1, [1])) 289 | layer += 1 290 | net['fc%i_depth_res' % fusion_level] =\ 291 | reshape(net['fc%i_depth' % fusion_level], ([0], 1, [1])) 292 | layer += 1 293 | net['concat'] = concat([net['fc%i_rgb_res' % fusion_level], 294 | net['fc%i_depth_res' % fusion_level]]) 295 | layer += 1 296 | net['merge_con'] = Conv1DLayer(net['concat'], 297 | num_filters=1, 298 | filter_size=(1,), 299 | nonlinearity=None) 300 | layer += 1 301 | net['merge'] = reshape(net['merge_con'], ([0], [2])) 302 | layer += 1 303 | 304 | if fusion_level == 1: 305 | # Add dropout layer 306 | net['dropout1'] = dropout(net['merge'], 307 | p=self._model_hp_dict['p']) 308 | layer += 1 309 | net['fc2'] = DenseLayer( 310 | net['dropout1'], self._net_specs_dict['num_fc_units'][1]) 311 | layer += 1 312 | # Add dropout layer 313 | 
net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) 314 | layer += 1 315 | else: 316 | # Add dropout layer 317 | net['dropout2'] = dropout(net['merge'], p=self._model_hp_dict['p']) 318 | layer += 1 319 | # Add output layer(linear activation because it's regression) 320 | if bottleneck_W is not None: 321 | # Add bottleneck layer 322 | net['bottleneck'] = DenseLayer(net['dropout2'], 30) 323 | # Add output layer(linear activation because it's regression) 324 | net['output'] = DenseLayer( 325 | net['bottleneck'], 3*self._num_joints, 326 | W=bottleneck_W[0:30], 327 | nonlinearity=lasagne.nonlinearities.tanh) 328 | else: 329 | # Add output layer(linear activation because it's regression) 330 | net['output'] = DenseLayer( 331 | net['dropout2'], 3*self._num_joints, 332 | nonlinearity=lasagne.nonlinearities.tanh) 333 | if weights_dir is not None: 334 | lw = LoadWeights(weights_dir, net) 335 | lw.load_weights_numpy() 336 | return net 337 | 338 | def fused_convnets(self, fusion_level, fusion_type, input_var1=None, 339 | input_var2=None, bottleneck_W=None, weights_dir=None): 340 | 341 | net = OrderedDict() 342 | net['input_rgb'] = InputLayer((None, 4, 128, 128), 343 | input_var=input_var1) 344 | layer = 0 345 | for i in range(fusion_level): 346 | # Add convolution layers 347 | net['conv_rgb{0:d}'.format(i+1)] = Conv2DLayer( 348 | net.values()[layer], 349 | num_filters=self._net_specs_dict['num_conv_filters'][i], 350 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 351 | pad='same') 352 | layer += 1 353 | if self._net_specs_dict['num_conv_layers'] <= 2 and\ 354 | i != fusion_level - 1: 355 | # Add pooling layers 356 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 357 | net.values()[layer], pool_size=(3, 3)) 358 | layer += 1 359 | else: 360 | if i < 4: 361 | if (i+1) % 2 == 0 and i != fusion_level-1: 362 | # Add pooling layers 363 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 364 | net.values()[layer], pool_size=(3, 3)) 365 | layer += 1 366 | else: 367 | if (i+1) == 7 and i != fusion_level-1: 368 | # Add pooling layers 369 | net['pool_rgb{0:d}'.format(i+1)] = MaxPool2DLayer( 370 | net.values()[layer], pool_size=(3, 3)) 371 | layer += 1 372 | 373 | net['input_depth'] = InputLayer((None, 1, 128, 128), 374 | input_var=input_var2) 375 | layer += 1 376 | for i in range(fusion_level): 377 | # Add convolution layers 378 | net['conv_depth{0:d}'.format(i+1)] = Conv2DLayer( 379 | net.values()[layer], 380 | num_filters=self._net_specs_dict['num_conv_filters'][i], 381 | filter_size=(self._net_specs_dict['conv_filter_size'][i],)*2, 382 | pad='same') 383 | layer += 1 384 | if self._net_specs_dict['num_conv_layers'] <= 2 and\ 385 | i != fusion_level - 1: 386 | # Add pooling layers 387 | net['pool_depth{0:d}'.format(i+1)] = MaxPool2DLayer( 388 | net.values()[layer], pool_size=(3, 3)) 389 | layer += 1 390 | else: 391 | if i < 4: 392 | if (i+1) % 2 == 0 and i != fusion_level-1: 393 | # Add pooling layers 394 | net['pool_depth{0:d}'.format(i+1)] =\ 395 | MaxPool2DLayer(net.values()[layer], 396 | pool_size=(3, 3)) 397 | layer += 1 398 | else: 399 | if (i+1) == 7 and i != fusion_level-1: 400 | # Add pooling layers 401 | net['pool_depth{0:d}'.format(i+1)] =\ 402 | MaxPool2DLayer(net.values()[layer], 403 | pool_size=(3, 3)) 404 | layer += 1 405 | # Fuse ConvNets by fusion_level and fusion_type 406 | if fusion_type == self.MAX: 407 | net['merge'] =\ 408 | ElemwiseMergeLayer([net['conv_rgb{0:d}'.format(fusion_level)], 409 | net['conv_depth{0:d}'.format(fusion_level)] 410 | ], T.maximum) 
411 | layer += 1 412 | elif fusion_type == self.SUM: 413 | net['merge'] =\ 414 | ElemwiseMergeLayer([net['conv_rgb{0:d}'.format(fusion_level)], 415 | net['conv_depth{0:d}'.format(fusion_level)] 416 | ], T.add) 417 | layer += 1 418 | elif fusion_type == self.CONCAT: 419 | net['merge'] = concat([net['conv_rgb{0:d}'.format(fusion_level)], 420 | net['conv_depth{0:d}'.format(fusion_level)]] 421 | ) 422 | layer += 1 423 | elif fusion_type == self.CONCATCONV: 424 | net['concat'] = concat( 425 | [net['conv_rgb{0:d}'.format(fusion_level)], 426 | net['conv_depth{0:d}'.format(fusion_level)]]) 427 | layer += 1 428 | net['merge'] = Conv2DLayer(net['concat'], 429 | num_filters=self._net_specs_dict[ 430 | 'num_conv_filters'][fusion_level-1], 431 | filter_size=(1, 1), nonlinearity=None) 432 | layer += 1 433 | # Max-pooling to the merged 434 | if fusion_level in [2, 4, 7]: 435 | net['pool_merged'] = MaxPool2DLayer(net['merge'], pool_size=(3, 3)) 436 | layer += 1 437 | # Continue the rest of the convolutional part of the network, 438 | # if the fusion took place before the last convolutional layer, 439 | # else just connect the convolutional part with the fully connected 440 | # part 441 | if self._net_specs_dict['num_conv_layers'] > fusion_level: 442 | for i in range(fusion_level, 443 | self._net_specs_dict['num_conv_layers']): 444 | # Add convolution layers 445 | net['conv_merged{0:d}'.format(i+1)] = Conv2DLayer( 446 | net.values()[layer], 447 | num_filters=self._net_specs_dict['num_conv_filters'][i], 448 | filter_size=(self._net_specs_dict['conv_filter_size'][i],) 449 | * 2, pad='same') 450 | layer += 1 451 | if self._net_specs_dict['num_conv_layers'] <= 2: 452 | # Add pooling layers 453 | net['pool_merged{0:d}'.format(i+1)] = MaxPool2DLayer( 454 | net.values()[layer], pool_size=(3, 3)) 455 | layer += 1 456 | else: 457 | if i < 4: 458 | if (i+1) % 2 == 0: 459 | # Add pooling layers 460 | net['pool_merged{0:d}'.format(i+1)] =\ 461 | MaxPool2DLayer(net.values()[layer], 462 | pool_size=(3, 3)) 463 | layer += 1 464 | else: 465 | if (i+1) == 7: 466 | # Add pooling layers 467 | net['pool_merged{0:d}'.format(i+1)] =\ 468 | MaxPool2DLayer(net.values()[layer], 469 | pool_size=(3, 3)) 470 | layer += 1 471 | # Fc-layers 472 | net['fc1'] = DenseLayer( 473 | net.values()[layer], 474 | self._net_specs_dict['num_fc_units'][0]) 475 | # Add dropout layer 476 | net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p']) 477 | net['fc2'] = DenseLayer( 478 | net['dropout1'], self._net_specs_dict['num_fc_units'][1]) 479 | # Add dropout layer 480 | net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) 481 | if bottleneck_W is not None: 482 | # Add bottleneck layer 483 | net['bottleneck'] = DenseLayer(net['dropout2'], 30) 484 | # Add output layer(linear activation because it's regression) 485 | net['output'] = DenseLayer( 486 | net['bottleneck'], 3*self._num_joints, 487 | W=bottleneck_W[0:30], 488 | nonlinearity=lasagne.nonlinearities.tanh) 489 | else: 490 | # Add output layer(linear activation because it's regression) 491 | net['output'] = DenseLayer( 492 | net['dropout2'], 3*self._num_joints, 493 | nonlinearity=lasagne.nonlinearities.tanh) 494 | if weights_dir is not None: 495 | lw = LoadWeights(weights_dir, net) 496 | lw.load_weights_numpy() 497 | return net 498 | 499 | def score_fused_convnets(self, fusion_type, input_var1=None, 500 | input_var2=None, weights_dir_depth=None, 501 | weights_dir_rgb=None, bottleneck_W=None, 502 | weights_dir=None): 503 | 504 | net = OrderedDict() 505 | rgb_net = 
self.simple_convnet(4, input_var=input_var1,
506 |                                        bottleneck_W=bottleneck_W)
507 |         depth_net = self.simple_convnet(1, input_var=input_var2,
508 |                                         bottleneck_W=bottleneck_W)
509 |         if weights_dir_depth is not None and weights_dir_rgb is not None:
510 |             lw_depth = LoadWeights(weights_dir_depth, depth_net)
511 |             lw_depth.load_weights_numpy()
512 |             lw_rgb = LoadWeights(weights_dir_rgb, rgb_net)
513 |             lw_rgb.load_weights_numpy()
514 |         if fusion_type == self.LOCAL:
515 |             # NOTE: LocallyConnected2DLayer is used here, but its import
516 |             # at the top of this module is commented out; uncomment it
517 |             # (it requires a Lasagne version that ships this layer)
518 |             # before using fusion_type='local'.
519 |             net['reshape_depth'] = reshape(depth_net['output'],
520 |                                            ([0], 1, 1, [1]))
521 |             net['reshape_rgb'] = reshape(rgb_net['output'],
522 |                                          ([0], 1, 1, [1]))
523 |             net['concat'] = concat([net['reshape_depth'], net['reshape_rgb']])
524 |             net['lcl'] = LocallyConnected2DLayer(net['concat'], 1, (1, 1),
525 |                                                  untie_biases=True,
526 |                                                  nonlinearity=None)
527 |             net['output'] = reshape(net['lcl'], ([0], [3]))
528 |         elif fusion_type == self.SUM:
529 |             net['output'] = ElemwiseSumLayer([depth_net['output'],
530 |                                               rgb_net['output']], coeffs=0.5)
531 | 
532 |         if weights_dir is not None:
533 |             lw = LoadWeights(weights_dir, net)
534 |             lw.load_weights_numpy()
535 |         return net
--------------------------------------------------------------------------------
/datasets_preprocessing/datasets.py:
--------------------------------------------------------------------------------
1 | """
2 | Reads datasets and saves normalized images and annotations to HDF5
3 | """
4 | 
5 | from collections import OrderedDict
6 | import os
7 | import struct
8 | import abc
9 | import numpy as np
10 | from scipy import misc
11 | import imageio
12 | import scipy.io as sio
13 | import h5py
14 | import cv2
15 | from imgnormalization import ImgNormalization
16 | 
17 | 
18 | """
19 | superclass: Dataset
20 | subclasses: one for each dataset
21 | 
22 | functions:
23 |     1. save to hdf5 (superclass)
24 |     2. read from files and folders and load into numpy arrays (return img) (subclass)
25 |     3. normalize img and joints in both uvd and xyz (subclass) (returns everything that will be saved, e.g. depth, com, joints)
26 |     4. script that combines everything and finally saves to hdf5 (subclass)
27 | """
28 | # maybe move xyz_to_uvd etc. here
29 | # in the subclasses you will put the configurations of the datasets (e.g. number of joints, which joints, focal lengths etc)
30 | 
31 | 
32 | class Dataset(object):
33 | 
34 |     __metaclass__ = abc.ABCMeta
35 | 
36 |     def __init__(self, fx, fy, px, py, joints_num, groups_list):
37 | 
38 |         self.fx = fx
39 |         self.fy = fy
40 |         self.px = px
41 |         self.py = py
42 |         self.joints_num = joints_num
43 |         self.groups_list = groups_list
44 |         self.dataset_size = self._get_dataset_size()
45 |         self._in = ImgNormalization(self.fx, self.fy, self.px, self.py, 250.)
46 | 
47 |     @abc.abstractmethod
48 |     def _get_dataset_size(self):
49 |         """
50 |         Abstract method for computing the dataset size.
51 |         Different implementation in each dataset.
52 |         """
53 | 
54 |     def initialize_hdf5(self, f):
55 |         """
56 |         Initializes the dataset structure in HDF5 format
57 | 
58 |         Keyword arguments:
59 |         f -- HDF5 file (already open)
60 | 
61 |         Return:
62 |         dset -- object for accessing dataset attributes
63 |         """
64 |         grp = {}
65 |         dset = {}
66 |         for g in self.groups_list:
67 |             grp[g] = f.create_group(g)
68 |             dset[g] = {}
69 | 
70 |         for group in grp.keys():
71 |             dset[group]["depth_normalized"] = grp[group].create_dataset("depth_normalized", (self.dataset_size[group], 1, 128, 128), dtype = np.float32)
72 |             dset[group]["com3D"] = grp[group].create_dataset("com3D", (self.dataset_size[group], 3), dtype = np.float32)
73 |             dset[group]["T"] = grp[group].create_dataset("T", (self.dataset_size[group], 3, 3), dtype = np.float32)
74 |             dset[group]["joints"] = grp[group].create_dataset("joints", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
75 |             dset[group]["joints_normalized"] = grp[group].create_dataset("joints_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
76 |             dset[group]["joints3D"] = grp[group].create_dataset("joints3D", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
77 |             dset[group]["joints3D_normalized"] = grp[group].create_dataset("joints3D_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
78 |             dset[group]["path"] = grp[group].create_dataset("path", (self.dataset_size[group],), dtype = "S72")
79 | 
80 |         return dset
81 |     @staticmethod
82 |     def save_hdf5(dset, group, index, depth_normalized, com3D, joints, joints_normalized, joints3D, joints3D_normalized, M, path):
83 | 
84 |         dset[group]["depth_normalized"][index] = depth_normalized
85 |         dset[group]["com3D"][index] = com3D
86 |         dset[group]["T"][index] = M
87 |         dset[group]["joints"][index] = joints
88 |         dset[group]["joints_normalized"][index] = joints_normalized
89 |         dset[group]["joints3D"][index] = joints3D
90 |         dset[group]["joints3D_normalized"][index] = joints3D_normalized
91 |         dset[group]["path"][index] = path
92 | 
93 |     def transform_joints(self, joints, M):
94 |         joints_normalized = np.zeros(joints.shape)
95 | 
96 |         for joint in range(joints.shape[1]):
97 |             t = self._in.transform_point_2D(joints[:, joint], M)
98 |             joints_normalized[0, joint] = t[0]
99 |             joints_normalized[1, joint] = t[1]
100 |             joints_normalized[2, joint] = joints[2, joint]
101 | 
102 |         return joints_normalized
103 | 
104 | class MSRA_Dataset(Dataset):
105 | 
106 |     def __init__(self, path, save_dir, group_subjects):
107 | 
108 |         if not (os.path.exists(path)):
109 |             raise OSError("Directory doesn't exist")
110 |         self.path = path
111 |         self.save_dir = save_dir
112 |         if not isinstance(group_subjects, bool):
113 |             raise TypeError('group_subjects should be a boolean')
114 |         self.group_subjects = group_subjects
115 |         if self.group_subjects:
116 |             groups_list = ['P0', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8']
117 |         else:
118 |             groups_list = ['train']
119 |         super(MSRA_Dataset, self).__init__(241.42, 241.42, 160., 120., 21, groups_list)
120 | 
121 |     def load_image(self, img_dir):
122 |         """
123 |         Loads a depth image from a binary file. The file stores only the
124 |         bounding box of the hand, together with a header giving the image size and the box coordinates (img_width, img_height, left, top, right, bottom).
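        The layout parsed below is: a 24-byte header of six uint32 values in
        the order just listed, followed by (right - left) * (bottom - top)
        float32 depth values for the box region, in row-major order.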
125 | 
126 |         Keyword arguments:
127 |         img_dir -- the path of the binary file containing the depth image
128 | 
129 |         Return:
130 |         depth -- the depth image (missing pixels filled with a far background value); depth_copy -- the depth image before the background fill
131 |         """
132 |         with open(img_dir,'rb') as f:
133 | 
134 |             bbox_bin = f.read(24)  # read the first 6 uint32 values; uint32 = 4 bytes, so 4*6 = 24 bytes
135 |             bbox = struct.unpack('IIIIII',bbox_bin)
136 | 
137 |             f.seek(24)  # move past the 24-byte header to the first depth value
138 |             img_bin = f.read((bbox[4] - bbox[2])*(bbox[5] - bbox[3])*4)
139 |             img = struct.unpack('f'*(bbox[4] - bbox[2])*(bbox[5] - bbox[3]), img_bin)
140 |             img = np.array(img)
141 |             img = np.reshape(img, (bbox[5] - bbox[3], bbox[4] - bbox[2]))
142 |             depth = np.zeros((240,320)) # create an image filled with background
143 |             depth[bbox[3]:bbox[5],bbox[2]:bbox[4]] = img
144 |             depth_copy = depth.copy()
145 |             depth[depth==0.] = 2000.  # pixels with no reading are pushed to a far background value
146 |             return depth, depth_copy
147 | 
148 |     def _get_dataset_size(self):
149 | 
150 |         if self.group_subjects:
151 |             dataset_size = OrderedDict()
152 |             for g in self.groups_list:
153 |                 dataset_size[g] = 0
154 |             for path, dirs, files in os.walk(self.path):
155 |                 if (not dirs):
156 |                     joints_dir = os.path.join(path, 'joint.txt')
157 |                     group = os.path.basename(os.path.dirname(os.path.dirname(joints_dir)))
158 |                     with open(joints_dir, 'r') as f:
159 |                         num = f.readline()
160 |                         num = num.rstrip()
161 |                         num = int(num)
162 |                     dataset_size[group]+=num
163 |         else:
164 |             dataset_size = OrderedDict()
165 |             for g in self.groups_list:
166 |                 dataset_size[g] = 0
167 |             for path, dirs, files in os.walk(self.path):
168 |                 if (not dirs):
169 |                     joints_dir = os.path.join(path, 'joint.txt')
170 |                     with open(joints_dir, 'r') as f:
171 |                         num = f.readline()
172 |                         num = num.rstrip()
173 |                         num = int(num)
174 |                     dataset_size['train']+=num
175 |         return dataset_size
176 | 
177 |     def initialize_hdf5(self, f):
178 |         """
179 |         Initializes the dataset structure in HDF5 format
180 | 
181 |         Keyword arguments:
182 |         f -- HDF5 file (already open)
183 | 
184 |         Return:
185 |         dset -- object for accessing dataset attributes
186 |         """
187 |         grp = {}
188 |         dset = {}
189 |         for g in self.groups_list:
190 |             grp[g] = f.create_group(g)
191 |             dset[g] = {}
192 | 
193 |         for group in grp.keys():
194 |             dset[group]["depth_normalized"] = grp[group].create_dataset("depth_normalized", (self.dataset_size[group], 1, 128, 128), dtype = np.float32)
195 |             dset[group]["com3D"] = grp[group].create_dataset("com3D", (self.dataset_size[group], 3), dtype = np.float32)
196 |             dset[group]["T"] = grp[group].create_dataset("T", (self.dataset_size[group], 3, 3), dtype = np.float32)
197 |             dset[group]["joints"] = grp[group].create_dataset("joints", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
198 |             dset[group]["joints_normalized"] = grp[group].create_dataset("joints_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
199 |             dset[group]["joints3D"] = grp[group].create_dataset("joints3D", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
200 |             dset[group]["joints3D_normalized"] = grp[group].create_dataset("joints3D_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
201 |             dset[group]["path"] = grp[group].create_dataset("path", (self.dataset_size[group],), dtype = "S72")
202 |             dset[group]["subject"] = grp[group].create_dataset("subject", (self.dataset_size[group],), dtype = "S2")
203 |         return dset
204 | 
205 |     @staticmethod
206 |     def save_hdf5(dset, group, index, depth_normalized, com3D, joints, joints_normalized, joints3D, joints3D_normalized, M, path, subject):
207 | 
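        # Write one preprocessed sample into the preallocated HDF5 datasets;
        # this MSRA variant additionally records the subject id.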
208 |         dset[group]["depth_normalized"][index] = depth_normalized
209 |         dset[group]["com3D"][index] = com3D
210 |         dset[group]["T"][index] = M
211 |         dset[group]["joints"][index] = joints
212 |         dset[group]["joints_normalized"][index] = joints_normalized
213 |         dset[group]["joints3D"][index] = joints3D
214 |         dset[group]["joints3D_normalized"][index] = joints3D_normalized
215 |         dset[group]["path"][index] = path
216 |         dset[group]["subject"][index] = subject
217 | 
218 |     def convert_to_hdf5(self):
219 |         """
220 |         Walks the dataset directories, reads the depth images and joints and,
221 |         after centering and normalizing both, saves them in hdf5.
222 |         It also saves other useful information in hdf5, i.e. the 3D center of
223 |         mass of the hand and the transformation applied to the joints in UVD. Joints are saved in both UVD and XYZ
224 |         (the initial and the normalized versions).
225 | 
226 |         Keyword arguments:
227 |         --
228 |         Return:
229 |         --
230 |         """
231 |         if (not os.path.exists(self.save_dir)):
232 |             os.makedirs(self.save_dir)
233 | 
234 |         f = h5py.File(os.path.join(self.save_dir, self.__class__.__name__.split('_')[0]+'.hdf5'), 'w')
235 |         if self.group_subjects:
236 |             dset = super(MSRA_Dataset, self).initialize_hdf5(f)
237 |         else:
238 |             dset = self.initialize_hdf5(f)
239 |         index = OrderedDict()
240 |         for g in self.groups_list:
241 |             index[g] = 0
242 |         for path, dirs, files in os.walk(self.path):
243 |             if (not dirs):
244 |                 bins = [fname for fname in files if fname.split('.')[1] == 'bin']
245 |                 bins = sorted(bins, key = lambda fname: fname.split('_')[0])
246 |                 joints_dir = os.path.join(path, 'joint.txt')
247 |                 joints_list=[]
248 |                 with open(joints_dir,'r') as jf:  # separate handle: 'f' is the open HDF5 file
249 |                     for joints_txt in jf:
250 |                         joints = joints_txt.split(' ')
251 |                         joints = [float(j.rstrip()) for j in joints]
252 |                         joints_list.append(joints)
253 |                 joints3D_array = np.array(joints_list[1:len(joints_list)])
254 |                 for i, bin in enumerate(bins):
255 |                     bin_dir = os.path.join(path, bin)
256 |                     depth, depth_copy = self.load_image(bin_dir)
257 |                     com = self._in.calculate_com(depth_copy)
258 |                     com3D = self._in.uvd_to_xyz(com)
259 |                     depth_crop_scaled, M = self._in.crop_scale_depth(depth, com)
260 |                     joints3D = np.reshape(joints3D_array[i], (self.joints_num, 3))
261 |                     joints3D = np.swapaxes(joints3D, 0, 1)
262 |                     joints3D[2]*=-1
263 |                     joints3D_normalized, depth_normalized = self._in.joints3D_depth_normalization(joints3D, depth_crop_scaled, com3D)
264 |                     joints = self._in.xyz_to_uvd(joints3D)
265 |                     joints_normalized = self.transform_joints(joints, M)
266 | 
267 |                     # Reshape to 3*joints_num
268 |                     joints_res = np.swapaxes(joints, 0, 1)
269 |                     joints_res = np.reshape(joints_res, (3*self.joints_num,))
270 | 
271 |                     joints3D_res = np.swapaxes(joints3D, 0, 1)
272 |                     joints3D_res = np.reshape(joints3D_res, (3*self.joints_num,))
273 | 
274 |                     joints_norm_res = np.swapaxes(joints_normalized, 0, 1)
275 |                     joints_norm_res = np.reshape(joints_norm_res, (3*self.joints_num,))
276 | 
277 |                     joints3D_norm_res = np.swapaxes(joints3D_normalized, 0, 1)
278 |                     joints3D_norm_res = np.reshape(joints3D_norm_res, (3*self.joints_num,))
279 | 
280 |                     group = os.path.basename(os.path.dirname(path))
281 |                     dpt = np.reshape(depth_normalized, (1, 128, 128))
282 |                     if self.group_subjects:
283 |                         super(MSRA_Dataset, self).save_hdf5(dset, group, index[group], dpt.astype(np.float32), com3D.astype(np.float32), joints_res.astype(np.float32), joints_norm_res.astype(np.float32), joints3D_res.astype(np.float32), joints3D_norm_res.astype(np.float32), M.astype(np.float32), bin_dir)
284 |                         index[group]+=1
285 |                     else:
286 | 
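                        # Not grouping by subject: every sample goes into the
                        # single 'train' group, with the subject folder name
                        # stored through the trailing 'subject' argument.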
                        self.save_hdf5(dset, 'train', index['train'], dpt.astype(np.float32), com3D.astype(np.float32), joints_res.astype(np.float32), joints_norm_res.astype(np.float32), joints3D_res.astype(np.float32), joints3D_norm_res.astype(np.float32), M.astype(np.float32), bin_dir, group)
287 |                         index['train']+=1
288 |         f.close()
289 | 
290 | 
291 | class ICVL_Dataset(Dataset):
292 |     # TODO: Add a function for selecting only the original images
293 |     def __init__(self, path, save_dir):
294 | 
295 |         if not (os.path.exists(path)):
296 |             raise OSError("Directory doesn't exist")
297 |         self.path = path
298 |         self.save_dir = save_dir
299 |         groups_list = ['train', 'test1', 'test2']
300 |         super(ICVL_Dataset, self).__init__(241.42, 241.42, 160., 120., 16, groups_list)
301 | 
302 |     def _get_dataset_size(self):
303 | 
304 |         dataset_size = OrderedDict()
305 |         for g in self.groups_list:
306 |             dataset_size[g] = 0
307 |         subdirs = ['Training/labels.txt', 'Testing/test_seq_1.txt', 'Testing/test_seq_2.txt']
308 |         for grp, dir_ in zip(dataset_size, subdirs):
309 |             labels_dir = os.path.join(self.path, dir_)
310 |             with open(labels_dir, 'r') as f:
311 |                 i=0
312 |                 for line in f:
313 |                     line_split = line.split(' ', 1)
314 |                     if not os.path.exists(os.path.join(self.path, os.path.dirname(dir_), 'Depth', line_split[0])):
315 |                         continue
316 |                     i+=1
317 |             dataset_size[grp] = i
318 | 
319 |         return dataset_size
320 | 
321 |     def convert_to_hdf5(self):
322 | 
323 |         if (not os.path.exists(self.save_dir)):
324 |             os.makedirs(self.save_dir)
325 | 
326 |         f = h5py.File(os.path.join(self.save_dir, self.__class__.__name__.split('_')[0]+'.hdf5'), 'w')
327 |         dset = self.initialize_hdf5(f)
328 |         subdirs = ['Training/labels.txt', 'Testing/test_seq_1.txt', 'Testing/test_seq_2.txt']
329 |         for group, dir_ in zip(self.groups_list, subdirs):
330 |             depth_dir = os.path.join(self.path, os.path.dirname(dir_), 'Depth')
331 |             labels_dir = os.path.join(self.path, dir_)
332 | 
333 |             with open(labels_dir, 'r') as lf:  # keep 'f' for the open HDF5 file
334 |                 index=0
335 |                 for line in lf:
336 |                     line_split = line.split(' ', 1)
337 |                     img_dir = os.path.join(depth_dir, line_split[0])
338 |                     if not os.path.exists(img_dir):
339 |                         continue
340 |                     img = misc.imread(img_dir)
341 |                     img = img.astype(np.float32)
342 |                     joints = line_split[1].rstrip()
343 |                     joints = joints.split(' ')
344 |                     joints = np.asarray(joints, dtype = np.float32)
345 |                     joints = np.reshape(joints, (self.joints_num, 3))
346 |                     joints = np.swapaxes(joints,0,1)
347 |                     joints3D = self._in.uvd_to_xyz(joints)
348 |                     depth, M = self._in.crop_scale_depth(img, joints[:,0])
349 |                     joints3D_normalized, depth_normalized = self._in.joints3D_depth_normalization(joints3D, depth, joints3D[:,0])
350 |                     joints_normalized = self.transform_joints(joints, M)
351 |                     dpt = np.reshape(depth_normalized, (1, 128, 128))
352 |                     # Reshape to 3*joints_num
353 |                     joints_res = np.swapaxes(joints, 0, 1)
354 |                     joints_res = np.reshape(joints_res, (3*self.joints_num,))
355 | 
356 |                     joints3D_res = np.swapaxes(joints3D, 0, 1)
357 |                     joints3D_res = np.reshape(joints3D_res, (3*self.joints_num,))
358 | 
359 |                     joints_norm_res = np.swapaxes(joints_normalized, 0, 1)
360 |                     joints_norm_res = np.reshape(joints_norm_res, (3*self.joints_num,))
361 | 
362 |                     joints3D_norm_res = np.swapaxes(joints3D_normalized, 0, 1)
363 |                     joints3D_norm_res = np.reshape(joints3D_norm_res, (3*self.joints_num,))
364 | 
365 |                     self.save_hdf5(dset, group, index, dpt.astype(np.float32), joints3D[:,0].astype(np.float32), joints_res.astype(np.float32), joints_norm_res.astype(np.float32),
                                   joints3D_res.astype(np.float32), joints3D_norm_res.astype(np.float32), M.astype(np.float32), img_dir)
366 |                     index+=1
367 |         f.close()
368 | 
369 | class NYU_Dataset(Dataset):
370 | 
371 |     def __init__(self, path, save_dir):
372 |         if not (os.path.exists(path)):
373 |             raise OSError("Directory doesn't exist")
374 |         self.path = path
375 |         self.save_dir = save_dir
376 |         groups_list = ['train', 'test']
377 |         super(NYU_Dataset, self).__init__(588.036865, 587.075073, 320., 240., 14, groups_list)
378 |         self.selected_joints = [32, 3, 0, 9, 6, 15, 12, 21, 18, 27, 25, 24, 30, 31]
379 |         self._in = ImgNormalization(self.fx, self.fy, self.px, self.py, 300.)  # NYU uses a larger crop cube than the 250 default
380 |         self._in1 = ImgNormalization(self.fx, self.fy, self.px, self.py, 300.*0.87)  # smaller cube used for the later test-set subject
381 |         self.subject_change = 2440
382 | 
383 |     def _get_dataset_size(self):
384 | 
385 |         dataset_size = OrderedDict()
386 |         for g in self.groups_list:
387 |             dataset_size[g] = 0
388 |         for grp in dataset_size:
389 |             dir_ = '{0:s}/{1:s}/{2:s}'.format(self.path, grp, 'joint_data.mat')
390 |             joint_data = sio.loadmat(dir_)
391 |             joints = joint_data['joint_uvd'][0]
392 |             dataset_size[grp] = joints.shape[0]
393 |         return dataset_size
394 | 
395 |     def initialize_hdf5(self, f):
396 |         """
397 |         Initializes the dataset structure in HDF5 format
398 | 
399 |         Keyword arguments:
400 |         f -- HDF5 file (already open)
401 | 
402 |         Return:
403 |         dset -- object for accessing dataset attributes
404 |         """
405 |         grp = {}
406 |         dset = {}
407 |         for g in self.groups_list:
408 |             grp[g] = f.create_group(g)
409 |             dset[g] = {}
410 | 
411 |         for group in grp.keys():
412 |             dset[group]["depth_normalized"] = grp[group].create_dataset("depth_normalized", (self.dataset_size[group], 1, 128, 128), dtype = np.float32)
413 |             dset[group]["rgb_normalized"] = grp[group].create_dataset("rgb_normalized", (self.dataset_size[group], 4, 128, 128), dtype = np.float32)
414 |             dset[group]["com3D"] = grp[group].create_dataset("com3D", (self.dataset_size[group], 3), dtype = np.float32)
415 |             dset[group]["T"] = grp[group].create_dataset("T", (self.dataset_size[group], 3, 3), dtype = np.float32)
416 |             dset[group]["joints"] = grp[group].create_dataset("joints", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
417 |             dset[group]["joints_normalized"] = grp[group].create_dataset("joints_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
418 |             dset[group]["joints3D"] = grp[group].create_dataset("joints3D", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
419 |             dset[group]["joints3D_normalized"] = grp[group].create_dataset("joints3D_normalized", (self.dataset_size[group], 3*self.joints_num), dtype = np.float32)
420 | 
421 |         return dset
422 | 
423 |     @staticmethod
424 |     def save_hdf5(dset, group, index, depth_normalized, rgb_normalized, com3D, joints, joints_normalized, joints3D, joints3D_normalized, M):
425 | 
426 |         dset[group]["depth_normalized"][index] = depth_normalized
427 |         dset[group]["rgb_normalized"][index] = rgb_normalized
428 |         dset[group]["com3D"][index] = com3D
429 |         dset[group]["T"][index] = M
430 |         dset[group]["joints"][index] = joints
431 |         dset[group]["joints_normalized"][index] = joints_normalized
432 |         dset[group]["joints3D"][index] = joints3D
433 |         dset[group]["joints3D_normalized"][index] = joints3D_normalized
434 | 
435 |     def load_image(self, img_dir):
436 |         """
437 |         Loads a depth image from a PNG file. The 16-bit depth value of each
438 |         pixel is packed into the PNG's green (top 8 bits) and blue (bottom 8 bits) channels.
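        For example, a pixel with 8-bit channel values g and b decodes to
        depth = (g << 8) | b, which is exactly what the bit-shifting below
        computes before the cast to float32.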
439 | 
440 |         Keyword arguments:
441 |         img_dir -- the path of the PNG file containing the packed depth image
442 | 
443 |         Return:
444 |         depth -- the decoded depth image (float32)
445 |         """
446 | 
447 |         img = imageio.imread(img_dir)
448 |         _, g, b = np.split(img, 3, axis=2)
449 |         g = np.squeeze(g)
450 |         b = np.squeeze(b)
451 |         g = g.astype(np.int32)
452 |         b = b.astype(np.int32)
453 |         depth = np.bitwise_or(np.left_shift(g,8), b)
454 |         depth = depth.astype(np.float32)
455 | 
456 |         return depth
457 | 
458 |     def convert_to_hdf5(self):
459 | 
460 | 
461 |         if (not os.path.exists(self.save_dir)):
462 |             os.makedirs(self.save_dir)
463 | 
464 |         f = h5py.File(os.path.join(self.save_dir, self.__class__.__name__.split('_')[0]+'.hdf5'), 'w')
465 |         dset = self.initialize_hdf5(f)
466 | 
467 |         for group in self.groups_list:
468 | 
469 |             labels_dir = '{0:s}/{1:s}/joint_data.mat'.format(self.path, group)
470 |             joint_data = sio.loadmat(labels_dir)
471 |             joints = joint_data['joint_uvd'][0]
472 |             joints3D = joint_data['joint_xyz'][0]
473 |             joints = joints[:, self.selected_joints, :]
474 |             joints3D = joints3D[:, self.selected_joints, :]
475 |             joints = np.swapaxes(joints, 1, 2)
476 |             joints3D = np.swapaxes(joints3D, 1, 2)
477 | 
478 |             for index in range(joints.shape[0]):
479 |                 depth_dir = '{0:s}/{1:s}/depth_1_{2:07d}.png'.format(self.path, group, index+1)
480 |                 rgb_dir = '{0:s}/{1:s}/rgb_1_{2:07d}.png'.format(self.path, group, index+1)
481 |                 depth = self.load_image(depth_dir)
482 |                 rgb = cv2.imread(rgb_dir)
483 |                 rgb = rgb[:,:,::-1]  # cv2 loads BGR; flip to RGB
484 |                 if group == 'test' and index >= self.subject_change:
485 |                     depth, M = self._in1.crop_scale_depth(depth, joints[index, :, 0])
486 |                     joints3D_normalized, depth_normalized = self._in1.joints3D_depth_normalization(joints3D[index, :, :], depth, joints3D[index, :, 0])
487 |                     rgb, msk = self._in1.crop_scale_rgb(rgb, depth_normalized, joints[index, :, 0])
488 |                     rgb = np.rollaxis(rgb, 2)
489 |                     rgb = rgb.astype(np.float32)
490 |                     msk = msk.astype(np.float32)
491 |                     rgb/=255.
492 |                     msk = np.reshape(msk, (1, 128, 128))
493 |                     rgb = np.vstack((rgb, msk))
494 |                 else:
495 |                     depth, M = self._in.crop_scale_depth(depth, joints[index, :, 0])
496 |                     joints3D_normalized, depth_normalized = self._in.joints3D_depth_normalization(joints3D[index, :, :], depth, joints3D[index, :, 0])
497 |                     rgb, msk = self._in.crop_scale_rgb(rgb, depth_normalized, joints[index, :, 0])
498 |                     rgb = np.rollaxis(rgb, 2)
499 |                     rgb = rgb.astype(np.float32)
500 |                     msk = msk.astype(np.float32)
501 |                     rgb/=255.
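                    # Append the binary hand mask as a fourth channel, giving
                    # the 4x128x128 tensor stored in 'rgb_normalized'.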
502 |                     msk = np.reshape(msk, (1, 128, 128))
503 |                     rgb = np.vstack((rgb, msk))
504 |                 joints_normalized = self.transform_joints(joints[index, :, :], M)
505 |                 dpt = np.reshape(depth_normalized, (1, 128, 128))
506 |                 # Reshape to 3*joints_num
507 |                 joints_res = np.swapaxes(joints[index, :, :], 0, 1)
508 |                 joints_res = np.reshape(joints_res, (3*self.joints_num,))
509 | 
510 |                 joints3D_res = np.swapaxes(joints3D[index, :, :], 0, 1)
511 |                 joints3D_res = np.reshape(joints3D_res, (3*self.joints_num,))
512 | 
513 |                 joints_norm_res = np.swapaxes(joints_normalized, 0, 1)
514 |                 joints_norm_res = np.reshape(joints_norm_res, (3*self.joints_num,))
515 | 
516 |                 joints3D_norm_res = np.swapaxes(joints3D_normalized, 0, 1)
517 |                 joints3D_norm_res = np.reshape(joints3D_norm_res, (3*self.joints_num,))
518 | 
519 |                 self.save_hdf5(dset, group, index, dpt.astype(np.float32), rgb.astype(np.float32), joints3D[index, :, 0].astype(np.float32), joints_res.astype(np.float32), joints_norm_res.astype(np.float32), joints3D_res.astype(np.float32), joints3D_norm_res.astype(np.float32), M.astype(np.float32))
520 | 
521 |         f.close()
522 | 
523 |     @staticmethod
524 |     def compute_mean_dataset(dataset_dir):
525 |         nyu_dir = os.path.join(dataset_dir, 'NYU.hdf5')
526 |         if not os.path.exists(nyu_dir):
527 |             raise IOError('{0:s} could not be found. Please enter a valid hdf5 file for NYU dataset.'.format(nyu_dir))
528 |         with h5py.File(nyu_dir, 'r') as f:
529 |             mean = np.zeros((3,))
530 |             std = np.zeros((3,))
531 |             N = 0
532 |             # Compute dataset mean
533 |             for i in xrange(f["train/rgb_normalized"].shape[0]):
534 |                 r, g, b, m = f["train/rgb_normalized"][i]
535 |                 m=m.astype(np.int)
536 |                 m=np.bitwise_not(m.astype(np.bool))
537 |                 mean[0] += np.sum(r[m])
538 |                 mean[1] += np.sum(g[m])
539 |                 mean[2] += np.sum(b[m])
540 |                 N += np.sum(m.astype(np.int))
541 | 
542 |             mean/=N
543 |             # Compute dataset standard deviation
544 |             for i in xrange(f["train/rgb_normalized"].shape[0]):
545 |                 r, g, b, m = f["train/rgb_normalized"][i]
546 |                 m=m.astype(np.int)
547 |                 m=np.bitwise_not(m.astype(np.bool))
548 |                 std[0] += np.sum(np.square(r[m]-mean[0]))
549 |                 std[1] += np.sum(np.square(g[m]-mean[1]))
550 |                 std[2] += np.sum(np.square(b[m]-mean[2]))
551 | 
552 |             std/=N-1  # note: 'std' actually holds the unbiased variance; np.sqrt is applied at normalization time
553 | 
554 |             np.savez(os.path.join(dataset_dir, 'mean_std.npz'), mean, std)
555 | 
556 |     @staticmethod
557 |     def normalize_dataset(dataset_dir):
558 |         mean_std_hand_dir = os.path.join(dataset_dir, 'mean_std_hand.npz')
559 |         mean_std_bg_dir = os.path.join(dataset_dir, 'mean_std_bg.npz')
560 |         if not (os.path.exists(mean_std_hand_dir)):
561 |             raise IOError('{0:s} could not be found. Please enter a valid file with the mean and the standard deviation of the dataset.'.format(mean_std_hand_dir))
562 |         if not (os.path.exists(mean_std_bg_dir)):
563 |             raise IOError('{0:s} could not be found. Please enter a valid file with the mean and the standard deviation of the dataset.'.format(mean_std_bg_dir))
564 |         mean_std_hand = np.load(os.path.join(mean_std_hand_dir))
565 |         mean_std_bg = np.load(os.path.join(mean_std_bg_dir))
566 |         mean_hand = mean_std_hand['arr_0']
567 |         std_hand = mean_std_hand['arr_1']
568 |         mean_bg = mean_std_bg['arr_0']
569 |         std_bg = mean_std_bg['arr_1']
570 |         nyu_dir = os.path.join(dataset_dir, 'NYU.hdf5')
571 |         if not os.path.exists(nyu_dir):
572 |             raise IOError('{0:s} could not be found. Please enter a valid hdf5 file for NYU dataset.'.format(nyu_dir))
573 |         with h5py.File(nyu_dir, 'r+') as f:
574 |             for i in xrange(f["train/rgb_normalized"].shape[0]):
575 |                 rgb = np.rollaxis(f["train/rgb_normalized"][i], 0, start=3)
576 |                 rgb[rgb[:,:,3].astype(np.int).astype(np.bool), 0:3] = (rgb[rgb[:,:,3].astype(np.int).astype(np.bool), 0:3] - mean_hand.astype(np.float32)) / np.sqrt(std_hand.astype(np.float32))
577 |                 rgb[np.bitwise_not(rgb[:,:,3].astype(np.int).astype(np.bool)), 0:3] = (rgb[np.bitwise_not(rgb[:,:,3].astype(np.int).astype(np.bool)), 0:3] - mean_bg.astype(np.float32)) / np.sqrt(std_bg.astype(np.float32))
578 |                 f["train/rgb_normalized"][i] = np.rollaxis(rgb, 2)
579 |             for i in xrange(f["test/rgb_normalized"].shape[0]):
580 |                 rgb = np.rollaxis(f["test/rgb_normalized"][i], 0, start=3)
581 |                 rgb[rgb[:,:,3].astype(np.int).astype(np.bool), 0:3] = (rgb[rgb[:,:,3].astype(np.int).astype(np.bool), 0:3] - mean_hand.astype(np.float32)) / np.sqrt(std_hand.astype(np.float32))
582 |                 rgb[np.bitwise_not(rgb[:,:,3].astype(np.int).astype(np.bool)), 0:3] = (rgb[np.bitwise_not(rgb[:,:,3].astype(np.int).astype(np.bool)), 0:3] - mean_bg.astype(np.float32)) / np.sqrt(std_bg.astype(np.float32))
583 |                 f["test/rgb_normalized"][i] = np.rollaxis(rgb, 2)
--------------------------------------------------------------------------------
/trainingtesting.py:
--------------------------------------------------------------------------------
1 | """
2 | This module implements the training and testing procedures of the ConvNet. They
3 | are implemented in two different classes: Training and Testing. Each class
4 | implements all the necessary tools that are needed, such as batch generators,
5 | theano functions compilation etc. A base class, TrainingTesting, provides some
6 | general tools that the other two classes use.
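
A minimal usage sketch (the dataset directory and the hyper-parameter values
are illustrative assumptions, not fixed defaults; it also assumes the
train/validation split file './train_test_splits/nyu_split.npz' created with
the splitdatasets module):

    from trainingtesting import Training

    net_specs = {'num_conv_layers': 9,
                 'num_conv_filters': (32, 32, 64, 64, 128, 128, 128, 128, 128),
                 'conv_filter_size': (3,)*9,
                 'conv_pad': (1,)*9,
                 'num_fc_units': (4096, 4096)}
    training = Training('./datasets', 14, 'NYU', 'train', 'simple',
                        num_epochs=100, patience=5,
                        net_specs_dict=net_specs,
                        model_hp_dict={'p': 0.1},
                        opt_hp_dict={'lr': 0.01, 'mom': 0.9},
                        input_channels=1)
    training_information = training.train(save_model=True)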
7 | """
8 | import time
9 | import os
10 | import cPickle as pickle
11 | import h5py
12 | import numpy as np
13 | import theano.tensor as T
14 | import theano
15 | import lasagne
16 | from networks import ConvNet
17 | from saveloadweights import SaveWeights, LoadWeights
18 | from earlystopping import EarlyStopping
19 | from batchgenerators import BatchGenerator
20 | from splitdatasets import load_dsets_trainval
21 | from randomsearch import sample_hyperparams, save_hyperparams
22 | 
23 | 
24 | class TrainingTesting(object):
25 |     SIMPLE = 'simple'
26 |     AUTOENCODING = 'autoencoding'
27 |     CONV_FUSING = 'conv_fusing'
28 |     DENSE_FUSING = 'dense_fusing'
29 |     SCORE_FUSING = 'score_fusing'
30 |     INPUT_FUSING = 'input_fusing'
31 |     MSRA = 'MSRA'
32 |     NYU = 'NYU'
33 |     ICVL = 'ICVL'
34 | 
35 |     def __init__(self, dataset_dir, net_specs_dict, model_hp_dict, num_joints, dataset,
36 |                  group, network_type, input_channels=None, fusion_level=None,
37 |                  fusion_type=None):
38 |         self.convnet = ConvNet(net_specs_dict, model_hp_dict, num_joints)
39 |         self._datasets_dir = dataset_dir
40 |         if dataset not in [self.ICVL, self.MSRA, self.NYU]:
41 |             raise ValueError("dataset can take one of the following values:"
42 |                              + " 'MSRA', 'NYU', 'ICVL'")
43 |         self._network_type = network_type
44 |         self._dataset = dataset
45 |         self._group = group
46 |         self._input_channels = input_channels
47 |         self._fusion_level = fusion_level
48 |         self._fusion_type = fusion_type
49 | 
50 | 
51 | class Training(TrainingTesting):
52 |     """
53 |     This class implements the training procedure of the convnet
54 |     """
55 | 
56 |     def __init__(self, dataset_dir, num_joints, dataset, group, network_type, num_epochs,
57 |                  patience, net_specs_dict, model_hp_dict=None,
58 |                  opt_hp_dict=None, validate=True, input_channels=None,
59 |                  fusion_level=None, fusion_type=None, weights_dir=None):
60 |         if model_hp_dict is None and opt_hp_dict is None:
61 |             opt_hp_dict, model_hp_dict = sample_hyperparams([0.001, 0.1],
62 |                                                             [0.5, 1],
63 |                                                             [0.0, 0.1])
64 |             self._save_settings = True
65 |         else:
66 |             self._save_settings = False
67 |         super(Training, self).__init__(
68 |             dataset_dir, net_specs_dict, model_hp_dict, num_joints, dataset, group,
69 |             network_type, input_channels=input_channels,
70 |             fusion_level=fusion_level, fusion_type=fusion_type)
71 |         if network_type not in [self.SIMPLE, self.AUTOENCODING, self.CONV_FUSING,
72 |                                 self.DENSE_FUSING, self.SCORE_FUSING,
73 |                                 self.INPUT_FUSING]:
74 |             raise ValueError("Network types can take one of the following"
75 |                              + " values: 'simple', 'autoencoding', 'conv_fusing',"
76 |                              + " 'dense_fusing', 'score_fusing',"
77 |                              + " 'input_fusing'")
78 |         self._model_hp_dict = model_hp_dict
79 |         self._opt_hp_dict = opt_hp_dict
80 |         self._num_epochs = num_epochs
81 |         self._patience = patience
82 |         if not isinstance(validate, bool):
83 |             raise TypeError('validate should be boolean')
84 |         self._validate = validate
85 |         self._weights_dir = weights_dir
86 |         return
87 | 
88 |     def _compile_functions(self):
89 |         if self._network_type == self.SIMPLE:
90 |             input_var = T.tensor4('inputs')
91 |         else:
92 |             input_var1 = T.tensor4('inputs_rgb')
93 |             input_var2 = T.tensor4('inputs_depth')
94 |         target_var = T.matrix('targets')
95 |         # bottleneck_W = np.load('nyu_princ_comp_pose.npz')
96 |         # bottleneck_W = bottleneck_W['arr_0']
97 |         lr = theano.shared(np.array(self._opt_hp_dict['lr'],
98 |                                     dtype=theano.config.floatX))
99 |         lr_decay = np.array(0.1, dtype=theano.config.floatX)
100 |         mom = theano.shared(np.array(self._opt_hp_dict['mom'],
101 |                                      dtype=theano.config.floatX))
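        # lr and mom are Theano shared variables so that their values can be
        # adjusted in place during training (e.g. the learning-rate decay
        # driven by the early-stopping helpers) without recompiling anything.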
102 |         print 'Building the ConvNet...\n'
103 |         if self._network_type == self.SIMPLE:
104 |             net = self.convnet.simple_convnet(self._input_channels,
105 |                                               input_var=input_var)
106 |         elif self._network_type == self.CONV_FUSING:
107 |             net = self.convnet.fused_convnets(self._fusion_level,
108 |                                               self._fusion_type,
109 |                                               input_var1=input_var1,
110 |                                               input_var2=input_var2,
111 |                                               weights_dir=self._weights_dir)
112 |         elif self._network_type == self.INPUT_FUSING:
113 |             net = self.convnet.input_fused_convnets(self._fusion_type,
114 |                                                     input_var1=input_var1,
115 |                                                     input_var2=input_var2)
116 |         elif self._network_type == self.DENSE_FUSING:
117 |             net = self.convnet.dense_fused_convnets(
118 |                 self._fusion_level, self._fusion_type,
119 |                 input_var1=input_var1, input_var2=input_var2,
120 |                 weights_dir=self._weights_dir)
121 |         elif self._network_type == self.SCORE_FUSING:
122 |             net = self.convnet.score_fused_convnets(
123 |                 self._fusion_type, input_var1=input_var1,
124 |                 input_var2=input_var2,
125 |                 weights_dir=self._weights_dir)
126 |         print 'Compiling theano functions...\n'
127 |         train_pred = lasagne.layers.get_output(net['output'],
128 |                                                deterministic=False)
129 |         val_pred = lasagne.layers.get_output(net['output'], deterministic=True)
130 |         train_loss = lasagne.objectives.squared_error(train_pred, target_var)
131 |         train_loss = 1 / 2.0 * T.mean(T.sum(train_loss, axis=1))  # batch-averaged sum-of-squared-errors over the 3*num_joints outputs
132 |         val_loss = lasagne.objectives.squared_error(val_pred, target_var)
133 |         val_loss = 1 / 2.0 * T.mean(T.sum(val_loss, axis=1))
134 |         params = lasagne.layers.get_all_params(net['output'], trainable=True)
135 |         updates = lasagne.updates.nesterov_momentum(train_loss, params,
136 |                                                     learning_rate=lr,
137 |                                                     momentum=mom)
138 |         if self._network_type == self.SIMPLE:
139 |             fn_train = theano.function([input_var, target_var], [
140 |                 train_loss], updates=updates)
141 |             fn_val = theano.function([input_var, target_var], [val_loss])
142 |         else:
143 |             fn_train = theano.function([input_var1, input_var2, target_var], [
144 |                 train_loss], updates=updates)
145 |             fn_val = theano.function([input_var1, input_var2, target_var], [
146 |                 val_loss])
147 |         return (fn_train, fn_val, net, lr, lr_decay)
148 | 
149 |     def _training_loop(self, bg_train, bg_val, fn_train, fn_val, lr,
150 |                        lr_decay, sw=None, es=None):
151 |         """
152 |         This function performs the training loop shared by the simple and the
153 |         fused convnets: the parameters are updated through backprop and the
154 |         training/validation losses are reported after every epoch.
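        When the early-stopping helper signals a rollback, the loop rewinds
        epoch to the best epoch seen so far and keeps training from there;
        time_back counts how many times this rewinding has happened.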
155 | 
156 |         Keyword arguments:
157 | 
158 |         bg_train -- batch generator for the training set
159 |         bg_val -- batch generator for the validation set
160 |         fn_train -- theano function that performs the parameter updates and
161 |                     computes the training loss
162 |         fn_val -- theano function that computes the validation loss
163 |         lr -- learning rate (theano shared variable)
164 |         lr_decay -- learning rate decay constant (we use a constant decay policy)
165 |         sw -- instance of the SaveWeights class (default: None)
166 |         es -- instance of the EarlyStopping class (default: None)
167 |         """
168 |         training_information = {}
169 |         train_loss_d = []
170 |         val_loss_d = []
171 |         epoch = 0
172 |         if es is not None or sw is not None:
173 |             time_back = 0
174 |         while epoch < self._num_epochs:
175 |             train_loss = 0
176 |             train_batches = 0
177 |             start_time = time.time()
178 |             for batch in bg_train.generate_batches(self._input_channels):
179 |                 if self._network_type == self.SIMPLE:
180 |                     X_batch, y_batch = batch
181 |                     loss = fn_train(X_batch, y_batch)
182 |                 else:
183 |                     X_batch_rgb, X_batch_depth, y_batch = batch
184 |                     loss = fn_train(X_batch_rgb, X_batch_depth, y_batch)
185 |                 train_loss += loss[0]
186 |                 train_batches += 1
187 | 
188 |             train_loss /= train_batches
189 |             train_loss_d.append(train_loss)
190 |             val_loss = 0
191 |             val_batches = 0
192 |             for batch in bg_val.generate_batches(self._input_channels,
193 |                                                  batch_size=1):
194 |                 if self._network_type == self.SIMPLE:
195 |                     X_batch, y_batch = batch
196 |                     loss = fn_val(X_batch, y_batch)
197 |                 else:
198 |                     X_batch_rgb, X_batch_depth, y_batch = batch
199 |                     loss = fn_val(X_batch_rgb, X_batch_depth, y_batch)
200 |                 val_loss += loss[0]
201 |                 val_batches += 1
202 | 
203 |             val_loss /= val_batches
204 |             val_loss_d.append(val_loss)
205 |             print 'Epoch: {0:d}. Completion time: {1:.3f}s'.format(
206 |                 epoch + 1, time.time() - start_time)
207 |             print 'Train loss: {0:.5f}\t\tValidation loss: {1:.5f}\t\t'\
208 |                 'Ratio (Val/Train): {2:.5f}'.format(train_loss, val_loss,
209 |                                                     val_loss / train_loss)
210 |             print '--------------------------------------------------------'\
211 |                 + '-----------------------------------'
212 |             if sw is not None:
213 |                 stop, go_back = sw.early_stopping_with_lr_decay(val_loss,
214 |                                                                 epoch, lr,
215 |                                                                 time_back)
216 |                 if stop and not go_back or epoch == self._num_epochs - 1:
217 |                     sw.save_weights_numpy()
218 |                     break
219 |                 if stop and go_back:
220 |                     time_back += 1
221 |                     epoch = sw.best_epoch - 1
222 |             elif es is not None:
223 |                 stop, go_back = es.early_stopping_with_lr_decay(val_loss,
224 |                                                                 epoch, lr,
225 |                                                                 time_back)
226 |                 if stop and not go_back or epoch == self._num_epochs - 1:
227 |                     break
228 |                 if stop and go_back:
229 |                     time_back += 1
230 |                     epoch = es.best_epoch - 1
231 |             epoch += 1
232 | 
233 |         training_information['train_loss'] = train_loss_d
234 |         training_information['val_loss'] = val_loss_d
235 |         return training_information
236 | 
237 |     def train(self, save_model=False, save_loss=False,
238 |               early_stopping=True, shuffle=False):
239 |         """
240 |         This function performs the training of our ConvNets. It compiles the
241 |         theano functions and performs the parameter updates
242 |         (by calling _compile_functions), saves several useful pieces of
243 |         information during training, and stops via early stopping, at which
244 |         point the model parameters are also saved. All the basic components
245 |         are described below together with their respective modules/functions:
246 |         1) functions compilation: Training._compile_functions (module:
247 |            trainingtesting). Here you can also find optimization details
248 |            such as the regularization term in the loss for the autoencoder
249 |         2) load/save weights, early stopping: SaveWeights,
250 |            LoadWeights (module: saveloadweights)
251 |         3) networks definitions: module: networks.py. Here you can find
252 |            details related to network design choices as well as
253 |            regularization layers (e.g. dropout) or other techniques such as
254 |            tied weights in the autoencoder.
255 |         """
256 |         dataset = os.path.join(self._datasets_dir, self._dataset)
257 |         dataset += '.hdf5'
258 |         dset = h5py.File(dataset, 'r')
259 | 
260 |         fn_train, fn_val, net, lr, lr_decay = self._compile_functions()
261 | 
262 |         if not isinstance(save_model, bool):
263 |             raise TypeError('save_model should be boolean')
264 |         if save_model:
265 |             models_dir = './models'
266 |             if not os.path.exists(models_dir):
267 |                 os.mkdir(models_dir)
268 | 
269 |             if self._network_type == self.SIMPLE:
270 |                 if self._input_channels == 1:
271 |                     input_type = 'depth'
272 |                 elif self._input_channels == 4:
273 |                     input_type = 'rgb'
274 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
275 |                     self._dataset, self._network_type, input_type,
276 |                     self.convnet._model_hp_dict['p'])
277 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
278 |                                  self._patience, 'loss')
279 |             elif self._network_type == self.CONV_FUSING:
280 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
281 |                     self._dataset, self._network_type, self._fusion_type,
282 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
283 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
284 |                                  self._patience, 'loss')
285 |             elif self._network_type == self.DENSE_FUSING:
286 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
287 |                     self._dataset, self._network_type, self._fusion_type,
288 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
289 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
290 |                                  self._patience, 'loss')
291 |             elif self._network_type == self.SCORE_FUSING:
292 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
293 |                     self._dataset, self._network_type, self._fusion_type,
294 |                     self.convnet._model_hp_dict['p'])
295 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
296 |                                  self._patience, 'loss')
297 |             elif self._network_type == self.INPUT_FUSING:
298 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
299 |                     self._dataset, self._network_type, self._fusion_type,
300 |                     self.convnet._model_hp_dict['p'])
301 |                 sw = SaveWeights(os.path.join(models_dir, save_dir), net,
302 |                                  self._patience, 'loss')
303 |         elif early_stopping:
304 |             es = EarlyStopping(net, self._patience, 'loss')
305 |         if self._validate:
306 |             idx_train, idx_val = load_dsets_trainval(
307 |                 './train_test_splits/nyu_split.npz')
308 |             bg_train = BatchGenerator(dset, self._dataset, self._group,
309 |                                       iterable=idx_train, shuffle=shuffle)
310 |             bg_val = BatchGenerator(dset, self._dataset, self._group,
311 |                                     iterable=idx_val, shuffle=shuffle)
312 |         else:
313 |             bg_train = BatchGenerator(dset, self._dataset, self._group,
314 |                                       shuffle=shuffle)
            bg_val = bg_train  # _training_loop always expects a validation generator; fall back to the training set when no split is used
315 |         print 'Training started...\n'
316 |         if save_model:
317 |             training_information = self._training_loop(
318 |                 bg_train, bg_val, fn_train, fn_val, lr, lr_decay, sw=sw)
319 |         elif early_stopping:
320 |             training_information = self._training_loop(
321 |                 bg_train, bg_val, fn_train, fn_val, lr, lr_decay, es=es)
322 |         else:
323 |             training_information = self._training_loop(
324 |                 bg_train, bg_val, fn_train, fn_val, lr, lr_decay)
325 |         if self._save_settings:
326 |             settings_dir = './settings'
327 |             if not 
os.path.exists(settings_dir):
328 |                 os.mkdir(settings_dir)
329 |             val_loss_array = np.array(training_information['val_loss'])
330 |             best_loss = np.amin(val_loss_array)
331 |             if self._network_type == self.SIMPLE:
332 |                 if self._input_channels == 1:
333 |                     input_type = 'depth'
334 |                 elif self._input_channels == 4:
335 |                     input_type = 'rgb'
336 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
337 |                     self._dataset, self._network_type, input_type,
338 |                     self.convnet._model_hp_dict['p'])
339 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
340 |                                  self._opt_hp_dict, self._model_hp_dict,
341 |                                  best_loss)
342 |             elif self._network_type == self.CONV_FUSING:
343 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
344 |                     self._dataset, self._network_type, self._fusion_type,
345 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
346 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
347 |                                  self._opt_hp_dict, self._model_hp_dict,
348 |                                  best_loss)
349 |             elif self._network_type == self.DENSE_FUSING:
350 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
351 |                     self._dataset, self._network_type, self._fusion_type,
352 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
353 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
354 |                                  self._opt_hp_dict, self._model_hp_dict,
355 |                                  best_loss)
356 |             elif self._network_type == self.SCORE_FUSING:
357 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
358 |                     self._dataset, self._network_type, self._fusion_type,
359 |                     self.convnet._model_hp_dict['p'])
360 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
361 |                                  self._opt_hp_dict, self._model_hp_dict,
362 |                                  best_loss)
363 |             elif self._network_type == self.INPUT_FUSING:
364 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
365 |                     self._dataset, self._network_type, self._fusion_type,
366 |                     self.convnet._model_hp_dict['p'])
367 |                 save_hyperparams(os.path.join(settings_dir, save_dir),
368 |                                  self._opt_hp_dict, self._model_hp_dict,
369 |                                  best_loss)
370 |         if save_loss:
371 |             train_val_loss_dir = './train_val_loss'
372 |             if not os.path.exists(train_val_loss_dir):
373 |                 os.mkdir(train_val_loss_dir)
374 |             if self._network_type == self.SIMPLE:
375 |                 if self._input_channels == 1:
376 |                     input_type = 'depth'
377 |                 elif self._input_channels == 4:
378 |                     input_type = 'rgb'
379 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
380 |                     self._dataset, self._network_type, input_type,
381 |                     self.convnet._model_hp_dict['p'])
382 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
383 |                 if not os.path.exists(save_dir):
384 |                     os.makedirs(save_dir)
385 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
386 |                         as f:
387 |                     pickle.dump(training_information, f,
388 |                                 protocol=pickle.HIGHEST_PROTOCOL)
389 |             elif self._network_type == self.CONV_FUSING:
390 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
391 |                     self._dataset, self._network_type, self._fusion_type,
392 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
393 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
394 |                 if not os.path.exists(save_dir):
395 |                     os.makedirs(save_dir)
396 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
397 |                         as f:
398 |                     pickle.dump(training_information, f,
399 |                                 protocol=pickle.HIGHEST_PROTOCOL)
400 |             elif self._network_type == self.DENSE_FUSING:
401 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
402 |                     self._dataset, self._network_type, self._fusion_type,
403 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
404 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
405 |                 if not 
os.path.exists(save_dir):
406 |                     os.makedirs(save_dir)
407 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
408 |                         as f:
409 |                     pickle.dump(training_information, f,
410 |                                 protocol=pickle.HIGHEST_PROTOCOL)
411 |             elif self._network_type == self.SCORE_FUSING:
412 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
413 |                     self._dataset, self._network_type, self._fusion_type,
414 |                     self.convnet._model_hp_dict['p'])
415 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
416 |                 if not os.path.exists(save_dir):
417 |                     os.makedirs(save_dir)
418 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
419 |                         as f:
420 |                     pickle.dump(training_information, f,
421 |                                 protocol=pickle.HIGHEST_PROTOCOL)
422 |             elif self._network_type == self.INPUT_FUSING:
423 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
424 |                     self._dataset, self._network_type, self._fusion_type,
425 |                     self.convnet._model_hp_dict['p'])
426 |                 save_dir = os.path.join(train_val_loss_dir, save_dir)
427 |                 if not os.path.exists(save_dir):
428 |                     os.makedirs(save_dir)
429 |                 with open(os.path.join(save_dir, 'train_val_loss.pkl'), 'wb')\
430 |                         as f:
431 |                     pickle.dump(training_information, f,
432 |                                 protocol=pickle.HIGHEST_PROTOCOL)
433 |         return training_information
434 | 
435 | 
436 | class Testing(TrainingTesting):
437 | 
438 |     def __init__(self, net_specs_dict, model_hp_dict, num_joints, dataset,
439 |                  group, network_type, input_channels=None, fusion_level=None,
440 |                  fusion_type=None, score_fusion=None, dataset_dir='.'):
        # The parent class also expects a dataset_dir; it is accepted here as
        # a keyword (defaulting, by assumption, to the current directory) and
        # forwarded, so existing call sites that omit it keep working.
441 |         super(Testing, self).__init__(dataset_dir, net_specs_dict,
442 |                                       model_hp_dict, num_joints, dataset,
443 |                                       group, network_type,
444 |                                       input_channels=input_channels,
445 |                                       fusion_level=fusion_level, fusion_type=fusion_type)
446 |         self._score_fusion = score_fusion
447 | 
448 |     def _compile_functions(self, weights_dir):
449 |         if self._network_type == self.SIMPLE:
450 |             input_var = T.tensor4('inputs')
451 |         else:
452 |             input_var1 = T.tensor4('inputs_rgb')
453 |             input_var2 = T.tensor4('inputs_depth')
454 |         # bottleneck_W = np.load('nyu_princ_comp_pose.npz')
455 |         # bottleneck_W = bottleneck_W['arr_0']
456 |         print 'Building the ConvNet...\n'
457 |         if self._network_type == self.SIMPLE:
458 |             net = self.convnet.simple_convnet(self._input_channels,
459 |                                               input_var=input_var)
460 |         elif self._network_type == self.CONV_FUSING:
461 |             net = self.convnet.fused_convnets(self._fusion_level,
462 |                                               self._fusion_type,
463 |                                               input_var1=input_var1,
464 |                                               input_var2=input_var2)
465 |         elif self._network_type == self.INPUT_FUSING:
466 |             net = self.convnet.input_fused_convnets(self._fusion_type,
467 |                                                     input_var1=input_var1,
468 |                                                     input_var2=input_var2)
469 |         elif self._network_type == self.DENSE_FUSING:
470 |             net = self.convnet.dense_fused_convnets(
471 |                 self._fusion_level, self._fusion_type,
472 |                 input_var1=input_var1, input_var2=input_var2)
473 |         elif self._network_type == self.SCORE_FUSING:
474 |             net = self.convnet.score_fused_convnets(
475 |                 self._fusion_type, input_var1=input_var1,
476 |                 input_var2=input_var2)
477 |         lw = LoadWeights(weights_dir, net)
478 |         lw.load_weights_numpy()
479 |         pred = lasagne.layers.get_output(net['output'], deterministic=True)
480 |         if self._network_type == self.SIMPLE:
481 |             fn_pred = theano.function([input_var], pred)
482 |         else:
483 |             fn_pred = theano.function([input_var1, input_var2], pred)
484 |         return fn_pred
485 | 
486 |     def predict(self, weights_dir, save_preds=True):
487 |         dataset = os.path.join(self._datasets_dir, self._dataset)
488 |         dataset += '.hdf5'
489 |         dset = h5py.File(dataset, 'r')
490 |         fn_pred = self._compile_functions(weights_dir)
491 |         bg_test = BatchGenerator(dset, self._dataset, self._group)
492 |         predictions = []
493 |         for batch in bg_test.generate_batches(self._input_channels,
494 |                                               batch_size=1):
495 |             if self._network_type == self.SIMPLE:
496 |                 X_batch, y_batch = batch
497 |                 pred = fn_pred(X_batch)
498 |                 pred = np.squeeze(pred)
499 |             else:
500 |                 X_batch_rgb, X_batch_depth, y_batch = batch
501 |                 pred = fn_pred(X_batch_rgb, X_batch_depth)
502 |                 pred = np.squeeze(pred)
503 | 
504 |             predictions.append(pred)
505 | 
506 |         predictions = np.array(predictions)
507 |         if save_preds:
508 |             predictions_dir = './predictions'
509 |             if not os.path.exists(predictions_dir):
510 |                 os.mkdir(predictions_dir)
511 |             if self._network_type == self.SIMPLE:
512 |                 if self._input_channels == 1:
513 |                     input_type = 'depth'
514 |                 elif self._input_channels == 4:
515 |                     input_type = 'rgb'
516 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
517 |                     self._dataset, self._network_type, input_type,
518 |                     self.convnet._model_hp_dict['p'])
519 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
520 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
521 |                 np.savez(os.path.join(predictions_dir, save_dir,
522 |                                       'predictions.npz'), predictions)
523 |             elif self._network_type == self.CONV_FUSING:
524 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
525 |                     self._dataset, self._network_type, self._fusion_type,
526 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
527 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
528 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
529 |                 np.savez(os.path.join(predictions_dir, save_dir,
530 |                                       'predictions.npz'), predictions)
531 |             elif self._network_type == self.DENSE_FUSING:
532 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:d}/{4:f}'.format(
533 |                     self._dataset, self._network_type, self._fusion_type,
534 |                     self._fusion_level, self.convnet._model_hp_dict['p'])
535 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
536 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
537 |                 np.savez(os.path.join(predictions_dir, save_dir,
538 |                                       'predictions.npz'), predictions)
539 |             elif self._network_type == self.SCORE_FUSING:
540 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
541 |                     self._dataset, self._network_type, self._fusion_type,
542 |                     self.convnet._model_hp_dict['p'])
543 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
544 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
545 |                 np.savez(os.path.join(predictions_dir, save_dir,
546 |                                       'predictions.npz'), predictions)
547 |             elif self._network_type == self.INPUT_FUSING:
548 |                 save_dir = '{0:s}/{1:s}/{2:s}/{3:f}'.format(
549 |                     self._dataset, self._network_type, self._fusion_type,
550 |                     self.convnet._model_hp_dict['p'])
551 |                 if not os.path.exists(os.path.join(predictions_dir, save_dir)):
552 |                     os.makedirs(os.path.join(predictions_dir, save_dir))
553 |                 np.savez(os.path.join(predictions_dir, save_dir,
554 |                                       'predictions.npz'), predictions)
555 |         # return the predictions whether or not they were also saved to disk
556 |         return predictions
557 | 
558 |     def extract_kernels(self, layer, weights_dir):
559 | 
560 |         # bottleneck_W = np.load('nyu_princ_comp_pose.npz')
561 |         # bottleneck_W = bottleneck_W['arr_0']
562 |         print 'Building the ConvNet...\n'
563 |         if self._network_type == self.SIMPLE:
564 |             net = self.convnet.simple_convnet(self._input_channels)
565 |         elif self._network_type == self.CONV_FUSING:
566 |             net = self.convnet.fused_convnets(self._fusion_level,
567 |                                               self._fusion_type)
568 |         lw = LoadWeights(weights_dir, net)
569 |         lw.load_weights_numpy()
570 |         return net[layer].W
571 | 
--------------------------------------------------------------------------------