├── MIR_Flickr_Theano_Unlab.py ├── MIR_Flickr_Theano_lab.py ├── README ├── SupDeepDocNADE.py ├── gen_dataset_labeled.py ├── gen_dataset_unlabeled.py ├── licence ├── run_SupDeepDocNADE.py └── run_pretrain_DeepDocNADE.py /MIR_Flickr_Theano_Unlab.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, are 4 | # permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 14 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 16 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 18 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | # The views and conclusions contained in the software and documentation are those of the 24 | # authors and should not be interpreted as representing official policies, either expressed 25 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 26 | 27 | """ 28 | Module ``datasets.MIR_Flickr`` gives access to the MIR_Flickr dataset (unlabeled part) for Theano.
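A minimal usage sketch (assuming ``dir_path`` points to a directory prepared as described in the README, containing ``meta.txt``, ``sizes.txt``, the ``unlabeled<folder_ID>.npz`` files and the ``text`` folder; the path below is only illustrative):

    import MIR_Flickr_Theano_Unlab as unlab
    data = unlab.load('/path/to/mir_flickr_unlabeled', folder_ID=1, log_option='NoLog', spatial_split=[1, 2, 3])
    # data is a dict with keys 'hists_visual', 'global_features', 'hists_anno' and 'meta'.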
29 | 30 | """ 31 | import numpy as np 32 | import os 33 | import scipy.sparse as sp 34 | 35 | def LoadSparse(inputfile, verbose=False): 36 | """Loads a sparse matrix stored as npz file.""" 37 | npzfile = np.load(inputfile) 38 | mat = sp.csr_matrix((npzfile['data'], npzfile['indices'], 39 | npzfile['indptr']), 40 | shape=tuple(list(npzfile['shape']))) 41 | if verbose: 42 | 43 | print 'Loaded sparse matrix from %s of shape %s' % (inputfile, 44 | mat.shape.__str__()) 45 | return mat 46 | 47 | def load(dir_path,folder_ID, log_option='NoLog', spatial_split=[1,2,3]): 48 | """ 49 | 50 | """ 51 | 52 | dir_path = os.path.expanduser(dir_path) 53 | sizes_file = open(os.path.join(dir_path,'sizes.txt'),'r') 54 | unlabel_size = int(sizes_file.readline()) 55 | sizes_file.close() 56 | # lengths = [unlabel_size] 57 | meta_file = open(os.path.join(dir_path, 'meta.txt')) 58 | meta = {} 59 | for line in meta_file: 60 | meta_name, meta_value = line.rstrip().split(':') 61 | meta.update({meta_name:int(meta_value)}) 62 | 63 | spatial_split = np.asarray(spatial_split, np.float64) 64 | n_regions = int((spatial_split**2).sum()) 65 | meta['n_regions'] = n_regions 66 | 67 | unlabel_size = int(unlabel_size/meta['dataset_split']) 68 | file_annotations = os.path.join(dir_path, 'text', 'text_all_2000_unlabelled.npz') 69 | annotations = LoadSparse(file_annotations, verbose = False) 70 | 71 | unlabel_str,length_str = 'unlabeled','length' 72 | unlabel_file = os.path.join(dir_path,unlabel_str+str(folder_ID)+'.npz') 73 | unlabel_meta = {length_str:unlabel_size} 74 | unlabel_meta.update(meta) 75 | unlabel_meta.update({'Folder_ID':folder_ID}) 76 | 77 | npzfile_train = np.load(unlabel_file) 78 | if log_option == 'NoLog': 79 | unlabel_hists = npzfile_train['unlabeled_matrix_hists'][:,:n_regions*unlabel_meta['voc_size']] 80 | elif log_option == 'Log_Natural': 81 | unlabel_hists = np.round(np.log(npzfile_train['unlabeled_matrix_hists'][:,:n_regions*unlabel_meta['voc_size']]+1.0)).astype(np.int32) 82 | elif log_option == 'Log_4': 83 | unlabel_hists = np.round(np.log(npzfile_train['unlabeled_matrix_hists'][:,:n_regions*unlabel_meta['voc_size']]+1.0)/np.log(4)+np.finfo(np.double).eps).astype(np.int32) 84 | # unlabel_hists = npzfile_train['unlabeled_matrix_hists'] 85 | unlabel_global_features = npzfile_train['unlabeled_matrix_global_features'] 86 | unlabel_annos = annotations[(folder_ID-1)*unlabel_size:folder_ID*unlabel_size,:] 87 | 88 | 89 | return {'hists_visual':unlabel_hists, 'global_features':unlabel_global_features, 'hists_anno':unlabel_annos,'meta':unlabel_meta} 90 | 91 | def obtain(dir_path): 92 | """ 93 | Gives information about how to obtain this dataset (``dir_path`` is ignored). 94 | """ 95 | 96 | print 'Ask Yin Zheng (yzheng3xg@gmail.com) for the data.' 97 | 98 | -------------------------------------------------------------------------------- /MIR_Flickr_Theano_lab.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, are 4 | # permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 14 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 16 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 18 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | # The views and conclusions contained in the software and documentation are those of the 24 | # authors and should not be interpreted as representing official policies, either expressed 25 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 26 | 27 | """ 28 | Module ``datasets.MIR_Flickr`` gives access to the MIR_Flickr dataset (labeled part) for Theano. 29 | 30 | 31 | 32 | """ 33 | import numpy as np 34 | import os 35 | import scipy.sparse as sp 36 | 37 | def LoadSparse(inputfile, verbose=False): 38 | """Loads a sparse matrix stored as npz file.""" 39 | npzfile = np.load(inputfile) 40 | mat = sp.csr_matrix((npzfile['data'], npzfile['indices'], 41 | npzfile['indptr']), 42 | shape=tuple(list(npzfile['shape']))) 43 | if verbose: 44 | 45 | print 'Loaded sparse matrix from %s of shape %s' % (inputfile, 46 | mat.shape.__str__()) 47 | return mat 48 | 49 | def load(dir_path,folder_ID, log_option='NoLog', spatial_split=[1,2,3]): 50 | """ 51 | """ 52 | 53 | dir_path = os.path.expanduser(dir_path) 54 | sizes_file = open(os.path.join(dir_path,'sizes.txt'),'r') 55 | train_size,valid_size,test_size = int(sizes_file.readline()),int(sizes_file.readline()),int(sizes_file.readline()) 56 | sizes_file.close() 57 | lengths = [train_size,valid_size,test_size] 58 | meta_file = open(os.path.join(dir_path, 'meta.txt')) 59 | meta = {} 60 | for line in meta_file: 61 | meta_name, meta_value = line.rstrip().split(':') 62 | meta.update({meta_name:int(meta_value)}) 63 | 64 | spatial_split = np.asarray(spatial_split, np.float64) 65 | n_regions = int((spatial_split**2).sum()) 66 | meta['n_regions'] = n_regions 67 | file_train_indices = os.path.join(dir_path, 'splits', 'train_indices_'+str(folder_ID)+'.npy' ) 68 | file_valid_indices = os.path.join(dir_path, 'splits', 'valid_indices_'+str(folder_ID)+ '.npy' ) 69 | file_test_indices = os.path.join(dir_path, 'splits', 'test_indices_'+str(folder_ID)+'.npy' ) 70 | train_indices = np.load(file_train_indices) 71 | valid_indices = np.load(file_valid_indices) 72 | test_indices = np.load(file_test_indices) 73 | file_annotations = os.path.join(dir_path, 'text', 'text_all_2000_labelled.npz') 74 | annotations = LoadSparse(file_annotations, verbose = True) 75 | 76 | train_str,valid_str,test_str,length_str = 'train','valid','test','length' 77 | train_file,valid_file,test_file = [os.path.join(dir_path,name+str(folder_ID)+'.npz') for name in [train_str,valid_str,test_str]] 78 | train_meta,valid_meta,test_meta = 
[{length_str:length} for length in lengths] 79 | train_meta.update(meta) 80 | valid_meta.update(meta) 81 | test_meta.update(meta) 82 | 83 | npzfile_train = np.load(train_file) 84 | if log_option == 'NoLog': 85 | trainset_hists = npzfile_train['trainset_matrix_hists'][:,:n_regions*train_meta['voc_size']] 86 | elif log_option == 'Log_Natural': 87 | trainset_hists = np.round(np.log(npzfile_train['trainset_matrix_hists'][:,:n_regions*train_meta['voc_size']]+1.0)).astype(np.int32) 88 | elif log_option == 'Log_4': 89 | trainset_hists = np.round(np.log(npzfile_train['trainset_matrix_hists'][:,:n_regions*train_meta['voc_size']]+1.0)/np.log(4)+np.finfo(np.double).eps).astype(np.int32) 90 | 91 | trainset_global_features = npzfile_train['trainset_matrix_global_features'] 92 | trainset_targets = npzfile_train['trainset_matrix_targets'] 93 | trainset_annos = annotations[train_indices, :] 94 | 95 | npzfile_valid = np.load(valid_file) 96 | if log_option == 'NoLog': 97 | validset_hists = npzfile_valid['validset_matrix_hists'][:,:n_regions*valid_meta['voc_size']] 98 | elif log_option == 'Log_Natural': 99 | validset_hists = np.round(np.log(npzfile_valid['validset_matrix_hists'][:,:n_regions*valid_meta['voc_size']]+1.0)).astype(np.int32) 100 | elif log_option == 'Log_4': 101 | validset_hists = np.round(np.log(npzfile_valid['validset_matrix_hists'][:,:n_regions*valid_meta['voc_size']]+1.0)/np.log(4)+np.finfo(np.double).eps).astype(np.int32) 102 | # validset_hists = npzfile_valid['validset_matrix_hists'][:,:n_regions] 103 | validset_global_features = npzfile_valid['validset_matrix_global_features'] 104 | validset_targets = npzfile_valid['validset_matrix_targets'] 105 | validset_annos = annotations[valid_indices, :] 106 | 107 | npzfile_test = np.load(test_file) 108 | if log_option == 'NoLog': 109 | testset_hists = npzfile_test['testset_matrix_hists'][:,:n_regions*test_meta['voc_size']] 110 | elif log_option == 'Log_Natural': 111 | testset_hists = np.round(np.log(npzfile_test['testset_matrix_hists'][:,:n_regions*test_meta['voc_size']]+1.0)).astype(np.int32) 112 | elif log_option == 'Log_4': 113 | testset_hists = np.round(np.log(npzfile_test['testset_matrix_hists'][:,:n_regions*test_meta['voc_size']]+1.0)/np.log(4)+np.finfo(np.double).eps).astype(np.int32) 114 | # testset_hists = npzfile_test['testset_matrix_hists'][:,:n_regions] 115 | testset_global_features = npzfile_test['testset_matrix_global_features'] 116 | testset_targets = npzfile_test['testset_matrix_targets'] 117 | testset_annos = annotations[test_indices, :] 118 | 119 | return ({train_str:{'hists_visual':trainset_hists, 'global_features':trainset_global_features, 'targets':trainset_targets, 'hists_anno':trainset_annos,'meta':train_meta}, 120 | valid_str:{'hists_visual':validset_hists, 'global_features':validset_global_features, 'targets':validset_targets, 'hists_anno':validset_annos,'meta':valid_meta}, 121 | test_str:{'hists_visual':testset_hists, 'global_features':testset_global_features, 'targets':testset_targets, 'hists_anno':testset_annos,'meta':test_meta}}) 122 | 123 | def obtain(dir_path): 124 | """ 125 | Gives information about how to obtain this dataset (``dir_path`` is ignored). 126 | """ 127 | 128 | print 'Ask Yin Zheng (yzheng3xg@gmail.com) for the data.' 129 | 130 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, are 4 | # permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 14 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 16 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 18 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | # The views and conclusions contained in the software and documentation are those of the 24 | # authors and should not be interpreted as representing official policies, either expressed 25 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 26 | 27 | 28 | 29 | ''' 30 | @reference: A Deep and Autoregressive Approach for Topic Modeling of Multimodal Data, IEEE Trans. Pattern Analysis and Machine Intelligence (PAMI) 31 | 32 | Project page: https://sites.google.com/site/zhengyin1126/home/supdeepdocnade 33 | 34 | @Authors: Yin Zheng, received his Ph.D. from Tsinghua University in 2015. Homepage: https://sites.google.com/site/zhengyin1126/ 35 | Yu-Jin Zhang, Tsinghua University, 36 | Hugo Larochelle, University of Sherbrooke and Twitter. Homepage: http://www.dmi.usherb.ca/~larocheh/index_en.html 37 | 38 | @contact: Yin Zheng, yzheng3xg@gmail.com 39 | 40 | ''' 41 | 42 | 43 | We provide: 44 | 1. SupDeepDocNADE.py : Classes of DeepDocNADE and SupDeepDocNADE, which are used by run_pretrain_DeepDocNADE.py and run_SupDeepDocNADE.py 45 | 2. run_pretrain_DeepDocNADE.py : Python script to pretrain the DeepDocNADE model on the MIR Flickr unlabeled data 46 | 3. run_SupDeepDocNADE.py : Python script to train and test the SupDeepDocNADE model. 47 | 4. gen_dataset_labeled.py: Python script to generate the labeled dataset files needed by the model 48 | 5. gen_dataset_unlabeled.py: Python script to generate the unlabeled dataset files needed by the model 49 | 6. MIR_Flickr_Theano_lab.py: Python script to load the labeled dataset and provide an interface to the SupDeepDocNADE model 50 | 7. MIR_Flickr_Theano_Unlab.py: Python script to load the unlabeled dataset and provide an interface to the DeepDocNADE model 51 | 52 | ===================================================================================================================== 53 | 54 | 0. Install liblinear (http://www.csie.ntu.edu.tw/~cjlin/liblinear/) and put liblinear on the PYTHONPATH. 55 | NOTE: make sure that you can use liblinear in Python. To test whether liblinear is configured correctly, you can 56 | run "from liblinearutil import *" in Python. 57 | 58 | 1.
Generate the dataset: 59 | 1) Download the dataset from Nitish's homepage: http://www.cs.toronto.edu/~nitish/multimodal/index.html 60 | 2) Run "python gen_dataset_labeled.py" to generate the labeled dataset. 61 | a) Read the comments in the script about how to use it. 62 | b) Create a file IN THE SAME PATH OF THE LABELED DATASET named "meta.txt" with the following lines: 63 | voc_size:2000 64 | n_regions:14 65 | text_voc_size:2000 66 | global_feat_size:1857 67 | n_classes:38 68 | c) Create a file IN THE SAME PATH OF THE LABELED DATASET named "sizes.txt" with the following content: 69 | 10000 70 | 5000 71 | 10000 72 | 73 | 3) Run "python gen_dataset_unlabeled.py" to generate the unlabeled dataset. 74 | a) Read the comments in the script about how to use it. 75 | b) Create a file IN THE SAME PATH OF THE UNLABELED DATASET named "meta.txt" with the following content: 76 | voc_size:2000 77 | n_regions:14 78 | text_voc_size:2000 79 | global_feat_size:1857 80 | dataset_split:50 81 | c) Create a file IN THE SAME PATH OF THE UNLABELED DATASET named "sizes.txt" with the following content: 82 | 975000 83 | 84 | 85 | 86 | 2. Run run_pretrain_DeepDocNADE.py to pretrain the DeepDocNADE model on unlabeled data: 87 | python run_pretrain_DeepDocNADE.py n_pretrain pre_learning_rate hidden_size activation_function dropout_rate model_file_dir unlab_dataset_dir batch_size anno_weight polyakexp_weight model_init 88 | 89 | 90 | The parameters of the script are as follows: 91 | 92 | n_pretrain : number of pretraining iterations 93 | pre_learning_rate : learning rate of the pretraining 94 | hidden_size : the hidden size of the model, e.g. 2048_2048_2048 is a model with 3 hidden layers of 2048 units each 95 | activation_function : the activation function of the hidden layers ("sigmoid", "reclin" or "tanh") 96 | dropout_rate: the dropout rate for each hidden layer, e.g. "0.5_0.5_0.5" means a dropout rate of 0.5 for each layer 97 | model_file_dir: path to save the pretrained model 98 | unlab_dataset_dir: the path to the unlabeled dataset 99 | batch_size: the batch size 100 | anno_weight: the weight of the annotation words 101 | polyakexp_weight: polyak weight 102 | model_init: path to a previously saved pretrained model, used to continue training from it. It can be NULL if no model has been saved before. 103 | 104 | One example of the script is: 105 | python run_pretrain_DeepDocNADE.py 6000 0.03 2048_2048_2048 reclin 0.5_0.5_0.5 PATH_TO_SAVE_THE_MODEL PATH_TO_UNLABELED_DATA 500 12000 std 0.9995 PATH_TO_SAVED_MODEL 106 | 107 | 108 | 3. Run run_SupDeepDocNADE.py to train the SupDeepDocNADE model on labeled data, either from scratch or initialized from a model pretrained on unlabeled data: 109 | python run_SupDeepDocNADE.py folder_ID use_pretrain max_iter look_ahead hidden_size learning_rate unsup_weight activation_function Linear_minC Linear_maxC dropout_rate uniresult_dir Pretrain_model_name lab_dataset_dir batch_size anno_weight polyakexp_weight 110 | 111 | The parameters are as follows: 112 | folder_ID : ID of the dataset split (1 to 5) 113 | use_pretrain: whether to use a pretrained model or to train from randomly initialized parameters (True or False) 114 | max_iter: maximum number of iterations 115 | look_ahead: stop early if the number of iterations without improvement exceeds look_ahead 116 | hidden_size: the hidden size of the model, e.g.
2048_2048_2048 is a 3 hidden layers model with 2048 units each layer 117 | learning_rate: learning rate of training process 118 | unsup_weight: the weight of the unsupervised part 119 | activation_function: the activation function of the hidden layers, "sigmoid, relu or tanh" 120 | Linear_minC: the minimum value of C for linear SVM (in log_2 space) 121 | Linear_maxC: the max value of C for linear SVM (in log_2 space) 122 | dropout_rate: the dropout rate for each hidden layer, e.g. "0.5_0.5_0.5" means dropout rate is 0.5 for each layer 123 | uniresult_dir: the path to save the results 124 | Pretrain_model_name: the path and the name of the pretrained model if you set use_pretrain True. Otherwise, any string 125 | lab_dataset_dir: the path to the labeled dataset 126 | batch_size : the mini-batch size 127 | anno_weight: the weight of the annotation words 128 | polyakexp_weight: polyak weight 129 | 130 | One example of the script is: 131 | 132 | python run_SupDeepDocNADE.py 1 True 20000 300 2048_2048_2048 0.01 0.25 reclin -17 10 0.5_0.5_0.5 PATH_TO_SAVE_RESULTS PATH_NAME_OF_PRETRAINED_MODEL PATH_TO_LABELED_DATA 500 12000 0.9995 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /SupDeepDocNADE.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 
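A rough usage sketch of the DeepDocNADE class defined below (the complete training pipeline lives in run_pretrain_DeepDocNADE.py; the sizes are illustrative, and trainset/validset are assumed to be dictionaries of Theano shared variables with the keys expected by compile_function):

    model = DeepDocNADE(hidden_size=[2048, 2048], activation_function='reclin',
                        dropout_rate=[0.5, 0.5], batch_size=500, learning_rate=0.03)
    model.initialize(voc_size, anno_voc_size, global_feature_size, region_split)  # sizes taken from the dataset meta
    model.add_top_layer(1)                    # softmax output layer on top of the last hidden layer
    model.compile_function(2, trainset, validset)
    for index in range(n_train_batches):      # one sweep over the minibatches
        cost = model.train(index)

SupDeepDocNADE is driven in the same way, with the class targets added to the dataset dictionaries.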
28 | 29 | Created on Aug 15, 2015 30 | 31 | @author: zhengyin 32 | 33 | @contact: yzheng3xg@gmail.com 34 | 35 | @summary: The class for paper A Deep and Autoregressive Approach for Topic Modeling of Multimodal Data, TPAMI 2015 36 | 37 | ''' 38 | 39 | import theano 40 | import theano.tensor as T 41 | import theano.sandbox.linalg as Tlin 42 | from theano.ifelse import ifelse 43 | from theano.tensor.shared_randomstreams import RandomStreams as RS_FixationNADE 44 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 45 | import theano.sparse as S 46 | import Image 47 | import numpy as np 48 | import copy as cp 49 | import scipy.sparse as sp 50 | import gc 51 | from theano.tensor.nnet import conv 52 | from theano.tensor.signal import downsample 53 | import sys 54 | sys.setrecursionlimit(50000) 55 | # import pydot 56 | import warnings 57 | try: 58 | from scipy.linalg import cholesky, det, solve 59 | except ImportError: 60 | warnings.warn("Could not import some scipy.linalg functions") 61 | # import theano.tensor as T 62 | from theano import config 63 | 64 | activation_functions = {"sigmoid": theano.tensor.nnet.sigmoid, "reclin": lambda x: theano.tensor.maximum(x, 0.0), "tanh": theano.tensor.tanh} 65 | class DeepDocNADE(object): 66 | ''' Theano verson for deep DocNADE''' 67 | 68 | def __init__(self, 69 | hidden_size = [100,100], 70 | learning_rate = 0.001, 71 | activation_function = 'sigmoid', 72 | testing_ensemble_size = 1, 73 | hidden_bias_scaled_by_document_size = False, 74 | word_representation_size = 0, 75 | seed_np = 1234, 76 | seed_theano = 4321, 77 | use_dropout = False, 78 | dropout_rate = [0.5], 79 | normalize_by_document_size = False, 80 | anno_weight = 1.0, 81 | global_feature_weight = 1.0, 82 | batch_size = 1, 83 | aver_words_count = 1, 84 | preprocess_method = 'std', 85 | decrease_constant = 0.999, 86 | length_limit = 15.0, 87 | polyakexp_weight = 0.99 88 | 89 | ): 90 | self.hidden_size = hidden_size 91 | self.learning_rate = learning_rate 92 | self.activation_function_name = activation_function 93 | self.aver_words_count = aver_words_count 94 | self.testing_ensemble_size = testing_ensemble_size 95 | self.hidden_bias_scaled_by_document_size = hidden_bias_scaled_by_document_size 96 | self.seed_np = seed_np 97 | self.seed_theano = seed_theano 98 | # self.seed_shuffle = seed_shuffle 99 | self.word_representation_size = word_representation_size 100 | self.use_dropout = use_dropout 101 | self.dropout_rate = dropout_rate 102 | self.normalize_by_document_size = normalize_by_document_size 103 | self.n_layers = len(self.hidden_size) 104 | self.anno_weight = anno_weight 105 | self.global_feature_weight = global_feature_weight 106 | self.batch_size = batch_size 107 | self.preprocess_method = preprocess_method 108 | self.decrease_constant = decrease_constant 109 | dec_learning_rate_value = np.asarray(learning_rate, dtype=theano.config.floatX) 110 | self.dec_learning_rate = theano.shared(value=dec_learning_rate_value, name='dec_learning_rate') 111 | self.length_limit = length_limit 112 | self.polyakexp_weight = polyakexp_weight 113 | 114 | 115 | def initialize(self,voc_size, anno_voc_size, global_feature_size, region_split): 116 | 117 | self.activation = activation_functions[self.activation_function_name] 118 | self.rng_theano = RandomStreams(seed=self.seed_theano) 119 | self.rng = np.random.mtrand.RandomState(self.seed_np) 120 | self.voc_size = voc_size 121 | self.anno_voc_size = anno_voc_size 122 | self.global_feat_size = global_feature_size 123 | self.region_split = 
region_split 124 | 125 | 126 | self.W = [] 127 | self.c = [] 128 | input_size = self.voc_size + self.anno_voc_size 129 | for hidden_size in self.hidden_size: 130 | # W_value = (2*self.rng.rand(input_size,hidden_size)-1)/(np.max([input_size, hidden_size])) 131 | # W_value = self.rng.uniform(-np.sqrt(6)/(input_size + hidden_size), np.sqrt(6)/(input_size + hidden_size), size=(input_size, hidden_size)) 132 | W_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(input_size + hidden_size), np.sqrt(6)/np.sqrt(input_size + hidden_size), size=(input_size, hidden_size)) 133 | W_value = np.asarray(W_value, theano.config.floatX) 134 | c_value = np.zeros((hidden_size,),theano.config.floatX) 135 | W = theano.shared(value = W_value, name = 'W') 136 | c = theano.shared(value = c_value, name = 'c') 137 | self.W.append(W) 138 | self.c.append(c) 139 | input_size = hidden_size 140 | # G_value = (2*self.rng.rand(self.global_feat_size,self.hidden_size[0])-1)/(np.max([self.global_feat_size, self.hidden_size[0]])) 141 | # G_value = self.rng.uniform(-np.sqrt(6)/(self.global_feat_size + self.hidden_size[0]), np.sqrt(6)/(self.global_feat_size + self.hidden_size[0]), size=(self.global_feat_size, self.hidden_size[0])) 142 | G_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(self.global_feat_size + self.hidden_size[0]), np.sqrt(6)/np.sqrt(self.global_feat_size + self.hidden_size[0]), size=(self.global_feat_size, self.hidden_size[0])) 143 | G_value = np.asarray(G_value, theano.config.floatX) 144 | self.G = theano.shared(value=G_value, name = 'G') 145 | 146 | anno_mask = np.ones((self.batch_size, self.voc_size+self.anno_voc_size), theano.config.floatX) 147 | anno_mask[:, -self.anno_voc_size:] = self.anno_weight 148 | self.anno_mask = theano.shared(value=anno_mask, name='anno_mask') 149 | 150 | self.W_polyak = cp.deepcopy(self.W) 151 | self.c_polyak = cp.deepcopy(self.c) 152 | self.G_polyak = cp.deepcopy(self.G) 153 | def __deepcopy__(self,memo): 154 | print "Warning: the deepcopy only copies the parameters, you SHOULD call compile_function for the functions" 155 | newone = type(self)() 156 | memo[id(self)] = newone 157 | old_dict = dict(self.__dict__) 158 | for key,val in old_dict.items(): 159 | if key in ['train','valid','test']: 160 | print 'escape %s'%(key) 161 | pass 162 | else: 163 | newone.__dict__[key] = cp.deepcopy(val, memo) 164 | return newone 165 | 166 | 167 | def build_graph(self, debug, hist_visual, hist_anno, global_feature, n_layer_to_build, W, c, V, b, G, flag_train): 168 | 169 | if n_layer_to_build <1: 170 | print 'there is at least 1 hidden layer' 171 | exit(-1) 172 | if n_layer_to_build > self.n_layers: 173 | print 'exceed the max number of hidden layers' 174 | print 'the max number of hidden layers is %d'%(self.n_layers) 175 | exit(-1) 176 | 177 | 178 | hist_anno_dense = hist_anno.toarray() 179 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 180 | # anno_mask = T.ones(hist.shape, theano.config.floatX) 181 | # tt = T.ones((hist.shape[0],2000), theano.config.floatX)*self.anno_weight 182 | # anno_weighted_mask = T.set_subtensor(anno_mask[:, -2000:], tt) 183 | if debug==True: 184 | mask_unif = 0.5*T.ones(shape=hist.shape, dtype=theano.config.floatX) 185 | 186 | else: 187 | mask_unif = 1.0 - self.rng_theano.uniform(size=hist.shape, low=0., high=1., dtype=theano.config.floatX) 188 | mask_counts = mask_unif*(hist+1) 189 | input = T.floor(mask_counts)*self.anno_mask 190 | hist = hist*self.anno_mask 191 | d = input.sum(axis = 1) 192 | D = hist.sum(axis = 1) 193 | predict = hist - input 194 | condition_bias 
= T.dot(global_feature, G) 195 | 196 | if self.preprocess_method == 'None': 197 | tmp_input = input 198 | elif self.preprocess_method == 'std': 199 | std = T.std(input, axis=1) 200 | tmp_input = input/(std[:, np.newaxis]+1e-16) 201 | elif self.preprocess_method == 'SPM': 202 | div_number = T.sqrt((input**2).sum(axis=1)) 203 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 204 | else: 205 | print 'Unknow preprocess method' 206 | exit(-1) 207 | 208 | # tmp_input = input 209 | for i in xrange(n_layer_to_build): 210 | if i==0: 211 | 212 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 213 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias) 214 | else: 215 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 216 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]) 217 | tmp_input = h 218 | log_prob_each_word = T.log(T.nnet.softmax_with_bias(T.dot(h, V),b)+1e-16) 219 | log_prob_each_bin = log_prob_each_word*predict 220 | nll = -log_prob_each_bin.sum(axis=1) 221 | #TODO:??divide D?? 222 | if self.normalize_by_document_size: 223 | cost = T.mean(1.0/(D-d)*nll) 224 | else: 225 | cost = T.mean(D/(D-d)/self.aver_words_count*nll) 226 | 227 | return cost,h,mask_unif,mask_counts,input,predict,hist,log_prob_each_bin,nll,condition_bias 228 | 229 | def build_compute_representation_graph(self, hist_visual, hist_anno, global_feature, n_layer_to_build, W, c,G, flag_train): 230 | 231 | 232 | if n_layer_to_build <1: 233 | print 'there is at least 1 hidden layer' 234 | exit(-1) 235 | if n_layer_to_build > self.n_layers: 236 | print 'exceed the max number of hidden layers' 237 | print 'the max number of hidden layers is %d'%(self.n_layers) 238 | exit(-1) 239 | 240 | 241 | hist_anno_dense = hist_anno.toarray() 242 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 243 | # anno_mask = T.ones(hist.shape, theano.config.floatX) 244 | # anno_weighted_mask = T.set_subtensor(anno_mask[:, -self.anno_voc_size:], self.anno_weight) 245 | if self.preprocess_method == 'None': 246 | input = hist*self.anno_mask 247 | tmp_input = input 248 | elif self.preprocess_method == 'std': 249 | input = hist*self.anno_mask 250 | std = T.std(input, axis=1) 251 | tmp_input = input/(std[:, np.newaxis]+1e-16) 252 | elif self.preprocess_method == 'SPM': 253 | input = hist*self.anno_mask 254 | div_number = T.sqrt((input**2).sum(axis=1)) 255 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 256 | # squared_input = input**2 257 | # init_tmp_input = T.ones(shape=input.shape, dtype=theano.config.floatX) 258 | # last_rsp = 0 259 | # for r_sp in self.region_split: 260 | # div_number = T.sqrt(squared_input[:,last_rsp:r_sp].sum(axis=1)) 261 | # tmp_input = T.set_subtensor(init_tmp_input[:,last_rsp:r_sp], input[:,last_rsp:r_sp]/(div_number[:, np.newaxis]+1e-16)) 262 | # init_tmp_input = tmp_input 263 | # last_rsp = r_sp 264 | # anno_factor = tmp_input[:,:self.region_split[-2]].sum(axis=1) 265 | # tmp_input = T.set_subtensor(tmp_input[:,self.region_split[-2]:], tmp_input[:,self.region_split[-2]:]*anno_factor[:, np.newaxis]*2) 266 | else: 
267 | print 'Unknow preprocess method' 268 | exit(-1) 269 | # input = hist 270 | 271 | 272 | 273 | 274 | condition_bias = T.dot(global_feature, G) 275 | # 276 | # tmp_input = input 277 | for i in xrange(n_layer_to_build): 278 | if i==0: 279 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 280 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias) 281 | else: 282 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]) 283 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 284 | tmp_input = h 285 | 286 | 287 | return h 288 | 289 | def compile_function(self, n_layers, trainset, validset): 290 | 291 | hist_visual = T.matrix(name='hist_visual') 292 | hist_anno = S.csr_matrix(name='hist_anno') 293 | global_feature = T.matrix(name='global_features') 294 | index = T.cast(T.scalar('index'), 'int32') 295 | flag_train = T.scalar(name='flag_train') 296 | cost,hidden_representation,mask_unif,mask_counts,input,predict,hist,log_prob_each_bin,nll,condition_bias = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 297 | 298 | params = [self.V, self.b, self.G] 299 | params.extend(self.W[:n_layers]) 300 | params.extend(self.c[:n_layers]) 301 | 302 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak] 303 | polyaks.extend(self.W_polyak[:n_layers]) 304 | polyaks.extend(self.c_polyak[:n_layers]) 305 | 306 | params_gradient = [T.grad(cost, param) for param in params] 307 | 308 | 309 | updates = [] 310 | 311 | for param, param_gradient, polyak in zip(params, params_gradient, polyaks): 312 | param_updated = param - self.dec_learning_rate*param_gradient 313 | if param.get_value(borrow=True).ndim==2: 314 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 315 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 316 | col_scale = desired_norms / (1e-16 + col_norms) 317 | updates.append((param, param_updated*col_scale)) 318 | else: 319 | updates.append((param, param_updated)) 320 | 321 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 322 | updates.append((polyak, polyak_updated)) 323 | 324 | 325 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 326 | 327 | 328 | self.train = theano.function(inputs = [index], 329 | updates = updates, 330 | outputs = cost, 331 | givens = { 332 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 333 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 334 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 335 | flag_train:np.asarray(1,dtype=theano.config.floatX) 336 | }, 337 | # mode='DebugMode' 338 | ) 339 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 340 | self.valid = theano.function(inputs = [index], 341 | # updates = updates, 342 | outputs = cost, 343 | givens = { 344 | 
hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 345 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 346 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 347 | flag_train:np.asarray(0,dtype=theano.config.floatX) 348 | }, 349 | # mode='DebugMode' 350 | ) 351 | 352 | 353 | def compile_compute_representation_function(self,n_layers, dataset): 354 | hist_visual = T.matrix(name='hist_visual') 355 | hist_anno = S.csr_matrix(name='hist_anno') 356 | global_feature = T.matrix(name='global_features') 357 | index = T.lscalar('index') 358 | flag_train = T.scalar(name='flag_train') 359 | # cost,hidden_representation,input, anno_weighted_mask = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G) 360 | hidden_representation = self.build_compute_representation_graph(hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.G, flag_train) 361 | self.compute_representation = theano.function(inputs = [index], 362 | outputs = hidden_representation, 363 | givens = { 364 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 365 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 366 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 367 | flag_train:np.asarray(0,dtype=theano.config.floatX) 368 | } 369 | # mode='DebugMode' 370 | ) 371 | def compile_compute_representation_function_polyak(self,n_layers, dataset): 372 | hist_visual = T.matrix(name='hist_visual') 373 | hist_anno = S.csr_matrix(name='hist_anno') 374 | global_feature = T.matrix(name='global_features') 375 | index = T.lscalar('index') 376 | flag_train = T.scalar(name='flag_train') 377 | # cost,hidden_representation,input, anno_weighted_mask = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G) 378 | hidden_representation = self.build_compute_representation_graph(hist_visual, hist_anno, global_feature, n_layers, self.W_polyak, self.c_polyak, self.G_polyak, flag_train) 379 | self.compute_representation = theano.function(inputs = [index], 380 | outputs = hidden_representation, 381 | givens = { 382 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 383 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 384 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 385 | flag_train:np.asarray(0,dtype=theano.config.floatX) 386 | } 387 | # mode='DebugMode' 388 | ) 389 | def compile_LayerByLayer_function(self, n_layers, trainset, validset): 390 | 391 | hist_visual = T.matrix(name='hist_visual') 392 | hist_anno = S.csr_matrix(name='hist_anno') 393 | global_feature = T.matrix(name='global_features') 394 | index = T.cast(T.scalar('index'), 'int32') 395 | flag_train = T.scalar(name='flag_train') 396 | cost,hidden_representation = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 397 | 398 | params = [self.V, self.b, self.G, self.W[n_layers-1], self.c[n_layers-1]] 399 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak, self.W_polyak[n_layers-1], self.c_polyak[n_layers-1]] 400 | params_gradient = [T.grad(cost, param) for param in params] 401 | 402 | 403 | updates = [] 404 | 405 | for param, 
param_gradient, polyak in zip(params, params_gradient, polyaks): 406 | param_updated = param - self.dec_learning_rate*param_gradient 407 | if param.get_value(borrow=True).ndim==2: 408 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 409 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 410 | col_scale = desired_norms / (1e-16 + col_norms) 411 | updates.append((param, param_updated*col_scale)) 412 | else: 413 | updates.append((param, param_updated)) 414 | 415 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 416 | updates.append((polyak, polyak_updated)) 417 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 418 | 419 | self.train = theano.function(inputs = [index], 420 | updates = updates, 421 | outputs = cost, 422 | givens = { 423 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 424 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 425 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 426 | flag_train:np.asarray(1,dtype=theano.config.floatX) 427 | }, 428 | # mode='DebugMode' 429 | ) 430 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 431 | self.valid = theano.function(inputs = [index], 432 | # updates = updates, 433 | outputs = cost, 434 | givens = { 435 | hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 436 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 437 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 438 | flag_train:np.asarray(0,dtype=theano.config.floatX) 439 | }, 440 | # mode='DebugMode' 441 | ) 442 | 443 | 444 | def verify_gradients(self): 445 | 446 | def fun(W0,W1, c0, c1, V, b,G): 447 | 448 | hist_visual = np.array([[1,2,3,4,5,6,7,8,9,0]], dtype = theano.config.floatX) 449 | hist_anno = sp.csr_matrix([[0,0,1,0,0,0,1,2,0,0]], dtype = theano.config.floatX) 450 | global_feature = np.array([[1,2,3,4,5,6,7,8,9,0]], dtype = theano.config.floatX) 451 | n_layers = 2 452 | cost, h=self.build_graph(True, hist_visual, hist_anno, global_feature, n_layers, 453 | [W0,W1], [c0,c1], V, b, G) 454 | 455 | return cost 456 | print 'Warning: verify_gradient will reinitialize the model!!!' 
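        # The lines below rebuild a small model with toy sizes, then call theano.tests.unittest_tools.verify_grad
        # to compare the symbolic gradient of the cost returned by build_graph against finite-difference estimates.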
457 | self.hidden_size = [100,100] 458 | self.n_classes = 7 459 | self.dropout_rate = 0.5 460 | self.activation = activation_functions['reclin'] 461 | self.n_layers = len(self.hidden_size) 462 | self.initialize(10,10,10) 463 | rng = np.random.RandomState(42) 464 | 465 | 466 | # rng = np.random.RandomState(42) 467 | theano.tests.unittest_tools.verify_grad(fun, [self.W[0].get_value(), self.W[1].get_value(),self.c[0].get_value(), self.c[1].get_value(), 468 | self.V.get_value(), self.b.get_value(), self.G.get_value()], rng = rng) 469 | 470 | 471 | 472 | 473 | def remove_activation(self): 474 | 475 | del self.activation 476 | 477 | def add_activation(self): 478 | 479 | self.activation = activation_functions[self.activation_function_name] 480 | 481 | def remove_top_layer(self): 482 | if hasattr(self, 'V'): 483 | del self.V 484 | if hasattr(self, 'b'): 485 | del self.b 486 | 487 | def add_top_layer(self, layer_id): 488 | ''' 489 | layer_id is the id of the hidden layer (starting from 0) on which we build the top layer to compute the conditionals 490 | ''' 491 | if layer_id <0: 492 | print 'there is at least 1 hidden layer' 493 | exit(-1) 494 | if layer_id > self.n_layers-1: 495 | print 'exceed the max number of hidden layers' 496 | print 'the max number of hidden layers is %d'%(self.n_layers) 497 | exit(-1) 498 | # V_value = (2*self.rng.rand(self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)-1)/(np.max([self.voc_size+self.anno_voc_size, self.hidden_size[layer_id]])) 499 | # V_value = self.rng.uniform(-np.sqrt(6)/(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), np.sqrt(6)/(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), size=( self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)) 500 | V_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), np.sqrt(6)/np.sqrt(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), size=( self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)) 501 | V_value = np.asarray(V_value, theano.config.floatX) 502 | self.V = theano.shared(value = V_value, name = 'V') 503 | b_value = np.zeros((self.voc_size+self.anno_voc_size), theano.config.floatX) 504 | self.b = theano.shared(value = b_value, name = 'b') 505 | self.V_polyak = cp.deepcopy(self.V) 506 | self.b_polyak = cp.deepcopy(self.b) 507 | 508 | def copy_parameters(self, source): 509 | 510 | self.V.set_value(source.V.get_value()) 511 | self.b.set_value(source.b.get_value()) 512 | self.V_polyak.set_value(source.V_polyak.get_value()) 513 | self.b_polyak.set_value(source.b_polyak.get_value()) 514 | for i in xrange(self.n_layers): 515 | self.W[i].set_value(source.W[i].get_value()) 516 | self.c[i].set_value(source.c[i].get_value()) 517 | self.W_polyak[i].set_value(source.W_polyak[i].get_value()) 518 | self.c_polyak[i].set_value(source.c_polyak[i].get_value()) 519 | self.G.set_value(source.G.get_value()) 520 | self.G_polyak.set_value(source.G_polyak.get_value()) 521 | self.dec_learning_rate.set_value(source.dec_learning_rate.get_value()) 522 | 523 | 524 | 525 | class SupDeepDocNADE(object): 526 | ''' Theano verson for Supervised deep DocNADE''' 527 | 528 | def __init__(self, 529 | hidden_size = [100,100], 530 | learning_rate = 0.001, 531 | learning_rate_unsup = 0.001, 532 | activation_function = 'sigmoid', 533 | testing_ensemble_size = 1, 534 | hidden_bias_scaled_by_document_size = False, 535 | word_representation_size = 0, 536 | seed_np = 1234, 537 | seed_theano = 4321, 538 | use_dropout = False, 539 | 
dropout_rate = [0.5], 540 | normalize_by_document_size = False, 541 | anno_weight = 1.0, 542 | global_feature_weight = 1.0, 543 | batch_size = 1, 544 | unsup_weight = 0.001, 545 | sup_option = 'full', 546 | aver_words_count = 1, 547 | n_connection = 15, 548 | bias = 0.0, 549 | rescale = 0.01, 550 | preprocess_method = 'SPM', 551 | decrease_constant = 0.999, 552 | length_limit = 15.0, 553 | polyakexp_weight = 0.99 554 | 555 | ): 556 | self.n_epoches_trained = 0 557 | self.hidden_size = hidden_size 558 | self.learning_rate = learning_rate 559 | self.learning_rate_unsup = learning_rate_unsup 560 | self.activation_function_name = activation_function 561 | self.aver_words_count = aver_words_count 562 | self.testing_ensemble_size = testing_ensemble_size 563 | self.hidden_bias_scaled_by_document_size = hidden_bias_scaled_by_document_size 564 | self.seed_np = seed_np 565 | self.seed_theano = seed_theano 566 | # self.seed_shuffle = seed_shuffle 567 | self.word_representation_size = word_representation_size 568 | self.use_dropout = use_dropout 569 | self.dropout_rate = dropout_rate 570 | self.normalize_by_document_size = normalize_by_document_size 571 | self.n_layers = len(self.hidden_size) 572 | self.anno_weight = anno_weight 573 | self.global_feature_weight = global_feature_weight 574 | self.batch_size = batch_size 575 | self.unsup_weight = unsup_weight 576 | # self.unsup_weight = theano.shared(value=unsup_weight,name='unsup') 577 | self.sup_option = sup_option 578 | self.n_connection = n_connection 579 | self.bias = bias 580 | self.rescale = rescale 581 | self.preprocess_method = preprocess_method 582 | self.decrease_constant = decrease_constant 583 | dec_learning_rate_value = np.asarray(learning_rate, dtype=theano.config.floatX) 584 | self.dec_learning_rate = theano.shared(value=dec_learning_rate_value, name='dec_learning_rate') 585 | self.length_limit = length_limit 586 | self.polyakexp_weight = polyakexp_weight 587 | 588 | 589 | def initialize(self,voc_size, anno_voc_size, global_feature_size, n_classes, region_split): 590 | 591 | self.activation = activation_functions[self.activation_function_name] 592 | self.rng_theano = RandomStreams(seed=self.seed_theano) 593 | self.rng = np.random.mtrand.RandomState(self.seed_np) 594 | # self.rng = np.random.mtrand.RandomState(self.seed) 595 | # self.rng_shuffle = np.random.mtrand.RandomState(self.seed_shuffle) 596 | self.voc_size = voc_size 597 | self.anno_voc_size = anno_voc_size 598 | self.global_feat_size = global_feature_size 599 | self.n_classes = n_classes 600 | self.region_split = region_split 601 | 602 | 603 | 604 | 605 | self.W = [] 606 | self.c = [] 607 | input_size = self.voc_size + self.anno_voc_size 608 | cnt = 1 609 | for hidden_size in self.hidden_size: 610 | W_value = 1*self.rng.uniform(-np.sqrt(6)/np.sqrt(input_size + hidden_size), np.sqrt(6)/np.sqrt(input_size + hidden_size), size=(input_size, hidden_size)) 611 | # W_value = 10*generate_SparseConnectionMat(self.rng, input_size, hidden_size, self.n_connection, self.rescale, self.bias)*cnt 612 | W_value = np.asarray(W_value, theano.config.floatX) 613 | c_value = np.zeros((hidden_size,),theano.config.floatX) 614 | W = theano.shared(value = W_value, name = 'W') 615 | c = theano.shared(value = c_value, name = 'c') 616 | self.W.append(W) 617 | self.c.append(c) 618 | input_size = hidden_size 619 | cnt *= 3 620 | 621 | G_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(self.global_feat_size + self.hidden_size[0]), np.sqrt(6)/np.sqrt(self.global_feat_size + self.hidden_size[0]), 
size=(self.global_feat_size, self.hidden_size[0])) 622 | G_value = np.asarray(G_value, theano.config.floatX) 623 | self.G = theano.shared(value=G_value, name = 'G') 624 | 625 | anno_mask = np.ones((self.batch_size, self.voc_size+self.anno_voc_size), theano.config.floatX) 626 | anno_mask[:, -self.anno_voc_size:] = self.anno_weight 627 | self.anno_mask = theano.shared(value=anno_mask, name='anno_mask') 628 | 629 | self.W_polyak = cp.deepcopy(self.W) 630 | self.c_polyak = cp.deepcopy(self.c) 631 | self.G_polyak = cp.deepcopy(self.G) 632 | 633 | 634 | def __deepcopy__(self,memo): 635 | print "Warning: the deepcopy only copies the parameters, you SHOULD call compile_function for the functions" 636 | newone = type(self)() 637 | memo[id(self)] = newone 638 | old_dict = dict(self.__dict__) 639 | for key,val in old_dict.items(): 640 | if key in ['train','valid','test']: 641 | print 'escape %s'%(key) 642 | pass 643 | else: 644 | newone.__dict__[key] = cp.deepcopy(val, memo) 645 | return newone 646 | 647 | 648 | def build_graph(self, debug, hist_visual, hist_anno, global_feature, target,n_layer_to_build, W, c, V, b, G, U, dd, flag_train): 649 | 650 | if n_layer_to_build <1: 651 | print 'there is at least 1 hidden layer' 652 | exit(-1) 653 | if n_layer_to_build > self.n_layers: 654 | print 'exceed the max number of hidden layers' 655 | print 'the max number of hidden layers is %d'%(self.n_layers) 656 | exit(-1) 657 | 658 | 659 | hist_anno_dense = hist_anno.toarray() 660 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 661 | if debug==True: 662 | mask_unif = 0.5*T.ones(shape=hist.shape, dtype=theano.config.floatX) 663 | 664 | else: 665 | mask_unif = 1.0 - self.rng_theano.uniform(size=hist.shape, low=0., high=1., dtype=theano.config.floatX) 666 | mask_counts = mask_unif*(hist+1) 667 | 668 | input = T.floor(mask_counts)*self.anno_mask 669 | hist = hist*self.anno_mask 670 | d = input.sum(axis = 1) 671 | D = hist.sum(axis = 1) 672 | predict = hist - input 673 | condition_bias = T.dot(global_feature, G) 674 | 675 | if self.preprocess_method == 'None': 676 | tmp_input = input 677 | elif self.preprocess_method == 'std': 678 | std = T.std(input, axis=1) 679 | tmp_input = input/(std[:, np.newaxis]+1e-16) 680 | elif self.preprocess_method == 'SPM': 681 | div_number = T.sqrt((input**2).sum(axis=1)) 682 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 683 | 684 | 685 | else: 686 | print 'Unknow preprocess method' 687 | exit(-1) 688 | 689 | 690 | first_tmp_input = tmp_input 691 | if self.sup_option == 'full': 692 | if self.preprocess_method == 'None': 693 | tmp_sup_input = hist 694 | elif self.preprocess_method == 'std': 695 | std_full = T.std(hist, axis=1) 696 | tmp_sup_input = hist/(std_full[:, np.newaxis]+1e-16) 697 | elif self.preprocess_method == 'SPM': 698 | div_number = T.sqrt((hist**2).sum(axis=1)) 699 | tmp_sup_input = input/(div_number[:,np.newaxis]+1e-16) 700 | else: 701 | print 'Unknow preprocess method' 702 | exit(-1) 703 | 704 | for i in xrange(n_layer_to_build): 705 | if i==0: 706 | dropout_mask = ifelse(T.neq(flag_train, 0) ,self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX), (1.-self.dropout_rate[i])*T.ones((tmp_input.shape[0],W[i].shape[1]), theano.config.floatX)) 707 | h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*dropout_mask 708 | if self.sup_option == 'full': 709 | h_sup = self.activation(T.dot(tmp_sup_input, W[i])+c[i]+condition_bias)*dropout_mask 710 | else: 711 | dropout_mask = 
ifelse(T.neq(flag_train, 0) ,self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX), (1.-self.dropout_rate[i])*T.ones((tmp_input.shape[0],W[i].shape[1]), theano.config.floatX)) 712 | h = self.activation(T.dot(tmp_input, W[i])+c[i])*dropout_mask*1.0 713 | if self.sup_option == 'full': 714 | h_sup = self.activation(T.dot(tmp_sup_input, W[i])+c[i])*dropout_mask*1.0 715 | tmp_input = h 716 | if self.sup_option == 'full': 717 | tmp_sup_input = h_sup 718 | # G_value = (2*self.rng.rand(self.global_feat_size,self.hidden_size[0])-1)/(np.max([self.global_feat_size, self.hidden_size[0]])) 719 | log_prob_each_word = T.log(T.nnet.softmax_with_bias(T.dot(h, V),b)+1e-16) 720 | log_prob_each_bin = log_prob_each_word*predict 721 | nll = -log_prob_each_bin.sum(axis=1) 722 | 723 | 724 | #=====================sup_cost=============================== 725 | if self.sup_option == 'full': 726 | prob_target = T.nnet.sigmoid(T.dot(h_sup, U)+dd) 727 | elif self.sup_option == 'partial': 728 | prob_target = T.nnet.sigmoid(T.dot(h, U)+dd) 729 | else: 730 | print "unknown supvervised option" 731 | exit(-1) 732 | cross_entropy = T.nnet.binary_crossentropy(prob_target, target).sum(axis=1)# the better the smaller (theano crossentropy add a minus here 733 | if self.normalize_by_document_size: 734 | unsup_cost = 1.0/(D-d)*nll 735 | else: 736 | unsup_cost = D/(D-d)/self.aver_words_count*nll 737 | 738 | cost = T.mean(unsup_cost*self.unsup_weight + cross_entropy) 739 | # T.mean(D/(D-d)*nll*self.unsup_weight + cross_entropy) 740 | log_prob_target = T.log(prob_target) 741 | return cost,log_prob_target,h, unsup_cost, cross_entropy, first_tmp_input, h_sup 742 | 743 | def build_compute_representation_graph(self, hist_visual, hist_anno, global_feature,n_layer_to_build, W, c,G, U, d, flag_train): 744 | 745 | 746 | if n_layer_to_build <1: 747 | print 'there is at least 1 hidden layer' 748 | exit(-1) 749 | if n_layer_to_build > self.n_layers: 750 | print 'exceed the max number of hidden layers' 751 | print 'the max number of hidden layers is %d'%(self.n_layers) 752 | exit(-1) 753 | 754 | 755 | hist_anno_dense = hist_anno.toarray() 756 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 757 | 758 | 759 | 760 | 761 | condition_bias = T.dot(global_feature, G) 762 | # 763 | if self.preprocess_method == 'None': 764 | input = hist*self.anno_mask 765 | tmp_input = input 766 | elif self.preprocess_method == 'std': 767 | input = hist*self.anno_mask 768 | std = T.std(input, axis=1) 769 | tmp_input = input/(std[:, np.newaxis]+1e-16) 770 | elif self.preprocess_method == 'SPM': 771 | input = hist*self.anno_mask 772 | 773 | div_number = T.sqrt((input**2).sum(axis=1)) 774 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 775 | else: 776 | print 'Unknow preprocess method' 777 | exit(-1) 778 | 779 | 780 | for i in xrange(n_layer_to_build): 781 | if i==0: 782 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 783 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias) 784 | else: 785 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]) 786 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = 
(tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 787 | tmp_input = h 788 | log_prob_target = T.log(T.nnet.sigmoid(T.dot(h, U)+d)) 789 | 790 | return h,log_prob_target 791 | 792 | 793 | 794 | def compile_function(self, n_layers, trainset, validset): 795 | 796 | hist_visual = T.matrix(name='hist_visual') 797 | hist_anno = S.csr_matrix(name='hist_anno') 798 | global_feature = T.matrix(name='global_feature') 799 | target = T.matrix(name='target') 800 | index = T.lscalar('index') 801 | flag_train = T.scalar(name='flag_train') 802 | cost,log_prob_target, hidden_representation, unsup_cost, cross_entropy, first_tmp_input, h_sup = self.build_graph(False, hist_visual, hist_anno, global_feature, target, n_layers, self.W, self.c, self.V, self.b, self.G,self.U, self.d, flag_train) 803 | 804 | params = [self.V, self.b, self.G, self.U, self.d] 805 | params.extend(self.W[:n_layers]) 806 | params.extend(self.c[:n_layers]) 807 | 808 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak, self.U_polyak, self.d_polyak] 809 | polyaks.extend(self.W_polyak[:n_layers]) 810 | polyaks.extend(self.c_polyak[:n_layers]) 811 | 812 | params_gradient = [T.grad(cost, param) for param in params] 813 | 814 | 815 | updates = [] 816 | 817 | for param, param_gradient, polyak in zip(params, params_gradient, polyaks): 818 | param_updated = param - self.dec_learning_rate*param_gradient 819 | if param.get_value(borrow=True).ndim==2: 820 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 821 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 822 | col_scale = desired_norms / (1e-16 + col_norms) 823 | updates.append((param, param_updated*col_scale)) 824 | else: 825 | updates.append((param, param_updated)) 826 | 827 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 828 | updates.append((polyak, polyak_updated)) 829 | 830 | 831 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 832 | 833 | self.train = theano.function(inputs = [index], 834 | updates = updates, 835 | # outputs = [cost, log_prob_target, unsup_cost, cross_entropy, hidden_representation, first_tmp_input, h_sup], 836 | outputs = [cost, log_prob_target, unsup_cost, cross_entropy], 837 | givens = { 838 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 839 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 840 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 841 | target:trainset['targets'][index*self.batch_size:(index+1)*self.batch_size, :], 842 | flag_train:np.asarray(1,dtype=theano.config.floatX) 843 | }, 844 | # mode='DebugMode' 845 | ) 846 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 847 | self.valid = theano.function(inputs = [index], 848 | # updates = updates, 849 | # outputs = [cost, log_prob_target, unsup_cost, cross_entropy, hidden_representation, first_tmp_input, h_sup], 850 | outputs = [cost, log_prob_target, unsup_cost, cross_entropy], 851 | givens = { 852 | hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 853 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 854 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 855 | 
target:validset['targets'][index*self.batch_size:(index+1)*self.batch_size, :], 856 | flag_train:np.asarray(0,dtype=theano.config.floatX) 857 | }, 858 | # mode='DebugMode' 859 | ) 860 | def compile_LayerByLayer_function(self, n_layers, trainset, validset): 861 | 862 | hist_visual = T.matrix(name='hist_visual') 863 | hist_anno = S.csr_matrix(name='hist_anno') 864 | global_feature = T.matrix(name='global_features') 865 | index = T.cast(T.scalar('index'), 'int32') 866 | flag_train = T.scalar(name='flag_train') 867 | cost,hidden_representation = self.build_unsupervised_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 868 | 869 | params = [self.V, self.b, self.G, self.W[n_layers-1], self.c[n_layers-1]] 870 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak, self.W_polyak[n_layers-1], self.c_polyak[n_layers-1]] 871 | 872 | 873 | params_gradient = [T.grad(cost, param) for param in params] 874 | 875 | 876 | updates = [] 877 | 878 | for param, param_gradient, polyak in zip(params, params_gradient, polyaks): 879 | param_updated = param - self.dec_learning_rate*param_gradient 880 | if param.get_value(borrow=True).ndim==2: 881 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 882 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 883 | col_scale = desired_norms / (1e-16 + col_norms) 884 | updates.append((param, param_updated*col_scale)) 885 | else: 886 | updates.append((param, param_updated)) 887 | 888 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 889 | updates.append((polyak, polyak_updated)) 890 | 891 | 892 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 893 | 894 | self.train = theano.function(inputs = [index], 895 | updates = updates, 896 | outputs = cost, 897 | givens = { 898 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 899 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 900 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 901 | flag_train:np.asarray(1,dtype=theano.config.floatX) 902 | }, 903 | # mode='DebugMode' 904 | ) 905 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 906 | self.valid = theano.function(inputs = [index], 907 | # updates = updates, 908 | outputs = cost, 909 | givens = { 910 | hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 911 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 912 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 913 | flag_train:np.asarray(0,dtype=theano.config.floatX) 914 | }, 915 | # mode='DebugMode' 916 | ) 917 | 918 | def compile_compute_representation_function(self,n_layers, dataset): 919 | hist_visual = T.matrix(name='hist_visual') 920 | hist_anno = S.csr_matrix(name='hist_anno') 921 | global_feature = T.matrix(name='global_feature') 922 | index = T.lscalar('index') 923 | flag_train = T.scalar(name='flag_train') 924 | # cost,hidden_representation,input, anno_weighted_mask = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G) 925 | hidden_representation, log_prob_target = self.build_compute_representation_graph(hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.G, self.U, self.d, 
flag_train) 926 | self.compute_representation = theano.function(inputs = [index], 927 | outputs = [hidden_representation,log_prob_target], 928 | givens = { 929 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 930 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 931 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 932 | 933 | flag_train:np.asarray(0,dtype=theano.config.floatX) 934 | } 935 | # mode='DebugMode' 936 | ) 937 | def compile_compute_representation_function_polyak(self,n_layers, dataset): 938 | hist_visual = T.matrix(name='hist_visual') 939 | hist_anno = S.csr_matrix(name='hist_anno') 940 | global_feature = T.matrix(name='global_feature') 941 | index = T.lscalar('index') 942 | flag_train = T.scalar(name='flag_train') 943 | # cost,hidden_representation,input, anno_weighted_mask = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G) 944 | hidden_representation, log_prob_target = self.build_compute_representation_graph(hist_visual, hist_anno, global_feature, n_layers, self.W_polyak, self.c_polyak, self.G_polyak, self.U_polyak, self.d_polyak, flag_train) 945 | self.compute_representation = theano.function(inputs = [index], 946 | outputs = [hidden_representation,log_prob_target], 947 | givens = { 948 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 949 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 950 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 951 | 952 | flag_train:np.asarray(0,dtype=theano.config.floatX) 953 | } 954 | # mode='DebugMode' 955 | ) 956 | def build_unsupervised_graph(self, debug, hist_visual, hist_anno, global_feature, n_layer_to_build, W, c, V, b, G, flag_train): 957 | 958 | if n_layer_to_build <1: 959 | print 'there is at least 1 hidden layer' 960 | exit(-1) 961 | if n_layer_to_build > self.n_layers: 962 | print 'exceed the max number of hidden layers' 963 | print 'the max number of hidden layers is %d'%(self.n_layers) 964 | exit(-1) 965 | 966 | 967 | hist_anno_dense = hist_anno.toarray() 968 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 969 | if debug==True: 970 | mask_unif = 0.5*T.ones(shape=hist.shape, dtype=theano.config.floatX) 971 | 972 | else: 973 | mask_unif = 1.0 - self.rng_theano.uniform(size=hist.shape, low=0., high=1., dtype=theano.config.floatX) 974 | mask_counts = mask_unif*(hist+1) 975 | input = T.floor(mask_counts)*self.anno_mask 976 | hist = hist*self.anno_mask 977 | d = input.sum(axis = 1) 978 | D = hist.sum(axis = 1) 979 | predict = hist - input 980 | condition_bias = T.dot(global_feature, G) 981 | 982 | if self.preprocess_method == 'None': 983 | tmp_input = input 984 | elif self.preprocess_method == 'std': 985 | std = T.std(input, axis=1) 986 | tmp_input = input/(std[:, np.newaxis]+1e-16) 987 | elif self.preprocess_method == 'SPM': 988 | div_number = T.sqrt((input**2).sum(axis=1)) 989 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 990 | 991 | else: 992 | print 'Unknow preprocess method' 993 | exit(-1) 994 | 995 | # tmp_input = input 996 | for i in xrange(n_layer_to_build): 997 | if i==0: 998 | 999 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = 
(tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 1000 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias) 1001 | else: 1002 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 1003 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]) 1004 | tmp_input = h 1005 | log_prob_each_word = T.log(T.nnet.softmax_with_bias(T.dot(h, V),b)+1e-16) 1006 | log_prob_each_bin = log_prob_each_word*predict 1007 | nll = -log_prob_each_bin.sum(axis=1) 1008 | #TODO:??divide D?? 1009 | if self.normalize_by_document_size: 1010 | cost = T.mean(1.0/(D-d)*nll) 1011 | else: 1012 | cost = T.mean(D/(D-d)/self.aver_words_count*nll) 1013 | 1014 | return cost,h 1015 | 1016 | 1017 | def compile_unsupervised_function(self, n_layers, trainset, validset): 1018 | 1019 | hist_visual = T.matrix(name='hist_visual') 1020 | hist_anno = S.csr_matrix(name='hist_anno') 1021 | global_feature = T.matrix(name='global_features') 1022 | index = T.cast(T.scalar('index'), 'int32') 1023 | flag_train = T.scalar(name='flag_train') 1024 | cost,hidden_representation = self.build_unsupervised_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 1025 | 1026 | params = [self.V, self.b, self.G] 1027 | params.extend(self.W[:n_layers]) 1028 | params.extend(self.c[:n_layers]) 1029 | 1030 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak] 1031 | polyaks.extend(self.W_polyak[:n_layers]) 1032 | polyaks.extend(self.c_polyak[:n_layers]) 1033 | 1034 | params_gradient = [T.grad(cost, param) for param in params] 1035 | 1036 | 1037 | updates = [] 1038 | 1039 | for param, param_gradient, polyak in zip(params, params_gradient, polyaks): 1040 | param_updated = param - self.dec_learning_rate*param_gradient 1041 | if param.get_value(borrow=True).ndim==2: 1042 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 1043 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 1044 | col_scale = desired_norms / (1e-16 + col_norms) 1045 | updates.append((param, param_updated*col_scale)) 1046 | else: 1047 | updates.append((param, param_updated)) 1048 | 1049 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 1050 | updates.append((polyak, polyak_updated)) 1051 | 1052 | 1053 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 1054 | 1055 | self.train = theano.function(inputs = [index], 1056 | updates = updates, 1057 | outputs = [cost,hidden_representation], 1058 | givens = { 1059 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 1060 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 1061 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 1062 | flag_train:np.asarray(1,dtype=theano.config.floatX) 1063 | }, 1064 | # mode='DebugMode' 1065 | ) 1066 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 1067 | self.valid = theano.function(inputs = [index], 1068 | # updates = updates, 1069 | outputs = [cost,hidden_representation], 1070 | givens = { 1071 | 
hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 1072 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 1073 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 1074 | flag_train:np.asarray(0,dtype=theano.config.floatX) 1075 | }, 1076 | # mode='DebugMode' 1077 | ) 1078 | 1079 | 1080 | def build_graph_generateTEXT(self, hist_visual, hist_anno, global_feature,n_layer_to_build, W, c, V, b, G, flag_train): 1081 | 1082 | if n_layer_to_build <1: 1083 | print 'there is at least 1 hidden layer' 1084 | exit(-1) 1085 | if n_layer_to_build > self.n_layers: 1086 | print 'exceed the max number of hidden layers' 1087 | print 'the max number of hidden layers is %d'%(self.n_layers) 1088 | exit(-1) 1089 | 1090 | 1091 | hist_anno_dense = hist_anno.toarray()*0.0 1092 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 1093 | hist = hist*self.anno_mask 1094 | 1095 | condition_bias = T.dot(global_feature, G) 1096 | # 1097 | if self.preprocess_method == 'None': 1098 | input = hist*self.anno_mask 1099 | tmp_input = input 1100 | elif self.preprocess_method == 'std': 1101 | input = hist*self.anno_mask 1102 | std = T.std(input, axis=1) 1103 | tmp_input = input/(std[:, np.newaxis]+1e-16) 1104 | elif self.preprocess_method == 'SPM': 1105 | input = hist*self.anno_mask 1106 | div_number = T.sqrt((input**2).sum(axis=1)) 1107 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 1108 | else: 1109 | print 'Unknow preprocess method' 1110 | exit(-1) 1111 | 1112 | 1113 | for i in xrange(n_layer_to_build): 1114 | if i==0: 1115 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 1116 | else: 1117 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 1118 | tmp_input = h 1119 | 1120 | 1121 | log_prob_each_word = T.log(T.nnet.softmax_with_bias(T.dot(h, V),b)+1e-16) 1122 | 1123 | 1124 | return h, log_prob_each_word 1125 | 1126 | def compile_generateTEXT_function(self,n_layers, dataset): 1127 | hist_visual = T.matrix(name='hist_visual') 1128 | hist_anno = S.csr_matrix(name='hist_anno') 1129 | global_feature = T.matrix(name='global_feature') 1130 | index = T.lscalar('index') 1131 | flag_train = T.scalar(name='flag_train') 1132 | hidden_representation, log_prob_each_word = self.build_graph_generateTEXT(hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 1133 | self.generateTEXT = theano.function(inputs = [index], 1134 | outputs = [hidden_representation,log_prob_each_word], 1135 | givens = { 1136 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 1137 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 1138 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 1139 | 1140 | flag_train:np.asarray(0,dtype=theano.config.floatX) 1141 | } 1142 | ) 1143 | def compile_generateTEXT_function_polyak(self,n_layers, dataset): 1144 | hist_visual = 
T.matrix(name='hist_visual') 1145 | hist_anno = S.csr_matrix(name='hist_anno') 1146 | global_feature = T.matrix(name='global_feature') 1147 | index = T.lscalar('index') 1148 | flag_train = T.scalar(name='flag_train') 1149 | hidden_representation, log_prob_each_word = self.build_graph_generateTEXT(hist_visual, hist_anno, global_feature, n_layers, self.W_polyak, self.c_polyak, self.V_polyak, self.b_polyak, self.G_polyak, flag_train) 1150 | self.generateTEXT = theano.function(inputs = [index], 1151 | outputs = [hidden_representation,log_prob_each_word], 1152 | givens = { 1153 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 1154 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 1155 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 1156 | 1157 | flag_train:np.asarray(0,dtype=theano.config.floatX) 1158 | } 1159 | ) 1160 | 1161 | def verify_gradients(self): 1162 | 1163 | def fun(W0,W1, c0, c1, V, b,G): 1164 | 1165 | hist_visual = np.array([[1,2,3,4,5,6,7,8,9,0]], dtype = theano.config.floatX) 1166 | hist_anno = sp.csr_matrix([[0,0,1,0,0,0,1,2,0,0]], dtype = theano.config.floatX) 1167 | global_feature = np.array([[1,2,3,4,5,6,7,8,9,0]], dtype = theano.config.floatX) 1168 | n_layers = 2 1169 | cost, h=self.build_graph(True, hist_visual, hist_anno, global_feature, n_layers, 1170 | [W0,W1], [c0,c1], V, b, G) 1171 | 1172 | return cost 1173 | print 'Warning: verify_gradient will reinitialize the model!!!' 1174 | self.hidden_size = [100,100] 1175 | self.n_classes = 7 1176 | self.dropout_rate = 0.5 1177 | self.activation = activation_functions['reclin'] 1178 | self.n_layers = len(self.hidden_size) 1179 | self.initialize(10,10,10) 1180 | rng = np.random.RandomState(42) 1181 | 1182 | 1183 | # rng = np.random.RandomState(42) 1184 | theano.tests.unittest_tools.verify_grad(fun, [self.W[0].get_value(), self.W[1].get_value(),self.c[0].get_value(), self.c[1].get_value(), 1185 | self.V.get_value(), self.b.get_value(), self.G.get_value()], rng = rng) 1186 | 1187 | 1188 | 1189 | 1190 | def remove_activation(self): 1191 | 1192 | del self.activation 1193 | 1194 | def add_activation(self): 1195 | 1196 | self.activation = activation_functions[self.activation_function_name] 1197 | 1198 | def remove_supervised_layer(self): 1199 | 1200 | if hasattr(self, 'U'): 1201 | del self.U 1202 | if hasattr(self, 'd'): 1203 | del self.d 1204 | 1205 | def add_supervised_layer(self, layer_id): 1206 | 1207 | if layer_id <0: 1208 | print 'there is at least 1 hidden layer' 1209 | exit(-1) 1210 | if layer_id > self.n_layers-1: 1211 | print 'exceed the max number of hidden layers' 1212 | print 'the max number of hidden layers is %d'%(self.n_layers) 1213 | exit(-1) 1214 | # U_value = 1*(2*self.rng.rand(self.hidden_size[layer_id] ,self.n_classes)-1)/(np.max([self.hidden_size[layer_id],self.n_classes])) 1215 | # U_value = self.rng.uniform(-np.sqrt(0.05)/(self.hidden_size[layer_id]+self.n_classes), np.sqrt(0.05)/(self.hidden_size[layer_id]+self.n_classes), size=(self.hidden_size[layer_id],self.n_classes)) 1216 | U_value = (1.0**(layer_id))*self.rng.uniform(-np.sqrt(6)/np.sqrt(self.hidden_size[layer_id]+self.n_classes), np.sqrt(6)/np.sqrt(self.hidden_size[layer_id]+self.n_classes), size=(self.hidden_size[layer_id],self.n_classes)) 1217 | # U_value = 0.001*generate_SparseConnectionMat(self.rng, self.hidden_size[layer_id],self.n_classes, self.n_connection, self.rescale, self.bias) 1218 | U_value = np.asarray(U_value, 
theano.config.floatX) 1219 | d_value = np.zeros((self.n_classes), theano.config.floatX) 1220 | self.U = theano.shared(value=U_value, name='U') 1221 | self.d = theano.shared(value=d_value, name='d') 1222 | 1223 | self.U_polyak = cp.deepcopy(self.U) 1224 | self.d_polyak = cp.deepcopy(self.d) 1225 | 1226 | def remove_top_layer(self): 1227 | if hasattr(self, 'V'): 1228 | del self.V 1229 | if hasattr(self, 'b'): 1230 | del self.b 1231 | 1232 | def add_top_layer(self, layer_id): 1233 | ''' 1234 | layer_id is the id of the hidden layer (starting from 0) on which we build the top layer to compute the conditionals 1235 | ''' 1236 | if layer_id <0: 1237 | print 'there is at least 1 hidden layer' 1238 | exit(-1) 1239 | if layer_id > self.n_layers-1: 1240 | print 'exceed the max number of hidden layers' 1241 | print 'the max number of hidden layers is %d'%(self.n_layers) 1242 | exit(-1) 1243 | # V_value = 1*(2*self.rng.rand(self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)-1)/(np.max([self.voc_size+self.anno_voc_size, self.hidden_size[layer_id]])) 1244 | # V_value = self.rng.uniform(-np.sqrt(0.05)/(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), np.sqrt(0.05)/(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), size=( self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)) 1245 | V_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), np.sqrt(6)/np.sqrt(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), size=( self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)) 1246 | # V_value = 0.01*generate_SparseConnectionMat(self.rng, self.hidden_size[layer_id],self.voc_size+self.anno_voc_size, self.n_connection, self.rescale, self.bias) 1247 | V_value = np.asarray(V_value, theano.config.floatX) 1248 | self.V = theano.shared(value = V_value, name = 'V') 1249 | b_value = np.zeros((self.voc_size+self.anno_voc_size), theano.config.floatX) 1250 | self.b = theano.shared(value = b_value, name = 'b') 1251 | 1252 | self.V_polyak = cp.deepcopy(self.V) 1253 | self.b_polyak = cp.deepcopy(self.b) 1254 | 1255 | def copy_parameters(self, source): 1256 | 1257 | self.V.set_value(source.V.get_value()) 1258 | self.b.set_value(source.b.get_value()) 1259 | self.V_polyak.set_value(source.V_polyak.get_value()) 1260 | self.b_polyak.set_value(source.b_polyak.get_value()) 1261 | for i in xrange(self.n_layers): 1262 | self.W[i].set_value(source.W[i].get_value()) 1263 | self.c[i].set_value(source.c[i].get_value()) 1264 | self.W_polyak[i].set_value(source.W_polyak[i].get_value()) 1265 | self.c_polyak[i].set_value(source.c_polyak[i].get_value()) 1266 | self.G.set_value(source.G.get_value()) 1267 | self.G_polyak.set_value(source.G_polyak.get_value()) 1268 | self.dec_learning_rate.set_value(source.dec_learning_rate.get_value()) 1269 | 1270 | if hasattr(source, 'U'): 1271 | self.U.set_value(source.U.get_value()) 1272 | self.U_polyak.set_value(source.U_polyak.get_value()) 1273 | if hasattr(source,'d'): 1274 | self.d.set_value(source.d.get_value()) 1275 | self.d_polyak.set_value(source.d_polyak.get_value()) 1276 | -------------------------------------------------------------------------------- /gen_dataset_labeled.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 
4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 28 | 29 | 30 | Created on Aug 15, 2015 31 | 32 | @author: yin.zheng 33 | ''' 34 | 35 | import numpy as np 36 | import os 37 | import string 38 | import time 39 | import scipy.sparse as sp 40 | import collections 41 | from itertools import izip 42 | 43 | def LoadSparse(inputfile, verbose=False): 44 | """Loads a sparse matrix stored as npz file.""" 45 | npzfile = np.load(inputfile) 46 | mat = sp.csr_matrix((npzfile['data'], npzfile['indices'], 47 | npzfile['indptr']), 48 | shape=tuple(list(npzfile['shape']))) 49 | if verbose: 50 | 51 | print 'Loaded sparse matrix from %s of shape %s' % (inputfile, 52 | mat.shape.__str__()) 53 | return mat 54 | 55 | def load(dir_path, path_output_data, folder_ID): 56 | """ 57 | dir_path: The dataset to the extracted folder from data download from http://www.cs.toronto.edu/~nitish/multimodal/index.html 58 | path_output_data: The path to save the processed dataset 59 | folder_ID: the split ID (from 1 to 5) 60 | 61 | """ 62 | 63 | dir_path = os.path.expanduser(dir_path) 64 | file_train_indices = os.path.join(dir_path, 'splits', 'train_indices_'+str(folder_ID)+'.npy' ) 65 | file_valid_indices = os.path.join(dir_path, 'splits', 'valid_indices_'+str(folder_ID)+ '.npy' ) 66 | file_test_indices = os.path.join(dir_path, 'splits', 'test_indices_'+str(folder_ID)+'.npy' ) 67 | train_indices = np.load(file_train_indices) 68 | valid_indices = np.load(file_valid_indices) 69 | test_indices = np.load(file_test_indices) 70 | 71 | file_labels = os.path.join(dir_path, 'labels.npy') 72 | labels = np.load(file_labels).astype(np.int32) 73 | trainset_matrix_targets = labels[train_indices, :] 74 | validset_matrix_targets = labels[valid_indices, :] 75 | testset_matrix_targets = labels[test_indices, :] 76 | 77 | file_annotations = os.path.join(dir_path, 'text', 'text_all_2000_labelled.npz') 78 | annotations = LoadSparse(file_annotations, verbose = True) 79 | 
train_annos = annotations[train_indices, :] 80 | valid_annos = annotations[valid_indices, :] 81 | test_annos = annotations[test_indices, :] 82 | 83 | 84 | file_global_features_unlab = os.path.join(dir_path, 'image', 'unlabelled', 'combined-00003_1-of-00100.npy') 85 | global_features_unlab = np.load(file_global_features_unlab)[:, :-2000] 86 | for i in range(2): 87 | if i+4<10: 88 | tmp_file_global_features_unlab = os.path.join(dir_path, 'image', 'unlabelled', 'combined-0000'+str(i+4)+'-of-00100.npy') 89 | elif i+4 <100: 90 | tmp_file_global_features_unlab = os.path.join(dir_path, 'image', 'unlabelled', 'combined-000'+str(i+4)+'-of-00100.npy') 91 | else: 92 | tmp_file_global_features_unlab = os.path.join(dir_path, 'image', 'unlabelled', 'combined-00'+str(i+4)+'-of-00100.npy') 93 | tmp_global_features_unlab = np.load(tmp_file_global_features_unlab)[:, :-2000] 94 | global_features_unlab = np.vstack((global_features_unlab, tmp_global_features_unlab)) 95 | 96 | mean_global_features_unlab = np.mean(a=global_features_unlab, axis=0, dtype=np.float64) 97 | std_global_features_unlab = np.std(a=global_features_unlab, axis=0, dtype=np.float64) 98 | del global_features_unlab 99 | 100 | file_global_features1 = os.path.join(dir_path, 'image', 'labelled', 'combined-00001-of-00100.npy') 101 | global_features1 = np.load(file_global_features1) 102 | file_global_features2 = os.path.join(dir_path, 'image', 'labelled', 'combined-00002-of-00100.npy') 103 | global_features2 = np.load(file_global_features2) 104 | file_global_features3 = os.path.join(dir_path, 'image', 'labelled', 'combined-00003_0-of-00100.npy') 105 | global_features3 = np.load(file_global_features3) 106 | global_features = np.vstack((global_features1, global_features2, global_features3)) 107 | train_global_features = global_features[train_indices, :-2000] 108 | valid_global_features = global_features[valid_indices, :-2000] 109 | test_global_features = global_features[test_indices, :-2000] 110 | 111 | trainset_matrix_hists = global_features[train_indices, -2000:] 112 | validset_matrix_hists = global_features[valid_indices, -2000:] 113 | testset_matrix_hists = global_features[test_indices, -2000:] 114 | 115 | # standardize the labelled splits with the unlabelled-pool statistics; the mean and the std both broadcast along the feature axis 116 | train_global_features -= mean_global_features_unlab[np.newaxis,:] 117 | trainset_matrix_global_features = train_global_features / std_global_features_unlab[np.newaxis, :] 118 | 119 | valid_global_features -= mean_global_features_unlab[np.newaxis,:] 120 | validset_matrix_global_features = valid_global_features / std_global_features_unlab[np.newaxis, :] 121 | 122 | test_global_features -= mean_global_features_unlab[np.newaxis,:] 123 | testset_matrix_global_features = test_global_features / std_global_features_unlab[np.newaxis, :] 124 | 125 | file_train = os.path.join(path_output_data, 'train'+str(folder_ID)) 126 | file_valid = os.path.join(path_output_data, 'valid'+str(folder_ID)) 127 | file_test = os.path.join(path_output_data, 'test'+str(folder_ID)) 128 | 129 | np.savez(file_train, trainset_matrix_hists=trainset_matrix_hists, trainset_matrix_global_features = trainset_matrix_global_features, trainset_matrix_targets=trainset_matrix_targets) 130 | np.savez(file_valid, validset_matrix_hists=validset_matrix_hists, validset_matrix_global_features = validset_matrix_global_features, validset_matrix_targets=validset_matrix_targets) 131 | np.savez(file_test, testset_matrix_hists=testset_matrix_hists, testset_matrix_global_features = testset_matrix_global_features, testset_matrix_targets=testset_matrix_targets) 132 | 133 | 134 | 135 | 136 | if
__name__ == "__main__": 137 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 1) 138 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 2) 139 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 3) 140 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 4) 141 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 5) 142 | -------------------------------------------------------------------------------- /gen_dataset_unlabeled.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 
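The labelled-split generator gen_dataset_labeled.py above standardizes each fold's global image features with the mean and standard deviation estimated on the unlabelled pool, so all five folds share one scaling. A minimal NumPy sketch of that standardization, with purely illustrative array names and sizes (not the repository's API):

    import numpy as np

    rng = np.random.RandomState(0)
    pool_feats = rng.rand(1000, 8)    # stand-in for the unlabelled global features
    split_feats = rng.rand(200, 8)    # stand-in for one labelled split

    mu = pool_feats.mean(axis=0)      # one mean per feature dimension
    sigma = pool_feats.std(axis=0)    # one standard deviation per feature dimension

    # both statistics broadcast along the feature axis
    standardized = (split_feats - mu[np.newaxis, :]) / sigma[np.newaxis, :]

Estimating the statistics once on the unlabelled pool keeps every labelled fold on the same scale instead of re-fitting them per split.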
28 | 29 | 30 | Created on Aug 15, 2015 31 | 32 | @author: zhengyin 33 | 34 | 35 | ''' 36 | 37 | import numpy as np 38 | import os 39 | import string 40 | import time 41 | import scipy.sparse as sp 42 | import collections 43 | from itertools import izip 44 | 45 | def LoadSparse(inputfile, verbose=False): 46 | """Loads a sparse matrix stored as npz file.""" 47 | npzfile = np.load(inputfile) 48 | mat = sp.csr_matrix((npzfile['data'], npzfile['indices'], 49 | npzfile['indptr']), 50 | shape=tuple(list(npzfile['shape']))) 51 | if verbose: 52 | 53 | print 'Loaded sparse matrix from %s of shape %s' % (inputfile, 54 | mat.shape.__str__()) 55 | return mat 56 | 57 | def load(dir_path, path_output_data): 58 | """ 59 | dir_path: The path to the extracted folder of the data downloaded from http://www.cs.toronto.edu/~nitish/multimodal/index.html 60 | path_output_data: The path to save the processed dataset 61 | 62 | """ 63 | 64 | dir_path = os.path.expanduser(dir_path) 65 | file_annotations = os.path.join(dir_path, 'text', 'text_all_2000_unlabelled.npz') 66 | annotations = LoadSparse(file_annotations, verbose = True) 67 | 68 | 69 | 70 | 71 | file_global_features = os.path.join(dir_path, 'image', 'unlabelled', 'combined-00003_1-of-00100.npy') 72 | global_features = np.load(file_global_features)[:, :-2000] 73 | unlabeled_matrix_hists = np.load(file_global_features)[:, -2000:] 74 | for i in range(97): 75 | if i+4<10: 76 | tmp_file_global_features = os.path.join(dir_path, 'image', 'unlabelled', 'combined-0000'+str(i+4)+'-of-00100.npy') 77 | elif i+4 <100: 78 | tmp_file_global_features = os.path.join(dir_path, 'image', 'unlabelled', 'combined-000'+str(i+4)+'-of-00100.npy') 79 | else: 80 | tmp_file_global_features = os.path.join(dir_path, 'image', 'unlabelled', 'combined-00'+str(i+4)+'-of-00100.npy') 81 | tmp_global_features = np.load(tmp_file_global_features)[:, :-2000] 82 | tmp_mir_unlab_histograms = np.load(tmp_file_global_features)[:, -2000:] 83 | global_features = np.vstack((global_features, tmp_global_features)) 84 | unlabeled_matrix_hists = np.vstack((unlabeled_matrix_hists, tmp_mir_unlab_histograms)) 85 | 86 | mean_global_features = np.mean(a=global_features, axis=0, dtype=np.float64) 87 | std_global_features = np.std(a=global_features, axis=0, dtype=np.float64) 88 | global_features -= mean_global_features[np.newaxis,:] 89 | global_features /= std_global_features 90 | batch_size = global_features.shape[0]/50 91 | for i in range(50): 92 | file_unlab = os.path.join(path_output_data, 'unlabeled'+str(i+1)) 93 | np.savez(file_unlab, unlabeled_matrix_hists=unlabeled_matrix_hists[batch_size*i:batch_size*(i+1),:], unlabeled_matrix_global_features = global_features[batch_size*i:batch_size*(i+1),:]) 94 | # 'global_features' holds the standardized features at this point; each of the 50 chunks is later loaded by its folder_ID 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | if __name__ == "__main__": 103 | load('/run/media/ian/2TDisk/Flickr', '/run/media/ian/2TDisk/Flickr') 104 | -------------------------------------------------------------------------------- /licence: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, are 4 | # permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2.
Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 14 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 16 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 18 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | # The views and conclusions contained in the software and documentation are those of the 24 | # authors and should not be interpreted as representing official policies, either expressed 25 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 26 | -------------------------------------------------------------------------------- /run_SupDeepDocNADE.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 
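gen_dataset_unlabeled.py above writes the unlabelled collection as 50 equally sized npz chunks, and MIR_Flickr_Theano_Unlab.load recovers the rows belonging to a given folder_ID with the same slice arithmetic. A toy sketch of that chunk indexing, with made-up sizes and names (the real chunks hold the 2000-bin histograms plus the global features):

    import numpy as np

    n_images, n_chunks = 1000, 50
    hists = np.random.randint(0, 5, size=(n_images, 10))  # toy stand-in for the histograms
    chunk_size = n_images // n_chunks                      # integer division, as in the generator

    folder_ID = 3                                          # chunks are numbered from 1
    rows = slice((folder_ID - 1) * chunk_size, folder_ID * chunk_size)
    chunk = hists[rows]                                    # the rows that would sit in 'unlabeled3.npz'

Because chunk_size comes from integer division, any leftover images beyond 50 * chunk_size are simply dropped.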
28 | 29 | 30 | Created on Aug 15, 2015 31 | 32 | @author: zhengyin 33 | ''' 34 | 35 | import MIR_Flickr_Theano_lab as MF 36 | from SupDeepDocNADE import SupDeepDocNADE 37 | import copy 38 | import numpy as np 39 | from itertools import izip 40 | import random 41 | import collections 42 | # import cPickle 43 | import sys, os 44 | import fcntl 45 | import time as t 46 | import theano 47 | import theano.tensor as T 48 | import theano.sparse as S 49 | import cPickle 50 | from sklearn.metrics import average_precision_score 51 | from liblinearutil import * 52 | import gc 53 | sys.argv.pop(0); # Remove first argument 54 | def get_done_text(start_time): 55 | sys.stdout.flush() 56 | return "DONE in {:.4f} seconds.".format(t.time() - start_time) 57 | 58 | def softmax(a): 59 | max_a = np.amax(a , axis=1) 60 | max_a = max_a.reshape(max_a.shape[0], 1) 61 | e = np.exp(a-max_a) 62 | dist = e/np.sum(e, axis = 1)[:, np.newaxis] 63 | return dist 64 | 65 | # activation_functions = {"sigmoid": theano.tensor.nnet.sigmoid, "reclin": lambda x: x * (x > 0), "tanh": theano.tensor.tanh} 66 | # Check if all options are provided 67 | if 17 != len(sys.argv): 68 | print "Usage: python run_SupDeepDocNADE.py folder_ID use_pretrain max_iter look_ahead hidden_size \ 69 | learning_rate unsup_weight activation_function Linear_minC, Linear_maxC, \ 70 | dropout_rate uniresult_dir Pretrain_model_name\ 71 | lab_dataset_dir batch_size anno_weight\ 72 | polyakexp_weight" 73 | sys.exit() 74 | str2bool = {'True':True, 'False': False} # needed by the first argument parse below; harmlessly re-defined further down 75 | folder_ID = int(sys.argv[0]) 76 | use_pretrain = str2bool[sys.argv[1]] 77 | max_iter = int(sys.argv[2]) 78 | look_ahead = int(sys.argv[3]) 79 | hidden_size_split = (sys.argv[4]).split('_') 80 | hidden_size = [int(x) for x in hidden_size_split] 81 | learning_rate = float(sys.argv[5]) 82 | unsup_weight = float(sys.argv[6]) 83 | activation_function = sys.argv[7] 84 | sup_option = 'full' 85 | Linear_minC = float(sys.argv[8]) 86 | Linear_maxC = float(sys.argv[9]) 87 | dropout_split = (sys.argv[10]).split('_') 88 | dropout_rate = [float(x) for x in dropout_split] 89 | uniresult_dir = sys.argv[11] 90 | full_path_premodel = sys.argv[12] 91 | lab_dataset_dir = sys.argv[13] 92 | batch_size = int(sys.argv[14]) 93 | normalize_by_document_size = False 94 | anno_weight = float(sys.argv[15]) 95 | log_option = "NoLog" 96 | spatial_pyramid = [1] 97 | scaled_method = 'std' 98 | length_limit = float(100.0) 99 | decrease_constant = float(1.0) 100 | polyakexp_weight = float(sys.argv[16]) 101 | 102 | 103 | 104 | 105 | def compute_AP_Prec50(preds, targets): 106 | 107 | targets_sorted = targets[(-preds.T).argsort().flatten()] 108 | cumsum = targets_sorted.cumsum() 109 | prec = cumsum / np.arange(1.0, 1 + targets.shape[0]) 110 | ap = average_precision_score(targets, preds) 111 | prec50 = prec[50] 112 | return ap, prec50 113 | 114 | 115 | def compute_MAP_Prec50(predictions, targets): 116 | 117 | numdims = predictions.shape[1] 118 | ap = 0 119 | prec50 = 0 120 | ap_list = [] 121 | prec50_list = [] 122 | 123 | for i in range(numdims): 124 | this_ap, this_prec = compute_AP_Prec50(predictions[:,i], targets[:,i]) 125 | ap_list.append(this_ap) 126 | prec50_list.append(this_prec) 127 | ap += this_ap 128 | prec50 += this_prec 129 | map = ap/numdims 130 | mprec50 = prec50/numdims 131 | return map, mprec50 132 | 133 | 134 | 135 | 136 | str2bool = {'True':True, 'False': False} 137 | folder_ID = int(sys.argv[0]) 138 | use_pretrain = str2bool[sys.argv[1]] 139 | max_iter = int(sys.argv[2]) 140 | look_ahead = int(sys.argv[3]) 141 | hidden_size_split = (sys.argv[4]).split('_')
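compute_AP_Prec50 and compute_MAP_Prec50 above score one label column at a time: average precision comes from sklearn's average_precision_score, and precision@50 is read off the cumulative hit count after sorting images by decreasing score. A small self-contained check of that ranking logic on synthetic scores (too few items here for a literal precision@50, so precision@2 is printed instead):

    import numpy as np
    from sklearn.metrics import average_precision_score

    preds = np.array([0.9, 0.1, 0.8, 0.4, 0.7])
    targets = np.array([1, 0, 1, 0, 0])

    order = (-preds).argsort()                    # rank items by decreasing score
    hits = targets[order].cumsum()                # cumulative number of relevant items
    precision_at_k = hits / np.arange(1.0, targets.size + 1.0)

    ap = average_precision_score(targets, preds)  # averaging this over classes gives the reported MAP
    print("AP = %.3f, precision@2 = %.3f" % (ap, precision_at_k[1]))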
142 | hidden_size = [int(x) for x in hidden_size_split] 143 | learning_rate = float(sys.argv[5]) 144 | unsup_weight = float(sys.argv[6]) 145 | activation_function = sys.argv[7] 146 | sup_option = 'full' 147 | Linear_minC = float(sys.argv[8]) 148 | Linear_maxC = float(sys.argv[9]) 149 | dropout_split = (sys.argv[10]).split('_') 150 | dropout_rate = [float(x) for x in dropout_split] 151 | uniresult_dir = sys.argv[11] 152 | full_path_premodel = sys.argv[12] 153 | lab_dataset_dir = sys.argv[13] 154 | batch_size = int(sys.argv[14]) 155 | normalize_by_document_size = False 156 | anno_weight = float(sys.argv[15]) 157 | log_option = "NoLog" 158 | spatial_pyramid = [1] 159 | scaled_method = 'std' 160 | length_limit = float(100.0) 161 | decrease_constant = float(1.0) 162 | polyakexp_weight = float(sys.argv[16]) 163 | 164 | file_name_Linear = 'Polyak_Linear_Flickr_SupDeepDocNADE_%s__%s__%s.txt' %(sys.argv[0], activation_function, log_option) 165 | uniresultfile_name_Linear = os.path.join(uniresult_dir, file_name_Linear) 166 | print uniresultfile_name_Linear 167 | rng_shuffle = np.random.mtrand.RandomState(1111) 168 | if not os.path.exists(lab_dataset_dir): 169 | print 'label dataset not found' 170 | exit(-1) 171 | 172 | 173 | 174 | print 'train using labeled data' 175 | 176 | 177 | dataset = MF.load(lab_dataset_dir, folder_ID, log_option, spatial_pyramid) 178 | trainset_raw = dataset['train'] 179 | validset_raw = dataset['valid'] 180 | testset_raw = dataset['test'] 181 | n_classes = trainset_raw['meta']['n_classes'] 182 | 183 | train_labels = trainset_raw['targets'] 184 | valid_labels = validset_raw['targets'] 185 | test_labels = testset_raw['targets'] 186 | 187 | trainset = {} 188 | validset = {} 189 | testset = {} 190 | trainset['hists_visual'] = theano.shared(np.asarray(trainset_raw['hists_visual'], theano.config.floatX)) 191 | trainset['hists_anno'] = theano.shared(trainset_raw['hists_anno'].astype(theano.config.floatX)) 192 | trainset['global_features'] = theano.shared(np.asarray(trainset_raw['global_features'], theano.config.floatX)) 193 | trainset['targets'] = theano.shared(np.asarray(trainset_raw['targets'], theano.config.floatX)) 194 | 195 | validset['hists_visual'] = theano.shared(np.asarray(validset_raw['hists_visual'], theano.config.floatX)) 196 | validset['hists_anno'] = theano.shared(validset_raw['hists_anno'].astype(theano.config.floatX)) 197 | validset['global_features'] = theano.shared(np.asarray(validset_raw['global_features'], theano.config.floatX)) 198 | validset['targets'] = theano.shared(np.asarray(validset_raw['targets'], theano.config.floatX)) 199 | 200 | testset['hists_visual'] = theano.shared(np.asarray(testset_raw['hists_visual'], theano.config.floatX)) 201 | testset['hists_anno'] = theano.shared(testset_raw['hists_anno'].astype(theano.config.floatX)) 202 | testset['global_features'] = theano.shared(np.asarray(testset_raw['global_features'], theano.config.floatX)) 203 | testset['targets'] = theano.shared(np.asarray(testset_raw['targets'], theano.config.floatX)) 204 | 205 | n_train = trainset_raw['meta']['length'] 206 | n_valid = validset_raw['meta']['length'] 207 | n_test = testset_raw['meta']['length'] 208 | 209 | n_train_batches = trainset_raw['meta']['length'] / batch_size 210 | n_valid_batches = validset_raw['meta']['length'] / batch_size 211 | n_test_batches = testset_raw['meta']['length'] / batch_size 212 | 213 | aver_words_count_trainset = trainset_raw['hists_visual'].sum(axis=1).mean() 214 | print 'average word counts of trainset is %f'%(aver_words_count_trainset) 
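The average word count printed above feeds the unsupervised part of the training cost: build_graph splits every histogram into a part treated as already observed and a part left to predict, and the softmax negative log-likelihood on the predicted part is reweighted by D/(D-d) and by the average document length. A NumPy sketch of that split and weighting for a single toy document, with a placeholder NLL value (the real one comes from the Theano softmax):

    import numpy as np

    rng = np.random.RandomState(1234)
    hist = np.array([[3., 0., 2., 1.]])             # word-count histogram of one document
    aver_words_count = 6.0                          # illustrative mean total count over the training set

    mask_unif = 1.0 - rng.uniform(size=hist.shape)
    observed = np.floor(mask_unif * (hist + 1.0))   # counts treated as already seen (between 0 and hist per bin)
    predict = hist - observed                       # counts the model must still explain
    d, D = observed.sum(axis=1), hist.sum(axis=1)

    nll = 1.0                                       # placeholder for the softmax NLL over 'predict'
    cost = D / (D - d) / aver_words_count * nll     # weighting used when not normalizing by document size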
215 | 216 | model = SupDeepDocNADE(hidden_size = hidden_size, 217 | learning_rate = learning_rate, 218 | # learning_rate_unsup = learning_rate_unsup, 219 | activation_function = activation_function, 220 | word_representation_size = 0, 221 | dropout_rate = dropout_rate, 222 | normalize_by_document_size = normalize_by_document_size, 223 | anno_weight = anno_weight, 224 | batch_size = batch_size, 225 | sup_option = sup_option, 226 | unsup_weight = unsup_weight, 227 | aver_words_count = aver_words_count_trainset, 228 | preprocess_method = scaled_method, 229 | length_limit = length_limit, 230 | decrease_constant = decrease_constant, 231 | polyakexp_weight = polyakexp_weight 232 | ) 233 | pretrain_learning_rate = 0 # when it == 0, means no pretraining 234 | 235 | spatial_split = np.asarray(spatial_pyramid, np.int32)**2*trainset_raw['meta']['voc_size'] 236 | region_split = np.append(spatial_split, trainset_raw['meta']['text_voc_size']) 237 | region_split = np.add.accumulate(region_split) 238 | 239 | model.initialize(trainset_raw['meta']['voc_size']*trainset_raw['meta']['n_regions'], 240 | trainset_raw['meta']['text_voc_size'], 241 | trainset_raw['meta']['global_feat_size'], 242 | trainset_raw['meta']['n_classes'], 243 | region_split) 244 | 245 | if use_pretrain: 246 | full_path_premodel = os.path.expanduser(full_path_premodel) 247 | if os.path.isfile(full_path_premodel): 248 | model_file = open(full_path_premodel, 'rb') 249 | pre_model = cPickle.load(model_file) 250 | model_file.close() 251 | pre_model.add_activation() 252 | else: 253 | print 'ERROR: pretrained model not found' 254 | exit(-1) 255 | 256 | assert(pre_model.hidden_size == hidden_size) 257 | assert(pre_model.activation_function_name == activation_function) 258 | 259 | print '========================pre_trained model loaded successfully=======================================' 260 | pretrain_learning_rate = pre_model.learning_rate 261 | model.add_supervised_layer(model.n_layers-1) 262 | model.add_top_layer(model.n_layers-1) 263 | model.copy_parameters(pre_model) 264 | 265 | 266 | model.dec_learning_rate.set_value(model.learning_rate) 267 | model.compile_function(model.n_layers, trainset, validset) 268 | # model.compile_compute_representation_function(model.n_layers, batch_size, trainset) 269 | best_valid_error = -np.inf 270 | best_valid_prec50 = -np.inf 271 | best_epoch = 0 272 | best_model = copy.deepcopy(model) 273 | nb_of_epocs_without_improvement = 0 274 | epoch = 0 275 | print '\n### Training DeepDocNADE ###' 276 | start_training_time = t.time() 277 | while(epoch < max_iter and nb_of_epocs_without_improvement < look_ahead): 278 | epoch += 1 279 | print 'Epoch {0}'.format(epoch) 280 | print '\tTraining ...', 281 | start_time = t.time() 282 | cost_train = [] 283 | unsup_cost_train = [] 284 | sup_cost_train = [] 285 | prob_target_train = np.zeros((n_train, n_classes)) 286 | for minibatch_index in range(n_train_batches): 287 | # cost_value,log_prob_target_value, unsup_cost_value, sup_cost_value, h_value, first_input_value, h_sup_value = model.train(minibatch_index) 288 | cost_value,log_prob_target_value, unsup_cost_value, sup_cost_value = model.train(minibatch_index) 289 | cost_train += [cost_value] 290 | unsup_cost_train += [unsup_cost_value] 291 | sup_cost_train += [sup_cost_value] 292 | prob_target_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = log_prob_target_value 293 | train_cost_error = np.asarray(cost_train).mean() 294 | train_unsup_cost_error = np.asarray(unsup_cost_train).mean() 295 | 
train_sup_cost_error = np.asarray(sup_cost_train).mean() 296 | train_map,train_prec50 = compute_MAP_Prec50(np.exp(prob_target_train), train_labels) 297 | print 'Train :', get_done_text(start_time), " MAP: {0:.6f}".format(train_map), " Prec@50: {0:.6f}".format(train_prec50), " Cost Error: {0:.6f}".format(train_cost_error) , " Unsup_Cost Error: {0:.6f}".format(train_unsup_cost_error), " Sup_Cost Error: {0:.6f}".format(train_sup_cost_error), 'mean_p: {0:.6f}'.format(np.exp(prob_target_train).mean()) 298 | 299 | print '\tValidating ...', 300 | start_time = t.time() 301 | cost_valid = [] 302 | unsup_cost_valid = [] 303 | sup_cost_valid = [] 304 | prob_target_valid = np.zeros((n_valid, n_classes)) 305 | for minibatch_index in range(n_valid_batches): 306 | # cost_value,log_prob_target_value, unsup_cost_value, sup_cost_value, h_value, first_input_value , h_sup_value = model.valid(minibatch_index) 307 | cost_value,log_prob_target_value, unsup_cost_value, sup_cost_value = model.valid(minibatch_index) 308 | cost_valid += [cost_value] 309 | unsup_cost_valid += [unsup_cost_value] 310 | sup_cost_valid += [sup_cost_value] 311 | prob_target_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = log_prob_target_value 312 | valid_cost_error = np.asarray(cost_valid).mean() 313 | valid_unsup_cost_error = np.asarray(unsup_cost_valid).mean() 314 | valid_sup_cost_error = np.asarray(sup_cost_valid).mean() 315 | valid_map,valid_prec50 = compute_MAP_Prec50(np.exp(prob_target_valid), valid_labels) 316 | print 'Validation:', get_done_text(start_time), " MAP: {0:.6f}".format(valid_map), " Prec@50: {0:.6f}".format(valid_prec50), " Cost Error: {0:.6f}".format(valid_cost_error) , " Unsup_Cost Error: {0:.6f}".format(valid_unsup_cost_error), " Sup_Cost Error: {0:.6f}".format(valid_sup_cost_error), 'mean_p: {0:.6f}'.format(np.exp(prob_target_valid).mean()) 317 | if valid_map > best_valid_error: 318 | # start_time = t.time() 319 | best_valid_error = valid_map 320 | best_valid_prec50 = valid_prec50 321 | best_epoch = epoch 322 | nb_of_epocs_without_improvement = 0 323 | del best_model 324 | gc.collect() 325 | best_model = copy.deepcopy(model) 326 | # print 'deep copying...',get_done_text(start_time) 327 | else: 328 | nb_of_epocs_without_improvement += 1 329 | 330 | 331 | 332 | print 'begin polyak svm part' 333 | 334 | #compute hidden representation of the testset 335 | hidden_represenation_trainset = np.zeros((n_train, best_model.hidden_size[-1])) 336 | hidden_represenation_validset = np.zeros((n_valid, best_model.hidden_size[-1])) 337 | hidden_represenation_testset = np.zeros((n_test, best_model.hidden_size[-1])) 338 | best_model.compile_compute_representation_function_polyak(best_model.n_layers, trainset) 339 | for minibatch_index in range(n_train_batches): 340 | h,log_prob_target_value = best_model.compute_representation(minibatch_index) 341 | hidden_represenation_trainset[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = h 342 | best_model.compile_compute_representation_function_polyak(best_model.n_layers, validset) 343 | for minibatch_index in range(n_valid_batches): 344 | h,log_prob_target_value = best_model.compute_representation(minibatch_index) 345 | hidden_represenation_validset[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = h 346 | best_model.compile_compute_representation_function_polyak(best_model.n_layers, testset) 347 | prob_target_test = np.zeros((n_test, n_classes)) 348 | for minibatch_index in range(n_test_batches): 349 | h,log_prob_target_value = 
best_model.compute_representation(minibatch_index) 350 | hidden_represenation_testset[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = h 351 | prob_target_test[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = log_prob_target_value 352 | 353 | 354 | hidden_represenation_trainset = hidden_represenation_trainset.tolist() 355 | hidden_represenation_validset = hidden_represenation_validset.tolist() 356 | hidden_represenation_testset = hidden_represenation_testset.tolist() 357 | 358 | #=========================================Logistic layer MAP=========================================== 359 | 360 | test_map_polyak,test_prec50_polyak = compute_MAP_Prec50(np.exp(prob_target_test), test_labels) 361 | print 'Logistic MAP is '+ str(test_map_polyak) 362 | print 'Logistic prec@50 is '+ str(test_prec50_polyak) 363 | 364 | 365 | 366 | #====================================================================================================== 367 | 368 | best_C_Linear_polyak = np.zeros(n_classes) 369 | best_ap_Linear_polyak = -np.ones(n_classes)*np.inf 370 | best_prec_Linear_polyak = -np.ones(n_classes)*np.inf 371 | Linear_C = np.arange(Linear_minC, Linear_maxC, 0.25) 372 | 373 | 374 | print 'Linear SVM Model Training' 375 | cnt_Linear = 0 376 | for C in Linear_C: 377 | # print cnt_Linear 378 | start = t.time() 379 | for i in range(n_classes): 380 | label_train = train_labels[:,i] 381 | label_train = label_train.tolist() 382 | label_valid = valid_labels[:,i] 383 | label_valid = label_valid.tolist() 384 | 385 | train_options = '-s 0 -c %e -q'%np.exp2(C) 386 | test_options = '-b 0 -q' 387 | # train_options = '-s 1 -c %e -q'%np.exp2(C) 388 | # test_options = '-q' 389 | svm_model = train(label_train, hidden_represenation_trainset, train_options) 390 | 391 | p_labels, p_acc, p_vals = predict(label_valid, hidden_represenation_validset, svm_model, test_options) 392 | p_vals = np.asarray(p_vals) 393 | index = svm_model.get_labels().index(1) 394 | if index ==0: 395 | confidence = p_vals 396 | elif index == 1: 397 | confidence = -p_vals 398 | else: 399 | raise TypeError('wrong index') 400 | # confidence = p_vals[:, index] 401 | this_ap, this_prec = compute_AP_Prec50(confidence, valid_labels[:,i]) 402 | if this_ap > best_ap_Linear_polyak[i]: 403 | best_ap_Linear_polyak[i] = this_ap 404 | best_prec_Linear_polyak[i] = this_prec 405 | best_C_Linear_polyak[i] = C 406 | end = t.time() 407 | print '%d/%d cross-validation cost time %f'%(cnt_Linear, len(Linear_C), end-start) 408 | print 'the map for now on validset is %f'%(np.mean(best_ap_Linear_polyak)) 409 | cnt_Linear += 1 410 | 411 | 412 | print '=======================================Final SVM Part===============================================' 413 | Linear_ap_list = [] 414 | Linear_prec_list = [] 415 | Linear_ap = 0 416 | Linear_prec = 0 417 | hidden_represenation_trainset.extend(hidden_represenation_validset) 418 | train_labels_final = np.vstack((train_labels, valid_labels)) 419 | # file_conf = open("/home/local/USHERBROOKE/zhey2402/DeepDocNADE/SupDocNADE_Confidence_value.txt", 'w') 420 | for i in range(n_classes): 421 | print 'Final SVM for class %d'%i 422 | label_train = train_labels_final[:,i] 423 | label_train = label_train.tolist() 424 | label_test = test_labels[:,i] 425 | label_test = label_test.tolist() 426 | 427 | # train_options = '-s 1 -c %e -q'%np.exp2(best_C_Linear[i]) 428 | # test_options = '-q' 429 | train_options = '-s 0 -c %e -q'%np.exp2(best_C_Linear_polyak[i]) 430 | test_options = '-b 0 -q' 431 | svm_model = 
train(label_train, hidden_represenation_trainset, train_options) 432 | p_labels, p_acc, p_vals = predict(label_test, hidden_represenation_testset, svm_model, test_options) 433 | p_vals = np.asarray(p_vals) 434 | index = svm_model.get_labels().index(1) 435 | if index ==0: 436 | confidence = p_vals 437 | elif index == 1: 438 | confidence = -p_vals 439 | else: 440 | raise TypeError('wrong index') 441 | # confidence = p_vals[:, index] 442 | this_ap, this_prec = compute_AP_Prec50(confidence, test_labels[:,i]) 443 | Linear_ap += this_ap 444 | Linear_prec += this_prec 445 | Linear_ap_list.append(this_ap) 446 | Linear_prec_list.append(this_prec) 447 | # confidence.tofile(file_conf, sep=' ', format='%s') 448 | # file_conf.write('\n') 449 | # file_conf.close() 450 | Linear_map_polyak = Linear_ap/n_classes 451 | Linear_prec50_polyak = Linear_prec/n_classes 452 | print 'Linear SVM map is '+ str(Linear_map_polyak) 453 | print 'Linear SVM prec@50 is '+ str(Linear_prec50_polyak) 454 | #=============================================================================== 455 | 456 | 457 | line_linear = '%f %f %f %f %f %f %f %f %d %s %s %s %s %d %d %d %f %f %s %s %f %s %s %f %f %f %s %s\n'%(Linear_map_polyak, 458 | np.mean(best_ap_Linear_polyak), 459 | test_map_polyak, 460 | best_valid_error, 461 | Linear_prec50_polyak, 462 | np.mean(best_prec_Linear_polyak), 463 | test_prec50_polyak, 464 | best_valid_prec50, 465 | folder_ID, 466 | spatial_pyramid, 467 | hidden_size, 468 | learning_rate, 469 | activation_function, 470 | max_iter, 471 | look_ahead, 472 | epoch, 473 | Linear_minC, 474 | Linear_maxC, 475 | dropout_rate, 476 | unsup_weight, 477 | anno_weight, 478 | sup_option, 479 | scaled_method, 480 | length_limit, 481 | decrease_constant, 482 | polyakexp_weight, 483 | ' '.join(str(x) for x in best_C_Linear_polyak), 484 | full_path_premodel 485 | ) 486 | uniresultfile_linear = open(uniresultfile_name_Linear, 'a') 487 | fcntl.flock(uniresultfile_linear.fileno(), fcntl.LOCK_EX) 488 | uniresultfile_linear.write(line_linear) 489 | uniresultfile_linear.close() # unlocks the file 490 | 491 | 492 | 493 | 494 | print 'done' 495 | -------------------------------------------------------------------------------- /run_pretrain_DeepDocNADE.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 28 | 29 | 30 | Created on Aug 15, 2015 31 | 32 | @author: Yin Zheng 33 | ''' 34 | import MIR_Flickr_Theano_Unlab as MFU 35 | from SupDeepDocNADE import DeepDocNADE 36 | import copy 37 | import numpy as np 38 | from itertools import izip 39 | import random 40 | import collections 41 | import cPickle 42 | import sys, os 43 | import fcntl 44 | import time as t 45 | import theano 46 | import theano.tensor as T 47 | import theano.sparse as S 48 | from liblinearutil import * 49 | import gc 50 | import shutil 51 | import glob 52 | 53 | # activation_functions = {"sigmoid": theano.tensor.nnet.sigmoid, "reclin": lambda x: x * (x > 0), "tanh": theano.tensor.tanh} 54 | def get_done_text(start_time): 55 | sys.stdout.flush() 56 | return "DONE in {:.4f} seconds.".format(t.time() - start_time) 57 | sys.argv.pop(0); # Remove first argument 58 | 59 | # Check that all 12 options are provided (sys.argv[11] is used below when building the model-name template) 60 | if 12 != len(sys.argv): 61 | print "Usage: python run_pretrain.py n_pretrain pre_learning_rate hidden_size activation_function dropout_rate model_file_dir unlab_dataset_dir batch_size anno_weight platform polyakexp_weight model_init" 62 | sys.exit() 63 | 64 | # scene15.obtain(r'/home/ian/ml_datasets/Scene15') 65 | str2bool = {'True':True, 'False': False} 66 | n_pretrain = int(sys.argv[0]) 67 | pre_learning_rate = float(sys.argv[1]) 68 | hidden_size_split = (sys.argv[2]).split('_') 69 | hidden_size = [int(x) for x in hidden_size_split] 70 | activation_function = sys.argv[3] 71 | dropout_split = (sys.argv[4]).split('_') 72 | dropout_rate = [float(x) for x in dropout_split] 73 | model_file_dir = sys.argv[5] 74 | unlab_dataset_dir = sys.argv[6] 75 | batch_size = int(sys.argv[7]) 76 | normalize_by_document_size = False 77 | anno_weight = float(sys.argv[8]) 78 | log_option = "NoLog" 79 | spatial_pyramid = [1] 80 | platform = 'PC' 81 | scaled_method = 'std' 82 | length_limit = float(100.0) 83 | decrease_constant = float(1.0) 84 | polyakexp_weight = float(sys.argv[9]) 85 | pretrained_models_dir = sys.argv[10] 86 | 87 | if not os.path.exists(unlab_dataset_dir): 88 | print 'no such file for dataset' 89 | exit(-1) 90 | 91 | 92 | #================================================= search for a saved model that matches the options ======================================== 93 | if normalize_by_document_size: 94 | template_ID = 'Wholelayers__%s__%s__%s__%s__*__%s__%s__%f__%s__%f__%f__%f__normalized_by_doc_size'%(sys.argv[2],activation_function, log_option, sys.argv[11], pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight) 95 | else: 96 | template_ID = 'Wholelayers__%s__%s__%s__%s__*__%s__%s__%f__%s__%f__%f__%f'%(sys.argv[2],activation_function, log_option,
sys.argv[11], pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight ) 97 | template_model_name = os.path.join(pretrained_models_dir, template_ID+'___model.pkl') 98 | 99 | potential_model_name = glob.glob(template_model_name) 100 | 101 | init_model = 'None' 102 | n_trained = -np.inf 103 | for tmp_model in potential_model_name: 104 | tmp_config = tmp_model.split('__') 105 | tmp_n_trained = int(tmp_config[5]) 106 | if tmp_n_trained > n_trained: 107 | n_trained = tmp_n_trained 108 | init_model = tmp_model 109 | 110 | #=========================================================================================================================================== 111 | if not os.path.exists(init_model): 112 | print 'no init model found, we will train the model from epoch 0' 113 | flag_continue = False 114 | else: 115 | print 'the model we based on is %s'%(init_model) 116 | flag_continue = True 117 | # exit(-1) 118 | 119 | 120 | if platform == 'Guilinmin': 121 | src = unlab_dataset_dir 122 | dst = os.path.join('/dev/shm/', os.environ['PBS_JOBID']) 123 | # dst = os.path.join('/home/local/USHERBROOKE/zhey2402/localscratch', os.environ['PBS_JOBID']) 124 | print src 125 | print dst 126 | 127 | print 'starting copying' 128 | for i in xrange(1, 51): 129 | src_filename = os.path.join(src, 'unlabeled'+str(i)+'.npz.gz') 130 | start_copy = t.time() 131 | shutil.copy(src_filename, dst) 132 | print 'Copy file_ID %d to shm'%(i), get_done_text(start_copy) 133 | 134 | 135 | start_copy = t.time() 136 | src_filename = os.path.join(src, 'others.tar.gz') 137 | shutil.copy(src_filename, dst) 138 | print 'Copy others.tar.gz to shm', get_done_text(start_copy) 139 | 140 | start_extract = t.time() 141 | path_tarfile = os.path.join(dst, 'others.tar.gz') 142 | cmd = 'tar xvfzm '+path_tarfile + ' -C ' + dst 143 | print cmd 144 | os.system(cmd) 145 | print 'Extract other.tar.gz', get_done_text(start_extract) 146 | 147 | start_remove_tar = t.time() 148 | cmd = 'rm ' + path_tarfile 149 | print cmd 150 | print 'Removing other.tar.gz file to save space' 151 | os.system(cmd) 152 | print 'Remove tar', get_done_text(start_remove_tar) 153 | 154 | unlab_dataset_dir = dst 155 | 156 | 157 | 158 | if flag_continue: 159 | init_config = init_model.split('__') 160 | init_hidden_size = init_config[1] 161 | init_activation = init_config[2] 162 | init_logoption = init_config[3] 163 | init_spatial = init_config[4] 164 | init_epoch = int(init_config[5]) 165 | init_lr = init_config[6] 166 | init_dropout = init_config[7] 167 | init_annoweight = float(init_config[8]) 168 | init_scale = init_config[9] 169 | init_lengthlimit = float(init_config[10]) 170 | init_decreaseconst = float(init_config[11]) 171 | init_polyweight = float(init_config[12]) 172 | 173 | init_model = os.path.expanduser(init_model) 174 | if os.path.isfile(init_model): 175 | model_file = open(init_model, 'rb') 176 | model_init = cPickle.load(model_file) 177 | model_file.close() 178 | model_init.add_activation() 179 | else: 180 | print 'ERROR: init model not found' 181 | exit(-1) 182 | 183 | assert(model_init.hidden_size == hidden_size) 184 | assert(model_init.learning_rate == pre_learning_rate) 185 | assert(model_init.activation_function_name == activation_function) 186 | assert(model_init.dropout_rate == dropout_rate) 187 | assert(model_init.normalize_by_document_size == normalize_by_document_size) 188 | assert(model_init.anno_weight == anno_weight) 189 | assert(model_init.batch_size == batch_size) 190 | 
assert(model_init.preprocess_method == scaled_method) 191 | assert(model_init.length_limit == length_limit) 192 | assert(model_init.decrease_constant == decrease_constant) 193 | assert(model_init.polyakexp_weight == polyakexp_weight) 194 | assert(init_logoption == log_option) 195 | assert(init_spatial == '1') 196 | if init_epoch >= n_pretrain: 197 | print 'the model is trained %d epoches, which equals or exceeds the number %d you required'%(init_epoch, n_pretrain) 198 | exit(-1) 199 | else: 200 | init_epoch = 0 201 | 202 | model = DeepDocNADE(hidden_size = hidden_size, 203 | learning_rate = pre_learning_rate, 204 | activation_function = activation_function, 205 | word_representation_size = 0, 206 | dropout_rate = dropout_rate, 207 | normalize_by_document_size = normalize_by_document_size, 208 | anno_weight = anno_weight, 209 | batch_size = batch_size, 210 | preprocess_method = scaled_method, 211 | length_limit = length_limit, 212 | decrease_constant = decrease_constant, 213 | polyakexp_weight = polyakexp_weight, 214 | seed_np = init_epoch + 1126, 215 | seed_theano = init_epoch + 1959 216 | ) 217 | initialized = False 218 | flag_compiled = False 219 | 220 | #================================================= create a object used to save model============================== 221 | copy_model = copy.deepcopy(model) 222 | # copy_model.remove_activation() 223 | train_ahead = n_pretrain 224 | print 'begin pretrain using unlabeled data...' 225 | print 'we need to train it %d more epoches'%(min(init_epoch+train_ahead,n_pretrain)-init_epoch) 226 | 227 | 228 | n_layers = model.n_layers 229 | # for n_build in xrange(n_layers): 230 | n_build = n_layers-1 231 | epoch = init_epoch 232 | 233 | print '\n### Training DeepDocNADE using unlabeled data, n_layers=%d ###'%(n_build+1) 234 | start_training_time = t.time() 235 | model.dec_learning_rate.set_value(model.learning_rate) 236 | copy_model.dec_learning_rate.set_value(copy_model.learning_rate) 237 | if initialized: 238 | model.remove_top_layer() 239 | model.add_top_layer(n_build) 240 | while(epoch < min(init_epoch+train_ahead,n_pretrain)): 241 | 242 | epoch += 1 243 | print 'Epoch {0}'.format(epoch) 244 | start_time_epoch = t.time() 245 | cost_train = [] 246 | for file_id in xrange(1,51): 247 | 248 | start_time = t.time() 249 | start_time_loaddata = t.time() 250 | #===================extract corresponding unlabeled(file_id).npz.gz file================================= 251 | if platform == 'Guilinmin': 252 | start_extract = t.time() 253 | path_tarfile = os.path.join(unlab_dataset_dir, 'unlabeled'+str(file_id)+'.npz.gz') 254 | cmd = 'tar xvfzm '+path_tarfile + ' -C ' + unlab_dataset_dir 255 | print cmd 256 | os.system(cmd) 257 | print 'Extract file_ID %d'%(file_id), get_done_text(start_extract) 258 | #===============================LOAD file================================================== 259 | unlabel_raw = MFU.load(unlab_dataset_dir, file_id, log_option, spatial_pyramid) 260 | 261 | #======================================remove unlabeled(file_id).npz.gz============================ 262 | if platform == 'Guilinmin': 263 | start_remove_tar = t.time() 264 | path_npzfile = os.path.join(unlab_dataset_dir, 'unlabeled'+str(file_id)+'.npz') 265 | cmd = 'rm ' + path_npzfile 266 | print cmd 267 | os.system(cmd) 268 | print 'Remove file_ID %d'%(file_id), get_done_text(start_remove_tar) 269 | #================================================================================================== 270 | print '\tTraining ...', 271 | sys.stdout.write("Load data cost {:.4f} 
seconds ".format(t.time() - start_time_loaddata)) 272 | if not flag_compiled: 273 | unlabel = {} 274 | unlabel['hists_visual'] = theano.shared(np.asarray(unlabel_raw['hists_visual'], theano.config.floatX), borrow=False) 275 | unlabel['hists_anno'] = theano.shared(unlabel_raw['hists_anno'].astype(theano.config.floatX), borrow=False) 276 | unlabel['global_features'] = theano.shared(np.asarray(unlabel_raw['global_features'], theano.config.floatX), borrow=False) 277 | else: 278 | unlabel['hists_visual'].set_value(np.asarray(unlabel_raw['hists_visual'], theano.config.floatX)) 279 | unlabel['hists_anno'].set_value(unlabel_raw['hists_anno'].astype(theano.config.floatX)) 280 | unlabel['global_features'].set_value(np.asarray(unlabel_raw['global_features'], theano.config.floatX)) 281 | 282 | n_train_batches = unlabel_raw['meta']['length']/batch_size 283 | 284 | aver_words_count = unlabel_raw['hists_visual'].sum(axis=1).mean() 285 | sys.stdout.write("aver word counts is {:.4f} ".format(aver_words_count)) 286 | if not initialized: 287 | spatial_split = np.asarray(spatial_pyramid, np.int32)**2*unlabel_raw['meta']['voc_size'] 288 | region_split = np.append(spatial_split, unlabel_raw['meta']['text_voc_size']) 289 | region_split = np.add.accumulate(region_split) 290 | 291 | 292 | model.initialize(unlabel_raw['meta']['voc_size']*unlabel_raw['meta']['n_regions'], unlabel_raw['meta']['text_voc_size'], unlabel_raw['meta']['global_feat_size'], region_split) 293 | model.remove_top_layer() 294 | model.add_top_layer(n_build) 295 | copy_model.initialize(unlabel_raw['meta']['voc_size']*unlabel_raw['meta']['n_regions'], unlabel_raw['meta']['text_voc_size'], unlabel_raw['meta']['global_feat_size'], region_split) 296 | copy_model.remove_top_layer() 297 | copy_model.add_top_layer(n_build) 298 | del copy_model.rng_theano 299 | del copy_model.rng 300 | if flag_continue: 301 | model.copy_parameters(model_init) 302 | copy_model.copy_parameters(model_init) 303 | del model_init 304 | initialized = True 305 | model.aver_words_count = aver_words_count 306 | copy_model.aver_words_count = aver_words_count 307 | 308 | start_time_process = t.time() 309 | if not flag_compiled: 310 | model.compile_function(n_build+1, unlabel, unlabel) 311 | flag_compiled = True 312 | for minibatch_index in range(n_train_batches): 313 | cost_value = model.train(minibatch_index) 314 | cost_train += [cost_value] 315 | sys.stdout.write("Process data cost {:.4f} seconds ".format(t.time() - start_time_process)) 316 | del unlabel_raw 317 | # del model.train 318 | # del model.valid 319 | # del unlabel 320 | gc.collect() 321 | 322 | print 'Train :', 'File ID %d'%(file_id), get_done_text(start_time) 323 | # unlabel.clear() 324 | train_cost_error = np.asarray(cost_train).mean() 325 | print '\tTraining ...', 326 | print 'Train :', " Cost Error: {0:.6f}".format(train_cost_error), get_done_text(start_time_epoch) 327 | # if np.mod(epoch,2)==0: 328 | # copy_model.copy_parameters(model) 329 | # del model 330 | # gc.collect() 331 | # model = copy.deepcopy(copy_model) 332 | if np.mod(epoch, 25)==0: 333 | if normalize_by_document_size: 334 | cPickle_ID = 'Wholelayers__%s__%s__%s__%s__%d__%s__%s__%f__%s__%f__%f__%f__normalized_by_doc_size'%(sys.argv[2],activation_function, log_option, sys.argv[11], epoch, pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight) 335 | else: 336 | cPickle_ID = 'Wholelayers__%s__%s__%s__%s__%d__%s__%s__%f__%s__%f__%f__%f'%(sys.argv[2],activation_function, log_option, sys.argv[11], 
epoch, pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight ) 337 | cPickle_model_name = os.path.join(model_file_dir, cPickle_ID+'___model.pkl') 338 | copy_model.copy_parameters(model) 339 | copy_model.remove_activation() 340 | 341 | saved_model_list = open(os.path.join(model_file_dir, 'saved_model_list.txt'), 'a') 342 | fcntl.flock(saved_model_list.fileno(), fcntl.LOCK_EX) 343 | model_file = open(cPickle_model_name, 'wb') 344 | cPickle.dump(copy_model, model_file,protocol=cPickle.HIGHEST_PROTOCOL) 345 | model_file.close() 346 | saved_model_list.write(cPickle_model_name+'\n') 347 | saved_model_list.close() # unlocks the file 348 | copy_model.add_activation() 349 | print cPickle_model_name 350 | print 'is saved' 351 | 352 | print '\n### Pre_Training, n_layers=%d'%(n_build+1), get_done_text(start_training_time) 353 | # copy_model.copy_parameters(model) 354 | # del model 355 | # gc.collect() 356 | # model = copy.deepcopy(copy_model) 357 | 358 | if normalize_by_document_size: 359 | cPickle_ID = 'Wholelayers__%s__%s__%s__%s__%d__%s__%s__%f__%s__%f__%f__%f__normalized_by_doc_size'%(sys.argv[2],activation_function, log_option, sys.argv[11], min(init_epoch+train_ahead,n_pretrain), pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight) 360 | else: 361 | cPickle_ID = 'Wholelayers__%s__%s__%s__%s__%d__%s__%s__%f__%s__%f__%f__%f'%(sys.argv[2],activation_function, log_option, sys.argv[11], min(init_epoch+train_ahead,n_pretrain), pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight) 362 | cPickle_model_name = os.path.join(model_file_dir, cPickle_ID+'___model.pkl') 363 | 364 | # copy_model.copy_parameters(model) 365 | model.remove_activation() 366 | del model.train 367 | del model.valid 368 | del unlabel 369 | del model.rng_theano 370 | del model.rng 371 | gc.collect() 372 | 373 | saved_model_list = open(os.path.join(model_file_dir, 'saved_model_list.txt'), 'a') 374 | fcntl.flock(saved_model_list.fileno(), fcntl.LOCK_EX) 375 | model_file = open(cPickle_model_name, 'wb') 376 | cPickle.dump(model, model_file,protocol=cPickle.HIGHEST_PROTOCOL) 377 | model_file.close() 378 | saved_model_list.write(cPickle_model_name+'\n') 379 | saved_model_list.close() # unlocks the file 380 | 381 | 382 | print cPickle_model_name 383 | print 'is saved' --------------------------------------------------------------------------------
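Note on the evaluation metrics: run_SupDeepDocNADE.py scores the model with compute_MAP_Prec50 and compute_AP_Prec50 (see the validation, logistic-layer and SVM blocks above), but those helpers are defined earlier in that script and do not appear in this excerpt. They are called as compute_MAP_Prec50(np.exp(prob_target), labels) and compute_AP_Prec50(confidence, labels[:, i]), returning mean average precision and precision among the 50 highest-ranked images, the usual MIR-Flickr measures. The following is only a minimal NumPy sketch consistent with those call sites; the exact bodies, tie-breaking, and the top_k default are assumptions, not the authors' code.

import numpy as np

def compute_AP_Prec50(confidence, labels, top_k=50):
    # Average precision and precision@top_k for a single class (assumed semantics).
    # confidence: 1-D array of scores, higher = more confident positive.
    # labels:     1-D binary ground truth (1 = relevant image).
    order = np.argsort(-confidence)                 # rank images by decreasing score
    ranked = labels[order].astype(np.float64)
    cum_pos = np.cumsum(ranked)
    prec_at_rank = cum_pos / np.arange(1.0, ranked.size + 1.0)
    n_pos = max(ranked.sum(), 1.0)                  # guard against classes with no positives
    ap = (prec_at_rank * ranked).sum() / n_pos
    prec50 = ranked[:top_k].mean()
    return ap, prec50

def compute_MAP_Prec50(scores, labels):
    # Mean of the per-class AP and Prec@50 over all label columns.
    aps, precs = zip(*[compute_AP_Prec50(scores[:, c], labels[:, c])
                       for c in range(labels.shape[1])])
    return np.mean(aps), np.mean(precs)

In the SVM blocks above the same per-class routine is applied to the liblinear decision values, with the sign of p_vals flipped when get_labels() does not list the positive class first, so that a larger confidence always means a more likely positive.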
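Note on the "polyak" functions and polyakexp_weight: compile_compute_representation_function_polyak and the polyakexp_weight hyperparameter refer to an exponentially weighted (Polyak-style) running average of the model parameters that is kept alongside the raw parameters and used when computing hidden representations for evaluation. The averaging itself lives in SupDeepDocNADE.py, which is not part of this excerpt; the snippet below is only a sketch of the conventional update, with avg_params, params, and polyak_update as hypothetical names.

import numpy as np

def polyak_update(avg_params, params, mu):
    # Exponential moving average of parameters: avg <- mu * avg + (1 - mu) * current.
    # avg_params, params: lists of numpy arrays with matching shapes (hypothetical layout).
    # mu:                 the decay, i.e. what polyakexp_weight is assumed to control.
    for avg, p in zip(avg_params, params):
        avg *= mu
        avg += (1.0 - mu) * p

# Tiny demonstration with one weight matrix:
w = np.ones((2, 2))
w_avg = np.zeros((2, 2))
polyak_update([w_avg], [w], mu=0.999)   # w_avg is now 0.001 everywhere

After every minibatch update of the raw parameters, an update of this form would refresh the averaged copies, and the *_polyak representation functions would then read from those averages rather than from the most recent parameters.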