├── MIR_Flickr_Theano_Unlab.py ├── MIR_Flickr_Theano_lab.py ├── README ├── SupDeepDocNADE.py ├── gen_dataset_labeled.py ├── gen_dataset_unlabeled.py ├── licence ├── run_SupDeepDocNADE.py └── run_pretrain_DeepDocNADE.py /MIR_Flickr_Theano_Unlab.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, are 4 | # permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 14 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 16 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 18 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | # The views and conclusions contained in the software and documentation are those of the 24 | # authors and should not be interpreted as representing official policies, either expressed 25 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 26 | 27 | """ 28 | Module ``datasets.MIR_Flickr`` gives access to the MIR_Flickr dataset (unlabeled part) for Theano.
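A minimal usage sketch (assuming ``dir_path`` points to a directory prepared as described in the README, containing ``meta.txt``, ``sizes.txt``, the ``unlabeled<folder_ID>.npz`` files and the ``text`` folder; the path below is only illustrative):

    import MIR_Flickr_Theano_Unlab as unlab
    data = unlab.load('/path/to/mir_flickr_unlabeled', folder_ID=1, log_option='NoLog', spatial_split=[1, 2, 3])
    # data is a dict with keys 'hists_visual', 'global_features', 'hists_anno' and 'meta'.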
29 | 30 | """ 31 | import numpy as np 32 | import os 33 | import scipy.sparse as sp 34 | 35 | def LoadSparse(inputfile, verbose=False): 36 | """Loads a sparse matrix stored as npz file.""" 37 | npzfile = np.load(inputfile) 38 | mat = sp.csr_matrix((npzfile['data'], npzfile['indices'], 39 | npzfile['indptr']), 40 | shape=tuple(list(npzfile['shape']))) 41 | if verbose: 42 | 43 | print 'Loaded sparse matrix from %s of shape %s' % (inputfile, 44 | mat.shape.__str__()) 45 | return mat 46 | 47 | def load(dir_path,folder_ID, log_option='NoLog', spatial_split=[1,2,3]): 48 | """ 49 | 50 | """ 51 | 52 | dir_path = os.path.expanduser(dir_path) 53 | sizes_file = open(os.path.join(dir_path,'sizes.txt'),'r') 54 | unlabel_size = int(sizes_file.readline()) 55 | sizes_file.close() 56 | # lengths = [unlabel_size] 57 | meta_file = open(os.path.join(dir_path, 'meta.txt')) 58 | meta = {} 59 | for line in meta_file: 60 | meta_name, meta_value = line.rstrip().split(':') 61 | meta.update({meta_name:int(meta_value)}) 62 | 63 | spatial_split = np.asarray(spatial_split, np.float64) 64 | n_regions = int((spatial_split**2).sum()) 65 | meta['n_regions'] = n_regions 66 | 67 | unlabel_size = int(unlabel_size/meta['dataset_split']) 68 | file_annotations = os.path.join(dir_path, 'text', 'text_all_2000_unlabelled.npz') 69 | annotations = LoadSparse(file_annotations, verbose = False) 70 | 71 | unlabel_str,length_str = 'unlabeled','length' 72 | unlabel_file = os.path.join(dir_path,unlabel_str+str(folder_ID)+'.npz') 73 | unlabel_meta = {length_str:unlabel_size} 74 | unlabel_meta.update(meta) 75 | unlabel_meta.update({'Folder_ID':folder_ID}) 76 | 77 | npzfile_train = np.load(unlabel_file) 78 | if log_option == 'NoLog': 79 | unlabel_hists = npzfile_train['unlabeled_matrix_hists'][:,:n_regions*unlabel_meta['voc_size']] 80 | elif log_option == 'Log_Natural': 81 | unlabel_hists = np.round(np.log(npzfile_train['unlabeled_matrix_hists'][:,:n_regions*unlabel_meta['voc_size']]+1.0)).astype(np.int32) 82 | elif log_option == 'Log_4': 83 | unlabel_hists = np.round(np.log(npzfile_train['unlabeled_matrix_hists'][:,:n_regions*unlabel_meta['voc_size']]+1.0)/np.log(4)+np.finfo(np.double).eps).astype(np.int32) 84 | # unlabel_hists = npzfile_train['unlabeled_matrix_hists'] 85 | unlabel_global_features = npzfile_train['unlabeled_matrix_global_features'] 86 | unlabel_annos = annotations[(folder_ID-1)*unlabel_size:folder_ID*unlabel_size,:] 87 | 88 | 89 | return {'hists_visual':unlabel_hists, 'global_features':unlabel_global_features, 'hists_anno':unlabel_annos,'meta':unlabel_meta} 90 | 91 | def obtain(dir_path): 92 | """ 93 | Gives information about how to obtain this dataset (``dir_path`` is ignored). 94 | """ 95 | 96 | print 'Ask Yin Zheng (yzheng3xg@gmail.com) for the data.' 97 | 98 | -------------------------------------------------------------------------------- /MIR_Flickr_Theano_lab.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, are 4 | # permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 14 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 16 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 18 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | # The views and conclusions contained in the software and documentation are those of the 24 | # authors and should not be interpreted as representing official policies, either expressed 25 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 26 | 27 | """ 28 | Module ``datasets.MIR_Flickr`` gives access to the MIR_Flickr dataset (labeled part) for Theano. 29 | 30 | 31 | 32 | """ 33 | import numpy as np 34 | import os 35 | import scipy.sparse as sp 36 | 37 | def LoadSparse(inputfile, verbose=False): 38 | """Loads a sparse matrix stored as npz file.""" 39 | npzfile = np.load(inputfile) 40 | mat = sp.csr_matrix((npzfile['data'], npzfile['indices'], 41 | npzfile['indptr']), 42 | shape=tuple(list(npzfile['shape']))) 43 | if verbose: 44 | 45 | print 'Loaded sparse matrix from %s of shape %s' % (inputfile, 46 | mat.shape.__str__()) 47 | return mat 48 | 49 | def load(dir_path,folder_ID, log_option='NoLog', spatial_split=[1,2,3]): 50 | """ 51 | """ 52 | 53 | dir_path = os.path.expanduser(dir_path) 54 | sizes_file = open(os.path.join(dir_path,'sizes.txt'),'r') 55 | train_size,valid_size,test_size = int(sizes_file.readline()),int(sizes_file.readline()),int(sizes_file.readline()) 56 | sizes_file.close() 57 | lengths = [train_size,valid_size,test_size] 58 | meta_file = open(os.path.join(dir_path, 'meta.txt')) 59 | meta = {} 60 | for line in meta_file: 61 | meta_name, meta_value = line.rstrip().split(':') 62 | meta.update({meta_name:int(meta_value)}) 63 | 64 | spatial_split = np.asarray(spatial_split, np.float64) 65 | n_regions = int((spatial_split**2).sum()) 66 | meta['n_regions'] = n_regions 67 | file_train_indices = os.path.join(dir_path, 'splits', 'train_indices_'+str(folder_ID)+'.npy' ) 68 | file_valid_indices = os.path.join(dir_path, 'splits', 'valid_indices_'+str(folder_ID)+ '.npy' ) 69 | file_test_indices = os.path.join(dir_path, 'splits', 'test_indices_'+str(folder_ID)+'.npy' ) 70 | train_indices = np.load(file_train_indices) 71 | valid_indices = np.load(file_valid_indices) 72 | test_indices = np.load(file_test_indices) 73 | file_annotations = os.path.join(dir_path, 'text', 'text_all_2000_labelled.npz') 74 | annotations = LoadSparse(file_annotations, verbose = True) 75 | 76 | train_str,valid_str,test_str,length_str = 'train','valid','test','length' 77 | train_file,valid_file,test_file = [os.path.join(dir_path,name+str(folder_ID)+'.npz') for name in [train_str,valid_str,test_str]] 78 | train_meta,valid_meta,test_meta = 
[{length_str:length} for length in lengths] 79 | train_meta.update(meta) 80 | valid_meta.update(meta) 81 | test_meta.update(meta) 82 | 83 | npzfile_train = np.load(train_file) 84 | if log_option == 'NoLog': 85 | trainset_hists = npzfile_train['trainset_matrix_hists'][:,:n_regions*train_meta['voc_size']] 86 | elif log_option == 'Log_Natural': 87 | trainset_hists = np.round(np.log(npzfile_train['trainset_matrix_hists'][:,:n_regions*train_meta['voc_size']]+1.0)).astype(np.int32) 88 | elif log_option == 'Log_4': 89 | trainset_hists = np.round(np.log(npzfile_train['trainset_matrix_hists'][:,:n_regions*train_meta['voc_size']]+1.0)/np.log(4)+np.finfo(np.double).eps).astype(np.int32) 90 | 91 | trainset_global_features = npzfile_train['trainset_matrix_global_features'] 92 | trainset_targets = npzfile_train['trainset_matrix_targets'] 93 | trainset_annos = annotations[train_indices, :] 94 | 95 | npzfile_valid = np.load(valid_file) 96 | if log_option == 'NoLog': 97 | validset_hists = npzfile_valid['validset_matrix_hists'][:,:n_regions*valid_meta['voc_size']] 98 | elif log_option == 'Log_Natural': 99 | validset_hists = np.round(np.log(npzfile_valid['validset_matrix_hists'][:,:n_regions*valid_meta['voc_size']]+1.0)).astype(np.int32) 100 | elif log_option == 'Log_4': 101 | validset_hists = np.round(np.log(npzfile_valid['validset_matrix_hists'][:,:n_regions*valid_meta['voc_size']]+1.0)/np.log(4)+np.finfo(np.double).eps).astype(np.int32) 102 | # validset_hists = npzfile_valid['validset_matrix_hists'][:,:n_regions] 103 | validset_global_features = npzfile_valid['validset_matrix_global_features'] 104 | validset_targets = npzfile_valid['validset_matrix_targets'] 105 | validset_annos = annotations[valid_indices, :] 106 | 107 | npzfile_test = np.load(test_file) 108 | if log_option == 'NoLog': 109 | testset_hists = npzfile_test['testset_matrix_hists'][:,:n_regions*test_meta['voc_size']] 110 | elif log_option == 'Log_Natural': 111 | testset_hists = np.round(np.log(npzfile_test['testset_matrix_hists'][:,:n_regions*test_meta['voc_size']]+1.0)).astype(np.int32) 112 | elif log_option == 'Log_4': 113 | testset_hists = np.round(np.log(npzfile_test['testset_matrix_hists'][:,:n_regions*test_meta['voc_size']]+1.0)/np.log(4)+np.finfo(np.double).eps).astype(np.int32) 114 | # testset_hists = npzfile_test['testset_matrix_hists'][:,:n_regions] 115 | testset_global_features = npzfile_test['testset_matrix_global_features'] 116 | testset_targets = npzfile_test['testset_matrix_targets'] 117 | testset_annos = annotations[test_indices, :] 118 | 119 | return ({train_str:{'hists_visual':trainset_hists, 'global_features':trainset_global_features, 'targets':trainset_targets, 'hists_anno':trainset_annos,'meta':train_meta}, 120 | valid_str:{'hists_visual':validset_hists, 'global_features':validset_global_features, 'targets':validset_targets, 'hists_anno':validset_annos,'meta':valid_meta}, 121 | test_str:{'hists_visual':testset_hists, 'global_features':testset_global_features, 'targets':testset_targets, 'hists_anno':testset_annos,'meta':test_meta}}) 122 | 123 | def obtain(dir_path): 124 | """ 125 | Gives information about how to obtain this dataset (``dir_path`` is ignored). 126 | """ 127 | 128 | print 'Ask Yin Zheng (yzheng3xg@gmail.com) for the data.' 129 | 130 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, are 4 | # permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 14 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 16 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 18 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | # The views and conclusions contained in the software and documentation are those of the 24 | # authors and should not be interpreted as representing official policies, either expressed 25 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 26 | 27 | 28 | 29 | ''' 30 | @reference: A Deep and Autoregressive Approach for Topic Modeling of Multimodal Data, IEEE Trans. Pattern Analysis and Machine Intelligence (PAMI) 31 | 32 | Project page: https://sites.google.com/site/zhengyin1126/home/supdeepdocnade 33 | 34 | @Authors: Yin Zheng, received his Ph.D. from Tsinghua University in 2015. Homepage: https://sites.google.com/site/zhengyin1126/ 35 | Yu-Jin Zhang, Tsinghua University, 36 | Hugo Larochelle, University of Sherbrooke and Twitter. Homepage: http://www.dmi.usherb.ca/~larocheh/index_en.html 37 | 38 | @contact: Yin Zheng, yzheng3xg@gmail.com 39 | 40 | ''' 41 | 42 | 43 | We provide: 44 | 1. SupDeepDocNADE.py : Classes of DeepDocNADE and SupDeepDocNADE, which are used by run_pretrain_DeepDocNADE.py and run_SupDeepDocNADE.py 45 | 2. run_pretrain_DeepDocNADE.py : Python script to pretrain the DeepDocNADE model on the MIR Flickr unlabeled data 46 | 3. run_SupDeepDocNADE.py : Python script to train and test the SupDeepDocNADE model. 47 | 4. gen_dataset_labeled.py: Python script to generate the labeled dataset files needed by the model 48 | 5. gen_dataset_unlabeled.py: Python script to generate the unlabeled dataset files needed by the model 49 | 6. MIR_Flickr_Theano_lab.py: Python script to load the labeled dataset and provide an interface to the SupDeepDocNADE model 50 | 7. MIR_Flickr_Theano_Unlab.py: Python script to load the unlabeled dataset and provide an interface to the DeepDocNADE model 51 | 52 | ===================================================================================================================== 53 | 54 | 0. Install liblinear (http://www.csie.ntu.edu.tw/~cjlin/liblinear/) and put liblinear on the PYTHONPATH. 55 | NOTE: make sure that you can use liblinear in Python. To test whether liblinear is configured correctly, you can 56 | run "from liblinearutil import *" in Python. 57 | 58 | 1.
Generate the dataset: 59 | 1) Download the dataset from Nitish's homepage: http://www.cs.toronto.edu/~nitish/multimodal/index.html 60 | 2) Run "python gen_dataset_labeled.py" to generate the labeled dataset. 61 | a) Read the comments in the script about how to use it. 62 | b) Create a file IN THE SAME PATH OF THE LABELED DATASET named "meta.txt" with the following lines: 63 | voc_size:2000 64 | n_regions:14 65 | text_voc_size:2000 66 | global_feat_size:1857 67 | n_classes:38 68 | c) Create a file IN THE SAME PATH OF THE LABELED DATASET named "sizes.txt" with the following content: 69 | 10000 70 | 5000 71 | 10000 72 | 73 | 3) Run "python gen_dataset_unlabeled.py" to generate the unlabeled dataset. 74 | a) Read the comments in the script about how to use it. 75 | b) Create a file IN THE SAME PATH OF THE UNLABELED DATASET named "meta.txt" with the following content: 76 | voc_size:2000 77 | n_regions:14 78 | text_voc_size:2000 79 | global_feat_size:1857 80 | dataset_split:50 81 | c) Create a file IN THE SAME PATH OF THE UNLABELED DATASET named "sizes.txt" with the following content: 82 | 975000 83 | 84 | 85 | 86 | 2. Run run_pretrain_DeepDocNADE.py to pretrain the DeepDocNADE model on unlabeled data: 87 | python run_pretrain_DeepDocNADE.py n_pretrain pre_learning_rate hidden_size activation_function dropout_rate model_file_dir unlab_dataset_dir batch_size anno_weight polyakexp_weight model_init 88 | 89 | 90 | The parameters of the script are as follows: 91 | 92 | n_pretrain : number of pretraining iterations 93 | pre_learning_rate : learning rate of the pretraining 94 | hidden_size : the hidden size of the model, e.g. 2048_2048_2048 is a model with 3 hidden layers of 2048 units each 95 | activation_function : the activation function of the hidden layers ("sigmoid", "reclin" or "tanh") 96 | dropout_rate: the dropout rate for each hidden layer, e.g. "0.5_0.5_0.5" means a dropout rate of 0.5 for each layer 97 | model_file_dir: path to save the pretrained model 98 | unlab_dataset_dir: the path to the unlabeled dataset 99 | batch_size: the batch size 100 | anno_weight: the weight of the annotation words 101 | polyakexp_weight: polyak weight 102 | model_init: path to a previously saved pretrained model, used to continue training from it. It can be NULL if no model has been saved before. 103 | 104 | One example of the script is: 105 | python run_pretrain_DeepDocNADE.py 6000 0.03 2048_2048_2048 reclin 0.5_0.5_0.5 PATH_TO_SAVE_THE_MODEL PATH_TO_UNLABELED_DATA 500 12000 std 0.9995 PATH_TO_SAVED_MODEL 106 | 107 | 108 | 3. Run run_SupDeepDocNADE.py to train the SupDeepDocNADE model on labeled data, either from scratch or initialized from a model pretrained on unlabeled data: 109 | python run_SupDeepDocNADE.py folder_ID use_pretrain max_iter look_ahead hidden_size learning_rate unsup_weight activation_function Linear_minC Linear_maxC dropout_rate uniresult_dir Pretrain_model_name lab_dataset_dir batch_size anno_weight polyakexp_weight 110 | 111 | The parameters are as follows: 112 | folder_ID : ID of the dataset split (1 to 5) 113 | use_pretrain: whether to use a pretrained model or to train from randomly initialized parameters (True or False) 114 | max_iter: maximum number of iterations 115 | look_ahead: stop early if the number of iterations without improvement exceeds look_ahead 116 | hidden_size: the hidden size of the model, e.g.
2048_2048_2048 is a 3 hidden layers model with 2048 units each layer 117 | learning_rate: learning rate of training process 118 | unsup_weight: the weight of the unsupervised part 119 | activation_function: the activation function of the hidden layers, "sigmoid, relu or tanh" 120 | Linear_minC: the minimum value of C for linear SVM (in log_2 space) 121 | Linear_maxC: the max value of C for linear SVM (in log_2 space) 122 | dropout_rate: the dropout rate for each hidden layer, e.g. "0.5_0.5_0.5" means dropout rate is 0.5 for each layer 123 | uniresult_dir: the path to save the results 124 | Pretrain_model_name: the path and the name of the pretrained model if you set use_pretrain True. Otherwise, any string 125 | lab_dataset_dir: the path to the labeled dataset 126 | batch_size : the mini-batch size 127 | anno_weight: the weight of the annotation words 128 | polyakexp_weight: polyak weight 129 | 130 | One example of the script is: 131 | 132 | python run_SupDeepDocNADE.py 1 True 20000 300 2048_2048_2048 0.01 0.25 reclin -17 10 0.5_0.5_0.5 PATH_TO_SAVE_RESULTS PATH_NAME_OF_PRETRAINED_MODEL PATH_TO_LABELED_DATA 500 12000 0.9995 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /SupDeepDocNADE.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 
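A rough usage sketch of the DeepDocNADE class defined below (the complete training pipeline lives in run_pretrain_DeepDocNADE.py; the sizes are illustrative, and trainset/validset are assumed to be dictionaries of Theano shared variables with the keys expected by compile_function):

    model = DeepDocNADE(hidden_size=[2048, 2048], activation_function='reclin',
                        dropout_rate=[0.5, 0.5], batch_size=500, learning_rate=0.03)
    model.initialize(voc_size, anno_voc_size, global_feature_size, region_split)  # sizes taken from the dataset meta
    model.add_top_layer(1)                    # softmax output layer on top of the last hidden layer
    model.compile_function(2, trainset, validset)
    for index in range(n_train_batches):      # one sweep over the minibatches
        cost = model.train(index)

SupDeepDocNADE is driven in the same way, with the class targets added to the dataset dictionaries.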
28 | 29 | Created on Aug 15, 2015 30 | 31 | @author: zhengyin 32 | 33 | @contact: yzheng3xg@gmail.com 34 | 35 | @summary: The class for paper A Deep and Autoregressive Approach for Topic Modeling of Multimodal Data, TPAMI 2015 36 | 37 | ''' 38 | 39 | import theano 40 | import theano.tensor as T 41 | import theano.sandbox.linalg as Tlin 42 | from theano.ifelse import ifelse 43 | from theano.tensor.shared_randomstreams import RandomStreams as RS_FixationNADE 44 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 45 | import theano.sparse as S 46 | import Image 47 | import numpy as np 48 | import copy as cp 49 | import scipy.sparse as sp 50 | import gc 51 | from theano.tensor.nnet import conv 52 | from theano.tensor.signal import downsample 53 | import sys 54 | sys.setrecursionlimit(50000) 55 | # import pydot 56 | import warnings 57 | try: 58 | from scipy.linalg import cholesky, det, solve 59 | except ImportError: 60 | warnings.warn("Could not import some scipy.linalg functions") 61 | # import theano.tensor as T 62 | from theano import config 63 | 64 | activation_functions = {"sigmoid": theano.tensor.nnet.sigmoid, "reclin": lambda x: theano.tensor.maximum(x, 0.0), "tanh": theano.tensor.tanh} 65 | class DeepDocNADE(object): 66 | ''' Theano verson for deep DocNADE''' 67 | 68 | def __init__(self, 69 | hidden_size = [100,100], 70 | learning_rate = 0.001, 71 | activation_function = 'sigmoid', 72 | testing_ensemble_size = 1, 73 | hidden_bias_scaled_by_document_size = False, 74 | word_representation_size = 0, 75 | seed_np = 1234, 76 | seed_theano = 4321, 77 | use_dropout = False, 78 | dropout_rate = [0.5], 79 | normalize_by_document_size = False, 80 | anno_weight = 1.0, 81 | global_feature_weight = 1.0, 82 | batch_size = 1, 83 | aver_words_count = 1, 84 | preprocess_method = 'std', 85 | decrease_constant = 0.999, 86 | length_limit = 15.0, 87 | polyakexp_weight = 0.99 88 | 89 | ): 90 | self.hidden_size = hidden_size 91 | self.learning_rate = learning_rate 92 | self.activation_function_name = activation_function 93 | self.aver_words_count = aver_words_count 94 | self.testing_ensemble_size = testing_ensemble_size 95 | self.hidden_bias_scaled_by_document_size = hidden_bias_scaled_by_document_size 96 | self.seed_np = seed_np 97 | self.seed_theano = seed_theano 98 | # self.seed_shuffle = seed_shuffle 99 | self.word_representation_size = word_representation_size 100 | self.use_dropout = use_dropout 101 | self.dropout_rate = dropout_rate 102 | self.normalize_by_document_size = normalize_by_document_size 103 | self.n_layers = len(self.hidden_size) 104 | self.anno_weight = anno_weight 105 | self.global_feature_weight = global_feature_weight 106 | self.batch_size = batch_size 107 | self.preprocess_method = preprocess_method 108 | self.decrease_constant = decrease_constant 109 | dec_learning_rate_value = np.asarray(learning_rate, dtype=theano.config.floatX) 110 | self.dec_learning_rate = theano.shared(value=dec_learning_rate_value, name='dec_learning_rate') 111 | self.length_limit = length_limit 112 | self.polyakexp_weight = polyakexp_weight 113 | 114 | 115 | def initialize(self,voc_size, anno_voc_size, global_feature_size, region_split): 116 | 117 | self.activation = activation_functions[self.activation_function_name] 118 | self.rng_theano = RandomStreams(seed=self.seed_theano) 119 | self.rng = np.random.mtrand.RandomState(self.seed_np) 120 | self.voc_size = voc_size 121 | self.anno_voc_size = anno_voc_size 122 | self.global_feat_size = global_feature_size 123 | self.region_split = 
region_split 124 | 125 | 126 | self.W = [] 127 | self.c = [] 128 | input_size = self.voc_size + self.anno_voc_size 129 | for hidden_size in self.hidden_size: 130 | # W_value = (2*self.rng.rand(input_size,hidden_size)-1)/(np.max([input_size, hidden_size])) 131 | # W_value = self.rng.uniform(-np.sqrt(6)/(input_size + hidden_size), np.sqrt(6)/(input_size + hidden_size), size=(input_size, hidden_size)) 132 | W_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(input_size + hidden_size), np.sqrt(6)/np.sqrt(input_size + hidden_size), size=(input_size, hidden_size)) 133 | W_value = np.asarray(W_value, theano.config.floatX) 134 | c_value = np.zeros((hidden_size,),theano.config.floatX) 135 | W = theano.shared(value = W_value, name = 'W') 136 | c = theano.shared(value = c_value, name = 'c') 137 | self.W.append(W) 138 | self.c.append(c) 139 | input_size = hidden_size 140 | # G_value = (2*self.rng.rand(self.global_feat_size,self.hidden_size[0])-1)/(np.max([self.global_feat_size, self.hidden_size[0]])) 141 | # G_value = self.rng.uniform(-np.sqrt(6)/(self.global_feat_size + self.hidden_size[0]), np.sqrt(6)/(self.global_feat_size + self.hidden_size[0]), size=(self.global_feat_size, self.hidden_size[0])) 142 | G_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(self.global_feat_size + self.hidden_size[0]), np.sqrt(6)/np.sqrt(self.global_feat_size + self.hidden_size[0]), size=(self.global_feat_size, self.hidden_size[0])) 143 | G_value = np.asarray(G_value, theano.config.floatX) 144 | self.G = theano.shared(value=G_value, name = 'G') 145 | 146 | anno_mask = np.ones((self.batch_size, self.voc_size+self.anno_voc_size), theano.config.floatX) 147 | anno_mask[:, -self.anno_voc_size:] = self.anno_weight 148 | self.anno_mask = theano.shared(value=anno_mask, name='anno_mask') 149 | 150 | self.W_polyak = cp.deepcopy(self.W) 151 | self.c_polyak = cp.deepcopy(self.c) 152 | self.G_polyak = cp.deepcopy(self.G) 153 | def __deepcopy__(self,memo): 154 | print "Warning: the deepcopy only copies the parameters, you SHOULD call compile_function for the functions" 155 | newone = type(self)() 156 | memo[id(self)] = newone 157 | old_dict = dict(self.__dict__) 158 | for key,val in old_dict.items(): 159 | if key in ['train','valid','test']: 160 | print 'escape %s'%(key) 161 | pass 162 | else: 163 | newone.__dict__[key] = cp.deepcopy(val, memo) 164 | return newone 165 | 166 | 167 | def build_graph(self, debug, hist_visual, hist_anno, global_feature, n_layer_to_build, W, c, V, b, G, flag_train): 168 | 169 | if n_layer_to_build <1: 170 | print 'there is at least 1 hidden layer' 171 | exit(-1) 172 | if n_layer_to_build > self.n_layers: 173 | print 'exceed the max number of hidden layers' 174 | print 'the max number of hidden layers is %d'%(self.n_layers) 175 | exit(-1) 176 | 177 | 178 | hist_anno_dense = hist_anno.toarray() 179 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 180 | # anno_mask = T.ones(hist.shape, theano.config.floatX) 181 | # tt = T.ones((hist.shape[0],2000), theano.config.floatX)*self.anno_weight 182 | # anno_weighted_mask = T.set_subtensor(anno_mask[:, -2000:], tt) 183 | if debug==True: 184 | mask_unif = 0.5*T.ones(shape=hist.shape, dtype=theano.config.floatX) 185 | 186 | else: 187 | mask_unif = 1.0 - self.rng_theano.uniform(size=hist.shape, low=0., high=1., dtype=theano.config.floatX) 188 | mask_counts = mask_unif*(hist+1) 189 | input = T.floor(mask_counts)*self.anno_mask 190 | hist = hist*self.anno_mask 191 | d = input.sum(axis = 1) 192 | D = hist.sum(axis = 1) 193 | predict = hist - input 194 | condition_bias 
= T.dot(global_feature, G) 195 | 196 | if self.preprocess_method == 'None': 197 | tmp_input = input 198 | elif self.preprocess_method == 'std': 199 | std = T.std(input, axis=1) 200 | tmp_input = input/(std[:, np.newaxis]+1e-16) 201 | elif self.preprocess_method == 'SPM': 202 | div_number = T.sqrt((input**2).sum(axis=1)) 203 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 204 | else: 205 | print 'Unknow preprocess method' 206 | exit(-1) 207 | 208 | # tmp_input = input 209 | for i in xrange(n_layer_to_build): 210 | if i==0: 211 | 212 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 213 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias) 214 | else: 215 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 216 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]) 217 | tmp_input = h 218 | log_prob_each_word = T.log(T.nnet.softmax_with_bias(T.dot(h, V),b)+1e-16) 219 | log_prob_each_bin = log_prob_each_word*predict 220 | nll = -log_prob_each_bin.sum(axis=1) 221 | #TODO:??divide D?? 222 | if self.normalize_by_document_size: 223 | cost = T.mean(1.0/(D-d)*nll) 224 | else: 225 | cost = T.mean(D/(D-d)/self.aver_words_count*nll) 226 | 227 | return cost,h,mask_unif,mask_counts,input,predict,hist,log_prob_each_bin,nll,condition_bias 228 | 229 | def build_compute_representation_graph(self, hist_visual, hist_anno, global_feature, n_layer_to_build, W, c,G, flag_train): 230 | 231 | 232 | if n_layer_to_build <1: 233 | print 'there is at least 1 hidden layer' 234 | exit(-1) 235 | if n_layer_to_build > self.n_layers: 236 | print 'exceed the max number of hidden layers' 237 | print 'the max number of hidden layers is %d'%(self.n_layers) 238 | exit(-1) 239 | 240 | 241 | hist_anno_dense = hist_anno.toarray() 242 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 243 | # anno_mask = T.ones(hist.shape, theano.config.floatX) 244 | # anno_weighted_mask = T.set_subtensor(anno_mask[:, -self.anno_voc_size:], self.anno_weight) 245 | if self.preprocess_method == 'None': 246 | input = hist*self.anno_mask 247 | tmp_input = input 248 | elif self.preprocess_method == 'std': 249 | input = hist*self.anno_mask 250 | std = T.std(input, axis=1) 251 | tmp_input = input/(std[:, np.newaxis]+1e-16) 252 | elif self.preprocess_method == 'SPM': 253 | input = hist*self.anno_mask 254 | div_number = T.sqrt((input**2).sum(axis=1)) 255 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 256 | # squared_input = input**2 257 | # init_tmp_input = T.ones(shape=input.shape, dtype=theano.config.floatX) 258 | # last_rsp = 0 259 | # for r_sp in self.region_split: 260 | # div_number = T.sqrt(squared_input[:,last_rsp:r_sp].sum(axis=1)) 261 | # tmp_input = T.set_subtensor(init_tmp_input[:,last_rsp:r_sp], input[:,last_rsp:r_sp]/(div_number[:, np.newaxis]+1e-16)) 262 | # init_tmp_input = tmp_input 263 | # last_rsp = r_sp 264 | # anno_factor = tmp_input[:,:self.region_split[-2]].sum(axis=1) 265 | # tmp_input = T.set_subtensor(tmp_input[:,self.region_split[-2]:], tmp_input[:,self.region_split[-2]:]*anno_factor[:, np.newaxis]*2) 266 | else: 
267 | print 'Unknow preprocess method' 268 | exit(-1) 269 | # input = hist 270 | 271 | 272 | 273 | 274 | condition_bias = T.dot(global_feature, G) 275 | # 276 | # tmp_input = input 277 | for i in xrange(n_layer_to_build): 278 | if i==0: 279 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 280 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias) 281 | else: 282 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]) 283 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 284 | tmp_input = h 285 | 286 | 287 | return h 288 | 289 | def compile_function(self, n_layers, trainset, validset): 290 | 291 | hist_visual = T.matrix(name='hist_visual') 292 | hist_anno = S.csr_matrix(name='hist_anno') 293 | global_feature = T.matrix(name='global_features') 294 | index = T.cast(T.scalar('index'), 'int32') 295 | flag_train = T.scalar(name='flag_train') 296 | cost,hidden_representation,mask_unif,mask_counts,input,predict,hist,log_prob_each_bin,nll,condition_bias = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 297 | 298 | params = [self.V, self.b, self.G] 299 | params.extend(self.W[:n_layers]) 300 | params.extend(self.c[:n_layers]) 301 | 302 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak] 303 | polyaks.extend(self.W_polyak[:n_layers]) 304 | polyaks.extend(self.c_polyak[:n_layers]) 305 | 306 | params_gradient = [T.grad(cost, param) for param in params] 307 | 308 | 309 | updates = [] 310 | 311 | for param, param_gradient, polyak in zip(params, params_gradient, polyaks): 312 | param_updated = param - self.dec_learning_rate*param_gradient 313 | if param.get_value(borrow=True).ndim==2: 314 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 315 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 316 | col_scale = desired_norms / (1e-16 + col_norms) 317 | updates.append((param, param_updated*col_scale)) 318 | else: 319 | updates.append((param, param_updated)) 320 | 321 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 322 | updates.append((polyak, polyak_updated)) 323 | 324 | 325 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 326 | 327 | 328 | self.train = theano.function(inputs = [index], 329 | updates = updates, 330 | outputs = cost, 331 | givens = { 332 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 333 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 334 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 335 | flag_train:np.asarray(1,dtype=theano.config.floatX) 336 | }, 337 | # mode='DebugMode' 338 | ) 339 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 340 | self.valid = theano.function(inputs = [index], 341 | # updates = updates, 342 | outputs = cost, 343 | givens = { 344 | 
hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 345 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 346 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 347 | flag_train:np.asarray(0,dtype=theano.config.floatX) 348 | }, 349 | # mode='DebugMode' 350 | ) 351 | 352 | 353 | def compile_compute_representation_function(self,n_layers, dataset): 354 | hist_visual = T.matrix(name='hist_visual') 355 | hist_anno = S.csr_matrix(name='hist_anno') 356 | global_feature = T.matrix(name='global_features') 357 | index = T.lscalar('index') 358 | flag_train = T.scalar(name='flag_train') 359 | # cost,hidden_representation,input, anno_weighted_mask = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G) 360 | hidden_representation = self.build_compute_representation_graph(hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.G, flag_train) 361 | self.compute_representation = theano.function(inputs = [index], 362 | outputs = hidden_representation, 363 | givens = { 364 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 365 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 366 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 367 | flag_train:np.asarray(0,dtype=theano.config.floatX) 368 | } 369 | # mode='DebugMode' 370 | ) 371 | def compile_compute_representation_function_polyak(self,n_layers, dataset): 372 | hist_visual = T.matrix(name='hist_visual') 373 | hist_anno = S.csr_matrix(name='hist_anno') 374 | global_feature = T.matrix(name='global_features') 375 | index = T.lscalar('index') 376 | flag_train = T.scalar(name='flag_train') 377 | # cost,hidden_representation,input, anno_weighted_mask = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G) 378 | hidden_representation = self.build_compute_representation_graph(hist_visual, hist_anno, global_feature, n_layers, self.W_polyak, self.c_polyak, self.G_polyak, flag_train) 379 | self.compute_representation = theano.function(inputs = [index], 380 | outputs = hidden_representation, 381 | givens = { 382 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 383 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 384 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 385 | flag_train:np.asarray(0,dtype=theano.config.floatX) 386 | } 387 | # mode='DebugMode' 388 | ) 389 | def compile_LayerByLayer_function(self, n_layers, trainset, validset): 390 | 391 | hist_visual = T.matrix(name='hist_visual') 392 | hist_anno = S.csr_matrix(name='hist_anno') 393 | global_feature = T.matrix(name='global_features') 394 | index = T.cast(T.scalar('index'), 'int32') 395 | flag_train = T.scalar(name='flag_train') 396 | cost,hidden_representation = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 397 | 398 | params = [self.V, self.b, self.G, self.W[n_layers-1], self.c[n_layers-1]] 399 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak, self.W_polyak[n_layers-1], self.c_polyak[n_layers-1]] 400 | params_gradient = [T.grad(cost, param) for param in params] 401 | 402 | 403 | updates = [] 404 | 405 | for param, 
param_gradient, polyak in zip(params, params_gradient, polyaks): 406 | param_updated = param - self.dec_learning_rate*param_gradient 407 | if param.get_value(borrow=True).ndim==2: 408 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 409 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 410 | col_scale = desired_norms / (1e-16 + col_norms) 411 | updates.append((param, param_updated*col_scale)) 412 | else: 413 | updates.append((param, param_updated)) 414 | 415 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 416 | updates.append((polyak, polyak_updated)) 417 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 418 | 419 | self.train = theano.function(inputs = [index], 420 | updates = updates, 421 | outputs = cost, 422 | givens = { 423 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 424 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 425 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 426 | flag_train:np.asarray(1,dtype=theano.config.floatX) 427 | }, 428 | # mode='DebugMode' 429 | ) 430 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 431 | self.valid = theano.function(inputs = [index], 432 | # updates = updates, 433 | outputs = cost, 434 | givens = { 435 | hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 436 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 437 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 438 | flag_train:np.asarray(0,dtype=theano.config.floatX) 439 | }, 440 | # mode='DebugMode' 441 | ) 442 | 443 | 444 | def verify_gradients(self): 445 | 446 | def fun(W0,W1, c0, c1, V, b,G): 447 | 448 | hist_visual = np.array([[1,2,3,4,5,6,7,8,9,0]], dtype = theano.config.floatX) 449 | hist_anno = sp.csr_matrix([[0,0,1,0,0,0,1,2,0,0]], dtype = theano.config.floatX) 450 | global_feature = np.array([[1,2,3,4,5,6,7,8,9,0]], dtype = theano.config.floatX) 451 | n_layers = 2 452 | cost, h=self.build_graph(True, hist_visual, hist_anno, global_feature, n_layers, 453 | [W0,W1], [c0,c1], V, b, G) 454 | 455 | return cost 456 | print 'Warning: verify_gradient will reinitialize the model!!!' 
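        # The lines below rebuild a small model with toy sizes, then call theano.tests.unittest_tools.verify_grad
        # to compare the symbolic gradient of the cost returned by build_graph against finite-difference estimates.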
457 | self.hidden_size = [100,100] 458 | self.n_classes = 7 459 | self.dropout_rate = 0.5 460 | self.activation = activation_functions['reclin'] 461 | self.n_layers = len(self.hidden_size) 462 | self.initialize(10,10,10) 463 | rng = np.random.RandomState(42) 464 | 465 | 466 | # rng = np.random.RandomState(42) 467 | theano.tests.unittest_tools.verify_grad(fun, [self.W[0].get_value(), self.W[1].get_value(),self.c[0].get_value(), self.c[1].get_value(), 468 | self.V.get_value(), self.b.get_value(), self.G.get_value()], rng = rng) 469 | 470 | 471 | 472 | 473 | def remove_activation(self): 474 | 475 | del self.activation 476 | 477 | def add_activation(self): 478 | 479 | self.activation = activation_functions[self.activation_function_name] 480 | 481 | def remove_top_layer(self): 482 | if hasattr(self, 'V'): 483 | del self.V 484 | if hasattr(self, 'b'): 485 | del self.b 486 | 487 | def add_top_layer(self, layer_id): 488 | ''' 489 | layer_id is the id of the hidden layer (starting from 0) on which we build the top layer to compute the conditionals 490 | ''' 491 | if layer_id <0: 492 | print 'there is at least 1 hidden layer' 493 | exit(-1) 494 | if layer_id > self.n_layers-1: 495 | print 'exceed the max number of hidden layers' 496 | print 'the max number of hidden layers is %d'%(self.n_layers) 497 | exit(-1) 498 | # V_value = (2*self.rng.rand(self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)-1)/(np.max([self.voc_size+self.anno_voc_size, self.hidden_size[layer_id]])) 499 | # V_value = self.rng.uniform(-np.sqrt(6)/(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), np.sqrt(6)/(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), size=( self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)) 500 | V_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), np.sqrt(6)/np.sqrt(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), size=( self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)) 501 | V_value = np.asarray(V_value, theano.config.floatX) 502 | self.V = theano.shared(value = V_value, name = 'V') 503 | b_value = np.zeros((self.voc_size+self.anno_voc_size), theano.config.floatX) 504 | self.b = theano.shared(value = b_value, name = 'b') 505 | self.V_polyak = cp.deepcopy(self.V) 506 | self.b_polyak = cp.deepcopy(self.b) 507 | 508 | def copy_parameters(self, source): 509 | 510 | self.V.set_value(source.V.get_value()) 511 | self.b.set_value(source.b.get_value()) 512 | self.V_polyak.set_value(source.V_polyak.get_value()) 513 | self.b_polyak.set_value(source.b_polyak.get_value()) 514 | for i in xrange(self.n_layers): 515 | self.W[i].set_value(source.W[i].get_value()) 516 | self.c[i].set_value(source.c[i].get_value()) 517 | self.W_polyak[i].set_value(source.W_polyak[i].get_value()) 518 | self.c_polyak[i].set_value(source.c_polyak[i].get_value()) 519 | self.G.set_value(source.G.get_value()) 520 | self.G_polyak.set_value(source.G_polyak.get_value()) 521 | self.dec_learning_rate.set_value(source.dec_learning_rate.get_value()) 522 | 523 | 524 | 525 | class SupDeepDocNADE(object): 526 | ''' Theano verson for Supervised deep DocNADE''' 527 | 528 | def __init__(self, 529 | hidden_size = [100,100], 530 | learning_rate = 0.001, 531 | learning_rate_unsup = 0.001, 532 | activation_function = 'sigmoid', 533 | testing_ensemble_size = 1, 534 | hidden_bias_scaled_by_document_size = False, 535 | word_representation_size = 0, 536 | seed_np = 1234, 537 | seed_theano = 4321, 538 | use_dropout = False, 539 | 
dropout_rate = [0.5], 540 | normalize_by_document_size = False, 541 | anno_weight = 1.0, 542 | global_feature_weight = 1.0, 543 | batch_size = 1, 544 | unsup_weight = 0.001, 545 | sup_option = 'full', 546 | aver_words_count = 1, 547 | n_connection = 15, 548 | bias = 0.0, 549 | rescale = 0.01, 550 | preprocess_method = 'SPM', 551 | decrease_constant = 0.999, 552 | length_limit = 15.0, 553 | polyakexp_weight = 0.99 554 | 555 | ): 556 | self.n_epoches_trained = 0 557 | self.hidden_size = hidden_size 558 | self.learning_rate = learning_rate 559 | self.learning_rate_unsup = learning_rate_unsup 560 | self.activation_function_name = activation_function 561 | self.aver_words_count = aver_words_count 562 | self.testing_ensemble_size = testing_ensemble_size 563 | self.hidden_bias_scaled_by_document_size = hidden_bias_scaled_by_document_size 564 | self.seed_np = seed_np 565 | self.seed_theano = seed_theano 566 | # self.seed_shuffle = seed_shuffle 567 | self.word_representation_size = word_representation_size 568 | self.use_dropout = use_dropout 569 | self.dropout_rate = dropout_rate 570 | self.normalize_by_document_size = normalize_by_document_size 571 | self.n_layers = len(self.hidden_size) 572 | self.anno_weight = anno_weight 573 | self.global_feature_weight = global_feature_weight 574 | self.batch_size = batch_size 575 | self.unsup_weight = unsup_weight 576 | # self.unsup_weight = theano.shared(value=unsup_weight,name='unsup') 577 | self.sup_option = sup_option 578 | self.n_connection = n_connection 579 | self.bias = bias 580 | self.rescale = rescale 581 | self.preprocess_method = preprocess_method 582 | self.decrease_constant = decrease_constant 583 | dec_learning_rate_value = np.asarray(learning_rate, dtype=theano.config.floatX) 584 | self.dec_learning_rate = theano.shared(value=dec_learning_rate_value, name='dec_learning_rate') 585 | self.length_limit = length_limit 586 | self.polyakexp_weight = polyakexp_weight 587 | 588 | 589 | def initialize(self,voc_size, anno_voc_size, global_feature_size, n_classes, region_split): 590 | 591 | self.activation = activation_functions[self.activation_function_name] 592 | self.rng_theano = RandomStreams(seed=self.seed_theano) 593 | self.rng = np.random.mtrand.RandomState(self.seed_np) 594 | # self.rng = np.random.mtrand.RandomState(self.seed) 595 | # self.rng_shuffle = np.random.mtrand.RandomState(self.seed_shuffle) 596 | self.voc_size = voc_size 597 | self.anno_voc_size = anno_voc_size 598 | self.global_feat_size = global_feature_size 599 | self.n_classes = n_classes 600 | self.region_split = region_split 601 | 602 | 603 | 604 | 605 | self.W = [] 606 | self.c = [] 607 | input_size = self.voc_size + self.anno_voc_size 608 | cnt = 1 609 | for hidden_size in self.hidden_size: 610 | W_value = 1*self.rng.uniform(-np.sqrt(6)/np.sqrt(input_size + hidden_size), np.sqrt(6)/np.sqrt(input_size + hidden_size), size=(input_size, hidden_size)) 611 | # W_value = 10*generate_SparseConnectionMat(self.rng, input_size, hidden_size, self.n_connection, self.rescale, self.bias)*cnt 612 | W_value = np.asarray(W_value, theano.config.floatX) 613 | c_value = np.zeros((hidden_size,),theano.config.floatX) 614 | W = theano.shared(value = W_value, name = 'W') 615 | c = theano.shared(value = c_value, name = 'c') 616 | self.W.append(W) 617 | self.c.append(c) 618 | input_size = hidden_size 619 | cnt *= 3 620 | 621 | G_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(self.global_feat_size + self.hidden_size[0]), np.sqrt(6)/np.sqrt(self.global_feat_size + self.hidden_size[0]), 
size=(self.global_feat_size, self.hidden_size[0])) 622 | G_value = np.asarray(G_value, theano.config.floatX) 623 | self.G = theano.shared(value=G_value, name = 'G') 624 | 625 | anno_mask = np.ones((self.batch_size, self.voc_size+self.anno_voc_size), theano.config.floatX) 626 | anno_mask[:, -self.anno_voc_size:] = self.anno_weight 627 | self.anno_mask = theano.shared(value=anno_mask, name='anno_mask') 628 | 629 | self.W_polyak = cp.deepcopy(self.W) 630 | self.c_polyak = cp.deepcopy(self.c) 631 | self.G_polyak = cp.deepcopy(self.G) 632 | 633 | 634 | def __deepcopy__(self,memo): 635 | print "Warning: the deepcopy only copies the parameters, you SHOULD call compile_function for the functions" 636 | newone = type(self)() 637 | memo[id(self)] = newone 638 | old_dict = dict(self.__dict__) 639 | for key,val in old_dict.items(): 640 | if key in ['train','valid','test']: 641 | print 'escape %s'%(key) 642 | pass 643 | else: 644 | newone.__dict__[key] = cp.deepcopy(val, memo) 645 | return newone 646 | 647 | 648 | def build_graph(self, debug, hist_visual, hist_anno, global_feature, target,n_layer_to_build, W, c, V, b, G, U, dd, flag_train): 649 | 650 | if n_layer_to_build <1: 651 | print 'there is at least 1 hidden layer' 652 | exit(-1) 653 | if n_layer_to_build > self.n_layers: 654 | print 'exceed the max number of hidden layers' 655 | print 'the max number of hidden layers is %d'%(self.n_layers) 656 | exit(-1) 657 | 658 | 659 | hist_anno_dense = hist_anno.toarray() 660 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 661 | if debug==True: 662 | mask_unif = 0.5*T.ones(shape=hist.shape, dtype=theano.config.floatX) 663 | 664 | else: 665 | mask_unif = 1.0 - self.rng_theano.uniform(size=hist.shape, low=0., high=1., dtype=theano.config.floatX) 666 | mask_counts = mask_unif*(hist+1) 667 | 668 | input = T.floor(mask_counts)*self.anno_mask 669 | hist = hist*self.anno_mask 670 | d = input.sum(axis = 1) 671 | D = hist.sum(axis = 1) 672 | predict = hist - input 673 | condition_bias = T.dot(global_feature, G) 674 | 675 | if self.preprocess_method == 'None': 676 | tmp_input = input 677 | elif self.preprocess_method == 'std': 678 | std = T.std(input, axis=1) 679 | tmp_input = input/(std[:, np.newaxis]+1e-16) 680 | elif self.preprocess_method == 'SPM': 681 | div_number = T.sqrt((input**2).sum(axis=1)) 682 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 683 | 684 | 685 | else: 686 | print 'Unknow preprocess method' 687 | exit(-1) 688 | 689 | 690 | first_tmp_input = tmp_input 691 | if self.sup_option == 'full': 692 | if self.preprocess_method == 'None': 693 | tmp_sup_input = hist 694 | elif self.preprocess_method == 'std': 695 | std_full = T.std(hist, axis=1) 696 | tmp_sup_input = hist/(std_full[:, np.newaxis]+1e-16) 697 | elif self.preprocess_method == 'SPM': 698 | div_number = T.sqrt((hist**2).sum(axis=1)) 699 | tmp_sup_input = input/(div_number[:,np.newaxis]+1e-16) 700 | else: 701 | print 'Unknow preprocess method' 702 | exit(-1) 703 | 704 | for i in xrange(n_layer_to_build): 705 | if i==0: 706 | dropout_mask = ifelse(T.neq(flag_train, 0) ,self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX), (1.-self.dropout_rate[i])*T.ones((tmp_input.shape[0],W[i].shape[1]), theano.config.floatX)) 707 | h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*dropout_mask 708 | if self.sup_option == 'full': 709 | h_sup = self.activation(T.dot(tmp_sup_input, W[i])+c[i]+condition_bias)*dropout_mask 710 | else: 711 | dropout_mask = 
ifelse(T.neq(flag_train, 0) ,self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX), (1.-self.dropout_rate[i])*T.ones((tmp_input.shape[0],W[i].shape[1]), theano.config.floatX)) 712 | h = self.activation(T.dot(tmp_input, W[i])+c[i])*dropout_mask*1.0 713 | if self.sup_option == 'full': 714 | h_sup = self.activation(T.dot(tmp_sup_input, W[i])+c[i])*dropout_mask*1.0 715 | tmp_input = h 716 | if self.sup_option == 'full': 717 | tmp_sup_input = h_sup 718 | # G_value = (2*self.rng.rand(self.global_feat_size,self.hidden_size[0])-1)/(np.max([self.global_feat_size, self.hidden_size[0]])) 719 | log_prob_each_word = T.log(T.nnet.softmax_with_bias(T.dot(h, V),b)+1e-16) 720 | log_prob_each_bin = log_prob_each_word*predict 721 | nll = -log_prob_each_bin.sum(axis=1) 722 | 723 | 724 | #=====================sup_cost=============================== 725 | if self.sup_option == 'full': 726 | prob_target = T.nnet.sigmoid(T.dot(h_sup, U)+dd) 727 | elif self.sup_option == 'partial': 728 | prob_target = T.nnet.sigmoid(T.dot(h, U)+dd) 729 | else: 730 | print "unknown supvervised option" 731 | exit(-1) 732 | cross_entropy = T.nnet.binary_crossentropy(prob_target, target).sum(axis=1)# the better the smaller (theano crossentropy add a minus here 733 | if self.normalize_by_document_size: 734 | unsup_cost = 1.0/(D-d)*nll 735 | else: 736 | unsup_cost = D/(D-d)/self.aver_words_count*nll 737 | 738 | cost = T.mean(unsup_cost*self.unsup_weight + cross_entropy) 739 | # T.mean(D/(D-d)*nll*self.unsup_weight + cross_entropy) 740 | log_prob_target = T.log(prob_target) 741 | return cost,log_prob_target,h, unsup_cost, cross_entropy, first_tmp_input, h_sup 742 | 743 | def build_compute_representation_graph(self, hist_visual, hist_anno, global_feature,n_layer_to_build, W, c,G, U, d, flag_train): 744 | 745 | 746 | if n_layer_to_build <1: 747 | print 'there is at least 1 hidden layer' 748 | exit(-1) 749 | if n_layer_to_build > self.n_layers: 750 | print 'exceed the max number of hidden layers' 751 | print 'the max number of hidden layers is %d'%(self.n_layers) 752 | exit(-1) 753 | 754 | 755 | hist_anno_dense = hist_anno.toarray() 756 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 757 | 758 | 759 | 760 | 761 | condition_bias = T.dot(global_feature, G) 762 | # 763 | if self.preprocess_method == 'None': 764 | input = hist*self.anno_mask 765 | tmp_input = input 766 | elif self.preprocess_method == 'std': 767 | input = hist*self.anno_mask 768 | std = T.std(input, axis=1) 769 | tmp_input = input/(std[:, np.newaxis]+1e-16) 770 | elif self.preprocess_method == 'SPM': 771 | input = hist*self.anno_mask 772 | 773 | div_number = T.sqrt((input**2).sum(axis=1)) 774 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 775 | else: 776 | print 'Unknow preprocess method' 777 | exit(-1) 778 | 779 | 780 | for i in xrange(n_layer_to_build): 781 | if i==0: 782 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 783 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias) 784 | else: 785 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]) 786 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = 
(tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 787 | tmp_input = h 788 | log_prob_target = T.log(T.nnet.sigmoid(T.dot(h, U)+d)) 789 | 790 | return h,log_prob_target 791 | 792 | 793 | 794 | def compile_function(self, n_layers, trainset, validset): 795 | 796 | hist_visual = T.matrix(name='hist_visual') 797 | hist_anno = S.csr_matrix(name='hist_anno') 798 | global_feature = T.matrix(name='global_feature') 799 | target = T.matrix(name='target') 800 | index = T.lscalar('index') 801 | flag_train = T.scalar(name='flag_train') 802 | cost,log_prob_target, hidden_representation, unsup_cost, cross_entropy, first_tmp_input, h_sup = self.build_graph(False, hist_visual, hist_anno, global_feature, target, n_layers, self.W, self.c, self.V, self.b, self.G,self.U, self.d, flag_train) 803 | 804 | params = [self.V, self.b, self.G, self.U, self.d] 805 | params.extend(self.W[:n_layers]) 806 | params.extend(self.c[:n_layers]) 807 | 808 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak, self.U_polyak, self.d_polyak] 809 | polyaks.extend(self.W_polyak[:n_layers]) 810 | polyaks.extend(self.c_polyak[:n_layers]) 811 | 812 | params_gradient = [T.grad(cost, param) for param in params] 813 | 814 | 815 | updates = [] 816 | 817 | for param, param_gradient, polyak in zip(params, params_gradient, polyaks): 818 | param_updated = param - self.dec_learning_rate*param_gradient 819 | if param.get_value(borrow=True).ndim==2: 820 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 821 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 822 | col_scale = desired_norms / (1e-16 + col_norms) 823 | updates.append((param, param_updated*col_scale)) 824 | else: 825 | updates.append((param, param_updated)) 826 | 827 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 828 | updates.append((polyak, polyak_updated)) 829 | 830 | 831 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 832 | 833 | self.train = theano.function(inputs = [index], 834 | updates = updates, 835 | # outputs = [cost, log_prob_target, unsup_cost, cross_entropy, hidden_representation, first_tmp_input, h_sup], 836 | outputs = [cost, log_prob_target, unsup_cost, cross_entropy], 837 | givens = { 838 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 839 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 840 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 841 | target:trainset['targets'][index*self.batch_size:(index+1)*self.batch_size, :], 842 | flag_train:np.asarray(1,dtype=theano.config.floatX) 843 | }, 844 | # mode='DebugMode' 845 | ) 846 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 847 | self.valid = theano.function(inputs = [index], 848 | # updates = updates, 849 | # outputs = [cost, log_prob_target, unsup_cost, cross_entropy, hidden_representation, first_tmp_input, h_sup], 850 | outputs = [cost, log_prob_target, unsup_cost, cross_entropy], 851 | givens = { 852 | hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 853 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 854 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 855 | 
target:validset['targets'][index*self.batch_size:(index+1)*self.batch_size, :], 856 | flag_train:np.asarray(0,dtype=theano.config.floatX) 857 | }, 858 | # mode='DebugMode' 859 | ) 860 | def compile_LayerByLayer_function(self, n_layers, trainset, validset): 861 | 862 | hist_visual = T.matrix(name='hist_visual') 863 | hist_anno = S.csr_matrix(name='hist_anno') 864 | global_feature = T.matrix(name='global_features') 865 | index = T.cast(T.scalar('index'), 'int32') 866 | flag_train = T.scalar(name='flag_train') 867 | cost,hidden_representation = self.build_unsupervised_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 868 | 869 | params = [self.V, self.b, self.G, self.W[n_layers-1], self.c[n_layers-1]] 870 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak, self.W_polyak[n_layers-1], self.c_polyak[n_layers-1]] 871 | 872 | 873 | params_gradient = [T.grad(cost, param) for param in params] 874 | 875 | 876 | updates = [] 877 | 878 | for param, param_gradient, polyak in zip(params, params_gradient, polyaks): 879 | param_updated = param - self.dec_learning_rate*param_gradient 880 | if param.get_value(borrow=True).ndim==2: 881 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 882 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 883 | col_scale = desired_norms / (1e-16 + col_norms) 884 | updates.append((param, param_updated*col_scale)) 885 | else: 886 | updates.append((param, param_updated)) 887 | 888 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 889 | updates.append((polyak, polyak_updated)) 890 | 891 | 892 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 893 | 894 | self.train = theano.function(inputs = [index], 895 | updates = updates, 896 | outputs = cost, 897 | givens = { 898 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 899 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 900 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 901 | flag_train:np.asarray(1,dtype=theano.config.floatX) 902 | }, 903 | # mode='DebugMode' 904 | ) 905 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 906 | self.valid = theano.function(inputs = [index], 907 | # updates = updates, 908 | outputs = cost, 909 | givens = { 910 | hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 911 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 912 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 913 | flag_train:np.asarray(0,dtype=theano.config.floatX) 914 | }, 915 | # mode='DebugMode' 916 | ) 917 | 918 | def compile_compute_representation_function(self,n_layers, dataset): 919 | hist_visual = T.matrix(name='hist_visual') 920 | hist_anno = S.csr_matrix(name='hist_anno') 921 | global_feature = T.matrix(name='global_feature') 922 | index = T.lscalar('index') 923 | flag_train = T.scalar(name='flag_train') 924 | # cost,hidden_representation,input, anno_weighted_mask = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G) 925 | hidden_representation, log_prob_target = self.build_compute_representation_graph(hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.G, self.U, self.d, 
flag_train) 926 | self.compute_representation = theano.function(inputs = [index], 927 | outputs = [hidden_representation,log_prob_target], 928 | givens = { 929 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 930 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 931 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 932 | 933 | flag_train:np.asarray(0,dtype=theano.config.floatX) 934 | } 935 | # mode='DebugMode' 936 | ) 937 | def compile_compute_representation_function_polyak(self,n_layers, dataset): 938 | hist_visual = T.matrix(name='hist_visual') 939 | hist_anno = S.csr_matrix(name='hist_anno') 940 | global_feature = T.matrix(name='global_feature') 941 | index = T.lscalar('index') 942 | flag_train = T.scalar(name='flag_train') 943 | # cost,hidden_representation,input, anno_weighted_mask = self.build_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G) 944 | hidden_representation, log_prob_target = self.build_compute_representation_graph(hist_visual, hist_anno, global_feature, n_layers, self.W_polyak, self.c_polyak, self.G_polyak, self.U_polyak, self.d_polyak, flag_train) 945 | self.compute_representation = theano.function(inputs = [index], 946 | outputs = [hidden_representation,log_prob_target], 947 | givens = { 948 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 949 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 950 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 951 | 952 | flag_train:np.asarray(0,dtype=theano.config.floatX) 953 | } 954 | # mode='DebugMode' 955 | ) 956 | def build_unsupervised_graph(self, debug, hist_visual, hist_anno, global_feature, n_layer_to_build, W, c, V, b, G, flag_train): 957 | 958 | if n_layer_to_build <1: 959 | print 'there is at least 1 hidden layer' 960 | exit(-1) 961 | if n_layer_to_build > self.n_layers: 962 | print 'exceed the max number of hidden layers' 963 | print 'the max number of hidden layers is %d'%(self.n_layers) 964 | exit(-1) 965 | 966 | 967 | hist_anno_dense = hist_anno.toarray() 968 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 969 | if debug==True: 970 | mask_unif = 0.5*T.ones(shape=hist.shape, dtype=theano.config.floatX) 971 | 972 | else: 973 | mask_unif = 1.0 - self.rng_theano.uniform(size=hist.shape, low=0., high=1., dtype=theano.config.floatX) 974 | mask_counts = mask_unif*(hist+1) 975 | input = T.floor(mask_counts)*self.anno_mask 976 | hist = hist*self.anno_mask 977 | d = input.sum(axis = 1) 978 | D = hist.sum(axis = 1) 979 | predict = hist - input 980 | condition_bias = T.dot(global_feature, G) 981 | 982 | if self.preprocess_method == 'None': 983 | tmp_input = input 984 | elif self.preprocess_method == 'std': 985 | std = T.std(input, axis=1) 986 | tmp_input = input/(std[:, np.newaxis]+1e-16) 987 | elif self.preprocess_method == 'SPM': 988 | div_number = T.sqrt((input**2).sum(axis=1)) 989 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 990 | 991 | else: 992 | print 'Unknow preprocess method' 993 | exit(-1) 994 | 995 | # tmp_input = input 996 | for i in xrange(n_layer_to_build): 997 | if i==0: 998 | 999 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = 
(tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 1000 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias) 1001 | else: 1002 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 1003 | # h = self.activation(T.dot(tmp_input, W[i])+c[i]) 1004 | tmp_input = h 1005 | log_prob_each_word = T.log(T.nnet.softmax_with_bias(T.dot(h, V),b)+1e-16) 1006 | log_prob_each_bin = log_prob_each_word*predict 1007 | nll = -log_prob_each_bin.sum(axis=1) 1008 | #TODO:??divide D?? 1009 | if self.normalize_by_document_size: 1010 | cost = T.mean(1.0/(D-d)*nll) 1011 | else: 1012 | cost = T.mean(D/(D-d)/self.aver_words_count*nll) 1013 | 1014 | return cost,h 1015 | 1016 | 1017 | def compile_unsupervised_function(self, n_layers, trainset, validset): 1018 | 1019 | hist_visual = T.matrix(name='hist_visual') 1020 | hist_anno = S.csr_matrix(name='hist_anno') 1021 | global_feature = T.matrix(name='global_features') 1022 | index = T.cast(T.scalar('index'), 'int32') 1023 | flag_train = T.scalar(name='flag_train') 1024 | cost,hidden_representation = self.build_unsupervised_graph(False, hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 1025 | 1026 | params = [self.V, self.b, self.G] 1027 | params.extend(self.W[:n_layers]) 1028 | params.extend(self.c[:n_layers]) 1029 | 1030 | polyaks = [self.V_polyak, self.b_polyak, self.G_polyak] 1031 | polyaks.extend(self.W_polyak[:n_layers]) 1032 | polyaks.extend(self.c_polyak[:n_layers]) 1033 | 1034 | params_gradient = [T.grad(cost, param) for param in params] 1035 | 1036 | 1037 | updates = [] 1038 | 1039 | for param, param_gradient, polyak in zip(params, params_gradient, polyaks): 1040 | param_updated = param - self.dec_learning_rate*param_gradient 1041 | if param.get_value(borrow=True).ndim==2: 1042 | col_norms = T.sqrt(T.sum(T.sqr(param_updated), axis=0)) 1043 | desired_norms = T.clip(col_norms, 0, T.sqrt(self.length_limit)) 1044 | col_scale = desired_norms / (1e-16 + col_norms) 1045 | updates.append((param, param_updated*col_scale)) 1046 | else: 1047 | updates.append((param, param_updated)) 1048 | 1049 | polyak_updated = self.polyakexp_weight*polyak + (1-self.polyakexp_weight)* param_updated 1050 | updates.append((polyak, polyak_updated)) 1051 | 1052 | 1053 | updates.append((self.dec_learning_rate, self.dec_learning_rate*self.decrease_constant)) 1054 | 1055 | self.train = theano.function(inputs = [index], 1056 | updates = updates, 1057 | outputs = [cost,hidden_representation], 1058 | givens = { 1059 | hist_visual:trainset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 1060 | hist_anno:trainset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 1061 | global_feature:trainset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 1062 | flag_train:np.asarray(1,dtype=theano.config.floatX) 1063 | }, 1064 | # mode='DebugMode' 1065 | ) 1066 | # theano.printing.pydotprint(self.train, outfile='/home/local/USHERBROOKE/zhey2402/DeepDocNADE/pic.png') 1067 | self.valid = theano.function(inputs = [index], 1068 | # updates = updates, 1069 | outputs = [cost,hidden_representation], 1070 | givens = { 1071 | 
hist_visual:validset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 1072 | hist_anno:validset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 1073 | global_feature:validset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 1074 | flag_train:np.asarray(0,dtype=theano.config.floatX) 1075 | }, 1076 | # mode='DebugMode' 1077 | ) 1078 | 1079 | 1080 | def build_graph_generateTEXT(self, hist_visual, hist_anno, global_feature,n_layer_to_build, W, c, V, b, G, flag_train): 1081 | 1082 | if n_layer_to_build <1: 1083 | print 'there is at least 1 hidden layer' 1084 | exit(-1) 1085 | if n_layer_to_build > self.n_layers: 1086 | print 'exceed the max number of hidden layers' 1087 | print 'the max number of hidden layers is %d'%(self.n_layers) 1088 | exit(-1) 1089 | 1090 | 1091 | hist_anno_dense = hist_anno.toarray()*0.0 1092 | hist = T.concatenate([hist_visual, hist_anno_dense], axis=1) 1093 | hist = hist*self.anno_mask 1094 | 1095 | condition_bias = T.dot(global_feature, G) 1096 | # 1097 | if self.preprocess_method == 'None': 1098 | input = hist*self.anno_mask 1099 | tmp_input = input 1100 | elif self.preprocess_method == 'std': 1101 | input = hist*self.anno_mask 1102 | std = T.std(input, axis=1) 1103 | tmp_input = input/(std[:, np.newaxis]+1e-16) 1104 | elif self.preprocess_method == 'SPM': 1105 | input = hist*self.anno_mask 1106 | div_number = T.sqrt((input**2).sum(axis=1)) 1107 | tmp_input = input/(div_number[:,np.newaxis]+1e-16) 1108 | else: 1109 | print 'Unknow preprocess method' 1110 | exit(-1) 1111 | 1112 | 1113 | for i in xrange(n_layer_to_build): 1114 | if i==0: 1115 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i]+condition_bias)*(1.-self.dropout_rate[i])) 1116 | else: 1117 | h = ifelse(T.neq(flag_train, 0) ,self.activation(T.dot(tmp_input, W[i])+c[i])*(self.rng_theano.binomial(n=1, p=1.0-self.dropout_rate[i], size = (tmp_input.shape[0],W[i].shape[1]),dtype=theano.config.floatX)), self.activation(T.dot(tmp_input, W[i])+c[i])*(1.-self.dropout_rate[i])) 1118 | tmp_input = h 1119 | 1120 | 1121 | log_prob_each_word = T.log(T.nnet.softmax_with_bias(T.dot(h, V),b)+1e-16) 1122 | 1123 | 1124 | return h, log_prob_each_word 1125 | 1126 | def compile_generateTEXT_function(self,n_layers, dataset): 1127 | hist_visual = T.matrix(name='hist_visual') 1128 | hist_anno = S.csr_matrix(name='hist_anno') 1129 | global_feature = T.matrix(name='global_feature') 1130 | index = T.lscalar('index') 1131 | flag_train = T.scalar(name='flag_train') 1132 | hidden_representation, log_prob_each_word = self.build_graph_generateTEXT(hist_visual, hist_anno, global_feature, n_layers, self.W, self.c, self.V, self.b, self.G, flag_train) 1133 | self.generateTEXT = theano.function(inputs = [index], 1134 | outputs = [hidden_representation,log_prob_each_word], 1135 | givens = { 1136 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 1137 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 1138 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 1139 | 1140 | flag_train:np.asarray(0,dtype=theano.config.floatX) 1141 | } 1142 | ) 1143 | def compile_generateTEXT_function_polyak(self,n_layers, dataset): 1144 | hist_visual = 
T.matrix(name='hist_visual') 1145 | hist_anno = S.csr_matrix(name='hist_anno') 1146 | global_feature = T.matrix(name='global_feature') 1147 | index = T.lscalar('index') 1148 | flag_train = T.scalar(name='flag_train') 1149 | hidden_representation, log_prob_each_word = self.build_graph_generateTEXT(hist_visual, hist_anno, global_feature, n_layers, self.W_polyak, self.c_polyak, self.V_polyak, self.b_polyak, self.G_polyak, flag_train) 1150 | self.generateTEXT = theano.function(inputs = [index], 1151 | outputs = [hidden_representation,log_prob_each_word], 1152 | givens = { 1153 | hist_visual:dataset['hists_visual'][index*self.batch_size:(index+1)*self.batch_size,:], 1154 | hist_anno:dataset['hists_anno'][index*self.batch_size:(index+1)*self.batch_size,:], 1155 | global_feature:dataset['global_features'][index*self.batch_size:(index+1)*self.batch_size, :], 1156 | 1157 | flag_train:np.asarray(0,dtype=theano.config.floatX) 1158 | } 1159 | ) 1160 | 1161 | def verify_gradients(self): 1162 | 1163 | def fun(W0,W1, c0, c1, V, b,G): 1164 | 1165 | hist_visual = np.array([[1,2,3,4,5,6,7,8,9,0]], dtype = theano.config.floatX) 1166 | hist_anno = sp.csr_matrix([[0,0,1,0,0,0,1,2,0,0]], dtype = theano.config.floatX) 1167 | global_feature = np.array([[1,2,3,4,5,6,7,8,9,0]], dtype = theano.config.floatX) 1168 | n_layers = 2 1169 | cost, h=self.build_graph(True, hist_visual, hist_anno, global_feature, n_layers, 1170 | [W0,W1], [c0,c1], V, b, G) 1171 | 1172 | return cost 1173 | print 'Warning: verify_gradient will reinitialize the model!!!' 1174 | self.hidden_size = [100,100] 1175 | self.n_classes = 7 1176 | self.dropout_rate = 0.5 1177 | self.activation = activation_functions['reclin'] 1178 | self.n_layers = len(self.hidden_size) 1179 | self.initialize(10,10,10) 1180 | rng = np.random.RandomState(42) 1181 | 1182 | 1183 | # rng = np.random.RandomState(42) 1184 | theano.tests.unittest_tools.verify_grad(fun, [self.W[0].get_value(), self.W[1].get_value(),self.c[0].get_value(), self.c[1].get_value(), 1185 | self.V.get_value(), self.b.get_value(), self.G.get_value()], rng = rng) 1186 | 1187 | 1188 | 1189 | 1190 | def remove_activation(self): 1191 | 1192 | del self.activation 1193 | 1194 | def add_activation(self): 1195 | 1196 | self.activation = activation_functions[self.activation_function_name] 1197 | 1198 | def remove_supervised_layer(self): 1199 | 1200 | if hasattr(self, 'U'): 1201 | del self.U 1202 | if hasattr(self, 'd'): 1203 | del self.d 1204 | 1205 | def add_supervised_layer(self, layer_id): 1206 | 1207 | if layer_id <0: 1208 | print 'there is at least 1 hidden layer' 1209 | exit(-1) 1210 | if layer_id > self.n_layers-1: 1211 | print 'exceed the max number of hidden layers' 1212 | print 'the max number of hidden layers is %d'%(self.n_layers) 1213 | exit(-1) 1214 | # U_value = 1*(2*self.rng.rand(self.hidden_size[layer_id] ,self.n_classes)-1)/(np.max([self.hidden_size[layer_id],self.n_classes])) 1215 | # U_value = self.rng.uniform(-np.sqrt(0.05)/(self.hidden_size[layer_id]+self.n_classes), np.sqrt(0.05)/(self.hidden_size[layer_id]+self.n_classes), size=(self.hidden_size[layer_id],self.n_classes)) 1216 | U_value = (1.0**(layer_id))*self.rng.uniform(-np.sqrt(6)/np.sqrt(self.hidden_size[layer_id]+self.n_classes), np.sqrt(6)/np.sqrt(self.hidden_size[layer_id]+self.n_classes), size=(self.hidden_size[layer_id],self.n_classes)) 1217 | # U_value = 0.001*generate_SparseConnectionMat(self.rng, self.hidden_size[layer_id],self.n_classes, self.n_connection, self.rescale, self.bias) 1218 | U_value = np.asarray(U_value, 
theano.config.floatX) 1219 | d_value = np.zeros((self.n_classes), theano.config.floatX) 1220 | self.U = theano.shared(value=U_value, name='U') 1221 | self.d = theano.shared(value=d_value, name='d') 1222 | 1223 | self.U_polyak = cp.deepcopy(self.U) 1224 | self.d_polyak = cp.deepcopy(self.d) 1225 | 1226 | def remove_top_layer(self): 1227 | if hasattr(self, 'V'): 1228 | del self.V 1229 | if hasattr(self, 'b'): 1230 | del self.b 1231 | 1232 | def add_top_layer(self, layer_id): 1233 | ''' 1234 | layer_id is the id of the hidden layer (starting from 0) on which we build the top layer to compute the conditionals 1235 | ''' 1236 | if layer_id <0: 1237 | print 'there is at least 1 hidden layer' 1238 | exit(-1) 1239 | if layer_id > self.n_layers-1: 1240 | print 'exceed the max number of hidden layers' 1241 | print 'the max number of hidden layers is %d'%(self.n_layers) 1242 | exit(-1) 1243 | # V_value = 1*(2*self.rng.rand(self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)-1)/(np.max([self.voc_size+self.anno_voc_size, self.hidden_size[layer_id]])) 1244 | # V_value = self.rng.uniform(-np.sqrt(0.05)/(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), np.sqrt(0.05)/(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), size=( self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)) 1245 | V_value = self.rng.uniform(-np.sqrt(6)/np.sqrt(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), np.sqrt(6)/np.sqrt(self.voc_size+self.anno_voc_size + self.hidden_size[layer_id]), size=( self.hidden_size[layer_id],self.voc_size+self.anno_voc_size)) 1246 | # V_value = 0.01*generate_SparseConnectionMat(self.rng, self.hidden_size[layer_id],self.voc_size+self.anno_voc_size, self.n_connection, self.rescale, self.bias) 1247 | V_value = np.asarray(V_value, theano.config.floatX) 1248 | self.V = theano.shared(value = V_value, name = 'V') 1249 | b_value = np.zeros((self.voc_size+self.anno_voc_size), theano.config.floatX) 1250 | self.b = theano.shared(value = b_value, name = 'b') 1251 | 1252 | self.V_polyak = cp.deepcopy(self.V) 1253 | self.b_polyak = cp.deepcopy(self.b) 1254 | 1255 | def copy_parameters(self, source): 1256 | 1257 | self.V.set_value(source.V.get_value()) 1258 | self.b.set_value(source.b.get_value()) 1259 | self.V_polyak.set_value(source.V_polyak.get_value()) 1260 | self.b_polyak.set_value(source.b_polyak.get_value()) 1261 | for i in xrange(self.n_layers): 1262 | self.W[i].set_value(source.W[i].get_value()) 1263 | self.c[i].set_value(source.c[i].get_value()) 1264 | self.W_polyak[i].set_value(source.W_polyak[i].get_value()) 1265 | self.c_polyak[i].set_value(source.c_polyak[i].get_value()) 1266 | self.G.set_value(source.G.get_value()) 1267 | self.G_polyak.set_value(source.G_polyak.get_value()) 1268 | self.dec_learning_rate.set_value(source.dec_learning_rate.get_value()) 1269 | 1270 | if hasattr(source, 'U'): 1271 | self.U.set_value(source.U.get_value()) 1272 | self.U_polyak.set_value(source.U_polyak.get_value()) 1273 | if hasattr(source,'d'): 1274 | self.d.set_value(source.d.get_value()) 1275 | self.d_polyak.set_value(source.d_polyak.get_value()) 1276 | -------------------------------------------------------------------------------- /gen_dataset_labeled.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 
4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 28 | 29 | 30 | Created on Aug 15, 2015 31 | 32 | @author: yin.zheng 33 | ''' 34 | 35 | import numpy as np 36 | import os 37 | import string 38 | import time 39 | import scipy.sparse as sp 40 | import collections 41 | from itertools import izip 42 | 43 | def LoadSparse(inputfile, verbose=False): 44 | """Loads a sparse matrix stored as npz file.""" 45 | npzfile = np.load(inputfile) 46 | mat = sp.csr_matrix((npzfile['data'], npzfile['indices'], 47 | npzfile['indptr']), 48 | shape=tuple(list(npzfile['shape']))) 49 | if verbose: 50 | 51 | print 'Loaded sparse matrix from %s of shape %s' % (inputfile, 52 | mat.shape.__str__()) 53 | return mat 54 | 55 | def load(dir_path, path_output_data, folder_ID): 56 | """ 57 | dir_path: The dataset to the extracted folder from data download from http://www.cs.toronto.edu/~nitish/multimodal/index.html 58 | path_output_data: The path to save the processed dataset 59 | folder_ID: the split ID (from 1 to 5) 60 | 61 | """ 62 | 63 | dir_path = os.path.expanduser(dir_path) 64 | file_train_indices = os.path.join(dir_path, 'splits', 'train_indices_'+str(folder_ID)+'.npy' ) 65 | file_valid_indices = os.path.join(dir_path, 'splits', 'valid_indices_'+str(folder_ID)+ '.npy' ) 66 | file_test_indices = os.path.join(dir_path, 'splits', 'test_indices_'+str(folder_ID)+'.npy' ) 67 | train_indices = np.load(file_train_indices) 68 | valid_indices = np.load(file_valid_indices) 69 | test_indices = np.load(file_test_indices) 70 | 71 | file_labels = os.path.join(dir_path, 'labels.npy') 72 | labels = np.load(file_labels).astype(np.int32) 73 | trainset_matrix_targets = labels[train_indices, :] 74 | validset_matrix_targets = labels[valid_indices, :] 75 | testset_matrix_targets = labels[test_indices, :] 76 | 77 | file_annotations = os.path.join(dir_path, 'text', 'text_all_2000_labelled.npz') 78 | annotations = LoadSparse(file_annotations, verbose = True) 79 | 
train_annos = annotations[train_indices, :] 80 | valid_annos = annotations[valid_indices, :] 81 | test_annos = annotations[test_indices, :] 82 | 83 | 84 | file_global_features_unlab = os.path.join(dir_path, 'image', 'unlabelled', 'combined-00003_1-of-00100.npy') 85 | global_features_unlab = np.load(file_global_features_unlab)[:, :-2000] 86 | for i in range(2): 87 | if i+4<10: 88 | tmp_file_global_features_unlab = os.path.join(dir_path, 'image', 'unlabelled', 'combined-0000'+str(i+4)+'-of-00100.npy') 89 | elif i+4 <100: 90 | tmp_file_global_features_unlab = os.path.join(dir_path, 'image', 'unlabelled', 'combined-000'+str(i+4)+'-of-00100.npy') 91 | else: 92 | tmp_file_global_features_unlab = os.path.join(dir_path, 'image', 'unlabelled', 'combined-00'+str(i+4)+'-of-00100.npy') 93 | tmp_global_features_unlab = np.load(tmp_file_global_features_unlab)[:, :-2000] 94 | global_features_unlab = np.vstack((global_features_unlab, tmp_global_features_unlab)) 95 | 96 | mean_global_features_unlab = np.mean(a=global_features_unlab, axis=0, dtype=np.float64) 97 | std_global_features_unlab = np.std(a=global_features_unlab, axis=0, dtype=np.float64) 98 | del global_features_unlab 99 | 100 | file_global_features1 = os.path.join(dir_path, 'image', 'labelled', 'combined-00001-of-00100.npy') 101 | global_features1 = np.load(file_global_features1) 102 | file_global_features2 = os.path.join(dir_path, 'image', 'labelled', 'combined-00002-of-00100.npy') 103 | global_features2 = np.load(file_global_features2) 104 | file_global_features3 = os.path.join(dir_path, 'image', 'labelled', 'combined-00003_0-of-00100.npy') 105 | global_features3 = np.load(file_global_features3) 106 | global_features = np.vstack((global_features1, global_features2, global_features3)) 107 | train_global_features = global_features[train_indices, :-2000] 108 | valid_global_features = global_features[valid_indices, :-2000] 109 | test_global_features = global_features[test_indices, :-2000] 110 | 111 | trainset_matrix_hists = global_features[train_indices, -2000:] 112 | validset_matrix_hists = global_features[valid_indices, -2000:] 113 | testset_matrix_hists = global_features[test_indices, -2000:] 114 | 115 | # standardize the labelled splits with the unlabelled-pool statistics; the mean and the std both broadcast along the feature axis 116 | train_global_features -= mean_global_features_unlab[np.newaxis,:] 117 | trainset_matrix_global_features = train_global_features / std_global_features_unlab[np.newaxis, :] 118 | 119 | valid_global_features -= mean_global_features_unlab[np.newaxis,:] 120 | validset_matrix_global_features = valid_global_features / std_global_features_unlab[np.newaxis, :] 121 | 122 | test_global_features -= mean_global_features_unlab[np.newaxis,:] 123 | testset_matrix_global_features = test_global_features / std_global_features_unlab[np.newaxis, :] 124 | 125 | file_train = os.path.join(path_output_data, 'train'+str(folder_ID)) 126 | file_valid = os.path.join(path_output_data, 'valid'+str(folder_ID)) 127 | file_test = os.path.join(path_output_data, 'test'+str(folder_ID)) 128 | 129 | np.savez(file_train, trainset_matrix_hists=trainset_matrix_hists, trainset_matrix_global_features = trainset_matrix_global_features, trainset_matrix_targets=trainset_matrix_targets) 130 | np.savez(file_valid, validset_matrix_hists=validset_matrix_hists, validset_matrix_global_features = validset_matrix_global_features, validset_matrix_targets=validset_matrix_targets) 131 | np.savez(file_test, testset_matrix_hists=testset_matrix_hists, testset_matrix_global_features = testset_matrix_global_features, testset_matrix_targets=testset_matrix_targets) 132 | 133 | 134 | 135 | 136 | if
__name__ == "__main__": 137 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 1) 138 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 2) 139 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 3) 140 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 4) 141 | load('/media/2TDisk/Flickr', '/media/2TDisk/Flickr', 5) 142 | -------------------------------------------------------------------------------- /gen_dataset_unlabeled.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 
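The labelled-split generator gen_dataset_labeled.py above standardizes each fold's global image features with the mean and standard deviation estimated on the unlabelled pool, so all five folds share one scaling. A minimal NumPy sketch of that standardization, with purely illustrative array names and sizes (not the repository's API):

    import numpy as np

    rng = np.random.RandomState(0)
    pool_feats = rng.rand(1000, 8)    # stand-in for the unlabelled global features
    split_feats = rng.rand(200, 8)    # stand-in for one labelled split

    mu = pool_feats.mean(axis=0)      # one mean per feature dimension
    sigma = pool_feats.std(axis=0)    # one standard deviation per feature dimension

    # both statistics broadcast along the feature axis
    standardized = (split_feats - mu[np.newaxis, :]) / sigma[np.newaxis, :]

Estimating the statistics once on the unlabelled pool keeps every labelled fold on the same scale instead of re-fitting them per split.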
28 | 29 | 30 | Created on Aug 15, 2015 31 | 32 | @author: zhengyin 33 | 34 | 35 | ''' 36 | 37 | import numpy as np 38 | import os 39 | import string 40 | import time 41 | import scipy.sparse as sp 42 | import collections 43 | from itertools import izip 44 | 45 | def LoadSparse(inputfile, verbose=False): 46 | """Loads a sparse matrix stored as npz file.""" 47 | npzfile = np.load(inputfile) 48 | mat = sp.csr_matrix((npzfile['data'], npzfile['indices'], 49 | npzfile['indptr']), 50 | shape=tuple(list(npzfile['shape']))) 51 | if verbose: 52 | 53 | print 'Loaded sparse matrix from %s of shape %s' % (inputfile, 54 | mat.shape.__str__()) 55 | return mat 56 | 57 | def load(dir_path, path_output_data): 58 | """ 59 | dir_path: The path to the extracted folder of the data downloaded from http://www.cs.toronto.edu/~nitish/multimodal/index.html 60 | path_output_data: The path to save the processed dataset 61 | 62 | """ 63 | 64 | dir_path = os.path.expanduser(dir_path) 65 | file_annotations = os.path.join(dir_path, 'text', 'text_all_2000_unlabelled.npz') 66 | annotations = LoadSparse(file_annotations, verbose = True) 67 | 68 | 69 | 70 | 71 | file_global_features = os.path.join(dir_path, 'image', 'unlabelled', 'combined-00003_1-of-00100.npy') 72 | global_features = np.load(file_global_features)[:, :-2000] 73 | unlabeled_matrix_hists = np.load(file_global_features)[:, -2000:] 74 | for i in range(97): 75 | if i+4<10: 76 | tmp_file_global_features = os.path.join(dir_path, 'image', 'unlabelled', 'combined-0000'+str(i+4)+'-of-00100.npy') 77 | elif i+4 <100: 78 | tmp_file_global_features = os.path.join(dir_path, 'image', 'unlabelled', 'combined-000'+str(i+4)+'-of-00100.npy') 79 | else: 80 | tmp_file_global_features = os.path.join(dir_path, 'image', 'unlabelled', 'combined-00'+str(i+4)+'-of-00100.npy') 81 | tmp_global_features = np.load(tmp_file_global_features)[:, :-2000] 82 | tmp_mir_unlab_histograms = np.load(tmp_file_global_features)[:, -2000:] 83 | global_features = np.vstack((global_features, tmp_global_features)) 84 | unlabeled_matrix_hists = np.vstack((unlabeled_matrix_hists, tmp_mir_unlab_histograms)) 85 | 86 | mean_global_features = np.mean(a=global_features, axis=0, dtype=np.float64) 87 | std_global_features = np.std(a=global_features, axis=0, dtype=np.float64) 88 | global_features -= mean_global_features[np.newaxis,:] 89 | global_features /= std_global_features 90 | batch_size = global_features.shape[0]/50 91 | for i in range(50): 92 | file_unlab = os.path.join(path_output_data, 'unlabeled'+str(i+1)) 93 | np.savez(file_unlab, unlabeled_matrix_hists=unlabeled_matrix_hists[batch_size*i:batch_size*(i+1),:], unlabeled_matrix_global_features = global_features[batch_size*i:batch_size*(i+1),:]) 94 | # 'global_features' holds the standardized features at this point; each of the 50 chunks is later loaded by its folder_ID 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | if __name__ == "__main__": 103 | load('/run/media/ian/2TDisk/Flickr', '/run/media/ian/2TDisk/Flickr') 104 | -------------------------------------------------------------------------------- /licence: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without modification, are 4 | # permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2.
Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 14 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 15 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 16 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 17 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 18 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 21 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | # The views and conclusions contained in the software and documentation are those of the 24 | # authors and should not be interpreted as representing official policies, either expressed 25 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 26 | -------------------------------------------------------------------------------- /run_SupDeepDocNADE.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 
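gen_dataset_unlabeled.py above writes the unlabelled collection as 50 equally sized npz chunks, and MIR_Flickr_Theano_Unlab.load recovers the rows belonging to a given folder_ID with the same slice arithmetic. A toy sketch of that chunk indexing, with made-up sizes and names (the real chunks hold the 2000-bin histograms plus the global features):

    import numpy as np

    n_images, n_chunks = 1000, 50
    hists = np.random.randint(0, 5, size=(n_images, 10))  # toy stand-in for the histograms
    chunk_size = n_images // n_chunks                      # integer division, as in the generator

    folder_ID = 3                                          # chunks are numbered from 1
    rows = slice((folder_ID - 1) * chunk_size, folder_ID * chunk_size)
    chunk = hists[rows]                                    # the rows that would sit in 'unlabeled3.npz'

Because chunk_size comes from integer division, any leftover images beyond 50 * chunk_size are simply dropped.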
28 | 29 | 30 | Created on Aug 15, 2015 31 | 32 | @author: zhengyin 33 | ''' 34 | 35 | import MIR_Flickr_Theano_lab as MF 36 | from SupDeepDocNADE import SupDeepDocNADE 37 | import copy 38 | import numpy as np 39 | from itertools import izip 40 | import random 41 | import collections 42 | # import cPickle 43 | import sys, os 44 | import fcntl 45 | import time as t 46 | import theano 47 | import theano.tensor as T 48 | import theano.sparse as S 49 | import cPickle 50 | from sklearn.metrics import average_precision_score 51 | from liblinearutil import * 52 | import gc 53 | sys.argv.pop(0); # Remove first argument 54 | def get_done_text(start_time): 55 | sys.stdout.flush() 56 | return "DONE in {:.4f} seconds.".format(t.time() - start_time) 57 | 58 | def softmax(a): 59 | max_a = np.amax(a , axis=1) 60 | max_a = max_a.reshape(max_a.shape[0], 1) 61 | e = np.exp(a-max_a) 62 | dist = e/np.sum(e, axis = 1)[:, np.newaxis] 63 | return dist 64 | 65 | # activation_functions = {"sigmoid": theano.tensor.nnet.sigmoid, "reclin": lambda x: x * (x > 0), "tanh": theano.tensor.tanh} 66 | # Check if all options are provided 67 | if 17 != len(sys.argv): 68 | print "Usage: python run_SupDeepDocNADE.py folder_ID use_pretrain max_iter look_ahead hidden_size \ 69 | learning_rate unsup_weight activation_function Linear_minC, Linear_maxC, \ 70 | dropout_rate uniresult_dir Pretrain_model_name\ 71 | lab_dataset_dir batch_size anno_weight\ 72 | polyakexp_weight" 73 | sys.exit() 74 | str2bool = {'True':True, 'False': False} # needed by the first argument parse below; harmlessly re-defined further down 75 | folder_ID = int(sys.argv[0]) 76 | use_pretrain = str2bool[sys.argv[1]] 77 | max_iter = int(sys.argv[2]) 78 | look_ahead = int(sys.argv[3]) 79 | hidden_size_split = (sys.argv[4]).split('_') 80 | hidden_size = [int(x) for x in hidden_size_split] 81 | learning_rate = float(sys.argv[5]) 82 | unsup_weight = float(sys.argv[6]) 83 | activation_function = sys.argv[7] 84 | sup_option = 'full' 85 | Linear_minC = float(sys.argv[8]) 86 | Linear_maxC = float(sys.argv[9]) 87 | dropout_split = (sys.argv[10]).split('_') 88 | dropout_rate = [float(x) for x in dropout_split] 89 | uniresult_dir = sys.argv[11] 90 | full_path_premodel = sys.argv[12] 91 | lab_dataset_dir = sys.argv[13] 92 | batch_size = int(sys.argv[14]) 93 | normalize_by_document_size = False 94 | anno_weight = float(sys.argv[15]) 95 | log_option = "NoLog" 96 | spatial_pyramid = [1] 97 | scaled_method = 'std' 98 | length_limit = float(100.0) 99 | decrease_constant = float(1.0) 100 | polyakexp_weight = float(sys.argv[16]) 101 | 102 | 103 | 104 | 105 | def compute_AP_Prec50(preds, targets): 106 | 107 | targets_sorted = targets[(-preds.T).argsort().flatten()] 108 | cumsum = targets_sorted.cumsum() 109 | prec = cumsum / np.arange(1.0, 1 + targets.shape[0]) 110 | ap = average_precision_score(targets, preds) 111 | prec50 = prec[50] 112 | return ap, prec50 113 | 114 | 115 | def compute_MAP_Prec50(predictions, targets): 116 | 117 | numdims = predictions.shape[1] 118 | ap = 0 119 | prec50 = 0 120 | ap_list = [] 121 | prec50_list = [] 122 | 123 | for i in range(numdims): 124 | this_ap, this_prec = compute_AP_Prec50(predictions[:,i], targets[:,i]) 125 | ap_list.append(this_ap) 126 | prec50_list.append(this_prec) 127 | ap += this_ap 128 | prec50 += this_prec 129 | map = ap/numdims 130 | mprec50 = prec50/numdims 131 | return map, mprec50 132 | 133 | 134 | 135 | 136 | str2bool = {'True':True, 'False': False} 137 | folder_ID = int(sys.argv[0]) 138 | use_pretrain = str2bool[sys.argv[1]] 139 | max_iter = int(sys.argv[2]) 140 | look_ahead = int(sys.argv[3]) 141 | hidden_size_split = (sys.argv[4]).split('_')
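compute_AP_Prec50 and compute_MAP_Prec50 above score one label column at a time: average precision comes from sklearn's average_precision_score, and precision@50 is read off the cumulative hit count after sorting images by decreasing score. A small self-contained check of that ranking logic on synthetic scores (too few items here for a literal precision@50, so precision@2 is printed instead):

    import numpy as np
    from sklearn.metrics import average_precision_score

    preds = np.array([0.9, 0.1, 0.8, 0.4, 0.7])
    targets = np.array([1, 0, 1, 0, 0])

    order = (-preds).argsort()                    # rank items by decreasing score
    hits = targets[order].cumsum()                # cumulative number of relevant items
    precision_at_k = hits / np.arange(1.0, targets.size + 1.0)

    ap = average_precision_score(targets, preds)  # averaging this over classes gives the reported MAP
    print("AP = %.3f, precision@2 = %.3f" % (ap, precision_at_k[1]))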
142 | hidden_size = [int(x) for x in hidden_size_split] 143 | learning_rate = float(sys.argv[5]) 144 | unsup_weight = float(sys.argv[6]) 145 | activation_function = sys.argv[7] 146 | sup_option = 'full' 147 | Linear_minC = float(sys.argv[8]) 148 | Linear_maxC = float(sys.argv[9]) 149 | dropout_split = (sys.argv[10]).split('_') 150 | dropout_rate = [float(x) for x in dropout_split] 151 | uniresult_dir = sys.argv[11] 152 | full_path_premodel = sys.argv[12] 153 | lab_dataset_dir = sys.argv[13] 154 | batch_size = int(sys.argv[14]) 155 | normalize_by_document_size = False 156 | anno_weight = float(sys.argv[15]) 157 | log_option = "NoLog" 158 | spatial_pyramid = [1] 159 | scaled_method = 'std' 160 | length_limit = float(100.0) 161 | decrease_constant = float(1.0) 162 | polyakexp_weight = float(sys.argv[16]) 163 | 164 | file_name_Linear = 'Polyak_Linear_Flickr_SupDeepDocNADE_%s__%s__%s.txt' %(sys.argv[0], activation_function, log_option) 165 | uniresultfile_name_Linear = os.path.join(uniresult_dir, file_name_Linear) 166 | print uniresultfile_name_Linear 167 | rng_shuffle = np.random.mtrand.RandomState(1111) 168 | if not os.path.exists(lab_dataset_dir): 169 | print 'label dataset not found' 170 | exit(-1) 171 | 172 | 173 | 174 | print 'train using labeled data' 175 | 176 | 177 | dataset = MF.load(lab_dataset_dir, folder_ID, log_option, spatial_pyramid) 178 | trainset_raw = dataset['train'] 179 | validset_raw = dataset['valid'] 180 | testset_raw = dataset['test'] 181 | n_classes = trainset_raw['meta']['n_classes'] 182 | 183 | train_labels = trainset_raw['targets'] 184 | valid_labels = validset_raw['targets'] 185 | test_labels = testset_raw['targets'] 186 | 187 | trainset = {} 188 | validset = {} 189 | testset = {} 190 | trainset['hists_visual'] = theano.shared(np.asarray(trainset_raw['hists_visual'], theano.config.floatX)) 191 | trainset['hists_anno'] = theano.shared(trainset_raw['hists_anno'].astype(theano.config.floatX)) 192 | trainset['global_features'] = theano.shared(np.asarray(trainset_raw['global_features'], theano.config.floatX)) 193 | trainset['targets'] = theano.shared(np.asarray(trainset_raw['targets'], theano.config.floatX)) 194 | 195 | validset['hists_visual'] = theano.shared(np.asarray(validset_raw['hists_visual'], theano.config.floatX)) 196 | validset['hists_anno'] = theano.shared(validset_raw['hists_anno'].astype(theano.config.floatX)) 197 | validset['global_features'] = theano.shared(np.asarray(validset_raw['global_features'], theano.config.floatX)) 198 | validset['targets'] = theano.shared(np.asarray(validset_raw['targets'], theano.config.floatX)) 199 | 200 | testset['hists_visual'] = theano.shared(np.asarray(testset_raw['hists_visual'], theano.config.floatX)) 201 | testset['hists_anno'] = theano.shared(testset_raw['hists_anno'].astype(theano.config.floatX)) 202 | testset['global_features'] = theano.shared(np.asarray(testset_raw['global_features'], theano.config.floatX)) 203 | testset['targets'] = theano.shared(np.asarray(testset_raw['targets'], theano.config.floatX)) 204 | 205 | n_train = trainset_raw['meta']['length'] 206 | n_valid = validset_raw['meta']['length'] 207 | n_test = testset_raw['meta']['length'] 208 | 209 | n_train_batches = trainset_raw['meta']['length'] / batch_size 210 | n_valid_batches = validset_raw['meta']['length'] / batch_size 211 | n_test_batches = testset_raw['meta']['length'] / batch_size 212 | 213 | aver_words_count_trainset = trainset_raw['hists_visual'].sum(axis=1).mean() 214 | print 'average word counts of trainset is %f'%(aver_words_count_trainset) 
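The average word count printed above feeds the unsupervised part of the training cost: build_graph splits every histogram into a part treated as already observed and a part left to predict, and the softmax negative log-likelihood on the predicted part is reweighted by D/(D-d) and by the average document length. A NumPy sketch of that split and weighting for a single toy document, with a placeholder NLL value (the real one comes from the Theano softmax):

    import numpy as np

    rng = np.random.RandomState(1234)
    hist = np.array([[3., 0., 2., 1.]])             # word-count histogram of one document
    aver_words_count = 6.0                          # illustrative mean total count over the training set

    mask_unif = 1.0 - rng.uniform(size=hist.shape)
    observed = np.floor(mask_unif * (hist + 1.0))   # counts treated as already seen (between 0 and hist per bin)
    predict = hist - observed                       # counts the model must still explain
    d, D = observed.sum(axis=1), hist.sum(axis=1)

    nll = 1.0                                       # placeholder for the softmax NLL over 'predict'
    cost = D / (D - d) / aver_words_count * nll     # weighting used when not normalizing by document size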
215 | 216 | model = SupDeepDocNADE(hidden_size = hidden_size, 217 | learning_rate = learning_rate, 218 | # learning_rate_unsup = learning_rate_unsup, 219 | activation_function = activation_function, 220 | word_representation_size = 0, 221 | dropout_rate = dropout_rate, 222 | normalize_by_document_size = normalize_by_document_size, 223 | anno_weight = anno_weight, 224 | batch_size = batch_size, 225 | sup_option = sup_option, 226 | unsup_weight = unsup_weight, 227 | aver_words_count = aver_words_count_trainset, 228 | preprocess_method = scaled_method, 229 | length_limit = length_limit, 230 | decrease_constant = decrease_constant, 231 | polyakexp_weight = polyakexp_weight 232 | ) 233 | pretrain_learning_rate = 0 # when it == 0, means no pretraining 234 | 235 | spatial_split = np.asarray(spatial_pyramid, np.int32)**2*trainset_raw['meta']['voc_size'] 236 | region_split = np.append(spatial_split, trainset_raw['meta']['text_voc_size']) 237 | region_split = np.add.accumulate(region_split) 238 | 239 | model.initialize(trainset_raw['meta']['voc_size']*trainset_raw['meta']['n_regions'], 240 | trainset_raw['meta']['text_voc_size'], 241 | trainset_raw['meta']['global_feat_size'], 242 | trainset_raw['meta']['n_classes'], 243 | region_split) 244 | 245 | if use_pretrain: 246 | full_path_premodel = os.path.expanduser(full_path_premodel) 247 | if os.path.isfile(full_path_premodel): 248 | model_file = open(full_path_premodel, 'rb') 249 | pre_model = cPickle.load(model_file) 250 | model_file.close() 251 | pre_model.add_activation() 252 | else: 253 | print 'ERROR: pretrained model not found' 254 | exit(-1) 255 | 256 | assert(pre_model.hidden_size == hidden_size) 257 | assert(pre_model.activation_function_name == activation_function) 258 | 259 | print '========================pre_trained model loaded successfully=======================================' 260 | pretrain_learning_rate = pre_model.learning_rate 261 | model.add_supervised_layer(model.n_layers-1) 262 | model.add_top_layer(model.n_layers-1) 263 | model.copy_parameters(pre_model) 264 | 265 | 266 | model.dec_learning_rate.set_value(model.learning_rate) 267 | model.compile_function(model.n_layers, trainset, validset) 268 | # model.compile_compute_representation_function(model.n_layers, batch_size, trainset) 269 | best_valid_error = -np.inf 270 | best_valid_prec50 = -np.inf 271 | best_epoch = 0 272 | best_model = copy.deepcopy(model) 273 | nb_of_epocs_without_improvement = 0 274 | epoch = 0 275 | print '\n### Training DeepDocNADE ###' 276 | start_training_time = t.time() 277 | while(epoch < max_iter and nb_of_epocs_without_improvement < look_ahead): 278 | epoch += 1 279 | print 'Epoch {0}'.format(epoch) 280 | print '\tTraining ...', 281 | start_time = t.time() 282 | cost_train = [] 283 | unsup_cost_train = [] 284 | sup_cost_train = [] 285 | prob_target_train = np.zeros((n_train, n_classes)) 286 | for minibatch_index in range(n_train_batches): 287 | # cost_value,log_prob_target_value, unsup_cost_value, sup_cost_value, h_value, first_input_value, h_sup_value = model.train(minibatch_index) 288 | cost_value,log_prob_target_value, unsup_cost_value, sup_cost_value = model.train(minibatch_index) 289 | cost_train += [cost_value] 290 | unsup_cost_train += [unsup_cost_value] 291 | sup_cost_train += [sup_cost_value] 292 | prob_target_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = log_prob_target_value 293 | train_cost_error = np.asarray(cost_train).mean() 294 | train_unsup_cost_error = np.asarray(unsup_cost_train).mean() 295 | 
train_sup_cost_error = np.asarray(sup_cost_train).mean() 296 | train_map,train_prec50 = compute_MAP_Prec50(np.exp(prob_target_train), train_labels) 297 | print 'Train :', get_done_text(start_time), " MAP: {0:.6f}".format(train_map), " Prec@50: {0:.6f}".format(train_prec50), " Cost Error: {0:.6f}".format(train_cost_error) , " Unsup_Cost Error: {0:.6f}".format(train_unsup_cost_error), " Sup_Cost Error: {0:.6f}".format(train_sup_cost_error), 'mean_p: {0:.6f}'.format(np.exp(prob_target_train).mean()) 298 | 299 | print '\tValidating ...', 300 | start_time = t.time() 301 | cost_valid = [] 302 | unsup_cost_valid = [] 303 | sup_cost_valid = [] 304 | prob_target_valid = np.zeros((n_valid, n_classes)) 305 | for minibatch_index in range(n_valid_batches): 306 | # cost_value,log_prob_target_value, unsup_cost_value, sup_cost_value, h_value, first_input_value , h_sup_value = model.valid(minibatch_index) 307 | cost_value,log_prob_target_value, unsup_cost_value, sup_cost_value = model.valid(minibatch_index) 308 | cost_valid += [cost_value] 309 | unsup_cost_valid += [unsup_cost_value] 310 | sup_cost_valid += [sup_cost_value] 311 | prob_target_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = log_prob_target_value 312 | valid_cost_error = np.asarray(cost_valid).mean() 313 | valid_unsup_cost_error = np.asarray(unsup_cost_valid).mean() 314 | valid_sup_cost_error = np.asarray(sup_cost_valid).mean() 315 | valid_map,valid_prec50 = compute_MAP_Prec50(np.exp(prob_target_valid), valid_labels) 316 | print 'Validation:', get_done_text(start_time), " MAP: {0:.6f}".format(valid_map), " Prec@50: {0:.6f}".format(valid_prec50), " Cost Error: {0:.6f}".format(valid_cost_error) , " Unsup_Cost Error: {0:.6f}".format(valid_unsup_cost_error), " Sup_Cost Error: {0:.6f}".format(valid_sup_cost_error), 'mean_p: {0:.6f}'.format(np.exp(prob_target_valid).mean()) 317 | if valid_map > best_valid_error: 318 | # start_time = t.time() 319 | best_valid_error = valid_map 320 | best_valid_prec50 = valid_prec50 321 | best_epoch = epoch 322 | nb_of_epocs_without_improvement = 0 323 | del best_model 324 | gc.collect() 325 | best_model = copy.deepcopy(model) 326 | # print 'deep copying...',get_done_text(start_time) 327 | else: 328 | nb_of_epocs_without_improvement += 1 329 | 330 | 331 | 332 | print 'begin polyak svm part' 333 | 334 | #compute hidden representation of the testset 335 | hidden_represenation_trainset = np.zeros((n_train, best_model.hidden_size[-1])) 336 | hidden_represenation_validset = np.zeros((n_valid, best_model.hidden_size[-1])) 337 | hidden_represenation_testset = np.zeros((n_test, best_model.hidden_size[-1])) 338 | best_model.compile_compute_representation_function_polyak(best_model.n_layers, trainset) 339 | for minibatch_index in range(n_train_batches): 340 | h,log_prob_target_value = best_model.compute_representation(minibatch_index) 341 | hidden_represenation_trainset[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = h 342 | best_model.compile_compute_representation_function_polyak(best_model.n_layers, validset) 343 | for minibatch_index in range(n_valid_batches): 344 | h,log_prob_target_value = best_model.compute_representation(minibatch_index) 345 | hidden_represenation_validset[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = h 346 | best_model.compile_compute_representation_function_polyak(best_model.n_layers, testset) 347 | prob_target_test = np.zeros((n_test, n_classes)) 348 | for minibatch_index in range(n_test_batches): 349 | h,log_prob_target_value = 
best_model.compute_representation(minibatch_index) 350 | hidden_represenation_testset[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = h 351 | prob_target_test[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :] = log_prob_target_value 352 | 353 | 354 | hidden_represenation_trainset = hidden_represenation_trainset.tolist() 355 | hidden_represenation_validset = hidden_represenation_validset.tolist() 356 | hidden_represenation_testset = hidden_represenation_testset.tolist() 357 | 358 | #=========================================Logistic layer MAP=========================================== 359 | 360 | test_map_polyak,test_prec50_polyak = compute_MAP_Prec50(np.exp(prob_target_test), test_labels) 361 | print 'Logistic MAP is '+ str(test_map_polyak) 362 | print 'Logistic prec@50 is '+ str(test_prec50_polyak) 363 | 364 | 365 | 366 | #====================================================================================================== 367 | 368 | best_C_Linear_polyak = np.zeros(n_classes) 369 | best_ap_Linear_polyak = -np.ones(n_classes)*np.inf 370 | best_prec_Linear_polyak = -np.ones(n_classes)*np.inf 371 | Linear_C = np.arange(Linear_minC, Linear_maxC, 0.25) 372 | 373 | 374 | print 'Linear SVM Model Training' 375 | cnt_Linear = 0 376 | for C in Linear_C: 377 | # print cnt_Linear 378 | start = t.time() 379 | for i in range(n_classes): 380 | label_train = train_labels[:,i] 381 | label_train = label_train.tolist() 382 | label_valid = valid_labels[:,i] 383 | label_valid = label_valid.tolist() 384 | 385 | train_options = '-s 0 -c %e -q'%np.exp2(C) 386 | test_options = '-b 0 -q' 387 | # train_options = '-s 1 -c %e -q'%np.exp2(C) 388 | # test_options = '-q' 389 | svm_model = train(label_train, hidden_represenation_trainset, train_options) 390 | 391 | p_labels, p_acc, p_vals = predict(label_valid, hidden_represenation_validset, svm_model, test_options) 392 | p_vals = np.asarray(p_vals) 393 | index = svm_model.get_labels().index(1) 394 | if index ==0: 395 | confidence = p_vals 396 | elif index == 1: 397 | confidence = -p_vals 398 | else: 399 | raise TypeError('wrong index') 400 | # confidence = p_vals[:, index] 401 | this_ap, this_prec = compute_AP_Prec50(confidence, valid_labels[:,i]) 402 | if this_ap > best_ap_Linear_polyak[i]: 403 | best_ap_Linear_polyak[i] = this_ap 404 | best_prec_Linear_polyak[i] = this_prec 405 | best_C_Linear_polyak[i] = C 406 | end = t.time() 407 | print '%d/%d cross-validation cost time %f'%(cnt_Linear, len(Linear_C), end-start) 408 | print 'the map for now on validset is %f'%(np.mean(best_ap_Linear_polyak)) 409 | cnt_Linear += 1 410 | 411 | 412 | print '=======================================Final SVM Part===============================================' 413 | Linear_ap_list = [] 414 | Linear_prec_list = [] 415 | Linear_ap = 0 416 | Linear_prec = 0 417 | hidden_represenation_trainset.extend(hidden_represenation_validset) 418 | train_labels_final = np.vstack((train_labels, valid_labels)) 419 | # file_conf = open("/home/local/USHERBROOKE/zhey2402/DeepDocNADE/SupDocNADE_Confidence_value.txt", 'w') 420 | for i in range(n_classes): 421 | print 'Final SVM for class %d'%i 422 | label_train = train_labels_final[:,i] 423 | label_train = label_train.tolist() 424 | label_test = test_labels[:,i] 425 | label_test = label_test.tolist() 426 | 427 | # train_options = '-s 1 -c %e -q'%np.exp2(best_C_Linear[i]) 428 | # test_options = '-q' 429 | train_options = '-s 0 -c %e -q'%np.exp2(best_C_Linear_polyak[i]) 430 | test_options = '-b 0 -q' 431 | svm_model = 
train(label_train, hidden_represenation_trainset, train_options) 432 | p_labels, p_acc, p_vals = predict(label_test, hidden_represenation_testset, svm_model, test_options) 433 | p_vals = np.asarray(p_vals) 434 | index = svm_model.get_labels().index(1) 435 | if index ==0: 436 | confidence = p_vals 437 | elif index == 1: 438 | confidence = -p_vals 439 | else: 440 | raise TypeError('wrong index') 441 | # confidence = p_vals[:, index] 442 | this_ap, this_prec = compute_AP_Prec50(confidence, test_labels[:,i]) 443 | Linear_ap += this_ap 444 | Linear_prec += this_prec 445 | Linear_ap_list.append(this_ap) 446 | Linear_prec_list.append(this_prec) 447 | # confidence.tofile(file_conf, sep=' ', format='%s') 448 | # file_conf.write('\n') 449 | # file_conf.close() 450 | Linear_map_polyak = Linear_ap/n_classes 451 | Linear_prec50_polyak = Linear_prec/n_classes 452 | print 'Linear SVM map is '+ str(Linear_map_polyak) 453 | print 'Linear SVM prec@50 is '+ str(Linear_prec50_polyak) 454 | #=============================================================================== 455 | 456 | 457 | line_linear = '%f %f %f %f %f %f %f %f %d %s %s %s %s %d %d %d %f %f %s %s %f %s %s %f %f %f %s %s\n'%(Linear_map_polyak, 458 | np.mean(best_ap_Linear_polyak), 459 | test_map_polyak, 460 | best_valid_error, 461 | Linear_prec50_polyak, 462 | np.mean(best_prec_Linear_polyak), 463 | test_prec50_polyak, 464 | best_valid_prec50, 465 | folder_ID, 466 | spatial_pyramid, 467 | hidden_size, 468 | learning_rate, 469 | activation_function, 470 | max_iter, 471 | look_ahead, 472 | epoch, 473 | Linear_minC, 474 | Linear_maxC, 475 | dropout_rate, 476 | unsup_weight, 477 | anno_weight, 478 | sup_option, 479 | scaled_method, 480 | length_limit, 481 | decrease_constant, 482 | polyakexp_weight, 483 | ' '.join(str(x) for x in best_C_Linear_polyak), 484 | full_path_premodel 485 | ) 486 | uniresultfile_linear = open(uniresultfile_name_Linear, 'a') 487 | fcntl.flock(uniresultfile_linear.fileno(), fcntl.LOCK_EX) 488 | uniresultfile_linear.write(line_linear) 489 | uniresultfile_linear.close() # unlocks the file 490 | 491 | 492 | 493 | 494 | print 'done' 495 | -------------------------------------------------------------------------------- /run_pretrain_DeepDocNADE.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | # Copyright 2015 Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, are 6 | # permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY Yin Zheng, Yu-Jin Zhang, Hugo Larochelle ``AS IS'' AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL 18 | # Yin Zheng, Yu-Jin Zhang, Hugo Larochelle OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 19 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 20 | # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # The views and conclusions contained in the software and documentation are those of the 26 | # authors and should not be interpreted as representing official policies, either expressed 27 | # or implied, of Yin Zheng, Yu-Jin Zhang, Hugo Larochelle. 28 | 29 | 30 | Created on Aug 15, 2015 31 | 32 | @author: Yin Zheng 33 | ''' 34 | import MIR_Flickr_Theano_Unlab as MFU 35 | from SupDeepDocNADE import DeepDocNADE 36 | import copy 37 | import numpy as np 38 | from itertools import izip 39 | import random 40 | import collections 41 | import cPickle 42 | import sys, os 43 | import fcntl 44 | import time as t 45 | import theano 46 | import theano.tensor as T 47 | import theano.sparse as S 48 | from liblinearutil import * 49 | import gc 50 | import shutil 51 | import glob 52 | 53 | # activation_functions = {"sigmoid": theano.tensor.nnet.sigmoid, "reclin": lambda x: x * (x > 0), "tanh": theano.tensor.tanh} 54 | def get_done_text(start_time): 55 | sys.stdout.flush() 56 | return "DONE in {:.4f} seconds.".format(t.time() - start_time) 57 | sys.argv.pop(0); # Remove first argument 58 | 59 | # Check that all 12 options are provided (sys.argv[11] is used below when building the model-name template) 60 | if 12 != len(sys.argv): 61 | print "Usage: python run_pretrain.py n_pretrain pre_learning_rate hidden_size activation_function dropout_rate model_file_dir unlab_dataset_dir batch_size anno_weight platform polyakexp_weight model_init" 62 | sys.exit() 63 | 64 | # scene15.obtain(r'/home/ian/ml_datasets/Scene15') 65 | str2bool = {'True':True, 'False': False} 66 | n_pretrain = int(sys.argv[0]) 67 | pre_learning_rate = float(sys.argv[1]) 68 | hidden_size_split = (sys.argv[2]).split('_') 69 | hidden_size = [int(x) for x in hidden_size_split] 70 | activation_function = sys.argv[3] 71 | dropout_split = (sys.argv[4]).split('_') 72 | dropout_rate = [float(x) for x in dropout_split] 73 | model_file_dir = sys.argv[5] 74 | unlab_dataset_dir = sys.argv[6] 75 | batch_size = int(sys.argv[7]) 76 | normalize_by_document_size = False 77 | anno_weight = float(sys.argv[8]) 78 | log_option = "NoLog" 79 | spatial_pyramid = [1] 80 | platform = 'PC' 81 | scaled_method = 'std' 82 | length_limit = float(100.0) 83 | decrease_constant = float(1.0) 84 | polyakexp_weight = float(sys.argv[9]) 85 | pretrained_models_dir = sys.argv[10] 86 | 87 | if not os.path.exists(unlab_dataset_dir): 88 | print 'no such file for dataset' 89 | exit(-1) 90 | 91 | 92 | #================================================= search for a saved model that matches the options ======================================== 93 | if normalize_by_document_size: 94 | template_ID = 'Wholelayers__%s__%s__%s__%s__*__%s__%s__%f__%s__%f__%f__%f__normalized_by_doc_size'%(sys.argv[2],activation_function, log_option, sys.argv[11], pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight) 95 | else: 96 | template_ID = 'Wholelayers__%s__%s__%s__%s__*__%s__%s__%f__%s__%f__%f__%f'%(sys.argv[2],activation_function, log_option,
sys.argv[11], pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight ) 97 | template_model_name = os.path.join(pretrained_models_dir, template_ID+'___model.pkl') 98 | 99 | potential_model_name = glob.glob(template_model_name) 100 | 101 | init_model = 'None' 102 | n_trained = -np.inf 103 | for tmp_model in potential_model_name: 104 | tmp_config = tmp_model.split('__') 105 | tmp_n_trained = int(tmp_config[5]) 106 | if tmp_n_trained > n_trained: 107 | n_trained = tmp_n_trained 108 | init_model = tmp_model 109 | 110 | #=========================================================================================================================================== 111 | if not os.path.exists(init_model): 112 | print 'no init model found, we will train the model from epoch 0' 113 | flag_continue = False 114 | else: 115 | print 'the model we based on is %s'%(init_model) 116 | flag_continue = True 117 | # exit(-1) 118 | 119 | 120 | if platform == 'Guilinmin': 121 | src = unlab_dataset_dir 122 | dst = os.path.join('/dev/shm/', os.environ['PBS_JOBID']) 123 | # dst = os.path.join('/home/local/USHERBROOKE/zhey2402/localscratch', os.environ['PBS_JOBID']) 124 | print src 125 | print dst 126 | 127 | print 'starting copying' 128 | for i in xrange(1, 51): 129 | src_filename = os.path.join(src, 'unlabeled'+str(i)+'.npz.gz') 130 | start_copy = t.time() 131 | shutil.copy(src_filename, dst) 132 | print 'Copy file_ID %d to shm'%(i), get_done_text(start_copy) 133 | 134 | 135 | start_copy = t.time() 136 | src_filename = os.path.join(src, 'others.tar.gz') 137 | shutil.copy(src_filename, dst) 138 | print 'Copy others.tar.gz to shm', get_done_text(start_copy) 139 | 140 | start_extract = t.time() 141 | path_tarfile = os.path.join(dst, 'others.tar.gz') 142 | cmd = 'tar xvfzm '+path_tarfile + ' -C ' + dst 143 | print cmd 144 | os.system(cmd) 145 | print 'Extract other.tar.gz', get_done_text(start_extract) 146 | 147 | start_remove_tar = t.time() 148 | cmd = 'rm ' + path_tarfile 149 | print cmd 150 | print 'Removing other.tar.gz file to save space' 151 | os.system(cmd) 152 | print 'Remove tar', get_done_text(start_remove_tar) 153 | 154 | unlab_dataset_dir = dst 155 | 156 | 157 | 158 | if flag_continue: 159 | init_config = init_model.split('__') 160 | init_hidden_size = init_config[1] 161 | init_activation = init_config[2] 162 | init_logoption = init_config[3] 163 | init_spatial = init_config[4] 164 | init_epoch = int(init_config[5]) 165 | init_lr = init_config[6] 166 | init_dropout = init_config[7] 167 | init_annoweight = float(init_config[8]) 168 | init_scale = init_config[9] 169 | init_lengthlimit = float(init_config[10]) 170 | init_decreaseconst = float(init_config[11]) 171 | init_polyweight = float(init_config[12]) 172 | 173 | init_model = os.path.expanduser(init_model) 174 | if os.path.isfile(init_model): 175 | model_file = open(init_model, 'rb') 176 | model_init = cPickle.load(model_file) 177 | model_file.close() 178 | model_init.add_activation() 179 | else: 180 | print 'ERROR: init model not found' 181 | exit(-1) 182 | 183 | assert(model_init.hidden_size == hidden_size) 184 | assert(model_init.learning_rate == pre_learning_rate) 185 | assert(model_init.activation_function_name == activation_function) 186 | assert(model_init.dropout_rate == dropout_rate) 187 | assert(model_init.normalize_by_document_size == normalize_by_document_size) 188 | assert(model_init.anno_weight == anno_weight) 189 | assert(model_init.batch_size == batch_size) 190 | 
assert(model_init.preprocess_method == scaled_method) 191 | assert(model_init.length_limit == length_limit) 192 | assert(model_init.decrease_constant == decrease_constant) 193 | assert(model_init.polyakexp_weight == polyakexp_weight) 194 | assert(init_logoption == log_option) 195 | assert(init_spatial == '1') 196 | if init_epoch >= n_pretrain: 197 | print 'the model is trained %d epoches, which equals or exceeds the number %d you required'%(init_epoch, n_pretrain) 198 | exit(-1) 199 | else: 200 | init_epoch = 0 201 | 202 | model = DeepDocNADE(hidden_size = hidden_size, 203 | learning_rate = pre_learning_rate, 204 | activation_function = activation_function, 205 | word_representation_size = 0, 206 | dropout_rate = dropout_rate, 207 | normalize_by_document_size = normalize_by_document_size, 208 | anno_weight = anno_weight, 209 | batch_size = batch_size, 210 | preprocess_method = scaled_method, 211 | length_limit = length_limit, 212 | decrease_constant = decrease_constant, 213 | polyakexp_weight = polyakexp_weight, 214 | seed_np = init_epoch + 1126, 215 | seed_theano = init_epoch + 1959 216 | ) 217 | initialized = False 218 | flag_compiled = False 219 | 220 | #================================================= create a object used to save model============================== 221 | copy_model = copy.deepcopy(model) 222 | # copy_model.remove_activation() 223 | train_ahead = n_pretrain 224 | print 'begin pretrain using unlabeled data...' 225 | print 'we need to train it %d more epoches'%(min(init_epoch+train_ahead,n_pretrain)-init_epoch) 226 | 227 | 228 | n_layers = model.n_layers 229 | # for n_build in xrange(n_layers): 230 | n_build = n_layers-1 231 | epoch = init_epoch 232 | 233 | print '\n### Training DeepDocNADE using unlabeled data, n_layers=%d ###'%(n_build+1) 234 | start_training_time = t.time() 235 | model.dec_learning_rate.set_value(model.learning_rate) 236 | copy_model.dec_learning_rate.set_value(copy_model.learning_rate) 237 | if initialized: 238 | model.remove_top_layer() 239 | model.add_top_layer(n_build) 240 | while(epoch < min(init_epoch+train_ahead,n_pretrain)): 241 | 242 | epoch += 1 243 | print 'Epoch {0}'.format(epoch) 244 | start_time_epoch = t.time() 245 | cost_train = [] 246 | for file_id in xrange(1,51): 247 | 248 | start_time = t.time() 249 | start_time_loaddata = t.time() 250 | #===================extract corresponding unlabeled(file_id).npz.gz file================================= 251 | if platform == 'Guilinmin': 252 | start_extract = t.time() 253 | path_tarfile = os.path.join(unlab_dataset_dir, 'unlabeled'+str(file_id)+'.npz.gz') 254 | cmd = 'tar xvfzm '+path_tarfile + ' -C ' + unlab_dataset_dir 255 | print cmd 256 | os.system(cmd) 257 | print 'Extract file_ID %d'%(file_id), get_done_text(start_extract) 258 | #===============================LOAD file================================================== 259 | unlabel_raw = MFU.load(unlab_dataset_dir, file_id, log_option, spatial_pyramid) 260 | 261 | #======================================remove unlabeled(file_id).npz.gz============================ 262 | if platform == 'Guilinmin': 263 | start_remove_tar = t.time() 264 | path_npzfile = os.path.join(unlab_dataset_dir, 'unlabeled'+str(file_id)+'.npz') 265 | cmd = 'rm ' + path_npzfile 266 | print cmd 267 | os.system(cmd) 268 | print 'Remove file_ID %d'%(file_id), get_done_text(start_remove_tar) 269 | #================================================================================================== 270 | print '\tTraining ...', 271 | sys.stdout.write("Load data cost {:.4f} 
seconds ".format(t.time() - start_time_loaddata)) 272 | if not flag_compiled: 273 | unlabel = {} 274 | unlabel['hists_visual'] = theano.shared(np.asarray(unlabel_raw['hists_visual'], theano.config.floatX), borrow=False) 275 | unlabel['hists_anno'] = theano.shared(unlabel_raw['hists_anno'].astype(theano.config.floatX), borrow=False) 276 | unlabel['global_features'] = theano.shared(np.asarray(unlabel_raw['global_features'], theano.config.floatX), borrow=False) 277 | else: 278 | unlabel['hists_visual'].set_value(np.asarray(unlabel_raw['hists_visual'], theano.config.floatX)) 279 | unlabel['hists_anno'].set_value(unlabel_raw['hists_anno'].astype(theano.config.floatX)) 280 | unlabel['global_features'].set_value(np.asarray(unlabel_raw['global_features'], theano.config.floatX)) 281 | 282 | n_train_batches = unlabel_raw['meta']['length']/batch_size 283 | 284 | aver_words_count = unlabel_raw['hists_visual'].sum(axis=1).mean() 285 | sys.stdout.write("aver word counts is {:.4f} ".format(aver_words_count)) 286 | if not initialized: 287 | spatial_split = np.asarray(spatial_pyramid, np.int32)**2*unlabel_raw['meta']['voc_size'] 288 | region_split = np.append(spatial_split, unlabel_raw['meta']['text_voc_size']) 289 | region_split = np.add.accumulate(region_split) 290 | 291 | 292 | model.initialize(unlabel_raw['meta']['voc_size']*unlabel_raw['meta']['n_regions'], unlabel_raw['meta']['text_voc_size'], unlabel_raw['meta']['global_feat_size'], region_split) 293 | model.remove_top_layer() 294 | model.add_top_layer(n_build) 295 | copy_model.initialize(unlabel_raw['meta']['voc_size']*unlabel_raw['meta']['n_regions'], unlabel_raw['meta']['text_voc_size'], unlabel_raw['meta']['global_feat_size'], region_split) 296 | copy_model.remove_top_layer() 297 | copy_model.add_top_layer(n_build) 298 | del copy_model.rng_theano 299 | del copy_model.rng 300 | if flag_continue: 301 | model.copy_parameters(model_init) 302 | copy_model.copy_parameters(model_init) 303 | del model_init 304 | initialized = True 305 | model.aver_words_count = aver_words_count 306 | copy_model.aver_words_count = aver_words_count 307 | 308 | start_time_process = t.time() 309 | if not flag_compiled: 310 | model.compile_function(n_build+1, unlabel, unlabel) 311 | flag_compiled = True 312 | for minibatch_index in range(n_train_batches): 313 | cost_value = model.train(minibatch_index) 314 | cost_train += [cost_value] 315 | sys.stdout.write("Process data cost {:.4f} seconds ".format(t.time() - start_time_process)) 316 | del unlabel_raw 317 | # del model.train 318 | # del model.valid 319 | # del unlabel 320 | gc.collect() 321 | 322 | print 'Train :', 'File ID %d'%(file_id), get_done_text(start_time) 323 | # unlabel.clear() 324 | train_cost_error = np.asarray(cost_train).mean() 325 | print '\tTraining ...', 326 | print 'Train :', " Cost Error: {0:.6f}".format(train_cost_error), get_done_text(start_time_epoch) 327 | # if np.mod(epoch,2)==0: 328 | # copy_model.copy_parameters(model) 329 | # del model 330 | # gc.collect() 331 | # model = copy.deepcopy(copy_model) 332 | if np.mod(epoch, 25)==0: 333 | if normalize_by_document_size: 334 | cPickle_ID = 'Wholelayers__%s__%s__%s__%s__%d__%s__%s__%f__%s__%f__%f__%f__normalized_by_doc_size'%(sys.argv[2],activation_function, log_option, sys.argv[11], epoch, pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight) 335 | else: 336 | cPickle_ID = 'Wholelayers__%s__%s__%s__%s__%d__%s__%s__%f__%s__%f__%f__%f'%(sys.argv[2],activation_function, log_option, sys.argv[11], 
epoch, pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight ) 337 | cPickle_model_name = os.path.join(model_file_dir, cPickle_ID+'___model.pkl') 338 | copy_model.copy_parameters(model) 339 | copy_model.remove_activation() 340 | 341 | saved_model_list = open(os.path.join(model_file_dir, 'saved_model_list.txt'), 'a') 342 | fcntl.flock(saved_model_list.fileno(), fcntl.LOCK_EX) 343 | model_file = open(cPickle_model_name, 'wb') 344 | cPickle.dump(copy_model, model_file,protocol=cPickle.HIGHEST_PROTOCOL) 345 | model_file.close() 346 | saved_model_list.write(cPickle_model_name+'\n') 347 | saved_model_list.close() # unlocks the file 348 | copy_model.add_activation() 349 | print cPickle_model_name 350 | print 'is saved' 351 | 352 | print '\n### Pre_Training, n_layers=%d'%(n_build+1), get_done_text(start_training_time) 353 | # copy_model.copy_parameters(model) 354 | # del model 355 | # gc.collect() 356 | # model = copy.deepcopy(copy_model) 357 | 358 | if normalize_by_document_size: 359 | cPickle_ID = 'Wholelayers__%s__%s__%s__%s__%d__%s__%s__%f__%s__%f__%f__%f__normalized_by_doc_size'%(sys.argv[2],activation_function, log_option, sys.argv[11], min(init_epoch+train_ahead,n_pretrain), pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight) 360 | else: 361 | cPickle_ID = 'Wholelayers__%s__%s__%s__%s__%d__%s__%s__%f__%s__%f__%f__%f'%(sys.argv[2],activation_function, log_option, sys.argv[11], min(init_epoch+train_ahead,n_pretrain), pre_learning_rate, sys.argv[4], anno_weight, scaled_method, length_limit, decrease_constant, polyakexp_weight) 362 | cPickle_model_name = os.path.join(model_file_dir, cPickle_ID+'___model.pkl') 363 | 364 | # copy_model.copy_parameters(model) 365 | model.remove_activation() 366 | del model.train 367 | del model.valid 368 | del unlabel 369 | del model.rng_theano 370 | del model.rng 371 | gc.collect() 372 | 373 | saved_model_list = open(os.path.join(model_file_dir, 'saved_model_list.txt'), 'a') 374 | fcntl.flock(saved_model_list.fileno(), fcntl.LOCK_EX) 375 | model_file = open(cPickle_model_name, 'wb') 376 | cPickle.dump(model, model_file,protocol=cPickle.HIGHEST_PROTOCOL) 377 | model_file.close() 378 | saved_model_list.write(cPickle_model_name+'\n') 379 | saved_model_list.close() # unlocks the file 380 | 381 | 382 | print cPickle_model_name 383 | print 'is saved' --------------------------------------------------------------------------------
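Note on the evaluation metrics: run_SupDeepDocNADE.py scores the model with compute_MAP_Prec50 and compute_AP_Prec50 (see the validation, logistic-layer and SVM blocks above), but those helpers are defined earlier in that script and do not appear in this excerpt. They are called as compute_MAP_Prec50(np.exp(prob_target), labels) and compute_AP_Prec50(confidence, labels[:, i]), returning mean average precision and precision among the 50 highest-ranked images, the usual MIR-Flickr measures. The following is only a minimal NumPy sketch consistent with those call sites; the exact bodies, tie-breaking, and the top_k default are assumptions, not the authors' code.

import numpy as np

def compute_AP_Prec50(confidence, labels, top_k=50):
    # Average precision and precision@top_k for a single class (assumed semantics).
    # confidence: 1-D array of scores, higher = more confident positive.
    # labels:     1-D binary ground truth (1 = relevant image).
    order = np.argsort(-confidence)                 # rank images by decreasing score
    ranked = labels[order].astype(np.float64)
    cum_pos = np.cumsum(ranked)
    prec_at_rank = cum_pos / np.arange(1.0, ranked.size + 1.0)
    n_pos = max(ranked.sum(), 1.0)                  # guard against classes with no positives
    ap = (prec_at_rank * ranked).sum() / n_pos
    prec50 = ranked[:top_k].mean()
    return ap, prec50

def compute_MAP_Prec50(scores, labels):
    # Mean of the per-class AP and Prec@50 over all label columns.
    aps, precs = zip(*[compute_AP_Prec50(scores[:, c], labels[:, c])
                       for c in range(labels.shape[1])])
    return np.mean(aps), np.mean(precs)

In the SVM blocks above the same per-class routine is applied to the liblinear decision values, with the sign of p_vals flipped when get_labels() does not list the positive class first, so that a larger confidence always means a more likely positive.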
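Note on the "polyak" functions and polyakexp_weight: compile_compute_representation_function_polyak and the polyakexp_weight hyperparameter refer to an exponentially weighted (Polyak-style) running average of the model parameters that is kept alongside the raw parameters and used when computing hidden representations for evaluation. The averaging itself lives in SupDeepDocNADE.py, which is not part of this excerpt; the snippet below is only a sketch of the conventional update, with avg_params, params, and polyak_update as hypothetical names.

import numpy as np

def polyak_update(avg_params, params, mu):
    # Exponential moving average of parameters: avg <- mu * avg + (1 - mu) * current.
    # avg_params, params: lists of numpy arrays with matching shapes (hypothetical layout).
    # mu:                 the decay, i.e. what polyakexp_weight is assumed to control.
    for avg, p in zip(avg_params, params):
        avg *= mu
        avg += (1.0 - mu) * p

# Tiny demonstration with one weight matrix:
w = np.ones((2, 2))
w_avg = np.zeros((2, 2))
polyak_update([w_avg], [w], mu=0.999)   # w_avg is now 0.001 everywhere

After every minibatch update of the raw parameters, an update of this form would refresh the averaged copies, and the *_polyak representation functions would then read from those averages rather than from the most recent parameters.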