├── .gitattributes ├── .gitignore ├── AE.py ├── AE_brain.py ├── AE_brain_weight_val.py ├── AE_brain_weight_vec.py ├── evaluation ├── association.dev.b.txt ├── eval_category.py ├── men-3k.txt ├── mturk771.txt ├── ranking.py ├── run_sim.sh ├── semsim.txt ├── simlex-999.txt ├── simverb-3500.txt ├── vissim.txt ├── wordrel353.txt ├── wordsim.py ├── wordsim353-full.txt └── wordsim353.txt ├── ranking.py ├── readme ├── tune_AE.sh ├── tune_AE_brain.sh ├── tune_AE_brain_weight_val.sh └── tune_AE_brain_weight_vec.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask instance folder 57 | instance/ 58 | 59 | # Scrapy stuff: 60 | .scrapy 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | 68 | # IPython Notebook 69 | .ipynb_checkpoints 70 | 71 | # pyenv 72 | .python-version 73 | 74 | # celery beat schedule file 75 | celerybeat-schedule 76 | 77 | # dotenv 78 | .env 79 | 80 | # virtualenv 81 | venv/ 82 | ENV/ 83 | 84 | # Spyder project settings 85 | .spyderproject 86 | 87 | # Rope project settings 88 | .ropeproject 89 | 90 | # ========================= 91 | # Operating System Files 92 | # ========================= 93 | 94 | # OSX 95 | # ========================= 96 | 97 | .DS_Store 98 | .AppleDouble 99 | .LSOverride 100 | 101 | # Thumbnails 102 | ._* 103 | 104 | # Files that might appear in the root of a volume 105 | .DocumentRevisions-V100 106 | .fseventsd 107 | .Spotlight-V100 108 | .TemporaryItems 109 | .Trashes 110 | .VolumeIcon.icns 111 | 112 | # Directories potentially created on remote AFP share 113 | .AppleDB 114 | .AppleDesktop 115 | Network Trash Folder 116 | Temporary Items 117 | .apdisk 118 | 119 | # Windows 120 | # ========================= 121 | 122 | # Windows image file caches 123 | Thumbs.db 124 | ehthumbs.db 125 | 126 | # Folder config file 127 | Desktop.ini 128 | 129 | # Recycle Bin used on file shares 130 | $RECYCLE.BIN/ 131 | 132 | # Windows Installer files 133 | *.cab 134 | *.msi 135 | *.msm 136 | *.msp 137 | 138 | # Windows shortcuts 139 | *.lnk 140 | -------------------------------------------------------------------------------- /AE.py: 
def evaluation(ep, text, image, sound, autoencoder, vocab, args):
    """Save the model and its word vectors, then score them on similarity sets.

    The fused representation of every word is computed with ``autoencoder``,
    the model parameters and the vectors are written next to
    ``args.outmodel``, and for each benchmark file under ``evaluation/`` the
    Spearman correlation between human scores and cosine similarities is
    printed.

    Args:
        ep: Epoch number; only used to build the output file names.
        text, image, sound: Model inputs holding one row per vocabulary word.
        autoencoder: Callable model whose last output is the fused vector.
        vocab: List of words; ``vocab[i]`` labels row ``i`` of the vectors.
        args: Parsed command line; only ``args.outmodel`` is read.

    Relies on ``cosine_sim``, ``assign_ranks`` and ``spearmans_rho`` being in
    module scope (presumably supplied by ``from ranking import *``).
    """
    testfile = ['men-3k.txt', 'simlex-999.txt', 'semsim.txt', 'vissim.txt', 'simverb-3500.txt',
                'wordsim353.txt', 'wordrel353.txt', 'association.dev.txt', 'association.dev.b.txt']

    # The fused representation is the last element of the model output.
    multi_rep = autoencoder(text, image, sound)[-1]
    word_vecs = multi_rep.data.cpu().numpy()

    # Context managers make sure both output files are flushed and closed.
    with open(args.outmodel + '.parameters-' + str(ep), 'wb') as param_file:
        torch.save(autoencoder.state_dict(), param_file)
    with open(args.outmodel + '-' + str(ep) + '.rep.txt', 'w') as outfile:
        for word, vec in zip(vocab, word_vecs):
            outfile.write(word + ' ' + ' '.join([str(i) for i in vec]) + '\n')

    # vocab is a list, so build the word -> row map once instead of scanning
    # it for every word pair. setdefault keeps the first occurrence, which is
    # what list.index() returned.
    row_of = {}
    for row, word in enumerate(vocab):
        row_of.setdefault(word, row)

    for name in testfile:
        manual_dict, auto_dict = {}, {}
        not_found, total_size = 0, 0
        try:
            handle = open('evaluation/' + name, 'r')
        except IOError:
            # A benchmark that is not shipped must not abort the whole run.
            print('%s %15s' % (name, 'skipped (file not found)'))
            continue
        with handle:
            for line in handle:
                fields = line.strip().lower().split()
                if not fields:
                    continue  # tolerate blank lines
                word1, word2, val = fields
                if word1 in row_of and word2 in row_of:
                    manual_dict[(word1, word2)] = float(val)
                    auto_dict[(word1, word2)] = cosine_sim(word_vecs[row_of[word1]],
                                                           word_vecs[row_of[word2]])
                else:
                    not_found += 1
                total_size += 1
        sp = spearmans_rho(assign_ranks(manual_dict), assign_ranks(auto_dict))
        # Single-argument print keeps the original column layout and is valid
        # on both Python 2 and Python 3.
        print('%s %15s %15s %15.4f' % (name, total_size, not_found, sp))
    print('')
class AutoEncoder(nn.Module):
    """Three-branch autoencoder that fuses text, image and sound vectors.

    Each modality goes through its own two-layer tanh encoder; the three codes
    are concatenated and projected to a shared ``multi_dim`` vector, and a
    mirrored decoder stack reconstructs every modality from that vector.

    Attribute names double as state-dict keys (``encoder1.0.weight`` ...), so
    they are kept unchanged even though the decoder numbering is reversed:
    ``decoder3`` rebuilds text, ``decoder2`` image and ``decoder1`` sound.

    Args:
        args: Namespace providing ``text_dim``/``text_dim1``/``text_dim2``,
            the same three sizes for ``image_*`` and ``sound_*``, and
            ``multi_dim``.
    """

    # Sub-networks in the order reset_parameters() visits them. This differs
    # from the construction order (decoder4 is built first) and must stay
    # fixed: the kaiming initialiser draws from the global RNG, so reordering
    # would change the initial weights obtained for a given seed.
    _INIT_ORDER = ('encoder1', 'encoder2', 'encoder3', 'encoder4',
                   'decoder1', 'decoder2', 'decoder3', 'decoder4')

    def __init__(self, args):
        super(AutoEncoder, self).__init__()
        self.tdim = args.text_dim
        self.tdim1 = args.text_dim1
        self.tdim2 = args.text_dim2
        self.idim = args.image_dim
        self.idim1 = args.image_dim1
        self.idim2 = args.image_dim2
        self.sdim = args.sound_dim
        self.sdim1 = args.sound_dim1
        self.sdim2 = args.sound_dim2
        self.zdim = args.multi_dim
        fused_dim = self.tdim2 + self.idim2 + self.sdim2

        # Construction order is preserved too: it fixes the RNG draws made by
        # nn.Linear and the ordering of state_dict() / parameters().
        self.encoder1 = self._tanh_stack(self.tdim, self.tdim1, self.tdim2)
        self.encoder2 = self._tanh_stack(self.idim, self.idim1, self.idim2)
        self.encoder3 = self._tanh_stack(self.sdim, self.sdim1, self.sdim2)
        self.encoder4 = self._tanh_stack(fused_dim, self.zdim)

        self.decoder4 = self._tanh_stack(self.zdim, fused_dim)
        self.decoder3 = self._tanh_stack(self.tdim2, self.tdim1, self.tdim)
        self.decoder2 = self._tanh_stack(self.idim2, self.idim1, self.idim)
        self.decoder1 = self._tanh_stack(self.sdim2, self.sdim1, self.sdim)
        self.reset_parameters()

    @staticmethod
    def _tanh_stack(*sizes):
        """Return Linear+Tanh pairs mapping sizes[0] -> sizes[1] -> ... -> sizes[-1]."""
        layers = []
        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.Tanh())
        return nn.Sequential(*layers)

    def reset_parameters(self):
        """Kaiming-initialise every linear weight and zero every bias."""
        # Prefer the underscore-suffixed initialisers when the installed torch
        # has them; fall back to the names this file was written against.
        kaiming = getattr(init, 'kaiming_normal_', None) or init.kaiming_normal
        constant = getattr(init, 'constant_', None) or init.constant
        for name in self._INIT_ORDER:
            linears = [m for m in getattr(self, name).children()
                       if isinstance(m, nn.Linear)]
            for layer in linears:
                kaiming(layer.weight.data)
            for layer in linears:
                constant(layer.bias.data, val=0)

    def forward(self, x_t, x_i, x_s):
        """Encode the three modalities, fuse them, and decode them again.

        Returns:
            ``(decoded_text, decoded_image, decoded_sound, encoded_mid)`` where
            ``encoded_mid`` is the fused representation.
        """
        encoded_text = self.encoder1(x_t)
        encoded_image = self.encoder2(x_i)
        encoded_sound = self.encoder3(x_s)
        encoded_mid = self.encoder4(torch.cat((encoded_text, encoded_image, encoded_sound), dim=1))
        decoded_mid = self.decoder4(encoded_mid)

        # decoded_mid is laid out as [text | image | sound] along dim 1.
        text_end = self.tdim2
        image_end = self.tdim2 + self.idim2
        decoded_text = self.decoder3(decoded_mid[:, 0:text_end])
        decoded_image = self.decoder2(decoded_mid[:, text_end:image_end])
        decoded_sound = self.decoder1(decoded_mid[:, image_end:])
        return decoded_text, decoded_image, decoded_sound, encoded_mid
| 203 | 204 | # Data Loader for easy mini-batch return in training 205 | if args.gpu > -1: 206 | train_loader = Data.DataLoader(dataset=train_ind, batch_size=args.batch_size, shuffle=True, pin_memory=True) 207 | autoencoder = AutoEncoder(args).cuda(args.gpu) 208 | else: 209 | train_loader = Data.DataLoader(dataset=train_ind, batch_size=args.batch_size, shuffle=True) 210 | autoencoder = AutoEncoder(args) 211 | 212 | optimizer = torch.optim.Adam(autoencoder.parameters(), lr=args.lr) 213 | loss_func = nn.MSELoss() 214 | 215 | min_vloss = 99999 216 | if args.gpu > -1: 217 | total_text = Variable(text.cuda(args.gpu)) 218 | total_image = Variable(image.cuda(args.gpu)) 219 | total_sound = Variable(sound.cuda(args.gpu)) 220 | else: 221 | total_text = Variable(text) 222 | total_image = Variable(image) 223 | total_sound = Variable(sound) 224 | 225 | for ep in range(args.epoch): 226 | ep += 1 227 | for step, ind in enumerate(train_loader): 228 | if args.gpu > -1: 229 | batch_text = Variable(text[ind].view(-1, args.text_dim).cuda(args.gpu)) # batch x, shape (batch, 300) 230 | batch_image = Variable(image[ind].view(-1, args.image_dim).cuda(args.gpu)) # batch y, shape (batch, 128) 231 | batch_sound = Variable(sound[ind].view(-1, args.sound_dim).cuda(args.gpu)) # batch y, shape (batch, 128) 232 | else: 233 | batch_text = Variable(text[ind].view(-1, args.text_dim)) # batch x, shape (batch, 300) 234 | batch_image = Variable(image[ind].view(-1, args.image_dim)) # batch y, shape (batch, 128) 235 | batch_sound = Variable(sound[ind].view(-1, args.sound_dim)) # batch y, shape (batch, 128) 236 | 237 | decoded_text, decoded_image, decoded_sound, _ = autoencoder(batch_text, batch_image, batch_sound) 238 | 239 | loss = loss_func(decoded_text, batch_text) + loss_func(decoded_image, batch_image) + loss_func( 240 | decoded_sound, batch_sound) # mean square error 241 | optimizer.zero_grad() # clear gradients for this training step 242 | loss.backward() # backpropagation, compute gradients 243 | 
optimizer.step() # apply gradients 244 | 245 | if step % 100 == 0: 246 | print 'Epoch: ', ep, '| train loss: %.4f' % loss.data[0] 247 | 248 | if ep % 100 == 0: 249 | evaluation(ep, total_text, total_image, total_sound, autoencoder, vocab, args) 250 | 251 | -------------------------------------------------------------------------------- /AE_brain.py: -------------------------------------------------------------------------------- 1 | """ 2 | View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/ 3 | My Youtube Channel: https://www.youtube.com/user/MorvanZhou 4 | Dependencies: 5 | torch: 0.1.11 6 | matplotlib 7 | numpy 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | from torch.nn import init 12 | from torch.autograd import Variable 13 | import torch.utils.data as Data 14 | import numpy as np 15 | import copy 16 | import cPickle as pickle 17 | import argparse 18 | from ranking import * 19 | 20 | torch.manual_seed(1) # reproducible 21 | 22 | def evaluation(ep, text, image, sound, autoencoder, vocab, args): 23 | testfile = ['men-3k.txt', 'simlex-999.txt', 'semsim.txt', 'vissim.txt', 'simverb-3500.txt', 24 | 'wordsim353.txt', 'wordrel353.txt', 'association.dev.txt', 'association.dev.b.txt'] 25 | _, _, _, _, multi_rep = autoencoder(text, image, sound) 26 | word_vecs = multi_rep.data.cpu().numpy() 27 | #torch.save(autoencoder.state_dict(), open(args.outmodel + '.parameters-' + str(ep), 'wb')) 28 | #outfile = open(args.outmodel + '-' + str(ep)+ '.rep.txt', 'w') 29 | # outfile = open(args.outmodel+'.rep.txt', 'w') 30 | # #pickle.dump(word_vecs, outfile, protocol=2) 31 | # for ind, w in enumerate(word_vecs): 32 | # outfile.write(vocab[ind] + ' ' + ' '.join([str(i) for i in w]) + '\n') 33 | 34 | for file in testfile: 35 | manual_dict, auto_dict = ({}, {}) 36 | not_found, total_size = (0, 0) 37 | for line in open('evaluation/' + file, 'r'): 38 | line = line.strip().lower() 39 | word1, word2, val = line.split() 40 | if word1 in vocab and word2 in 
class AutoEncoder(nn.Module):
    """Pre-trained multimodal autoencoder with an added brain decoder.

    The text/image/sound encoder and decoder stacks are created with the same
    layout as the checkpoint in ``model_para`` and then overwritten with its
    weights. Only ``decoder_brain``, which maps the fused vector to a
    ``brain_dim`` output through a sigmoid, starts from a fresh
    initialisation.

    Args:
        args: Namespace providing ``text_dim``/``text_dim1``/``text_dim2``,
            the same three sizes for ``image_*`` and ``sound_*``,
            ``multi_dim``, ``brain_dim1`` and ``brain_dim``.
        model_para: Mapping from state-dict keys such as
            ``'encoder1.0.weight'`` to tensors.

    Raises:
        KeyError: if ``model_para`` lacks an expected entry.
        ValueError: if an entry's shape does not match the layer it targets.
    """

    # Sub-networks whose parameters come from the pre-trained checkpoint.
    _PRETRAINED = ('encoder1', 'encoder2', 'encoder3', 'encoder4',
                   'decoder1', 'decoder2', 'decoder3', 'decoder4')

    def __init__(self, args, model_para):
        super(AutoEncoder, self).__init__()
        self.tdim = args.text_dim
        self.tdim1 = args.text_dim1
        self.tdim2 = args.text_dim2
        self.idim = args.image_dim
        self.idim1 = args.image_dim1
        self.idim2 = args.image_dim2
        self.sdim = args.sound_dim
        self.sdim1 = args.sound_dim1
        self.sdim2 = args.sound_dim2
        self.zdim = args.multi_dim
        self.brain_dim1 = args.brain_dim1
        self.brain_dim = args.brain_dim
        self.model_para = model_para
        fused_dim = self.tdim2 + self.idim2 + self.sdim2

        # Construction order is kept: it determines the RNG draws made by
        # nn.Linear and the ordering of state_dict() / parameters().
        self.encoder1 = self._tanh_stack(self.tdim, self.tdim1, self.tdim2)
        self.encoder2 = self._tanh_stack(self.idim, self.idim1, self.idim2)
        self.encoder3 = self._tanh_stack(self.sdim, self.sdim1, self.sdim2)
        self.encoder4 = self._tanh_stack(fused_dim, self.zdim)

        self.decoder4 = self._tanh_stack(self.zdim, fused_dim)
        self.decoder3 = self._tanh_stack(self.tdim2, self.tdim1, self.tdim)
        self.decoder2 = self._tanh_stack(self.idim2, self.idim1, self.idim)
        self.decoder1 = self._tanh_stack(self.sdim2, self.sdim1, self.sdim)
        self.decoder_brain = nn.Sequential(
            nn.Linear(self.zdim, self.brain_dim1),
            nn.Tanh(),
            nn.Linear(self.brain_dim1, self.brain_dim),
            nn.Sigmoid()
        )

        self.reset_parameters()
        self.load_parameters()

    @staticmethod
    def _tanh_stack(*sizes):
        """Return Linear+Tanh pairs mapping sizes[0] -> sizes[1] -> ... -> sizes[-1]."""
        layers = []
        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.Tanh())
        return nn.Sequential(*layers)

    def reset_parameters(self):
        """Initialise the brain decoder; all other layers are loaded instead."""
        # Prefer the underscore-suffixed initialisers when the installed torch
        # has them; fall back to the names this file was written against.
        kaiming = getattr(init, 'kaiming_normal_', None) or init.kaiming_normal
        constant = getattr(init, 'constant_', None) or init.constant
        linears = (self.decoder_brain[0], self.decoder_brain[2])
        for layer in linears:
            kaiming(layer.weight.data)
        for layer in linears:
            constant(layer.bias.data, val=0)

    def load_parameters(self):
        """Copy the pre-trained weights and biases from ``self.model_para``."""
        for name in self._PRETRAINED:
            # Children of a Sequential are registered as '0', '1', ..., so the
            # enumeration index is the number used in the checkpoint key.
            for idx, layer in enumerate(getattr(self, name).children()):
                if not isinstance(layer, nn.Linear):
                    continue
                for kind in ('weight', 'bias'):
                    key = '%s.%d.%s' % (name, idx, kind)
                    if key not in self.model_para:
                        raise KeyError('pre-trained checkpoint has no entry %r' % key)
                    source = self.model_para[key]
                    target = getattr(layer, kind)
                    if tuple(source.size()) != tuple(target.size()):
                        raise ValueError('shape mismatch for %s: checkpoint %s, model %s'
                                         % (key, tuple(source.size()), tuple(target.size())))
                    # Deep copy so later training never touches the checkpoint.
                    target.data = copy.deepcopy(source)

    def forward(self, x_t, x_i, x_s):
        """Encode, fuse and decode the modalities and predict the brain vector.

        Returns:
            ``(decoded_text, decoded_image, decoded_brain, decoded_sound,
            encoded_mid)``; note that the brain output comes third.
        """
        encoded_text = self.encoder1(x_t)
        encoded_image = self.encoder2(x_i)
        encoded_sound = self.encoder3(x_s)
        encoded_mid = self.encoder4(torch.cat((encoded_text, encoded_image, encoded_sound), dim=1))
        decoded_mid = self.decoder4(encoded_mid)

        # decoded_mid is laid out as [text | image | sound] along dim 1.
        text_end = self.tdim2
        image_end = self.tdim2 + self.idim2
        decoded_text = self.decoder3(decoded_mid[:, 0:text_end])
        decoded_image = self.decoder2(decoded_mid[:, text_end:image_end])
        decoded_sound = self.decoder1(decoded_mid[:, image_end:])
        decoded_brain = self.decoder_brain(encoded_mid)
        return decoded_text, decoded_image, decoded_brain, decoded_sound, encoded_mid
argparse.ArgumentParser(fromfile_prefix_chars='@') 186 | parser.add_argument('--total-data', required=True) 187 | parser.add_argument('--train-data', required=True) 188 | parser.add_argument('--brain-data', required=True) 189 | parser.add_argument('--text-dim', required=True, type=int) 190 | parser.add_argument('--image-dim', required=True, type=int) 191 | parser.add_argument('--sound-dim', required=True, type=int) 192 | parser.add_argument('--text-dim1', required=True, type=int) 193 | parser.add_argument('--text-dim2', required=True, type=int) 194 | parser.add_argument('--image-dim1', required=True, type=int) 195 | parser.add_argument('--image-dim2', required=True, type=int) 196 | parser.add_argument('--sound-dim1', required=True, type=int) 197 | parser.add_argument('--sound-dim2', required=True, type=int) 198 | parser.add_argument('--brain-dim1', required=True, type=int) 199 | parser.add_argument('--brain-dim', required=True, type=int) 200 | parser.add_argument('--multi-dim', required=True, type=int) 201 | parser.add_argument('--batch-size', required=True, type=int) 202 | parser.add_argument('--epoch', required=True, type=int) 203 | parser.add_argument('--lr', default=0.005, type=float) 204 | parser.add_argument('--load-model', required=True) 205 | parser.add_argument('--outmodel', required=True) 206 | parser.add_argument('--regularization', default=-1, type=float) 207 | parser.add_argument('--gpu', default=-1, type=int) 208 | args = parser.parse_args() 209 | 210 | # total_data 211 | vocab = [] 212 | total_text = [] 213 | total_image = [] 214 | total_sound = [] 215 | num = 0 216 | for line in open(args.total_data): 217 | line = line.strip().split() 218 | total_text.append(np.array([float(i) for i in line[1:args.text_dim + 1]])) # (9405, 300) 219 | total_image.append(np.array([float(i) for i in line[args.text_dim + 1:args.text_dim + args.image_dim + 1]])) # (9405, 128) 220 | total_sound.append(np.array([float(i) for i in line[args.text_dim + args.image_dim + 
1:]])) # (9405, 128) 221 | vocab.append(line[0]) 222 | num += 1 223 | total_text = torch.from_numpy(np.array(total_text)).type(torch.FloatTensor) 224 | total_image = torch.from_numpy(np.array(total_image)).type(torch.FloatTensor) 225 | total_sound = torch.from_numpy(np.array(total_sound)).type(torch.FloatTensor) 226 | 227 | # training dataset 228 | indata = open(args.train_data) # 300*128 229 | text = [] 230 | image = [] 231 | sound = [] 232 | for line in indata: 233 | line = line.strip().split() 234 | text.append(np.array([float(i) for i in line[1:args.text_dim + 1]])) # (9405, 300) 235 | image.append(np.array([float(i) for i in line[args.text_dim + 1:args.text_dim + args.image_dim + 1]])) # (9405, 128) 236 | sound.append(np.array([float(i) for i in line[args.text_dim + args.image_dim + 1:]])) # (9405, 128) 237 | text = torch.from_numpy(np.array(text)).type(torch.FloatTensor) 238 | image = torch.from_numpy(np.array(image)).type(torch.FloatTensor) 239 | sound = torch.from_numpy(np.array(sound)).type(torch.FloatTensor) 240 | train_ind = range(len(image)) 241 | 242 | indata = open(args.brain_data) # 300*128 243 | brain_multi = [] 244 | for line in indata: 245 | line = line.strip().split() 246 | brain_multi.append(np.array([float(i) for i in line[1:]])) 247 | brain_multi = torch.from_numpy(np.array(brain_multi)).type(torch.FloatTensor) 248 | 249 | model_para = torch.load(open(args.load_model, 'rb')) 250 | 251 | # Data Loader for easy mini-batch return in training 252 | if args.gpu > -1: 253 | train_loader = Data.DataLoader(dataset=train_ind, batch_size=args.batch_size, shuffle=True, pin_memory=True) 254 | autoencoder = AutoEncoder(args, model_para).cuda(args.gpu) 255 | else: 256 | train_loader = Data.DataLoader(dataset=train_ind, batch_size=args.batch_size, shuffle=True) 257 | autoencoder = AutoEncoder(args, model_para) 258 | 259 | 260 | if args.gpu > -1: 261 | total_text = Variable(total_text.cuda(args.gpu)) 262 | total_image = Variable(total_image.cuda(args.gpu)) 
263 | total_sound = Variable(total_sound.cuda(args.gpu)) 264 | else: 265 | total_text = Variable(total_text) 266 | total_image = Variable(total_image) 267 | total_sound = Variable(total_sound) 268 | 269 | optimizer = torch.optim.Adam(autoencoder.decoder_brain.parameters(), lr=args.lr) 270 | loss_func = nn.MSELoss() 271 | for ep in range(50): 272 | ep += 1 273 | for step, ind in enumerate(train_loader): 274 | if args.gpu > -1: 275 | batch_text = Variable(text[ind].view(-1, args.text_dim).cuda(args.gpu)) # batch x, shape (batch, 300) 276 | batch_image = Variable(image[ind].view(-1, args.image_dim).cuda(args.gpu)) # batch y, shape (batch, 128) 277 | batch_sound = Variable( 278 | sound[ind].view(-1, args.sound_dim).cuda(args.gpu)) # batch y, shape (batch, 128) 279 | batch_brain = Variable( 280 | brain_multi[ind].cuda(args.gpu)) # batch x, shape (batch, 300) 281 | else: 282 | batch_text = Variable(text[ind].view(-1, args.text_dim)) # batch x, shape (batch, 300) 283 | batch_image = Variable(image[ind].view(-1, args.image_dim)) # batch y, shape (batch, 128) 284 | batch_sound = Variable(sound[ind].view(-1, args.sound_dim)) # batch y, shape (batch, 128) 285 | batch_brain = Variable(brain_multi[ind]) 286 | 287 | decoded_text, decoded_image, decoded_brain, decoded_sound, _ = autoencoder(batch_text, batch_image, batch_sound) 288 | #loss = loss_func(decoded_text, batch_text) + loss_func(decoded_image, batch_image) + loss_func(decoded_brain, batch_brain) # mean square error 289 | loss = loss_func(decoded_brain, batch_brain) 290 | optimizer.zero_grad() # clear gradients for this training step 291 | loss.backward() # backpropagation, compute gradients 292 | optimizer.step() # apply gradients 293 | 294 | if step % 100 == 0: 295 | print 'Epoch: ', ep, '| train loss: %.4f' % loss.data[0] 296 | 297 | # 298 | # if ep % 50 == 0: 299 | # evaluation(ep, total_text, total_image, autoencoder, vocab, args) 300 | 301 | #fine-tune 302 | optimizer = torch.optim.Adam(autoencoder.parameters(), 
lr=0.1*args.lr) 303 | loss_func = nn.MSELoss() 304 | 305 | for ep in range(args.epoch): 306 | ep += 1 307 | for step, ind in enumerate(train_loader): 308 | if args.gpu > -1: 309 | batch_text = Variable(text[ind].view(-1, args.text_dim).cuda(args.gpu)) # batch x, shape (batch, 300) 310 | batch_image = Variable( 311 | image[ind].view(-1, args.image_dim).cuda(args.gpu)) # batch y, shape (batch, 128) 312 | batch_sound = Variable( 313 | sound[ind].view(-1, args.sound_dim).cuda(args.gpu)) # batch y, shape (batch, 128) 314 | batch_brain = Variable( 315 | brain_multi[ind].cuda(args.gpu)) # batch x, shape (batch, 300) 316 | else: 317 | batch_text = Variable(text[ind].view(-1, args.text_dim)) # batch x, shape (batch, 300) 318 | batch_image = Variable(image[ind].view(-1, args.image_dim)) # batch y, shape (batch, 128) 319 | batch_sound = Variable(sound[ind].view(-1, args.sound_dim)) # batch y, shape (batch, 128) 320 | batch_brain = Variable(brain_multi[ind]) 321 | 322 | decoded_text, decoded_image, decoded_brain, decoded_sound, _ = autoencoder(batch_text, batch_image, 323 | batch_sound) 324 | # loss = loss_func(decoded_text, batch_text) + loss_func(decoded_image, batch_image) + loss_func(decoded_brain, batch_brain) # mean square error 325 | loss = loss_func(decoded_brain, batch_brain) 326 | optimizer.zero_grad() # clear gradients for this training step 327 | loss.backward() # backpropagation, compute gradients 328 | optimizer.step() # apply gradients 329 | 330 | if step % 100 == 0: 331 | print 'Epoch: ', ep, '| train loss: %.4f' % loss.data[0] 332 | 333 | if ep % 50 == 0: 334 | evaluation(ep, total_text, total_image, total_sound, autoencoder, vocab, args) -------------------------------------------------------------------------------- /AE_brain_weight_val.py: -------------------------------------------------------------------------------- 1 | """ 2 | View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/ 3 | My Youtube Channel: 
def evaluation(ep, text, image, sound, autoencoder, vocab, args):
    """Score the model's fused word vectors on the word-similarity benchmarks.

    For each benchmark file under ``evaluation/`` the Spearman correlation
    between the human scores and the cosine similarity of the fused vectors is
    printed, together with the number of pairs read and the number skipped
    because a word is not in ``vocab``. This variant writes nothing to disk.

    Args:
        ep: Epoch number (unused here; kept for call compatibility).
        text, image, sound: Model inputs holding one row per vocabulary word.
        autoencoder: Callable model whose last output is the fused vector.
        vocab: List of words; ``vocab[i]`` labels row ``i`` of the vectors.
        args: Parsed command line (unused here; kept for call compatibility).

    Relies on ``cosine_sim``, ``assign_ranks`` and ``spearmans_rho`` being in
    module scope (presumably supplied by ``from ranking import *``).
    """
    testfile = ['men-3k.txt', 'simlex-999.txt', 'semsim.txt', 'vissim.txt', 'simverb-3500.txt',
                'wordsim353.txt', 'wordrel353.txt', 'association.dev.txt', 'association.dev.b.txt']

    # The fused representation is the last element of the model output.
    multi_rep = autoencoder(text, image, sound)[-1]
    word_vecs = multi_rep.data.cpu().numpy()

    # vocab is a list, so build the word -> row map once instead of scanning
    # it for every word pair. setdefault keeps the first occurrence, which is
    # what list.index() returned.
    row_of = {}
    for row, word in enumerate(vocab):
        row_of.setdefault(word, row)

    for name in testfile:
        manual_dict, auto_dict = {}, {}
        not_found, total_size = 0, 0
        try:
            handle = open('evaluation/' + name, 'r')
        except IOError:
            # A benchmark that is not shipped must not abort the whole run.
            print('%s %15s' % (name, 'skipped (file not found)'))
            continue
        with handle:
            for line in handle:
                fields = line.strip().lower().split()
                if not fields:
                    continue  # tolerate blank lines
                word1, word2, val = fields
                if word1 in row_of and word2 in row_of:
                    manual_dict[(word1, word2)] = float(val)
                    auto_dict[(word1, word2)] = cosine_sim(word_vecs[row_of[word1]],
                                                           word_vecs[row_of[word2]])
                else:
                    not_found += 1
                total_size += 1
        sp = spearmans_rho(assign_ranks(manual_dict), assign_ranks(auto_dict))
        # Single-argument print keeps the original column layout and is valid
        # on both Python 2 and Python 3.
        print('%s %15s %15s %15.4f' % (name, total_size, not_found, sp))
    print('')
class AutoEncoder(nn.Module):
    """Text/image/sound autoencoder with scalar input gates and a brain head.

    Each modality is rescaled by ``1 + 0.1 * tanh(Linear(x))`` (one scalar per
    sample, the "value" gating variant), encoded by its own two-layer network,
    fused into a ``multi_dim`` code, and decoded back per modality.  A separate
    ``decoder_brain`` head maps the fused code to ``brain_dim`` outputs.

    Encoders/decoders 1-4 are restored from ``model_para`` (a state dict keyed
    like ``'encoder1.0.weight'``); the gates and the brain head start fresh.

    Args:
        args: namespace providing text/image/sound ``_dim``, ``_dim1``,
            ``_dim2`` plus ``multi_dim``, ``brain_dim1`` and ``brain_dim``.
        model_para: mapping from parameter name to pretrained tensor.
    """

    # Sub-networks restored from the checkpoint and the Sequential indices of
    # their Linear layers.
    _PRETRAINED_LAYERS = (
        ('encoder1', (0, 2)), ('encoder2', (0, 2)), ('encoder3', (0, 2)),
        ('encoder4', (0,)),
        ('decoder1', (0, 2)), ('decoder2', (0, 2)), ('decoder3', (0, 2)),
        ('decoder4', (0,)),
    )

    def __init__(self, args, model_para):
        super(AutoEncoder, self).__init__()
        self.tdim = args.text_dim
        self.tdim1 = args.text_dim1
        self.tdim2 = args.text_dim2
        self.idim = args.image_dim
        self.idim1 = args.image_dim1
        self.idim2 = args.image_dim2
        self.sdim = args.sound_dim
        self.sdim1 = args.sound_dim1
        self.sdim2 = args.sound_dim2
        self.zdim = args.multi_dim
        self.brain_dim1 = args.brain_dim1
        self.brain_dim = args.brain_dim
        self.model_para = model_para

        fused = self.tdim2 + self.idim2 + self.sdim2

        # Construction order is kept as in the original so that seeded
        # initialisation and state-dict ordering do not change.
        self.text_weight = self._gate(self.tdim)
        self.image_weight = self._gate(self.idim)
        # Sized by the sound dimension; the image dimension used previously
        # only worked when both happened to be equal.
        self.sound_weight = self._gate(self.sdim)

        self.encoder1 = self._two_layer(self.tdim, self.tdim1, self.tdim2)
        self.encoder2 = self._two_layer(self.idim, self.idim1, self.idim2)
        self.encoder3 = self._two_layer(self.sdim, self.sdim1, self.sdim2)
        self.encoder4 = nn.Sequential(nn.Linear(fused, self.zdim), nn.Tanh())

        self.decoder4 = nn.Sequential(nn.Linear(self.zdim, fused), nn.Tanh())
        # Decoder numbering mirrors the checkpoint: 3 = text, 2 = image,
        # 1 = sound.
        self.decoder3 = self._two_layer(self.tdim2, self.tdim1, self.tdim)
        self.decoder2 = self._two_layer(self.idim2, self.idim1, self.idim)
        self.decoder1 = self._two_layer(self.sdim2, self.sdim1, self.sdim)
        self.decoder_brain = nn.Sequential(
            nn.Linear(self.zdim, self.brain_dim1),
            nn.Tanh(),
            nn.Linear(self.brain_dim1, self.brain_dim),
            nn.Sigmoid()
        )

        self.reset_parameters()
        self.load_parameters()

    @staticmethod
    def _gate(dim):
        """Return the scalar gate network for a ``dim``-wide modality."""
        return nn.Sequential(nn.Linear(dim, 1), nn.Tanh())

    @staticmethod
    def _two_layer(d_in, d_hidden, d_out):
        """Return Linear-Tanh-Linear-Tanh with Linear layers at indices 0, 2."""
        return nn.Sequential(
            nn.Linear(d_in, d_hidden),
            nn.Tanh(),
            nn.Linear(d_hidden, d_out),
            nn.Tanh()
        )

    def reset_parameters(self):
        """Kaiming-initialise weights and zero biases of the untrained layers."""
        # Prefer the in-place names of newer torch releases, falling back to
        # the names available in the torch version this file was written for.
        kaiming = getattr(init, 'kaiming_normal_', None) or init.kaiming_normal
        constant = getattr(init, 'constant_', None) or init.constant
        fresh_layers = (self.text_weight[0], self.image_weight[0], self.sound_weight[0],
                        self.decoder_brain[0], self.decoder_brain[2])
        for layer in fresh_layers:
            kaiming(layer.weight.data)
            constant(layer.bias.data, val=0)

    def load_parameters(self):
        """Copy pretrained weights and biases from ``model_para`` into place.

        Raises:
            KeyError: if the checkpoint lacks one of the expected entries.
        """
        for block_name, indices in self._PRETRAINED_LAYERS:
            block = getattr(self, block_name)
            for idx in indices:
                prefix = '%s.%d' % (block_name, idx)
                # deepcopy so later training never mutates the caller's dict
                block[idx].weight.data = copy.deepcopy(self.model_para[prefix + '.weight'])
                block[idx].bias.data = copy.deepcopy(self.model_para[prefix + '.bias'])

    def forward(self, x_t, x_i, x_s):
        """Gate, encode, fuse and decode one batch.

        Returns:
            ``(decoded_text, decoded_image, decoded_brain, decoded_sound,
            encoded_mid)``; note that brain comes before sound.
        """
        x_t = (self.text_weight(x_t).expand_as(x_t) * 0.1 + 1) * x_t
        x_i = (self.image_weight(x_i).expand_as(x_i) * 0.1 + 1) * x_i
        x_s = (self.sound_weight(x_s).expand_as(x_s) * 0.1 + 1) * x_s

        encoded = (self.encoder1(x_t), self.encoder2(x_i), self.encoder3(x_s))
        encoded_mid = self.encoder4(torch.cat(encoded, dim=1))

        decoded_mid = self.decoder4(encoded_mid)
        text_end = self.tdim2
        image_end = self.tdim2 + self.idim2
        decoded_text = self.decoder3(decoded_mid[:, 0:text_end])
        decoded_image = self.decoder2(decoded_mid[:, text_end:image_end])
        decoded_sound = self.decoder1(decoded_mid[:, image_end:])
        decoded_brain = self.decoder_brain(encoded_mid)
        return decoded_text, decoded_image, decoded_brain, decoded_sound, encoded_mid
def _as_float_tensor(rows):
    """Stack a list of equal-length float rows into a torch.FloatTensor."""
    return torch.from_numpy(np.array(rows)).type(torch.FloatTensor)


def _read_modalities(path, text_dim, image_dim):
    """Parse a whitespace-separated ``word v1 v2 ...`` feature file.

    Columns ``1..text_dim`` are text features, the next ``image_dim`` columns
    are image features and every remaining column is a sound feature.  Blank
    lines are skipped.

    Returns:
        ``(words, text, image, sound)`` with the three tensors row-aligned
        to ``words``.
    """
    text_end = text_dim + 1
    image_end = text_end + image_dim
    words, text, image, sound = [], [], [], []
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split()
            if not fields:
                continue
            words.append(fields[0])
            text.append([float(v) for v in fields[1:text_end]])
            image.append([float(v) for v in fields[text_end:image_end]])
            sound.append([float(v) for v in fields[image_end:]])
    return words, _as_float_tensor(text), _as_float_tensor(image), _as_float_tensor(sound)


def _read_brain(path):
    """Parse the brain-target file, dropping the first column of each row."""
    rows = []
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split()
            if fields:
                rows.append([float(v) for v in fields[1:]])
    return _as_float_tensor(rows)


def _run_phase(autoencoder, optimizer, train_loader, tensors, args, n_epochs, after_epoch=None):
    """Train ``autoencoder`` on the brain-reconstruction MSE for ``n_epochs``.

    ``tensors`` is ``(text, image, sound, brain)``; ``train_loader`` yields
    batches of row indices into them.  ``after_epoch(ep)`` is called with the
    1-based epoch number once each epoch finishes.
    """
    text, image, sound, brain = tensors
    loss_func = nn.MSELoss()
    for ep in range(1, n_epochs + 1):
        for step, ind in enumerate(train_loader):
            batch = [text[ind].view(-1, args.text_dim),
                     image[ind].view(-1, args.image_dim),
                     sound[ind].view(-1, args.sound_dim),
                     brain[ind]]
            if args.gpu > -1:
                batch = [t.cuda(args.gpu) for t in batch]
            batch_text, batch_image, batch_sound, batch_brain = [Variable(t) for t in batch]

            _, _, decoded_brain, _, _ = autoencoder(batch_text, batch_image, batch_sound)
            loss = loss_func(decoded_brain, batch_brain)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                # view(-1)[0] reads the scalar on both old (1-element) and
                # new (0-dim) loss tensors.
                print('Epoch:  %s | train loss: %.4f' % (ep, float(loss.data.view(-1)[0])))
        if after_epoch is not None:
            after_epoch(ep)


def main():
    """Fit the brain decoder on frozen features, then fine-tune everything."""
    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
    for flag in ('--total-data', '--train-data', '--brain-data', '--load-model', '--outmodel'):
        parser.add_argument(flag, required=True)
    for flag in ('--text-dim', '--image-dim', '--sound-dim',
                 '--text-dim1', '--text-dim2', '--image-dim1', '--image-dim2',
                 '--sound-dim1', '--sound-dim2', '--brain-dim1', '--brain-dim',
                 '--multi-dim', '--batch-size', '--epoch'):
        parser.add_argument(flag, required=True, type=int)
    parser.add_argument('--lr', default=0.005, type=float)
    # --outmodel and --regularization are accepted but not used by this script.
    parser.add_argument('--regularization', default=-1, type=float)
    parser.add_argument('--gpu', default=-1, type=int)
    args = parser.parse_args()
    use_gpu = args.gpu > -1

    vocab, total_text, total_image, total_sound = _read_modalities(
        args.total_data, args.text_dim, args.image_dim)
    _, text, image, sound = _read_modalities(args.train_data, args.text_dim, args.image_dim)
    brain_multi = _read_brain(args.brain_data)

    # Load onto the CPU regardless of where the checkpoint was saved; the
    # module is moved to the requested device afterwards.
    with open(args.load_model, 'rb') as handle:
        model_para = torch.load(handle, map_location=lambda storage, loc: storage)

    # The loader shuffles row indices; batches are gathered from the tensors.
    train_ind = list(range(len(image)))
    train_loader = Data.DataLoader(dataset=train_ind, batch_size=args.batch_size,
                                   shuffle=True, pin_memory=use_gpu)
    autoencoder = AutoEncoder(args, model_para)
    if use_gpu:
        autoencoder = autoencoder.cuda(args.gpu)
        total_text, total_image, total_sound = [
            t.cuda(args.gpu) for t in (total_text, total_image, total_sound)]
    total_text, total_image, total_sound = [
        Variable(t) for t in (total_text, total_image, total_sound)]

    tensors = (text, image, sound, brain_multi)

    # Phase 1: only the new brain decoder is optimised.
    pretrain_epochs = 50
    optimizer = torch.optim.Adam(autoencoder.decoder_brain.parameters(), lr=args.lr)
    _run_phase(autoencoder, optimizer, train_loader, tensors, args, pretrain_epochs)

    # Phase 2: fine-tune every parameter at a tenth of the learning rate.
    def report(ep):
        if ep % 50 == 0:
            evaluation(ep, total_text, total_image, total_sound, autoencoder, vocab, args)

    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.1 * args.lr)
    _run_phase(autoencoder, optimizer, train_loader, tensors, args, args.epoch, after_epoch=report)


if __name__ == '__main__':
    main()
def evaluation(ep, text, image, sound, autoencoder, vocab, args):
    """Print Spearman correlations of the fused word vectors on each benchmark.

    The autoencoder is run once over ``text``/``image``/``sound``; the fifth
    element of its output is taken as one vector per vocabulary entry.  For
    every benchmark file under ``evaluation/`` one row is printed: file name,
    number of pairs read, number of pairs skipped because a word is missing
    from ``vocab``, and Spearman's rho between the gold scores and the cosine
    similarities of the vectors.

    Args:
        ep: current epoch; unused here, kept so callers need not change.
        text, image, sound: inputs forwarded unchanged to ``autoencoder``.
        autoencoder: callable returning a 5-tuple whose last item exposes
            ``.data.cpu().numpy()``.
        vocab: sequence of words, row-aligned with the model output.
        args: parsed command-line options; unused here.
    """
    testfile = ['men-3k.txt', 'simlex-999.txt', 'semsim.txt', 'vissim.txt', 'simverb-3500.txt',
                'wordsim353.txt', 'wordrel353.txt', 'association.dev.txt', 'association.dev.b.txt']
    _, _, _, _, multi_rep = autoencoder(text, image, sound)
    word_vecs = multi_rep.data.cpu().numpy()

    # Resolve each word with one dict lookup instead of two list scans per
    # pair.  setdefault keeps the first occurrence, like list.index().
    word_row = {}
    for row, word in enumerate(vocab):
        word_row.setdefault(word, row)

    for name in testfile:
        try:
            handle = open('evaluation/' + name, 'r')
        except IOError as err:
            # A benchmark that is not shipped must not abort the whole run.
            print('%s skipped (%s)' % (name, err))
            continue

        manual_dict, auto_dict = {}, {}
        not_found = total_size = 0
        with handle:
            for line in handle:
                fields = line.strip().lower().split()
                if not fields:
                    continue  # tolerate blank / trailing empty lines
                word1, word2, val = fields
                total_size += 1
                row1 = word_row.get(word1)
                row2 = word_row.get(word2)
                if row1 is None or row2 is None:
                    not_found += 1
                    continue
                manual_dict[(word1, word2)] = float(val)
                auto_dict[(word1, word2)] = cosine_sim(word_vecs[row1], word_vecs[row2])

        sp = spearmans_rho(assign_ranks(manual_dict), assign_ranks(auto_dict))
        # One formatted string yields the same row as the original chained
        # print statements and runs under both Python 2 and Python 3.
        print('%s %15s %15s %15.4f' % (name, total_size, not_found, sp))
    print('')
class AutoEncoder(nn.Module):
    """Text/image/sound autoencoder with per-feature gates and a brain head.

    Each modality is rescaled element-wise by ``1 + 0.1 * tanh(Linear(x))``
    (one factor per input feature, the "vector" gating variant), encoded by
    its own two-layer network, fused into a ``multi_dim`` code, and decoded
    back per modality.  A separate ``decoder_brain`` head maps the fused code
    to ``brain_dim`` outputs.

    Encoders/decoders 1-4 are restored from ``model_para`` (a state dict keyed
    like ``'encoder1.0.weight'``); the gates and the brain head start fresh.

    Args:
        args: namespace providing text/image/sound ``_dim``, ``_dim1``,
            ``_dim2`` plus ``multi_dim``, ``brain_dim1`` and ``brain_dim``.
        model_para: mapping from parameter name to pretrained tensor.
    """

    # Sub-networks restored from the checkpoint and the Sequential indices of
    # their Linear layers.
    _PRETRAINED_LAYERS = (
        ('encoder1', (0, 2)), ('encoder2', (0, 2)), ('encoder3', (0, 2)),
        ('encoder4', (0,)),
        ('decoder1', (0, 2)), ('decoder2', (0, 2)), ('decoder3', (0, 2)),
        ('decoder4', (0,)),
    )

    def __init__(self, args, model_para):
        super(AutoEncoder, self).__init__()
        self.tdim = args.text_dim
        self.tdim1 = args.text_dim1
        self.tdim2 = args.text_dim2
        self.idim = args.image_dim
        self.idim1 = args.image_dim1
        self.idim2 = args.image_dim2
        self.sdim = args.sound_dim
        self.sdim1 = args.sound_dim1
        self.sdim2 = args.sound_dim2
        self.zdim = args.multi_dim
        self.brain_dim1 = args.brain_dim1
        self.brain_dim = args.brain_dim
        self.model_para = model_para

        fused = self.tdim2 + self.idim2 + self.sdim2

        # Construction order is kept as in the original so that seeded
        # initialisation and state-dict ordering do not change.
        self.text_weight = self._gate(self.tdim)
        self.image_weight = self._gate(self.idim)
        # Sized by the sound dimension; the image dimension used previously
        # only worked when both happened to be equal.
        self.sound_weight = self._gate(self.sdim)

        self.encoder1 = self._two_layer(self.tdim, self.tdim1, self.tdim2)
        self.encoder2 = self._two_layer(self.idim, self.idim1, self.idim2)
        self.encoder3 = self._two_layer(self.sdim, self.sdim1, self.sdim2)
        self.encoder4 = nn.Sequential(nn.Linear(fused, self.zdim), nn.Tanh())

        self.decoder4 = nn.Sequential(nn.Linear(self.zdim, fused), nn.Tanh())
        # Decoder numbering mirrors the checkpoint: 3 = text, 2 = image,
        # 1 = sound.
        self.decoder3 = self._two_layer(self.tdim2, self.tdim1, self.tdim)
        self.decoder2 = self._two_layer(self.idim2, self.idim1, self.idim)
        self.decoder1 = self._two_layer(self.sdim2, self.sdim1, self.sdim)
        self.decoder_brain = nn.Sequential(
            nn.Linear(self.zdim, self.brain_dim1),
            nn.Tanh(),
            nn.Linear(self.brain_dim1, self.brain_dim),
            nn.Sigmoid()
        )

        self.reset_parameters()
        self.load_parameters()

    @staticmethod
    def _gate(dim):
        """Return the per-feature gate network for a ``dim``-wide modality."""
        return nn.Sequential(nn.Linear(dim, dim), nn.Tanh())

    @staticmethod
    def _two_layer(d_in, d_hidden, d_out):
        """Return Linear-Tanh-Linear-Tanh with Linear layers at indices 0, 2."""
        return nn.Sequential(
            nn.Linear(d_in, d_hidden),
            nn.Tanh(),
            nn.Linear(d_hidden, d_out),
            nn.Tanh()
        )

    def reset_parameters(self):
        """Kaiming-initialise weights and zero biases of the untrained layers."""
        # Prefer the in-place names of newer torch releases, falling back to
        # the names available in the torch version this file was written for.
        kaiming = getattr(init, 'kaiming_normal_', None) or init.kaiming_normal
        constant = getattr(init, 'constant_', None) or init.constant
        fresh_layers = (self.text_weight[0], self.image_weight[0], self.sound_weight[0],
                        self.decoder_brain[0], self.decoder_brain[2])
        for layer in fresh_layers:
            kaiming(layer.weight.data)
            constant(layer.bias.data, val=0)

    def load_parameters(self):
        """Copy pretrained weights and biases from ``model_para`` into place.

        Raises:
            KeyError: if the checkpoint lacks one of the expected entries.
        """
        for block_name, indices in self._PRETRAINED_LAYERS:
            block = getattr(self, block_name)
            for idx in indices:
                prefix = '%s.%d' % (block_name, idx)
                # deepcopy so later training never mutates the caller's dict
                block[idx].weight.data = copy.deepcopy(self.model_para[prefix + '.weight'])
                block[idx].bias.data = copy.deepcopy(self.model_para[prefix + '.bias'])

    def forward(self, x_t, x_i, x_s):
        """Gate, encode, fuse and decode one batch.

        Returns:
            ``(decoded_text, decoded_image, decoded_brain, decoded_sound,
            encoded_mid)``; note that brain comes before sound.
        """
        x_t = (self.text_weight(x_t).expand_as(x_t) * 0.1 + 1) * x_t
        x_i = (self.image_weight(x_i).expand_as(x_i) * 0.1 + 1) * x_i
        x_s = (self.sound_weight(x_s).expand_as(x_s) * 0.1 + 1) * x_s

        encoded = (self.encoder1(x_t), self.encoder2(x_i), self.encoder3(x_s))
        encoded_mid = self.encoder4(torch.cat(encoded, dim=1))

        decoded_mid = self.decoder4(encoded_mid)
        text_end = self.tdim2
        image_end = self.tdim2 + self.idim2
        decoded_text = self.decoder3(decoded_mid[:, 0:text_end])
        decoded_image = self.decoder2(decoded_mid[:, text_end:image_end])
        decoded_sound = self.decoder1(decoded_mid[:, image_end:])
        decoded_brain = self.decoder_brain(encoded_mid)
        return decoded_text, decoded_image, decoded_brain, decoded_sound, encoded_mid
def _as_float_tensor(rows):
    """Stack a list of equal-length float rows into a torch.FloatTensor."""
    return torch.from_numpy(np.array(rows)).type(torch.FloatTensor)


def _read_modalities(path, text_dim, image_dim):
    """Parse a whitespace-separated ``word v1 v2 ...`` feature file.

    Columns ``1..text_dim`` are text features, the next ``image_dim`` columns
    are image features and every remaining column is a sound feature.  Blank
    lines are skipped.

    Returns:
        ``(words, text, image, sound)`` with the three tensors row-aligned
        to ``words``.
    """
    text_end = text_dim + 1
    image_end = text_end + image_dim
    words, text, image, sound = [], [], [], []
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split()
            if not fields:
                continue
            words.append(fields[0])
            text.append([float(v) for v in fields[1:text_end]])
            image.append([float(v) for v in fields[text_end:image_end]])
            sound.append([float(v) for v in fields[image_end:]])
    return words, _as_float_tensor(text), _as_float_tensor(image), _as_float_tensor(sound)


def _read_brain(path):
    """Parse the brain-target file, dropping the first column of each row."""
    rows = []
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split()
            if fields:
                rows.append([float(v) for v in fields[1:]])
    return _as_float_tensor(rows)


def _run_phase(autoencoder, optimizer, train_loader, tensors, args, n_epochs, after_epoch=None):
    """Train ``autoencoder`` on the brain-reconstruction MSE for ``n_epochs``.

    ``tensors`` is ``(text, image, sound, brain)``; ``train_loader`` yields
    batches of row indices into them.  ``after_epoch(ep)`` is called with the
    1-based epoch number once each epoch finishes.
    """
    text, image, sound, brain = tensors
    loss_func = nn.MSELoss()
    for ep in range(1, n_epochs + 1):
        for step, ind in enumerate(train_loader):
            batch = [text[ind].view(-1, args.text_dim),
                     image[ind].view(-1, args.image_dim),
                     sound[ind].view(-1, args.sound_dim),
                     brain[ind]]
            if args.gpu > -1:
                batch = [t.cuda(args.gpu) for t in batch]
            batch_text, batch_image, batch_sound, batch_brain = [Variable(t) for t in batch]

            _, _, decoded_brain, _, _ = autoencoder(batch_text, batch_image, batch_sound)
            loss = loss_func(decoded_brain, batch_brain)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                # view(-1)[0] reads the scalar on both old (1-element) and
                # new (0-dim) loss tensors.
                print('Epoch:  %s | train loss: %.4f' % (ep, float(loss.data.view(-1)[0])))
        if after_epoch is not None:
            after_epoch(ep)


def main():
    """Fit the brain decoder on frozen features, then fine-tune everything."""
    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
    for flag in ('--total-data', '--train-data', '--brain-data', '--load-model', '--outmodel'):
        parser.add_argument(flag, required=True)
    for flag in ('--text-dim', '--image-dim', '--sound-dim',
                 '--text-dim1', '--text-dim2', '--image-dim1', '--image-dim2',
                 '--sound-dim1', '--sound-dim2', '--brain-dim1', '--brain-dim',
                 '--multi-dim', '--batch-size', '--epoch'):
        parser.add_argument(flag, required=True, type=int)
    parser.add_argument('--lr', default=0.005, type=float)
    # --outmodel and --regularization are accepted but not used by this script.
    parser.add_argument('--regularization', default=-1, type=float)
    parser.add_argument('--gpu', default=-1, type=int)
    args = parser.parse_args()
    use_gpu = args.gpu > -1

    vocab, total_text, total_image, total_sound = _read_modalities(
        args.total_data, args.text_dim, args.image_dim)
    _, text, image, sound = _read_modalities(args.train_data, args.text_dim, args.image_dim)
    brain_multi = _read_brain(args.brain_data)

    # Load onto the CPU regardless of where the checkpoint was saved; the
    # module is moved to the requested device afterwards.
    with open(args.load_model, 'rb') as handle:
        model_para = torch.load(handle, map_location=lambda storage, loc: storage)

    # The loader shuffles row indices; batches are gathered from the tensors.
    train_ind = list(range(len(image)))
    train_loader = Data.DataLoader(dataset=train_ind, batch_size=args.batch_size,
                                   shuffle=True, pin_memory=use_gpu)
    autoencoder = AutoEncoder(args, model_para)
    if use_gpu:
        autoencoder = autoencoder.cuda(args.gpu)
        total_text, total_image, total_sound = [
            t.cuda(args.gpu) for t in (total_text, total_image, total_sound)]
    total_text, total_image, total_sound = [
        Variable(t) for t in (total_text, total_image, total_sound)]

    tensors = (text, image, sound, brain_multi)

    # Phase 1: only the new brain decoder is optimised.
    pretrain_epochs = 50
    optimizer = torch.optim.Adam(autoencoder.decoder_brain.parameters(), lr=args.lr)
    _run_phase(autoencoder, optimizer, train_loader, tensors, args, pretrain_epochs)

    # Phase 2: fine-tune every parameter at a tenth of the learning rate.
    def report(ep):
        if ep % 50 == 0:
            evaluation(ep, total_text, total_image, total_sound, autoencoder, vocab, args)

    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.1 * args.lr)
    _run_phase(autoencoder, optimizer, train_loader, tensors, args, args.epoch, after_epoch=report)


if __name__ == '__main__':
    main()
args.sound_dim).cuda(args.gpu)) # batch y, shape (batch, 128) 353 | batch_brain = Variable( 354 | brain_multi[ind].cuda(args.gpu)) # batch x, shape (batch, 300) 355 | else: 356 | batch_text = Variable(text[ind].view(-1, args.text_dim)) # batch x, shape (batch, 300) 357 | batch_image = Variable(image[ind].view(-1, args.image_dim)) # batch y, shape (batch, 128) 358 | batch_sound = Variable(sound[ind].view(-1, args.sound_dim)) # batch y, shape (batch, 128) 359 | batch_brain = Variable(brain_multi[ind]) 360 | 361 | decoded_text, decoded_image, decoded_brain, decoded_sound, _ = autoencoder(batch_text, batch_image, 362 | batch_sound) 363 | # loss = loss_func(decoded_text, batch_text) + loss_func(decoded_image, batch_image) + loss_func(decoded_brain, batch_brain) # mean square error 364 | loss = loss_func(decoded_brain, batch_brain) 365 | optimizer.zero_grad() # clear gradients for this training step 366 | loss.backward() # backpropagation, compute gradients 367 | optimizer.step() # apply gradients 368 | 369 | if step % 100 == 0: 370 | print 'Epoch: ', ep, '| train loss: %.4f' % loss.data[0] 371 | 372 | if ep % 50 == 0: 373 | evaluation(ep, total_text, total_image, total_sound, autoencoder, vocab, args) -------------------------------------------------------------------------------- /evaluation/association.dev.b.txt: -------------------------------------------------------------------------------- 1 | burner wood 0.403 2 | bare foot 0.123 3 | mirror reflect 0.611 4 | scarf fashion 0.362 5 | single space 0.085 6 | disbelieve distrust 0.254 7 | endure pain 1.537 8 | circus freak 0.112 9 | chewy cookie 0.14 10 | linux ubuntu 0.367 11 | reference research 0.254 12 | bleep bloop 0.171 13 | latter ladder 0.887 14 | lies evil 0.462 15 | dedication service 0.263 16 | photography lens 0.138 17 | peacock colorful 0.347 18 | display work 0.14 19 | with using 0.122 20 | blond pretty 0.204 21 | obtain buy 0.701 22 | cuss stubborn 0.319 23 | antique vase 0.322 24 | uptight stingy 0.251 
25 | blessed grace 0.209 26 | evident proof 0.324 27 | ivory ebony 0.45 28 | macho nacho 0.817 29 | fax beep 0.108 30 | hallway entrance 0.357 31 | hopeful hopeless 0.289 32 | shield zelda 0.109 33 | suggestion recommendation 0.319 34 | affect hurt 0.176 35 | incentive motivation 1.631 36 | modest dress 0.323 37 | stroll go 0.135 38 | stern strong 0.343 39 | unpredictable wild 1.02 40 | centipede crawly 0.163 41 | say voice 0.19 42 | landfill smelly 0.14 43 | beard grey 0.124 44 | spin spun 0.354 45 | mister male 0.738 46 | amoeba paramecium 0.449 47 | trauma emergency 0.69 48 | mistake incorrect 0.278 49 | clique unfriendly 0.196 50 | expression freedom 0.365 51 | endive bitter 1.332 52 | breed kind 0.279 53 | outrage scandal 1.058 54 | burglary robber 0.897 55 | doorstep black 0.136 56 | parchment skin 0.201 57 | linger loiter 1.037 58 | dwell reside 0.479 59 | balance equilibrium 0.777 60 | merchant vendor 0.11 61 | improve enhance 0.291 62 | dimension line 0.148 63 | query inquiry 0.36 64 | monotonous same 0.77 65 | exploitation trafficking 0.275 66 | shelves store 0.319 67 | earnest hardworking 0.611 68 | intellectual cerebral 0.131 69 | diplomat peace 0.291 70 | urgent speed 0.188 71 | worst ugly 0.159 72 | godly heaven 0.319 73 | analysis doctor 0.386 74 | import food 0.261 75 | compose letter 0.303 76 | environment animals 0.138 77 | whether choose 0.149 78 | sound loud 1.75 79 | axis maths 0.154 80 | stability steady 0.463 81 | sock hole 0.33 82 | fare price 0.941 83 | tiara bling 0.081 84 | saliva hungry 0.166 85 | hard durable 0.165 86 | needy child 0.211 87 | soil dirty 0.946 88 | fuck you 1.493 89 | distinguished professor 1.131 90 | musty wet 0.506 91 | reading railroad 0.322 92 | gold jewelry 0.947 93 | liberal politics 1.559 94 | air music 0.157 95 | facts true 0.536 96 | hissing sound 0.655 97 | ghostly eerie 0.448 98 | scope rifle 0.278 99 | truly scrumptious 0.139 100 | circular roundabout 0.187 101 | capability skilled 0.186 102 | bleak dim 
0.145 103 | knife cutting 0.119 104 | saviour christianity 0.226 105 | shore shell 0.318 106 | access granted 0.6 107 | sanity rational 0.154 108 | haha sarcasm 0.156 109 | nutty silly 0.513 110 | taxes april 0.173 111 | sand glass 0.73 112 | shell home 0.145 113 | veggie peas 0.129 114 | substitute alternative 0.167 115 | snotty snobby 0.149 116 | peer group 0.79 117 | autonomy country 0.219 118 | fraud charlatan 0.132 119 | management executive 0.554 120 | kangaroo jumping 0.236 121 | filling stuffed 0.18 122 | guilt sad 0.909 123 | marrow white 0.216 124 | insane mad 1.702 125 | lacking incomplete 0.772 126 | quail cower 0.215 127 | strict parent 0.924 128 | fortress strong 0.663 129 | paperwork school 0.426 130 | barmaid apron 0.639 131 | chore bore 0.201 132 | container lunch 0.288 133 | infidelity affair 0.423 134 | ecological natural 0.145 135 | growl wolf 0.443 136 | range shooting 0.552 137 | merchant store 1.188 138 | bravado bluster 0.345 139 | numb sad 0.497 140 | disallow permission 0.551 141 | sermon religion 0.403 142 | usage purpose 0.6 143 | crying tearing 0.107 144 | signs portents 0.386 145 | brunette woman 0.883 146 | effect sound 0.246 147 | bullshit untrue 0.205 148 | mommy america 0.104 149 | optional mandatory 0.772 150 | anticipate ready 0.273 151 | arrival journey 0.117 152 | popeye olive 1.307 153 | flop water 0.152 154 | reason mind 0.163 155 | funky afro 0.148 156 | board ship 0.19 157 | breezeway tunnel 0.579 158 | atom nucleus 0.936 159 | amen end 0.778 160 | recline comfort 0.284 161 | pokemon pokeball 0.22 162 | universe cosmos 0.477 163 | decompose worm 0.138 164 | stocking socks 0.625 165 | spade soil 0.089 166 | briefcase luggage 0.173 167 | grunge youth 0.146 168 | stones beach 0.241 169 | campaign election 1.037 170 | volt current 0.362 171 | outside fresh 0.178 172 | arrogant conceited 0.219 173 | uncertainty certainty 0.551 174 | pageant christmas 0.244 175 | rider motorbike 0.148 176 | slap whack 0.098 177 | polar icecap 
0.076 178 | damp clammy 0.17 179 | manufacturer cars 0.226 180 | functional purpose 0.266 181 | furry cuddly 0.216 182 | home heart 1.099 183 | series numbers 0.667 184 | bagpipes skirt 0.081 185 | sprinkle glitter 0.201 186 | surmount conquer 0.404 187 | leisure activity 0.166 188 | keep own 0.423 189 | chocolate tasty 0.112 190 | oblivion beyond 0.224 191 | palindrome forwards 0.936 192 | cannabis illegal 0.162 193 | raspberries strawberries 1.265 194 | rebellion fight 1.933 195 | messy chaos 0.216 196 | swan graceful 0.675 197 | dream cloud 0.556 198 | arriving going 0.257 199 | quiz pencil 0.091 200 | except besides 0.221 201 | weekday tuesday 0.955 202 | wasteful irresponsible 0.242 203 | haven home 1.63 204 | indeed so 0.221 205 | compulsion habit 0.522 206 | stealth dark 0.301 207 | representation no 0.445 208 | blanket safe 0.16 209 | viking history 0.139 210 | prison criminal 0.432 211 | cranberry pie 0.085 212 | beaker beak 0.107 213 | mostly usually 1.074 214 | killer prison 0.154 215 | full overflowing 0.12 216 | learning curve 0.288 217 | entertain party 1.963 218 | gain achieve 0.198 219 | convince believe 0.412 220 | trigonometry lines 0.073 221 | intoxicated silly 0.093 222 | healthy sick 0.631 223 | superb brilliant 0.429 224 | american war 0.378 225 | bronze copper 0.74 226 | opinion argument 0.594 227 | obscene ugly 0.351 228 | occult mysterious 0.286 229 | inefficient waste 0.493 230 | resilient sturdy 0.462 231 | green kermit 0.257 232 | bribe corruption 0.586 233 | astronaut pilot 0.123 234 | gloves finger 0.117 235 | creased line 0.227 236 | reddish hue 0.239 237 | field hay 0.327 238 | potential can 0.282 239 | tackle work 0.399 240 | keg fraternity 0.218 241 | haze misty 0.208 242 | lad good 0.099 243 | mechanism action 0.162 244 | facet angle 0.639 245 | colorful flowers 0.169 246 | trip holiday 0.747 247 | prosper rich 1.482 248 | harley motorbike 1.068 249 | abbey redwall 0.206 250 | posture stance 1.099 251 | bakery warm 0.277 252 | 
steer driving 0.284 253 | touching skin 0.78 254 | low temperature 0.118 255 | sober straight 0.138 256 | bush leaves 0.536 257 | enclosure animal 0.335 258 | trouble crime 0.207 259 | aboriginal first 0.157 260 | shop clothes 0.496 261 | rays x-rays 0.365 262 | ceo president 0.51 263 | wail weep 0.398 264 | lighter zippo 0.18 265 | bad bone 0.138 266 | come orgasm 0.23 267 | hurdle olympics 0.123 268 | ferocious loud 0.098 269 | doughnut policeman 0.22 270 | tread stairs 0.14 271 | lubricate grease 0.893 272 | baggy pockets 0.152 273 | cart wagon 0.802 274 | stormy dark 1.287 275 | obvious clearly 0.19 276 | choices make 0.2 277 | skin dry 0.891 278 | loud raucous 0.169 279 | orientation location 0.231 280 | moisture sweat 0.351 281 | resolve solve 1.745 282 | spending cost 0.299 283 | issues things 0.19 284 | vulture death 1.117 285 | tight hole 0.25 286 | poem write 0.133 287 | extraordinary ordinary 0.79 288 | evident court 0.146 289 | piece whole 0.114 290 | permanent stuck 0.526 291 | screen test 0.322 292 | placenta yuck 0.089 293 | sovereign ruler 0.947 294 | suite honeymoon 0.14 295 | cripple weak 0.215 296 | weekend friends 0.116 297 | angle angel 0.973 298 | sticks brown 0.704 299 | butcher apron 0.475 300 | vile gross 0.907 301 | actually seriously 0.219 302 | embrace accept 0.9 303 | stuffed chicken 0.676 304 | letter words 0.095 305 | italian meatball 0.164 306 | diarrhea flu 0.133 307 | cost amount 0.498 308 | expensive pricey 0.714 309 | honor courage 0.489 310 | vagabond travel 0.283 311 | pimple red 1.229 312 | register list 0.319 313 | sunrise sky 0.182 314 | hostile dangerous 0.685 315 | survivor strong 1.5 316 | elderly young 0.128 317 | british welsh 0.143 318 | envy sin 0.473 319 | myself ego 0.335 320 | grocery money 0.31 321 | bookshop love 0.162 322 | visualize mind 0.335 323 | scallop butter 0.159 324 | trapped locked 0.643 325 | boot feet 0.396 326 | rubbish bullshit 0.26 327 | nerve axon 0.384 328 | progressive politics 0.883 329 | lake 
house 0.233 330 | stock yard 0.449 331 | balloon red 1.694 332 | tug hard 0.193 333 | dresser drawer 1.299 334 | opulent showy 0.302 335 | righteousness sword 0.275 336 | ridiculous outlandish 0.239 337 | fluent fast 0.275 338 | script words 0.596 339 | obama democrat 0.726 340 | omen damien 0.329 341 | prone prune 0.289 342 | collector hobbyist 0.229 343 | greasy fried 0.102 344 | boob tits 0.26 345 | depending reliable 0.352 346 | facade wall 0.396 347 | collector hoarder 1.544 348 | flamboyant loud 0.714 349 | liar cheater 0.283 350 | broom floor 0.135 351 | smell stinky 0.106 352 | warning stop 1.397 353 | buttons coat 0.765 354 | survivor hero 0.226 355 | psychotherapist crazy 0.73 356 | gather berry 0.173 357 | legion legionnaire 0.207 358 | scapegoat goat 1.0 359 | dam barrier 0.132 360 | arrival train 0.831 361 | outside sun 0.477 362 | core inside 0.246 363 | hold fast 0.196 364 | poetic prose 0.283 365 | outfit matching 0.429 366 | inspector food 0.235 367 | anywhere places 0.117 368 | mama me 0.106 369 | humanity civilization 0.397 370 | shrine god 1.071 371 | taco salsa 0.572 372 | struggle overcome 0.178 373 | reviewer magazine 0.271 374 | equilibrium physics 0.216 375 | confession lie 0.226 376 | fragrance aroma 0.745 377 | police help 0.434 378 | abnormal psychology 0.704 379 | driver license 0.205 380 | fallen disgraced 0.296 381 | thief theft 0.271 382 | behavior naughty 0.176 383 | sly deceitful 0.128 384 | marine fish 0.481 385 | capita economics 0.429 386 | shoulder bone 0.308 387 | dislike disgust 0.535 388 | poll politician 0.186 389 | heat heater 0.228 390 | violation wrong 0.485 391 | blink tears 0.159 392 | morbid humor 0.154 393 | dab drop 0.275 394 | sophisticated learned 0.209 395 | until time 1.086 396 | spice sugar 0.726 397 | eggy oval 0.275 398 | mr husband 0.698 399 | brandy whisky 0.294 400 | crunchy crispy 0.84 401 | handsome me 0.327 402 | smooth skin 0.913 403 | tobacco gross 0.231 404 | pole poland 0.203 405 | bribery 
underhanded 0.275 406 | comma break 0.119 407 | bathrobe blue 0.206 408 | versus opposed 0.477 409 | power source 0.16 410 | intervene save 0.266 411 | devout priest 0.108 412 | cigars tobacco 1.167 413 | new exciting 0.315 414 | intelligence intellect 0.194 415 | salute wave 0.116 416 | destroy war 0.87 417 | circumference ball 0.158 418 | cornucopia lots 0.178 419 | judgement lawyer 0.146 420 | song love 0.17 421 | cain walking 0.275 422 | shorten lengthen 1.329 423 | growl scary 0.161 424 | purpose determined 0.128 425 | oats healthy 0.403 426 | flour kitchen 0.076 427 | chains alice 0.25 428 | lava soap 0.293 429 | theme motif 0.806 430 | organize file 0.544 431 | shackle cuff 0.281 432 | slippers soft 0.626 433 | brace support 1.184 434 | croissant buttery 0.704 435 | sporty car 1.099 436 | tranquility restful 0.068 437 | tumble trip 0.645 438 | vaccine protection 0.204 439 | comforting gentle 0.128 440 | cock sex 0.442 441 | borrow owe 0.184 442 | primitive cave 0.335 443 | negate opposite 0.714 444 | monocle eyeglass 0.254 445 | membership union 0.128 446 | rash spots 0.171 447 | marvelous amazing 1.208 448 | renounce surrender 0.497 449 | surround envelop 0.494 450 | coordinate position 0.239 451 | peasy soup 0.248 452 | renegade rogue 0.308 453 | housing family 0.19 454 | photocopy reproduce 0.103 455 | relaxed content 0.162 456 | chemical scientist 0.169 457 | ladybug small 0.243 458 | comfort calm 0.131 459 | den lion 1.215 460 | legal drugs 0.151 461 | glamorous fake 0.173 462 | eastern japan 0.459 463 | patriot act 0.755 464 | manufacturer creator 0.312 465 | proposal knee 0.18 466 | aviation bird 0.42 467 | headband sports 0.351 468 | caravan parade 0.18 469 | rental loan 0.277 470 | daunting dark 0.237 471 | invention new 1.981 472 | pest nuisance 0.507 473 | midnight sky 0.102 474 | agent money 0.176 475 | smash glass 0.741 476 | electron cloud 0.103 477 | random words 0.439 478 | defrost scraper 0.084 479 | shrink dink 0.146 480 | hassle annoy 
1.024 481 | rocker baby 0.84 482 | cost dollars 0.266 483 | corny corn 0.748 484 | gaming nerds 0.313 485 | demand anger 0.348 486 | crevice canyon 0.492 487 | wax museum 0.369 488 | hung horse 0.505 489 | teenage school 1.143 490 | molasses treacle 0.404 491 | whiskers mustache 0.351 492 | sub substitute 0.264 493 | risk scary 0.259 494 | duet together 0.8 495 | police badge 0.117 496 | starch white 0.575 497 | spatula pancake 0.367 498 | anatomy biology 0.921 499 | public toilet 0.351 500 | hip square 0.143 501 | diver sea 2.082 502 | belong group 2.131 503 | supervise boss 2.326 504 | democrat republican 3.204 505 | draw picture 2.061 506 | mechanism machine 3.277 507 | clue game 2.785 508 | straightforward honest 2.066 509 | cognition thought 2.169 510 | warn danger 3.249 511 | carving turkey 2.497 512 | archeology dig 2.306 513 | guidance help 2.959 514 | panther black 3.861 515 | stars night 2.18 516 | loving caring 3.394 517 | dramatic play 2.367 518 | profile picture 2.448 519 | marching army 2.053 520 | adjacent next 3.563 521 | rodent mouse 3.754 522 | adversary opponent 2.417 523 | math numbers 2.943 524 | palindrome backwards 3.504 525 | father dad 3.659 526 | christian religion 2.794 527 | shoulder arm 2.965 528 | fireplace warm 2.193 529 | sesame street 3.777 530 | mast sail 3.16 531 | cremate death 2.553 532 | deed good 3.393 533 | iffy maybe 3.102 534 | bet wager 2.977 535 | pioneer first 3.025 536 | glittery sparkly 2.184 537 | narrow thin 3.301 538 | insufficient funds 2.576 539 | boom explosion 3.035 540 | trillion billion 3.527 541 | slushy ice 2.657 542 | accept take 2.383 543 | chivalry knight 3.844 544 | rhythmic music 3.825 545 | cease desist 3.249 546 | me you 3.987 547 | copyright book 2.046 548 | loft apartment 2.598 549 | wholesome healthy 3.183 550 | cannabis marijuana 2.286 551 | competence ability 2.938 552 | spank hit 2.763 553 | booty pirate 2.362 554 | slender skinny 2.678 555 | criticism negative 2.161 556 | athletics sports 3.666 
557 | pilgrimage journey 2.794 558 | annoying irritating 2.437 559 | february winter 2.092 560 | fashion style 2.188 561 | thrive live 2.266 562 | definition meaning 3.887 563 | ruler king 3.387 564 | picket protest 2.134 565 | transportation bus 2.969 566 | shin bone 3.121 567 | mesh wire 2.302 568 | either neither 3.186 569 | rural country 3.625 570 | palace king 3.157 571 | income salary 2.002 572 | pier water 2.294 573 | sincerity honesty 3.014 574 | flint fire 3.386 575 | hefty big 2.769 576 | phase time 2.434 577 | screws screwdriver 2.133 578 | dubious doubtful 3.173 579 | disciple jesus 3.487 580 | idiot fool 2.303 581 | intrinsic innate 2.001 582 | citation reference 2.34 583 | trail path 2.668 584 | conscious awake 3.844 585 | split banana 3.087 586 | crown king 3.844 587 | vulnerable weak 2.593 588 | jacket coat 3.432 589 | sailors sea 2.072 590 | helping hand 3.102 591 | yield stop 3.031 592 | vehicles car 3.098 593 | delta river 3.664 594 | valentine heart 3.106 595 | immature young 3.375 596 | bubble pop 2.145 597 | photos camera 3.642 598 | blueberry fruit 2.319 599 | whining child 2.451 600 | strawberry fruit 2.534 601 | install software 3.786 602 | soapy bubbles 2.404 603 | chore work 3.824 604 | level even 3.434 605 | wiggle worm 2.722 606 | unwise dumb 2.29 607 | diabetes insulin 2.359 608 | uptight tense 2.481 609 | mince chop 2.229 610 | tub water 2.51 611 | chance luck 2.407 612 | bondage sex 2.652 613 | justice court 2.482 614 | dishwasher clean 2.216 615 | rugby sport 2.395 616 | emotions sad 2.112 617 | honeymoon marriage 2.746 618 | tow pull 2.668 619 | gypsum mineral 2.41 620 | hiker mountain 2.428 621 | couple two 3.751 622 | usurp king 3.878 623 | cheating test 2.253 624 | pinch hurt 2.823 625 | infrequent rare 3.031 626 | recipe food 3.533 627 | famished starving 3.066 628 | noodles pasta 2.589 629 | eight number 2.004 630 | chicago windy 2.707 631 | devout christian 2.929 632 | territory area 2.895 633 | portable small 2.181 634 | 
surely definitely 2.308 635 | nazi hitler 3.449 636 | icing frosting 2.357 637 | backyard garden 2.41 638 | tragedy sad 2.571 639 | dimension space 2.477 640 | parachute plane 2.192 641 | centimeter inch 2.325 642 | hallmark cards 3.79 643 | execution death 3.797 644 | puny weak 2.838 645 | loss death 3.383 646 | latex gloves 2.155 647 | stapler office 2.708 648 | latex rubber 2.914 649 | pit hole 3.214 650 | pets cats 2.797 651 | oval circle 2.484 652 | stripes zebra 3.2 653 | tough hard 3.926 654 | depression sadness 2.617 655 | cemetery death 2.858 656 | gigantic big 2.637 657 | humid hot 2.781 658 | sight eyes 2.719 659 | toffee candy 2.731 660 | seeing believing 2.168 661 | graphics design 3.308 662 | rind orange 3.365 663 | microsoft windows 3.006 664 | manufacturing factory 2.634 665 | bad evil 2.034 666 | idiom language 2.561 667 | organise clean 2.118 668 | band music 3.944 669 | calf baby 2.211 670 | sincere honest 2.873 671 | today now 3.717 672 | ready prepared 3.238 673 | loan bank 2.106 674 | twig tree 3.712 675 | projector film 2.075 676 | zucchini vegetable 2.963 677 | uh um 2.028 678 | iceberg cold 2.8 679 | colonel military 2.507 680 | surface top 2.58 681 | stunt man 2.017 682 | outline paper 2.066 683 | took take 2.342 684 | is be 2.225 685 | talking speaking 2.106 686 | point sharp 2.017 687 | monastery monks 2.298 688 | stopper bottle 2.584 689 | razor blade 3.129 690 | justify explain 2.078 691 | curtains window 2.683 692 | illustration drawing 3.951 693 | understand know 3.051 694 | deny lie 2.092 695 | genitals penis 3.611 696 | cheque bank 2.313 697 | execute do 2.196 698 | receive give 3.283 699 | counsel advice 2.465 700 | jab punch 2.34 701 | glimpse look 2.109 702 | dialogue talk 2.341 703 | isolated island 2.978 704 | town village 2.399 705 | mourning sad 2.738 706 | escort prostitute 2.061 707 | shield sword 2.101 708 | cannibal human 2.159 709 | brothers sisters 3.823 710 | racing car 3.728 711 | loser sad 2.092 712 | intoxicate 
alcohol 3.139 713 | clasp hold 2.396 714 | frisky playful 2.363 715 | conceited vain 3.165 716 | accord honda 2.267 717 | superlative best 3.919 718 | stance position 3.069 719 | lineage family 3.331 720 | exploit use 3.403 721 | lubricant oil 3.93 722 | munch food 2.014 723 | version type 3.311 724 | asphalt black 2.155 725 | stamps letter 2.25 726 | every one 2.02 727 | pocketbook money 2.726 728 | panic fear 2.495 729 | finite limited 2.046 730 | demand ask 2.413 731 | delight joy 2.076 732 | hectic crazy 2.393 733 | excuse reason 2.763 734 | chief indian 3.272 735 | their there 2.81 736 | jump leap 2.674 737 | imply infer 2.111 738 | corn-beef sandwich 2.129 739 | pageant beauty 3.798 740 | sing music 2.102 741 | philanthropy charity 2.263 742 | sprout grow 2.265 743 | centimeter ruler 2.25 744 | doorknob door 2.357 745 | rectangular shape 2.246 746 | advil headache 2.871 747 | reduction smaller 2.145 748 | pro professional 3.119 749 | exit door 2.417 750 | analog clock 2.319 751 | swan bird 2.931 752 | passenger car 2.509 753 | hatchet axe 3.867 754 | transportation train 2.625 755 | urinate toilet 2.367 756 | banner flag 3.972 757 | helpful kind 3.24 758 | retarded stupid 2.29 759 | nautical sea 3.533 760 | shaman magic 2.205 761 | conditioner shampoo 3.453 762 | lumpy bumpy 2.606 763 | publication magazine 2.6 764 | bigger larger 2.974 765 | imitation fake 3.277 766 | bagel lox 2.188 767 | have possess 2.593 768 | lessen reduce 2.558 769 | suggestion advice 2.082 770 | reserved shy 2.158 771 | candle wax 3.337 772 | aisle wedding 3.323 773 | selfish greedy 2.605 774 | wallet leather 2.086 775 | legal law 2.688 776 | remove take 3.55 777 | improvement home 2.984 778 | guys men 2.498 779 | germ sick 2.018 780 | dungarees pants 2.933 781 | ammo guns 2.559 782 | arrival departure 2.099 783 | diarrhea sick 2.084 784 | negative bad 2.461 785 | hour minute 2.914 786 | telephone call 3.545 787 | sweep clean 2.843 788 | cherry fruit 2.653 789 | pizza cheese 3.702 790 
| rub massage 3.45 791 | disciple follower 3.571 792 | lubrication sex 3.121 793 | gravestone cemetery 2.485 794 | spectacle show 2.595 795 | abnormal weird 2.813 796 | kneel pray 3.209 797 | select pick 3.303 798 | replacement substitute 2.637 799 | motor engine 3.158 800 | satisfactory good 2.963 801 | paperwork boring 2.504 802 | lettuce salad 3.477 803 | verandah porch 3.734 804 | defeat loss 2.262 805 | gopher animal 2.555 806 | tummy belly 2.831 807 | academic school 2.916 808 | motive crime 2.539 809 | curry spicy 2.319 810 | fortune money 3.499 811 | fictional book 2.27 812 | dairy cow 3.604 813 | christianity god 2.082 814 | dopamine drug 2.859 815 | chains metal 2.383 816 | intuition feeling 2.451 817 | mom mother 2.63 818 | printing paper 3.066 819 | aesthetics beauty 3.955 820 | confession church 2.652 821 | bitter sweet 3.339 822 | decimal point 3.277 823 | incline slope 2.246 824 | dungeon dragon 2.726 825 | variable changing 2.233 826 | classic old 2.805 827 | gas car 3.016 828 | inquiry ask 2.629 829 | situate locate 2.214 830 | superstar celebrity 3.12 831 | subjective opinion 2.774 832 | white black 3.926 833 | socks feet 3.243 834 | groin crotch 2.087 835 | iq test 3.137 836 | saturated fat 2.053 837 | bravo applause 2.196 838 | bolster support 2.879 839 | retainer braces 2.053 840 | graceful ballet 2.108 841 | chap fellow 2.511 842 | ray light 2.435 843 | pennies copper 2.687 844 | transfer money 2.145 845 | trek journey 2.331 846 | calendar year 3.037 847 | desperation need 2.383 848 | bustle hustle 3.749 849 | seafood lobster 2.165 850 | funk music 3.331 851 | dromedary hump 2.895 852 | edge cliff 2.864 853 | snapshot picture 3.155 854 | wanting needing 2.132 855 | heard sound 2.522 856 | scarecrow straw 2.072 857 | diagram picture 2.145 858 | beyond far 2.383 859 | contraception condom 3.465 860 | persuasive convincing 2.2 861 | stoplight traffic 2.763 862 | irrational crazy 3.987 863 | egg chicken 2.497 864 | fungus mold 2.035 865 | fee 
money 3.533 866 | position place 3.916 867 | prince princess 2.509 868 | plush toy 2.238 869 | proportion size 3.373 870 | hospital sick 2.239 871 | jiggle wiggle 2.826 872 | batteries power 3.539 873 | encyclopedia book 2.102 874 | warning caution 2.325 875 | dopey stupid 2.3 876 | individual person 3.969 877 | xerox paper 2.014 878 | uniform military 2.073 879 | meek shy 2.431 880 | sucker fool 2.005 881 | crocodile alligator 3.826 882 | frosty snowman 2.464 883 | numeral roman 3.862 884 | layout plan 2.866 885 | closure end 3.097 886 | overbearing mother 2.657 887 | unavailable busy 2.258 888 | agitate shake 2.001 889 | go-cart fun 2.788 890 | slight small 3.882 891 | insipid bland 2.029 892 | prophet jesus 2.383 893 | transfer move 3.763 894 | traffic cars 2.903 895 | haha funny 2.995 896 | lab coat 2.963 897 | bran cereal 3.341 898 | grammar english 2.435 899 | oyster pearl 2.045 900 | angels heaven 2.837 901 | evening night 3.229 902 | no yes 3.946 903 | cleanse wash 2.563 904 | lobster red 3.435 905 | continue proceed 2.463 906 | disperse crowd 2.229 907 | parka jacket 2.223 908 | youth child 2.537 909 | cranberry sauce 2.145 910 | swan white 2.666 911 | accurate precise 3.277 912 | cosmopolitan city 3.716 913 | unethical immoral 2.434 914 | trunk car 3.006 915 | supreme court 2.004 916 | submarine water 2.032 917 | fully completely 3.906 918 | parent child 2.512 919 | enormous gigantic 2.135 920 | dew wet 2.276 921 | send letter 2.359 922 | kettle black 2.219 923 | weekly monthly 2.476 924 | easy hard 3.506 925 | gag choke 3.016 926 | clip hair 2.605 927 | twins identical 2.231 928 | fluffy soft 2.254 929 | unity one 3.11 930 | airline plane 3.358 931 | cheek face 3.189 932 | nightmare dream 3.445 933 | dandruff hair 3.333 934 | nine number 2.6 935 | bull shit 2.739 936 | apart separate 2.789 937 | january february 2.718 938 | caution danger 2.42 939 | patterned fabric 2.296 940 | fail lose 2.342 941 | exhaustive tired 2.09 942 | clown scary 2.181 943 | 
reptile lizard 3.635 944 | crooked bent 2.606 945 | onset beginning 3.292 946 | perhaps possibly 2.098 947 | newton fig 2.008 948 | foresight future 3.051 949 | expanse wide 3.869 950 | curse swear 3.452 951 | curious cat 2.326 952 | assign give 2.474 953 | viable possible 3.445 954 | drum beat 2.437 955 | processor food 3.241 956 | muffin blueberry 2.27 957 | dim light 3.597 958 | absurd ridiculous 3.559 959 | literature books 3.635 960 | death life 2.217 961 | calender month 2.724 962 | creed belief 2.916 963 | mingle mix 2.319 964 | barn farm 3.079 965 | forum discussion 2.046 966 | revolt uprising 2.09 967 | grab bag 2.219 968 | amateur professional 2.114 969 | tasty delicious 2.35 970 | coaster roller 3.666 971 | painful ouch 2.082 972 | coca-cola soda 2.69 973 | utter speak 3.373 974 | rain wet 3.171 975 | paranoia fear 3.689 976 | city town 3.616 977 | fraction math 2.763 978 | superb excellent 3.255 979 | fancy dress 2.022 980 | gather collect 2.522 981 | crease iron 2.171 982 | jumbo elephant 3.142 983 | alpha beta 3.63 984 | trench coat 2.234 985 | biology life 2.357 986 | by near 2.537 987 | branch bank 2.048 988 | none nothing 2.64 989 | airport plane 3.067 990 | likely probably 2.398 991 | extension hair 2.229 992 | hippie hair 2.329 993 | renounce deny 2.145 994 | sucker lollipop 2.626 995 | manic depressive 2.451 996 | birth death 2.099 997 | testimony court 3.564 998 | canoe paddle 2.273 999 | rampage destruction 2.042 1000 | intention good 2.703 1001 | romantic love 4.386 1002 | hurdle jump 5.328 1003 | grateful thankful 4.599 1004 | tribune newspaper 4.258 1005 | depart leave 5.314 1006 | predicament problem 4.002 1007 | disappoint sad 5.02 1008 | either or 5.615 1009 | sound noise 4.271 1010 | slack loose 4.607 1011 | hanger clothes 4.246 1012 | employment job 5.481 1013 | angry mad 4.587 1014 | remaining left 5.2 1015 | yearly annual 4.374 1016 | again repeat 4.807 1017 | carpentry wood 4.875 1018 | bees honey 5.261 1019 | approach near 5.543 
1020 | hands fingers 4.271 1021 | crow black 4.742 1022 | images pictures 4.841 1023 | stocking christmas 4.637 1024 | pecan nut 5.28 1025 | crow bird 4.143 1026 | reflective mirror 5.217 1027 | caboose train 5.722 1028 | gull bird 4.801 1029 | wimp weak 4.041 1030 | key lock 4.839 1031 | title book 4.93 1032 | circular round 5.345 1033 | ladle spoon 4.077 1034 | tools hammer 4.028 1035 | pie apple 4.596 1036 | astronomy stars 5.395 1037 | slip fall 5.255 1038 | monarchy king 4.963 1039 | satchel bag 5.858 1040 | siding house 4.729 1041 | bouillon soup 4.387 1042 | hubris pride 4.69 1043 | dam water 4.266 1044 | cocky arrogant 4.039 1045 | iris flower 4.691 1046 | neurology brain 5.303 1047 | gut stomach 4.004 1048 | scissors cut 4.922 1049 | juice orange 4.932 1050 | entire whole 4.44 1051 | flowing water 4.123 1052 | converse talk 4.786 1053 | online internet 5.251 1054 | in out 5.889 1055 | eating food 5.943 1056 | bass fish 4.132 1057 | samurai sword 4.563 1058 | missing lost 4.062 1059 | out in 4.316 1060 | hen chicken 5.078 1061 | driveway car 4.759 1062 | dose medicine 5.677 1063 | fork spoon 4.448 1064 | pouch kangaroo 4.613 1065 | clique group 4.954 1066 | dollar money 4.806 1067 | pantry food 5.139 1068 | procure get 4.559 1069 | orange fruit 4.061 1070 | read book 4.722 1071 | nacho cheese 5.419 1072 | woods forest 4.102 1073 | dream sleep 5.681 1074 | portly fat 5.946 1075 | numb cold 4.101 1076 | winter snow 4.614 1077 | rule law 4.067 1078 | conspiracy theory 4.55 1079 | frosting cake 5.023 1080 | librarian books 4.544 1081 | agitate annoy 4.123 1082 | row boat 5.286 1083 | snout nose 5.19 1084 | pester annoy 4.855 1085 | uncertain unsure 4.821 1086 | mattress sleep 4.264 1087 | jail prison 4.386 1088 | grant money 4.033 1089 | peasant poor 4.744 1090 | haunt ghost 5.966 1091 | boa snake 5.166 1092 | strange weird 4.825 1093 | egocentric selfish 5.083 1094 | commerce money 4.024 1095 | bolts nuts 4.366 1096 | yeah yes 4.575 1097 | careful cautious 
4.43 1098 | adversary enemy 5.407 1099 | breeze wind 5.447 1100 | inn hotel 4.936 1101 | examination test 5.385 1102 | functional useful 4.908 1103 | glossy shiny 4.899 1104 | brake stop 5.772 1105 | refrigerator cold 4.644 1106 | pale white 4.896 1107 | entity being 4.544 1108 | mayhem chaos 4.578 1109 | snout pig 4.821 1110 | owl bird 4.001 1111 | genes dna 4.663 1112 | bark dog 5.597 1113 | rocker chair 5.166 1114 | ecstatic happy 5.451 1115 | lobby hotel 4.264 1116 | disposal garbage 5.293 1117 | greatest best 5.985 1118 | psalm bible 4.093 1119 | godliness cleanliness 4.053 1120 | west east 4.857 1121 | breakable glass 4.289 1122 | arduous hard 4.28 1123 | vessel ship 4.777 1124 | oasis desert 4.41 1125 | impose force 5.463 1126 | root tree 4.204 1127 | sandpaper rough 5.195 1128 | wicked witch 4.307 1129 | confrontation fight 4.637 1130 | vine wine 4.772 1131 | sooner later 4.431 1132 | obsolete old 5.62 1133 | contra against 4.609 1134 | postman mail 4.061 1135 | calculus math 5.017 1136 | squeak mouse 5.398 1137 | editorial newspaper 4.196 1138 | steering wheel 4.079 1139 | present gift 4.649 1140 | calorie food 4.157 1141 | strawberries red 4.429 1142 | speedy fast 5.999 1143 | partridge bird 5.471 1144 | surround sound 5.288 1145 | proximity close 4.254 1146 | equipment tools 4.395 1147 | shot gun 5.252 1148 | mower grass 4.47 1149 | harm hurt 4.921 1150 | nourish food 4.501 1151 | meatball spaghetti 4.596 1152 | emu bird 4.932 1153 | creature animal 4.486 1154 | esteem self 4.718 1155 | dilute water 4.835 1156 | software computer 4.747 1157 | tile floor 4.313 1158 | salutations greetings 5.957 1159 | gander goose 5.745 1160 | charisma charm 4.263 1161 | inverse opposite 5.465 1162 | punk rock 4.489 1163 | mars planet 5.166 1164 | pissed angry 4.297 1165 | leftover food 5.691 1166 | hello hi 4.744 1167 | coral reef 4.41 1168 | mildew mold 4.04 1169 | stale bread 5.04 1170 | neat tidy 4.293 1171 | bedtime sleep 4.508 1172 | fowl bird 4.668 1173 | jeans 
blue 4.267 1174 | personal private 4.437 1175 | solar sun 4.671 1176 | artificial fake 5.629 1177 | confide secret 5.212 1178 | demon devil 4.165 1179 | compact small 4.77 1180 | scrabble game 4.87 1181 | condom sex 5.116 1182 | riding horse 5.865 1183 | smirk smile 5.047 1184 | limb leg 4.478 1185 | nail hammer 4.803 1186 | district area 4.519 1187 | forgive forget 5.895 1188 | wag dog 4.703 1189 | anime cartoon 4.835 1190 | exposed naked 4.858 1191 | barracuda fish 5.951 1192 | argument fight 4.053 1193 | issues problems 4.647 1194 | jumpy nervous 4.153 1195 | pick choose 4.118 1196 | termites wood 4.344 1197 | haunting ghost 5.522 1198 | fluid water 4.539 1199 | debt money 4.869 1200 | haze fog 4.316 1201 | lass girl 5.36 1202 | potent strong 5.359 1203 | sugar sweet 5.22 1204 | wrinkles old 4.621 1205 | flawless perfect 5.592 1206 | thorough complete 4.842 1207 | mower lawn 5.639 1208 | seek find 5.625 1209 | setting place 4.008 1210 | caterpillar butterfly 4.068 1211 | bristle brush 5.551 1212 | dungarees jeans 4.312 1213 | numeral number 4.674 1214 | glee happy 4.41 1215 | serene calm 4.276 1216 | overweight fat 5.528 1217 | pullover sweater 4.138 1218 | huge big 4.298 1219 | mozzarella cheese 5.941 1220 | infuriate anger 4.087 1221 | stud horse 4.021 1222 | aboriginal native 4.145 1223 | underneath below 5.668 1224 | wear clothes 5.031 1225 | similarity same 4.435 1226 | drugstore pharmacy 4.17 1227 | stranger danger 4.343 1228 | prickly pear 4.012 1229 | humility humble 5.71 1230 | legume bean 4.035 1231 | supermarket food 4.398 1232 | fragrance perfume 5.01 1233 | bad good 4.189 1234 | hectic busy 4.231 1235 | population people 4.906 1236 | oar boat 4.875 1237 | compose music 4.968 1238 | conductor train 4.103 1239 | aphrodisiac sex 4.877 1240 | medicine doctor 4.41 1241 | superbowl football 5.136 1242 | opportunity chance 5.076 1243 | chef food 4.204 1244 | zit pimple 4.044 1245 | orgasm sex 5.63 1246 | condensation water 4.136 1247 | warrant arrest 5.2 
1248 | bucket water 4.259 1249 | disciples jesus 4.892 1250 | quiz test 4.623 1251 | bazooka gun 5.137 1252 | counterfeit money 4.483 1253 | stroller baby 5.192 1254 | affirmative yes 4.883 1255 | filth dirt 4.26 1256 | bison buffalo 4.374 1257 | equation math 4.637 1258 | miniature small 4.41 1259 | oxygen air 4.637 1260 | inheritance money 5.491 1261 | education school 4.034 1262 | daring brave 4.95 1263 | junkie drugs 4.637 1264 | sky blue 5.477 1265 | push shove 4.79 1266 | victor winner 5.931 1267 | mellow yellow 5.366 1268 | caffeine coffee 5.626 1269 | snooze sleep 5.166 1270 | leech blood 4.165 1271 | lend borrow 4.79 1272 | cougar cat 4.383 1273 | smack hit 4.386 1274 | attorney lawyer 4.753 1275 | anal sex 5.043 1276 | landlord rent 4.309 1277 | muck mud 5.251 1278 | wrinkled old 4.957 1279 | content happy 5.687 1280 | well-being health 4.544 1281 | merry happy 4.251 1282 | tedious boring 4.829 1283 | macho man 4.995 1284 | sloppy messy 4.906 1285 | pun joke 4.29 1286 | mechanic car 4.204 1287 | bandicoot crash 4.572 1288 | consider think 5.664 1289 | quiver shake 4.016 1290 | barrier wall 4.115 1291 | feminism women 4.182 1292 | enforcement police 5.051 1293 | pluto planet 5.973 1294 | hushed quiet 5.491 1295 | kosher jewish 4.53 1296 | swabs cotton 4.992 1297 | scapegoat blame 5.326 1298 | sled snow 5.887 1299 | campsite tent 5.276 1300 | rose flower 4.064 1301 | inhabit live 4.968 1302 | lawnmower grass 5.687 1303 | disability wheelchair 5.593 1304 | resemble similar 4.425 1305 | dislike hate 5.457 1306 | digit finger 5.238 1307 | volcano lava 4.096 1308 | sail boat 5.627 1309 | situate place 5.273 1310 | gavel judge 5.223 1311 | billiards pool 4.185 1312 | skull head 4.053 1313 | display show 4.236 1314 | overflow water 5.646 1315 | vacant empty 5.786 1316 | maintain keep 4.626 1317 | macaroni cheese 4.995 1318 | manor house 5.556 1319 | owe money 5.193 1320 | shore beach 4.016 1321 | hostility anger 4.324 1322 | illicit illegal 4.873 1323 | amen 
prayer 4.539 1324 | stumble trip 5.192 1325 | transaction money 4.387 1326 | presence here 4.114 1327 | devotion love 5.471 1328 | polished shiny 4.119 1329 | whoops mistake 4.578 1330 | destruct destroy 4.738 1331 | eclipse sun 4.179 1332 | explode bomb 4.447 1333 | truce peace 5.271 1334 | null void 4.41 1335 | pause stop 4.553 1336 | considerate kind 4.619 1337 | ulcer stomach 4.85 1338 | expense cost 4.662 1339 | hiv aids 5.598 1340 | sequence order 4.728 1341 | bond james 4.053 1342 | terse short 5.201 1343 | twig branch 4.24 1344 | floss teeth 5.211 1345 | dough bread 4.16 1346 | melody song 4.748 1347 | compare contrast 5.166 1348 | rely depend 4.064 1349 | theology religion 5.337 1350 | vista view 5.484 1351 | creek water 4.275 1352 | profound deep 5.003 1353 | rouge red 4.732 1354 | daffodil flower 4.729 1355 | equivalent same 4.732 1356 | falcon bird 5.271 1357 | joker batman 4.753 1358 | leak water 4.165 1359 | trot horse 5.753 1360 | blackboard chalk 4.906 1361 | wrath anger 5.109 1362 | cassette tape 5.627 1363 | long short 4.204 1364 | procession parade 4.284 1365 | fondness love 4.226 1366 | banana yellow 4.988 1367 | restaurant food 5.679 1368 | salient relevant 4.578 1369 | handicapped wheelchair 5.269 1370 | unlock key 4.644 1371 | request ask 5.882 1372 | santa christmas 4.075 1373 | plural many 4.823 1374 | algebra math 4.929 1375 | can't wont 4.386 1376 | booger nose 5.066 1377 | sane insane 4.213 1378 | worse better 4.423 1379 | disrespect rude 5.78 1380 | breed dog 4.044 1381 | shrink small 4.512 1382 | laptop computer 5.955 1383 | surmount overcome 4.395 1384 | desert sand 4.013 1385 | sty pig 4.204 1386 | ransom money 5.736 1387 | refrain stop 5.524 1388 | cease stop 5.562 1389 | courageous brave 5.525 1390 | gorgeous beautiful 5.491 1391 | games fun 4.013 1392 | style fashion 5.373 1393 | pine tree 5.947 1394 | linger stay 5.047 1395 | era time 5.278 1396 | menu food 5.308 1397 | grime dirt 5.135 1398 | near far 4.6 1399 | iq intelligence 
4.765 1400 | buck deer 4.873 1401 | digits numbers 5.044 1402 | rage anger 5.496 1403 | photo picture 4.84 1404 | anything everything 4.257 1405 | additionally also 4.197 1406 | retail store 4.099 1407 | winter cold 4.539 1408 | beta alpha 4.168 1409 | skater ice 4.246 1410 | parasol umbrella 5.959 1411 | sell buy 5.805 1412 | conclude finish 4.481 1413 | bits pieces 5.041 1414 | zest lemon 4.72 1415 | wealthy money 4.348 1416 | cuisine food 4.685 1417 | reform change 4.345 1418 | pitchfork hay 4.549 1419 | hull ship 5.348 1420 | mirror reflection 4.352 1421 | juggle balls 4.13 1422 | vine grape 4.519 1423 | father mother 4.103 1424 | dialect language 5.137 1425 | skit play 4.17 1426 | notify tell 4.163 1427 | nerdy glasses 4.013 1428 | inefficient slow 4.074 1429 | reproduction sex 4.553 1430 | consult ask 4.058 1431 | almonds nuts 4.298 1432 | weep cry 5.908 1433 | moron idiot 5.638 1434 | professor teacher 4.765 1435 | busted caught 4.618 1436 | greedy money 4.134 1437 | tomahawk indian 4.02 1438 | answer question 4.895 1439 | absorb sponge 4.395 1440 | cocoon butterfly 5.372 1441 | arm leg 4.388 1442 | struggle fight 4.467 1443 | thanksgiving turkey 4.284 1444 | windshield car 4.231 1445 | cautious careful 5.826 1446 | pottery clay 4.399 1447 | vulva vagina 5.258 1448 | drowning water 5.904 1449 | abbreviation short 4.627 1450 | payment money 5.447 1451 | gay happy 4.83 1452 | log wood 4.437 1453 | charged battery 4.087 1454 | insect bug 5.417 1455 | sponsor money 5.131 1456 | excise tax 4.863 1457 | cantaloupe melon 4.709 1458 | autonomy independence 4.07 1459 | superior better 4.618 1460 | accountant money 4.197 1461 | spreadsheet excel 4.778 1462 | haha laugh 4.45 1463 | triple three 5.358 1464 | prawn shrimp 4.621 1465 | rite passage 5.049 1466 | fore golf 5.255 1467 | grotesque ugly 4.759 1468 | gecko lizard 5.238 1469 | procreate sex 4.992 1470 | soar fly 4.674 1471 | nutcracker ballet 4.41 1472 | celebrate party 5.543 1473 | translucent clear 4.204 1474 
| smut porn 4.392 1475 | opulent rich 5.906 1476 | loo bathroom 4.002 1477 | adjustment change 4.999 1478 | rest sleep 4.561 1479 | boob breast 4.835 1480 | consonant vowel 5.606 1481 | shrug shoulders 5.205 1482 | complain whine 4.363 1483 | novelty new 4.943 1484 | pretender fake 4.69 1485 | cuckoo bird 4.839 1486 | recline chair 4.248 1487 | cd music 5.248 1488 | stethoscope doctor 5.924 1489 | undress naked 4.031 1490 | migraine headache 5.798 1491 | cigars smoke 4.065 1492 | yesterday tomorrow 4.296 1493 | untrue false 4.913 1494 | us we 4.6 1495 | gherkins pickles 4.736 1496 | learner student 4.465 1497 | bail jail 4.872 1498 | spirituality religion 4.176 1499 | realise understand 4.41 1500 | bassinet baby 5.877 1501 | bunk bed 7.5714 1502 | whisker cat 6.8888 1503 | grubby dirty 6.9277 1504 | brie cheese 7.3473 1505 | newborn baby 6.9355 1506 | manager boss 6.1779 1507 | pleased happy 7.1443 1508 | udder cow 6.4623 1509 | blot ink 6.1296 1510 | nog egg 6.7922 1511 | depressing sad 7.6592 1512 | timid shy 6.208 1513 | tub bath 6.1375 1514 | poultry chicken 6.1888 1515 | exhaustion tired 7.2602 1516 | morose sad 6.4151 1517 | adored loved 6.7482 1518 | tote bag 7.9132 1519 | beagle dog 7.3791 1520 | commence begin 6.1725 1521 | sparrow bird 7.4001 1522 | ought should 7.296 1523 | locomotive train 6.8378 1524 | curly hair 7.5823 1525 | feline cat 7.2183 1526 | woof dog 7.2424 1527 | spotless clean 7.5945 1528 | spree shopping 6.3855 1529 | concept idea 7.4999 1530 | obesity fat 6.4442 1531 | insane crazy 6.6239 1532 | drapes curtains 6.0152 1533 | kitty cat 6.3837 1534 | tunes music 6.5992 1535 | spending money 6.5082 1536 | wick candle 7.9976 1537 | currently now 7.0282 1538 | constrictor boa 6.4275 1539 | yeast bread 6.346 1540 | pragmatic practical 6.0718 1541 | flipper dolphin 6.0742 1542 | calf cow 6.0985 1543 | however but 6.6389 1544 | stallion horse 7.5991 1545 | nightingale bird 6.9479 1546 | peasy easy 7.5015 1547 | difficult hard 7.4852 1548 | 
frequently often 7.0536 1549 | foreplay sex 7.6448 1550 | pony horse 6.0373 1551 | inexpensive cheap 7.4678 1552 | pail bucket 6.2431 1553 | buckle belt 6.4243 1554 | queue line 6.206 1555 | chapter book 6.5069 1556 | wacko crazy 6.3713 1557 | weighty heavy 6.0681 1558 | finch bird 6.9066 1559 | maniac crazy 6.7615 1560 | spar fight 6.274 1561 | questions answers 7.1988 1562 | bovine cow 7.7017 1563 | flood water 7.064 1564 | cashew nut 6.8974 1565 | chirp bird 6.6311 1566 | notion idea 6.6259 1567 | outdated old 6.6217 1568 | moo cow 7.6034 1569 | funds money 7.7197 1570 | curl hair 6.7237 1571 | intriguing interesting 6.811 1572 | crescent moon 7.7197 1573 | quench thirst 6.6655 1574 | waltz dance 6.7784 1575 | suffering pain 6.7013 1576 | feely touchy 7.8733 1577 | peril danger 7.5742 1578 | begin start 6.7055 1579 | prior before 6.8126 1580 | crossword puzzle 6.1455 1581 | origami paper 6.1189 1582 | banker money 6.1858 1583 | dune sand 7.1807 1584 | automobile car 6.6438 1585 | cobra snake 6.5753 1586 | almighty god 7.064 1587 | vehicle car 6.1412 1588 | cain able 6.2229 1589 | synapse brain 6.0616 1590 | intercourse sex 7.4001 1591 | pubes hair 6.4343 1592 | unproductive lazy 7.2461 1593 | incorrect wrong 6.676 1594 | banks money 6.9576 1595 | misplaced lost 7.8088 1596 | vanish disappear 6.8503 1597 | whipped cream 6.9167 1598 | adoration love 6.4481 1599 | annihilate destroy 6.0681 1600 | eruption volcano 6.3815 1601 | alter change 6.8287 1602 | supervisor boss 6.5461 1603 | hissing snake 7.3779 1604 | grin smile 6.254 1605 | larger bigger 6.2604 1606 | sans without 6.634 1607 | bumper car 6.206 1608 | espionage spy 7.2171 1609 | hinge door 7.0205 1610 | furious angry 6.4608 1611 | gal girl 6.0103 1612 | elderly old 6.1896 1613 | oink pig 7.0739 1614 | imbibe drink 7.0452 1615 | puck hockey 7.0879 1616 | film movie 6.3171 1617 | prohibition alcohol 6.5039 1618 | branch tree 6.9111 1619 | barbie doll 6.0466 1620 | english language 6.5979 1621 | pricey 
expensive 6.7599 1622 | nap sleep 6.1014 1623 | heifer cow 7.1728 1624 | layer cake 6.7839 1625 | carp fish 7.2917 1626 | erupt volcano 6.9452 1627 | hare rabbit 7.6697 1628 | powerful strong 6.2615 1629 | conceal hide 7.0879 1630 | lumber wood 6.1371 1631 | penetration sex 6.2669 1632 | cheerful happy 7.044 1633 | britannica encyclopedia 6.7946 1634 | career job 6.1216 1635 | yolk egg 6.6005 1636 | beak bird 6.5371 1637 | nourishment food 6.8638 1638 | modify change 7.2397 1639 | sodium salt 6.676 1640 | gallop horse 6.7903 1641 | costly expensive 7.3763 1642 | shelves books 6.3991 1643 | dull boring 7.1438 1644 | adore love 7.6034 1645 | bashful shy 7.0117 1646 | trout fish 6.7372 1647 | locate find 6.3815 1648 | petite small 6.1942 1649 | sandy beach 7.1512 1650 | ladle soup 6.0207 1651 | unhappy sad 6.868 1652 | orchid flower 7.3233 1653 | litchi fruit 6.4623 1654 | deceased dead 7.6244 1655 | allowance money 7.4101 1656 | duct tape 6.6484 1657 | federal government 6.3469 1658 | holler yell 6.1347 1659 | hiss snake 6.7826 1660 | millionaire rich 6.0218 1661 | cheddar cheese 7.4769 1662 | states united 6.2014 1663 | vein blood 6.1176 1664 | hog pig 7.2329 1665 | substitute teacher 6.0306 1666 | presume assume 6.1296 1667 | hoot owl 6.1035 1668 | roar lion 6.6639 1669 | mundane boring 6.1124 1670 | banking money 7.426 1671 | willow tree 6.7856 1672 | teller bank 6.1888 1673 | elementary school 6.5 1674 | steeple church 6.2229 1675 | subscription magazine 6.9277 1676 | hacker computer 6.1469 1677 | fortitude strength 7.2312 1678 | unjust unfair 6.2669 1679 | tuna fish 6.0763 1680 | joyful happy 6.4938 1681 | ideal perfect 7.0289 1682 | viper snake 6.7189 1683 | meow cat 7.6917 1684 | jog run 7.0948 1685 | noisy loud 6.823 1686 | occupation job 6.5803 1687 | gums teeth 6.0256 1688 | tapioca pudding 7.3491 1689 | assistance help 7.953 1690 | doe deer 6.3469 1691 | nope no 6.2229 1692 | capitulate surrender 6.7515 1693 | chopper helicopter 6.6043 1694 | romance love 
6.8638 1695 | velocity speed 6.2839 1696 | saltine cracker 6.3886 1697 | brew beer 6.5058 1698 | jaws shark 6.3557 1699 | perspire sweat 6.9885 1700 | obama president 6.5 1701 | tampa florida 6.4947 1702 | hydrate water 6.676 1703 | bestow give 6.7422 1704 | herring fish 6.7329 1705 | everything all 6.1846 1706 | ill sick 7.1744 1707 | obese fat 6.8222 1708 | numerous many 6.4297 1709 | occasional sometimes 6.3967 1710 | percolator coffee 7.8088 1711 | aid help 6.1242 1712 | apologize sorry 7.5385 1713 | sterling silver 7.401 1714 | smooch kiss 7.3378 1715 | liberty freedom 6.2903 1716 | apology sorry 7.0385 1717 | petal flower 6.7588 1718 | filthy dirty 6.8759 1719 | paid money 6.093 1720 | crimson red 6.5 1721 | plaything toy 7.4312 1722 | celebration party 6.3747 1723 | husky dog 6.453 1724 | heron bird 6.155 1725 | brawl fight 6.8923 1726 | prevent stop 7.683 1727 | fir tree 6.6647 1728 | google search 6.42 1729 | fret worry 6.7135 1730 | puzzled confused 6.3909 1731 | feeble weak 6.175 1732 | mortality death 7.4442 1733 | quail bird 6.4243 1734 | foe enemy 6.4733 1735 | exhausted tired 7.3378 1736 | pickup truck 6.2839 1737 | alloy metal 6.5658 1738 | braces teeth 6.003 1739 | authentic real 6.208 1740 | embrace hug 6.6311 1741 | external outside 6.634 1742 | remain stay 7.267 1743 | savings money 6.3344 1744 | simple easy 6.5854 1745 | forecast weather 6.8225 1746 | comprehend understand 7.4852 1747 | consume eat 6.3063 1748 | lard fat 6.7396 1749 | whiskers cat 6.5816 1750 | men women 6.159 1751 | synthesizer music 6.961 1752 | squander waste 7.0768 1753 | economics money 6.676 1754 | lofty high 6.0596 1755 | difficulty hard 6.8492 1756 | immediate now 6.4655 1757 | demonstrate show 6.0559 1758 | hurl throw 6.2714 1759 | tripod camera 6.676 1760 | sap tree 6.8051 1761 | trigger gun 6.3747 1762 | previously before 7.1267 1763 | ponder think 6.4655 1764 | visage face 6.0742 1765 | pursue chase 6.0126 1766 | payday money 6.2398 1767 | flamingo pink 6.4313 1768 
| poverty poor 6.5298 1769 | hooker prostitute 6.3582 1770 | croak frog 6.1913 1771 | purr cat 7.0731 1772 | belch burp 6.4741 1773 | hilarious funny 6.2229 1774 | canine dog 7.0078 1775 | boast brag 6.3183 1776 | dissimilar different 6.1296 1777 | several many 6.6194 1778 | frightened scared 6.9764 1779 | found lost 7.0536 1780 | drive car 6.1216 1781 | glucose sugar 6.3874 1782 | apparition ghost 6.7666 1783 | frame picture 6.6151 1784 | revolver gun 6.4494 1785 | leash dog 7.0517 1786 | stump tree 6.3505 1787 | educator teacher 6.9936 1788 | grass green 6.1935 1789 | parchment paper 6.6415 1790 | fortunate lucky 6.2525 1791 | sensibility sense 6.0314 1792 | wealthy rich 7.3269 1793 | clock time 6.0943 1794 | mare horse 6.9479 1795 | quack duck 7.6286 1796 | detest hate 7.3373 1797 | creator god 6.4355 1798 | relocate move 7.5354 1799 | quarterback football 7.2927 1800 | blaze fire 7.0722 1801 | loaf bread 7.6774 1802 | lunar moon 6.3635 1803 | physician doctor 6.3049 1804 | cuss swear 6.3771 1805 | companion friend 6.0156 1806 | narrative story 7.6774 1807 | cranny nook 6.9592 1808 | sugary sweet 6.455 1809 | fatigue tired 7.2329 1810 | unclean dirty 7.8536 1811 | reuse recycle 7.3484 1812 | shall will 6.066 1813 | select choose 6.0967 1814 | pawn chess 6.6402 1815 | quarrel fight 6.2754 1816 | uncooked raw 7.6055 1817 | cherish love 6.7922 1818 | hi hello 6.4416 1819 | tentacles octopus 6.7004 1820 | instruct teach 6.0551 1821 | fascinating interesting 6.4967 1822 | python snake 6.8323 1823 | dromedary camel 6.4953 1824 | idiotic stupid 6.5522 1825 | graphite pencil 6.1738 1826 | classical music 7.6802 1827 | novel book 6.2229 1828 | pane window 7.4272 1829 | currency money 6.4141 1830 | untruth lie 6.4872 1831 | pubic hair 6.0787 1832 | antique old 6.3704 1833 | payback revenge 6.4572 1834 | opposed against 6.676 1835 | flounder fish 7.5685 1836 | proprietor owner 6.7666 1837 | reflection mirror 6.1491 1838 | aged old 6.2386 1839 | shove push 7.1267 1840 | 
attempt try 7.5212 1841 | extremely very 7.3088 1842 | finance money 7.0806 1843 | arid dry 6.3914 1844 | infatuation love 6.1335 1845 | collapse fall 6.6458 1846 | agony pain 6.6651 1847 | stride walk 6.5517 1848 | rye bread 6.1216 1849 | congregation church 6.3789 1850 | immediately now 6.561 1851 | chervil herb 6.2229 1852 | secure safe 6.7569 1853 | merry christmas 6.3266 1854 | component part 7.2491 1855 | destiny fate 6.1267 1856 | inebriated drunk 6.9479 1857 | circulation blood 6.2659 1858 | holster gun 6.946 1859 | yourself me 6.4343 1860 | unsafe dangerous 6.7091 1861 | inquiry question 6.1128 1862 | klutz clumsy 7.6185 1863 | structure building 7.3915 1864 | loathe hate 7.1443 1865 | toad frog 6.1176 1866 | elm tree 7.2917 1867 | retired old 6.2229 1868 | gooey sticky 6.1171 1869 | itch scratch 6.8176 1870 | rifle gun 6.1738 1871 | beneath under 7.02 1872 | cyborg robot 6.5298 1873 | taut tight 6.5517 1874 | buffet food 6.6239 1875 | serpent snake 7.3557 1876 | kiwi fruit 6.5489 1877 | equilibrium balance 6.4591 1878 | query question 6.5272 1879 | wealth money 7.2424 1880 | hurt pain 6.4572 1881 | devour eat 7.262 1882 | joyous happy 6.2702 1883 | vengeance revenge 6.5681 1884 | edible food 6.4959 1885 | magpie bird 7.4095 1886 | ancient old 6.8335 1887 | freezing cold 6.3891 1888 | brewery beer 7.8487 1889 | birch tree 6.577 1890 | jigsaw puzzle 7.4163 1891 | frigid cold 6.5401 1892 | nearby close 6.8539 1893 | gel hair 6.4355 1894 | cash money 6.8667 1895 | lad boy 6.1201 1896 | truthful honest 6.1512 1897 | perspiration sweat 6.2839 1898 | transition change 6.9885 1899 | off on 6.4041 1900 | bumble bee 7.1659 1901 | homosexual gay 6.8989 1902 | tusk elephant 6.3242 1903 | wand magic 6.4872 1904 | this that 6.4094 1905 | desktop computer 6.0287 1906 | web spider 6.3344 1907 | recall remember 6.4481 1908 | sob cry 6.1644 1909 | ruminate think 6.9008 1910 | jockey horse 6.367 1911 | sufficient enough 7.7602 1912 | gallery art 6.1762 1913 | aquarium fish 
6.8717 1914 | salmon fish 6.5058 1915 | trio three 6.1896 1916 | poodle dog 6.5 1917 | unexpected surprise 6.1808 1918 | buddy friend 6.1491 1919 | ale beer 6.3242 1920 | budgie bird 6.2641 1921 | knob door 7.1921 1922 | confidential secret 6.8974 1923 | fountain water 7.0654 1924 | pending waiting 6.7304 1925 | drowsy sleepy 6.1682 1926 | slacks pants 6.6409 1927 | barking dog 7.2047 1928 | probable likely 6.1159 1929 | instructor teacher 7.0331 1930 | drove car 7.446 1931 | heap pile 6.3317 1932 | scheme plan 6.9885 1933 | tipsy drunk 6.3491 1934 | ponytail hair 6.5473 1935 | bloom flower 6.4183 1936 | house home 6.2229 1937 | disappointed sad 6.4382 1938 | regulations rules 7.5823 1939 | umbrella rain 7.8536 1940 | prepared ready 6.4872 1941 | intoxicated drunk 6.0228 1942 | intelligence smart 6.1808 1943 | illuminate light 6.0602 1944 | pal friend 6.322 1945 | fondue cheese 6.2949 1946 | actual real 6.5351 1947 | motionless still 6.1035 1948 | weary tired 7.0722 1949 | nostril nose 6.1539 1950 | skim milk 7.0557 1951 | cab taxi 6.0346 1952 | sirloin steak 6.9764 1953 | loath hate 6.9715 1954 | melancholy sad 6.2641 1955 | bluejay bird 7.2927 1956 | specialist doctor 6.0145 1957 | honeydew melon 6.9002 1958 | labyrinth maze 7.2214 1959 | wreak havoc 6.6259 1960 | perish die 7.7474 1961 | wager bet 6.8809 1962 | stumble fall 6.051 1963 | cavern cave 6.1846 1964 | icing cake 6.4636 1965 | finances money 6.8785 1966 | touchy feely 6.9569 1967 | uncommon rare 6.4953 1968 | improbable unlikely 7.8676 1969 | you me 6.3195 1970 | bank money 6.2941 1971 | possibly maybe 6.6515 1972 | keg beer 7.0031 1973 | jerky beef 6.9161 1974 | beacon light 6.2727 1975 | grater cheese 7.6713 1976 | swatter fly 7.5194 1977 | swine pig 7.4564 1978 | touchdown football 6.0967 1979 | antler deer 6.0421 1980 | stroll walk 6.8709 1981 | lamp light 6.5858 1982 | senile old 6.2229 1983 | transform change 6.8438 1984 | depressed sad 6.5179 1985 | tale story 6.2229 1986 | annual yearly 6.7615 
"""Score word embeddings by how well K-means recovers concept categories.

Usage: python eval_category.py EMBEDDING_FILE

EMBEDDING_FILE holds one word per line followed by its whitespace-separated
vector components.  The gold categories are read from
``mcrae_typicality.yaml`` in the current working directory.  The script
prints three clustering scores on one line, separated by spaces: adjusted
Rand index, adjusted mutual information and normalized mutual information.
"""
import sys

# Number of K-means clusters; this value was hard-coded in the original script.
N_CLUSTERS = 41

# Gold category file, resolved relative to the current working directory.
CATEGORY_FILE = 'mcrae_typicality.yaml'


def parse_embeddings(lines):
    """Return ``{word: [float, ...]}`` parsed from an iterable of text lines.

    The first whitespace-separated field of each line is the word and the
    remaining fields are its vector components.  Blank lines are skipped
    (the original indexed ``line[0]`` unconditionally and crashed on them).
    A later duplicate of a word overwrites the earlier entry.

    Raises:
        ValueError: if a vector component cannot be converted to float.
    """
    emb = {}
    for line in lines:
        fields = line.strip().split()
        if not fields:
            continue
        emb[fields[0]] = [float(value) for value in fields[1:]]
    return emb


def load_categories(path=CATEGORY_FILE):
    """Load the category mapping from the YAML file at *path*.

    ``yaml.safe_load`` is used instead of a bare ``yaml.load``: the latter
    can construct arbitrary Python objects and requires an explicit
    ``Loader`` argument in recent PyYAML releases.
    """
    # Imported lazily so the pure helpers stay usable without PyYAML.
    import yaml

    with open(path) as handle:
        return yaml.safe_load(handle)


def collect_labels(categories, emb):
    """Pair every category member that has an embedding with its category id.

    Categories are numbered 0, 1, 2, ... in iteration order of *categories*.
    A category still consumes an id when none of its members is in *emb*,
    which matches the numbering of the original script.

    Args:
        categories: mapping from category name to an iterable of words.
        emb: mapping from word to vector; only membership is tested here.

    Returns:
        ``(labels_true, labels_word)``: two parallel lists holding the
        category id and the word of every retained member.
    """
    labels_true = []
    labels_word = []
    for cat_id, name in enumerate(categories):
        for word in categories[name]:
            # Dict membership is O(1); ``emb.keys()`` was a list scan on py2.
            if word in emb:
                labels_true.append(cat_id)
                labels_word.append(word)
    return labels_true, labels_word


def cluster_scores(vectors, labels_true, n_clusters=N_CLUSTERS):
    """Cluster *vectors* with K-means and score against *labels_true*.

    Returns:
        A 3-tuple ``(adjusted_rand, adjusted_mutual_info, normalized_mutual_info)``.
    """
    # Heavy third-party imports are deferred to the only function using them.
    import numpy as np
    from sklearn import metrics
    from sklearn.cluster import KMeans

    # random_state=0 keeps the clustering reproducible between runs.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(np.array(vectors))
    labels_pred = list(kmeans.labels_)
    return (
        metrics.adjusted_rand_score(labels_true, labels_pred),
        metrics.adjusted_mutual_info_score(labels_true, labels_pred),
        metrics.normalized_mutual_info_score(labels_true, labels_pred),
    )


def main(argv):
    """Run the evaluation for the embedding file named in ``argv[1]``.

    Returns the process exit status: 0 on success, 2 when the embedding file
    argument is missing, 1 when no category word has an embedding.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: eval_category.py EMBEDDING_FILE\n")
        return 2

    categories = load_categories()
    with open(argv[1]) as handle:
        emb = parse_embeddings(handle)

    labels_true, labels_word = collect_labels(categories, emb)
    if not labels_word:
        sys.stderr.write("no category word has an embedding in %s\n" % argv[1])
        return 1

    scores = cluster_scores([emb[word] for word in labels_word], labels_true)
    # One formatted argument prints identically under Python 2 and Python 3.
    print("%s %s %s" % scores)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
| anniversary birthday 3.727272727 44 | answer plea 2.44 45 | apparel dress 4.227272727 46 | appearance shadow 2.52173913 47 | apple bank 1.125 48 | apple orange 3.47826087 49 | apple pod 2.043478261 50 | appliance dryer 3.333333333 51 | appliance refrigerator 4.227272727 52 | approach swing 1.652173913 53 | approval encouragement 2.863636364 54 | approving interaction 2 55 | arc rainbow 3.347826087 56 | architecture engineering 3.25 57 | area patio 2.545454545 58 | area region 4.318181818 59 | argument evidence 3.045454545 60 | argument indication 1.772727273 61 | arm arrow 2.230769231 62 | arm missile 2.772727273 63 | armor mail 1.913043478 64 | army legion 3.285714286 65 | aroma fragrance 4.681818182 66 | aroma smell 4.19047619 67 | arrangement blizzard 1.25 68 | arrangement room 2.380952381 69 | article girl 1.208333333 70 | artillery gun 3.541666667 71 | aspen maple 2.791666667 72 | ass donkey 4.85 73 | assembly crowd 3.363636364 74 | assets capital 4.090909091 75 | assets credit 3.47826087 76 | assets income 4.038461538 77 | association organization 4.362318841 78 | athlete participant 3.458333333 79 | athletics racing 3.826086957 80 | athletics swimming 3.45 81 | attitude notice 1.486486486 82 | attitude study 1.88 83 | attorney lawyer 4.681818182 84 | attraction pressure 1.565217391 85 | attraction quality 2.347826087 86 | attribute condition 2.576923077 87 | average time 2.380952381 88 | baby computer 1.24 89 | bail bond 3.086956522 90 | bait instrument 2.090909091 91 | bait pump 1.208333333 92 | bakery work 2.541666667 93 | bakery workplace 3 94 | balance gauge 2.6 95 | ball egg 1.727272727 96 | ball nut 1.727272727 97 | ball poker 2.165217391 98 | ball sphere 4.142857143 99 | band circle 2.954545455 100 | bar needle 1.772727273 101 | bar rod 4.047619048 102 | barrel gallon 3.153846154 103 | base club 1.619047619 104 | base stock 2.476190476 105 | baseball softball 3.230769231 106 | basin vessel 4.076923077 107 | basketball hockey 2.782608696 108 | 
basketball squash 2.833333333 109 | bathroom chamber 2.681818182 110 | battle fight 4.583333333 111 | bay mere 1.583333333 112 | beach chain 1.047619048 113 | beach ridge 2.260869565 114 | beam column 2.961538462 115 | beat meter 1.92 116 | beat rhythm 4.363636364 117 | bed layer 2.136363636 118 | bedroom cell 2.136363636 119 | bedroom construction 2.208333333 120 | bee insect 4.043478261 121 | beef cattle 4.173913043 122 | beef meat 4.619047619 123 | beginner novice 4.166666667 124 | behavior purpose 2.304347826 125 | belief magic 2.565217391 126 | belief purpose 2.333333333 127 | bench seat 4.428571429 128 | bend curve 4.541666667 129 | berry citrus 3 130 | bill invoice 4.588235294 131 | billboard structure 3 132 | bin box 3.956521739 133 | bin cup 2.217391304 134 | binary star 1.863636364 135 | bird creature 3.458333333 136 | bird solid 1.090909091 137 | birth modification 1.434782609 138 | bishop priest 4.269230769 139 | bit tool 2.708333333 140 | bite breakfast 2.869565217 141 | bite taste 3.636363636 142 | black juvenile 1.238095238 143 | blade projector 1.434782609 144 | blanket sleeve 2.111111111 145 | blizzard rash 1.307692308 146 | blow contact 1.695652174 147 | blue red 3.272727273 148 | board commission 3.173913043 149 | boat ferry 4.083333333 150 | boat vessel 3.208333333 151 | body trunk 3.260869565 152 | bond security 3.904761905 153 | bone skull 4.272727273 154 | book encyclopedia 3.904761905 155 | booklet reference 3.380952381 156 | boot kick 3.173913043 157 | boot punch 1.75 158 | bottom sole 3.347826087 159 | boy male 4.52173913 160 | boy rover 2 161 | brace stand 2.913043478 162 | brain head 4.173913043 163 | brake click 1.761904762 164 | branch department 4.043478261 165 | brand knife 1.681818182 166 | brand surname 2.318181818 167 | brandy liquor 4.541666667 168 | brass executive 1.913043478 169 | bread bun 4.304347826 170 | break dash 2.916666667 171 | break insert 2.041666667 172 | brick cement 3.625 173 | brick strip 1.541666667 174 | 
broadcast packet 1.727272727 175 | brochure publication 3.875 176 | brochure reference 2.95 177 | brother member 2.583333333 178 | brush implement 2.47826087 179 | budget fund 4 180 | buffer cache 2.409090909 181 | bug child 1.276595745 182 | build flesh 1.833333333 183 | building cafe 3.142857143 184 | bulb onion 2.92 185 | bulletin news 4.666666667 186 | bunny server 1.25 187 | burn hurt 3.730769231 188 | burning flame 4.48 189 | burning punishment 2.52 190 | burst transformation 1.952380952 191 | business disposition 1.583333333 192 | business railway 2.541666667 193 | butter stick 2.375 194 | butterfly comma 1.130434783 195 | cab taxi 4.476190476 196 | cake pie 3.545454545 197 | calendar circulation 1.695652174 198 | calendar system 1.916666667 199 | call meeting 2.727272727 200 | call statement 2.125 201 | campaign operation 2.826086957 202 | candy sweet 4.510638298 203 | cap covering 3.681818182 204 | capital seat 2.32 205 | captain officer 4 206 | card plastic 2.391304348 207 | carriage coach 3.708333333 208 | cart wagon 4.375 209 | cartoon wit 2.666666667 210 | case grip 2.12 211 | case luggage 3.68 212 | case tin 2.6 213 | cast fishing 2.6 214 | cat vision 1.431818182 215 | category flavor 2.428571429 216 | cave formation 2.347826087 217 | ceiling overhead 3.52 218 | cement glue 3 219 | center loss 1.304347826 220 | century decade 3.434782609 221 | century period 3.769230769 222 | century temperature 1.130434783 223 | certificate study 3.136363636 224 | certificate wave 1.145833333 225 | chair furniture 3.869565217 226 | chair rocker 2.826086957 227 | chance opportunity 4.590909091 228 | chance probability 4.619047619 229 | chandler retailer 2.130434783 230 | change decrease 3 231 | change move 3.434782609 232 | channel sound 3.2 233 | character vision 1.666666667 234 | charge damage 2.375 235 | charge tax 4.086956522 236 | check draft 3.952380952 237 | cheese food 4 238 | chemical salt 3.590909091 239 | chess duty 1.166666667 240 | chick hen 3.863636364 
241 | chicken poultry 4.217391304 242 | chief guru 3.904761905 243 | child kid 4.857142857 244 | chin feature 2.875 245 | choice option 4.590909091 246 | chuck jaw 2.090909091 247 | church temple 4.347826087 248 | circle oval 3.347826087 249 | citrus orange 4.208333333 250 | climb mount 4.166666667 251 | clock timer 4.416666667 252 | cloth satin 3.857142857 253 | cloud energy 1.727272727 254 | club society 3.869565217 255 | coach trainer 4.6 256 | coat newspaper 1.088888889 257 | coat roof 2.304347826 258 | code software 3.52 259 | coffee meeting 2.602941176 260 | coin currency 4.571428571 261 | collapse shock 3.043478261 262 | collection packet 2.772727273 263 | collision smash 3.904761905 264 | color purple 4.090909091 265 | color quality 2.407407407 266 | coloring yellow 3.791666667 267 | comfort relief 3.857142857 268 | commander editor 2.041666667 269 | commerce transport 2.739130435 270 | commitment guarantee 3.791666667 271 | communication message 4.083333333 272 | communication statement 3.391304348 273 | communication tune 2.409090909 274 | community province 3.391304348 275 | community territory 2.739130435 276 | company distributor 3.523809524 277 | company establishment 3.833333333 278 | comparison scrutiny 2.727272727 279 | compound salt 3.045454545 280 | conclusion result 4.523809524 281 | condition status 4.090909091 282 | conditions weather 3.666666667 283 | congress sex 1.818181818 284 | connection keyboard 2.4 285 | conservation traveling 1.717391304 286 | construction window 2.761904762 287 | continent ground 2.791666667 288 | control driving 3.714285714 289 | cook printer 1.347826087 290 | cooking cuisine 4.25 291 | copy image 3.25 292 | copy work 3.125 293 | cord pick 1.590909091 294 | cord yarn 2.761904762 295 | corridor hall 4.041666667 296 | corruption house 1.255319149 297 | cost postage 3.086956522 298 | cost reward 2.80952381 299 | couch lounge 3.347826087 300 | count number 4.166666667 301 | counter furniture 2.3 302 | country playground 
1.791666667 303 | course starter 2.666666667 304 | court drawer 1.272727273 305 | court tribunal 3.791666667 306 | cousin relation 4.043478261 307 | cousin relative 4.5 308 | cover feather 2.44 309 | cover hair 2.65 310 | cover sleeve 3.260869565 311 | covering skin 3.35 312 | crack hole 3.04 313 | craft trade 3.458333333 314 | creation stitch 2.68 315 | creativity vision 3.181818182 316 | credit sum 3 317 | creek stream 3.88 318 | crew society 2 319 | crew unit 3.590909091 320 | crop plant 4.083333333 321 | crow jay 2.217391304 322 | crown place 1.541666667 323 | crush push 2.181818182 324 | cube dice 3.925925926 325 | cup handbag 1.318181818 326 | cup son 1.086956522 327 | current flow 3.761904762 328 | curve rainbow 3.44 329 | customers mission 1.641304348 330 | cut meat 2.904761905 331 | cutter knife 4.458333333 332 | cylinder pen 2.227272727 333 | dad parent 4.545454545 334 | daisy flower 4.5 335 | damage terms 1.761904762 336 | danger status 1.904761905 337 | dash sprint 3.375 338 | dashboard protection 2.173913043 339 | database list 4.08 340 | daughter girl 4.04 341 | day shoes 1.288888889 342 | deal hand 3.041666667 343 | debt deficit 3.458333333 344 | debt loan 4.347826087 345 | debt possession 2.481481481 346 | decision option 2.909090909 347 | decrease increase 3.541666667 348 | deep ocean 3.636363636 349 | deficit trust 1.576923077 350 | degree quantity 2.681818182 351 | degree style 1.652173913 352 | delivery distribution 3.590909091 353 | department division 4.625 354 | descent fall 3.304347826 355 | desert tract 2.2 356 | desire feeling 4.227272727 357 | desk table 4.172413793 358 | determination discovery 2.608695652 359 | determination selection 2.75 360 | development exploitation 2.2 361 | device drum 2.583333333 362 | devil satan 4.782608696 363 | dialogue play 3.44 364 | diamond parcel 1.217391304 365 | digit toe 2.64 366 | digit unit 3.681818182 367 | dinner party 3.826086957 368 | direction government 2.173913043 369 | direction protocol 
3.142857143 370 | direction traveling 3.136363636 371 | dirt sand 3.391304348 372 | dirt soil 4 373 | discharge spark 2.782608696 374 | disease illness 4.739130435 375 | display language 1.916666667 376 | distance distribution 1.739130435 377 | diversion skiing 1.826086957 378 | dividend net 2.52 379 | dividend profit 4.045454545 380 | diving swim 3.96 381 | dock herb 1.238095238 382 | document report 3.826086957 383 | dog fauna 2.666666667 384 | domain land 2.523809524 385 | door light 1.577777778 386 | doubt ego 1.772727273 387 | drama genre 2.714285714 388 | draw finish 2.375 389 | draw tie 2.958333333 390 | drawer pan 1.818181818 391 | dressing patch 2.523809524 392 | dressing sauce 2.869565217 393 | drill implement 2.666666667 394 | drink slice 2.772727273 395 | driver supporter 2.090909091 396 | driver worker 3.1 397 | drop serving 2.136363636 398 | drug liquor 3.523809524 399 | drug operator 1.416666667 400 | drum piano 3.652173913 401 | eagle hawk 3.833333333 402 | ear organ 3.909090909 403 | ease relaxation 4.5 404 | ease rest 3.916666667 405 | editing instrument 1.64 406 | editor worker 3.125 407 | eight movement 1.318181818 408 | element iron 3.318181818 409 | element mixture 3 410 | element nickel 3.782608696 411 | elevator lift 4.72 412 | emission gum 1.25 413 | employee server 3.318181818 414 | endorsement signature 3.545454545 415 | energy microwave 3.583333333 416 | engineering technology 4.16 417 | environment land 3.090909091 418 | equipment recorder 3.391304348 419 | equipment seat 2.136363636 420 | establishment religion 2.173913043 421 | event influence 1.76 422 | event phenomenon 3.173913043 423 | evidence format 1.476190476 424 | evidence record 3.681818182 425 | examination quiz 4.391304348 426 | examiner tea 1.3 427 | executive minister 3.095238095 428 | executive organization 3.2 429 | explanation theory 3.652173913 430 | express mail 3.619047619 431 | extract selection 3.086956522 432 | eye organ 4.125 433 | fabric lace 3.625 434 | fabric 
sail 2.380952381 435 | faith religion 4.227272727 436 | fantasy recycling 1.125 437 | farmer individual 2.545454545 438 | fault mistake 4.541666667 439 | fauna toy 1.304347826 440 | feature side 1.833333333 441 | feedback flow 2.666666667 442 | feeling hope 3.48 443 | female slave 2.125 444 | female woman 4.96 445 | fiction literature 3.590909091 446 | fiction romance 2.476190476 447 | field science 3 448 | field yard 3.8 449 | fight separation 2.681818182 450 | figure stamp 1.88 451 | film movie 4.912280702 452 | find implementation 2.045454545 453 | find occurrence 2.739130435 454 | finger toe 3.76 455 | flag iris 1.708333333 456 | flame reaction 1.954545455 457 | flash lightning 3.96 458 | flat housing 3.96 459 | flavor variety 3.318181818 460 | flight trip 3.772727273 461 | floor level 3.333333333 462 | floor porch 2.739130435 463 | flora plant 4.384615385 464 | flora violet 3.347826087 465 | flour garlic 2.4 466 | flower pink 2.739130435 467 | flute wind 2.826086957 468 | flyer justice 1.181818182 469 | food sausage 3.913043478 470 | foot recognition 1.431372549 471 | football rugby 3.208333333 472 | forecast message 2.5 473 | forest ground 2.681818182 474 | form type 3.909090909 475 | format packaging 2.47826087 476 | format style 4.086956522 477 | foundation support 3.913043478 478 | fox wolf 3.090909091 479 | framework grill 3.428571429 480 | freeze frost 4 481 | friend individual 2.791666667 482 | front school 1.48 483 | front surface 3 484 | fruit seed 3.818181818 485 | fuel gasoline 4.48 486 | fuel nutrition 2.428571429 487 | fund store 1.826086957 488 | funds interest 3.375 489 | furniture table 4.181818182 490 | gamble kitty 1.636363636 491 | gamble pyramid 1.380952381 492 | game tennis 4.166666667 493 | garbage rubbish 4.520833333 494 | garden plantation 3.55 495 | garlic meal 3.083333333 496 | garment sweater 3.7 497 | garment tie 3.636363636 498 | gas hydrogen 4.090909091 499 | gas neon 3.869565217 500 | gathering parade 3.625 501 | gauge meter 
3.863636364 502 | gear mechanism 4.090909091 503 | gem quartz 3.227272727 504 | gender sex 4.434782609 505 | gender size 1.571428571 506 | genre prose 2.363636364 507 | glass tub 2.88 508 | glove wear 3.416666667 509 | goal objective 4.590909091 510 | golf hockey 2.727272727 511 | good sheet 1.458333333 512 | governor mayor 3.666666667 513 | governor politician 4.125 514 | graphic image 4.318181818 515 | grass universe 1.76 516 | gray property 1.5 517 | grip handle 4.047619048 518 | grip hold 4.391304348 519 | growth process 3.291666667 520 | guarantee warranty 4.230769231 521 | guess universe 1.243243243 522 | gulf ocean 3.227272727 523 | hack machine 2.458333333 524 | hamburger nutrition 3.041666667 525 | hand script 3.260869565 526 | happening surprise 2.708333333 527 | head question 1.24 528 | head secretary 1.909090909 529 | health welfare 3.5 530 | hearing proceedings 3.130434783 531 | heart space 1.583333333 532 | heart ticker 3.181818182 533 | heat temperature 4.25 534 | height infinite 2.434782609 535 | helmet scale 1.380952381 536 | help support 4.619047619 537 | help supporter 3.833333333 538 | heritage loss 1.681818182 539 | highway street 3.545454545 540 | highway trail 2.863636364 541 | hit tourist 1.44 542 | hole opening 3.76 543 | holiday vacation 4.619047619 544 | hood protection 2.913043478 545 | hood shelter 2.571428571 546 | horn tail 2.692307692 547 | horse mount 2.875 548 | hose pipe 4 549 | housing vault 2 550 | icon representation 3.380952381 551 | implement stick 2.5 552 | impulse motive 2.869565217 553 | impulse urge 4.5 554 | information target 1.95 555 | ink liquid 3.64 556 | installation zoo 1.44 557 | institution prison 2.565217391 558 | instruction lesson 3.8 559 | instruction teaching 4.523809524 560 | instrumentation perfume 1.208333333 561 | instrumentation rod 2.863636364 562 | intensity quiet 2.181818182 563 | interest lien 2.541666667 564 | intervention treatment 2.363636364 565 | inventory listing 3.380952381 566 | investment 
tomato 1.155555556 567 | jail nick 1.782608696 568 | jail prison 4.739130435 569 | jaw lens 1.260869565 570 | join union 4 571 | joke message 2.636363636 572 | journey travel 4.8 573 | judgment sense 3.541666667 574 | jumper sweater 3.347826087 575 | jury school 1.476190476 576 | justice official 3.043478261 577 | kiss sweet 3 578 | kitchen toilet 2.545454545 579 | knight prince 3.375 580 | knowledge revolution 2.2 581 | knowledge taste 1.869565217 582 | lake stream 4.130434783 583 | lamb young 3.285714286 584 | language tongue 3.652173913 585 | latex rubber 3.913043478 586 | law personnel 1.772727273 587 | layer region 2.458333333 588 | layer snow 2.434782609 589 | leader politician 4.238095238 590 | lesson teaching 4.136363636 591 | letter text 3.84 592 | level stage 4 593 | license permission 4.19047619 594 | license security 3.130434783 595 | lien share 2.136363636 596 | life story 3 597 | line occupation 2.260869565 598 | line plane 2.833333333 599 | line queue 4.541666667 600 | lineup roll 2.958333333 601 | link union 3.583333333 602 | lion tiger 3.565217391 603 | literature poem 3.76 604 | load weight 4.043478261 605 | location property 3.347826087 606 | loss possession 2.954545455 607 | low shoulder 1.387755102 608 | lyric printer 1.244444444 609 | magnolia maple 2.68 610 | male man 4.619047619 611 | man soldier 3.875 612 | manager trainer 3.347826087 613 | map representation 3.434782609 614 | map sewing 1.347826087 615 | map video 1.695652174 616 | maple tree 3.833333333 617 | mark print 2.954545455 618 | mark slash 3.304347826 619 | mask roof 1.782608696 620 | mate relation 3.434782609 621 | matter text 3.269230769 622 | matter verse 1.913043478 623 | mayor water 1.113636364 624 | meal mixture 2.272727273 625 | meal rice 4 626 | measure money 2.565217391 627 | measure twist 1.590909091 628 | meat solid 2.5 629 | melody music 4.5 630 | memory operation 2.043478261 631 | metal zinc 3.956521739 632 | meter radar 2.954545455 633 | microwave radiation 
3.454545455 634 | middle scene 1.4 635 | minute quantity 2.608695652 636 | mode scale 2.173913043 637 | modification surprise 1.714285714 638 | moment thief 1.242424242 639 | mortal mother 2.25 640 | mortal visitor 1.923076923 641 | motion snowboarding 2.363636364 642 | motion step 3.142857143 643 | motive reason 3.909090909 644 | motorcycle tank 2.043478261 645 | mount volcano 3.238095238 646 | mouth opening 3.304347826 647 | mouth trap 2.333333333 648 | murphy potato 1.173913043 649 | museum store 2.695652174 650 | music print 1.958333333 651 | musician performer 3.952380952 652 | needle sharp 3.954545455 653 | noise trouble 3.15 654 | note obligation 2.166666667 655 | notebook product 2.217391304 656 | notebook production 1.666666667 657 | novel story 4.363636364 658 | oak tree 4.576923077 659 | objective target 4.086956522 660 | occasion second 1.75 661 | occupation place 1.956521739 662 | occurrence tsunami 2.47826087 663 | ocean pond 3.545454545 664 | office outlet 2.142857143 665 | office situation 1.24 666 | onion topic 1.183673469 667 | operation processing 3.409090909 668 | operative spy 2.739130435 669 | opinion papers 1.72 670 | opinion sentiment 3.136363636 671 | origin root 4.333333333 672 | outlet shop 3.565217391 673 | oxygen substance 2.565217391 674 | package software 3.4 675 | padding tower 1.863636364 676 | painting picture 4.186046512 677 | papers security 2.4 678 | papers ticket 3.181818182 679 | parcel region 2 680 | park stadium 3.333333333 681 | passage quotation 3.666666667 682 | patch spot 3.181818182 683 | payment spending 3.708333333 684 | permission tolerance 2.04 685 | person technician 3.458333333 686 | phantom shadow 3.227272727 687 | piazza square 2.2 688 | piece sail 1.333333333 689 | pinnacle tower 3.083333333 690 | place position 4.230769231 691 | plane sheet 2.130434783 692 | plane tool 2.304347826 693 | play turn 2.681818182 694 | plot strategy 2.9 695 | point second 1.826086957 696 | point site 2.96 697 | point spot 4.2 698 | 
polyester textile 4.285714286 699 | position view 3 700 | postage rate 3.086956522 701 | postage signal 1.48 702 | power skill 3.16 703 | prayer request 3.36 704 | problem trouble 4.608695652 705 | process rule 2.5 706 | process tail 1.12 707 | product wear 2.041666667 708 | property texture 2 709 | protection roof 3.708333333 710 | protocol rule 4.090909091 711 | publication textbook 3.428571429 712 | pumpkin vine 2.409090909 713 | pupil student 4.523809524 714 | pyramid speculation 1.7 715 | query question 4.739130435 716 | quiet silence 4.909090909 717 | racer taxi 2.19047619 718 | radio receiver 3.807692308 719 | rain storm 3.958333333 720 | ray shark 2.545454545 721 | recreation skiing 3.090909091 722 | red wine 3.4 723 | report study 3.875 724 | representative voice 2.875 725 | ring water 1.333333333 726 | rise travel 1.608695652 727 | rock stone 4.476190476 728 | roll toast 2.826086957 729 | root stem 3.666666667 730 | rub wipe 4.227272727 731 | rubber stuff 2.1 732 | rugby soccer 3.260869565 733 | sail sheet 2.083333333 734 | scandal week 1.086956522 735 | science shelter 1.136363636 736 | score success 3.208333333 737 | season summer 4.045454545 738 | season winter 4.347826087 739 | second time 4.217391304 740 | seminar sweet 1.202898551 741 | sex stance 1.476190476 742 | share stake 3.208333333 743 | shelter tent 4.25 744 | shelter wind 2.641791045 745 | shirt tiger 1.038461538 746 | side slope 3.08 747 | sight vision 4.818181818 748 | simulation theory 2.227272727 749 | skull tooth 2.590909091 750 | slash stroke 3.25 751 | smash success 2.791666667 752 | snap touch 2.038461538 753 | software writing 2.347826087 754 | song vocal 3.857142857 755 | soup spaghetti 3 756 | soup sweet 2.454545455 757 | speech word 4.045454545 758 | steel weapon 3.380952381 759 | step travel 2.086956522 760 | step walk 4.173913043 761 | storm weather 4.083333333 762 | straight stretch 3.291666667 763 | sun toy 1.25 764 | tank tub 3.52173913 765 | taxpayer window 1.21875 766 | 
"""Similarity, rank-correlation and clustering helpers for embedding evaluation."""
import math
from operator import itemgetter

import numpy
import numpy as np
from numpy.linalg import norm

# PyYAML and scikit-learn are only needed by eval_category().  Importing them
# optionally keeps the pure rank/similarity helpers usable when they are not
# installed; wherever they are installed the module exports the same names
# (yaml, KMeans, metrics) as before.
try:
    import yaml
except ImportError:
    yaml = None
try:
    from sklearn.cluster import KMeans
    from sklearn import metrics
except ImportError:
    KMeans = metrics = None

# Constant added to every vector component in cosine_sim() so that an
# all-zero vector does not produce a zero norm.
EPSILON = 1e-6


def euclidean(vec1, vec2):
    """Return the Euclidean (L2) distance between two numpy vectors."""
    diff = vec1 - vec2
    return math.sqrt(diff.dot(diff))


def cosine_sim(vec1, vec2):
    """Return the cosine similarity of two vectors, smoothed by EPSILON.

    EPSILON is added to every component of both vectors before the
    similarity is computed.  The shift is applied to copies: the original
    implementation used ``+=`` and therefore modified the caller's arrays on
    every call (and failed outright on integer arrays).
    """
    shifted1 = numpy.asarray(vec1) + EPSILON
    shifted2 = numpy.asarray(vec2) + EPSILON
    return shifted1.dot(shifted2) / (norm(shifted1) * norm(shifted2))


def assign_ranks(item_dict):
    """Map each key of ``item_dict`` to its 1-based rank by descending value.

    A value that occurs once gets its integer position; keys sharing a value
    all get the (float) average of the positions that value occupies.
    """
    ordered = sorted(item_dict.items(), key=itemgetter(1), reverse=True)
    ranked_dict = {}
    total = len(ordered)
    start = 0
    while start < total:
        # Equal values are contiguous after sorting; find the end of the run.
        end = start + 1
        while end < total and ordered[end][1] == ordered[start][1]:
            end += 1
        if end - start == 1:
            ranked_dict[ordered[start][0]] = start + 1
        else:
            # Mean of the 1-based positions start+1 .. end.
            shared_rank = (start + 1 + end) / 2.
            for key, _ in ordered[start:end]:
                ranked_dict[key] = shared_rank
        start = end
    return ranked_dict


def correlation(dict1, dict2):
    """Return the Pearson correlation between the values of two dicts.

    Values are paired by iteration order of the two dicts (not by key), as in
    the original implementation, so both dicts should be built in the same
    key order.  Returns 0. when either dict is empty or either side has zero
    variance, matching the empty-input convention of spearmans_rho().
    """
    if not dict1 or not dict2:
        return 0.
    # .values() works on both Python 2 and 3 (iteritems/itervalues do not).
    vals1 = list(dict1.values())
    vals2 = list(dict2.values())
    avg1 = 1. * sum(vals1) / len(vals1)
    avg2 = 1. * sum(vals2) / len(vals2)
    numr, den1, den2 = (0., 0., 0.)
    for val1, val2 in zip(vals1, vals2):
        numr += (val1 - avg1) * (val2 - avg2)
        den1 += (val1 - avg1) ** 2
        den2 += (val2 - avg2) ** 2
    denom = math.sqrt(den1 * den2)
    if denom == 0:
        return 0.
    return numr / denom


def spearmans_rho(ranked_dict1, ranked_dict2):
    """Return Spearman's rho for two rank dicts sharing the same keys.

    Both arguments map item -> rank (e.g. the output of assign_ranks()).
    Items are paired by key; a key of ``ranked_dict1`` missing from
    ``ranked_dict2`` raises KeyError.  Returns 0. for empty input and when
    either ranking has zero variance (previously a ZeroDivisionError).
    """
    assert len(ranked_dict1) == len(ranked_dict2)
    if len(ranked_dict1) == 0 or len(ranked_dict2) == 0:
        return 0.
    x_avg = 1. * sum(ranked_dict1.values()) / len(ranked_dict1)
    y_avg = 1. * sum(ranked_dict2.values()) / len(ranked_dict2)
    num, d_x, d_y = (0., 0., 0.)
    for key in ranked_dict1:
        x_dev = ranked_dict1[key] - x_avg
        y_dev = ranked_dict2[key] - y_avg
        num += x_dev * y_dev
        d_x += x_dev ** 2
        d_y += y_dev ** 2
    denom = math.sqrt(d_x * d_y)
    if denom == 0:
        return 0.
    return num / denom


def eval_category(word_vecs, n_clusters=41,
                  categories_path='evaluation/mcrae_typicality.yaml'):
    """Cluster category members with k-means and score against gold labels.

    Args:
        word_vecs: iterable of items; the item at 0-based position ``k`` is
            stored under key ``k`` and used as the feature row for member
            ``k``.
        n_clusters: number of k-means clusters (default 41, the previously
            hard-coded value).
        categories_path: YAML file mapping each category to an iterable of
            members (default: the previously hard-coded path).

    Returns:
        ``(r1, r2, r3)`` where ``r1`` is a 1-tuple holding the adjusted Rand
        score, ``r2`` a 1-tuple holding the adjusted mutual information and
        ``r3`` the normalized mutual information as a plain float.

    Raises:
        ImportError: if PyYAML or scikit-learn is not installed.
    """
    if yaml is None or KMeans is None:
        raise ImportError('eval_category requires PyYAML and scikit-learn')

    # safe_load only builds plain Python data and, unlike a bare yaml.load(),
    # does not need an explicit Loader on PyYAML >= 6.  The context manager
    # closes the file handle the original left open.
    with open(categories_path) as handle:
        categories = yaml.safe_load(handle)

    # Position in word_vecs -> item yielded at that position.
    # NOTE(review): members from the YAML are matched against these integer
    # positions, not against words -- confirm that is the intended layout.
    emb = dict(enumerate(word_vecs))

    labels_true = []
    labels_word = []
    # Each category gets a numeric label in YAML iteration order.
    for label, category in enumerate(categories):
        for member in categories[category]:
            if member in emb:
                labels_true.append(label)
                labels_word.append(member)

    X = np.array([emb[member] for member in labels_word])

    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X)
    labels_pred = list(kmeans.labels_)

    # NOTE(review): the original had trailing commas after the first two
    # assignments, which makes them 1-tuples.  That looks accidental, but the
    # shape is kept because callers may already index into it.
    r1 = (metrics.adjusted_rand_score(labels_true, labels_pred),)
    r2 = (metrics.adjusted_mutual_info_score(labels_true, labels_pred),)
    r3 = metrics.normalized_mutual_info_score(labels_true, labels_pred)

    return r1, r2, r3
usf.ass.txt 2 | python wordsim.py $1 men-3k.txt 3 | python wordsim.py $1 simlex-999.txt 4 | python wordsim.py $1 simverb-3500.txt 5 | python wordsim.py $1 semsim.txt 6 | python wordsim.py $1 vissim.txt 7 | python wordsim.py $1 wordsim353.txt 8 | python wordsim.py $1 wordrel353.txt 9 | python wordsim.py $1 wordsim353-full.txt 10 | python wordsim.py $1 mturk771.txt 11 | python wordsim.py $1 association.dev.txt 12 | python wordsim.py $1 association.dev.b.txt 13 | python eval_category.py $1 14 | -------------------------------------------------------------------------------- /evaluation/simlex-999.txt: -------------------------------------------------------------------------------- 1 | old new 1.58 2 | smart intelligent 9.2 3 | hard difficult 8.77 4 | happy cheerful 9.55 5 | hard easy 0.95 6 | fast rapid 8.75 7 | happy glad 9.17 8 | short long 1.23 9 | stupid dumb 9.58 10 | weird strange 8.93 11 | wide narrow 1.03 12 | bad awful 8.42 13 | easy difficult 0.58 14 | bad terrible 7.78 15 | hard simple 1.38 16 | smart dumb 0.55 17 | insane crazy 9.57 18 | happy mad 0.95 19 | large huge 9.47 20 | hard tough 8.05 21 | new fresh 6.83 22 | sharp dull 0.6 23 | quick rapid 9.7 24 | dumb foolish 6.67 25 | wonderful terrific 8.63 26 | strange odd 9.02 27 | happy angry 1.28 28 | narrow broad 1.18 29 | simple easy 9.4 30 | old fresh 0.87 31 | apparent obvious 8.47 32 | inexpensive cheap 8.72 33 | nice generous 5 34 | weird normal 0.72 35 | weird odd 9.2 36 | bad immoral 7.62 37 | sad funny 0.95 38 | wonderful great 8.05 39 | guilty ashamed 6.38 40 | beautiful wonderful 6.5 41 | confident sure 8.27 42 | dumb dense 7.27 43 | large big 9.55 44 | nice cruel 0.67 45 | impatient anxious 6.03 46 | big broad 6.73 47 | strong proud 3.17 48 | unnecessary necessary 0.63 49 | restless young 1.6 50 | dumb intelligent 0.75 51 | bad great 0.35 52 | difficult simple 0.87 53 | necessary important 7.37 54 | bad terrific 0.65 55 | mad glad 1.45 56 | honest guilty 1.18 57 | easy tough 0.52 58 | easy 
flexible 4.1 59 | certain sure 8.42 60 | essential necessary 8.97 61 | different normal 1.08 62 | sly clever 7.25 63 | crucial important 8.82 64 | harsh cruel 8.18 65 | childish foolish 5.5 66 | scarce rare 9.17 67 | friendly generous 5.9 68 | fragile frigid 2.38 69 | long narrow 3.57 70 | big heavy 6.18 71 | rough frigid 2.47 72 | bizarre strange 9.37 73 | illegal immoral 4.28 74 | bad guilty 4.2 75 | modern ancient 0.73 76 | new ancient 0.23 77 | dull funny 0.55 78 | happy young 2 79 | easy big 1.12 80 | great awful 1.17 81 | tiny huge 0.6 82 | polite proper 7.63 83 | modest ashamed 2.65 84 | exotic rare 8.05 85 | dumb clever 1.17 86 | delightful wonderful 8.65 87 | noticeable obvious 8.48 88 | afraid anxious 5.07 89 | formal proper 8.02 90 | dreary dull 8.25 91 | delightful cheerful 6.58 92 | unhappy mad 5.95 93 | sad terrible 5.4 94 | sick crazy 3.57 95 | violent angry 6.98 96 | laden heavy 5.9 97 | dirty cheap 1.6 98 | elastic flexible 7.78 99 | hard dense 5.9 100 | recent new 7.05 101 | bold proud 3.97 102 | sly strange 1.97 103 | strange sly 2.07 104 | dumb rare 0.48 105 | sly tough 0.58 106 | terrific mad 0.4 107 | modest flexible 0.98 108 | fresh wide 0.4 109 | huge dumb 0.48 110 | large flexible 0.48 111 | dirty narrow 0.3 112 | wife husband 2.3 113 | book text 6.35 114 | groom bride 3.17 115 | night day 1.88 116 | south north 2.2 117 | plane airport 3.65 118 | uncle aunt 5.5 119 | horse mare 8.33 120 | bottom top 0.7 121 | friend buddy 8.78 122 | student pupil 9.35 123 | world globe 6.67 124 | leg arm 2.88 125 | plane jet 8.1 126 | woman man 3.33 127 | horse colt 7.07 128 | actress actor 7.12 129 | teacher instructor 9.25 130 | movie film 8.87 131 | bird hawk 7.85 132 | word dictionary 3.68 133 | money salary 7.88 134 | dog cat 1.75 135 | area region 9.47 136 | navy army 6.43 137 | book literature 7.53 138 | clothes closet 3.27 139 | sunset sunrise 2.47 140 | child adult 2.98 141 | cow cattle 9.52 142 | book story 5.63 143 | winter summer 2.38 144 | taxi 
cab 9.2 145 | tree maple 5.53 146 | bed bedroom 3.4 147 | roof ceiling 7.58 148 | disease infection 7.15 149 | arm shoulder 4.85 150 | sheep lamb 8.42 151 | lady gentleman 3.42 152 | boat anchor 2.25 153 | priest monk 6.28 154 | toe finger 4.68 155 | river stream 7.3 156 | anger fury 8.73 157 | date calendar 4.42 158 | sea ocean 8.27 159 | second minute 4.62 160 | hand thumb 3.88 161 | wood log 7.3 162 | mud dirt 7.32 163 | hallway corridor 9.28 164 | way manner 7.62 165 | mouse cat 1.12 166 | cop sheriff 9.05 167 | death burial 4.93 168 | music melody 6.98 169 | beer alcohol 7.5 170 | mouth lip 7.1 171 | storm hurricane 6.38 172 | tax income 2.38 173 | flower violet 6.95 174 | paper cardboard 5.38 175 | floor ceiling 1.73 176 | beach seashore 8.33 177 | rod curtain 3.03 178 | hound fox 2.38 179 | street alley 5.48 180 | boat deck 4.28 181 | car horn 2.57 182 | friend guest 4.25 183 | employer employee 3.65 184 | hand wrist 3.97 185 | ball cannon 2.58 186 | alcohol brandy 6.98 187 | victory triumph 8.98 188 | telephone booth 3.63 189 | door doorway 5.4 190 | motel inn 8.17 191 | clothes cloth 5.47 192 | steak meat 7.47 193 | nail thumb 3.55 194 | band orchestra 7.08 195 | book bible 5 196 | business industry 7.02 197 | winter season 6.27 198 | decade century 3.48 199 | alcohol gin 8.65 200 | hat coat 2.67 201 | window door 3.33 202 | arm wrist 3.57 203 | house apartment 5.8 204 | glass crystal 6.27 205 | wine brandy 5.15 206 | creator maker 9.62 207 | dinner breakfast 3.33 208 | arm muscle 3.72 209 | bubble suds 8.57 210 | bread flour 3.33 211 | death tragedy 5.8 212 | absence presence 0.4 213 | gun cannon 5.68 214 | grass blade 4.57 215 | ball basket 1.67 216 | hose garden 1.67 217 | boy kid 7.5 218 | church choir 2.95 219 | clothes drawer 3.02 220 | tower bell 1.9 221 | father parent 7.07 222 | school grade 4.42 223 | parent adult 5.37 224 | bar jail 1.9 225 | car highway 3.4 226 | dictionary definition 6.25 227 | door cellar 1.97 228 | army legion 5.95 229 | 
metal aluminum 7.25 230 | chair bench 6.67 231 | cloud fog 6 232 | boy son 6.75 233 | water ice 6.47 234 | bed blanket 3.02 235 | attorney lawyer 9.35 236 | area zone 8.33 237 | business company 9.02 238 | clothes fabric 5.87 239 | sweater jacket 7.15 240 | money capital 6.67 241 | hand foot 4.17 242 | alcohol cocktail 6.73 243 | yard inch 3.78 244 | molecule atom 6.45 245 | lens camera 4.28 246 | meal dinner 7.15 247 | eye tear 3.55 248 | god devil 1.8 249 | loop belt 3.1 250 | rat mouse 7.78 251 | motor engine 8.65 252 | car cab 7.42 253 | cat lion 6.75 254 | size magnitude 6.33 255 | reality fantasy 1.03 256 | door gate 5.25 257 | cat pet 5.95 258 | tin aluminum 6.42 259 | bone jaw 4.17 260 | cereal wheat 3.75 261 | house key 1.9 262 | blood flesh 4.28 263 | door corridor 3.73 264 | god spirit 7.3 265 | capability competence 7.62 266 | abundance plenty 8.97 267 | sofa chair 6.67 268 | wall brick 4.68 269 | horn drum 2.68 270 | organ liver 6.15 271 | strength might 7.07 272 | phrase word 5.48 273 | band parade 3.92 274 | stomach waist 5.9 275 | cloud storm 5.6 276 | joy pride 5 277 | noise rattle 6.17 278 | rain mist 5.97 279 | beer beverage 5.42 280 | man uncle 3.92 281 | apple juice 2.88 282 | intelligence logic 6.5 283 | communication language 7.47 284 | mink fur 6.83 285 | mob crowd 7.85 286 | shore coast 8.83 287 | wire cord 7.62 288 | bird turkey 6.58 289 | bed crib 7.3 290 | competence ability 7.5 291 | cloud haze 7.32 292 | supper meal 7.53 293 | bar cage 2.8 294 | water salt 1.3 295 | sense intuition 7.68 296 | situation condition 6.58 297 | crime theft 7.53 298 | style fashion 8.5 299 | boundary border 9.08 300 | arm body 4.05 301 | boat car 2.37 302 | sandwich lunch 6.3 303 | bride princess 2.8 304 | heroine hero 8.78 305 | car gauge 1.13 306 | insect bee 6.07 307 | crib cradle 8.55 308 | animal person 3.05 309 | marijuana herb 6.5 310 | bed hospital 0.92 311 | cheek tongue 4.52 312 | disc computer 3.2 313 | curve angle 3.33 314 | grass moss 5 315 | 
school law 1.13 316 | foot head 2.3 317 | mother guardian 6.5 318 | orthodontist dentist 8.27 319 | alcohol whiskey 7.27 320 | mouth tooth 6.3 321 | breakfast bacon 4.37 322 | bathroom bedroom 3.4 323 | plate bowl 5.23 324 | meat bacon 5.8 325 | air helium 3.63 326 | worker employer 5.37 327 | body chest 4.45 328 | son father 3.82 329 | heart surgery 1.08 330 | woman secretary 1.98 331 | man father 4.83 332 | beach island 5.6 333 | story topic 5 334 | game fun 3.42 335 | weekend week 4 336 | couple pair 8.33 337 | woman wife 5.72 338 | sheep cattle 4.77 339 | purse bag 8.33 340 | ceiling cathedral 2.42 341 | bean coffee 5.15 342 | wood paper 2.88 343 | top side 1.9 344 | crime fraud 5.65 345 | pain harm 5.38 346 | lover companion 5.97 347 | evening dusk 7.78 348 | father daughter 2.62 349 | wine liquor 7.85 350 | cow goat 2.93 351 | belief opinion 7.7 352 | reality illusion 1.42 353 | pact agreement 9.02 354 | wealth poverty 1.27 355 | accident emergency 4.93 356 | battle conquest 7.22 357 | friend teacher 2.62 358 | illness infection 6.9 359 | game trick 2.32 360 | brother son 3.48 361 | aunt nephew 3.1 362 | worker mechanic 4.92 363 | doctor orthodontist 5.58 364 | oak maple 6.03 365 | bee queen 3.27 366 | car bicycle 3.47 367 | goal quest 5.83 368 | august month 5.53 369 | army squad 5.08 370 | cloud weather 4.87 371 | physician doctor 8.88 372 | canyon valley 6.75 373 | river valley 1.67 374 | sun sky 2.27 375 | target arrow 3.25 376 | chocolate pie 2.27 377 | circumstance situation 7.85 378 | opinion choice 5.43 379 | rhythm melody 6.12 380 | gut nerve 4.93 381 | day dawn 5.47 382 | cattle beef 7.03 383 | doctor professor 4.65 384 | arm vein 3.65 385 | room bath 3.33 386 | corporation business 9.02 387 | fun football 1.97 388 | hill cliff 4.28 389 | bone ankle 3.82 390 | apple candy 2.08 391 | helper maid 5.58 392 | leader manager 7.27 393 | lemon tea 1.6 394 | bee ant 2.78 395 | basketball baseball 4.92 396 | rice bean 2.72 397 | bed furniture 6.08 398 | 
emotion passion 7.72 399 | anarchy chaos 7.93 400 | crime violation 7.12 401 | machine engine 5.58 402 | beach sea 4.68 403 | alley bowl 1.53 404 | jar bottle 7.83 405 | strength capability 5.28 406 | seed mustard 3.48 407 | guitar drum 3.78 408 | opinion idea 5.7 409 | north west 3.63 410 | diet salad 2.98 411 | mother wife 3.02 412 | dad mother 3.55 413 | captain sailor 5 414 | meter yard 5.6 415 | beer champagne 4.45 416 | motor boat 2.57 417 | card bridge 1.97 418 | science psychology 4.92 419 | sinner saint 1.6 420 | destruction construction 0.98 421 | crowd bunch 7.42 422 | beach reef 3.77 423 | man child 4.13 424 | bread cheese 1.95 425 | champion winner 8.73 426 | celebration ceremony 7.72 427 | menu order 3.62 428 | king princess 3.27 429 | wealth prestige 6.07 430 | endurance strength 6.58 431 | danger threat 8.78 432 | god priest 4.5 433 | men fraternity 3.13 434 | buddy companion 8.65 435 | teacher helper 4.28 436 | body stomach 3.93 437 | tongue throat 3.1 438 | house carpet 1.38 439 | intelligence skill 5.35 440 | journey conquest 4.72 441 | god prey 1.23 442 | brother soul 0.97 443 | adversary opponent 9.05 444 | death catastrophe 4.13 445 | monster demon 6.95 446 | day morning 4.87 447 | man victor 1.9 448 | friend guy 3.88 449 | song story 3.97 450 | ray sunshine 6.83 451 | guy stud 5.83 452 | chicken rice 1.43 453 | box elevator 1.32 454 | butter potato 1.22 455 | apartment furniture 1.28 456 | lake swamp 4.92 457 | salad vinegar 1.13 458 | flower bulb 4.48 459 | cloud mist 6.67 460 | driver pilot 6.28 461 | sugar honey 5.13 462 | body shoulder 2.88 463 | idea image 3.55 464 | father brother 4.2 465 | moon planet 5.87 466 | ball costume 2.32 467 | rail fence 5.22 468 | room bed 2.35 469 | flower bush 4.25 470 | bone knee 4.17 471 | arm knee 2.75 472 | bottom side 2.63 473 | vessel vein 5.15 474 | cat rabbit 2.37 475 | meat sandwich 2.35 476 | belief concept 5.08 477 | intelligence insight 5.9 478 | attention interest 7.22 479 | attitude confidence 
4.35 480 | right justice 7.05 481 | argument agreement 1.45 482 | depth magnitude 6.12 483 | medium news 3.65 484 | winner candidate 2.78 485 | birthday date 5.08 486 | fee payment 7.15 487 | bible hymn 5.15 488 | exit doorway 5.5 489 | man sentry 3.25 490 | aisle hall 6.35 491 | whiskey gin 6.28 492 | blood marrow 3.4 493 | oil mink 1.23 494 | floor deck 5.55 495 | roof floor 2.62 496 | door floor 1.67 497 | shoulder head 3.42 498 | wagon carriage 7.7 499 | car carriage 5.13 500 | elbow ankle 3.13 501 | wealth fame 4.02 502 | sorrow shame 4.77 503 | administration management 7.25 504 | communication conversation 8.02 505 | pollution atmosphere 4.25 506 | anatomy biology 5.33 507 | college profession 3.12 508 | book topic 2.07 509 | formula equation 7.95 510 | book information 5 511 | boy partner 1.9 512 | sky universe 4.68 513 | population people 7.68 514 | college class 4.13 515 | chief mayor 4.85 516 | rabbi minister 7.62 517 | meter inch 5.08 518 | polyester cotton 5.63 519 | lawyer banker 1.88 520 | violin instrument 6.58 521 | camp cabin 4.2 522 | pot appliance 2.53 523 | linen fabric 7.47 524 | whiskey champagne 5.33 525 | girl child 5.38 526 | cottage cabin 7.72 527 | bird hen 7.03 528 | racket noise 8.1 529 | sunset evening 5.98 530 | drizzle rain 9.17 531 | adult baby 2.22 532 | charcoal coal 7.63 533 | body spine 4.78 534 | head nail 2.47 535 | log timber 8.05 536 | spoon cup 2.02 537 | body nerve 3.13 538 | man husband 5.32 539 | bone neck 2.53 540 | frustration anger 6.5 541 | river sea 5.72 542 | task job 8.87 543 | club society 5.23 544 | reflection image 7.27 545 | prince king 5.92 546 | snow weather 5.48 547 | people party 2.2 548 | boy brother 6.67 549 | root grass 3.55 550 | brow eye 3.82 551 | money pearl 2.1 552 | money diamond 3.42 553 | vehicle bus 6.47 554 | cab bus 5.6 555 | house barn 4.33 556 | finger palm 3.33 557 | car bridge 0.95 558 | effort difficulty 4.45 559 | fact insight 4.77 560 | job management 3.97 561 | cancer sickness 7.93 
562 | word newspaper 2.47 563 | composer writer 6.58 564 | actor singer 4.52 565 | shelter hut 6.47 566 | bathroom kitchen 3.1 567 | cabin hut 6.53 568 | door kitchen 1.67 569 | value belief 7.07 570 | wisdom intelligence 7.47 571 | ignorance intelligence 1.5 572 | happiness luck 2.38 573 | idea scheme 6.75 574 | mood emotion 8.12 575 | happiness peace 6.03 576 | despair misery 7.22 577 | logic arithmetic 3.97 578 | denial confession 1.03 579 | argument criticism 5.08 580 | aggression hostility 8.48 581 | hysteria confusion 6.33 582 | chemistry theory 3.17 583 | trial verdict 3.33 584 | comfort safety 5.8 585 | confidence self 3.12 586 | vision perception 6.88 587 | era decade 5.4 588 | biography fiction 1.38 589 | discussion argument 5.48 590 | code symbol 6.03 591 | danger disease 3 592 | accident catastrophe 5.9 593 | journey trip 8.88 594 | activity movement 7.15 595 | gossip news 5.22 596 | father god 3.57 597 | action course 5.45 598 | fever illness 7.65 599 | aviation flight 8.18 600 | game action 4.85 601 | molecule air 3.05 602 | home state 2.58 603 | word literature 4.77 604 | adult guardian 6.9 605 | newspaper information 5.65 606 | communication television 5.6 607 | cousin uncle 4.63 608 | author reader 1.6 609 | guy partner 3.57 610 | area corner 2.07 611 | ballad song 7.53 612 | wall decoration 2.62 613 | word page 2.92 614 | nurse scientist 2.08 615 | politician president 7.38 616 | president mayor 5.68 617 | book essay 4.72 618 | man warrior 4.72 619 | article journal 6.18 620 | breakfast supper 4.4 621 | crowd parade 3.93 622 | aisle hallway 6.75 623 | teacher rabbi 4.37 624 | hip lip 1.43 625 | book article 5.43 626 | room cell 4.58 627 | box booth 3.8 628 | daughter kid 4.17 629 | limb leg 6.9 630 | liver lung 2.7 631 | classroom hallway 2 632 | mountain ledge 3.73 633 | car elevator 1.03 634 | bed couch 3.42 635 | clothes button 2.3 636 | clothes coat 5.35 637 | kidney organ 6.17 638 | apple sauce 1.43 639 | chicken steak 3.73 640 | car hose 
0.87 641 | tobacco cigarette 7.5 642 | student professor 1.95 643 | baby daughter 5 644 | pipe cigar 6.03 645 | milk juice 4.05 646 | box cigar 1.25 647 | apartment hotel 3.33 648 | cup cone 3.17 649 | horse ox 3.02 650 | throat nose 2.8 651 | bone teeth 4.17 652 | bone elbow 3.78 653 | bacon bean 1.22 654 | cup jar 5.13 655 | proof fact 7.3 656 | appointment engagement 6.75 657 | birthday year 1.67 658 | word clue 2.53 659 | author creator 8.02 660 | atom carbon 3.1 661 | archbishop bishop 7.05 662 | letter paragraph 4 663 | page paragraph 3.03 664 | steeple chapel 7.08 665 | muscle bone 3.65 666 | muscle tongue 5 667 | boy soldier 2.15 668 | belly abdomen 8.13 669 | guy girl 3.33 670 | bed chair 3.5 671 | clothes jacket 5.15 672 | gun knife 3.65 673 | tin metal 5.63 674 | bottle container 7.93 675 | hen turkey 6.13 676 | meat bread 1.67 677 | arm bone 3.83 678 | neck spine 5.32 679 | apple lemon 4.05 680 | agony grief 7.63 681 | assignment task 8.7 682 | night dawn 2.95 683 | dinner soup 3.72 684 | calf bull 4.93 685 | snow storm 4.8 686 | nail hand 3.42 687 | dog horse 2.38 688 | arm neck 1.58 689 | ball glove 1.75 690 | flu fever 6.08 691 | fee salary 3.72 692 | nerve brain 3.88 693 | beast animal 7.83 694 | dinner chicken 2.85 695 | girl maid 2.93 696 | child boy 5.75 697 | alcohol wine 7.42 698 | nose mouth 3.73 699 | street car 2.38 700 | bell door 2.2 701 | box hat 1.3 702 | belief impression 5.95 703 | bias opinion 5.6 704 | attention awareness 8.73 705 | anger mood 4.1 706 | elegance style 5.72 707 | beauty age 1.58 708 | book theme 2.58 709 | friend mother 2.53 710 | vitamin iron 5.55 711 | car factory 2.75 712 | pact condition 2.45 713 | chapter choice 0.48 714 | arithmetic rhythm 2.35 715 | winner presence 1.08 716 | belief flower 0.4 717 | winner goal 3.23 718 | trick size 0.48 719 | choice vein 0.98 720 | hymn conquest 0.68 721 | endurance band 0.4 722 | jail choice 1.08 723 | condition boy 0.48 724 | flower endurance 0.4 725 | hole agreement 0.3 726 
| doctor temper 0.48 727 | fraternity door 0.68 728 | task woman 0.68 729 | fraternity baseball 0.88 730 | cent size 0.4 731 | presence door 0.48 732 | mouse management 0.48 733 | task highway 0.48 734 | liquor century 0.4 735 | task straw 0.68 736 | island task 0.3 737 | night chapter 0.48 738 | pollution president 0.68 739 | gun trick 0.48 740 | bath trick 0.58 741 | diet apple 1.18 742 | cent wife 0.58 743 | chapter tail 0.3 744 | course stomach 0.58 745 | hymn straw 0.4 746 | dentist colonel 0.4 747 | wife straw 0.4 748 | hole wife 0.68 749 | pupil president 0.78 750 | bath wife 0.48 751 | people cent 0.48 752 | formula log 1.77 753 | woman fur 0.58 754 | apple sunshine 0.58 755 | gun dawn 1.18 756 | meal waist 0.98 757 | camera president 0.48 758 | liquor band 0.68 759 | stomach vein 2.35 760 | gun fur 0.3 761 | couch baseball 0.88 762 | worker camera 0.68 763 | deck mouse 0.48 764 | rice boy 0.4 765 | people gun 0.68 766 | cliff tail 0.3 767 | ankle window 0.3 768 | princess island 0.3 769 | container mouse 0.3 770 | wagon container 2.65 771 | people balloon 0.48 772 | dollar people 0.4 773 | bath balloon 0.4 774 | stomach bedroom 0.4 775 | bicycle bedroom 0.4 776 | log bath 0.4 777 | bowl tail 0.48 778 | go come 2.42 779 | take steal 6.18 780 | listen hear 8.17 781 | think rationalize 8.25 782 | occur happen 9.32 783 | vanish disappear 9.8 784 | multiply divide 1.75 785 | plead beg 9.08 786 | begin originate 8.2 787 | protect defend 9.13 788 | kill destroy 5.9 789 | create make 8.72 790 | accept reject 0.83 791 | ignore avoid 6.87 792 | carry bring 5.8 793 | leave enter 0.95 794 | choose elect 7.62 795 | lose fail 7.33 796 | encourage discourage 1.58 797 | achieve accomplish 8.57 798 | make construct 8.33 799 | listen obey 4.93 800 | inform notify 9.25 801 | receive give 1.47 802 | borrow beg 2.62 803 | take obtain 7.1 804 | advise recommend 8.1 805 | imitate portray 6.75 806 | win succeed 7.9 807 | think decide 5.13 808 | greet meet 6.17 809 | agree argue 
0.77 810 | enjoy entertain 5.92 811 | destroy make 1.6 812 | save protect 6.58 813 | give lend 7.22 814 | understand know 7.47 815 | take receive 5.08 816 | accept acknowledge 6.88 817 | decide choose 8.87 818 | accept believe 6.75 819 | keep possess 8.27 820 | roam wander 8.83 821 | succeed fail 0.83 822 | spend save 0.55 823 | leave go 7.63 824 | come attend 8.1 825 | know believe 5.5 826 | gather meet 7.3 827 | make earn 7.62 828 | forget ignore 3.07 829 | multiply add 2.7 830 | shrink grow 0.23 831 | arrive leave 1.33 832 | succeed try 3.98 833 | accept deny 1.75 834 | arrive come 7.05 835 | agree differ 1.05 836 | send receive 1.08 837 | win dominate 5.68 838 | add divide 2.3 839 | kill choke 4.92 840 | acquire get 8.82 841 | participate join 7.7 842 | leave remain 2.53 843 | go enter 4 844 | take carry 5.23 845 | forget learn 1.18 846 | appoint elect 8.17 847 | engage marry 5.43 848 | ask pray 3.72 849 | go send 3.75 850 | take deliver 4.37 851 | speak hear 3.02 852 | analyze evaluate 8.03 853 | argue rationalize 4.2 854 | lose keep 1.05 855 | compare analyze 8.1 856 | disorganize organize 1.45 857 | go allow 3.62 858 | take possess 7.2 859 | learn listen 3.88 860 | destroy construct 0.92 861 | create build 8.48 862 | steal buy 1.13 863 | kill hang 4.45 864 | forget know 0.92 865 | create imagine 5.13 866 | do happen 4.23 867 | win accomplish 7.85 868 | give deny 1.43 869 | deserve earn 5.8 870 | get put 1.98 871 | locate find 8.73 872 | appear attend 6.28 873 | know comprehend 7.63 874 | pretend imagine 8.47 875 | satisfy please 7.67 876 | cherish keep 4.85 877 | argue differ 5.15 878 | overcome dominate 6.25 879 | behave obey 7.3 880 | cooperate participate 6.43 881 | achieve try 4.42 882 | fail discourage 3.33 883 | begin quit 1.28 884 | say participate 3.82 885 | come bring 2.42 886 | declare announce 9.08 887 | read comprehend 4.7 888 | take leave 2.47 889 | proclaim announce 8.18 890 | acquire obtain 8.57 891 | conclude decide 7.75 892 | please plead 
2.98 893 | argue prove 4.83 894 | ask plead 6.47 895 | find disappear 0.77 896 | inspect examine 8.75 897 | verify justify 4.08 898 | assume predict 4.85 899 | learn evaluate 4.17 900 | argue justify 5 901 | make become 4.77 902 | discover originate 4.83 903 | achieve succeed 7.5 904 | give put 3.65 905 | understand listen 4.68 906 | expand grow 8.27 907 | borrow sell 1.73 908 | keep protect 5.4 909 | explain prove 4.1 910 | assume pretend 3.72 911 | agree please 4.13 912 | forgive forget 3.92 913 | clarify explain 8.33 914 | understand forgive 4.87 915 | remind forget 0.87 916 | get remain 1.6 917 | realize discover 7.47 918 | require inquire 1.82 919 | ignore ask 1.07 920 | think inquire 4.77 921 | reject avoid 4.78 922 | argue persuade 6.23 923 | pursue persuade 3.17 924 | accept forgive 3.73 925 | do quit 1.17 926 | investigate examine 8.1 927 | discuss explain 6.67 928 | owe lend 2.32 929 | explore discover 8.48 930 | complain argue 4.8 931 | withdraw reject 6.38 932 | keep borrow 2.25 933 | beg ask 6 934 | arrange organize 8.27 935 | reduce shrink 8.02 936 | speak acknowledge 4.67 937 | give borrow 2.22 938 | kill defend 2.63 939 | disappear shrink 5.8 940 | deliver carry 3.88 941 | breathe choke 1.37 942 | acknowledge notify 5.3 943 | become seem 2.63 944 | pretend seem 4.68 945 | accomplish become 4 946 | contemplate think 8.82 947 | determine predict 5.8 948 | please entertain 5 949 | remain retain 5.75 950 | pretend portray 7.03 951 | forget retain 0.63 952 | want choose 4.78 953 | lose get 0.77 954 | try think 2.62 955 | become appear 4.77 956 | leave ignore 4.42 957 | accept recommend 2.75 958 | leave wander 3.57 959 | keep give 1.05 960 | give allow 5.15 961 | bring send 2.97 962 | absorb learn 5.48 963 | acquire find 6.38 964 | leave appear 0.97 965 | create destroy 0.63 966 | begin go 7.42 967 | get buy 5.08 968 | collect save 6.67 969 | replace restore 5.73 970 | join add 8.1 971 | join marry 5.35 972 | accept deliver 1.58 973 | attach join 7.75 974 
| put hang 3 975 | go sell 0.97 976 | communicate pray 3.55 977 | give steal 0.5 978 | add build 4.92 979 | bring restore 2.62 980 | comprehend satisfy 2.55 981 | portray decide 1.18 982 | organize become 1.77 983 | give know 0.88 984 | say verify 4.9 985 | cooperate join 5.18 986 | arrange require 0.98 987 | borrow want 1.77 988 | investigate pursue 7.15 989 | ignore explore 0.4 990 | bring complain 0.98 991 | enter owe 0.68 992 | portray notify 0.78 993 | remind sell 0.4 994 | absorb possess 5 995 | join acquire 2.85 996 | send attend 1.67 997 | gather attend 4.8 998 | absorb withdraw 2.97 999 | attend arrive 6.08 1000 | -------------------------------------------------------------------------------- /evaluation/wordrel353.txt: -------------------------------------------------------------------------------- 1 | computer keyboard 7.62 2 | Jerusalem Israel 8.46 3 | planet galaxy 8.11 4 | canyon landscape 7.53 5 | OPEC country 5.63 6 | day summer 3.94 7 | day dawn 7.53 8 | country citizen 7.31 9 | planet people 5.75 10 | environment ecology 8.81 11 | Maradona football 8.62 12 | OPEC oil 8.59 13 | money bank 8.50 14 | computer software 8.50 15 | law lawyer 8.38 16 | weather forecast 8.34 17 | network hardware 8.31 18 | nature environment 8.31 19 | FBI investigation 8.31 20 | money wealth 8.27 21 | psychology Freud 8.21 22 | news report 8.16 23 | war troops 8.13 24 | physics proton 8.12 25 | bank money 8.12 26 | stock market 8.08 27 | planet constellation 8.06 28 | credit card 8.06 29 | hotel reservation 8.03 30 | closet clothes 8.00 31 | soap opera 7.94 32 | planet astronomer 7.94 33 | planet space 7.92 34 | movie theater 7.92 35 | treatment recovery 7.91 36 | baby mother 7.85 37 | money deposit 7.73 38 | television film 7.72 39 | psychology mind 7.69 40 | game team 7.69 41 | admission ticket 7.69 42 | Jerusalem Palestinian 7.65 43 | Arafat terror 7.65 44 | boxing round 7.61 45 | computer internet 7.58 46 | money property 7.57 47 | tennis racket 7.56 48 | telephone 
communication 7.50 49 | currency market 7.50 50 | psychology cognition 7.48 51 | seafood sea 7.47 52 | book paper 7.46 53 | book library 7.46 54 | psychology depression 7.42 55 | fighting defeating 7.41 56 | movie star 7.38 57 | hundred percent 7.38 58 | dollar profit 7.38 59 | money possession 7.29 60 | cup drink 7.25 61 | psychology health 7.23 62 | summer drought 7.16 63 | investor earning 7.13 64 | company stock 7.08 65 | stroke hospital 7.03 66 | liability insurance 7.03 67 | game victory 7.03 68 | psychology anxiety 7.00 69 | game defeat 6.97 70 | FBI fingerprint 6.94 71 | money withdrawal 6.88 72 | psychology fear 6.85 73 | drug abuse 6.85 74 | concert virtuoso 6.81 75 | computer laboratory 6.78 76 | love sex 6.77 77 | problem challenge 6.75 78 | movie critic 6.73 79 | Arafat peace 6.73 80 | bed closet 6.72 81 | lawyer evidence 6.69 82 | fertility egg 6.69 83 | precedent law 6.65 84 | minister party 6.63 85 | psychology clinic 6.58 86 | cup coffee 6.58 87 | water seepage 6.56 88 | government crisis 6.56 89 | space world 6.53 90 | dividend calculation 6.48 91 | victim emergency 6.47 92 | luxury car 6.47 93 | tool implement 6.46 94 | competition price 6.44 95 | psychology doctor 6.42 96 | gender equality 6.41 97 | listing category 6.38 98 | video archive 6.34 99 | oil stock 6.34 100 | governor office 6.34 101 | discovery space 6.34 102 | record number 6.31 103 | brother monk 6.27 104 | production crew 6.25 105 | nature man 6.25 106 | family planning 6.25 107 | disaster area 6.25 108 | food preparation 6.22 109 | preservation world 6.19 110 | movie popcorn 6.19 111 | lover quarrel 6.19 112 | game series 6.19 113 | dollar loss 6.09 114 | weapon secret 6.06 115 | shower flood 6.03 116 | registration arrangement 6.00 117 | arrival hotel 6.00 118 | announcement warning 6.00 119 | game round 5.97 120 | baseball season 5.97 121 | drink mouth 5.96 122 | life lesson 5.94 123 | grocery money 5.94 124 | energy crisis 5.94 125 | reason criterion 5.91 126 | equipment maker 
5.91 127 | cup liquid 5.90 128 | deployment withdrawal 5.88 129 | tiger zoo 5.87 130 | journey car 5.85 131 | money laundering 5.65 132 | summer nature 5.63 133 | decoration valor 5.63 134 | Mars scientist 5.63 135 | alcohol chemistry 5.54 136 | disability death 5.47 137 | change attitude 5.44 138 | arrangement accommodation 5.41 139 | territory surface 5.34 140 | size prominence 5.31 141 | exhibit memorabilia 5.31 142 | credit information 5.31 143 | territory kilometer 5.28 144 | death row 5.25 145 | doctor liability 5.19 146 | impartiality interest 5.16 147 | energy laboratory 5.09 148 | secretary senate 5.06 149 | death inmate 5.03 150 | monk oracle 5.00 151 | cup food 5.00 152 | journal association 4.97 153 | street children 4.94 154 | car flight 4.94 155 | space chemistry 4.88 156 | situation conclusion 4.81 157 | word similarity 4.75 158 | peace plan 4.75 159 | consumer energy 4.75 160 | ministry culture 4.69 161 | smart student 4.62 162 | investigation effort 4.59 163 | image surface 4.56 164 | life term 4.50 165 | start match 4.47 166 | computer news 4.47 167 | board recommendation 4.47 168 | lad brother 4.46 169 | observation architecture 4.38 170 | coast hill 4.38 171 | deployment departure 4.25 172 | benchmark index 4.25 173 | attempt peace 4.25 174 | consumer confidence 4.13 175 | start year 4.06 176 | focus life 4.06 177 | development issue 3.97 178 | theater history 3.91 179 | situation isolation 3.88 180 | profit warning 3.88 181 | media trading 3.88 182 | chance credibility 3.88 183 | precedent information 3.85 184 | architecture century 3.78 185 | population development 3.75 186 | stock live 3.73 187 | peace atmosphere 3.69 188 | morality marriage 3.69 189 | minority peace 3.69 190 | atmosphere landscape 3.69 191 | report gain 3.63 192 | music project 3.63 193 | seven series 3.56 194 | experience music 3.47 195 | school center 3.44 196 | five month 3.38 197 | announcement production 3.38 198 | morality importance 3.31 199 | money operation 3.31 200 
def read_word_vectors(filename):
    """Read word vectors from *filename* and L2-normalize each one.

    Every non-blank line is lower-cased and split on whitespace: the first
    token is the word, the remaining tokens are parsed as floats. Files whose
    name ends in ``.gz`` are read through ``gzip``. If a word occurs more
    than once, the last occurrence wins.

    Returns a dict mapping each word to its normalized ``numpy`` vector.
    """
    word_vecs = {}
    opener = gzip.open if filename.endswith('.gz') else open
    # gzip.open defaults to binary on Python 3; ask for text explicitly there
    # so that words are str on both interpreter generations.
    mode = 'rt' if sys.version_info[0] >= 3 else 'r'

    # `with` guarantees the handle is released even if a value fails to parse.
    with opener(filename, mode) as file_object:
        for line in file_object:
            fields = line.strip().lower().split()
            if not fields:
                # Blank (e.g. trailing) lines carry no vector; skip them
                # instead of failing on fields[0].
                continue
            vector = numpy.array([float(val) for val in fields[1:]],
                                 dtype=float)
            # The small constant keeps an all-zero vector from dividing by 0.
            vector /= math.sqrt((vector ** 2).sum() + 1e-6)
            word_vecs[fields[0]] = vector

    sys.stderr.write("Vectors read from: "+filename+" \n")
    return word_vecs


if __name__=='__main__':
    # Usage: wordsim.py <word_vector_file> <word_similarity_file>
    word_vec_file = sys.argv[1]
    word_sim_file = sys.argv[2]

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Evaluating with : " + word_sim_file)
    word_vecs = read_word_vectors(word_vec_file)
    print("%15s %15s %15s" % ("Num Pairs", "Not found", "Rho"))
    manual_dict, auto_dict = ({}, {})
    not_found, total_size = (0, 0)
    with open(word_sim_file, 'r') as sim_file:
        for line in sim_file:
            fields = line.strip().lower().split()
            if not fields:
                # Tolerate blank lines in the gold-standard file.
                continue
            word1, word2, val = fields
            if word1 in word_vecs and word2 in word_vecs:
                manual_dict[(word1, word2)] = float(val)
                auto_dict[(word1, word2)] = cosine_sim(word_vecs[word1],
                                                       word_vecs[word2])
            else:
                not_found += 1
            total_size += 1
    rho = spearmans_rho(assign_ranks(manual_dict), assign_ranks(auto_dict))
    print("%15s %15s %15.4f" % (str(total_size), str(not_found), rho))
    print('')
6.69 27 | stock live 3.73 28 | stock life 0.92 29 | book library 7.46 30 | bank money 8.12 31 | wood forest 7.73 32 | money cash 9.15 33 | professor cucumber 0.31 34 | king cabbage 0.23 35 | king queen 8.58 36 | king rook 5.92 37 | bishop rabbi 6.69 38 | Jerusalem Israel 8.46 39 | Jerusalem Palestinian 7.65 40 | holy sex 1.62 41 | fuck sex 9.44 42 | Maradona football 8.62 43 | football soccer 9.03 44 | football basketball 6.81 45 | football tennis 6.63 46 | tennis racket 7.56 47 | Arafat peace 6.73 48 | Arafat terror 7.65 49 | Arafat Jackson 2.50 50 | law lawyer 8.38 51 | movie star 7.38 52 | movie popcorn 6.19 53 | movie critic 6.73 54 | movie theater 7.92 55 | physics proton 8.12 56 | physics chemistry 7.35 57 | space chemistry 4.88 58 | alcohol chemistry 5.54 59 | vodka gin 8.46 60 | vodka brandy 8.13 61 | drink car 3.04 62 | drink ear 1.31 63 | drink mouth 5.96 64 | drink eat 6.87 65 | baby mother 7.85 66 | drink mother 2.65 67 | car automobile 8.94 68 | gem jewel 8.96 69 | journey voyage 9.29 70 | boy lad 8.83 71 | coast shore 9.10 72 | asylum madhouse 8.87 73 | magician wizard 9.02 74 | midday noon 9.29 75 | furnace stove 8.79 76 | food fruit 7.52 77 | bird cock 7.10 78 | bird crane 7.38 79 | tool implement 6.46 80 | brother monk 6.27 81 | crane implement 2.69 82 | lad brother 4.46 83 | journey car 5.85 84 | monk oracle 5.00 85 | cemetery woodland 2.08 86 | food rooster 4.42 87 | coast hill 4.38 88 | forest graveyard 1.85 89 | shore woodland 3.08 90 | monk slave 0.92 91 | coast forest 3.15 92 | lad wizard 0.92 93 | chord smile 0.54 94 | glass magician 2.08 95 | noon string 0.54 96 | rooster voyage 0.62 97 | money dollar 8.42 98 | money cash 9.08 99 | money currency 9.04 100 | money wealth 8.27 101 | money property 7.57 102 | money possession 7.29 103 | money bank 8.50 104 | money deposit 7.73 105 | money withdrawal 6.88 106 | money laundering 5.65 107 | money operation 3.31 108 | tiger jaguar 8.00 109 | tiger feline 8.00 110 | tiger carnivore 7.08 111 | tiger 
mammal 6.85 112 | tiger animal 7.00 113 | tiger organism 4.77 114 | tiger fauna 5.62 115 | tiger zoo 5.87 116 | psychology psychiatry 8.08 117 | psychology anxiety 7.00 118 | psychology fear 6.85 119 | psychology depression 7.42 120 | psychology clinic 6.58 121 | psychology doctor 6.42 122 | psychology Freud 8.21 123 | psychology mind 7.69 124 | psychology health 7.23 125 | psychology science 6.71 126 | psychology discipline 5.58 127 | psychology cognition 7.48 128 | planet star 8.45 129 | planet constellation 8.06 130 | planet moon 8.08 131 | planet sun 8.02 132 | planet galaxy 8.11 133 | planet space 7.92 134 | planet astronomer 7.94 135 | precedent example 5.85 136 | precedent information 3.85 137 | precedent cognition 2.81 138 | precedent law 6.65 139 | precedent collection 2.50 140 | precedent group 1.77 141 | precedent antecedent 6.04 142 | cup coffee 6.58 143 | cup tableware 6.85 144 | cup article 2.40 145 | cup artifact 2.92 146 | cup object 3.69 147 | cup entity 2.15 148 | cup drink 7.25 149 | cup food 5.00 150 | cup substance 1.92 151 | cup liquid 5.90 152 | jaguar cat 7.42 153 | jaguar car 7.27 154 | energy secretary 1.81 155 | secretary senate 5.06 156 | energy laboratory 5.09 157 | computer laboratory 6.78 158 | weapon secret 6.06 159 | FBI fingerprint 6.94 160 | FBI investigation 8.31 161 | investigation effort 4.59 162 | Mars water 2.94 163 | Mars scientist 5.63 164 | news report 8.16 165 | canyon landscape 7.53 166 | image surface 4.56 167 | discovery space 6.34 168 | water seepage 6.56 169 | sign recess 2.38 170 | Wednesday news 2.22 171 | mile kilometer 8.66 172 | computer news 4.47 173 | territory surface 5.34 174 | atmosphere landscape 3.69 175 | president medal 3.00 176 | war troops 8.13 177 | record number 6.31 178 | skin eye 6.22 179 | Japanese American 6.50 180 | theater history 3.91 181 | volunteer motto 2.56 182 | prejudice recognition 3.00 183 | decoration valor 5.63 184 | century year 7.59 185 | century nation 3.16 186 | delay racism 
1.19 187 | delay news 3.31 188 | minister party 6.63 189 | peace plan 4.75 190 | minority peace 3.69 191 | attempt peace 4.25 192 | government crisis 6.56 193 | deployment departure 4.25 194 | deployment withdrawal 5.88 195 | energy crisis 5.94 196 | announcement news 7.56 197 | announcement effort 2.75 198 | stroke hospital 7.03 199 | disability death 5.47 200 | victim emergency 6.47 201 | treatment recovery 7.91 202 | journal association 4.97 203 | doctor personnel 5.00 204 | doctor liability 5.19 205 | liability insurance 7.03 206 | school center 3.44 207 | reason hypertension 2.31 208 | reason criterion 5.91 209 | hundred percent 7.38 210 | Harvard Yale 8.13 211 | hospital infrastructure 4.63 212 | death row 5.25 213 | death inmate 5.03 214 | lawyer evidence 6.69 215 | life death 7.88 216 | life term 4.50 217 | word similarity 4.75 218 | board recommendation 4.47 219 | governor interview 3.25 220 | OPEC country 5.63 221 | peace atmosphere 3.69 222 | peace insurance 2.94 223 | territory kilometer 5.28 224 | travel activity 5.00 225 | competition price 6.44 226 | consumer confidence 4.13 227 | consumer energy 4.75 228 | problem airport 2.38 229 | car flight 4.94 230 | credit card 8.06 231 | credit information 5.31 232 | hotel reservation 8.03 233 | grocery money 5.94 234 | registration arrangement 6.00 235 | arrangement accommodation 5.41 236 | month hotel 1.81 237 | type kind 8.97 238 | arrival hotel 6.00 239 | bed closet 6.72 240 | closet clothes 8.00 241 | situation conclusion 4.81 242 | situation isolation 3.88 243 | impartiality interest 5.16 244 | direction combination 2.25 245 | street place 6.44 246 | street avenue 8.88 247 | street block 6.88 248 | street children 4.94 249 | listing proximity 2.56 250 | listing category 6.38 251 | cell phone 7.81 252 | production hike 1.75 253 | benchmark index 4.25 254 | media trading 3.88 255 | media gain 2.88 256 | dividend payment 7.63 257 | dividend calculation 6.48 258 | calculation computation 8.44 259 | currency 
market 7.50 260 | OPEC oil 8.59 261 | oil stock 6.34 262 | announcement production 3.38 263 | announcement warning 6.00 264 | profit warning 3.88 265 | profit loss 7.63 266 | dollar yen 7.78 267 | dollar buck 9.22 268 | dollar profit 7.38 269 | dollar loss 6.09 270 | computer software 8.50 271 | network hardware 8.31 272 | phone equipment 7.13 273 | equipment maker 5.91 274 | luxury car 6.47 275 | five month 3.38 276 | report gain 3.63 277 | investor earning 7.13 278 | liquid water 7.89 279 | baseball season 5.97 280 | game victory 7.03 281 | game team 7.69 282 | marathon sprint 7.47 283 | game series 6.19 284 | game defeat 6.97 285 | seven series 3.56 286 | seafood sea 7.47 287 | seafood food 8.34 288 | seafood lobster 8.70 289 | lobster food 7.81 290 | lobster wine 5.70 291 | food preparation 6.22 292 | video archive 6.34 293 | start year 4.06 294 | start match 4.47 295 | game round 5.97 296 | boxing round 7.61 297 | championship tournament 8.36 298 | fighting defeating 7.41 299 | line insurance 2.69 300 | day summer 3.94 301 | summer drought 7.16 302 | summer nature 5.63 303 | day dawn 7.53 304 | nature environment 8.31 305 | environment ecology 8.81 306 | nature man 6.25 307 | man woman 8.30 308 | man governor 5.25 309 | murder manslaughter 8.53 310 | soap opera 7.94 311 | opera performance 6.88 312 | life lesson 5.94 313 | focus life 4.06 314 | production crew 6.25 315 | television film 7.72 316 | lover quarrel 6.19 317 | viewer serial 2.97 318 | possibility girl 1.94 319 | population development 3.75 320 | morality importance 3.31 321 | morality marriage 3.69 322 | Mexico Brazil 7.44 323 | gender equality 6.41 324 | change attitude 5.44 325 | family planning 6.25 326 | opera industry 2.63 327 | sugar approach 0.88 328 | practice institution 3.19 329 | ministry culture 4.69 330 | problem challenge 6.75 331 | size prominence 5.31 332 | country citizen 7.31 333 | planet people 5.75 334 | development issue 3.97 335 | experience music 3.47 336 | music project 3.63 
337 | glass metal 5.56 338 | aluminum metal 7.83 339 | chance credibility 3.88 340 | exhibit memorabilia 5.31 341 | concert virtuoso 6.81 342 | rock jazz 7.59 343 | museum theater 7.19 344 | observation architecture 4.38 345 | space world 6.53 346 | preservation world 6.19 347 | admission ticket 7.69 348 | shower thunderstorm 6.31 349 | shower flood 6.03 350 | weather forecast 8.34 351 | disaster area 6.25 352 | governor office 6.34 353 | architecture century 3.78 354 | -------------------------------------------------------------------------------- /evaluation/wordsim353.txt: -------------------------------------------------------------------------------- 1 | tiger cat 7.35 2 | tiger tiger 10.00 3 | plane car 5.77 4 | train car 6.31 5 | television radio 6.77 6 | media radio 7.42 7 | bread butter 6.19 8 | cucumber potato 5.92 9 | doctor nurse 7.00 10 | professor doctor 6.62 11 | student professor 6.81 12 | smart stupid 5.81 13 | wood forest 7.73 14 | money cash 9.15 15 | king queen 8.58 16 | king rook 5.92 17 | bishop rabbi 6.69 18 | fuck sex 9.44 19 | football soccer 9.03 20 | football basketball 6.81 21 | football tennis 6.63 22 | Arafat Jackson 2.50 23 | physics chemistry 7.35 24 | vodka gin 8.46 25 | vodka brandy 8.13 26 | drink eat 6.87 27 | car automobile 8.94 28 | gem jewel 8.96 29 | journey voyage 9.29 30 | boy lad 8.83 31 | coast shore 9.10 32 | asylum madhouse 8.87 33 | magician wizard 9.02 34 | midday noon 9.29 35 | furnace stove 8.79 36 | food fruit 7.52 37 | bird cock 7.10 38 | bird crane 7.38 39 | food rooster 4.42 40 | money dollar 8.42 41 | money currency 9.04 42 | tiger jaguar 8.00 43 | tiger feline 8.00 44 | tiger carnivore 7.08 45 | tiger mammal 6.85 46 | tiger animal 7.00 47 | tiger organism 4.77 48 | tiger fauna 5.62 49 | psychology psychiatry 8.08 50 | psychology science 6.71 51 | psychology discipline 5.58 52 | planet star 8.45 53 | planet moon 8.08 54 | planet sun 8.02 55 | precedent example 5.85 56 | precedent antecedent 6.04 57 | cup 
tableware 6.85 58 | cup artifact 2.92 59 | cup object 3.69 60 | cup entity 2.15 61 | jaguar cat 7.42 62 | jaguar car 7.27 63 | mile kilometer 8.66 64 | skin eye 6.22 65 | Japanese American 6.50 66 | century year 7.59 67 | announcement news 7.56 68 | doctor personnel 5.00 69 | Harvard Yale 8.13 70 | hospital infrastructure 4.63 71 | life death 7.88 72 | travel activity 5.00 73 | type kind 8.97 74 | street place 6.44 75 | street avenue 8.88 76 | street block 6.88 77 | cell phone 7.81 78 | dividend payment 7.63 79 | calculation computation 8.44 80 | profit loss 7.63 81 | dollar yen 7.78 82 | dollar buck 9.22 83 | phone equipment 7.13 84 | liquid water 7.89 85 | marathon sprint 7.47 86 | seafood food 8.34 87 | seafood lobster 8.70 88 | lobster food 7.81 89 | lobster wine 5.70 90 | championship tournament 8.36 91 | man woman 8.30 92 | man governor 5.25 93 | murder manslaughter 8.53 94 | opera performance 6.88 95 | Mexico Brazil 7.44 96 | glass metal 5.56 97 | aluminum metal 7.83 98 | rock jazz 7.59 99 | museum theater 7.19 100 | shower thunderstorm 6.31 101 | monk oracle 5.00 102 | cup food 5.00 103 | journal association 4.97 104 | street children 4.94 105 | car flight 4.94 106 | space chemistry 4.88 107 | situation conclusion 4.81 108 | word similarity 4.75 109 | peace plan 4.75 110 | consumer energy 4.75 111 | ministry culture 4.69 112 | smart student 4.62 113 | investigation effort 4.59 114 | image surface 4.56 115 | life term 4.50 116 | start match 4.47 117 | computer news 4.47 118 | board recommendation 4.47 119 | lad brother 4.46 120 | observation architecture 4.38 121 | coast hill 4.38 122 | deployment departure 4.25 123 | benchmark index 4.25 124 | attempt peace 4.25 125 | consumer confidence 4.13 126 | start year 4.06 127 | focus life 4.06 128 | development issue 3.97 129 | theater history 3.91 130 | situation isolation 3.88 131 | profit warning 3.88 132 | media trading 3.88 133 | chance credibility 3.88 134 | precedent information 3.85 135 | architecture 
century 3.78 136 | population development 3.75 137 | stock live 3.73 138 | peace atmosphere 3.69 139 | morality marriage 3.69 140 | minority peace 3.69 141 | atmosphere landscape 3.69 142 | report gain 3.63 143 | music project 3.63 144 | seven series 3.56 145 | experience music 3.47 146 | school center 3.44 147 | five month 3.38 148 | announcement production 3.38 149 | morality importance 3.31 150 | money operation 3.31 151 | delay news 3.31 152 | governor interview 3.25 153 | practice institution 3.19 154 | century nation 3.16 155 | coast forest 3.15 156 | shore woodland 3.08 157 | drink car 3.04 158 | president medal 3.00 159 | prejudice recognition 3.00 160 | viewer serial 2.97 161 | peace insurance 2.94 162 | Mars water 2.94 163 | media gain 2.88 164 | precedent cognition 2.81 165 | announcement effort 2.75 166 | line insurance 2.69 167 | crane implement 2.69 168 | drink mother 2.65 169 | opera industry 2.63 170 | volunteer motto 2.56 171 | listing proximity 2.56 172 | precedent collection 2.50 173 | cup article 2.40 174 | sign recess 2.38 175 | problem airport 2.38 176 | reason hypertension 2.31 177 | direction combination 2.25 178 | Wednesday news 2.22 179 | glass magician 2.08 180 | cemetery woodland 2.08 181 | possibility girl 1.94 182 | cup substance 1.92 183 | forest graveyard 1.85 184 | stock egg 1.81 185 | month hotel 1.81 186 | energy secretary 1.81 187 | precedent group 1.77 188 | production hike 1.75 189 | stock phone 1.62 190 | holy sex 1.62 191 | stock CD 1.31 192 | drink ear 1.31 193 | delay racism 1.19 194 | stock life 0.92 195 | stock jaguar 0.92 196 | monk slave 0.92 197 | lad wizard 0.92 198 | sugar approach 0.88 199 | rooster voyage 0.62 200 | noon string 0.54 201 | chord smile 0.54 202 | professor cucumber 0.31 203 | king cabbage 0.23 204 | -------------------------------------------------------------------------------- /ranking.py: -------------------------------------------------------------------------------- 1 | import math 2 | import 
# Small offset added to every vector component in cosine_sim so that an
# all-zero vector does not produce a zero norm.
EPSILON = 1e-6


def euclidean(vec1, vec2):
    """Return the Euclidean distance between two numpy vectors."""
    diff = vec1 - vec2
    return math.sqrt(diff.dot(diff))


def cosine_sim(vec1, vec2):
    """Return the cosine similarity of *vec1* and *vec2*.

    EPSILON is added to every component before the similarity is computed.
    The shift is applied to fresh copies: the previous in-place ``+=``
    modified the caller's arrays, so vectors reused across many calls drifted
    by EPSILON per call and integer arrays raised a casting error.
    """
    shifted1 = numpy.asarray(vec1, dtype=float) + EPSILON
    shifted2 = numpy.asarray(vec2, dtype=float) + EPSILON
    return shifted1.dot(shifted2)/(norm(shifted1)*norm(shifted2))


def assign_ranks(item_dict):
    """Map each key of *item_dict* to its rank by descending value.

    The largest value gets rank 1. Keys sharing a value all receive the mean
    of the positions they occupy (a float); untied keys keep an int rank.
    """
    ranked_dict = {}
    sorted_list = sorted(item_dict.items(), key=itemgetter(1), reverse=True)
    total = len(sorted_list)
    start = 0
    # Equal values are adjacent after sorting, so walk the list one run of
    # ties at a time instead of rescanning the whole list for every item.
    while start < total:
        end = start
        while end + 1 < total and sorted_list[end + 1][1] == sorted_list[start][1]:
            end += 1
        if end == start:
            rank = start + 1
        else:
            # Mean of the 1-based positions start+1 .. end+1.
            rank = (start + end + 2) / 2.0
        for key, _ in sorted_list[start:end + 1]:
            ranked_dict[key] = rank
        start = end + 1
    return ranked_dict


def correlation(dict1, dict2):
    """Return the Pearson correlation between the values of two dicts.

    When both dicts have the same key set, values are paired by key; otherwise
    they are paired positionally in iteration order, as before. Returns 0.
    when either dict is empty or either side has zero variance, mirroring the
    degenerate-case convention of spearmans_rho.
    """
    if not dict1 or not dict2:
        return 0.
    avg1 = 1.*sum(dict1.values())/len(dict1)
    avg2 = 1.*sum(dict2.values())/len(dict2)
    if set(dict1) == set(dict2):
        # Pairing by key does not depend on the dicts' iteration order.
        pairs = [(dict1[key], dict2[key]) for key in dict1]
    else:
        pairs = zip(dict1.values(), dict2.values())
    numr, den1, den2 = (0., 0., 0.)
    for val1, val2 in pairs:
        numr += (val1 - avg1) * (val2 - avg2)
        den1 += (val1 - avg1) ** 2
        den2 += (val2 - avg2) ** 2
    denominator = math.sqrt(den1 * den2)
    if denominator == 0.:
        return 0.
    return numr / denominator


def spearmans_rho(ranked_dict1, ranked_dict2):
    """Return Spearman's rho for two rank dicts that share the same keys.

    Both arguments map keys to ranks (see assign_ranks). Returns 0. for empty
    input and when all ranks on one side are identical, where the coefficient
    is undefined. Raises AssertionError if the dicts differ in size and
    KeyError if a key of *ranked_dict1* is missing from *ranked_dict2*.
    """
    assert len(ranked_dict1) == len(ranked_dict2)
    if len(ranked_dict1) == 0 or len(ranked_dict2) == 0:
        return 0.
    x_avg = 1.*sum(ranked_dict1.values())/len(ranked_dict1)
    y_avg = 1.*sum(ranked_dict2.values())/len(ranked_dict2)
    num, d_x, d_y = (0., 0., 0.)
    for key in ranked_dict1:
        xi = ranked_dict1[key]
        yi = ranked_dict2[key]
        num += (xi-x_avg)*(yi-y_avg)
        d_x += (xi-x_avg)**2
        d_y += (yi-y_avg)**2
    denominator = math.sqrt(d_x*d_y)
    if denominator == 0.:
        # All ranks tied on at least one side.
        return 0.
    return num/denominator


def eval_category(word_vecs, category_file='evaluation/mcrae_typicality.yaml',
                  n_clusters=41):
    """Cluster categorized entries with k-means and score the clustering.

    *word_vecs* is iterated once and each entry is stored under its position
    index. *category_file* is a yaml mapping from category to members; a
    member is used only if it equals one of those position indices
    (presumably the yaml lists members by row index -- TODO confirm against
    the data file). The selected entries are clustered into *n_clusters*
    groups and compared with the category labels.

    Returns ``((adjusted_rand,), (adjusted_mutual_info,), normalized_mutual_info)``.
    """
    # safe_load: yaml.load() without an explicit Loader is rejected by current
    # PyYAML releases, and the category file only needs plain mappings.
    with open(category_file) as handle:
        cat0 = yaml.safe_load(handle)

    emb = dict(enumerate(word_vecs))

    labels_true = []
    labels_word = []
    # Each category gets the integer label of its position in the mapping.
    for label, category in enumerate(cat0):
        for member in cat0[category]:
            if member in emb:
                labels_true.append(label)
                labels_word.append(member)

    X = np.array([emb[w] for w in labels_word])

    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X)
    labels_pred = list(kmeans.labels_)

    # NOTE(review): the original code ended these two assignments with a
    # trailing comma, making r1 and r2 one-element tuples. That looks
    # accidental, but the shape is kept so existing callers keep working.
    r1 = (metrics.adjusted_rand_score(labels_true, labels_pred),)
    r2 = (metrics.adjusted_mutual_info_score(labels_true, labels_pred),)
    r3 = metrics.normalized_mutual_info_score(labels_true, labels_pred)

    return r1, r2, r3
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | python AE.py --train-data data/glove-vgg-sound-full.txt --text-dim 300 --image-dim 128 --text-dim1 250 --text-dim2 150 --image-dim1 90 --image-dim2 60 --sound-dim 128 --sound-dim1 90 --sound-dim2 90 --multi-dim 300 --batch-size 64 --epoch 300 --outmodel result/ae-m300-250-150 --gpu 1 > log.300-250-150--128-90-60--300 4 | 5 | 6 | -------------------------------------------------------------------------------- /tune_AE_brain.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python AE_brain.py --total-data data/glove-vgg-sound-full.txt --train-data data/glove_vgg_sound_m.txt --brain-data data/ass_emb_m.txt --text-dim 300 --image-dim 128 --text-dim1 250 --text-dim2 150 --image-dim1 90 --image-dim2 60 --sound-dim 128 --sound-dim1 90 --sound-dim2 60 --multi-dim 300 --brain-dim1 350 --brain-dim 556 --batch-size 64 --epoch 600 --load-model result/ae-m300-250-150.parameters-100 --outmodel result_brain/ae_brain_m-m300 --gpu 1 > log.ae_brain2_rel_m 3 | 4 | -------------------------------------------------------------------------------- /tune_AE_brain_weight_val.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python AE_brain_weight_val.py --total-data data/glove-vgg-sound-full.txt --train-data data/glove_vgg_sound_m.txt --brain-data data/ass_emb_m.txt --text-dim 300 --image-dim 128 --text-dim1 250 --text-dim2 150 --image-dim1 90 --image-dim2 60 --sound-dim 128 --sound-dim1 90 --sound-dim2 60 --multi-dim 300 --brain-dim1 350 --brain-dim 556 --batch-size 64 --epoch 600 --load-model result/ae-m300-250-150.parameters-100 --outmodel result_brain_weight/ae_brain_m-m300 --gpu 1 > log.ae_brain_weight_val_rel_m 3 | 4 | -------------------------------------------------------------------------------- /tune_AE_brain_weight_vec.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python AE_brain_weight_vec.py --total-data data/glove-vgg-sound-full.txt --train-data data/glove_vgg_sound_m.txt --brain-data data/ass_emb_m.txt --text-dim 300 --image-dim 128 --text-dim1 250 --text-dim2 150 --image-dim1 90 --image-dim2 60 --sound-dim 128 --sound-dim1 90 --sound-dim2 60 --multi-dim 300 --brain-dim1 350 --brain-dim 556 --batch-size 64 --epoch 600 --load-model result/ae-m300-250-150.parameters-100 --outmodel result_brain_weight/ae_brain_m-m300 --gpu 1 > log.ae_brain_weight_vec_rel_m 3 | 4 | --------------------------------------------------------------------------------