├── .gitignore ├── README.md ├── configs ├── caliskan.json ├── compare_embeddings.json └── histwords.json ├── images ├── google_news_reddit.png └── inst_weap_science_art.png ├── read_config.py ├── requirements.txt ├── results └── caliskan.json ├── results_compare_embeddings.json ├── results_histwords.json ├── run_tests.py ├── sgns-to-txt.py └── weat.py /.gitignore: -------------------------------------------------------------------------------- 1 | embeddings/ 2 | *.pyc 3 | .DS_Store 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # compare-embedding-bias 2 | 3 | Compare bias in word embeddings (over time, using different algorithms, using different corpora, before/after debiasing) using Word Embedding Association Tests (WEATs). Results are stored as JSON -- examples of graphing these results can be found in this colab notebook: 4 | https://colab.research.google.com/drive/1WNdOOmEenxtDhG-PRJ3K79HXBzZ-Nt-Q 5 | 6 | The WEAT statistic was developed by Caliskan et al. https://purehost.bath.ac.uk/ws/portalfiles/portal/168480066/CaliskanEtAl_authors_full.pdf 7 | 8 | ![Compare biases in Google News vs Reddit](images/google_news_reddit.png) 9 | 10 | ![Compare biases over time](images/inst_weap_science_art.png) 11 | 12 | 13 | # Requirements: 14 | - Python 3 15 | - Gensim 16 | - Numpy 17 | - cPickle 18 | - json 19 | - scipy 20 | 21 | Install with: 22 | 23 | $ pip install -r requirements.txt 24 | 25 | ## Quick Start 26 | 27 | ### Replicate Caliskan Results 28 | 29 | 1. Download the the word embedding used in the original research paper: Word2Vec Google News pretrained embeddings https://code.google.com/archive/p/word2vec/ 30 | 31 | 2. 
Place the embedding inside a directory (e.g., `embeddings`), then run the tests against it:
| "termite", 49 | "beetle", 50 | "cricket", 51 | "hornet", 52 | "moth", 53 | "wasp", 54 | "blackfly", 55 | "dragonfly", 56 | "horsefly", 57 | "roach", 58 | "weevil" 59 | ], 60 | "A":[ 61 | "caress", 62 | "freedom", 63 | "health", 64 | "love", 65 | "peace", 66 | "cheer", 67 | "friend", 68 | "heaven", 69 | "loyal", 70 | "pleasure", 71 | "diamond", 72 | "gentle", 73 | "honest", 74 | "lucky", 75 | "rainbow", 76 | "diploma", 77 | "gift", 78 | "honor", 79 | "miracle", 80 | "sunrise", 81 | "family", 82 | "happy", 83 | "laughter", 84 | "paradise", 85 | "vacation" 86 | ], 87 | "B":[ 88 | "abuse", 89 | "crash", 90 | "filth", 91 | "murder", 92 | "sickness", 93 | "accident", 94 | "death", 95 | "grief", 96 | "poison", 97 | "stink", 98 | "assault", 99 | "disaster", 100 | "hatred", 101 | "pollute", 102 | "tragedy", 103 | "divorce", 104 | "jail", 105 | "poverty", 106 | "ugly", 107 | "cancer", 108 | "kill", 109 | "rotten", 110 | "vomit", 111 | "agony", 112 | "prison" 113 | ] 114 | }, 115 | "Instruments vs Weapons / Pleasant vs Unpleasant":{ 116 | "X":[ 117 | "bagpipe", 118 | "cello", 119 | "guitar", 120 | "lute", 121 | "trombone", 122 | "banjo", 123 | "clarinet", 124 | "harmonica", 125 | "mandolin", 126 | "trumpet", 127 | "bassoon", 128 | "drum", 129 | "harp", 130 | "oboe", 131 | "tuba", 132 | "bell", 133 | "fiddle", 134 | "harpsichord", 135 | "piano", 136 | "viola", 137 | "bongo", 138 | "flute", 139 | "horn", 140 | "saxophone", 141 | "violin" 142 | ], 143 | "Y":[ 144 | "arrow", 145 | "club", 146 | "gun", 147 | "missile", 148 | "spear", 149 | "ax", 150 | "dagger", 151 | "harpoon", 152 | "pistol", 153 | "sword", 154 | "blade", 155 | "dynamite", 156 | "hatchet", 157 | "rifle", 158 | "tank", 159 | "bomb", 160 | "firearm", 161 | "knife", 162 | "shotgun", 163 | "teargas", 164 | "cannon", 165 | "grenade", 166 | "mace", 167 | "slingshot", 168 | "whip" 169 | ], 170 | "A":[ 171 | "caress", 172 | "freedom", 173 | "health", 174 | "love", 175 | "peace", 176 | "cheer", 177 | "friend", 178 | 
"heaven", 179 | "loyal", 180 | "pleasure", 181 | "diamond", 182 | "gentle", 183 | "honest", 184 | "lucky", 185 | "rainbow", 186 | "diploma", 187 | "gift", 188 | "honor", 189 | "miracle", 190 | "sunrise", 191 | "family", 192 | "happy", 193 | "laughter", 194 | "paradise", 195 | "vacation" 196 | ], 197 | "B":[ 198 | "abuse", 199 | "crash", 200 | "filth", 201 | "murder", 202 | "sickness", 203 | "accident", 204 | "death", 205 | "grief", 206 | "poison", 207 | "stink", 208 | "assault", 209 | "disaster", 210 | "hatred", 211 | "pollute", 212 | "tragedy", 213 | "divorce", 214 | "jail", 215 | "poverty", 216 | "ugly", 217 | "cancer", 218 | "kill", 219 | "rotten", 220 | "vomit", 221 | "agony", 222 | "prison" 223 | ] 224 | }, 225 | "European names vs African American Names / Pleasant3 vs Unpleasant3":{ 226 | "X":[ 227 | "Adam", 228 | "Harry", 229 | "Josh", 230 | "Roger", 231 | "Alan", 232 | "Frank", 233 | "Justin", 234 | "Ryan", 235 | "Andrew", 236 | "Jack", 237 | "Matthew", 238 | "Stephen", 239 | "Brad", 240 | "Greg", 241 | "Paul", 242 | "Jonathan", 243 | "Peter", 244 | "Amanda", 245 | "Courtney", 246 | "Heather", 247 | "Melanie", 248 | "Katie", 249 | "Betsy", 250 | "Kristin", 251 | "Nancy", 252 | "Stephanie", 253 | "Ellen", 254 | "Lauren", 255 | "Colleen", 256 | "Emily", 257 | "Megan", 258 | "Rachel" 259 | ], 260 | "Y":[ 261 | "Alonzo", 262 | "Jamel", 263 | "Theo", 264 | "Alphonse", 265 | "Jerome", 266 | "Leroy", 267 | "Torrance", 268 | "Darnell", 269 | "Lamar", 270 | "Lionel", 271 | "Tyree", 272 | "Deion", 273 | "Lamont", 274 | "Malik", 275 | "Terrence", 276 | "Tyrone", 277 | "Lavon", 278 | "Marcellus", 279 | "Wardell", 280 | "Nichelle", 281 | "Shereen", 282 | "Ebony", 283 | "Latisha", 284 | "Shaniqua", 285 | "Jasmine", 286 | "Tanisha", 287 | "Tia", 288 | "Lakisha", 289 | "Latoya", 290 | "Yolanda", 291 | "Malika", 292 | "Yvette" 293 | ], 294 | "A":[ 295 | "caress", 296 | "freedom", 297 | "health", 298 | "love", 299 | "peace", 300 | "cheer", 301 | "friend", 302 | "heaven", 303 
| "loyal", 304 | "pleasure", 305 | "diamond", 306 | "gentle", 307 | "honest", 308 | "lucky", 309 | "rainbow", 310 | "diploma", 311 | "gift", 312 | "honor", 313 | "miracle", 314 | "sunrise", 315 | "family", 316 | "happy", 317 | "laughter", 318 | "paradise", 319 | "vacation" 320 | ], 321 | "B":[ 322 | "abuse", 323 | "crash", 324 | "filth", 325 | "murder", 326 | "sickness", 327 | "accident", 328 | "death", 329 | "grief", 330 | "poison", 331 | "stink", 332 | "assault", 333 | "disaster", 334 | "hatred", 335 | "pollute", 336 | "tragedy", 337 | "bomb", 338 | "divorce", 339 | "jail", 340 | "poverty", 341 | "ugly", 342 | "cancer", 343 | "evil", 344 | "kill", 345 | "rotten", 346 | "vomit" 347 | ] 348 | }, 349 | "Male vs Female / Career vs Family":{ 350 | "X":[ 351 | "John", 352 | "Paul", 353 | "Mike", 354 | "Kevin", 355 | "Steve", 356 | "Greg", 357 | "Jeff", 358 | "Bill" 359 | ], 360 | "Y":[ 361 | "Amy", 362 | "Joan", 363 | "Lisa", 364 | "Sarah", 365 | "Diana", 366 | "Kate", 367 | "Ann", 368 | "Donna" 369 | ], 370 | "A":[ 371 | "executive", 372 | "management", 373 | "professional", 374 | "corporation", 375 | "salary", 376 | "office", 377 | "business", 378 | "career" 379 | ], 380 | "B":[ 381 | "home", 382 | "parents", 383 | "children", 384 | "family", 385 | "cousins", 386 | "marriage", 387 | "wedding", 388 | "relatives" 389 | ] 390 | }, 391 | "Math vs Art / Male vs Female":{ 392 | "X":[ 393 | "math", 394 | "algebra", 395 | "geometry", 396 | "calculus", 397 | "equations", 398 | "computation", 399 | "numbers", 400 | "addition" 401 | ], 402 | "Y":[ 403 | "poetry", 404 | "art", 405 | "Shakespeare", 406 | "dance", 407 | "literature", 408 | "novel", 409 | "symphony", 410 | "drama" 411 | ], 412 | "A":[ 413 | "brother", "father", "uncle", "grandfather", "son", "he", "his", "him" 414 | ], 415 | "B":[ 416 | "sister", "mother", "aunt", "grandmother", "daughter", "she", "hers", "her" 417 | ] 418 | }, 419 | "Science vs Art / Male vs Female":{ 420 | "X":[ 421 | "science", 422 | 
"technology", 423 | "physics", 424 | "chemistry", 425 | "Einstein", 426 | "NASA", 427 | "experiment", 428 | "astronomy" 429 | ], 430 | "Y":[ 431 | "poetry", 432 | "art", 433 | "Shakespeare", 434 | "dance", 435 | "literature", 436 | "novel", 437 | "symphony", 438 | "drama" 439 | ], 440 | "A":[ 441 | "brother", "father", "uncle", "grandfather", "son", "he", "his", "him" 442 | ], 443 | "B":[ 444 | "sister", "mother", "aunt", "grandmother", "daughter", "she", "hers", "her" 445 | ] 446 | } 447 | } 448 | } 449 | -------------------------------------------------------------------------------- /configs/compare_embeddings.json: -------------------------------------------------------------------------------- 1 | { "embeddings" : {"Reddit": "embeddings/model_full_reddit", "Google News": "embeddings/GoogleNews-vectors-negative300.bin"}, 2 | "tests": 3 | { 4 | "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)":{ 5 | "targets_desc":"Flowers vs Insects (25)", 6 | "attributes_desc":"Pleasant vs Unpleasant (25)", 7 | "X":[ 8 | "aster", 9 | "clover", 10 | "hyacinth", 11 | "marigold", 12 | "poppy", 13 | "azalea", 14 | "crocus", 15 | "iris", 16 | "orchid", 17 | "rose", 18 | "bluebell", 19 | "daffodil", 20 | "lilac", 21 | "pansy", 22 | "tulip", 23 | "buttercup", 24 | "daisy", 25 | "lily", 26 | "peony", 27 | "violet", 28 | "carnation", 29 | "gladiola", 30 | "magnolia", 31 | "petunia", 32 | "zinnia" 33 | ], 34 | "Y":[ 35 | "ant", 36 | "caterpillar", 37 | "flea", 38 | "locust", 39 | "spider", 40 | "bedbug", 41 | "centipede", 42 | "fly", 43 | "maggot", 44 | "tarantula", 45 | "bee", 46 | "cockroach", 47 | "gnat", 48 | "mosquito", 49 | "termite", 50 | "beetle", 51 | "cricket", 52 | "hornet", 53 | "moth", 54 | "wasp", 55 | "blackfly", 56 | "dragonfly", 57 | "horsefly", 58 | "roach", 59 | "weevil" 60 | ], 61 | "A":[ 62 | "caress", 63 | "freedom", 64 | "health", 65 | "love", 66 | "peace", 67 | "cheer", 68 | "friend", 69 | "heaven", 70 | "loyal", 71 | "pleasure", 72 | "diamond", 73 | 
"gentle", 74 | "honest", 75 | "lucky", 76 | "rainbow", 77 | "diploma", 78 | "gift", 79 | "honor", 80 | "miracle", 81 | "sunrise", 82 | "family", 83 | "happy", 84 | "laughter", 85 | "paradise", 86 | "vacation" 87 | ], 88 | "B":[ 89 | "abuse", 90 | "crash", 91 | "filth", 92 | "murder", 93 | "sickness", 94 | "accident", 95 | "death", 96 | "grief", 97 | "poison", 98 | "stink", 99 | "assault", 100 | "disaster", 101 | "hatred", 102 | "pollute", 103 | "tragedy", 104 | "divorce", 105 | "jail", 106 | "poverty", 107 | "ugly", 108 | "cancer", 109 | "kill", 110 | "rotten", 111 | "vomit", 112 | "agony", 113 | "prison" 114 | ] 115 | }, 116 | "Instruments vs Weapons / Pleasant vs Unpleasant":{ 117 | "X":[ 118 | "bagpipe", 119 | "cello", 120 | "guitar", 121 | "lute", 122 | "trombone", 123 | "banjo", 124 | "clarinet", 125 | "harmonica", 126 | "mandolin", 127 | "trumpet", 128 | "bassoon", 129 | "drum", 130 | "harp", 131 | "oboe", 132 | "tuba", 133 | "bell", 134 | "fiddle", 135 | "harpsichord", 136 | "piano", 137 | "viola", 138 | "bongo", 139 | "flute", 140 | "horn", 141 | "saxophone", 142 | "violin" 143 | ], 144 | "Y":[ 145 | "arrow", 146 | "club", 147 | "gun", 148 | "missile", 149 | "spear", 150 | "ax", 151 | "dagger", 152 | "harpoon", 153 | "pistol", 154 | "sword", 155 | "blade", 156 | "dynamite", 157 | "hatchet", 158 | "rifle", 159 | "tank", 160 | "bomb", 161 | "firearm", 162 | "knife", 163 | "shotgun", 164 | "teargas", 165 | "cannon", 166 | "grenade", 167 | "mace", 168 | "slingshot", 169 | "whip" 170 | ], 171 | "A":[ 172 | "caress", 173 | "freedom", 174 | "health", 175 | "love", 176 | "peace", 177 | "cheer", 178 | "friend", 179 | "heaven", 180 | "loyal", 181 | "pleasure", 182 | "diamond", 183 | "gentle", 184 | "honest", 185 | "lucky", 186 | "rainbow", 187 | "diploma", 188 | "gift", 189 | "honor", 190 | "miracle", 191 | "sunrise", 192 | "family", 193 | "happy", 194 | "laughter", 195 | "paradise", 196 | "vacation" 197 | ], 198 | "B":[ 199 | "abuse", 200 | "crash", 201 | "filth", 
202 | "murder", 203 | "sickness", 204 | "accident", 205 | "death", 206 | "grief", 207 | "poison", 208 | "stink", 209 | "assault", 210 | "disaster", 211 | "hatred", 212 | "pollute", 213 | "tragedy", 214 | "divorce", 215 | "jail", 216 | "poverty", 217 | "ugly", 218 | "cancer", 219 | "kill", 220 | "rotten", 221 | "vomit", 222 | "agony", 223 | "prison" 224 | ] 225 | }, 226 | "European names vs African American Names / Pleasant3 vs Unpleasant3":{ 227 | "X":[ 228 | "Adam", 229 | "Harry", 230 | "Josh", 231 | "Roger", 232 | "Alan", 233 | "Frank", 234 | "Justin", 235 | "Ryan", 236 | "Andrew", 237 | "Jack", 238 | "Matthew", 239 | "Stephen", 240 | "Brad", 241 | "Greg", 242 | "Paul", 243 | "Jonathan", 244 | "Peter", 245 | "Amanda", 246 | "Courtney", 247 | "Heather", 248 | "Melanie", 249 | "Katie", 250 | "Betsy", 251 | "Kristin", 252 | "Nancy", 253 | "Stephanie", 254 | "Ellen", 255 | "Lauren", 256 | "Colleen", 257 | "Emily", 258 | "Megan", 259 | "Rachel" 260 | ], 261 | "Y":[ 262 | "Alonzo", 263 | "Jamel", 264 | "Theo", 265 | "Alphonse", 266 | "Jerome", 267 | "Leroy", 268 | "Torrance", 269 | "Darnell", 270 | "Lamar", 271 | "Lionel", 272 | "Tyree", 273 | "Deion", 274 | "Lamont", 275 | "Malik", 276 | "Terrence", 277 | "Tyrone", 278 | "Lavon", 279 | "Marcellus", 280 | "Wardell", 281 | "Nichelle", 282 | "Shereen", 283 | "Ebony", 284 | "Latisha", 285 | "Shaniqua", 286 | "Jasmine", 287 | "Tanisha", 288 | "Tia", 289 | "Lakisha", 290 | "Latoya", 291 | "Yolanda", 292 | "Malika", 293 | "Yvette" 294 | ], 295 | "A":[ 296 | "caress", 297 | "freedom", 298 | "health", 299 | "love", 300 | "peace", 301 | "cheer", 302 | "friend", 303 | "heaven", 304 | "loyal", 305 | "pleasure", 306 | "diamond", 307 | "gentle", 308 | "honest", 309 | "lucky", 310 | "rainbow", 311 | "diploma", 312 | "gift", 313 | "honor", 314 | "miracle", 315 | "sunrise", 316 | "family", 317 | "happy", 318 | "laughter", 319 | "paradise", 320 | "vacation" 321 | ], 322 | "B":[ 323 | "abuse", 324 | "crash", 325 | "filth", 326 | 
"murder", 327 | "sickness", 328 | "accident", 329 | "death", 330 | "grief", 331 | "poison", 332 | "stink", 333 | "assault", 334 | "disaster", 335 | "hatred", 336 | "pollute", 337 | "tragedy", 338 | "bomb", 339 | "divorce", 340 | "jail", 341 | "poverty", 342 | "ugly", 343 | "cancer", 344 | "evil", 345 | "kill", 346 | "rotten", 347 | "vomit" 348 | ] 349 | }, 350 | "Male vs Female / Career vs Family":{ 351 | "X":[ 352 | "John", 353 | "Paul", 354 | "Mike", 355 | "Kevin", 356 | "Steve", 357 | "Greg", 358 | "Jeff", 359 | "Bill" 360 | ], 361 | "Y":[ 362 | "Amy", 363 | "Joan", 364 | "Lisa", 365 | "Sarah", 366 | "Diana", 367 | "Kate", 368 | "Ann", 369 | "Donna" 370 | ], 371 | "A":[ 372 | "executive", 373 | "management", 374 | "professional", 375 | "corporation", 376 | "salary", 377 | "office", 378 | "business", 379 | "career" 380 | ], 381 | "B":[ 382 | "home", 383 | "parents", 384 | "children", 385 | "family", 386 | "cousins", 387 | "marriage", 388 | "wedding", 389 | "relatives" 390 | ] 391 | }, 392 | "Math vs Art / Male8 vs Female8":{ 393 | "X":[ 394 | "math", 395 | "algebra", 396 | "geometry", 397 | "calculus", 398 | "equations", 399 | "computation", 400 | "numbers", 401 | "addition" 402 | ], 403 | "Y":[ 404 | "poetry", 405 | "art", 406 | "Shakespeare", 407 | "dance", 408 | "literature", 409 | "novel", 410 | "symphony", 411 | "drama" 412 | ], 413 | "A":[ 414 | "brother", "father", "uncle", "grandfather", "son", "he", "his", "him" 415 | ], 416 | "B":[ 417 | "sister", "mother", "aunt", "grandmother", "daughter", "she", "hers", "her" 418 | ] 419 | }, 420 | "Science vs Art / Male8 vs Female8":{ 421 | "X":[ 422 | "science", 423 | "technology", 424 | "physics", 425 | "chemistry", 426 | "Einstein", 427 | "NASA", 428 | "experiment", 429 | "astronomy" 430 | ], 431 | "Y":[ 432 | "poetry", 433 | "art", 434 | "Shakespeare", 435 | "dance", 436 | "literature", 437 | "novel", 438 | "symphony", 439 | "drama" 440 | ], 441 | "A":[ 442 | "brother", "father", "uncle", "grandfather", "son", 
"he", "his", "him" 443 | ], 444 | "B":[ 445 | "sister", "mother", "aunt", "grandmother", "daughter", "she", "hers", "her" 446 | ] 447 | }, 448 | "Straight vs Gay / Good vs Bad":{ 449 | "X":[ 450 | "straight", 451 | "heterosexual" 452 | ], 453 | "Y":[ 454 | "gay", 455 | "homosexual" 456 | ], 457 | "A":[ 458 | "beautiful", 459 | "superb", 460 | "joyful", 461 | "lovely", 462 | "glorious", 463 | "pleasure", 464 | "marvelous", 465 | "wonderful" 466 | ], 467 | "B":[ 468 | "humiliate", 469 | "agony", 470 | "awful", 471 | "nasty", 472 | "terrible", 473 | "horrible", 474 | "tragic", 475 | "painful" 476 | ] 477 | }, 478 | "Arab + Muslim vs Other / Good vs Bad":{ 479 | "X":[ 480 | "Ernesto", 481 | "Matthais", 482 | "Maarten", 483 | "Philippe", 484 | "Guillame", 485 | "Benoit", 486 | "Takuya", 487 | "Kazuki", 488 | "Chaiyo", 489 | "Marcelo" 490 | ], 491 | "Y":[ 492 | "hakim", 493 | "sharif", 494 | "yousef", 495 | "wahib", 496 | "muhsin", 497 | "salim", 498 | "karim", 499 | "habib", 500 | "ashraf", 501 | "akbar" 502 | ], 503 | "A":[ 504 | "laughter", 505 | "happy", 506 | "joy", 507 | "love", 508 | "glorious", 509 | "pleasure", 510 | "peace", 511 | "wonderful" 512 | ], 513 | "B":[ 514 | "failure", 515 | "agony", 516 | "awful", 517 | "nasty", 518 | "terrible", 519 | "horrible", 520 | "nasty", 521 | "evil" 522 | ] 523 | }, 524 | "Christianity vs Judaism / Good vs Bad":{ 525 | "X":[ 526 | "church", 527 | "bible", 528 | "christian", 529 | "christianity" 530 | ], 531 | "Y":[ 532 | "synagogue", 533 | "torah", 534 | "jew", 535 | "judaism" 536 | ], 537 | "A":[ 538 | "love", 539 | "cheer", 540 | "friend", 541 | "pleasure", 542 | "adore", 543 | "cheerful", 544 | "friendship", 545 | "joyful", 546 | "smiling", 547 | "cherish", 548 | "excellent", 549 | "glad", 550 | "joyous", 551 | "spectacular", 552 | "appealing", 553 | "delight", 554 | "excitement", 555 | "laughing", 556 | "attractive", 557 | "delightful", 558 | "fabulous", 559 | "glorious", 560 | "pleasing", 561 | "beautiful", 562 | 
"fantastic", 563 | "happy", 564 | "lovely", 565 | "terrific", 566 | "celebrate", 567 | "enjoy", 568 | "magnificent", 569 | "triumph" 570 | ], 571 | "B":[ 572 | "abuse", 573 | "grief", 574 | "poison", 575 | "sadness", 576 | "pain", 577 | "despise", 578 | "failure", 579 | "nasty", 580 | "angry", 581 | "detest", 582 | "horrible", 583 | "negative", 584 | "ugly", 585 | "dirty", 586 | "gross", 587 | "evil", 588 | "rotten", 589 | "annoy", 590 | "disaster", 591 | "horrific", 592 | "scorn", 593 | "awful", 594 | "disgust", 595 | "hate", 596 | "humiliate", 597 | "selfish", 598 | "tragic", 599 | "bothersome", 600 | "hatred", 601 | "hurtful", 602 | "sickening", 603 | "yucky" 604 | ] 605 | }, 606 | "Christianity vs Islam / Good vs Bad":{ 607 | "X":[ 608 | "church", 609 | "bible", 610 | "christian", 611 | "christianity" 612 | ], 613 | "Y":[ 614 | "mosque", 615 | "koran", 616 | "muslim", 617 | "islam" 618 | ], 619 | "A":[ 620 | "love", 621 | "cheer", 622 | "friend", 623 | "pleasure", 624 | "adore", 625 | "cheerful", 626 | "friendship", 627 | "joyful", 628 | "smiling", 629 | "cherish", 630 | "excellent", 631 | "glad", 632 | "joyous", 633 | "spectacular", 634 | "appealing", 635 | "delight", 636 | "excitement", 637 | "laughing", 638 | "attractive", 639 | "delightful", 640 | "fabulous", 641 | "glorious", 642 | "pleasing", 643 | "beautiful", 644 | "fantastic", 645 | "happy", 646 | "lovely", 647 | "terrific", 648 | "celebrate", 649 | "enjoy", 650 | "magnificent", 651 | "triumph" 652 | ], 653 | "B":[ 654 | "abuse", 655 | "grief", 656 | "poison", 657 | "sadness", 658 | "pain", 659 | "despise", 660 | "failure", 661 | "nasty", 662 | "angry", 663 | "detest", 664 | "horrible", 665 | "negative", 666 | "ugly", 667 | "dirty", 668 | "gross", 669 | "evil", 670 | "rotten", 671 | "annoy", 672 | "disaster", 673 | "horrific", 674 | "scorn", 675 | "awful", 676 | "disgust", 677 | "hate", 678 | "humiliate", 679 | "selfish", 680 | "tragic", 681 | "bothersome", 682 | "hatred", 683 | "hurtful", 684 | 
"sickening", 685 | "yucky" 686 | ] 687 | }, 688 | "Judaism vs Islam / Good vs Bad":{ 689 | "X":[ 690 | "synagogue", 691 | "torah", 692 | "jew", 693 | "judaism" 694 | ], 695 | "Y":[ 696 | "mosque", 697 | "koran", 698 | "muslim", 699 | "islam" 700 | ], 701 | "A":[ 702 | "love", 703 | "cheer", 704 | "friend", 705 | "pleasure", 706 | "adore", 707 | "cheerful", 708 | "friendship", 709 | "joyful", 710 | "smiling", 711 | "cherish", 712 | "excellent", 713 | "glad", 714 | "joyous", 715 | "spectacular", 716 | "appealing", 717 | "delight", 718 | "excitement", 719 | "laughing", 720 | "attractive", 721 | "delightful", 722 | "fabulous", 723 | "glorious", 724 | "pleasing", 725 | "beautiful", 726 | "fantastic", 727 | "happy", 728 | "lovely", 729 | "terrific", 730 | "celebrate", 731 | "enjoy", 732 | "magnificent", 733 | "triumph" 734 | ], 735 | "B":[ 736 | "abuse", 737 | "grief", 738 | "poison", 739 | "sadness", 740 | "pain", 741 | "despise", 742 | "failure", 743 | "nasty", 744 | "angry", 745 | "detest", 746 | "horrible", 747 | "negative", 748 | "ugly", 749 | "dirty", 750 | "gross", 751 | "evil", 752 | "rotten", 753 | "annoy", 754 | "disaster", 755 | "horrific", 756 | "scorn", 757 | "awful", 758 | "disgust", 759 | "hate", 760 | "humiliate", 761 | "selfish", 762 | "tragic", 763 | "bothersome", 764 | "hatred", 765 | "hurtful", 766 | "sickening", 767 | "yucky" 768 | ] 769 | } 770 | } 771 | } 772 | -------------------------------------------------------------------------------- /configs/histwords.json: -------------------------------------------------------------------------------- 1 | { "embeddings" : {"Histwords": 2 | {"1800" : "embeddings/sgns-all-txts/1800.txt", 3 | "1810" : "embeddings/sgns-all-txts/1810.txt", 4 | "1820" : "embeddings/sgns-all-txts/1820.txt", 5 | "1830" : "embeddings/sgns-all-txts/1830.txt", 6 | "1840" : "embeddings/sgns-all-txts/1840.txt", 7 | "1850" : "embeddings/sgns-all-txts/1850.txt", 8 | "1860" : "embeddings/sgns-all-txts/1860.txt", 9 | "1870" : 
"embeddings/sgns-all-txts/1870.txt", 10 | "1880" : "embeddings/sgns-all-txts/1880.txt", 11 | "1890" : "embeddings/sgns-all-txts/1890.txt", 12 | "1900" : "embeddings/sgns-all-txts/1900.txt", 13 | "1910" : "embeddings/sgns-all-txts/1910.txt", 14 | "1920" : "embeddings/sgns-all-txts/1920.txt", 15 | "1930" : "embeddings/sgns-all-txts/1930.txt", 16 | "1940" : "embeddings/sgns-all-txts/1940.txt", 17 | "1950" : "embeddings/sgns-all-txts/1950.txt", 18 | "1960" : "embeddings/sgns-all-txts/1960.txt", 19 | "1970" : "embeddings/sgns-all-txts/1970.txt", 20 | "1980" : "embeddings/sgns-all-txts/1980.txt", 21 | "1990" : "embeddings/sgns-all-txts/1990.txt" 22 | } 23 | }, 24 | "tests": 25 | { 26 | "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)":{ 27 | "targets_desc":"Flowers vs Insects (25)", 28 | "attributes_desc":"Pleasant vs Unpleasant (25)", 29 | "X":[ 30 | "aster", 31 | "clover", 32 | "hyacinth", 33 | "marigold", 34 | "poppy", 35 | "azalea", 36 | "crocus", 37 | "iris", 38 | "orchid", 39 | "rose", 40 | "bluebell", 41 | "daffodil", 42 | "lilac", 43 | "pansy", 44 | "tulip", 45 | "buttercup", 46 | "daisy", 47 | "lily", 48 | "peony", 49 | "violet", 50 | "carnation", 51 | "gladiola", 52 | "magnolia", 53 | "petunia", 54 | "zinnia" 55 | ], 56 | "Y":[ 57 | "ant", 58 | "caterpillar", 59 | "flea", 60 | "locust", 61 | "spider", 62 | "bedbug", 63 | "centipede", 64 | "fly", 65 | "maggot", 66 | "tarantula", 67 | "bee", 68 | "cockroach", 69 | "gnat", 70 | "mosquito", 71 | "termite", 72 | "beetle", 73 | "cricket", 74 | "hornet", 75 | "moth", 76 | "wasp", 77 | "blackfly", 78 | "dragonfly", 79 | "horsefly", 80 | "roach", 81 | "weevil" 82 | ], 83 | "A":[ 84 | "caress", 85 | "freedom", 86 | "health", 87 | "love", 88 | "peace", 89 | "cheer", 90 | "friend", 91 | "heaven", 92 | "loyal", 93 | "pleasure", 94 | "diamond", 95 | "gentle", 96 | "honest", 97 | "lucky", 98 | "rainbow", 99 | "diploma", 100 | "gift", 101 | "honor", 102 | "miracle", 103 | "sunrise", 104 | "family", 105 | "happy", 106 | 
"laughter", 107 | "paradise", 108 | "vacation" 109 | ], 110 | "B":[ 111 | "abuse", 112 | "crash", 113 | "filth", 114 | "murder", 115 | "sickness", 116 | "accident", 117 | "death", 118 | "grief", 119 | "poison", 120 | "stink", 121 | "assault", 122 | "disaster", 123 | "hatred", 124 | "pollute", 125 | "tragedy", 126 | "divorce", 127 | "jail", 128 | "poverty", 129 | "ugly", 130 | "cancer", 131 | "kill", 132 | "rotten", 133 | "vomit", 134 | "agony", 135 | "prison" 136 | ] 137 | }, 138 | "Instruments vs Weapons / Pleasant vs Unpleasant":{ 139 | "X":[ 140 | "bagpipe", 141 | "cello", 142 | "guitar", 143 | "lute", 144 | "trombone", 145 | "banjo", 146 | "clarinet", 147 | "harmonica", 148 | "mandolin", 149 | "trumpet", 150 | "bassoon", 151 | "drum", 152 | "harp", 153 | "oboe", 154 | "tuba", 155 | "bell", 156 | "fiddle", 157 | "harpsichord", 158 | "piano", 159 | "viola", 160 | "bongo", 161 | "flute", 162 | "horn", 163 | "saxophone", 164 | "violin" 165 | ], 166 | "Y":[ 167 | "arrow", 168 | "club", 169 | "gun", 170 | "missile", 171 | "spear", 172 | "ax", 173 | "dagger", 174 | "harpoon", 175 | "pistol", 176 | "sword", 177 | "blade", 178 | "dynamite", 179 | "hatchet", 180 | "rifle", 181 | "tank", 182 | "bomb", 183 | "firearm", 184 | "knife", 185 | "shotgun", 186 | "teargas", 187 | "cannon", 188 | "grenade", 189 | "mace", 190 | "slingshot", 191 | "whip" 192 | ], 193 | "A":[ 194 | "caress", 195 | "freedom", 196 | "health", 197 | "love", 198 | "peace", 199 | "cheer", 200 | "friend", 201 | "heaven", 202 | "loyal", 203 | "pleasure", 204 | "diamond", 205 | "gentle", 206 | "honest", 207 | "lucky", 208 | "rainbow", 209 | "diploma", 210 | "gift", 211 | "honor", 212 | "miracle", 213 | "sunrise", 214 | "family", 215 | "happy", 216 | "laughter", 217 | "paradise", 218 | "vacation" 219 | ], 220 | "B":[ 221 | "abuse", 222 | "crash", 223 | "filth", 224 | "murder", 225 | "sickness", 226 | "accident", 227 | "death", 228 | "grief", 229 | "poison", 230 | "stink", 231 | "assault", 232 | "disaster", 
233 | "hatred", 234 | "pollute", 235 | "tragedy", 236 | "divorce", 237 | "jail", 238 | "poverty", 239 | "ugly", 240 | "cancer", 241 | "kill", 242 | "rotten", 243 | "vomit", 244 | "agony", 245 | "prison" 246 | ] 247 | }, 248 | "European names vs African American Names / Pleasant3 vs Unpleasant3":{ 249 | "X":[ 250 | "Adam", 251 | "Harry", 252 | "Josh", 253 | "Roger", 254 | "Alan", 255 | "Frank", 256 | "Justin", 257 | "Ryan", 258 | "Andrew", 259 | "Jack", 260 | "Matthew", 261 | "Stephen", 262 | "Brad", 263 | "Greg", 264 | "Paul", 265 | "Jonathan", 266 | "Peter", 267 | "Amanda", 268 | "Courtney", 269 | "Heather", 270 | "Melanie", 271 | "Katie", 272 | "Betsy", 273 | "Kristin", 274 | "Nancy", 275 | "Stephanie", 276 | "Ellen", 277 | "Lauren", 278 | "Colleen", 279 | "Emily", 280 | "Megan", 281 | "Rachel" 282 | ], 283 | "Y":[ 284 | "Alonzo", 285 | "Jamel", 286 | "Theo", 287 | "Alphonse", 288 | "Jerome", 289 | "Leroy", 290 | "Torrance", 291 | "Darnell", 292 | "Lamar", 293 | "Lionel", 294 | "Tyree", 295 | "Deion", 296 | "Lamont", 297 | "Malik", 298 | "Terrence", 299 | "Tyrone", 300 | "Lavon", 301 | "Marcellus", 302 | "Wardell", 303 | "Nichelle", 304 | "Shereen", 305 | "Ebony", 306 | "Latisha", 307 | "Shaniqua", 308 | "Jasmine", 309 | "Tanisha", 310 | "Tia", 311 | "Lakisha", 312 | "Latoya", 313 | "Yolanda", 314 | "Malika", 315 | "Yvette" 316 | ], 317 | "A":[ 318 | "caress", 319 | "freedom", 320 | "health", 321 | "love", 322 | "peace", 323 | "cheer", 324 | "friend", 325 | "heaven", 326 | "loyal", 327 | "pleasure", 328 | "diamond", 329 | "gentle", 330 | "honest", 331 | "lucky", 332 | "rainbow", 333 | "diploma", 334 | "gift", 335 | "honor", 336 | "miracle", 337 | "sunrise", 338 | "family", 339 | "happy", 340 | "laughter", 341 | "paradise", 342 | "vacation" 343 | ], 344 | "B":[ 345 | "abuse", 346 | "crash", 347 | "filth", 348 | "murder", 349 | "sickness", 350 | "accident", 351 | "death", 352 | "grief", 353 | "poison", 354 | "stink", 355 | "assault", 356 | "disaster", 357 | 
"hatred", 358 | "pollute", 359 | "tragedy", 360 | "bomb", 361 | "divorce", 362 | "jail", 363 | "poverty", 364 | "ugly", 365 | "cancer", 366 | "evil", 367 | "kill", 368 | "rotten", 369 | "vomit" 370 | ] 371 | }, 372 | "Male vs Female / Career vs Family":{ 373 | "X":[ 374 | "John", 375 | "Paul", 376 | "Mike", 377 | "Kevin", 378 | "Steve", 379 | "Greg", 380 | "Jeff", 381 | "Bill" 382 | ], 383 | "Y":[ 384 | "Amy", 385 | "Joan", 386 | "Lisa", 387 | "Sarah", 388 | "Diana", 389 | "Kate", 390 | "Ann", 391 | "Donna" 392 | ], 393 | "A":[ 394 | "executive", 395 | "management", 396 | "professional", 397 | "corporation", 398 | "salary", 399 | "office", 400 | "business", 401 | "career" 402 | ], 403 | "B":[ 404 | "home", 405 | "parents", 406 | "children", 407 | "family", 408 | "cousins", 409 | "marriage", 410 | "wedding", 411 | "relatives" 412 | ] 413 | }, 414 | "Math vs Art / Male8 vs Female8":{ 415 | "X":[ 416 | "math", 417 | "algebra", 418 | "geometry", 419 | "calculus", 420 | "equations", 421 | "computation", 422 | "numbers", 423 | "addition" 424 | ], 425 | "Y":[ 426 | "poetry", 427 | "art", 428 | "Shakespeare", 429 | "dance", 430 | "literature", 431 | "novel", 432 | "symphony", 433 | "drama" 434 | ], 435 | "A":[ 436 | "brother", "father", "uncle", "grandfather", "son", "he", "his", "him" 437 | ], 438 | "B":[ 439 | "sister", "mother", "aunt", "grandmother", "daughter", "she", "hers", "her" 440 | ] 441 | }, 442 | "Science vs Art / Male8 vs Female8":{ 443 | "X":[ 444 | "science", 445 | "technology", 446 | "physics", 447 | "chemistry", 448 | "Einstein", 449 | "NASA", 450 | "experiment", 451 | "astronomy" 452 | ], 453 | "Y":[ 454 | "poetry", 455 | "art", 456 | "Shakespeare", 457 | "dance", 458 | "literature", 459 | "novel", 460 | "symphony", 461 | "drama" 462 | ], 463 | "A":[ 464 | "brother", "father", "uncle", "grandfather", "son", "he", "his", "him" 465 | ], 466 | "B":[ 467 | "sister", "mother", "aunt", "grandmother", "daughter", "she", "hers", "her" 468 | ] 469 | }, 470 | 
"Straight vs Gay / Good vs Bad":{ 471 | "X":[ 472 | "straight", 473 | "heterosexual" 474 | ], 475 | "Y":[ 476 | "gay", 477 | "homosexual" 478 | ], 479 | "A":[ 480 | "beautiful", 481 | "superb", 482 | "joyful", 483 | "lovely", 484 | "glorious", 485 | "pleasure", 486 | "marvelous", 487 | "wonderful" 488 | ], 489 | "B":[ 490 | "humiliate", 491 | "agony", 492 | "awful", 493 | "nasty", 494 | "terrible", 495 | "horrible", 496 | "tragic", 497 | "painful" 498 | ] 499 | }, 500 | "Arab + Muslim vs Other / Good vs Bad":{ 501 | "X":[ 502 | "Ernesto", 503 | "Matthais", 504 | "Maarten", 505 | "Philippe", 506 | "Guillame", 507 | "Benoit", 508 | "Takuya", 509 | "Kazuki", 510 | "Chaiyo", 511 | "Marcelo" 512 | ], 513 | "Y":[ 514 | "hakim", 515 | "sharif", 516 | "yousef", 517 | "wahib", 518 | "muhsin", 519 | "salim", 520 | "karim", 521 | "habib", 522 | "ashraf", 523 | "akbar" 524 | ], 525 | "A":[ 526 | "laughter", 527 | "happy", 528 | "joy", 529 | "love", 530 | "glorious", 531 | "pleasure", 532 | "peace", 533 | "wonderful" 534 | ], 535 | "B":[ 536 | "failure", 537 | "agony", 538 | "awful", 539 | "nasty", 540 | "terrible", 541 | "horrible", 542 | "nasty", 543 | "evil" 544 | ] 545 | }, 546 | "Christianity vs Judaism / Good vs Bad":{ 547 | "X":[ 548 | "church", 549 | "bible", 550 | "christian", 551 | "christianity" 552 | ], 553 | "Y":[ 554 | "synagogue", 555 | "torah", 556 | "jew", 557 | "judaism" 558 | ], 559 | "A":[ 560 | "love", 561 | "cheer", 562 | "friend", 563 | "pleasure", 564 | "adore", 565 | "cheerful", 566 | "friendship", 567 | "joyful", 568 | "smiling", 569 | "cherish", 570 | "excellent", 571 | "glad", 572 | "joyous", 573 | "spectacular", 574 | "appealing", 575 | "delight", 576 | "excitement", 577 | "laughing", 578 | "attractive", 579 | "delightful", 580 | "fabulous", 581 | "glorious", 582 | "pleasing", 583 | "beautiful", 584 | "fantastic", 585 | "happy", 586 | "lovely", 587 | "terrific", 588 | "celebrate", 589 | "enjoy", 590 | "magnificent", 591 | "triumph" 592 | ], 593 | 
"B":[ 594 | "abuse", 595 | "grief", 596 | "poison", 597 | "sadness", 598 | "pain", 599 | "despise", 600 | "failure", 601 | "nasty", 602 | "angry", 603 | "detest", 604 | "horrible", 605 | "negative", 606 | "ugly", 607 | "dirty", 608 | "gross", 609 | "evil", 610 | "rotten", 611 | "annoy", 612 | "disaster", 613 | "horrific", 614 | "scorn", 615 | "awful", 616 | "disgust", 617 | "hate", 618 | "humiliate", 619 | "selfish", 620 | "tragic", 621 | "bothersome", 622 | "hatred", 623 | "hurtful", 624 | "sickening", 625 | "yucky" 626 | ] 627 | }, 628 | "Christianity vs Islam / Good vs Bad":{ 629 | "X":[ 630 | "church", 631 | "bible", 632 | "christian", 633 | "christianity" 634 | ], 635 | "Y":[ 636 | "mosque", 637 | "koran", 638 | "muslim", 639 | "islam" 640 | ], 641 | "A":[ 642 | "love", 643 | "cheer", 644 | "friend", 645 | "pleasure", 646 | "adore", 647 | "cheerful", 648 | "friendship", 649 | "joyful", 650 | "smiling", 651 | "cherish", 652 | "excellent", 653 | "glad", 654 | "joyous", 655 | "spectacular", 656 | "appealing", 657 | "delight", 658 | "excitement", 659 | "laughing", 660 | "attractive", 661 | "delightful", 662 | "fabulous", 663 | "glorious", 664 | "pleasing", 665 | "beautiful", 666 | "fantastic", 667 | "happy", 668 | "lovely", 669 | "terrific", 670 | "celebrate", 671 | "enjoy", 672 | "magnificent", 673 | "triumph" 674 | ], 675 | "B":[ 676 | "abuse", 677 | "grief", 678 | "poison", 679 | "sadness", 680 | "pain", 681 | "despise", 682 | "failure", 683 | "nasty", 684 | "angry", 685 | "detest", 686 | "horrible", 687 | "negative", 688 | "ugly", 689 | "dirty", 690 | "gross", 691 | "evil", 692 | "rotten", 693 | "annoy", 694 | "disaster", 695 | "horrific", 696 | "scorn", 697 | "awful", 698 | "disgust", 699 | "hate", 700 | "humiliate", 701 | "selfish", 702 | "tragic", 703 | "bothersome", 704 | "hatred", 705 | "hurtful", 706 | "sickening", 707 | "yucky" 708 | ] 709 | }, 710 | "Judaism vs Islam / Good vs Bad":{ 711 | "X":[ 712 | "synagogue", 713 | "torah", 714 | "jew", 715 | 
"judaism" 716 | ], 717 | "Y":[ 718 | "mosque", 719 | "koran", 720 | "muslim", 721 | "islam" 722 | ], 723 | "A":[ 724 | "love", 725 | "cheer", 726 | "friend", 727 | "pleasure", 728 | "adore", 729 | "cheerful", 730 | "friendship", 731 | "joyful", 732 | "smiling", 733 | "cherish", 734 | "excellent", 735 | "glad", 736 | "joyous", 737 | "spectacular", 738 | "appealing", 739 | "delight", 740 | "excitement", 741 | "laughing", 742 | "attractive", 743 | "delightful", 744 | "fabulous", 745 | "glorious", 746 | "pleasing", 747 | "beautiful", 748 | "fantastic", 749 | "happy", 750 | "lovely", 751 | "terrific", 752 | "celebrate", 753 | "enjoy", 754 | "magnificent", 755 | "triumph" 756 | ], 757 | "B":[ 758 | "abuse", 759 | "grief", 760 | "poison", 761 | "sadness", 762 | "pain", 763 | "despise", 764 | "failure", 765 | "nasty", 766 | "angry", 767 | "detest", 768 | "horrible", 769 | "negative", 770 | "ugly", 771 | "dirty", 772 | "gross", 773 | "evil", 774 | "rotten", 775 | "annoy", 776 | "disaster", 777 | "horrific", 778 | "scorn", 779 | "awful", 780 | "disgust", 781 | "hate", 782 | "humiliate", 783 | "selfish", 784 | "tragic", 785 | "bothersome", 786 | "hatred", 787 | "hurtful", 788 | "sickening", 789 | "yucky" 790 | ] 791 | } 792 | } 793 | } 794 | -------------------------------------------------------------------------------- /images/google_news_reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hljoren/compare-embedding-bias/7a185137c66d2a9e92a9d10c7e4550048b24ff8e/images/google_news_reddit.png -------------------------------------------------------------------------------- /images/inst_weap_science_art.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hljoren/compare-embedding-bias/7a185137c66d2a9e92a9d10c7e4550048b24ff8e/images/inst_weap_science_art.png -------------------------------------------------------------------------------- 
'''
Read config file for WEAT test
'''
import sys
import json
import os


def read_json_config(file_name):
    '''
    Read a set of experiment configuration parameters from a JSON file,
    validate that the required entries are present, and return them.

    The JSON must contain two values:

    1. embeddings: EITHER paths to all embeddings to compare OR nested
    JSON containing embedding name then JSON mapping years to embedding paths
    (used for time series data)

    2. tests: JSON of tests including test name, followed by a JSON representing
    the particular test configuration. Each test must have X, Y, A, and B as keys.

    (3.) compare_tests: (OPTIONAL) If the experiment is a time series with multiple
    tests and multiple embeddings, indicates whether to compare embeddings
    (one graph per test) or to compare tests (one graph per embedding).
    Defaults to false, or the latter. This function does not validate it.

    If a required entry is missing, a message is printed and the process
    exits with status 1 (SystemExit).

    :param file_name: Name of the file containing the configuration
    :return: the parsed configuration as a dictionary (the whole JSON document)
    '''
    # JSON documents are UTF-8 by specification; do not depend on the locale.
    with open(file_name, encoding='utf-8') as json_file:
        data = json.load(json_file)

    if 'embeddings' not in data:
        print('Config must contain embeddings')
        sys.exit(1)
    if 'tests' not in data:
        print('Config must contain tests')
        sys.exit(1)

    for test_name, experiment_config in data['tests'].items():
        for k in ('X', 'Y', 'A', 'B'):
            if k not in experiment_config:
                # Name the offending test so the config can be fixed quickly.
                print('required key ' + k + ' not found in config for test ' + test_name)
                sys.exit(1)

    return data
"Science vs Art / Male8 vs Female8": [1.175, 0.0411], "Male vs Female / Career vs Family": [1.9006, 0.0074], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.5802, 0.0145], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [1.4469, 0.021], "Judaism vs Islam / Good vs Bad": [1.2434, 0.0547], "European names vs African American Names / Pleasant3 vs Unpleasant3": [0.5099, 0.0432]}, "Reddit": {"Math vs Art / Male8 vs Female8": [-0.3288, 0.1003], "Christianity vs Islam / Good vs Bad": [0.7285, 0.0902], "Christianity vs Judaism / Good vs Bad": [-0.3761, 0.0944], "Straight vs Gay / Good vs Bad": [1.32, 0.151], "Science vs Art / Male8 vs Female8": [0.0315, 0.0846], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.6374, 0.0127], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [1.4366, 0.0184], "Judaism vs Islam / Good vs Bad": [0.8813, 0.0768]}} -------------------------------------------------------------------------------- /results_histwords.json: -------------------------------------------------------------------------------- 1 | {"Histwords": {"1990": {"Math vs Art / Male8 vs Female8": [-0.2178, 0.097], "Christianity vs Islam / Good vs Bad": [0.7892, 0.0936], "Christianity vs Judaism / Good vs Bad": [0.5508, 0.0822], "Straight vs Gay / Good vs Bad": [1.12, 0.1665], "Science vs Art / Male8 vs Female8": [-1.0035, 0.0878], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.465, 0.02], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.9482, 0.0307], "Judaism vs Islam / Good vs Bad": [0.0129, 0.1049]}, "1830": {"Math vs Art / Male8 vs Female8": [0.6742, 0.0708], "Christianity vs Islam / Good vs Bad": [0.36, 0.1977], "Christianity vs Judaism / Good vs Bad": [1.88, 0.0686], "Science vs Art / Male8 vs Female8": [1.4644, 0.0546], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.3434, 0.0332], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [1.0253, 0.0938], "Judaism vs Islam / Good vs Bad": [-1.44, 0.1395]}, "1810": {"Math vs 
Art / Male8 vs Female8": [0.6764, 0.0688], "Christianity vs Islam / Good vs Bad": [-0.2, 0.2], "Christianity vs Judaism / Good vs Bad": [1.44, 0.1395], "Science vs Art / Male8 vs Female8": [1.363, 0.0602], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.3856, 0.0528], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.4648, 0.1026], "Judaism vs Islam / Good vs Bad": [-1.36, 0.1474]}, "1980": {"Math vs Art / Male8 vs Female8": [-0.5036, 0.0866], "Christianity vs Islam / Good vs Bad": [-0.2089, 0.1088], "Christianity vs Judaism / Good vs Bad": [0.3208, 0.1015], "Straight vs Gay / Good vs Bad": [-0.0, 0.201], "Science vs Art / Male8 vs Female8": [-0.5708, 0.0776], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.4454, 0.021], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.5138, 0.0484], "Judaism vs Islam / Good vs Bad": [-0.6322, 0.0986]}, "1800": {"Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.12, 0.2006], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.711, 0.0389], "Math vs Art / Male8 vs Female8": [1.3277, 0.0656], "Science vs Art / Male8 vs Female8": [1.5655, 0.0523], "Christianity vs Judaism / Good vs Bad": [1.76, 0.0955]}, "1850": {"Math vs Art / Male8 vs Female8": [0.996, 0.0584], "Christianity vs Islam / Good vs Bad": [1.0, 0.1741], "Christianity vs Judaism / Good vs Bad": [1.96, 0.04], "Science vs Art / Male8 vs Female8": [1.1213, 0.073], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.457, 0.0243], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.7002, 0.0646], "Judaism vs Islam / Good vs Bad": [-1.84, 0.0788]}, "1940": {"Math vs Art / Male8 vs Female8": [-0.5336, 0.0865], "Christianity vs Islam / Good vs Bad": [1.04, 0.0764], "Christianity vs Judaism / Good vs Bad": [1.055, 0.0679], "Science vs Art / Male8 vs Female8": [-0.5235, 0.0836], "Instruments vs Weapons / Pleasant vs Unpleasant": [0.931, 0.0411], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.8622, 0.0507], "Judaism 
vs Islam / Good vs Bad": [-0.1757, 0.0972]}, "1840": {"Math vs Art / Male8 vs Female8": [0.9635, 0.0653], "Christianity vs Islam / Good vs Bad": [0.6, 0.1917], "Christianity vs Judaism / Good vs Bad": [1.56, 0.1258], "Science vs Art / Male8 vs Female8": [1.4431, 0.0605], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.436, 0.0312], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.4256, 0.0849], "Judaism vs Islam / Good vs Bad": [-1.0, 0.1741]}, "1960": {"Math vs Art / Male8 vs Female8": [-0.8122, 0.074], "Christianity vs Islam / Good vs Bad": [-0.2377, 0.1105], "Christianity vs Judaism / Good vs Bad": [0.6324, 0.1008], "Straight vs Gay / Good vs Bad": [-0.32, 0.1984], "Science vs Art / Male8 vs Female8": [-0.1685, 0.0841], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.2334, 0.0283], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.7823, 0.0438], "Judaism vs Islam / Good vs Bad": [-1.0001, 0.0821]}, "1820": {"Math vs Art / Male8 vs Female8": [0.8802, 0.0707], "Christianity vs Islam / Good vs Bad": [-0.08, 0.2008], "Christianity vs Judaism / Good vs Bad": [1.72, 0.1026], "Science vs Art / Male8 vs Female8": [1.3887, 0.0583], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.3693, 0.0331], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.9509, 0.0943], "Judaism vs Islam / Good vs Bad": [-1.6, 0.1206]}, "1970": {"Math vs Art / Male8 vs Female8": [-0.7636, 0.0845], "Christianity vs Islam / Good vs Bad": [0.1711, 0.1043], "Christianity vs Judaism / Good vs Bad": [0.8838, 0.079], "Straight vs Gay / Good vs Bad": [0.16, 0.2004], "Science vs Art / Male8 vs Female8": [-0.5369, 0.0875], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.5107, 0.0183], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.7652, 0.0462], "Judaism vs Islam / Good vs Bad": [-0.6089, 0.0876]}, "1920": {"Math vs Art / Male8 vs Female8": [0.2376, 0.0735], "Christianity vs Islam / Good vs Bad": [0.7562, 0.0837], "Christianity vs Judaism / 
Good vs Bad": [1.3283, 0.0587], "Science vs Art / Male8 vs Female8": [0.7002, 0.0777], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.3371, 0.0241], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [1.2535, 0.0417], "Judaism vs Islam / Good vs Bad": [-0.4834, 0.1001]}, "1860": {"Math vs Art / Male8 vs Female8": [1.1793, 0.0516], "Christianity vs Islam / Good vs Bad": [-0.04, 0.201], "Christianity vs Judaism / Good vs Bad": [1.96, 0.04], "Science vs Art / Male8 vs Female8": [1.3217, 0.0624], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.4387, 0.0324], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [1.0056, 0.0563], "Judaism vs Islam / Good vs Bad": [-2.0, 0.0]}, "1930": {"Math vs Art / Male8 vs Female8": [0.4705, 0.0948], "Christianity vs Islam / Good vs Bad": [-0.1858, 0.106], "Christianity vs Judaism / Good vs Bad": [0.2581, 0.1012], "Science vs Art / Male8 vs Female8": [1.3398, 0.0495], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.083, 0.0311], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [1.1333, 0.0466], "Judaism vs Islam / Good vs Bad": [-0.4414, 0.0867]}, "1950": {"Math vs Art / Male8 vs Female8": [0.0094, 0.0869], "Christianity vs Islam / Good vs Bad": [0.0753, 0.102], "Christianity vs Judaism / Good vs Bad": [0.4243, 0.1021], "Straight vs Gay / Good vs Bad": [0.08, 0.2008], "Science vs Art / Male8 vs Female8": [0.7273, 0.0812], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.1959, 0.0329], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.7161, 0.0471], "Judaism vs Islam / Good vs Bad": [-0.4993, 0.1033]}, "1870": {"Math vs Art / Male8 vs Female8": [0.7645, 0.0714], "Christianity vs Islam / Good vs Bad": [1.44, 0.1395], "Christianity vs Judaism / Good vs Bad": [2.0, 0.0], "Science vs Art / Male8 vs Female8": [1.0882, 0.058], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.386, 0.0302], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.4463, 0.0731], "Judaism vs Islam / 
Good vs Bad": [-1.92, 0.0563]}, "1910": {"Math vs Art / Male8 vs Female8": [0.6551, 0.0741], "Christianity vs Islam / Good vs Bad": [0.8259, 0.0875], "Christianity vs Judaism / Good vs Bad": [1.3547, 0.0594], "Science vs Art / Male8 vs Female8": [0.8195, 0.0607], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.288, 0.0265], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [1.0859, 0.0456], "Judaism vs Islam / Good vs Bad": [-0.7938, 0.0968]}, "1890": {"Math vs Art / Male8 vs Female8": [0.5593, 0.0781], "Christianity vs Islam / Good vs Bad": [0.48, 0.1951], "Christianity vs Judaism / Good vs Bad": [1.2511, 0.0572], "Science vs Art / Male8 vs Female8": [1.1959, 0.0532], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.4768, 0.0213], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.9655, 0.05], "Judaism vs Islam / Good vs Bad": [-1.36, 0.1474]}, "1900": {"Math vs Art / Male8 vs Female8": [0.7513, 0.0767], "Christianity vs Islam / Good vs Bad": [0.7688, 0.0736], "Christianity vs Judaism / Good vs Bad": [1.4998, 0.044], "Science vs Art / Male8 vs Female8": [0.7675, 0.0805], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.3443, 0.028], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.7097, 0.0457], "Judaism vs Islam / Good vs Bad": [-1.1863, 0.0653]}, "1880": {"Math vs Art / Male8 vs Female8": [0.5081, 0.0759], "Christianity vs Islam / Good vs Bad": [0.72, 0.1875], "Christianity vs Judaism / Good vs Bad": [2.0, 0.0], "Science vs Art / Male8 vs Female8": [0.8756, 0.0763], "Instruments vs Weapons / Pleasant vs Unpleasant": [1.4184, 0.025], "Flowers vs Insects (25) / Pleasant vs Unpleasant (25)": [0.8134, 0.05], "Judaism vs Islam / Good vs Bad": [-1.72, 0.1026]}}} -------------------------------------------------------------------------------- /run_tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | Validate results with Caliskan Paper 3 | 4 | USAGE: python run_tests.py 
"""
Validate results with Caliskan Paper

USAGE: python run_tests.py path_to_google_news_corpus
"""
import json
import sys

import read_config
import weat


def replicate_caliskan(embed_path,
                       config_path='configs/caliskan.json',
                       results_path='results/caliskan.json'):
    """
    Run every test in the Caliskan config against one embedding and print the
    computed WEAT statistic next to the originally published value.

    :param embed_path: path of the embedding, passed to weat.load_embedding
    :param config_path: JSON file whose 'tests' entry maps test names to
        word lists X, Y, A and B
    :param results_path: JSON file mapping test names to a list whose first
        element is the published value
    """
    print('loading caliskan embedding...')
    embedding = weat.load_embedding(embed_path)
    print('embedding loaded')

    with open(config_path) as config_file:
        config = json.load(config_file)
    with open(results_path) as res_file:
        exp_results = json.load(res_file)

    for name_of_test, test_config in config['tests'].items():
        res = weat.diff_assoc(test_config['X'], test_config['Y'],
                              test_config['A'], test_config['B'], embedding)
        # A configured test may have no published counterpart; report that
        # instead of aborting the whole run with a KeyError.
        published = exp_results.get(name_of_test)
        original = published[0] if published else 'n/a'
        print(name_of_test + ':')
        print('Result: {} Original Finding: {}\n'.format(res, original))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('usage: python run_tests.py path_to_google_news_corpus')
        # Non-zero status: a usage error is a failure.
        sys.exit(1)
    replicate_caliskan(sys.argv[1])
    print('tests complete')
'''
Convert pretrained histwords embeddings to be compatible with gensim
'''

import sys
import glob
import os
import numpy as np
import pickle


def convert_decade(sgn_dir, decade, output_dir):
    '''
    Convert one decade of an SGNS directory to a text embedding file.

    Reads <sgn_dir>/<decade>-w.npy (the vectors) and
    <sgn_dir>/<decade>-vocab.pkl (the vocabulary) and writes
    <output_dir>/<decade>.txt: a "vocab_size vector_dim" header line followed
    by one "word v1 v2 ..." line per vocabulary entry.

    :return: path of the file that was written
    '''
    print("Loading embedding for {}".format(decade))
    vectors = np.load(os.path.join(sgn_dir, decade + "-w.npy"), mmap_mode="c")
    # NOTE(security): pickle.load can execute arbitrary code; only run this
    # on vocab files obtained from a trusted source.
    with open(os.path.join(sgn_dir, decade + "-vocab.pkl"), "rb") as f:
        vocab = pickle.load(f)
    # Row of each word in the vector matrix (position in the vocab list).
    word_indicies = {w: i for i, w in enumerate(vocab)}
    vocab_size = len(vocab)
    print('vocab_size: {}'.format(vocab_size))
    vector_dim = len(vectors[0])
    print('vector_dim: {}'.format(vector_dim))

    output_txt = os.path.join(output_dir, decade + '.txt')
    print("Writing {}".format(output_txt))
    # Text mode with an explicit encoding: writing str (not encoded bytes)
    # is what a text-mode file requires on Python 3.
    with open(output_txt, 'w', encoding='utf-8') as fp:
        fp.write(str(vocab_size) + ' ' + str(vector_dim) + '\n')
        for word in vocab:
            row = vectors[word_indicies[word], :]
            fp.write(word + ' ' + ' '.join(map(str, row)) + '\n')
    return output_txt


def main(argv=None):
    '''
    Command line entry point: sgns-to-txt.py sgn-directory [decade].

    Without a decade, every decade that has a .npy file in the directory is
    converted. Output goes to a sibling directory named <sgn-directory>-txts.
    '''
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('usage: python sgns-to-txt.py sgn-directory decade=all')
        sys.exit(1)
    sgn_dir = argv[1].rstrip('/')

    # Create target Directory if doesn't exist
    outputdir = sgn_dir + '-txts'
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
        print('Directory {} created'.format(outputdir))
    else:
        print('Directory {} already exists'.format(outputdir))

    if len(argv) > 2:
        decades = [argv[2]]
    else:
        # The first four characters of each .npy file name are the decade.
        pattern = os.path.join(glob.escape(sgn_dir), '*.npy')
        decades = sorted({os.path.basename(p)[:4] for p in glob.glob(pattern)})

    for decade in decades:
        convert_decade(sgn_dir, decade, outputdir)


if __name__ == '__main__':
    main()
import numpy as np
import random
import sys
import glob
import os
import json
from scipy import stats
import math


def word_assoc(w, A, B, embedding):
    """
    Calculates the association of a word with two sets of attribute words:
    the similarity of w to A minus the similarity of w to B, each as
    computed by embedding.n_similarity.
    """
    return embedding.n_similarity([w], A) - embedding.n_similarity([w], B)


def diff_assoc(X, Y, A, B, embedding):
    """
    Calculates the WEAT test statistic for four sets of words in an embedding.

    The result is the difference between the mean association of the X words
    and the mean association of the Y words, divided by the standard
    deviation (np.std) of the associations of all X and Y words together.
    """
    word_assoc_X = np.array([word_assoc(x, A, B, embedding) for x in X])
    word_assoc_Y = np.array([word_assoc(y, A, B, embedding) for y in Y])
    mean_diff = np.mean(word_assoc_X) - np.mean(word_assoc_Y)
    std = np.std(np.concatenate((word_assoc_X, word_assoc_Y), axis=0))
    return mean_diff / std


def get_bias_scores_mean_err(word_pairs, embedding, n_samples=100):
    """
    Calculate the mean WEAT statistic and its standard error over random
    subsamples of the word sets.

    Each sample draws, without replacement, half of the smaller target list
    from X and from Y and half of the smaller attribute list from A and
    from B, then computes diff_assoc on those subsets.

    :param word_pairs: dict with the word lists under keys X, Y, A and B
    :param embedding: object providing n_similarity
    :param n_samples: number of random subsamples (defaults to 100)
    :return: (mean of the sampled statistics, standard error of the mean)
    """
    # divide smaller word_list by two
    subset_size_target = min(len(word_pairs['X']), len(word_pairs['Y'])) // 2
    subset_size_attr = min(len(word_pairs['A']), len(word_pairs['B'])) // 2
    bias_scores = []
    for _ in range(n_samples):
        sX = np.random.choice(word_pairs['X'], subset_size_target, replace=False)
        sY = np.random.choice(word_pairs['Y'], subset_size_target, replace=False)
        sA = np.random.choice(word_pairs['A'], subset_size_attr, replace=False)
        sB = np.random.choice(word_pairs['B'], subset_size_attr, replace=False)
        bias_scores.append(diff_assoc(sX, sY, sA, sB, embedding))
    return np.mean(bias_scores), stats.sem(bias_scores)


def run_test(config, embedding):
    """
    Run one WEAT test described by config against an embedding.

    Only the X, Y, A and B entries of config are used. Words that are missing
    from the embedding, or whose vector is all zeros, are dropped first.

    :return: (mean, standard error) from get_bias_scores_mean_err, or
        (None, None) when a word list keeps fewer than two words
    """
    word_pairs = {}
    # Only include words that are present in the word embedding
    for word_list_name, word_list in config.items():
        if word_list_name not in ('X', 'Y', 'A', 'B'):
            continue
        word_list_filtered = [
            x for x in word_list
            if x in embedding and np.count_nonzero(embedding[x]) > 0
        ]
        word_pairs[word_list_name] = word_list_filtered
        if len(word_list_filtered) < 2:
            print('ERROR: Words from list {} not found in embedding\n {}'
                  .format(word_list_name, word_list))
            print('All word groups must contain at least two words')
            return None, None
    return get_bias_scores_mean_err(word_pairs, embedding)


def load_embedding(embed_path):
    """
    Load word vectors with gensim, choosing the loader from the path suffix:
    'wv' (saved KeyedVectors), 'txt' (word2vec text format), 'bin' (word2vec
    binary format); any other path is loaded as a saved Word2Vec model.

    :return: the loaded word vectors
    """
    # Imported here so the statistic functions above can be used without
    # gensim being installed.
    from gensim.models import KeyedVectors
    from gensim.models import Word2Vec

    if embed_path.endswith('wv'):
        return KeyedVectors.load(embed_path)
    elif embed_path.endswith('txt'):
        return KeyedVectors.load_word2vec_format(embed_path, binary=False)
    elif embed_path.endswith('bin'):
        return KeyedVectors.load_word2vec_format(embed_path, binary=True)
    # NOTE reddit embedding is saved as model (no ext) + syn1neg + syn0
    else:
        # Return the model's vectors so every branch yields the same kind of
        # object (membership test, item lookup, n_similarity).
        return Word2Vec.load(embed_path).wv


def _load_or_none(name, embed_path):
    """Load an embedding, or report why it could not be loaded and return None."""
    try:
        return load_embedding(embed_path)
    except Exception as exc:  # top-level boundary: report and skip this embedding
        print('could not load embedding {}: {}'.format(name, exc))
        return None


def _score_tests(tests, embedding):
    """Run every configured test; return {test name: (mean, err)} for those that ran."""
    scores = {}
    for name_of_test, test_config in tests.items():
        print(name_of_test)
        mean, err = run_test(test_config, embedding)
        print('mean: {} err: {}'.format(mean, err))
        if mean is not None:
            scores[name_of_test] = (round(float(mean), 4), round(float(err), 4))
    return scores


if __name__ == '__main__':
    import read_config

    if len(sys.argv) < 2:
        print('usage: python weat.py config.json results_file=config_results.json')
        sys.exit(1)

    fname = sys.argv[1]
    if len(sys.argv) > 2:
        results_file = sys.argv[2]
    else:
        # Use only the file name so a config in a subdirectory (for example
        # configs/histwords.json) yields results_histwords.json.
        results_file = 'results_' + os.path.basename(fname)
    results = {}
    config = read_config.read_json_config(fname)
    for e_name, e in config['embeddings'].items():
        results[e_name] = {}
        if not isinstance(e, dict):
            print('loading embedding {}...'.format(e_name))
            embedding = _load_or_none(e_name, e)
            if embedding is None:
                continue
            results[e_name] = _score_tests(config['tests'], embedding)
        else:
            print('loading time series embeddings...')
            for time, embed_path in e.items():
                results[e_name][time] = {}
                embedding = _load_or_none('{} {}'.format(e_name, time), embed_path)
                if embedding is None:
                    continue
                results[e_name][time] = _score_tests(config['tests'], embedding)
    # json.dump writes str, so the file must be opened in text mode.
    with open(results_file, 'w') as outfile:
        json.dump(results, outfile)