├── CompNet_archi.png
├── README.md
├── labels
│   ├── identity_meta.csv
│   ├── identity_meta_edit.csv
│   ├── identity_test.csv
│   └── identity_train.csv
└── src
    ├── __pycache__
    │   ├── dataloader.cpython-36.pyc
    │   ├── models.cpython-36.pyc
    │   └── utils.cpython-36.pyc
    ├── dataloader.py
    ├── models.py
    ├── split_identity_meta.py
    ├── train.py
    └── utils.py

/CompNet_archi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeomko22/ComparatorNetwork_pytorch/b0c3dfac619043d57ac72acede5e7cbe89a52c38/CompNet_archi.png
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# ComparatorNetwork_PyTorch
This is an unofficial PyTorch implementation of VGG's Comparator Networks [1].
The overall architecture is shown below.

![CompNet architecture](./CompNet_archi.png)

## Dependencies

- Python 3.5+
- PyTorch 0.4.0
- opencv-python

## Implemented Parts
- Basic architecture of the detect, attend and compare modules.
- Model training code.
- Diversity regularization of local landmarks.

## Currently Ongoing Parts
- Using pretrained ResNet/SENet models in the detect stage.
- Keypoint regularization of local landmarks.
- Hard sampling.

## Dataset
Download the VGGFace2 dataset [[link]](https://www.robots.ox.ac.uk/~vgg/data/vgg_face2/)

## Train
`python train.py -i "path to your local VGGFace2 train img dir"`

## References
1. Weidi Xie, Li Shen, Andrew Zisserman, Comparator Networks, 2018 [[pdf]](https://arxiv.org/pdf/1807.11440.pdf)
--------------------------------------------------------------------------------

/labels/identity_test.csv:
--------------------------------------------------------------------------------
1 | Class_ID, Name, Sample_Num, Flag, Gender 2 | n000001,"""14th_Dalai_Lama""",424,0, m 3 | n000009," ""AYAMI""",136,0, f 4 | n000029," ""Abdullah_II_of_Jordan""",454,0, m 5 | n000040," ""Aco_Petrović""",180,0, m 6 | n000078," ""Adhyayan_Suman""",458,0, m 7 | n000082," ""Aditya_Seal""",441,0, m 8 | n000106," ""Agata_Passent""",279,0, f 9 | n000129," ""Ahmet_Davutoğlu""",585,0, m 10 | n000148," ""Airi_Suzuki""",367,0, f 11 | n000149," ""Aishwarya_Rai_Bachchan""",522,0, f 12 | n000178," ""Alain_Traoré""",208,0, m 13 | n000259," ""Alex_Gonzaga""",431,0, f 14 | n000284," ""Alexandra_Edenborough""",344,0, f 15 | n000363," ""Alodia_Gosiengfiao""",505,0, f 16 | n000394," ""Amber_Brkich""",242,0, f 17 | n000410," ""Amina_Shafaat""",199,0, f 18 | n000452," ""Ana_Rosa_Quintana""",454,0, f 19 | n000480," ""Andrea_Anders""",312,0, f 20 | n000527," ""Andrew_Upton""",329,0, m 21 | n000596," ""Angélique_Kidjo""",357,0, m 22 | n000624," ""Anna_Kalata""",446,0, f 23 | n000654," ""Anne_Schedeen""",200,0, f 24 | n000658," ""Anne_Princess_Royal""",440,0, f 25 | n000659," ""Anne-Marie_Duff""",491,0, f 26 | n000667," ""Annette_Bening""",526,0, f 27 | n000689," ""Ante_Gotovina""",400,0, m 28 | n000706," ""Antonia_Kidman""",301,0, f 29 | n000736," ""Aníbal_Cavaco_Silva""",444,0, m 30 | n000740," ""Apollo_Quiboloy""",210,0, m 31 | n000746," ""Archana_Puran_Singh""",324,0, f 32 | n000774," ""Armando_Iannucci""",310,0, m 33 | n000775," ""Armin_Shimerman""",261,0, m 34 | n000785," ""Arsène_Wenger""",580,0, m 35 | n000836," ""Axel_Pape""",186,0, m 36 | n000838," ""Aya_Miyama""",306,0, f 37 | n000854," ""Ayumi_Hamasaki""",477,0, f 38 | n000912," ""Bart_Freundlich""",410,0, m 39 | n000928," 
""Bebe_Cool""",259,0, m 40 | n000945," ""Ben_Falcone""",353,0, m 41 | n000950," ""Ben_Ofoedu""",257,0, m 42 | n000958," ""Benigno_Noynoy_Aquino_III""",451,0, m 43 | n000998," ""Besik_Kudukhov""",229,0, m 44 | n001021," ""Bhairvi_Goswami""",219,0, f 45 | n001059," ""Billy_Dennehy""",236,0, m 46 | n001125," ""Bobby_Voelker""",159,0, m 47 | n001127," ""Bodhi_Elfman""",308,0, m 48 | n001146," ""Boris_Tadić""",594,0, m 49 | n001153," ""Bracha_van_Doesburgh""",203,0, f 50 | n001156," ""Brad_Gushue""",302,0, m 51 | n001174," ""Brandon_Ríos""",231,0, m 52 | n001190," ""Brendan_Rodgers""",544,0, m 53 | n001197," ""Brett_Tucker""",182,0, m 54 | n001199," ""Brian_Austin_Green""",423,0, m 55 | n001211," ""Brian_Stokes_Mitchell""",379,0, m 56 | n001242," ""Bronisław_Komorowski""",644,0, m 57 | n001256," ""Bruce_Jenner""",430,0, m 58 | n001277," ""Bud_Cort""",243,0, m 59 | n001291," ""Bárbara_Mori""",502,0, f 60 | n001293," ""Börje_Salming""",305,0, m 61 | n001296," ""C.C.Catch""",303,0, f 62 | n001299," ""Cacá_Bueno""",237,0, m 63 | n001302," ""Caio_Ribeiro""",324,0, m 64 | n001303," ""Caissie_Levy""",320,0, f 65 | n001304," ""Caitlin_Foord""",268,0, f 66 | n001337," ""Candice_Crawford""",206,0, f 67 | n001341," ""Candice_Night""",267,0, f 68 | n001368," ""Carlo_Conti""",573,0, m 69 | n001401," ""Carmen_Russo""",369,0, f 70 | n001418," ""Carolina_Klüft""",452,0, f 71 | n001439," ""Carsten_Maschmeyer""",402,0, m 72 | n001446," ""Cash_Warren""",395,0, m 73 | n001467," ""Cathy_Tsui""",217,0, f 74 | n001485," ""Cem_Özdemir""",438,0, m 75 | n001576," ""Chin_Ka-lok""",204,0, m 76 | n001650," ""Christina_Plate""",204,0, f 77 | n001655," ""Christine_Devine""",321,0, f 78 | n001672," ""Christopher_Backus""",275,0, m 79 | n001683," ""Chuck_Comeau""",294,0, m 80 | n001687," ""Chuck_Scarborough""",328,0, m 81 | n001708," ""Clare_Grant""",481,0, f 82 | n001710," ""Clarence_Clemons""",225,0, f 83 | n001781," ""Conan_OBrien""",456,0, m 84 | n001811," ""Cris_Judd""",240,0, m 85 | n001816," ""Cristian_Boureanu""",230,0, m 86 | n001817," ""Cristian_Mungiu""",275,0, m 87 | n001830," ""Cristóbal_Montoro""",538,0, m 88 | n001836," ""Cybill_Shepherd""",373,0, f 89 | n001838," ""Cynthia_Lennon""",281,0, f 90 | n001850," ""César_Santin""",99,0, m 91 | n001857," ""D._Ramanaidu""",281,0, m 92 | n001878," ""Dalia_Grybauskaitė""",638,0, m 93 | n001898," ""Dan_Smith""",225,0, m 94 | n001923," ""Daniel_Jarque""",167,0, m 95 | n001934," ""Daniel_Zelman""",212,0, m 96 | n001935," ""Daniela_Bianchi""",253,0, f 97 | n001956," ""Danish_Taimoor""",416,0, m 98 | n001976," ""Danson_Tang""",440,0, m 99 | n002009," ""Dave_Nonis""",150,0, m 100 | n002024," ""David_Burtka""",397,0, m 101 | n002080," ""Dean_Morgan""",305,0, m 102 | n002081," ""Dean_Sheremet""",278,0, m 103 | n002082," ""Dean_Skelos""",313,0, m 104 | n002093," ""Deborra-Lee_Furness""",470,0, f 105 | n002106," ""Del_McCoury""",383,0, m 106 | n002109," ""Delnaaz_Irani""",266,0, f 107 | n002153," ""Desiree_Gruber""",345,0, f 108 | n002158," ""Devayani""",327,0, f 109 | n002166," ""Dian_Parkinson""",158,0, f 110 | n002167," ""Diana_Bolocco""",319,0, f 111 | n002216," ""Dilip_Vengsarkar""",336,0, m 112 | n002223," ""Dina_Sfat""",248,0, f 113 | n002245," ""Dollah_Salleh""",213,0, m 114 | n002257," ""Dominique_Farrugia""",202,0, m 115 | n002258," ""Dominique_Geisendorff""",221,0, f 116 | n002263," ""Don_Johnson""",385,0, m 117 | n002267," ""Donald_Faison""",420,0, m 118 | n002268," ""Donald_Sterling""",257,0, m 119 | n002282," ""Dorkas_Kiefer""",331,0, f 120 | n002284," 
""Dorothea_Hurley""",331,0, f 121 | n002309," ""Durga_Jasraj""",229,0, f 122 | n002329," ""Dési_Bouterse""",296,0, m 123 | n002351," ""Eddie_Steeples""",208,0, m 124 | n002372," ""Edoardo_Costa""",230,0, m 125 | n002381," ""Eduardo_Schwank""",217,0, m 126 | n002384," ""Eduardo_Yañez""",210,0, m 127 | n002389," ""Edward_Furlong""",472,0, m 128 | n002414," ""Elaine_Irwin_Mellencamp""",228,0, f 129 | n002421," ""Elena_Risteska""",329,0, f 130 | n002474," ""Ellen_Johnson""",357,0, f 131 | n002475," ""Ellen_Muth""",270,0, f 132 | n002503," ""Emanuela_de_Paula""",387,0, f 133 | n002517," ""Emilio_Butragueño""",380,0, m 134 | n002556," ""Enrica_Bonaccorti""",216,0, f 135 | n002561," ""Enrique_Peña_Nieto""",495,0, m 136 | n002574," ""Eric_Christian_Olsen""",464,0, m 137 | n002581," ""Eric_Mabius""",451,0, m 138 | n002596," ""Erika_Pluhar""",296,0, f 139 | n002604," ""Erinn_Bartlett""",206,0, f 140 | n002647," ""Eva_Ayllón""",378,0, f 141 | n002659," ""Evan_Goldberg""",240,0, m 142 | n002664," ""Evangelos_Venizelos""",611,0, m 143 | n002669," ""Eve_Plumb""",394,0, f 144 | n002681," ""Ewa_Szykulska""",239,0, f 145 | n002684," ""Ewan_McGregor""",554,0, m 146 | n002703," ""Fabio_Fulco""",253,0, m 147 | n002715," ""Facundo_Conte""",376,0, m 148 | n002743," ""Fatma_Girik""",294,0, f 149 | n002749," ""Federico_Castelluccio""",325,0, m 150 | n002761," ""Felipe_Alou""",262,0, m 151 | n002763," ""Felipe_Calderón""",477,0, m 152 | n002770," ""Felix_Brych""",346,0, m 153 | n002773," ""Femi_Fani-Kayode""",158,0, m 154 | n002775," ""Feng_Xiaogang""",223,0, m 155 | n002803," ""Filiz_Akın""",361,0, f 156 | n002838," ""Fran_Lebowitz""",382,0, f 157 | n002840," ""Fran_Mérida""",240,0, m 158 | n002855," ""Francis_Fulton-Smith""",423,0, m 159 | n002857," ""Francis_Huster""",377,0, m 160 | n002869," ""Frank_Elstner""",394,0, m 161 | n002873," ""Frank_McCourt""",326,0, m 162 | n002878," ""Frank_de_Boer""",515,0, m 163 | n002880," ""Frankie_Muniz""",495,0, m 164 | n002884," ""Frans_Duijts""",276,0, m 165 | n002889," ""François_Hollande""",483,0, m 166 | n002891," ""François_Vincentelli""",248,0, m 167 | n002894," ""Françoise_Dorléac""",386,0, f 168 | n003001," ""Genelia_DSouza""",561,0, f 169 | n003009," ""Geoffrey_Edelsten""",248,0, m 170 | n003079," ""Giggs""",276,0, m 171 | n003092," ""Gillian_Chung""",477,0, f 172 | n003093," ""Gillian_Leigh""",288,0, f 173 | n003104," ""Ginette_Reno""",312,0, f 174 | n003115," ""Giorgio_Tirabassi""",298,0, m 175 | n003134," ""Giuliano_Stroe""",256,0, m 176 | n003140," ""Glaiza_de_Castro""",442,0, f 177 | n003205," ""Gregg_Popovich""",542,0, m 178 | n003215," ""Grutle_Kjellson""",221,0, m 179 | n003217," ""Grzegorz_Schetyna""",641,0, m 180 | n003258," ""Guus_Meeuwis""",352,0, m 181 | n003277," ""Göran_Eriksson""",215,0, m 182 | n003288," ""Ha_Ji-won""",329,0, f 183 | n003298," ""Haley_Joel_Osment""",501,0, m 184 | n003356," ""Harumi_Nemoto""",219,0, f 185 | n003379," ""Heath_Hocking""",148,0, m 186 | n003415," ""Hendrik_Pekeler""",266,0, m 187 | n003430," ""Heo_Young-saeng""",420,0, m 188 | n003436," ""Herbert_Kickl""",159,0, m 189 | n003461," ""Himesh_Reshammiya""",488,0, m 190 | n003468," ""Hizaki""",290,0, f 191 | n003490," ""Hoàng_Thùy_Linh""",323,0, f 192 | n003513," ""Héctor_Timerman""",411,0, m 193 | n003540," ""Iekeliene_Stange""",477,0, f 194 | n003554," ""Ileana_DCruz""",490,0, f 195 | n003570," ""Indira_Radić""",378,0, f 196 | n003606," ""Iris_Strubegger""",387,0, f 197 | n003635," ""Itsunori_Onodera""",304,0, m 198 | n003653," ""Ivo_Josipović""",574,0, m 199 | n003675," 
""Jacek_Rozenek""",455,0, m 200 | n003725," ""Jaime_Ray_Newman""",158,0, f 201 | n003728," ""Jake_Gyllenhaal""",370,0, m 202 | n003775," ""James_Younghusband""",458,0, m 203 | n003786," ""Jamie_Lomas""",275,0, m 204 | n003836," ""Jared_Followill""",468,0, m 205 | n003873," ""Jasper_Cillessen""",404,0, m 206 | n003881," ""Javiera_Contador""",241,0, f 207 | n003894," ""Jay-Z""",433,0, m 208 | n003896," ""Jayalalithaa""",511,0, f 209 | n003917," ""Jean-Julien_Rojer""",243,0, m 210 | n003958," ""Jeffrey_Dean""",227,0, m 211 | n004007," ""Jero_Wacik""",377,0, m 212 | n004050," ""Jessica_Seinfeld""",513,0, f 213 | n004064," ""Jette_Joop""",346,0, f 214 | n004068," ""Ji-Won_To""",120,0, f 215 | n004070," ""Jictzad_Viña""",137,0, f 216 | n004078," ""Jim_Balsillie""",225,0, m 217 | n004085," ""Jim_Gaffigan""",382,0, m 218 | n004123," ""Joachim_Brudziński""",306,0, m 219 | n004157," ""Joe_Giudice""",247,0, m 220 | n004180," ""Johan_Boskamp""",282,0, m 221 | n004208," ""John_Ham""",290,0, m 222 | n004233," ""John_Wesley_Shipp""",286,0, m 223 | n004239," ""Johnny_Lozada""",306,0, m 224 | n004240," ""Johnny_McDaid""",368,0, m 225 | n004243," ""Johnny_Vegas""",411,0, m 226 | n004281," ""Jonathan_Wilkes""",250,0, m 227 | n004333," ""Joseph_Stiglitz""",344,0, m 228 | n004338," ""Josh_Kelley""",426,0, m 229 | n004353," ""José_Enrique""",394,0, m 230 | n004357," ""José_Manuel_Barroso""",475,0, m 231 | n004366," ""José_Mujica""",398,0, m 232 | n004372," ""Joumana_Kidd""",258,0, f 233 | n004380," ""João_Carlos_Paes_Mendonça""",168,0, m 234 | n004387," ""Juan_Diego_Botto""",221,0, m 235 | n004394," ""Juan_Osorio""",222,0, m 236 | n004411," ""Judith_Giuliani""",269,0, f 237 | n004424," ""Julia_Görges""",521,0, f 238 | n004440," ""Julianne_Michelle""",250,0, f 239 | n004449," ""Julie_Piétri""",322,0, f 240 | n004461," ""Julio_Iglesias""",329,0, m 241 | n004469," ""Jung_Jae-sung""",138,0, m 242 | n004482," ""Justin_Masterson""",249,0, m 243 | n004486," ""Justin_Tuck""",207,0, m 244 | n004555," ""Kara_Hui""",294,0, f 245 | n004563," ""Kareena_Kapoor_Khan""",761,0, f 246 | n004580," ""Karin_Stoiber""",319,0, f 247 | n004586," ""Karl_Urban""",527,0, m 248 | n004588," ""Karl-Heinz_Rummenigge""",382,0, m 249 | n004634," ""Katharina_Thalbach""",462,0, f 250 | n004635," ""Katharina_Wagner""",278,0, f 251 | n004652," ""Katie_Holmes""",506,0, f 252 | n004658," ""Katilette""",360,0, f 253 | n004661," ""Katja_Riemann""",467,0, f 254 | n004662," ""Katja_Weitzenböck""",273,0, f 255 | n004663," ""Katlego_Mphela""",214,0, m 256 | n004678," ""Kazimierz_Moskal""",209,0, m 257 | n004679," ""Kazimierz_Nycz""",465,0, m 258 | n004709," ""Kemal_Kılıçdaroğlu""",546,0, m 259 | n004712," ""Ken_Davitian""",397,0, m 260 | n004719," ""Kendrick_Perkins""",311,0, m 261 | n004738," ""Kevin_Dillon""",537,0, m 262 | n004743," ""Kevin_Jonas""",524,0, m 263 | n004756," ""Kevin_Sorbo""",441,0, m 264 | n004771," ""Khloé_Kardashian""",463,0, f 265 | n004788," ""Kim_Hyun-joong""",251,0, m 266 | n004793," ""Kim_Jong-un""",464,0, m 267 | n004798," ""Kim_Ki-duk""",222,0, m 268 | n004801," ""Kim_Kötter""",253,0, f 269 | n004812," ""Kim_Tae-hoi""",372,0, f 270 | n004813," ""Kim_Wilde""",541,0, f 271 | n004826," ""Kimora_Lee_Simmons""",576,0, f 272 | n004850," ""Klaas_Heufer-Umlauf""",457,0, m 273 | n004885," ""Kris_Kristofferson""",377,0, m 274 | n004891," ""Krista_Lahteenmaki""",303,0, f 275 | n004898," ""Kristian_Luuk""",259,0, m 276 | n004905," ""Kristin_Scott_Thomas""",322,0, f 277 | n004911," ""Kristy_McNichol""",405,0, m 278 | n004915," 
""Krzysztof_Cugowski""",335,0, m 279 | n004925," ""Kubrat_Pulev""",284,0, m 280 | n004945," ""Kyle_Schmid""",334,0, m 281 | n004999," ""Lars_Ricken""",285,0, m 282 | n005073," ""Lee_Jong-suk""",314,0, m 283 | n005088," ""Leena_Chandavarkar""",252,0, f 284 | n005101," ""Len_Wiseman""",431,0, m 285 | n005104," ""Lena_Olin""",401,0, f 286 | n005112," ""Lenny_Henry""",419,0, m 287 | n005114," ""Leo_Sayer""",315,0, m 288 | n005120," ""Leonard_Cohen""",344,0, m 289 | n005122," ""Leonard_Lansink""",358,0, m 290 | n005123," ""Leonardo_DiCaprio""",339,0, m 291 | n005135," ""Leslie_Mandoki""",392,0, m 292 | n005136," ""Leslie_Moonves""",396,0, m 293 | n005137," ""Lesly_Masson""",173,0, f 294 | n005145," ""Levona_Lewis""",168,0, f 295 | n005148," ""Lewis_Jetta""",248,0, m 296 | n005157," ""Li_Xiaoxia""",233,0, f 297 | n005159," ""Liam_Neeson""",527,0, m 298 | n005179," ""Lin_Dan""",469,0, m 299 | n005181," ""Linda_Hogan""",397,0, f 300 | n005188," ""Lindsey_Buckingham""",391,0, m 301 | n005225," ""Liza_Huber""",241,0, f 302 | n005226," ""Liza_Minnelli""",489,0, f 303 | n005233," ""Lolita_Davidovich""",279,0, f 304 | n005280," ""Low_Ki""",206,0, m 305 | n005282," ""Lu_Parker""",250,0, f 306 | n005294," ""Luca_Parmitano""",270,0, m 307 | n005301," ""Lucia_Šoralová""",266,0, f 308 | n005303," ""Luciana_Barroso""",468,0, f 309 | n005306," ""Luciana_Paluzzi""",256,0, f 310 | n005312," ""Lucie_Bílá""",564,0, f 311 | n005316," ""Lucien_Favre""",442,0, m 312 | n005319," ""Lucía_Méndez""",479,0, f 313 | n005326," ""Luis_Armando_Reynoso""",133,0, m 314 | n005328," ""Luis_Ernesto_Franco""",208,0, m 315 | n005334," ""Luis_Suárez""",614,0, m 316 | n005340," ""Luiz_Inácio_Lula_da_Silva""",485,0, m 317 | n005347," ""Luke_Hemsworth""",422,0, m 318 | n005359," ""Lute_Olson""",202,0, m 319 | n005373," ""Lynsey_de_Paul""",275,0, f 320 | n005377," ""Lê_Quang_Liêm""",271,0, m 321 | n005380," ""MC_Harvey""",249,0, m 322 | n005417," ""Magdalena_Modra""",318,0, f 323 | n005425," ""Magic_Juan""",223,0, m 324 | n005427," ""Magnus_Norman""",207,0, m 325 | n005448," ""Mala_Rodríguez""",300,0, f 326 | n005473," ""Mandy_Teefey""",461,0, f 327 | n005474," ""Mandy_Wong""",243,0, f 328 | n005490," ""Manuel_Neuer""",514,0, m 329 | n005513," ""Marc_Rzatkowski""",207,0, m 330 | n005552," ""Marco_Reus""",396,0, m 331 | n005565," ""Mareike_Carrière""",271,0, m 332 | n005577," ""Margareta_Pâslaru""",369,0, f 333 | n005607," ""Marian_Opania""",340,0, m 334 | n005612," ""Marianne_Faithfull""",397,0, f 335 | n005619," ""Maricar_Reyes""",337,0, f 336 | n005621," ""Marie_Dorin_Habert""",255,0, f 337 | n005623," ""Marie_Laforêt""",283,0, f 338 | n005630," ""Mariel_Rodriguez""",365,0, f 339 | n005633," ""Marielle_Heller""",222,0, f 340 | n005636," ""Marika_Gombitová""",341,0, f 341 | n005639," ""Marilyn_McCoo""",313,0, f 342 | n005648," ""Mario_Cantone""",408,0, m 343 | n005652," ""Mario_Kempes""",293,0, m 344 | n005664," ""Mariska_Veres""",236,0, f 345 | n005666," ""Marissa_Jaret_Winokur""",478,0, f 346 | n005668," ""Marit_Breivik""",123,0, f 347 | n005670," ""Marius_Müller-Westernhagen""",381,0, m 348 | n005680," ""Mark_Burnett""",386,0, m 349 | n005693," ""Mark_Owen""",395,0, m 350 | n005695," ""Mark_Rylance""",325,0, m 351 | n005706," ""Marleen_van_Iersel""",373,0, f 352 | n005709," ""Marlo_Thomas""",434,0, f 353 | n005723," ""Martha_Reeves""",530,0, f 354 | n005726," ""Martie_Maguire""",461,0, f 355 | n005730," ""Martin_Gore""",363,0, m 356 | n005748," ""Marty_Feldman""",203,0, m 357 | n005755," ""Marvelous_Marvin_Hagler""",263,0, m 358 | 
n005758," ""Marvin_Humes""",584,0, m 359 | n005762," ""Mary_Elizabeth_Mastrantonio""",246,0, f 360 | n005764," ""Mary_Hart""",357,0, f 361 | n005776," ""María_Corina_Machado""",447,0, f 362 | n005799," ""Mathias_Lauridsen""",330,0, m 363 | n005831," ""Matthew_Perry""",423,0, m 364 | n005832," ""Matthew_Reeve""",284,0, m 365 | n005833," ""Matthew_Stafford""",229,0, m 366 | n005872," ""Max_Handelman""",148,0, m 367 | n005915," ""Megan_Kelly""",339,0, f 368 | n005956," ""Mellody_Hobson""",388,0, f 369 | n005963," ""Menna_Shalabi""",254,0, f 370 | n005973," ""Meryl_Davis""",467,0, f 371 | n006053," ""Michelle_Chen""",378,0, f 372 | n006123," ""Mikey_Way""",452,0, m 373 | n006134," ""Mile_Jedinak""",366,0, m 374 | n006211," ""Mohammad_Ali""",249,0, m 375 | n006247," ""Morgan_Tsvangirai""",424,0, m 376 | n006301," ""Mía_Maestro""",468,0, f 377 | n006347," ""Nancy_Juvonen""",302,0, f 378 | n006451," ""Nelly_Makdessy""",249,0, f 379 | n006458," ""Ness_Wadia""",341,0, m 380 | n006497," ""Niclas_Wahlgren""",259,0, m 381 | n006531," ""Nicolás_Maduro""",527,0, m 382 | n006532," ""Nicu_Paleru""",192,0, m 383 | n006574," ""Nina_García""",488,0, f 384 | n006626," ""Néstor_Kirchner""",371,0, m 385 | n006653," ""Olga_Fatkulina""",126,0, f 386 | n006659," ""Oliver_Berben""",427,0, m 387 | n006772," ""Park_Bo-young""",520,0, f 388 | n006808," ""Patrick_Fabian""",292,0, m 389 | n006858," ""Paul_Waaktaar-Savoy""",256,0, m 390 | n006909," ""Pelé""",365,0, m 391 | n006922," ""Percy_Gibson""",317,0, m 392 | n006987," ""Philipp_Rösler""",98,0, m 393 | n006992," ""Philippe_Junot""",175,0, m 394 | n007008," ""Piedad_Córdoba""",387,0, f 395 | n007021," ""Pierre-Emerick_Aubameyang""",215,0, m 396 | n007068," ""Prince_Félix_of_Luxembourg""",291,0, m 397 | n007104," ""Quirin_Berg""",281,0, m 398 | n007121," ""Rachel_Bolan""",446,0, m 399 | n007133," ""Radamel_Falcao""",353,0, m 400 | n007146," ""Rafael_Cardoso""",360,0, m 401 | n007159," ""Raghava_Lawrence""",367,0, m 402 | n007162," ""Rahat_Fateh_Ali_Khan""",343,0, m 403 | n007166," ""Rahul_Dravid""",220,0, m 404 | n007169," ""Raine_Maida""",201,0, m 405 | n007210," ""Rande_Gerber""",383,0, m 406 | n007240," ""Ray_Mabus""",347,0, m 407 | n007241," ""Ray_Newman""",221,0, f 408 | n007246," ""Raymond_van_Barneveld""",368,0, m 409 | n007261," ""Recep_Tayyip_Erdoğan""",338,0, m 410 | n007296," ""René_Girard""",127,0, m 411 | n007358," ""Rick_Rose""",141,0, m 412 | n007368," ""Ricky_Schroder""",487,0, m 413 | n007381," ""Rina_Uchiyama""",152,0, f 414 | n007385," ""Risa_Niigaki""",363,0, f 415 | n007397," ""Rob_Kardashian""",380,0, m 416 | n007439," ""Roberto_Costa""",209,0, m 417 | n007474," ""Rocío_Igarzábal""",412,0, f 418 | n007531," ""Ron-Robert_Zieler""",397,0, m 419 | n007548," ""Roop_Kumar_Rathod""",349,0, m 420 | n007571," ""Rosi_Mittermaier""",336,0, f 421 | n007594," ""Ruba_Nadda""",174,0, f 422 | n007650," ""Ryan_Sweeting""",249,0, m 423 | n007651," ""Ryne_Sandberg""",243,0, m 424 | n007664," ""SATOMI""",183,0, f 425 | n007668," ""Sabina_Classen""",150,0, f 426 | n007700," ""Salvatore_Adamo""",277,0, m 427 | n007703," ""Sam_Attwater""",325,0, m 428 | n007753," ""Sandra_Izbașa""",511,0, f 429 | n007854," ""Sebastian_Arcelus""",299,0, m 430 | n007865," ""Sebastian_Schipper""",200,0, m 431 | n007900," ""Seohyun""",432,0, f 432 | n007909," ""Serena_Autieri""",426,0, f 433 | n007919," ""Sergio_Agüero""",530,0, m 434 | n007949," ""Shahar_Peer""",287,0, f 435 | n007951," ""Shakti_Anand""",191,0, m 436 | n008003," ""Sheena_Halili""",364,0, f 437 | n008015," 
""Sheri_Moon_Zombie""",269,0, f 438 | n008020," ""Sheryl_Berkoff""",300,0, f 439 | n008028," ""Shimon_Moore""",322,0, m 440 | n008036," ""Shinya_Yamanaka""",267,0, m 441 | n008037," ""Shinzō_Abe""",571,0, m 442 | n008047," ""Shreyas_Talpade""",456,0, m 443 | n008056," ""Sibi_Blazic""",425,0, f 444 | n008105," ""Siouxsie_Sioux""",310,0, f 445 | n008108," ""Siti_Hardiyanti_Rukmana""",236,0, f 446 | n008110," ""Siva_Balaji""",211,0, m 447 | n008155," ""Sonique""",202,0, f 448 | n008164," ""Soon-Yi_Previn""",423,0, f 449 | n008179," ""Soundarya_R._Ashwin""",308,0, f 450 | n008183," ""Spencer_Pratt""",480,0, m 451 | n008213," ""Stella_Arroyave""",292,0, f 452 | n008314," ""Susan_Downey""",365,0, f 453 | n008317," ""Susana_Dosamantes""",228,0, f 454 | n008325," ""Susie_Feldman""",288,0, f 455 | n008361," ""Ségolène_Royal""",617,0, f 456 | n008436," ""Taylor_Hackford""",448,0, m 457 | n008484," ""Tessa_Mittelstaedt""",255,0, f 458 | n008488," ""Thaao_Penghlis""",336,0, m 459 | n008528," ""Thomas_Mesereau""",244,0, m 460 | n008530," ""Thomas_Müller""",510,0, m 461 | n008539," ""Thure_Riefenstein""",225,0, m 462 | n008558," ""Tim_Krul""",398,0, m 463 | n008567," ""Timothy_Shriver""",249,0, m 464 | n008595," ""Todd_Tucker""",306,0, m 465 | n008613," ""Tom_Thibodeau""",429,0, m 466 | n008615," ""Tom_Welling""",267,0, m 467 | n008630," ""Tomo_Yanagishita""",280,0, m 468 | n008653," ""Tony_Montana""",217,0, m 469 | n008655," ""Tony_Oller""",355,0, m 470 | n008662," ""Tooske_Ragas""",206,0, f 471 | n008674," ""Touriya_Haoud""",243,0, f 472 | n008682," ""Traian_Băsescu""",506,0, m 473 | n008710," ""Trudie_Styler""",507,0, f 474 | n008773," ""Vahid_Halilhodžić""",459,0, m 475 | n008778," ""Valen_Hsu""",240,0, f 476 | n008829," ""Vasyl_Lomachenko""",268,0, m 477 | n008858," ""Vicente_Fernández""",362,0, m 478 | n008890," ""Viktor_Orbán""",525,0, m 479 | n008932," ""Viveca_Paulin""",290,0, f 480 | n008937," ""Vivica_A._Fox""",496,0, f 481 | n008948," ""Vlado_Kalember""",230,0, m 482 | n008958," ""Václav_Klaus""",528,0, m 483 | n008989," ""Wendi_Deng_Murdoch""",481,0, f 484 | n009028," ""Wojciech_Szczęsny""",509,0, m 485 | n009114," ""Yoyo_Chen""",210,0, f 486 | n009123," ""Yulia_Volkova""",488,0, f 487 | n009175," ""Zhang_Xin""",113,0, f 488 | n009185," ""Zlatan_Ibrahimović""",472,0, m 489 | n009199," ""Zoë_Lister""",383,0, f 490 | n009213," ""Zélia_Duncan""",330,0, f 491 | n009225," ""Álvaro_Uribe""",465,0, m 492 | n009232," ""Élisabeth_Guigou""",380,0, f 493 | n009235," ""Éric_Boullier""",418,0, m 494 | n009283," ""Jackie_Chan""",296,0, m 495 | n009285," ""Pei-pei_Cheng""",179,0, f 496 | n009286," ""Rosamund_Kwan""",272,0, f 497 | n009287," ""Ni_Ping""",226,0, f 498 | n009288," ""Song_Dandan""",252,0, f 499 | n009289," ""Pan_Hong""",236,0, f 500 | n009291," ""Stephen_Chow""",200,0, m 501 | n009294," ""Lang_Ping""",234,0, f -------------------------------------------------------------------------------- /src/__pycache__/dataloader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeomko22/ComparatorNetwork_pytorch/b0c3dfac619043d57ac72acede5e7cbe89a52c38/src/__pycache__/dataloader.cpython-36.pyc -------------------------------------------------------------------------------- /src/__pycache__/models.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yeomko22/ComparatorNetwork_pytorch/b0c3dfac619043d57ac72acede5e7cbe89a52c38/src/__pycache__/models.cpython-36.pyc
--------------------------------------------------------------------------------

/src/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yeomko22/ComparatorNetwork_pytorch/b0c3dfac619043d57ac72acede5e7cbe89a52c38/src/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------

/src/dataloader.py:
--------------------------------------------------------------------------------
# File overview: reads the face images stored on disk together with their
# metadata, preprocesses them into a form suitable for training, and hands
# them to the network.

# Training-data format: the Comparator Network takes two image bundles, called
# templates, as one input. A template groups several images of a single person.
# If the two templates show the same person the pair is labeled positive;
# if they show different people it is labeled negative.
# Six images (two templates of three) plus one label form a single sample,
# and samples are grouped into batches before being passed to the network.

import torch
from torch.utils.data import Dataset
import numpy as np
import random
import cv2
import os
import utils
import pandas as pd

class CustomDataset(Dataset):
    def __init__(self, train_dir=None, test_dir=None):
        self.label_path = '../labels/'
        self.identity_train = self.label_path + 'identity_train.csv'
        self.identity_test = self.label_path + 'identity_test.csv'

        self.id_label_dict = utils.get_id_label_map(self.identity_train)

        if train_dir is not None:
            # paths for the training data set
            self.img_label = pd.read_csv(self.identity_train, delimiter=',')
            self.img_dir = train_dir
        else:
            # paths for the test data set
            self.img_label = pd.read_csv(self.identity_test, delimiter=',')
            self.img_dir = test_dir

        # filter used for the motion-blur augmentation
        self.motion_filter_size = 15
        self.motion_blur_filter = np.zeros((self.motion_filter_size, self.motion_filter_size))
        self.motion_blur_filter[int((self.motion_filter_size - 1) / 2), :] = np.ones(self.motion_filter_size)
        self.motion_blur_filter = self.motion_blur_filter / self.motion_filter_size


    # Called whenever the data loader fetches an item.
    # Returns one pair of templates together with a label.
    # Data augmentation is applied while each template is built.
    def __getitem__(self, index):
        # Decide whether to build a positive or a negative pair of templates:
        # 0 uses two different people, 1 uses the same person for both.
        label = random.randint(0, 1)

        template1 = set()
        template2 = set()

        identity_one = self.img_label.iloc[index, 0]
        identity_two = ''

        # templates built from two different people
        if label == 0:
            # randomly pick a person other than the current one
            # (hard sampling is planned to be added here later)
            while True:
                other_identity_index = random.randint(0, len(self.img_label) - 1)
                if other_identity_index != index:
                    break
            identity_two = self.img_label.iloc[other_identity_index, 0]

        # templates built from the same person
        else:
            identity_two = identity_one

        # look up the class index of each person
        class1 = self.id_label_dict.get(identity_one)
        class2 = self.id_label_dict.get(identity_two)

        # resolve the image folders of both people and list their images
        cur_img_dir = self.img_dir + identity_one + '/'
        cur_img_list = os.listdir(cur_img_dir)
        other_img_dir = self.img_dir + identity_two + '/'
        other_img_list = os.listdir(other_img_dir)

        # read three images of identity one, convert them to tensors
        # and add them to template1
        while len(template1) < 3:
            # read an image and apply data augmentation
            cur_img_path = cur_img_dir + cur_img_list[random.randint(0, len(cur_img_list) - 1)]
            cur_img = cv2.imread(cur_img_path)
            cur_img = self.transform_img(cur_img)

            # convert the image to a tensor and store it in the template
            cur_img = cur_img.transpose((2, 0, 1))
            cur_img = torch.from_numpy(np.flip(cur_img, axis=0).copy()).float()
            template1.add(cur_img)

        # read three images of identity two, convert them to tensors
        # and add them to template2
        while len(template2) < 3:
            # read an image and apply data augmentation
            cur_img_path = other_img_dir + other_img_list[random.randint(0, len(other_img_list) - 1)]
            cur_img = cv2.imread(cur_img_path)
            cur_img = self.transform_img(cur_img)

            # convert the image to a tensor and store it in the template
            cur_img = cur_img.transpose((2, 0, 1))
            cur_img = torch.from_numpy(np.flip(cur_img, axis=0).copy()).float()

            template2.add(cur_img)

        template1 = list(template1)
        template2 = list(template2)

        # convert the label to a tensor
        label = torch.LongTensor(np.array([label], dtype=np.int64))

        # bundle the two templates, their class indices and the pair label into one sample
        sample = {"template1": template1, "template2": template2, "class1": class1, "class2": class2,
                  "label": label}
        return sample

    def __len__(self):
        return len(self.img_label)

    # Image preprocessing:
    # 1. Resize so that the shorter of height/width becomes 144, then center-crop the other side.
    # 2. With a 20% chance each, apply horizontal flip, Gaussian blur, motion blur and grayscale conversion.
    def transform_img(self, cur_img):
        cur_img = self.img_resize(cur_img)

        # horizontal flip with 20% probability
        if random.randint(1, 10) < 3:
            cur_img = cur_img[:, ::-1]

        # Gaussian blur with 20% probability
        if random.randint(1, 10) < 3:
            cur_img = cv2.GaussianBlur(cur_img, (5, 5), 0)

        # motion blur with 20% probability
        if random.randint(1, 10) < 3:
            cur_img = cv2.filter2D(cur_img, -1, self.motion_blur_filter)

        # grayscale conversion with 20% probability
        if random.randint(1, 10) < 3:
            cur_img = cv2.cvtColor(cur_img, cv2.COLOR_BGR2GRAY)
            cur_img = np.stack((cur_img,) * 3, -1)

        # subtract 127.5 from every channel and clamp negative values to 0
        subtract_value = np.full((144, 144, 3), 127.5)
        cur_img = (cur_img - subtract_value).clip(min=0)

        return cur_img

    # Resize the input image to 144x144.
    def img_resize(self, cur_img):
        height, width = cur_img.shape[:2]

        # If the width is larger than the height, scale the height to 144 ...
        if width > height:
            transform_ratio = 144 / height
            new_width = int(width * transform_ratio)
            resized_img = cv2.resize(cur_img, (new_width, 144))

            # ... then center-crop the width down to 144.
            if new_width != 144:
                crop_size = int((new_width - 144) / 2)

                if new_width % 2 == 0:
                    resized_img = resized_img[0:144, crop_size:new_width - crop_size]
                else:
                    resized_img = resized_img[0:144, crop_size:new_width - crop_size - 1]


        # If the height is larger than the width, scale the width to 144 ...
        else:
            transform_ratio = 144 / width
            new_height = int(height * transform_ratio)
            resized_img = cv2.resize(cur_img, (144, new_height))

            # ... then center-crop the height down to 144.
            if new_height != 144:
                crop_size = int((new_height - 144) / 2)

                if new_height % 2 == 0:
                    resized_img = resized_img[crop_size:new_height - crop_size, 0:144]
                else:
                    resized_img = resized_img[crop_size:new_height - crop_size - 1, 0:144]

        return resized_img
--------------------------------------------------------------------------------
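The loader above returns each sample as a dict holding two templates (lists of three image tensors each), the two class indices, and the pair label; with PyTorch's default collate function each list entry is batched separately. A minimal smoke-test sketch, where `./train/` is a hypothetical placeholder for an unpacked VGGFace2 train directory:

```python
import torch
from dataloader import CustomDataset

train_data = CustomDataset(train_dir='./train/')   # hypothetical path
loader = torch.utils.data.DataLoader(train_data, batch_size=2,
                                     shuffle=True, drop_last=True)

sample = next(iter(loader))
print(len(sample['template1']))      # 3 image tensors per template
print(sample['template1'][0].shape)  # torch.Size([2, 3, 144, 144])
print(sample['label'].shape)         # torch.Size([2, 1])
```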

/src/models.py:
--------------------------------------------------------------------------------
# File overview: the neural-network models that make up the Comparator Network.
# Contains the Detector, Attender and Comparator classes, plus the
# ComparatorNetwork class that combines them.

import torch.nn as nn
import torch
from torch.autograd import Variable

# ComparatorNetwork model
# Composed of three parts: Detector, Attender and Comparator.
class ComparatorNetwork(nn.Module):
    def __init__(self, batch_size=None, K=None):
        super(ComparatorNetwork, self).__init__()
        self.K = K
        self.batch_size = batch_size
        self.detector = Detector(Bottleneck, [3, 4, 1], K=self.K, batch_size=self.batch_size, regularization='diversity')
        self.attender = Attender(K=self.K, batch_size=self.batch_size)
        self.comparator = Comparator(K=self.K, batch_size=self.batch_size)

    def detect(self, template1, template2, class1, class2, label, isTest):
        return self.detector(template1, template2, class1, class2, label, isTest)

    def attend(self, local_landmarks, global_map, isTest):
        return self.attender(local_landmarks, global_map)

    def compare(self, temp1_attended_vector, temp2_attended_vector, isTest):
        return self.comparator(temp1_attended_vector, temp2_attended_vector)

    def test(self, template1, template2, label):
        label = Variable(label).cuda()
        temp1_local_landmarks, temp2_local_landmarks, temp1_global_maps, temp2_global_maps, loss_cls1, loss_cls2, loss_reg = self.detect(template1, template2, 0, 0, label, isTest=True)

        temp1_attended_vector = self.attend(temp1_local_landmarks, temp1_global_maps, isTest=True)
        temp2_attended_vector = self.attend(temp2_local_landmarks, temp2_global_maps, isTest=True)

        similarity_vector = self.compare(temp1_attended_vector, temp2_attended_vector, isTest=True)
        return similarity_vector

# Detector
# Follows the overall structure of ResNet-50, with these differences:
# the final FC layer is sized to the number of identities (8651 here),
# K local score maps are extracted with a 1x1 convolution,
# the local score maps are merged by max projection,
# and the feature map before the last layers is returned as the global map.
class Detector(nn.Module):
    def __init__(self, block, layers, num_classes=8651, K=None, batch_size=None, regularization=None):
        super(Detector, self).__init__()
        self.K = K
        self.batch_size = batch_size
        self.regularization = regularization

        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # residual layers
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1)

        # avgpool and fc for the identity classification
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(9216, num_classes)

        # 1x1 convolution that extracts the K local feature maps
        self.conv_1x1_K = nn.Conv2d(1024, self.K, kernel_size=1, stride=1, padding=0, bias=False)

        # components needed by the diversity regularizer
        self.softmax = nn.Softmax2d()
        self.criterion = nn.CrossEntropyLoss()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, template1, template2, class1, class2, label, isTest=None):
        # Run identity classification on every image inside each template.
        # Each image yields one global feature map, K+1 local feature maps
        # and one classification output.
        temp1_classify_outputs = []
        temp1_local_landmarks = []
        temp1_K_local_maps = []
        temp1_global_maps = []

        for img_tensor in template1:
            img_tensor = Variable(img_tensor).cuda()
            global_map, classify_output, local_landmarks, K_local_maps = self.identity_classify(img_tensor)
            temp1_classify_outputs.append(classify_output)
            temp1_local_landmarks.append(local_landmarks)
            temp1_K_local_maps.append(K_local_maps)
            temp1_global_maps.append(global_map)

        # Average the outputs of the three images to get the final classification
        # result, which is compared against the class label to compute the loss.
        temp1_classify_avg = (temp1_classify_outputs[0] + temp1_classify_outputs[1]
                              + temp1_classify_outputs[2]) / 3

        # Repeat the same steps for template 2.
        temp2_classify_outputs = []
        temp2_local_landmarks = []
        temp2_K_local_maps = []
        temp2_global_maps = []
        for img_tensor in template2:
            img_tensor = Variable(img_tensor).cuda()
            global_map, classify_output, local_landmarks, K_local_maps = self.identity_classify(img_tensor)
            temp2_classify_outputs.append(classify_output)
            temp2_local_landmarks.append(local_landmarks)
            temp2_K_local_maps.append(K_local_maps)
            temp2_global_maps.append(global_map)

        temp2_classify_avg = (temp2_classify_outputs[0] + temp2_classify_outputs[1] + temp2_classify_outputs[2]) / 3

        if isTest:
            loss_cls1 = 0
            loss_cls2 = 0

        else:
            loss_cls1 = self.criterion(temp1_classify_avg, class1)
            loss_cls2 = self.criterion(temp2_classify_avg, class2)

        # ------------- landmark regularization ------------- #
        # The paper reports regularization loss values in the 0-1 range,
        # but this implementation currently returns values close to -300.
        # The issue is still being investigated, so the regularization term
        # is not applied during training for now.

        loss_reg = 0
        if self.regularization == 'diversity':
            max_projection1_array = []
            max_projection2_array = []

            for n in range(3):
                # regularization loss for template 1
                # softmax normalization
                cur_K_local_maps = temp1_K_local_maps[n]
                cur_K_local_maps = self.softmax(cur_K_local_maps)

                # max projection
                cur_max_projection = torch.max(cur_K_local_maps, 1, False)[0]
                cur_max_projection = torch.unsqueeze(cur_max_projection, dim=1)
                max_projection1_array.append(cur_max_projection)

                # regularization loss for template 2
                # softmax normalization
                cur_K_local_maps = temp2_K_local_maps[n]
                cur_K_local_maps = self.softmax(cur_K_local_maps)

                # max projection
                cur_max_projection = torch.max(cur_K_local_maps, 1, False)[0]
                cur_max_projection = torch.unsqueeze(cur_max_projection, dim=1)
                max_projection2_array.append(cur_max_projection)

            merged_max_map1 = torch.stack(max_projection1_array)
            merged_max_map2 = torch.stack(max_projection2_array)

            final_max_projection1 = torch.max(merged_max_map1, 0, False)[0]
            final_max_projection2 = torch.max(merged_max_map2, 0, False)[0]

            sum1 = ((final_max_projection1.sum(1)).sum(1)).sum(1).sum(0)
            sum2 = ((final_max_projection2.sum(1)).sum(1)).sum(1).sum(0)

            loss_reg1 = self.batch_size * 3 * self.K - sum1
            loss_reg2 = self.batch_size * 3 * self.K - sum2

            # The regularization values computed up to this point are
            # abnormally large, so they are not applied to model training.
            # loss_reg = loss_reg1 + loss_reg2

        # key-point regularization will be added here
        else:
            loss_reg = 0

        return temp1_local_landmarks, temp2_local_landmarks, temp1_global_maps, temp2_global_maps, loss_cls1, loss_cls2, loss_reg


    def identity_classify(self, x):
        # input_size : [batch_size, 3, 144, 144]
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        # output_size : [batch_size, 64, 72, 72]

        x = self.maxpool(x)
        x = self.layer1(x)
        # output_size : [batch_size, 256, 36, 36]

        x = self.layer2(x)
        # output_size : [batch_size, 512, 18, 18]

        # feature map handed over to the Attender
        global_map = self.layer3(x)
        # output_size : [batch_size, 1024, 18, 18]

        # extract the K local feature maps
        K_local_maps = self.conv_1x1_K(global_map)
        # output_size : [batch_size, K, 18, 18]

        # build a map that keeps only the maximum value across the K maps
        max_projection = torch.max(K_local_maps, 1, False)[0]
        max_projection = torch.unsqueeze(max_projection, dim=1)
        # output_size : [batch_size, 1, 18, 18]

        # append it to the K maps, producing the K+1 local landmark maps
        local_landmarks = torch.cat((K_local_maps, max_projection), 1)
        # output_size : [batch_size, K+1, 18, 18]

        # run the remaining face-identity classification
        x = self.maxpool(global_map)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # output_size : [batch_size, 9216]

        # face-identity classification output
        classify_output = self.fc(x)
        # output_size : [batch_size, 8651]

        return global_map, classify_output, local_landmarks, K_local_maps
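
# Sketch: a possible cause of the anomalous regularization values above
# (around -300 instead of the paper's 0-1 range) is that nn.Softmax2d
# normalizes across the K channels at every pixel rather than across the
# 18x18 spatial positions of each score map. Below is a standalone
# diversity-loss sketch using a spatial softmax instead (an assumption about
# the intended normalization; it is not wired into the model). Each
# normalized map then sums to 1 over space, so the per-image loss stays
# within [0, K - 1].
def diversity_loss_sketch(K_local_maps):
    # K_local_maps: [batch_size, K, H, W] raw local score maps
    b, k, h, w = K_local_maps.size()

    # softmax over the spatial positions, independently for each map
    spatial = nn.functional.softmax(K_local_maps.view(b, k, h * w), dim=2)
    spatial = spatial.view(b, k, h, w)

    # max projection across the K maps, then the total covered probability mass
    coverage = torch.max(spatial, 1, False)[0].sum(1).sum(1)  # [batch_size]

    # perfectly diverse maps would cover K units of probability mass
    return (k - coverage).mean()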
# ResNet building blocks that the Detector is based on
def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

# Attender
# Performs recalibration and attention pooling on the local feature maps.
# This corresponds to selecting the better-quality images within a template.
class Attender(nn.Module):
    def __init__(self, K=None, batch_size=None):
        super(Attender, self).__init__()
        self.recalibrate = nn.Softmax2d()
        self.K = K
        self.batch_size = batch_size

    # Receives the local feature maps and global dense features
    # of the three images in a template.
    def forward(self, local_landmarks, global_maps):
        # iterate over the batch
        batch_tensor_list = []
        for b in range(self.batch_size):
            # iterate over the local feature maps, collecting
            # one feature descriptor per local region
            feature_descs = []

            for k in range(self.K + 1):
                # run attentional pooling per image and store the results
                attention_pooled_values = []
                for n in range(len(local_landmarks)):
                    # recalibrate the local feature maps of this image
                    cur_recalibrated_maps = self.recalibrate(local_landmarks[n])

                    # multiply the [1024, 18, 18] global feature map element-wise
                    # (broadcast over the channels) with the recalibrated local map,
                    # then sum over the spatial dimensions
                    temp_map = (global_maps[n][b] * cur_recalibrated_maps[b][k])
                    temp_sum = (temp_map.sum(1)).sum(1)
                    attention_pooled_values.append(temp_sum)

                # merge the per-image descriptors into one template-level descriptor
                feature_descs.append(sum(attention_pooled_values))

            # gather the descriptors of all local regions into a (K+1)x1024
            # feature map, then apply L2 normalization
            merged_feature = torch.stack(feature_descs)
            merged_feature = nn.functional.normalize(input=merged_feature, p=2, dim=1)

            # store it in the batch tensor list for batched computation
            batch_tensor_list.append(merged_feature)

        # convert the batch tensor list back into a tensor and return it;
        # this completes the attend stage
        result_tensor = torch.stack(batch_tensor_list)
        return result_tensor
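
# Sketch: the triple loop in Attender.forward computes, for each batch item b
# and landmark k, the sum over images n and positions (h, w) of
# global_maps[n][b, c, h, w] * softmax2d(local_landmarks[n])[b, k, h, w].
# An equivalent vectorized rewrite, for reference only (an illustration,
# not wired into the model):
def attend_vectorized_sketch(local_landmarks, global_maps):
    # local_landmarks: list of [B, K+1, H, W]; global_maps: list of [B, 1024, H, W]
    recalibrate = nn.Softmax2d()
    pooled = sum(torch.einsum('bkhw,bchw->bkc', recalibrate(lm), gm)
                 for lm, gm in zip(local_landmarks, global_maps))

    # L2-normalize each of the K+1 1024-d descriptors, as above
    return nn.functional.normalize(pooled, p=2, dim=2)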
# Comparator
# Concatenates the attended vectors of the two templates region by region,
# together with a one-hot vector marking which part each row represents.
# Each region is passed through its own fc layer, then max pooling and a
# final fc layer. The resulting similarity vectors are stacked into a batch
# and returned.
class Comparator(nn.Module):
    def __init__(self, K=None, batch_size=None):
        super(Comparator, self).__init__()
        self.K = K
        self.batch_size = batch_size
        # nn.ModuleList (rather than a plain dict) so that the experts'
        # parameters are registered with the module and seen by the optimizer
        self.local_experts = nn.ModuleList(
            [nn.Linear(2 * 1024 + self.K + 1, 2048) for k in range(self.K)])
        self.last_classifier = nn.Linear(2048, 2)

    def forward(self, temp1_attended_vector, temp2_attended_vector, batch_size=None, K=None):
        # one-hot vectors marking which part each row represents
        one_hot_2d = torch.zeros((self.K + 1), (self.K + 1))
        for i in range(self.K + 1):
            one_hot_2d[i][i] = one_hot_2d[i][i] + 1

        # stack the one-hot matrix batch_size times, then convert to a tensor
        one_hot_list = []
        for i in range(self.batch_size):
            one_hot_list.append(one_hot_2d)

        # concatenate the template 1 and 2 feature vectors with the one-hot vectors
        one_hot_tensor = Variable(torch.stack(one_hot_list)).cuda()
        concat_templates_partid = torch.cat((temp1_attended_vector, temp2_attended_vector, one_hot_tensor), dim=2)

        # pass each region through its own fc layer
        similarity_vector_list = []
        for b in range(self.batch_size):
            local_vector_list = []
            for k in range(self.K):
                cur_local_expert = self.local_experts[k]
                local_vector_list.append(cur_local_expert(concat_templates_partid[b][k]))

            # build the [K, 2048] feature map
            # (only the first K of the K+1 rows go through the experts here)
            local_tensor = torch.stack(local_vector_list)

            # extract a 1x2048 vector through max pooling
            max_pooled_tensor = torch.max(input=local_tensor, dim=0, keepdim=False)[0]
            max_pooled_tensor = torch.unsqueeze(max_pooled_tensor, dim=0)

            # pass the final fc layer to get the similarity vector for this pair
            similarity_vector = self.last_classifier(max_pooled_tensor)
            similarity_vector = similarity_vector.view(2)
            similarity_vector_list.append(similarity_vector)

        # stack the per-pair similarity vectors into a batch and return
        similarity_vector = torch.stack(similarity_vector_list)
        return similarity_vector
--------------------------------------------------------------------------------
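For reference, a minimal end-to-end smoke test of the detect/attend/compare pipeline above (hypothetical shapes; a CUDA device is required because the modules call `.cuda()` internally, and the `batch_size` passed at construction must match the tensors fed in):

```python
import torch
from models import ComparatorNetwork

B, K = 2, 12
net = ComparatorNetwork(batch_size=B, K=K).cuda()

def make_template():
    # a template is a list of three 144x144 RGB image tensors
    return [torch.randn(B, 3, 144, 144) for _ in range(3)]

label = torch.zeros(B, 1, dtype=torch.long)
similarity = net.test(make_template(), make_template(), label)
print(similarity.shape)  # torch.Size([2, 2]): same/different logits per pair
```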

/src/split_identity_meta.py:
--------------------------------------------------------------------------------
# File overview: the list of people in the VGGFace2 dataset is stored in
# identity_meta.csv. It is inconvenient that this file does not separate
# training identities from test identities, so this script splits it into
# identity_train.csv and identity_test.csv.

import csv

base_dir = '/usr/junny/VGGFace2/'
identity_meta_path = base_dir + 'identity_meta.csv'
identity_meta_reader = csv.reader(open(identity_meta_path, 'r'))

identity_train_path = '../labels/identity_train.csv'
identity_train_writer = csv.writer(open(identity_train_path, 'w'))

identity_test_path = '../labels/identity_test.csv'
identity_test_writer = csv.writer(open(identity_test_path, 'w'))

train_path = base_dir + 'train_list.txt'
train_file = open(train_path, 'r')

test_path = base_dir + 'test_list.txt'
test_file = open(test_path, 'r')

train_id_set = set()
test_id_set = set()

for line in train_file:
    train_id_set.add(line.split('/')[0])

for line in test_file:
    test_id_set.add(line.split('/')[0])

for i, line in enumerate(identity_meta_reader):
    if i == 0:
        identity_train_writer.writerow(line)
        identity_test_writer.writerow(line)
    else:
        if line[0] in train_id_set:
            identity_train_writer.writerow(line)
        else:
            identity_test_writer.writerow(line)
--------------------------------------------------------------------------------

/src/train.py:
--------------------------------------------------------------------------------
# File overview: loads the data loader and the models, then runs training.
import models
import dataloader
import torch
import torch.nn as nn
import utils
import sys
from torch.autograd import Variable

# parser for the parameters passed on the command line
args = utils.get_arg_parser()

# directory holding the training face images
input_dir = args.input_dir
if input_dir == 'none':
    print('Please input -i train_img_dir_path')
    sys.exit()

if input_dir[-1] != '/':
    input_dir += '/'

# basic training hyperparameters
num_epochs = 500
learning_rate = 0.0001
num_identities = 8651

# weights applied to the loss terms
a1 = 2
a2 = 5
a3 = 30

# The batch size is set to 10 to fit the hardware used here;
# the paper uses 64, so adjust this to your own environment.
batch_size = 10

# number of key facial regions to extract
K = 12

# create the directory where trained models are saved
utils.checkpoint_create()

# build the Comparator Network and move it to the GPU
comparator_network = models.ComparatorNetwork(batch_size=batch_size, K=K)
comparator_network.cuda()

# cross-entropy loss for the softmax training, Adam as the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(comparator_network.parameters(), lr=learning_rate)

# lr_scheduler halves the learning rate when the loss stops
# decreasing (an error plateau) for 10 consecutive epochs
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

# build the dataset object for training, then wrap it in a data loader
train_data = dataloader.CustomDataset(train_dir=input_dir)
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           drop_last=True)

# model training: iterate over the epochs
# (iter_count lives outside the epoch loop so it keeps counting across epochs)
iter_count = 0
for epoch in range(num_epochs):

    # read batches from the training data loader and train on them
    for i, sample in enumerate(train_loader):
        iter_count += 1
        # each template is a bundle of three image tensors
        template1 = sample['template1']
        template2 = sample['template2']

        # move the class indices and the label to the GPU
        class1 = Variable(sample['class1']).cuda()
        class1 = class1.squeeze()
        class2 = Variable(sample['class2']).cuda()
        class2 = class2.squeeze()

        label = Variable(sample['label']).cuda()
        label = label.squeeze()

        optimizer.zero_grad()

        # ---------- detect stage ---------- #
        temp1_local_landmarks, temp2_local_landmarks, temp1_global_maps, temp2_global_maps, loss_cls1, loss_cls2, loss_reg \
            = comparator_network.detect(template1, template2, class1, class2, label, isTest=False)

        # ---------- attend stage ---------- #
        temp1_attended_vector = comparator_network.attend(temp1_local_landmarks, temp1_global_maps, isTest=False)
        temp2_attended_vector = comparator_network.attend(temp2_local_landmarks, temp2_global_maps, isTest=False)

        # ---------- compare stage ---------- #
        similarity_vector = comparator_network.compare(temp1_attended_vector, temp2_attended_vector, isTest=False)
        loss_sim = criterion(similarity_vector, label)

        # Combine the classification, similarity and regularization losses
        # into the total loss, then backpropagate to train the network.
        total_loss = a1*(loss_cls1+loss_cls2) + a2*(loss_sim) + a3*(loss_reg)
        total_loss.backward()
        optimizer.step()

        if (i + 1) % 20 == 0:
            print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" % (epoch + 1, num_epochs, (i + 1), int(num_identities / batch_size), total_loss.item()))

        # halve a3 every 60000 iterations
        if iter_count == 60000:
            a3 *= 0.5
            iter_count = 0

    # save the model after every epoch
    torch.save(comparator_network.state_dict(), '../checkpoint/comparator_network.pkl')
--------------------------------------------------------------------------------
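Note that `lr_scheduler` above is constructed but never stepped; `ReduceLROnPlateau` only adjusts the learning rate when `step()` is called with a metric. A sketch of one way to wire it into the loop above, using the mean training loss per epoch as the plateau metric (an assumption; a held-out validation metric would be the more usual choice):

```python
# at the top of each epoch
epoch_loss_sum, epoch_batches = 0.0, 0

# inside the batch loop, after optimizer.step()
epoch_loss_sum += total_loss.item()
epoch_batches += 1

# at the end of each epoch, before saving the checkpoint
lr_scheduler.step(epoch_loss_sum / epoch_batches)
```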

/src/utils.py:
--------------------------------------------------------------------------------
# File overview: helper functions needed to train the Comparator Network.
# Reference: https://github.com/cydonia999/VGGFace2-pytorch

import csv
import argparse
import os

# Maps each person ID (e.g. n000002) to an integer ID that is easier for the
# network to classify, and returns the mapping as a dictionary.
# Called from dataloader.py.
def get_id_label_map(meta_file):
    meta_reader = csv.reader(open(meta_file))
    label_count = 0
    label_dict = {}

    for i, row in enumerate(meta_reader):
        # skip the CSV header row so it does not receive a class label
        if i == 0:
            continue
        label_dict.update({row[0]: label_count})
        label_count += 1

    return label_dict

# Creates and returns a parser for the parameters passed
# when running the training code.
def get_arg_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input',
                        dest="input_dir",
                        default="none",
                        help="Directory path which contains VGGFace2 dataset train images")

    parser.add_argument('-t', '--test',
                        dest="test_dir",
                        default="none",
                        help="Directory path which contains VGGFace2 dataset test images")

    parser.add_argument('-a', '--inputA',
                        dest="input_imgA",
                        default="none",
                        help="image A used for testing")

    parser.add_argument('-b', '--inputB',
                        dest="input_imgB",
                        default="none",
                        help="image B used for testing")

    return parser.parse_args()

# create the checkpoint directory if it does not exist yet
def checkpoint_create():
    if not os.path.exists('../checkpoint'):
        os.mkdir('../checkpoint')
--------------------------------------------------------------------------------