├── .gitignore ├── README.md ├── config └── sample_config.py ├── dataset_utils ├── imagenet_class_index.json └── voc0712.py ├── networks ├── googlenet.py ├── resnet_cifar.py ├── uap.py └── vgg_cifar.py ├── pcc_analysis.ipynb ├── requirements.txt ├── run.sh ├── train_uap.py ├── utils ├── custom_loss.py ├── data.py ├── network.py ├── training.py └── utils.py └── vgg19_sea_lion └── checkpoint.pth.tar /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | 4 | .ipynb_checkpoints 5 | data/* 6 | models/* 7 | results/* 8 | config/config.py 9 | .Trash* 10 | .idea/* 11 | 12 | *.npy 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Universal Adversarial Perturbation with virtual data 2 | This is the repository accompanying our CVPR 2020 paper [Understanding Adversarial Examples from the Mutual Influence of Images and Perturbations](https://openaccess.thecvf.com/content_CVPR_2020/papers/Zhang_Understanding_Adversarial_Examples_From_the_Mutual_Influence_of_Images_and_CVPR_2020_paper.pdf) 3 | 4 | ## Setup 5 | You can install the requirements with `pip3 install -r requirements.txt`. 6 | 7 | ### Config 8 | Copy the `sample_config.py` to `config.py` (`cp ./config/sample_config.py ./config/config.py`) and edit the paths accordingly. 9 | 10 | ### Datasets 11 | The code supports training UAPs on ImageNet, MS COCO, PASCAL VOC and Places365. 12 | 13 | #### ImageNet 14 | The [ImageNet](http://www.image-net.org/) dataset should be preprocessed, such that the validation images are located in labeled subfolders as for the training set. You can have a look at this [bash-script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh) if you did not process your data already. Set the paths in your `config.py`. 
15 | ``` 16 | IMAGENET_PATH = "/path/to/Data/ImageNet" 17 | ``` 18 | 19 | #### COCO 20 | The [COCO](https://cocodataset.org/#home) 2017 images can be downloaded from here for [training](http://images.cocodataset.org/zips/train2017.zip) and [validation](http://images.cocodataset.org/zips/val2017.zip). After downloading and extracting the data update the paths in your `config.py`. 21 | ``` 22 | COCO_2017_TRAIN_IMGS = "/path/to/COCO/train2017/" 23 | COCO_2017_TRAIN_ANN = "/path/to/COCO/annotations/instances_train2017.json" 24 | COCO_2017_VAL_IMGS = "/path/to/COCO/val2017/" 25 | COCO_2017_VAL_ANN = "/path/to/COCO/annotations/instances_val2017.json" 26 | ``` 27 | 28 | #### PASCAL VOC 29 | The training/validation data of the [PASCAL VOC2012 Challenge](http://host.robots.ox.ac.uk/pascal/VOC/) can be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar). After downloading and extracting the data update the paths in your `config.py`. 30 | ``` 31 | VOC_2012_ROOT = "/path/to/Data/VOCdevkit/" 32 | ``` 33 | 34 | #### Places 365 35 | The [Places365](http://places2.csail.mit.edu/index.html) data can be downloaded from [here](http://places2.csail.mit.edu/download.html). After downloading and extracting the data update the paths in your `config.py`. 36 | ``` 37 | PLACES365_ROOT = "/home/user/Data/places365/" 38 | ``` 39 | 40 | ## Run 41 | Run `bash ./run.sh` to generate UAPs for different target models trained on ImageNet using Places365 as virtual data. The bash script should be easy to adapt to perform different experiments. The jupyter notebook `pcc_analysis.ipynb` is an example for the PCC-analysis discussed in the paper. 
42 | 43 | ## Citation 44 | ``` 45 | @inproceedings{zhang2020understanding, 46 | title={Understanding Adversarial Examples From the Mutual Influence of Images and Perturbations}, 47 | author={Zhang, Chaoning and Benz, Philipp and Imtiaz, Tooba and Kweon, In So}, 48 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 49 | pages={14521--14530}, 50 | year={2020} 51 | } 52 | ``` -------------------------------------------------------------------------------- /config/sample_config.py: -------------------------------------------------------------------------------- 1 | # Copy and edit this file to config/config.py 2 | RESULT_PATH = "/path/to/uap_virtual_data.pytorch/results" # Destination folder to store the results to 3 | MODEL_PATH = "/path/to/uap_virtual_data.pytorch/models" # Destination folder to store the models to 4 | 5 | PROJECT_PATH = "/path/to/uap_virtual_data.pytorch" # Directory to this project 6 | DATASET_BASE_PATH = "/path/to/Data" # Directory path where pytorch datasets are stored 7 | IMAGENET_PATH = "/path/to/ImageNet" # Directory to ImageNet for Pytorch 8 | 9 | # Directories to COCO 10 | COCO_2017_TRAIN_IMGS = "/path/to/COCO/train2017/" 11 | COCO_2017_TRAIN_ANN = "/path/to/COCO/annotations/instances_train2017.json" 12 | COCO_2017_VAL_IMGS = "/path/to/COCO/val2017/" 13 | COCO_2017_VAL_ANN = "/path/to/COCO/annotations/instances_val2017.json" 14 | 15 | # Directory for PASCAL VOC 16 | VOC_2012_ROOT = "/path/to/VOCdevkit/" 17 | 18 | # Directory to Places365 19 | PLACES365_ROOT = "/path/to/places365/" -------------------------------------------------------------------------------- /dataset_utils/imagenet_class_index.json: -------------------------------------------------------------------------------- 1 | {"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": 
["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": 
["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": 
["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": 
["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", 
"Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": 
["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": 
["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": 
["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": 
["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", 
"coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", 
"football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": 
["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], 
"676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], 
"734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", 
"shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", 
"tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": 
["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", 
"pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]} -------------------------------------------------------------------------------- /dataset_utils/voc0712.py: -------------------------------------------------------------------------------- 1 | """VOC Dataset Classes 2 | 3 | Original author: Francisco Massa 4 | https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py 5 | 6 | Updated by: Ellis Brown, Max deGroot 7 | Modified by: Philipp Benz to fit our simple needs 8 | """ 9 | 10 | import os 11 | import pickle 12 | import os.path 13 | import sys 14 | import torch 15 | import torch.utils.data as data 16 | import torchvision.transforms as transforms 17 | from PIL import Image, ImageDraw, ImageFont 18 | import cv2 19 | import numpy as np 20 | if sys.version_info[0] == 2: 21 | import 
xml.etree.cElementTree as ET 22 | else: 23 | import xml.etree.ElementTree as ET 24 | 25 | 26 | VOC_CLASSES = ( '__background__', # always index 0 27 | 'aeroplane', 'bicycle', 'bird', 'boat', 28 | 'bottle', 'bus', 'car', 'cat', 'chair', 29 | 'cow', 'diningtable', 'dog', 'horse', 30 | 'motorbike', 'person', 'pottedplant', 31 | 'sheep', 'sofa', 'train', 'tvmonitor') 32 | 33 | # for making bounding boxes pretty 34 | COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128), 35 | (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128)) 36 | 37 | 38 | class VOCDetection(data.Dataset): 39 | 40 | """VOC Detection Dataset Object 41 | 42 | input is image, target is annotation 43 | 44 | Arguments: 45 | root (string): filepath to VOCdevkit folder. 46 | image_set (string): imageset to use (eg. 'train', 'val', 'test') 47 | transform (callable, optional): transformation to perform on the 48 | input image 49 | target_transform (callable, optional): transformation to perform on the 50 | target `annotation` 51 | (eg: take in caption string, return tensor of word indices) 52 | dataset_name (string, optional): which dataset to load 53 | (default: 'VOC2007') 54 | """ 55 | 56 | def __init__(self, root, year, image_set, transform=None, target_transform=None, 57 | dataset_name='VOC0712'): 58 | self.root = root 59 | self.image_set = image_set 60 | self.transform = transform 61 | self.target_transform = target_transform 62 | self.name = dataset_name 63 | self._annopath = os.path.join('%s', 'Annotations', '%s.xml') 64 | self._imgpath = os.path.join('%s', 'JPEGImages', '%s.jpg') 65 | self.ids = list() 66 | self._year = year 67 | 68 | rootpath = os.path.join(self.root, 'VOC' + self._year) 69 | for line in open(os.path.join(rootpath, 'ImageSets', 'Main', self.image_set + '.txt')): 70 | self.ids.append((rootpath, line.strip())) 71 | 72 | def __getitem__(self, index): 73 | img_id = self.ids[index] 74 | # target = ET.parse(self._annopath % img_id).getroot() 75 | target = torch.randint(0, 
21, size=(1,), dtype=torch.long)[0] 76 | img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 77 | height, width, _ = img.shape 78 | 79 | if self.target_transform is not None: 80 | target = self.target_transform(target) 81 | 82 | if self.transform is not None: 83 | img = self.transform(img) 84 | 85 | return img, target 86 | 87 | def __len__(self): 88 | return len(self.ids) 89 | 90 | 91 | def detection_collate(batch): 92 | """Custom collate fn for dealing with batches of images that have a different 93 | number of associated object annotations (bounding boxes). 94 | 95 | Arguments: 96 | batch: (tuple) A tuple of tensor images and lists of annotations 97 | 98 | Return: 99 | A tuple containing: 100 | 1) (tensor) batch of images stacked on their 0 dim 101 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 102 | """ 103 | targets = [] 104 | imgs = [] 105 | for _, sample in enumerate(batch): 106 | for _, tup in enumerate(sample): 107 | if torch.is_tensor(tup): 108 | imgs.append(tup) 109 | elif isinstance(tup, type(np.empty(0))): 110 | annos = torch.from_numpy(tup).float() 111 | targets.append(annos) 112 | return (torch.stack(imgs, 0), targets) 113 | -------------------------------------------------------------------------------- /networks/googlenet.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import namedtuple 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | # from .utils import load_state_dict_from_url 7 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 8 | 9 | __all__ = ['GoogLeNet', 'googlenet'] 10 | 11 | model_urls = { 12 | # GoogLeNet ported from TensorFlow 13 | 'googlenet': 'https://download.pytorch.org/models/googlenet-1378be20.pth', 14 | } 15 | 16 | _GoogLeNetOutputs = namedtuple('GoogLeNetOutputs', ['logits', 'aux_logits2', 'aux_logits1']) 17 | 18 | 19 | def googlenet(pretrained=False, 
def googlenet(pretrained=False, progress=True, **kwargs):
    r"""GoogLeNet (Inception v1) model architecture from
    `"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        aux_logits (bool): If True, adds two auxiliary branches that can improve training.
            Default: *False* when pretrained is True otherwise *True*
        transform_input (bool): If True, preprocesses the input according to the method with which it
            was trained on ImageNet. Default: *False*
    """
    if not pretrained:
        return GoogLeNet(**kwargs)

    # The released checkpoint was exported with input transformation enabled
    # and without trained auxiliary heads.
    kwargs.setdefault('transform_input', True)
    kwargs.setdefault('aux_logits', False)
    if kwargs['aux_logits']:
        warnings.warn('auxiliary heads in the pretrained googlenet model are NOT pretrained, '
                      'so make sure to train them')
    original_aux_logits = kwargs['aux_logits']
    # Build with aux heads so every checkpoint key can be loaded, then strip
    # them again if the caller did not ask for them.
    kwargs['aux_logits'] = True
    kwargs['init_weights'] = False
    model = GoogLeNet(**kwargs)
    model.load_state_dict(load_state_dict_from_url(model_urls['googlenet'], progress=progress))
    if not original_aux_logits:
        model.aux_logits = False
        del model.aux1, model.aux2
    return model


class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) backbone with optional auxiliary classifiers."""

    def __init__(self, num_classes=1000, aux_logits=True, transform_input=False, init_weights=True):
        super(GoogLeNet, self).__init__()
        self.aux_logits = aux_logits
        self.transform_input = transform_input

        # Stem: 224x224x3 -> 28x28x192.
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        # Inception stacks; positional arguments are the per-branch widths.
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        if aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self):
        # Truncated-normal init (sigma=0.01, clipped at 2 sigma) for conv and
        # linear weights, constant init for batch-norm — as in the TF port.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                import scipy.stats as stats
                X = stats.truncnorm(-2, 2, scale=0.01)
                values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype)
                with torch.no_grad():
                    m.weight.copy_(values.view(m.weight.size()))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        if self.transform_input:
            # Undo the caller's ImageNet normalisation and re-normalise to the
            # [-1, 1] scheme the TensorFlow checkpoint was trained with.
            scales = (0.229 / 0.5, 0.224 / 0.5, 0.225 / 0.5)
            offsets = ((0.485 - 0.5) / 0.5, (0.456 - 0.5) / 0.5, (0.406 - 0.5) / 0.5)
            x = torch.cat([torch.unsqueeze(x[:, c], 1) * s + o
                           for c, (s, o) in enumerate(zip(scales, offsets))], 1)

        # Stem.
        x = self.maxpool1(self.conv1(x))              # N x 64 x 56 x 56
        x = self.maxpool2(self.conv3(self.conv2(x)))  # N x 192 x 28 x 28

        x = self.inception3b(self.inception3a(x))     # N x 480 x 28 x 28
        x = self.maxpool3(x)                          # N x 480 x 14 x 14

        x = self.inception4a(x)                       # N x 512 x 14 x 14
        if self.training and self.aux_logits:
            aux1 = self.aux1(x)

        x = self.inception4d(self.inception4c(self.inception4b(x)))  # N x 528 x 14 x 14
        if self.training and self.aux_logits:
            aux2 = self.aux2(x)

        x = self.maxpool4(self.inception4e(x))        # N x 832 x 7 x 7
        x = self.inception5b(self.inception5a(x))     # N x 1024 x 7 x 7

        x = torch.flatten(self.avgpool(x), 1)         # N x 1024
        x = self.fc(self.dropout(x))                  # N x num_classes
        if self.training and self.aux_logits:
            return _GoogLeNetOutputs(x, aux2, aux1)
        return x


class Inception(nn.Module):
    """Parallel 1x1 / 3x3 / "5x5" / pool-projection branches, channel-concatenated."""

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super(Inception, self).__init__()

        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            BasicConv2d(in_channels, ch3x3red, kernel_size=1),
            BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1),
        )

        # NOTE: the "5x5" branch actually uses a 3x3 kernel — this mirrors the
        # known discrepancy in the TensorFlow port this checkpoint comes from.
        self.branch3 = nn.Sequential(
            BasicConv2d(in_channels, ch5x5red, kernel_size=1),
            BasicConv2d(ch5x5red, ch5x5, kernel_size=3, padding=1),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            BasicConv2d(in_channels, pool_proj, kernel_size=1),
        )

    def forward(self, x):
        return torch.cat([self.branch1(x), self.branch2(x),
                          self.branch3(x), self.branch4(x)], 1)


class InceptionAux(nn.Module):
    """Auxiliary classifier head attached after inception4a / inception4d."""

    def __init__(self, in_channels, num_classes):
        super(InceptionAux, self).__init__()
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))       # N x C x 4 x 4
        x = self.conv(x)                           # N x 128 x 4 x 4
        x = torch.flatten(x, 1)                    # N x 2048
        x = F.relu(self.fc1(x), inplace=True)      # N x 1024
        x = F.dropout(x, 0.7, training=self.training)
        return self.fc2(x)                         # N x num_classes


class BasicConv2d(nn.Module):
    """Bias-free Conv2d followed by BatchNorm and an in-place ReLU."""

    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        return F.relu(self.bn(self.conv(x)), inplace=True)


# ---------------- /networks/resnet_cifar.py ----------------
'''
resnet for cifar in pytorch

Reference:
[1] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learning for image recognition. In CVPR, 2016.
[2] K. He, X. Zhang, S. Ren, and J. Sun. Identity mappings in deep residual networks. In ECCV, 2016.
'''

import torch
import torch.nn as nn
import math


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(nn.Module):
    """Two-conv residual block with post-activation ordering (He et al., 2016a)."""
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Projected shortcut when shape changes, identity otherwise.
        identity = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity
        return self.relu(out)


class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block with post-activation ordering."""
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += identity
        return self.relu(out)


class PreActBasicBlock(nn.Module):
    """Two-conv residual block with pre-activation ordering (He et al., 2016b)."""
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(PreActBasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(x))
        # The projection shortcut taps the pre-activated signal, not the raw input.
        shortcut = x if self.downsample is None else self.downsample(out)

        out = self.conv1(out)
        out = self.conv2(self.relu(self.bn2(out)))
        return out + shortcut


class PreActBottleneck(nn.Module):
    """Bottleneck residual block with pre-activation ordering."""
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(x))
        shortcut = x if self.downsample is None else self.downsample(out)

        out = self.conv1(out)
        out = self.conv2(self.relu(self.bn2(out)))
        out = self.conv3(self.relu(self.bn3(out)))
        return out + shortcut


class ResNet_Cifar(nn.Module):
    """Post-activation ResNet for 32x32 CIFAR inputs (16/32/64-channel stages)."""

    def __init__(self, block, layers, num_classes=10):
        super(ResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, layers[0])
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        # He-style fan-out init for convs; unit weight / zero bias for batch-norm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        # Project the shortcut whenever spatial size or channel width changes.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))

        x = self.layer3(self.layer2(self.layer1(x)))

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
class PreAct_ResNet_Cifar(nn.Module):
    """Pre-activation ResNet for 32x32 CIFAR inputs (16/32/64-channel stages)."""

    def __init__(self, block, layers, num_classes=10):
        super(PreAct_ResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 16, layers[0])
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
        # Pre-activation nets normalise once at the very end of the trunk.
        self.bn = nn.BatchNorm2d(64 * block.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        # He-style fan-out init for convs; unit weight / zero bias for batch-norm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        # Shortcut projection is a bare conv here: normalisation happens
        # inside the pre-activation blocks themselves.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False)
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.conv1(x)
        x = self.layer3(self.layer2(self.layer1(x)))
        x = self.relu(self.bn(x))
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)


# Factory helpers: the depth-n CIFAR ResNets from the original papers.

def resnet20_cifar(**kwargs):
    return ResNet_Cifar(BasicBlock, [3, 3, 3], **kwargs)


def resnet32_cifar(**kwargs):
    return ResNet_Cifar(BasicBlock, [5, 5, 5], **kwargs)


def resnet44_cifar(**kwargs):
    return ResNet_Cifar(BasicBlock, [7, 7, 7], **kwargs)


def resnet56_cifar(**kwargs):
    return ResNet_Cifar(BasicBlock, [9, 9, 9], **kwargs)


def resnet110_cifar(**kwargs):
    return ResNet_Cifar(BasicBlock, [18, 18, 18], **kwargs)


def resnet1202_cifar(**kwargs):
    return ResNet_Cifar(BasicBlock, [200, 200, 200], **kwargs)


def resnet164_cifar(**kwargs):
    return ResNet_Cifar(Bottleneck, [18, 18, 18], **kwargs)


def resnet1001_cifar(**kwargs):
    return ResNet_Cifar(Bottleneck, [111, 111, 111], **kwargs)


def preact_resnet110_cifar(**kwargs):
    return PreAct_ResNet_Cifar(PreActBasicBlock, [18, 18, 18], **kwargs)


def preact_resnet164_cifar(**kwargs):
    return PreAct_ResNet_Cifar(PreActBottleneck, [18, 18, 18], **kwargs)


def preact_resnet1001_cifar(**kwargs):
    return PreAct_ResNet_Cifar(PreActBottleneck, [111, 111, 111], **kwargs)


if __name__ == '__main__':
    # Smoke test: print the architecture and the output shape.
    # Fixed: the input must be CIFAR-sized (32x32); the original fed 64x64,
    # which makes AvgPool2d(8) emit 9x9 maps and crashes the 64-feature fc.
    net = resnet20_cifar()
    y = net(torch.randn(1, 3, 32, 32))
    print(net)
    print(y.size())


# ---------------- /networks/uap.py ----------------
# (original header: "from __future__ import division" — a no-op on Python 3)
import torch
import torch.nn as nn
class UAP(nn.Module):
    """Universal adversarial perturbation applied in *unnormalised* pixel space.

    The wrapped model expects normalised inputs x = (img - mean) / std, so the
    forward pass denormalises, adds the (trainable) perturbation, and
    renormalises.

    Args:
        shape (tuple): spatial size (H, W) of the perturbation.
        num_channels (int): number of image channels.
        mean (sequence of float): per-channel normalisation means.
        std (sequence of float): per-channel normalisation stds.
        use_cuda (bool): move the normalisation constants to the GPU.
    """

    def __init__(self,
                 shape=(224, 224),
                 num_channels=3,
                 mean=(0., 0., 0.),
                 std=(1., 1., 1.),
                 use_cuda=True):
        super(UAP, self).__init__()

        self.use_cuda = use_cuda
        self.num_channels = num_channels
        self.shape = shape
        # Trainable perturbation, zero-initialised. nn.Parameter already sets
        # requires_grad=True, so the redundant flag on torch.zeros is dropped.
        self.uap = nn.Parameter(torch.zeros(num_channels, *shape))

        # Per-channel constants in broadcastable (1, C, 1, 1) form instead of
        # full (1, C, H, W) maps: identical arithmetic, far less memory.
        self.mean_tensor = torch.tensor(list(mean), dtype=torch.float32).view(1, num_channels, 1, 1)
        self.std_tensor = torch.tensor(list(std), dtype=torch.float32).view(1, num_channels, 1, 1)
        if use_cuda:
            self.mean_tensor = self.mean_tensor.cuda()
            self.std_tensor = self.std_tensor.cuda()

    def forward(self, x):
        # Put image into original (pixel) form.
        orig_img = x * self.std_tensor + self.mean_tensor
        # Add the perturbation, then renormalise for the downstream model.
        adv_orig_img = orig_img + self.uap
        return (adv_orig_img - self.mean_tensor) / self.std_tensor
'''
VGG11/13/16/19 in Pytorch.
Credit: https://github.com/kuangliu/pytorch-cifar/blob/master/models/vgg.py
'''
import torch
import torch.nn as nn


# Layer recipes: integers are conv output widths, 'M' marks a 2x2 max-pool.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    """VGG backbone for 32x32 CIFAR inputs with a single linear classifier."""

    def __init__(self, vgg_name, num_classes=10):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        return self.classifier(out)

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for spec in cfg:
            if spec == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.extend([nn.Conv2d(in_channels, spec, kernel_size=3, padding=1),
                               nn.BatchNorm2d(spec),
                               nn.ReLU(inplace=True)])
                in_channels = spec
        # Kept for checkpoint compatibility; a 1x1 stride-1 average pool is a no-op.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*layers)


def test():
    """Smoke test: run VGG11 on a random CIFAR-sized batch and print the output size."""
    net = VGG('VGG11')
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.size())


# ---------------- /requirements.txt ----------------
# numpy
# opencv-python
# matplotlib
# torch==1.0.1.post2
# torchvision==0.2.2
# NOTE(review): fixed "torch=1.0.1.post2" / "torchvision=0.2.2" — pip version
# pins require '==': a single '=' is not a valid requirement specifier.
# ---------------- /run.sh (first lines) ----------------
# #!/bin/bash
#
# # Fixed Params
# PRETRAINED_DATASET="imagenet"
# DATASET="places365"
def parse_arguments():
    """Parse command-line arguments for UAP training.

    Returns:
        argparse.Namespace with two derived fields: ``use_cuda`` (True when
        ngpu > 0 and CUDA is available) and ``pretrained_seed`` (drawn at
        random when None was supplied).
    """
    parser = argparse.ArgumentParser(description='Trains a UAP')
    # pretrained
    parser.add_argument('--dataset', default='imagenet', choices=['cifar10', 'cifar100', 'imagenet', 'coco', 'voc', 'places365'],
                        help='Used dataset to generate UAP (default: imagenet)')
    parser.add_argument('--pretrained_dataset', default='imagenet', choices=['cifar10', 'cifar100', 'imagenet'],
                        help='Used dataset to train the initial model (default: imagenet)')
    parser.add_argument('--pretrained_arch', default='vgg16', choices=['vgg16_cifar', 'vgg19_cifar', 'resnet20', 'resnet56',
                                                                       'alexnet', 'googlenet', 'vgg16', 'vgg19',
                                                                       'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
                                                                       'inception_v3'],
                        help='Used model architecture: (default: vgg16)')
    parser.add_argument('--pretrained_seed', type=int, default=123,
                        help='Seed used in the generation process (default: 123)')
    # Parameters regarding UAP
    parser.add_argument('--epsilon', type=float, default=0.03922,
                        help='Norm restriction of UAP (default: 10/255)')
    parser.add_argument('--num_iterations', type=int, default=2000,
                        help='Number of iterations (default: 2000)')
    parser.add_argument('--result_subfolder', default='default', type=str,
                        help='result subfolder name')
    parser.add_argument('--postfix', default='',
                        help='Postfix to attach to result folder')
    # Optimization options
    # Fixed: the help text previously claimed "(default: cw_logit)", which is
    # not even among the choices; the actual default is 'ce'.
    parser.add_argument('--loss_function', default='ce', choices=['ce', 'neg_ce', 'logit', 'bounded_logit',
                                                                  'bounded_logit_fixed_ref', 'bounded_logit_neg'],
                        help='Used loss function for source classes: (default: ce)')
    parser.add_argument('--confidence', default=0., type=float,
                        help='Confidence value for C&W losses (default: 0.0)')
    parser.add_argument('--targeted', action='store_true',
                        help='Target a specific class (default: False)')
    parser.add_argument('--target_class', type=int, default=0,
                        help='Target class (default: 0)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Batch size (default: 32)')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='Learning Rate (default: 0.001)')
    parser.add_argument('--print_freq', default=200, type=int, metavar='N',
                        help='print frequency (default: 200)')
    parser.add_argument('--ngpu', type=int, default=1,
                        help='Number of used GPUs (0 = CPU) (default: 1)')
    parser.add_argument('--workers', type=int, default=6,
                        help='Number of data loading workers (default: 6)')
    args = parser.parse_args()

    # Derived flag: only use CUDA when GPUs were requested AND are available.
    args.use_cuda = args.ngpu > 0 and torch.cuda.is_available()

    if args.pretrained_seed is None:
        args.pretrained_seed = random.randint(1, 10000)
    return args
def main():
    """Train a universal adversarial perturbation (UAP) for a pretrained classifier.

    Pipeline: parse args -> seed RNGs -> create result dir + logger ->
    build data loaders -> load the (frozen) target network -> prepend the
    trainable UAP generator -> optimize the perturbation -> evaluate ->
    save a checkpoint containing only the generator weights.
    """
    args = parse_arguments()

    # Seed every RNG source so runs with the same seed are reproducible.
    random.seed(args.pretrained_seed)
    torch.manual_seed(args.pretrained_seed)
    if args.use_cuda:
        torch.cuda.manual_seed_all(args.pretrained_seed)
    cudnn.benchmark = True

    # get the result path to store the results
    result_path = get_result_path(dataset_name=args.dataset,
                                  network_arch=args.pretrained_arch,
                                  random_seed=args.pretrained_seed,
                                  result_subfolder=args.result_subfolder,
                                  postfix=args.postfix)

    # Init logger (plain text file; every print_log call mirrors to it).
    log_file_name = os.path.join(result_path, 'log.txt')
    print("Log file: {}".format(log_file_name))
    log = open(log_file_name, 'w')
    print_log('save path : {}'.format(result_path), log)
    # Record the full configuration plus environment versions for the run.
    state = {k: v for k, v in args._get_kwargs()}
    for key, value in state.items():
        print_log("{} : {}".format(key, value), log)
    print_log("Random Seed: {}".format(args.pretrained_seed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("Cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Test split of the dataset the target model was trained on; used only
    # for the final evaluation, hence shuffle=False.
    _, pretrained_data_test = get_data(args.pretrained_dataset, args.pretrained_dataset)

    pretrained_data_test_loader = torch.utils.data.DataLoader(pretrained_data_test,
                                                              batch_size=args.batch_size,
                                                              shuffle=False,
                                                              num_workers=args.workers,
                                                              pin_memory=True)

    ##### Dataloader for training ####
    num_classes, (mean, std), input_size, num_channels = get_data_specs(args.pretrained_dataset)

    # UAP training data may come from a different ("virtual") dataset than
    # the one the target model was trained on (e.g. Places365 vs ImageNet).
    data_train, _ = get_data(args.dataset, args.pretrained_dataset)
    data_train_loader = torch.utils.data.DataLoader(data_train,
                                                    batch_size=args.batch_size,
                                                    shuffle=True,
                                                    num_workers=args.workers,
                                                    pin_memory=True)

    ####################################
    # Init model, criterion, and optimizer
    print_log("=> Creating model '{}'".format(args.pretrained_arch), log)
    # get a path for loading the model to be attacked
    model_path = get_model_path(dataset_name=args.pretrained_dataset,
                                network_arch=args.pretrained_arch,
                                random_seed=args.pretrained_seed)
    model_weights_path = os.path.join(model_path, "checkpoint.pth.tar")

    target_network = get_network(args.pretrained_arch,
                                 input_size=input_size,
                                 num_classes=num_classes,
                                 finetune=False)

    print_log("=> Network :\n {}".format(target_network), log)
    target_network = torch.nn.DataParallel(target_network, device_ids=list(range(args.ngpu)))
    # Set the target model into evaluation mode
    target_network.eval()
    # Imagenet models use the pretrained pytorch weights; everything else is
    # restored from a locally trained checkpoint.
    if args.pretrained_dataset != "imagenet":
        network_data = torch.load(model_weights_path)
        target_network.load_state_dict(network_data['state_dict'])

    # Set all weights to not trainable: only the UAP itself is optimized.
    set_parameter_requires_grad(target_network, requires_grad=False)

    non_trainale_params = get_num_non_trainable_parameters(target_network)
    trainale_params = get_num_trainable_parameters(target_network)
    total_params = get_num_parameters(target_network)
    print_log("Target Network Trainable parameters: {}".format(trainale_params), log)
    print_log("Target Network Non Trainable parameters: {}".format(non_trainale_params), log)
    print_log("Target Network Total # parameters: {}".format(total_params), log)

    print_log("=> Inserting Generator", log)

    # UAP module: adds a trainable perturbation to its input before the
    # target model sees it (normalization handled via mean/std).
    generator = UAP(shape=(input_size, input_size),
                    num_channels=num_channels,
                    mean=mean,
                    std=std,
                    use_cuda=args.use_cuda)

    print_log("=> Generator :\n {}".format(generator), log)
    non_trainale_params = get_num_non_trainable_parameters(generator)
    trainale_params = get_num_trainable_parameters(generator)
    total_params = get_num_parameters(generator)
    print_log("Generator Trainable parameters: {}".format(trainale_params), log)
    print_log("Generator Non Trainable parameters: {}".format(non_trainale_params), log)
    print_log("Generator Total # parameters: {}".format(total_params), log)

    # Perturbed net = trainable generator followed by the frozen target model.
    perturbed_net = nn.Sequential(OrderedDict([('generator', generator), ('target_model', target_network)]))
    perturbed_net = torch.nn.DataParallel(perturbed_net, device_ids=list(range(args.ngpu)))

    non_trainale_params = get_num_non_trainable_parameters(perturbed_net)
    trainale_params = get_num_trainable_parameters(perturbed_net)
    total_params = get_num_parameters(perturbed_net)
    print_log("Perturbed Net Trainable parameters: {}".format(trainale_params), log)
    print_log("Perturbed Net Non Trainable parameters: {}".format(non_trainale_params), log)
    print_log("Perturbed Net Total # parameters: {}".format(total_params), log)

    # Set the target model into evaluation mode; only the generator trains.
    perturbed_net.module.target_model.eval()
    perturbed_net.module.generator.train()

    # Select the attack objective (CE for targeted, the rest are C&W-style
    # logit losses or negated CE for non-targeted attacks).
    if args.loss_function == "ce":
        criterion = torch.nn.CrossEntropyLoss()
    elif args.loss_function == "neg_ce":
        criterion = NegativeCrossEntropy()
    elif args.loss_function == "logit":
        criterion = LogitLoss(num_classes=num_classes, use_cuda=args.use_cuda)
    elif args.loss_function == "bounded_logit":
        criterion = BoundedLogitLoss(num_classes=num_classes, confidence=args.confidence, use_cuda=args.use_cuda)
    elif args.loss_function == "bounded_logit_fixed_ref":
        criterion = BoundedLogitLossFixedRef(num_classes=num_classes, confidence=args.confidence, use_cuda=args.use_cuda)
    elif args.loss_function == "bounded_logit_neg":
        criterion = BoundedLogitLoss_neg(num_classes=num_classes, confidence=args.confidence, use_cuda=args.use_cuda)
    else:
        raise ValueError

    if args.use_cuda:
        target_network.cuda()
        generator.cuda()
        perturbed_net.cuda()
        criterion.cuda()

    # Adam over all perturbed_net parameters; since the target model was
    # frozen above, only the generator's UAP tensor actually updates.
    optimizer = torch.optim.Adam(perturbed_net.parameters(), lr=state['learning_rate'])

    # Measure the time needed for the UAP generation
    start = time.time()
    train(data_loader=data_train_loader,
          model=perturbed_net,
          criterion=criterion,
          optimizer=optimizer,
          epsilon=args.epsilon,
          num_iterations=args.num_iterations,
          targeted=args.targeted,
          target_class=args.target_class,
          log=log,
          print_freq=args.print_freq,
          use_cuda=args.use_cuda)
    end = time.time()
    print_log("Time needed for UAP generation: {}".format(end - start), log)
    # evaluate on the pretrained dataset's test split
    print_log("Final evaluation:", log)
    metrics_evaluate(data_loader=pretrained_data_test_loader,
                     target_model=target_network,
                     perturbed_model=perturbed_net,
                     targeted=args.targeted,
                     target_class=args.target_class,
                     log=log,
                     use_cuda=args.use_cuda)

    # Persist only the trained UAP (generator) weights, not the target model.
    save_checkpoint({
      'arch'        : args.pretrained_arch,
      # 'state_dict'  : perturbed_net.state_dict(),
      'state_dict'  : perturbed_net.module.generator.state_dict(),
      'optimizer'   : optimizer.state_dict(),
      'args'        : copy.deepcopy(args),
      }, result_path, 'checkpoint.pth.tar')

    log.close()
class LogitLoss(_WeightedLoss):
    """Negative mean of the logit at the target class.

    Minimizing this loss pushes the target-class logit up, which is the
    targeted-UAP objective.
    """

    def __init__(self, num_classes, use_cuda=False):
        super(LogitLoss, self).__init__()
        self.num_classes = num_classes  # size of the one-hot encoding
        self.use_cuda = use_cuda        # move the one-hot mask to GPU if set

    def forward(self, input, target):
        # One-hot mask selecting the target logit of every sample.
        mask = one_hot(target.cpu(), num_classes=self.num_classes)
        if self.use_cuda:
            mask = mask.cuda()

        # Pick out the target-class logit per sample (max over a masked
        # row yields exactly that entry).
        picked = (mask * input).max(1)[0]
        # Negate so that lowering the loss raises the target logit.
        return -picked.mean()
class BoundedLogitLossFixedRef(_WeightedLoss):
    """Clamped C&W-style logit loss with a fixed (non-differentiable) reference.

    Per-sample loss: clamp(detach(max_non_target_logit) - target_logit,
    min=-confidence). Detaching the reference means gradients flow only
    through the target logit, unlike ``BoundedLogitLoss``.
    """

    def __init__(self, num_classes, confidence, use_cuda=False):
        super(BoundedLogitLossFixedRef, self).__init__()
        self.num_classes = num_classes  # size of the one-hot encoding
        self.confidence = confidence    # margin kappa of the C&W loss
        self.use_cuda = use_cuda        # move the one-hot mask to GPU if set

    def forward(self, input, target):
        one_hot_labels = one_hot(target.cpu(), num_classes=self.num_classes)
        if self.use_cuda:
            one_hot_labels = one_hot_labels.cuda()

        # Logit of the target class per sample.
        target_logits = (one_hot_labels * input).sum(1)
        # Highest non-target logit; the -10000 offset excludes the target slot.
        not_target_logits = ((1. - one_hot_labels) * input - one_hot_labels * 10000.).max(1)[0]
        # Fixed: '.detach()' replaces the legacy '.data.detach()' combo —
        # same effect (the reference is treated as a constant) without going
        # through '.data', which bypasses autograd's correctness checks.
        logit_loss = torch.clamp(not_target_logits.detach() - target_logits, min=-self.confidence)
        return torch.mean(logit_loss)
class NegativeCrossEntropy(_WeightedLoss):
    """Negative of the standard cross-entropy loss.

    Minimizing this criterion maximizes cross entropy w.r.t. the true
    labels, i.e. it drives the model away from correct predictions
    (non-targeted UAP objective).
    """

    def __init__(self):
        super(NegativeCrossEntropy, self).__init__()

    def forward(self, input, target):
        # Fixed: reduction='mean' replaces 'elementwise_mean', which was
        # deprecated in PyTorch 1.0 and rejected by modern versions; the
        # computed value is identical.
        loss = -F.cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean')
        return loss
def get_data(dataset, pretrained_dataset):
    """Build the (train, test) dataset pair for UAP training/evaluation.

    Args:
        dataset: name of the data used to optimize the UAP; may be a
            "virtual" dataset different from the model's training data.
        pretrained_dataset: dataset the target model was trained on; it
            determines the input size and normalization statistics.

    Returns:
        (train_data, test_data) torch Dataset objects.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names
            (previously this fell through to an opaque NameError).
    """
    num_classes, (mean, std), input_size, num_channels = get_data_specs(pretrained_dataset)

    if dataset == 'cifar10':
        train_transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(),
             transforms.RandomCrop(input_size, padding=4),
             transforms.ToTensor(),
             transforms.Normalize(mean, std)])

        test_transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean, std)])

        train_data = dset.CIFAR10(DATASET_BASE_PATH, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR10(DATASET_BASE_PATH, train=False, transform=test_transform, download=True)

    elif dataset == 'cifar100':
        train_transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(),
             transforms.RandomCrop(input_size, padding=4),
             transforms.ToTensor(),
             transforms.Normalize(mean, std)])

        test_transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean, std)])

        train_data = dset.CIFAR100(DATASET_BASE_PATH, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR100(DATASET_BASE_PATH, train=False, transform=test_transform, download=True)

    elif dataset == "imagenet":
        traindir = os.path.join(IMAGENET_PATH, 'train')
        valdir = os.path.join(IMAGENET_PATH, 'val')

        train_transform = transforms.Compose([
            transforms.Resize(256),
            # transforms.Resize(299), # inception_v3
            transforms.RandomCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

        test_transform = transforms.Compose([
            transforms.Resize(256),
            # transforms.Resize(299), # inception_v3
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

        train_data = dset.ImageFolder(root=traindir, transform=train_transform)
        test_data = dset.ImageFolder(root=valdir, transform=test_transform)

    elif dataset == "coco":
        # 1.143 ~= 256/224: resize so the crop keeps the usual margin ratio.
        train_transform = transforms.Compose([
            transforms.Resize(int(input_size * 1.143)),
            transforms.RandomCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

        test_transform = transforms.Compose([
            transforms.Resize(int(input_size * 1.143)),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

        train_data = dset.CocoDetection(root=COCO_2017_TRAIN_IMGS,
                                        annFile=COCO_2017_TRAIN_ANN,
                                        transform=train_transform)
        test_data = dset.CocoDetection(root=COCO_2017_VAL_IMGS,
                                       annFile=COCO_2017_VAL_ANN,
                                       transform=test_transform)

    elif dataset == "voc":
        # ToPILImage first: this VOCDetection wrapper apparently yields raw
        # arrays rather than PIL images — confirm against dataset_utils/voc0712.
        train_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(int(input_size * 1.143)),
            transforms.RandomCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

        test_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(int(input_size * 1.143)),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

        train_data = VOCDetection(root=VOC_2012_ROOT,
                                  year="2012",
                                  image_set='train',
                                  transform=train_transform)
        test_data = VOCDetection(root=VOC_2012_ROOT,
                                 year="2012",
                                 image_set='val',
                                 transform=test_transform)

    elif dataset == "places365":
        traindir = os.path.join(PLACES365_ROOT, "train")
        # NOTE(review): the "test" split also points at "train" — presumably
        # deliberate since Places365 only serves as virtual training data
        # here; confirm before relying on test_data for evaluation.
        testdir = os.path.join(PLACES365_ROOT, "train")
        # Places365 downloaded as 224x224 images

        train_transform = transforms.Compose([
            transforms.Resize(input_size),  # Places images downloaded as 224
            transforms.RandomCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

        test_transform = transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)])

        train_data = dset.ImageFolder(root=traindir, transform=train_transform)
        test_data = dset.ImageFolder(root=testdir, transform=test_transform)

    else:
        # Previously an unsupported name fell through and raised NameError
        # on the return statement; fail loudly and clearly instead.
        raise ValueError("Unsupported dataset: {}".format(dataset))

    return train_data, test_data
def set_parameter_requires_grad(model, requires_grad=False):
    """Set ``requires_grad`` on every parameter of ``model``.

    Fixed: the ``requires_grad`` argument was previously ignored and every
    parameter was hard-coded to non-trainable; the flag is now honored, so
    the helper can also (re-)enable training. All existing call sites pass
    ``requires_grad=False`` and are unaffected.
    """
    for param in model.parameters():
        param.requires_grad = requires_grad
def metrics_evaluate(data_loader, target_model, perturbed_model, targeted, target_class, log=None, use_cuda=True):
    """Evaluate clean vs. UAP-perturbed accuracy and attack metrics.

    Reports: clean accuracy, perturbed accuracy, absolute/relative accuracy
    drop, fooling ratio (fraction of samples whose prediction changes), and
    for targeted attacks the rate of samples pushed into the target class.
    Results are written to ``log`` via print_log when a log handle is given.
    """
    # switch to evaluate mode
    target_model.eval()
    perturbed_model.eval()
    perturbed_model.module.generator.eval()
    perturbed_model.module.target_model.eval()

    clean_acc = AverageMeter()
    perturbed_acc = AverageMeter()
    attack_success_rate = AverageMeter()  # Among the correctly classified samples, the ratio of being different from clean prediction (same as gt)
    if targeted:
        all_to_target_success_rate = AverageMeter()  # The ratio of samples going to the sink classes
        all_to_target_success_rate_filtered = AverageMeter()

    total_num_samples = 0
    num_same_classified = 0
    num_diff_classified = 0

    for input, gt in data_loader:
        if use_cuda:
            gt = gt.cuda()
            input = input.cuda()

        # compute output (no gradients needed for evaluation)
        with torch.no_grad():
            clean_output = target_model(input)
            pert_output = perturbed_model(input)

        # Mask of samples the clean model already classifies correctly.
        correctly_classified_mask = torch.argmax(clean_output, dim=-1).cpu() == gt.cpu()
        cl_acc = accuracy(clean_output.data, gt, topk=(1,))
        clean_acc.update(cl_acc[0].item(), input.size(0))
        pert_acc = accuracy(pert_output.data, gt, topk=(1,))
        perturbed_acc.update(pert_acc[0].item(), input.size(0))

        # Calculating Fooling Ratio params: count prediction flips.
        clean_out_class = torch.argmax(clean_output, dim=-1)
        pert_out_class = torch.argmax(pert_output, dim=-1)

        total_num_samples += len(clean_out_class)
        num_same_classified += torch.sum(clean_out_class == pert_out_class).cpu().numpy()
        num_diff_classified += torch.sum(~(clean_out_class == pert_out_class)).cpu().numpy()

        # Attack success rate: re-run the perturbed model on the correctly
        # classified subset; its (in)accuracy there measures the attack.
        if torch.sum(correctly_classified_mask)>0:
            with torch.no_grad():
                pert_output_corr_cl = perturbed_model(input[correctly_classified_mask])
            attack_succ_rate = accuracy(pert_output_corr_cl, gt[correctly_classified_mask], topk=(1,))
            attack_success_rate.update(attack_succ_rate[0].item(), pert_output_corr_cl.size(0))


    # Calculate Absolute Accuracy Drop
    aad_source = clean_acc.avg - perturbed_acc.avg
    # Calculate Relative Accuracy Drop (guard against zero clean accuracy)
    if clean_acc.avg !=0:
        rad_source = (clean_acc.avg - perturbed_acc.avg)/clean_acc.avg * 100.
    else:
        rad_source = 0.
    # Calculate fooling ratio
    fooling_ratio = num_diff_classified/total_num_samples * 100.

    if targeted:
        # NOTE(review): this block runs after the loop, so gt/pert_output
        # hold only the FINAL batch — the targeted metrics therefore look
        # last-batch-only; it probably belongs inside the loop. Confirm
        # against the reference implementation before trusting these numbers.
        # 2. How many of all samples go the sink class (Only relevant for others loader)
        target_cl = torch.ones_like(gt) * target_class
        all_to_target_succ_rate = accuracy(pert_output, target_cl, topk=(1,))
        all_to_target_success_rate.update(all_to_target_succ_rate[0].item(), pert_output.size(0))

        # 3. How many of all samples go the sink class, except gt sink class (Only relevant for others loader)
        # Filter all idxs which are not belonging to sink class
        non_target_class_idxs = [i != target_class for i in gt]
        non_target_class_mask = torch.Tensor(non_target_class_idxs)==True
        if torch.sum(non_target_class_mask)>0:
            gt_non_target_class = gt[non_target_class_mask]
            pert_output_non_target_class = pert_output[non_target_class_mask]

            target_cl = torch.ones_like(gt_non_target_class) * target_class
            all_to_target_succ_rate_filtered = accuracy(pert_output_non_target_class, target_cl, topk=(1,))
            all_to_target_success_rate_filtered.update(all_to_target_succ_rate_filtered[0].item(), pert_output_non_target_class.size(0))
    if log:
        print_log('\n\t#######################', log)
        print_log('\tClean model accuracy: {:.3f}'.format(clean_acc.avg), log)
        print_log('\tPerturbed model accuracy: {:.3f}'.format(perturbed_acc.avg), log)
        print_log('\tAbsolute Accuracy Drop: {:.3f}'.format(aad_source), log)
        print_log('\tRelative Accuracy Drop: {:.3f}'.format(rad_source), log)
        # 100 - accuracy on the correctly-classified subset = success rate.
        print_log('\tAttack Success Rate: {:.3f}'.format(100-attack_success_rate.avg), log)
        print_log('\tFooling Ratio: {:.3f}'.format(fooling_ratio), log)
        if targeted:
            print_log('\tAll --> Target Class {} Prec@1 {:.3f}'.format(target_class, all_to_target_success_rate.avg), log)
            print_log('\tAll (w/o sink samples) --> Sink {} Prec@1 {:.3f}'.format(target_class, all_to_target_success_rate_filtered.avg), log)
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: (batch, num_classes) score/logit tensor.
        target: (batch,) ground-truth class indices.
        topk: tuple of k values to report.

    Returns:
        List of one-element tensors, one per k, each the top-k accuracy in %.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # pred: (maxk, batch) matrix of predicted class indices after .t().
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # Fixed: reshape(-1) instead of view(-1). The slice of the
            # transposed tensor is non-contiguous, which view() rejects on
            # PyTorch >= 1.7 ("view size is not compatible ...").
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
self.epoch_accuracy[idx, 1] = val_acc 250 | self.current_epoch = idx + 1 251 | return self.max_accuracy(False) == val_acc 252 | 253 | def max_accuracy(self, istrain): 254 | if self.current_epoch <= 0: return 0 255 | if istrain: return self.epoch_accuracy[:self.current_epoch, 0].max() 256 | else: return self.epoch_accuracy[:self.current_epoch, 1].max() 257 | 258 | def plot_curve(self, save_path): 259 | title = 'the accuracy/loss curve of train/val' 260 | dpi = 80 261 | width, height = 1200, 800 262 | legend_fontsize = 10 263 | scale_distance = 48.8 264 | figsize = width / float(dpi), height / float(dpi) 265 | 266 | fig = plt.figure(figsize=figsize) 267 | x_axis = np.array([i for i in range(self.total_epoch)]) # epochs 268 | y_axis = np.zeros(self.total_epoch) 269 | 270 | plt.xlim(0, self.total_epoch) 271 | plt.ylim(0, 100) 272 | interval_y = 5 273 | interval_x = 5 274 | plt.xticks(np.arange(0, self.total_epoch + interval_x, interval_x)) 275 | plt.yticks(np.arange(0, 100 + interval_y, interval_y)) 276 | plt.grid() 277 | plt.title(title, fontsize=20) 278 | plt.xlabel('the training epoch', fontsize=16) 279 | plt.ylabel('accuracy', fontsize=16) 280 | 281 | y_axis[:] = self.epoch_accuracy[:, 0] 282 | plt.plot(x_axis, y_axis, color='g', linestyle='-', label='train-accuracy', lw=2) 283 | plt.legend(loc=4, fontsize=legend_fontsize) 284 | 285 | y_axis[:] = self.epoch_accuracy[:, 1] 286 | plt.plot(x_axis, y_axis, color='y', linestyle='-', label='valid-accuracy', lw=2) 287 | plt.legend(loc=4, fontsize=legend_fontsize) 288 | 289 | 290 | y_axis[:] = self.epoch_losses[:, 0] 291 | plt.plot(x_axis, y_axis*50, color='g', linestyle=':', label='train-loss-x50', lw=2) 292 | plt.legend(loc=4, fontsize=legend_fontsize) 293 | 294 | y_axis[:] = self.epoch_losses[:, 1] 295 | plt.plot(x_axis, y_axis*50, color='y', linestyle=':', label='valid-loss-x50', lw=2) 296 | plt.legend(loc=4, fontsize=legend_fontsize) 297 | 298 | if save_path is not None: 299 | fig.savefig(save_path, dpi=dpi, 
bbox_inches='tight') 300 | print ('---- save figure {} into {}'.format(title, save_path)) 301 | plt.close(fig) 302 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import os, sys, time, random 4 | import torch 5 | import json 6 | import numpy as np 7 | 8 | from config.config import RESULT_PATH, MODEL_PATH, PROJECT_PATH 9 | 10 | def get_model_path(dataset_name, network_arch, random_seed): 11 | if not os.path.isdir(MODEL_PATH): 12 | os.makedirs(MODEL_PATH) 13 | model_path = os.path.join(MODEL_PATH, "{}_{}_{}".format(dataset_name, network_arch, random_seed)) 14 | if not os.path.isdir(model_path): 15 | os.makedirs(model_path) 16 | return model_path 17 | 18 | def get_result_path(dataset_name, network_arch, random_seed, result_subfolder, postfix=''): 19 | if not os.path.isdir(RESULT_PATH): 20 | os.makedirs(RESULT_PATH) 21 | ISOTIMEFORMAT='%Y-%m-%d_%X' 22 | t_string = '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) )) 23 | result_path = os.path.join(RESULT_PATH, result_subfolder, "{}_{}_{}_{}{}".format(t_string, dataset_name, network_arch, random_seed, postfix)) 24 | os.makedirs(result_path) 25 | return result_path 26 | 27 | def time_string(): 28 | ISOTIMEFORMAT='%Y-%m-%d %X' 29 | string = '[{}]'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) )) 30 | return string 31 | 32 | def convert_secs2time(epoch_time): 33 | need_hour = int(epoch_time / 3600) 34 | need_mins = int((epoch_time - 3600*need_hour) / 60) 35 | need_secs = int(epoch_time - 3600*need_hour - 60*need_mins) 36 | return need_hour, need_mins, need_secs 37 | 38 | def time_file_str(): 39 | ISOTIMEFORMAT='%Y-%m-%d' 40 | string = '{}'.format(time.strftime( ISOTIMEFORMAT, time.gmtime(time.time()) )) 41 | return string + '-{}'.format(random.randint(1, 10000)) 42 | 43 | def print_log(print_string, log): 44 | 
print("{}".format(print_string)) 45 | log.write('{}\n'.format(print_string)) 46 | log.flush() 47 | 48 | def one_hot(class_labels, num_classes=None): 49 | if num_classes==None: 50 | return torch.zeros(len(class_labels), class_labels.max()+1).scatter_(1, class_labels.unsqueeze(1), 1.) 51 | else: 52 | return torch.zeros(len(class_labels), num_classes).scatter_(1, class_labels.unsqueeze(1), 1.) 53 | 54 | def softmax(x): 55 | """Compute softmax values for each sets of scores in x.""" 56 | e_x = np.exp(x - np.max(x)) 57 | return e_x / e_x.sum(axis=0) 58 | 59 | def get_imagenet_dicts(): 60 | # Imagenet class names 61 | idx2label = [] 62 | cls2label = {} 63 | with open(os.path.join(PROJECT_PATH, "dataset_utils/imagenet_class_index.json"), "r") as read_file: 64 | class_idx = json.load(read_file) 65 | idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] 66 | cls2label = {class_idx[str(k)][0]: class_idx[str(k)][1] for k in range(len(class_idx))} 67 | 68 | return idx2label, cls2label 69 | -------------------------------------------------------------------------------- /vgg19_sea_lion/checkpoint.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phibenz/uap_virtual_data.pytorch/13c0a25ff138738f347d7a562735dfbbc83d85c0/vgg19_sea_lion/checkpoint.pth.tar --------------------------------------------------------------------------------