├── dataset └── __init__.py ├── nets ├── STNet │ ├── __init__.py │ ├── grid_sample.py │ ├── affine_STN.py │ ├── tps_STN.py │ ├── AffineSTN.py │ ├── tps_grid_gen.py │ └── STNLocalizer.py ├── AdvPatch │ ├── __init__.py │ ├── advPatch.py │ ├── advPatch_model_builder.py │ ├── advPatch_util.py │ ├── collaborative_advPatch.py │ └── hybrid_advPatch.py ├── ColorNet │ ├── __init__.py │ ├── color_transformer.py │ └── PCT_transformation.py ├── backbone │ ├── __init__.py │ └── backbone_config.py ├── PatchTransformer │ ├── __init__.py │ ├── patch_blurring.py │ ├── patchTransformer_model_builder.py │ └── patch_transformer_net.py ├── LightingNet │ ├── __init__.py │ ├── cc_gen.py │ ├── cc_drn.py │ └── cc_f4.py └── EOTTransformer │ └── EOT_transformer.py ├── trainer └── __init__.py ├── detector ├── yolov2 │ ├── __init__.py │ ├── arial.ttf │ ├── coco.names │ ├── patch_config.py │ └── yolo.cfg ├── yolov3 │ ├── utils │ │ ├── __init__.py │ │ ├── augmentations.py │ │ ├── logger.py │ │ ├── parse_config.py │ │ └── datasets.py │ ├── data │ │ ├── custom │ │ │ ├── classes.names │ │ │ ├── train.txt │ │ │ ├── valid.txt │ │ │ ├── labels │ │ │ │ └── train.txt │ │ │ └── images │ │ │ │ └── train.jpg │ │ ├── samples │ │ │ ├── dog.jpg │ │ │ ├── eagle.jpg │ │ │ ├── field.jpg │ │ │ ├── messi.jpg │ │ │ ├── room.jpg │ │ │ ├── giraffe.jpg │ │ │ ├── person.jpg │ │ │ ├── street.jpg │ │ │ └── herd_of_horses.jpg │ │ ├── coco.names │ │ └── get_coco_dataset.sh │ ├── assets │ │ ├── dog.png │ │ ├── messi.png │ │ ├── giraffe.png │ │ └── traffic.png │ ├── config │ │ ├── custom.data │ │ ├── coco.data │ │ └── yolov3-tiny.cfg │ ├── requirements.txt │ ├── weights │ │ └── download_weights.sh │ ├── test.py │ ├── detect.py │ ├── README.md │ └── train.py ├── build_object_detector.py ├── object_detector.py ├── SSD_detector.py ├── yolov3_detector.py ├── yolo_util.py ├── yolov2_detector.py └── faster_rcnn_detector.py ├── .gitignore ├── requirement.txt ├── utils ├── arial.ttf ├── logger.py └── gaussian_blur.py ├── kaidi_color_model ├── weights2_0_1.npz ├── weights2__1_1.npz └── weights2_digital2new_0_1.npz ├── .gitmodules ├── configs ├── config_patchTransformer.yaml ├── config_advPatch_detectron2.yaml ├── config_advPatch.yaml └── config_collaborative_advPatch.yaml ├── losses ├── ohem_loss.py ├── mask_losses.py └── smooth_l1_loss.py ├── README.md ├── demo_detector.py ├── pytorch_msssim └── __init__.py ├── eval_advPath_oneperson.py └── opts.py /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nets/STNet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /trainer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /detector/yolov2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nets/AdvPatch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nets/ColorNet/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /nets/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ -------------------------------------------------------------------------------- /detector/yolov3/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nets/PatchTransformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /detector/yolov3/data/custom/classes.names: -------------------------------------------------------------------------------- 1 | train 2 | -------------------------------------------------------------------------------- /detector/yolov3/data/custom/train.txt: -------------------------------------------------------------------------------- 1 | data/custom/images/train.jpg 2 | -------------------------------------------------------------------------------- /detector/yolov3/data/custom/valid.txt: -------------------------------------------------------------------------------- 1 | data/custom/images/train.jpg 2 | -------------------------------------------------------------------------------- /detector/yolov3/data/custom/labels/train.txt: -------------------------------------------------------------------------------- 1 | 0 0.515 0.5 0.21694873 0.18286777 2 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | yacs 4 | cython 5 | packaging 6 | easydict -------------------------------------------------------------------------------- /utils/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/utils/arial.ttf -------------------------------------------------------------------------------- /detector/yolov2/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov2/arial.ttf -------------------------------------------------------------------------------- /detector/yolov3/assets/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/assets/dog.png -------------------------------------------------------------------------------- /detector/yolov3/assets/messi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/assets/messi.png -------------------------------------------------------------------------------- /detector/yolov3/assets/giraffe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/assets/giraffe.png -------------------------------------------------------------------------------- /detector/yolov3/assets/traffic.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/assets/traffic.png -------------------------------------------------------------------------------- /kaidi_color_model/weights2_0_1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/kaidi_color_model/weights2_0_1.npz -------------------------------------------------------------------------------- /kaidi_color_model/weights2__1_1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/kaidi_color_model/weights2__1_1.npz -------------------------------------------------------------------------------- /detector/yolov3/data/samples/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/dog.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/eagle.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/field.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/field.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/messi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/messi.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/room.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/room.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/giraffe.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/person.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/street.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/street.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/custom/images/train.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/custom/images/train.jpg 
-------------------------------------------------------------------------------- /detector/yolov3/config/custom.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train=data/custom/train.txt 3 | valid=data/custom/valid.txt 4 | names=data/custom/classes.names 5 | -------------------------------------------------------------------------------- /detector/yolov3/data/samples/herd_of_horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/herd_of_horses.jpg -------------------------------------------------------------------------------- /kaidi_color_model/weights2_digital2new_0_1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/kaidi_color_model/weights2_digital2new_0_1.npz -------------------------------------------------------------------------------- /detector/yolov3/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch>=1.0 3 | torchvision 4 | matplotlib 5 | tensorflow 6 | tensorboard 7 | terminaltables 8 | pillow 9 | tqdm 10 | -------------------------------------------------------------------------------- /detector/yolov3/config/coco.data: -------------------------------------------------------------------------------- 1 | classes= 80 2 | train=data/coco/trainvalno5k.txt 3 | valid=data/coco/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "detector/detectron2"] 2 | path = detector/detectron2 3 | url = https://github.com/facebookresearch/detectron2.git 4 | [submodule "detector/SSD"] 5 | path = detector/SSD 6 | url = https://github.com/lufficc/SSD 7 | -------------------------------------------------------------------------------- /detector/yolov3/utils/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | def horisontal_flip(images, targets): 7 | images = torch.flip(images, [-1]) 8 | targets[:, 2] = 1 - targets[:, 2] 9 | return images, targets 10 | -------------------------------------------------------------------------------- /detector/yolov3/weights/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Download weights for vanilla YOLOv3 3 | wget -c https://pjreddie.com/media/files/yolov3.weights 4 | # # Download weights for tiny YOLOv3 5 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 6 | # Download weights for backbone network 7 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 8 | -------------------------------------------------------------------------------- /nets/PatchTransformer/patch_blurring.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | class PatchBlurringModule(nn.Module): 7 | def __init__(self): 8 | super(PatchBlurringModule, self).__init__() 9 | self.blurring_factor = torch.nn.Parameter(torch.tensor([1.0])) 10 | 11 | def forward(self, x): 12 | blurring_factor = 
torch.clamp(self.blurring_factor, min=0.1, max=1.0) 13 | print (self.blurring_factor, blurring_factor) 14 | return F.interpolate(x, scale_factor= blurring_factor.item(), mode='bilinear', align_corners=False) 15 | -------------------------------------------------------------------------------- /nets/STNet/grid_sample.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # credit to https://github.com/WarBean/tps_stn_pytorch/blob/master/tps_grid_gen.py 3 | 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | def grid_sample(input, grid, canvas = None): 8 | output = F.grid_sample(input, grid) 9 | 10 | if canvas is None: 11 | return output 12 | else: 13 | input_mask = Variable(input.data.new(input.size()).fill_(1)) 14 | output_mask = F.grid_sample(input_mask, grid) 15 | padded_output = output * output_mask + canvas * (1 - output_mask) 16 | return padded_output 17 | -------------------------------------------------------------------------------- /detector/yolov3/utils/logger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Logger(object): 5 | def __init__(self, log_dir): 6 | """Create a summary writer logging to log_dir.""" 7 | self.writer = tf.summary.FileWriter(log_dir) 8 | 9 | def scalar_summary(self, tag, value, step): 10 | """Log a scalar variable.""" 11 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 12 | self.writer.add_summary(summary, step) 13 | 14 | def list_of_scalars_summary(self, tag_value_pairs, step): 15 | """Log scalar variables.""" 16 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs]) 17 | self.writer.add_summary(summary, step) 18 | -------------------------------------------------------------------------------- /nets/LightingNet/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any 2 | 3 | import torch.nn as nn 4 | from fvcore.common.registry import Registry 5 | 6 | LIGHTINGNET_REGISTRY = Registry('LIGHTINGNET') 7 | LIGHTINGNET_REGISTRY.__doc__ = """ 8 | Registry for lightning model. 9 | The registered object will be called with `obj(config)`. 10 | The call should return a `torch.nn.Module` object. 
11 | """ 12 | 13 | 14 | def lighting_net_builder(config: Dict[str, Any]) -> nn.Module: 15 | 16 | return LIGHTINGNET_REGISTRY.get(config['LightingCT'])(config) 17 | 18 | 19 | from .cc_drn import CCDRN 20 | from .cc_f4 import CC_FCN4 21 | from .fine_generator import Generator 22 | from .cc_gen import CCGenerator 23 | 24 | 25 | # TODO: support deprecated name 26 | LIGHTINGNET_REGISTRY._do_register('cc', CC_FCN4) 27 | LIGHTINGNET_REGISTRY._do_register('gen', Generator) 28 | -------------------------------------------------------------------------------- /detector/yolov2/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /detector/yolov3/data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /detector/yolov3/data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 4 | 5 | # Clone COCO API 6 | git clone https://github.com/pdollar/coco 7 | cd coco 8 | 9 | mkdir images 10 | cd images 11 | 12 | # Download Images 13 | wget -c https://pjreddie.com/media/files/train2014.zip 14 | wget -c https://pjreddie.com/media/files/val2014.zip 15 | 16 | # Unzip 17 | unzip -q train2014.zip 18 | unzip -q 
val2014.zip 19 | 20 | cd .. 21 | 22 | # Download COCO Metadata 23 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 24 | wget -c https://pjreddie.com/media/files/coco/5k.part 25 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 26 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 27 | tar xzf labels.tgz 28 | unzip -q instances_train-val2014.zip 29 | 30 | # Set Up Image Lists 31 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 32 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt 33 | -------------------------------------------------------------------------------- /detector/yolov3/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def parse_model_config(path): 4 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 5 | file = open(path, 'r') 6 | lines = file.read().split('\n') 7 | lines = [x for x in lines if x and not x.startswith('#')] 8 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 9 | module_defs = [] 10 | for line in lines: 11 | if line.startswith('['): # This marks the start of a new block 12 | module_defs.append({}) 13 | module_defs[-1]['type'] = line[1:-1].rstrip() 14 | if module_defs[-1]['type'] == 'convolutional': 15 | module_defs[-1]['batch_normalize'] = 0 16 | else: 17 | key, value = line.split("=") 18 | value = value.strip() 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | def parse_data_config(path): 24 | """Parses the data configuration file""" 25 | options = dict() 26 | options['gpus'] = '0,1,2,3' 27 | options['num_workers'] = '10' 28 | with open(path, 'r') as fp: 29 | lines = fp.readlines() 30 | for line in lines: 31 | line = line.strip() 32 | if line == '' or line.startswith('#'): 33 | continue 34 | key, value = line.split('=') 35 | options[key.strip()] = value.strip() 36 | return options 37 | -------------------------------------------------------------------------------- /nets/STNet/affine_STN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | from nets.STNet.STNLocalizer import AffineLocalizer 6 | 7 | def init_module(module): 8 | for m in module.modules(): 9 | if isinstance(m, nn.Conv2d): 10 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 11 | if m.bias is not None: 12 | nn.init.zeros_(m.bias) 13 | elif isinstance(m, nn.BatchNorm2d): 14 | nn.init.ones_(m.weight) 15 | nn.init.zeros_(m.bias) 16 | elif isinstance(m, nn.Linear): 17 | nn.init.normal_(m.weight, 0, 0.01) 18 | if m.bias is not None: 19 | nn.init.zeros_(m.bias) 20 | 21 | 22 | class AffineSTNNet(nn.Module): 23 | def __init__(self, config): 24 | super(AffineSTNNet, self).__init__() 25 | self.localizer = AffineLocalizer(backbone=config['loc_backbone'], 26 | downsample_dim=config['loc_downsample_dim'], 27 | fc_dim=config['loc_fc_dim'], 28 | predict_dimension=config['adjust_patch_size']) 29 | 30 | # transform the template 31 | def forward(self, x, template): 32 | # transform the input 33 | theta, output_scale = self.localizer(x) 34 | grid = F.affine_grid(theta, template.size()) 35 | y = F.grid_sample(template, grid) 36 | 37 | return y, theta, output_scale 38 | -------------------------------------------------------------------------------- /detector/build_object_detector.py: 
-------------------------------------------------------------------------------- 1 | from .detector_info import * 2 | 3 | def build_object_detector(config): 4 | detector_impl = config['detector_impl'] 5 | detector_name = config['detector_name'] 6 | input_size = config['detector_input_size'] 7 | test_size = config['detector_test_size'] 8 | target_object_id = config['target_object_id'] 9 | object_dataset = config['object_dataset'] 10 | 11 | model_name = detector_name + '_' + object_dataset 12 | detector_info = DETECTOR_INFO[detector_impl][model_name] 13 | detector= detector_info['detector'] 14 | cfg_path = detector_info['cfg_path'] 15 | model_path = detector_info['model_path'] 16 | 17 | if test_size[0] < 0: 18 | test_size = detector_info['test_size'] 19 | 20 | if target_object_id < 0: 21 | target_object_id = detector_info['target_object_id'] 22 | 23 | print ('====== Object Detector Information ========') 24 | print ('Detector: %s: %s...' % (detector_impl, detector_name)) 25 | print ('CFG path: %s Model path: %s ' % (cfg_path, model_path)) 26 | print ('Input_size: (%d %d) test_size (%d %d)' % (input_size[0], input_size[1], test_size[0], test_size[1])) 27 | print ('Dataset: %s' % (object_dataset)) 28 | print ('Target object ID: %d\n' % (target_object_id)) 29 | 30 | return detector(model_name=model_name, 31 | cfg_path=cfg_path, 32 | model_path= model_path, 33 | class_names = OBJECT_CLASS_NAMES[object_dataset], 34 | input_size=input_size, 35 | test_size=test_size, 36 | target_object_id = target_object_id) 37 | -------------------------------------------------------------------------------- /nets/LightingNet/cc_gen.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | 5 | from . 
import LIGHTINGNET_REGISTRY 6 | from .fine_generator import FineGenerator 7 | 8 | @LIGHTINGNET_REGISTRY.register() 9 | class CCGenerator(nn.Module): 10 | 11 | def __init__(self, config=None): 12 | super().__init__() 13 | self.input_dim = config['input_dim'] 14 | self.cnum = config['ngf'] 15 | self.use_cuda = config['cuda'] 16 | self.device_ids = config['gpu_ids'] 17 | 18 | self.backbone = FineGenerator(self.input_dim, self.cnum, self.use_cuda, self.device_ids, output_dim=4) 19 | 20 | # self.pool = nn.Conv2d(3, 3, kernel_size=9, stride=8, bias=True) 21 | self.pool = nn.Conv2d(3, 3, kernel_size=11, stride=10, bias=True) 22 | 23 | # transform the template 24 | def forward(self, x): 25 | return self.forward_template(x) 26 | 27 | # the normalized output is NOT required as we need to learn the lighting condition 28 | # changes in the environment 29 | def forward_template(self, x): 30 | y = self.backbone(x, add_input_back=False) 31 | _, _, h, w = y.shape 32 | 33 | rgb = y[:, :3, :, :] 34 | rgb = F.normalize(rgb, p=2, dim=1) 35 | confidence = y[:, -1, :, :].view(-1, h * w) 36 | confidence = F.softmax(confidence, dim=1) 37 | confidence = confidence.view(-1, 1, h, w) 38 | 39 | rgb = rgb * confidence 40 | #rgb = F.relu(rgb) 41 | rgb = F.relu(self.pool(rgb)) 42 | 43 | return rgb 44 | 45 | def generate(self, src_img, frame_img): 46 | rgb = self.forward_template(frame_img) 47 | return src_img * rgb 48 | -------------------------------------------------------------------------------- /configs/config_patchTransformer.yaml: -------------------------------------------------------------------------------- 1 | # data parameters 2 | use_augmentation: True 3 | 4 | template_shape: [252, 150, 3] # H, W 5 | template_resize: False 6 | 7 | # geometric transformation --- STN parameters 8 | learnableSTN: False # learn STN or fix it 9 | STN_loss: L1Mask # L1, L2, SIMMMask, L1Mask 10 | STN: tps # affine or tps 11 | loc_backbone: resnet18 12 | loc_downsample_dim: 128 13 | loc_fc_dim: 256 14 | adjust_patch_size: False # learn to adjust the patch size for pasting 15 | TPS_localizer: bounded_stn #bounded_stn or unbounded_stn 16 | TPS_range: [0.999, 0.999] 17 | TPS_grid: [20, 10] 18 | 19 | # patch blurring module 20 | use_PBM: False 21 | 22 | # printer color transformation (PCT) 23 | use_PCT: False 24 | PrinterCT: PCTLinear # PCT or PCTLinear or PCTLinearBias or PCTNeural 25 | PCT_loss: L1 26 | color_transformation_path: 'kaidi_color_model/weights2_digital2new_0_1.npz' 27 | 28 | use_LCT: False 29 | LCT_loss: L1Mask 30 | LightingCT: gen #cc (color constancy, i.e. cc_fc4) or gen (image generator) 31 | lct_backbone: alexnet #alextnet, resnet18 32 | lct_input_size: [256, 256] 33 | #generator_input_size: [768, 768] 34 | #generator_input_size: [384, 384] 35 | generator_input_size: [256, 256] 36 | 37 | epochs: 1000 38 | batch_size: 72 39 | input_dim: 3 40 | ngf: 48 41 | image_shape: [256, 256, 3] 42 | 43 | # log 44 | model_checkpoint: checkpoint.pth.tar 45 | model_best: model_best.pth.tar 46 | 47 | # training parameters 48 | cuda: True 49 | gpu_ids: [0, 1, 2, 3, 4, 5] # set the GPU ids to use, e.g. 
[0] or [1, 2] 50 | num_workers: 24 51 | lr: 0.0001 52 | beta1: 0.5 53 | beta2: 0.9 54 | print_iter: 20 55 | 56 | # scheduler 57 | scheduler_patience: 25 58 | scheduler_factor: 0.5 59 | -------------------------------------------------------------------------------- /losses/ohem_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | ''' 5 | class NLL_OHEM(th.nn.NLLLoss): 6 | """ Online hard example mining. 7 | Needs input from nn.LogSotmax() """ 8 | 9 | def __init__(self, ratio): 10 | super(NLL_OHEM, self).__init__(None, True) 11 | self.ratio = ratio 12 | 13 | def forward(self, x, y, ratio=None): 14 | if ratio is not None: 15 | self.ratio = ratio 16 | num_inst = x.size(0) 17 | num_hns = int(self.ratio * num_inst) 18 | x_ = x.clone() 19 | inst_losses = th.autograd.Variable(th.zeros(num_inst)).cuda() 20 | for idx, label in enumerate(y.data): 21 | inst_losses[idx] = -x_.data[idx, label] 22 | # loss_incs = -x_.sum(1) 23 | _, idxs = inst_losses.topk(num_hns) 24 | x_hn = x.index_select(0, idxs) 25 | y_hn = y.index_select(0, idxs) 26 | return th.nn.functional.nll_loss(x_hn, y_hn) 27 | ''' 28 | 29 | class Adv_OHEM(nn.Module): 30 | """ Online hard example mining. 31 | Needs the max probability of the bboxes in each image """ 32 | 33 | def __init__(self, ratio): 34 | super(Adv_OHEM, self).__init__() 35 | self.ratio = ratio 36 | 37 | def forward(self, x, ratio=None): 38 | if ratio is not None: 39 | self.ratio = ratio 40 | num_inst = x.size(0) 41 | num_hns = int(self.ratio * num_inst) 42 | #x_ = x.clone() 43 | inst_losses = torch.autograd.Variable(torch.zeros(num_inst)).cuda() 44 | for idx, prob in enumerate(x.data): 45 | inst_losses[idx] = x.data[idx] 46 | # loss_incs = -x_.sum(1) 47 | 48 | _, idxs = inst_losses.topk(num_hns) 49 | x_hn = x.index_select(0, idxs) 50 | return torch.mean(x_hn) 51 | -------------------------------------------------------------------------------- /nets/LightingNet/cc_drn.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | from nets.backbone.backbone_config import get_backbone, get_last_conv_dim 5 | 6 | from . 
import LIGHTINGNET_REGISTRY 7 | 8 | 9 | @LIGHTINGNET_REGISTRY.register() 10 | class CCDRN(nn.Module): 11 | 12 | def __init__(self, config=None): 13 | super().__init__() 14 | 15 | backbone_name = config['lct_backbone'] 16 | self.backbone = get_backbone(backbone_name) 17 | last_conv_dim = get_last_conv_dim(backbone_name) 18 | ori_output_size = 256 // 4 19 | 20 | self.projection = nn.Sequential( 21 | nn.Conv2d(last_conv_dim, 64, kernel_size=1, padding=0, bias=False), 22 | nn.BatchNorm2d(64), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(64, 4, kernel_size=1, padding=0, bias=False), 25 | nn.Dropout(0.5), 26 | nn.ReLU(inplace=True) 27 | ) 28 | 29 | self.pool = nn.Conv2d(3, 3, kernel_size=9, stride=ori_output_size // 8, bias=True) 30 | 31 | # transform the template 32 | def forward(self, x): 33 | return self.forward_template(x) 34 | 35 | # the normalized output is NOT required as we need to learn the lighting condition 36 | # changes in the environment 37 | def forward_template(self, x): 38 | y = self.backbone(x) 39 | _, _, h, w = y.shape 40 | 41 | rgb = y[:, :3, :, :] 42 | rgb = F.normalize(rgb, p=2, dim=1) 43 | confidence = y[:, -1, :, :].view(-1, h * w) 44 | confidence = F.softmax(confidence, dim=1) 45 | confidence = confidence.view(-1, 1, h, w) 46 | 47 | rgb = rgb * confidence 48 | rgb = F.relu(self.pool(rgb)) 49 | return rgb 50 | 51 | def generate(self, src_img, frame_img): 52 | rgb = self.forward_template(frame_img) 53 | return src_img * rgb 54 | -------------------------------------------------------------------------------- /nets/backbone/backbone_config.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torchvision.models as models 3 | from functools import partial 4 | 5 | try: 6 | from torch.hub import load_state_dict_from_url 7 | except ImportError: 8 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 9 | 10 | 11 | def dilated_resnet(depth, num_classes=1000, pretrained: bool = False): 12 | block = models.resnet.BasicBlock if depth < 50 else models.resnet.Bottleneck 13 | layers = { 14 | 18: [2, 2, 2, 2], 15 | 34: [3, 4, 6, 3], 16 | 50: [3, 4, 6, 3], 17 | 101: [3, 4, 23, 3], 18 | 152: [3, 8, 36, 3]}[depth] 19 | 20 | model = models.ResNet(block, layers, num_classes=num_classes, replace_stride_with_dilation=[True, True, True]) 21 | if pretrained: 22 | state_dict = load_state_dict_from_url(models.resnet.model_urls[f'resnet{depth}'], map_location='cpu') 23 | model.load_state_dict(state_dict) 24 | return model 25 | 26 | 27 | backbone_info = { 28 | 'resnet18': {'model': models.resnet18, 'last_conv_dim': 512}, 29 | 'resnet50': {'model': models.resnet50, 'last_conv_dim': 2048}, 30 | 'resnet101': {'model': models.resnet101, 'last_conv_dim': 2048}, 31 | 'alexnet': {'model': models.alexnet, 'last_conv_dim': 256}, 32 | 'vgg11_bn': {'model': models.vgg11_bn, 'last_conv_dim': 512}, 33 | 'vgg11': {'model': models.vgg11, 'last_conv_dim': 512}, 34 | 'vgg19_bn': {'model': models.vgg19_bn, 'last_conv_dim': 512}, 35 | 'vgg19': {'model': models.vgg19, 'last_conv_dim': 512}, 36 | 'dresnet18': {'model': partial(dilated_resnet, depth=18), 'last_conv_dim': 512}, 37 | 'dresnet50': {'model': partial(dilated_resnet, depth=50), 'last_conv_dim': 512} 38 | } 39 | 40 | 41 | def get_backbone(name: str, pretrained: bool = True) -> nn.Module: 42 | model = backbone_info[name]['model'](num_classes=1000, pretrained=pretrained) 43 | return nn.Sequential(*list(model.children())[0:-2]) 44 | 45 | 46 | def get_last_conv_dim(name: str) -> 
int: 47 | return backbone_info[name]['last_conv_dim'] 48 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import datetime 4 | import logging 5 | import shutil 6 | 7 | 8 | def date_uid(): 9 | """Generate a unique id based on date. 10 | 11 | Returns: 12 | str: Return uid string, e.g. '20171122171307111552'. 13 | 14 | """ 15 | return str(datetime.datetime.now()).replace('-', '') \ 16 | .replace(' ', '').replace(':', '').replace('.', '') 17 | 18 | 19 | def get_logger(checkpoint_path, filename, filemode='w'): 20 | """ 21 | Get the root logger 22 | :param checkpoint_path: only specify this when the first time call it 23 | :return: the root logger 24 | """ 25 | if filemode == 'w' and os.path.exists(os.path.join(checkpoint_path, filename)): 26 | print ("\n**************************************************", flush=True) 27 | print("Found old results, copying it to avoid for overwritten.", flush=True) 28 | target_path = checkpoint_path.rstrip('/') 29 | i = 0 30 | curr_target_path = target_path + f".{i}" 31 | while True: 32 | if os.path.exists(curr_target_path): 33 | i += 1 34 | curr_target_path = target_path + f".{i}" 35 | else: 36 | break 37 | print(f"Copying old log folder to {curr_target_path}", flush=True) 38 | print ("**************************************************\n", flush=True) 39 | shutil.copytree(checkpoint_path, curr_target_path) 40 | 41 | if checkpoint_path: 42 | logger = logging.getLogger() 43 | formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 44 | stream_hdlr = logging.StreamHandler(sys.stdout) 45 | # log_filename = date_uid() 46 | file_hdlr = logging.FileHandler(os.path.join(checkpoint_path, filename), mode=filemode) 47 | stream_hdlr.setFormatter(formatter) 48 | file_hdlr.setFormatter(formatter) 49 | logger.addHandler(stream_hdlr) 50 | logger.addHandler(file_hdlr) 51 | logger.setLevel(logging.INFO) 52 | else: 53 | logger = logging.getLogger() 54 | return logger 55 | -------------------------------------------------------------------------------- /nets/EOTTransformer/EOT_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | class EOTTransformer(nn.Module): 7 | def __init__(self, contrast=(0.9, 1.1), brightness=(-0.1, 0.1), rotation=8.0, scale=(0.85, 1.15)): 8 | #def __init__(self, contrast=(0.8, 1.2), brightness=(-0.2, 0.2), rotation=8.0, scale=(1.0, 1.0)): 9 | super(EOTTransformer, self).__init__() 10 | self.contrast_min, self.contrast_max = contrast 11 | self.brightness_min, self.brightness_max = brightness 12 | self.rotation_min, self.rotation_max = -rotation, rotation 13 | self.scale_min, self.scale_max = scale 14 | #self.theta = nn.Parameter(torch.tensor([[1, 0, 0], [0, 1, 0]], dtype=torch.float)) 15 | #self.theta.cuda() 16 | 17 | #self.compose = transforms.Compose([transforms.ColorJitter(brightness, contrast), 18 | # transforms.RandomAffine(rotation, scale=scale, fillcolor=0.0)]) 19 | 20 | # x in range [0 1] 21 | def forward(self, x, do_rotate=True): 22 | num_batch= x.shape[0] 23 | contrast = torch.FloatTensor(num_batch, 1, 1, 1).uniform_(self.contrast_min, self.contrast_max).cuda() 24 | brightness = torch.FloatTensor(num_batch, 1, 1, 1).uniform_(-self.brightness_min, self.brightness_max).cuda() 25 | y = torch.clamp(x * contrast + brightness, 
0, 1) 26 | 27 | # do affine transformation 28 | a = np.random.uniform(self.rotation_min, self.rotation_max, num_batch) / 180 * np.pi 29 | s = np.random.uniform(self.scale_min, self.scale_max, num_batch) 30 | 31 | t = np.stack ((np.cos(a)*s, -np.sin(a)*s, np.zeros(num_batch), np.sin(a)*s, np.cos(a)*s, np.zeros(num_batch)), axis=1) 32 | t = t.reshape(num_batch, 2, 3) 33 | # t = np.array([[np.cos(angle), -1.0*np.sin(angle), 0], [np.sin(angle), np.cos(angle), 0]], dtype=np.float)*scale 34 | # self.theta.data.copy_(torch.from_numpy(t)) 35 | #print (angle/np.pi*180, scale, t, self.theta) 36 | t = torch.tensor(t, dtype=torch.float).cuda() 37 | grid = F.affine_grid(t, y.size(), align_corners=False) 38 | y = F.grid_sample(y, grid) 39 | y = torch.clamp(y, 0, 1) 40 | 41 | return y 42 | -------------------------------------------------------------------------------- /nets/STNet/tps_STN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import itertools 6 | from nets.STNet.STNLocalizer import BoundedTPSLocalizer, UnBoundedTPSLocalizer 7 | from nets.STNet.tps_grid_gen import TPSGridGen 8 | from nets.STNet.grid_sample import grid_sample 9 | 10 | class TpsSTNNet(nn.Module): 11 | def __init__(self, config): 12 | super(TpsSTNNet, self).__init__() 13 | 14 | r1, r2 = config['TPS_range'] # height and width 15 | grid_height, grid_width = config['TPS_grid'] 16 | assert r1 < 1 and r2 < 1 # if >= 1, arctanh will cause error in BoundedGridLocNet 17 | target_control_points = torch.Tensor(list(itertools.product( 18 | np.arange(-r1, r1 + 0.00001, 2.0 * r1 / (grid_height - 1)), 19 | np.arange(-r2, r2 + 0.00001, 2.0 * r2 / (grid_width - 1)), 20 | ))) 21 | 22 | Y, X = target_control_points.split(1, dim=1) 23 | target_control_points = torch.cat([X, Y], dim=1) 24 | 25 | GridLocNet = { 26 | 'unbounded_stn': UnBoundedTPSLocalizer, 27 | 'bounded_stn': BoundedTPSLocalizer, 28 | }[config['TPS_localizer']] 29 | 30 | backbone = config['loc_backbone'] 31 | # img_height, img_width, _ = config['image_shape'] 32 | #img_height, img_width, _ = config['image_shape'] if config['template_resize'] else config['template_shape'] 33 | img_height, img_width, _ = config['template_shape'] 34 | downsample_dim = config['loc_downsample_dim'] 35 | fc_dim = config['loc_fc_dim'] 36 | adjust_patchDim = config['adjust_patch_size'] 37 | self.loc_net = GridLocNet(backbone, downsample_dim, fc_dim, grid_height, grid_width, target_control_points, predict_dimension=adjust_patchDim) 38 | self.tps = TPSGridGen(img_height, img_width, target_control_points) 39 | 40 | # transform the template 41 | def forward(self, x, template): 42 | # transform the input 43 | batch_size = x.size(0) 44 | source_control_points, output_scale = self.loc_net(x) 45 | source_coordinate = self.tps(source_control_points) 46 | _, _, H, W = template.size() 47 | grid = source_coordinate.view(batch_size, H, W, 2) 48 | y = grid_sample(template, grid) 49 | 50 | return y, source_control_points, output_scale 51 | -------------------------------------------------------------------------------- /configs/config_advPatch_detectron2.yaml: -------------------------------------------------------------------------------- 1 | # data parameters 2 | use_augmentation: True 3 | use_ohem: False 4 | ohem_ratio: 0.5 5 | # similar to data augmentation 6 | use_EOT: False 7 | 8 | # patch related parameters 9 | adv_patch_size: [416, 416, 3] 10 | apply_border_mask: False 11 | 
border_mask_ratio: 0.05769 # 24/416 12 | border_value: 0.75 # white T-shirt 13 | tv_loss_weight: 2.5 14 | 15 | # Detector information 16 | detector_name: Detectron2 # YOLO_V2|YOLO_V3|Detectron2 17 | #detector_model_path: ./detector/yolov3 18 | detector_model_path: ./detector/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml # set to different yaml for different model 19 | detector_input_size: [1080, 1920] 20 | detector_scale_size: [416, 416] 21 | target_obj_id: 0 22 | train_nms_thresh: 0.8 23 | train_conf_thresh: 0.2999 24 | val_nms_thresh: 0.4 25 | val_conf_thresh: 0.7 26 | val_iou_threshold: 0.1 # 0.5 27 | 28 | template_shape: [252, 150, 3] # H, W print size 29 | template_resize: False # resize template and place it in the input image 30 | use_loc_net: True # use localization net 31 | 32 | 33 | # geometric transformation --- STN parameters 34 | STN: tps # affine or tps 35 | use_STN_loss: True 36 | #STN_loss_weight: 1.0 37 | loc_backbone: resnet18 38 | loc_downsample_dim: 128 39 | loc_fc_dim: 256 40 | TPS_localizer: bounded_stn #bounded_stn or unbounded_stn 41 | TPS_range: [0.999, 0.999] 42 | TPS_grid: [20, 10] 43 | 44 | # printer color transformation (PCT) 45 | color_transformation_path: 'kaidi_color_model/weights2_0_1.npz' 46 | PrinterCT: PCT # PCT, PCTLinear (linear), PCTNeural (non-linear) or None (no PCT applied) 47 | use_double_PCT: False 48 | 49 | # lighting color transformation 50 | use_LightingCT: False 51 | LightingCT: cc_fcn4 #cc_fcn4 or generator 52 | 53 | image_shape: [256, 256, 3] 54 | mask_shape: [128, 128] 55 | 56 | # log 57 | log_dir: 58 | log_file: log.log 59 | model_checkpoint: checkpoint.pth.tar 60 | model_best: model_best.pth.tar 61 | adv_patch_img: adv_patch.png 62 | adv_patch_img_best: best_adv_patch.png 63 | 64 | 65 | # training parameters 66 | cuda: True 67 | gpu_ids: [0, 1, 2, 3, 4, 5] # set the GPU ids to use, e.g. 
[0] or [1, 2] 68 | num_workers: 24 69 | compute_dsr: False 70 | visualize: False 71 | epochs: 1000 72 | batch_size: 72 73 | lr: 0.1 74 | beta1: 0.5 75 | beta2: 0.9 76 | print_iter: 20 77 | # scheduler 78 | scheduler_patience: 25 79 | scheduler_factor: 0.5 80 | -------------------------------------------------------------------------------- /nets/AdvPatch/advPatch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training code for Adversarial patch training 3 | 4 | """ 5 | from torchvision import transforms 6 | import torch 7 | from torch import nn 8 | from PIL import Image 9 | from .advPatch_util import generate_patch, generate_border_mask 10 | 11 | class AdvPatch(nn.Module): 12 | def __init__(self, config): 13 | super(AdvPatch, self).__init__() 14 | self.adv_patch_size = tuple(config['adv_patch_size']) 15 | self.apply_border_mask = config['apply_border_mask'] 16 | print(' ===== AdvPatch size: (%d %d %d) =======' % (self.adv_patch_size)) 17 | 18 | if self.apply_border_mask: 19 | self.border_value = config['border_value'] 20 | self._border_size = int(self.adv_patch_size[0] * config['border_mask_ratio'] + 0.5) 21 | print(' ===== Border mask size: %d Value: %.2f =======' % (self._border_size, self.border_value)) 22 | self.border_mask = nn.Parameter(generate_border_mask(self.adv_patch_size, self._border_size)) 23 | 24 | self.adv_patch = nn.Parameter(generate_patch("gray", size=self.adv_patch_size[:2])) 25 | 26 | @property 27 | def patch_size(self): 28 | return self.adv_patch_size 29 | 30 | @property 31 | def border_size(self): 32 | return self._border_size if self.apply_border_mask else 0 33 | 34 | def learnable(self): 35 | return [self.adv_patch] 36 | 37 | def clip(self): 38 | self.adv_patch.data.clamp_(0, 1) # keep patch in image range 39 | 40 | def forward(self): 41 | if self.apply_border_mask: 42 | # note that nn.parameter cannot be assigned directly, so an internal change is needed 43 | self.adv_patch.data *= self.border_mask.data 44 | self.adv_patch.data += (1 - self.border_mask.data) * self.border_value 45 | 46 | return self.adv_patch 47 | 48 | def save_patch(self, patch_path): 49 | adv_patch = self.adv_patch.detach().cpu() 50 | im = transforms.ToPILImage('RGB')(adv_patch) 51 | im.save(patch_path) 52 | 53 | def load_patch(self, patch_path): 54 | patch_img = Image.open(patch_path).convert('RGB') 55 | w, h = patch_img.size 56 | adv_h, adv_w = self.adv_patch_size[:2] 57 | if w != adv_w or h != adv_h: 58 | patch_img = transforms.Resize((adv_h, adv_w), Image.BILINEAR)(patch_img) 59 | 60 | self.adv_patch = torch.nn.Parameter(transforms.ToTensor()(patch_img)) 61 | 62 | def create_advPatch_model(config): 63 | return AdvPatch(config) 64 | -------------------------------------------------------------------------------- /nets/STNet/AffineSTN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | 6 | 7 | def init_module(module): 8 | for m in module.modules(): 9 | if isinstance(m, nn.Conv2d): 10 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 11 | if m.bias is not None: 12 | nn.init.zeros_(m.bias) 13 | elif isinstance(m, nn.BatchNorm2d): 14 | nn.init.ones_(m.weight) 15 | nn.init.zeros_(m.bias) 16 | elif isinstance(m, nn.Linear): 17 | nn.init.normal_(m.weight, 0, 0.01) 18 | if m.bias is not None: 19 | nn.init.zeros_(m.bias) 20 | 21 | 22 | class AffineSTNNet_depreciated(nn.Module): 23 | def __init__(self, backbone): 24 | 
super(AffineSTNNet_depreciated, self).__init__() 25 | if backbone == 'resnet18': 26 | resnet_model = models.resnet18(num_classes=10) 27 | elif backbone == 'resnet50': 28 | resnet_model = models.resnet50(num_classes=10) 29 | self.localizer = nn.Sequential(*list(resnet_model.children())[0:8]) 30 | 31 | self.last_conv_dim = 128 32 | self.down_sampler = nn.Sequential( 33 | nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0), 34 | nn.ReLU(True), 35 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 36 | 37 | # Regressor for the 3 * 2 affine matrix 38 | self.fc_loc = nn.Sequential( 39 | nn.Linear(128 * 4 * 4, 256), 40 | nn.ReLU(True), 41 | nn.Dropout(0.3), 42 | nn.Linear(256, 3 * 2) 43 | ) 44 | 45 | # weight initialization 46 | # init_module(self.down_sampler) 47 | # init_module(self.fc_loc) 48 | 49 | # Initialize the weights/bias with identity transformation 50 | self.fc_loc[-1].weight.data.zero_() 51 | self.fc_loc[-1].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)) 52 | 53 | # localization 54 | def localization(self, x): 55 | xs = self.localizer(x) 56 | # print (xs.shape) 57 | xs = self.down_sampler(xs) 58 | # print (xs.shape) 59 | xs = xs.view(-1, 128 * 4 * 4) 60 | theta = self.fc_loc(xs) 61 | theta = theta.view(-1, 2, 3) 62 | return theta 63 | 64 | # transform the template 65 | def forward(self, x, template): 66 | # transform the input 67 | theta = self.localization(x) 68 | grid = F.affine_grid(theta, template.size(), align_corners=False) 69 | try: 70 | y = F.grid_sample(template, grid, align_corners=False) 71 | except: 72 | y = F.grid_sample(template, grid) 73 | 74 | return y, theta 75 | -------------------------------------------------------------------------------- /detector/object_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class ObjectDetector(nn.Module): 5 | def __init__(self, detector_name, cfg_path, model_path, class_names=None, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 6 | super().__init__() 7 | self.name = detector_name 8 | self.model = self.load_model(cfg_path, model_path, class_names) 9 | self.class_names = class_names 10 | self.input_size = input_size 11 | self.test_size = test_size 12 | self.target_object_id = target_object_id 13 | 14 | def load_model(self, cfg_path, model_path, class_names=None): 15 | raise NotImplementedError('base class not implemented') 16 | 17 | def forward(self, x, *args, **kwargs): 18 | return self.detect(x, *args, **kwargs) 19 | 20 | # used for training/val 21 | def detect(self, images, conf_thresh=0.1, nms_thresh=0): 22 | raise NotImplementedError('base class not implemented') 23 | 24 | # used for test. Most times it is the same as 'detect', but in some cases such as YOLO_V2, it might be implemented differently from 'detect'. 
25 | def detector_detect(self, images, conf_thresh=0.1, nms_thresh=0): 26 | raise NotImplementedError('base class not implemented') 27 | 28 | def detect_train(self, images, conf_thresh=0.1, nms_thresh=0): 29 | """Only use for FasterRCNN type of model since we attack of FasterRCNN at RPN not final output.""" 30 | return self.detect(images, conf_thresh, nms_thresh) 31 | 32 | @property 33 | def name(self): 34 | return self._name 35 | 36 | @property 37 | def model(self): 38 | return self._model 39 | 40 | @property 41 | def class_names(self): 42 | return self._class_names 43 | 44 | @property 45 | def input_size(self): 46 | return self._input_size 47 | 48 | @property 49 | def test_size(self): 50 | return self._test_size 51 | 52 | @property 53 | def target_object_id(self): 54 | return self._target_object_id 55 | 56 | @name.setter 57 | def name(self,val): 58 | self._name = val 59 | 60 | @model.setter 61 | def model(self,val): 62 | self._model = val 63 | 64 | @class_names.setter 65 | def class_names(self, val): 66 | self._class_names = val 67 | 68 | @input_size.setter 69 | def input_size(self, val): 70 | self._input_size = val 71 | 72 | @test_size.setter 73 | def test_size(self, val): 74 | self._test_size = val 75 | 76 | @target_object_id.setter 77 | def target_object_id(self, val): 78 | self._target_object_id = val 79 | 80 | def cuda(self, device): 81 | self.model.cuda(device) 82 | 83 | def eval(self): 84 | self.model.eval() 85 | 86 | def training(self): 87 | self.model.train() 88 | -------------------------------------------------------------------------------- /detector/SSD_detector.py: -------------------------------------------------------------------------------- 1 | from detector.SSD.ssd.modeling.detector import build_detection_model 2 | from detector.SSD.ssd.config import cfg 3 | from detector.SSD.ssd.utils.checkpoint import CheckPointer 4 | from detector.yolo_util import wrap_detection_results, nms 5 | 6 | from .object_detector import ObjectDetector 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | class SSD_Detector(ObjectDetector): 11 | def __init__(self, model_name, cfg_path, model_path, class_names, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 12 | # load SSD 13 | super().__init__(model_name, cfg_path, model_path, class_names, input_size, test_size, target_object_id) 14 | data_mean = cfg.INPUT.PIXEL_MEAN 15 | data_mean[0], data_mean[1], data_mean[2] = data_mean[2], data_mean[0], data_mean[1] 16 | self.mean = data_mean 17 | 18 | if test_size[0] != cfg.INPUT.IMAGE_SIZE or test_size[1] != cfg.INPUT.IMAGE_SIZE: 19 | raise Warning('Scale size (%d, %d) is different from the default (%d %d)!' \ 20 | % (test_size[0], test_size[1], cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)) 21 | 22 | # skip background i.e. 
0 23 | self.class_names = [name for k, name in enumerate(class_names) if k > 0] 24 | 25 | def load_model(self, cfg_path, model_path, class_names=None): 26 | cfg.merge_from_file(cfg_path) 27 | cfg.freeze() 28 | 29 | ssd_model = build_detection_model(cfg) 30 | checkpointer = CheckPointer(ssd_model, save_dir=cfg.OUTPUT_DIR) 31 | checkpointer.load(model_path, use_latest=False) 32 | ssd_model.eval() 33 | 34 | return ssd_model 35 | 36 | ''' 37 | def _gpu_normalize(self, x_batch): 38 | x_batch *= 255.0 39 | mean = torch.tensor(self.mean).view(1, len(self.mean), 1, 1).cuda() 40 | std = torch.tensor(self.std).view(1, len(self.std), 1, 1).cuda() 41 | return (x_batch - mean) / std 42 | ''' 43 | 44 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 45 | _, h, w, _ = images.shape 46 | if self.test_size[0] == w and self.test_size[1] == h: 47 | scaled_images = images 48 | else: 49 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 50 | 51 | scaled_images *= 255.0 52 | mean = torch.tensor(self.mean).view(1, len(self.mean), 1, 1).cuda(device=images.device) 53 | inputs = scaled_images - mean 54 | outputs = self.model(inputs) 55 | outputs = [torch.cat((o['boxes'], o['scores'].unsqueeze_(-1), o['labels'].unsqueeze_(-1).float()-1.0), dim=-1) for o in outputs] 56 | outputs = nms(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh) 57 | 58 | results = wrap_detection_results(outputs, self.test_size[0], self.input_size, skip=False) 59 | return results 60 | 61 | def detector_detect(self, img, conf_thresh, nms_thresh): 62 | with torch.no_grad(): 63 | output = self.detect(img, conf_thresh, nms_thresh) 64 | 65 | return output -------------------------------------------------------------------------------- /losses/mask_losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from pytorch_msssim import ssim 5 | from utils.tools import tensor_to_grey 6 | from .smooth_l1_loss import SmoothL1Loss 7 | 8 | class MaskLoss(nn.Module): 9 | def __init__(self, loss_func): 10 | super().__init__() 11 | self.loss_func = loss_func 12 | 13 | ''' 14 | def forward(self, pred, label, mask_bb): 15 | # pred: reconstructed or transformed 2d images 16 | # label: ground-truth images 17 | l1_losses = [] 18 | for i, bbox in enumerate(mask_bb): 19 | b, l, h, w = bbox 20 | crop_pred = pred[i, :, b:b+h, l:l+w] 21 | crop_label = label[i, :, b:b+h, l:l+w] 22 | #crop_label = Variable(crop_label.data.cuda(),requires_grad=False) 23 | l1_losses.append(F.l1_loss(crop_pred, crop_label)) 24 | return torch.mean(torch.stack(l1_losses, dim=0)) 25 | ''' 26 | 27 | def forward(self, pred, label, mask_bb): 28 | # pred: reconstructed or transformed 2d images 29 | # label: ground-truth images 30 | n, _, h, w = pred.shape 31 | l1_losses = self.loss_func(pred, label) 32 | l1_losses = torch.mean(l1_losses.view(n, -1), dim=1) 33 | # mask_size = mask_bb[:,2:4].clone(device=mask_bb.device).detach() 34 | w_loss = torch.prod(mask_bb[:,2:4].float().detach(), 1) / (h * w) 35 | l1_losses /= w_loss 36 | l1_losses = torch.mean(l1_losses) 37 | return l1_losses 38 | 39 | class L1MaskLoss(MaskLoss): 40 | def __init__(self): 41 | super().__init__(nn.L1Loss(reduction='none')) 42 | 43 | class L2MaskLoss(MaskLoss): 44 | def __init__(self): 45 | super().__init__(nn.MSELoss(reduction='none')) 46 | 47 | class SmoothL1MaskLoss(MaskLoss): 48 | def __init__(self, beta=0.5): 49 | 
super().__init__(SmoothL1Loss(reduction='none', beta=beta)) 50 | 51 | class SIMMMaskLoss(nn.Module): 52 | def __init__(self, val_range=None): 53 | super(SIMMMaskLoss, self).__init__() 54 | self.val_range = val_range 55 | 56 | def forward(self, pred, label, mask_bb): 57 | # pred: reconstructed or transformed 2d images 58 | # label: ground-truth images 59 | ssim_losses = [] 60 | for i, bbox in enumerate(mask_bb): 61 | b, l, h, w = bbox 62 | #pred_grey = tensor_to_grey(pred[i, :, b:b+h, l:l+w]).view(1,1,h,w) 63 | #label_grey = tensor_to_grey(label[i, :, b:b+h, l:l+w]).view(1,1,h,w) 64 | pred_grey = pred[i, :, b:b+h, l:l+w] 65 | label_grey = label[i, :, b:b+h, l:l+w] 66 | pred_grey = pred_grey.view(1,-1,h,w) 67 | label_grey = label_grey.view(1,-1,h,w) 68 | # label_grey = Variable(label_grey.data.cuda(),requires_grad=False) 69 | 70 | #print (pred_grey.shape, label_grey.shape) 71 | ssim_val = 1.0 - ssim(pred_grey, label_grey, val_range=self.val_range) 72 | ssim_losses.append(ssim_val) 73 | #print (ssim_losses) 74 | return torch.mean(torch.stack(ssim_losses, dim=0)) 75 | -------------------------------------------------------------------------------- /nets/ColorNet/color_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from nets.backbone.backbone_config import get_backbone, get_last_conv_dim 5 | 6 | class PatternColorTransformer(nn.Module): 7 | def __init__(self, use_cuda, device_ids): 8 | super(PatternColorTransformer, self).__init__() 9 | self.use_cuda = use_cuda 10 | self.device_ids = device_ids 11 | self.color_mapping = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 12 | 13 | # transform the color 14 | def forward(self, x): 15 | # transform the input 16 | n, c, h, w = x.shape 17 | y = torch.matmul(self.color_mapping, x.view(n, c, -1)) 18 | y = torch.clamp(y, -1., 1.) 
19 | return y.view(n, c, h, w) 20 | 21 | class ColorMapEstimator(nn.Module): 22 | def __init__(self, backbone, fc_dim=256, num_output=9): 23 | super(ColorMapEstimator, self).__init__() 24 | 25 | resnet_model = get_backbone(backbone)(num_classes=10) 26 | last_conv_dim = get_last_conv_dim(backbone) 27 | 28 | self.backbone = nn.Sequential(*list(resnet_model.children())[0:-2]) 29 | self.fc_dim = fc_dim 30 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 31 | 32 | # Regressor for the 3 * 2 affine matrix 33 | self.fc_loc = nn.Sequential( 34 | nn.Dropout(0.3), 35 | nn.Linear(last_conv_dim, self.fc_dim), 36 | nn.ReLU(True), 37 | nn.Linear(self.fc_dim, num_output) 38 | ) 39 | 40 | def forward(self, x): 41 | x = self.backbone(x) 42 | x = self.avgpool(x) 43 | theta = self.fc_loc(x.squeeze()) 44 | return theta 45 | 46 | 47 | class ColorMapNet(nn.Module): 48 | def __init__(self, backbone, downsample_dim, fc_dim): 49 | super(ColorMapNet, self).__init__() 50 | self.color_map = ColorMapEstimator(backbone, fc_dim=fc_dim, num_output=9) 51 | 52 | # initialization 53 | self.color_map.fc_loc[-1].weight.data.zero_() 54 | self.color_map.fc_loc[-1].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0, 0, 0, 1], dtype=torch.float)) 55 | 56 | # localization 57 | def forward(self, x): 58 | x = self.color_map(x) 59 | x = F.relu(x) 60 | return x.view(-1, 3, 3) 61 | 62 | 63 | class LightingColorTransformer(nn.Module): 64 | def __init__(self, config): 65 | super(LightingColorTransformer, self).__init__() 66 | self.color_map = ColorMapNet(backbone=config['loc_backbone'], 67 | downsample_dim=config['loc_downsample_dim'], 68 | fc_dim=config['loc_fc_dim']) 69 | 70 | # transform the template 71 | def forward(self, x, template): 72 | # transform the input 73 | c_map = self.color_map(x) 74 | n, c, h, w = template.shape 75 | y = torch.matmul(c_map, template.view(n, c, -1)) 76 | y = torch.clamp(y, -1., 1.) 
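        # c_map is a per-sample 3x3 color matrix predicted from the input crop; multiplying it into the flattened template models how the scene lighting re-colors the printed patch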
77 | y = y.view(n, c, h, w) 78 | return y, c_map 79 | 80 | def forward_template(self, x, template): 81 | y, _= self.forward(x, template) 82 | return y 83 | -------------------------------------------------------------------------------- /detector/yolov3/config/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | # 0 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # 1 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | # 2 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | # 3 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | # 4 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | # 5 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | # 6 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | # 7 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | # 8 82 | [convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | # 9 91 | [maxpool] 92 | size=2 93 | stride=2 94 | 95 | # 10 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | # 11 105 | [maxpool] 106 | size=2 107 | stride=1 108 | 109 | # 12 110 | [convolutional] 111 | batch_normalize=1 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | ########### 119 | 120 | # 13 121 | [convolutional] 122 | batch_normalize=1 123 | filters=256 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | # 14 130 | [convolutional] 131 | batch_normalize=1 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | # 15 139 | [convolutional] 140 | size=1 141 | stride=1 142 | pad=1 143 | filters=255 144 | activation=linear 145 | 146 | 147 | 148 | # 16 149 | [yolo] 150 | mask = 3,4,5 151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 152 | classes=80 153 | num=6 154 | jitter=.3 155 | ignore_thresh = .7 156 | truth_thresh = 1 157 | random=1 158 | 159 | # 17 160 | [route] 161 | layers = -4 162 | 163 | # 18 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | # 19 173 | [upsample] 174 | stride=2 175 | 176 | # 20 177 | [route] 178 | layers = -1, 8 179 | 180 | # 21 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | # 22 190 | [convolutional] 191 | size=1 192 | stride=1 193 | pad=1 194 | filters=255 195 | activation=linear 196 | 197 | # 23 198 | [yolo] 199 | mask = 1,2,3 200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 201 | classes=80 202 | num=6 203 | jitter=.3 204 | ignore_thresh = .7 205 | truth_thresh = 1 206 | random=1 207 | 
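The cfg above is plain darknet INI-style text: bracketed section headers such as [net], [convolutional] and [yolo], each followed by key=value lines. The repo parses these files through `detector/yolov3/utils/parse_config.py`; the sketch below is only a generic illustration of the idea (a hypothetical `parse_darknet_cfg` helper), not that module's exact API.

```python
# Minimal sketch (illustrative only): turn a darknet-style .cfg such as
# yolov3-tiny.cfg above into an ordered list of {key: value} dicts, one per section.
def parse_darknet_cfg(path):
    blocks = []
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            if line.startswith('[') and line.endswith(']'):
                # start of a new section, e.g. [net], [convolutional], [yolo]
                blocks.append({'type': line[1:-1].strip()})
            else:
                key, value = line.split('=', 1)
                blocks[-1][key.strip()] = value.strip()
    return blocks

# Example use: count the convolutional layers declared in the tiny config.
# blocks = parse_darknet_cfg('detector/yolov3/config/yolov3-tiny.cfg')
# print(sum(b['type'] == 'convolutional' for b in blocks))
```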
-------------------------------------------------------------------------------- /detector/yolov3_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch.nn.functional as F 4 | import torch 5 | 6 | from detector.yolov3.models import Darknet 7 | from detector.yolov3.utils.utils import non_max_suppression 8 | from detector.object_detector import ObjectDetector 9 | from detector.yolo_util import nms, wrap_detection_results 10 | 11 | 12 | class YOLOV3_Detector(ObjectDetector): 13 | def __init__(self, model_name, cfg_path, model_path, class_names, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 14 | # load darknet 15 | super().__init__(model_name, cfg_path, model_path, class_names, input_size, test_size, target_object_id) 16 | 17 | # skip background i.e. 0 18 | self.class_names = [name for k, name in enumerate(class_names) if k > 0] 19 | 20 | def load_model(self, cfg_path, model_path, class_names=None): 21 | # Initiate model 22 | model = Darknet(cfg_path) 23 | model.load_darknet_weights(model_path) 24 | model = model.eval() 25 | return model 26 | 27 | ''' 28 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 29 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 30 | outputs = self.model(scaled_images) 31 | outputs = test_size_suppression(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh) 32 | 33 | #print ([item.shape for item in outputs]) 34 | new_outputs = [None for _ in outputs] 35 | for k in range(len(outputs)): 36 | if outputs[k] is not None: 37 | new_outputs[k] = resize_boxes(outputs[k], self.test_size[0], self.input_size) 38 | new_outputs[k] = new_outputs[k][:, [0,1,2,3,4,6]] 39 | else: 40 | #new_outputs[k] = torch.FloatTensor([[0.0, 0.0, 0.0, 0.0, 0.0, -1]]).cuda() 41 | new_outputs[k] = [None] 42 | #print ('------', k, new_outputs[k].shape, new_outputs[k]) 43 | #print (k, outputs[k], new_outputs[k]) 44 | 45 | #print (new_outputs) 46 | return new_outputs 47 | ''' 48 | 49 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 50 | _, h, w, _ = images.shape 51 | if self.test_size[0] == w and self.test_size[1] == h: 52 | scaled_images = images 53 | else: 54 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 55 | 56 | outputs = self.model(scaled_images) 57 | outputs = non_max_suppression(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh) 58 | results = wrap_detection_results(outputs, self.test_size[0], self.input_size) 59 | return results 60 | 61 | def detector_detect(self, img, conf_thresh, nms_thresh): 62 | with torch.no_grad(): 63 | output = self.detect(img, conf_thresh, nms_thresh) 64 | 65 | return output 66 | 67 | ''' 68 | def resize_boxes(detection, test_size, input_size): 69 | h, w = input_size 70 | rh, rw = float(h)/test_size, float(w)/test_size 71 | detection[:,0] *= rw 72 | detection[:,1] *= rh 73 | detection[:,2] *= rw 74 | detection[:,3] *= rh 75 | 76 | return detection 77 | ''' 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # advPatch-pytorch 2 | Code for generating adversarial patches. 3 | 4 | # Required packages 5 | `pip install pytorch-msssim` 6 | 7 | 8 | # Installation 9 | 10 | Clone the project with the submodules. 11 | ```bash 12 | git clone --recursive URL 13 | ``` 14 | 15 | This repo requires Python >= 3.6.
17 | To install the dependent packages listed in `requirement.txt`, run 18 | ```bash 19 | pip install -r requirement.txt 20 | ``` 21 | 22 | To use the object detectors from `SSD` or `Detectron2`, you will need to install them from the corresponding submodules. 23 | 24 | To install SSD (more details [here](https://github.com/lufficc/SSD#installation)): 25 | 26 | ```bash 27 | cd REPO/detector/SSD 28 | pip install -e . 29 | ``` 30 | 31 | To install Detectron2 (more details [here](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md)): 32 | 33 | ```bash 34 | cd REPO/detector/detectron2 35 | pip install -e . 36 | ``` 37 | 38 | 39 | 40 | 41 | # Usage 42 | 43 | ## 1. Learning STN with Generator 44 | 1) Training 45 | 46 | ```bash 47 | python train_patchTransformer.py --config configs/config_patchTransformer.yaml --logdir STN-results --dataset neu_color \ 48 | --datadir ../../adv_data/neu_data --epochs 600 --STN tps --learnableSTN --use_LCT --LightingCT gen --batch_size 72 49 | ``` 50 | 2) Evaluation 51 | 52 | ```bash 53 | python train_patchTransformer.py --config configs/config_patchTransformer.yaml --logdir thinklab-STN-results --dataset neu_color \ 54 | --datadir ../../adv_data/neu_data --epochs 600 --STN tps --learnableSTN --use_LCT --LightingCT gen --batch_size 60 \ 55 | --patch_transformer_path thinklab-STN-results/PT_neu_color_STN_resnet18_ds128_fc256_tps_bounded20x10_gen_p256_L1Mask_bs60_e600/model_best.pth.tar \ 56 | --visualize --evaluate 57 | ``` 58 | 59 | - `--visualize`: save intermediate results into the folder `vis_output` under `patch_transformer_path` 60 | - `--val_list_file`: specify which subset to evaluate (train or validation) 61 | 62 | ## 2. Learning Printer Color (PCT) and Lighting Transformation (LCT) 63 | 1) Training 64 | 65 | ```bash 66 | python train_patchTransformer.py --config configs/config_patchTransformer.yaml --logdir PatNet-results --dataset neu_color \ 67 | --datadir ../../adv_data/neu_data --epochs 600 --STN tps --use_PCT --PrinterCT PCTLinear --use_LCT --LightingCT cc --batch_size 72 \ 68 | --pretrained STN-results/PT_neu_color_STN_resnet18_ds128_fc256_tps_bounded20x10_gen_bs72_e600/model_best.pth.tar 69 | ``` 70 | ## 3. Learning Adversarial Attack Model 71 | 1) Training 72 | 73 | ```bash 74 | python train_advPatch.py --config configs/config_advPatch.yaml --logdir AdvNet-results --dataset neu_color --datadir ../../adv_data/neu_data \ 75 | --epochs 600 --STN tps --use_PCT --PrinterCT PCTLinear --use_LCT --LightingCT cc --batch_size 72 \ 76 | --patch_transformer_path PatNet-results/PatNet/PT_neu_color_fixedSTN_blur6_resnet18_ds128_fc256_tps_bounded20x10_PCTLinear_cc_alexnet_bs72_e600_pretrained_nopctloss_blur/model_best.pth.tar 77 | ``` 78 | 79 | 2) Evaluation 80 | 81 | ```bash 82 | python train_advPatch.py --config configs/config_advPatch.yaml --logdir AdvNet-results --dataset neu_color --datadir ../../adv_data/neu_data \ 83 | --epochs 600 --STN tps --use_PCT --PrinterCT PCTLinear --use_LCT --LightingCT cc --batch_size 72 \ 84 | --patch_transformer_path PatNet-results/PatNet/PT_neu_color_fixedSTN_blur6_resnet18_ds128_fc256_tps_bounded20x10_PCTLinear_cc_alexnet_bs72_e600_pretrained_nopctloss_blur/model_best.pth.tar \ 85 | --evaluate 86 | ``` 87 | -------------------------------------------------------------------------------- /detector/yolov2/patch_config.py: -------------------------------------------------------------------------------- 1 | from torch import optim 2 | 3 | 4 | class BaseConfig(object): 5 | """ 6 | Default
parameters for all config files. 7 | """ 8 | 9 | def __init__(self): 10 | """ 11 | Set the defaults. 12 | """ 13 | self.img_dir = "inria/Train/pos" 14 | self.lab_dir = "inria/Train/pos/yolo-labels" 15 | self.cfgfile = "model/darknet/yolo.cfg" 16 | self.weightfile = "model/darknet/yolo.weights" 17 | self.printfile = "non_printability/30values.txt" 18 | self.class_name = 'model/darknet/coco.names' 19 | self.patch_size = 300 20 | 21 | self.start_learning_rate = 0.0001 22 | 23 | self.patch_name = 'base' 24 | 25 | self.scheduler_factory = lambda x: optim.lr_scheduler.ReduceLROnPlateau(x, 'min', patience=50, factor=0.5) 26 | self.max_tv = 0 27 | 28 | self.batch_size = 20 29 | 30 | self.loss_target = lambda obj, cls: obj * cls 31 | 32 | 33 | class Experiment1(BaseConfig): 34 | """ 35 | Model that uses a maximum total variation, tv cannot go below this point. 36 | """ 37 | 38 | def __init__(self): 39 | """ 40 | Change stuff... 41 | """ 42 | super().__init__() 43 | 44 | self.patch_name = 'Experiment1' 45 | self.max_tv = 0.165 46 | 47 | 48 | class Experiment2HighRes(Experiment1): 49 | """ 50 | Higher res 51 | """ 52 | 53 | def __init__(self): 54 | """ 55 | Change stuff... 56 | """ 57 | super().__init__() 58 | 59 | self.max_tv = 0.165 60 | self.patch_size = 400 61 | self.patch_name = 'Exp2HighRes' 62 | 63 | class Experiment3LowRes(Experiment1): 64 | """ 65 | Lower res 66 | """ 67 | 68 | def __init__(self): 69 | """ 70 | Change stuff... 71 | """ 72 | super().__init__() 73 | 74 | self.max_tv = 0.165 75 | self.patch_size = 100 76 | self.patch_name = "Exp3LowRes" 77 | 78 | class Experiment4ClassOnly(Experiment1): 79 | """ 80 | Only minimise class score. 81 | """ 82 | 83 | def __init__(self): 84 | """ 85 | Change stuff... 86 | """ 87 | super().__init__() 88 | 89 | self.patch_name = 'Experiment4ClassOnly' 90 | self.loss_target = lambda obj, cls: cls 91 | 92 | 93 | 94 | 95 | class Experiment1Desktop(Experiment1): 96 | """ 97 | """ 98 | 99 | def __init__(self): 100 | """ 101 | Change batch size. 102 | """ 103 | super().__init__() 104 | 105 | self.batch_size = 8 106 | self.patch_size = 400 107 | 108 | 109 | class ReproducePaperObj(BaseConfig): 110 | """ 111 | Reproduce the results from the paper: Generate a patch that minimises object score. 
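    (Presumably Thys et al., "Fooling automated surveillance cameras: adversarial patches to attack person detection", CVPR Workshops 2019, from which this yolov2 patch-training code appears to be adapted.)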
112 | """ 113 | 114 | def __init__(self): 115 | super().__init__() 116 | 117 | self.batch_size = 12 118 | self.patch_size = 416 119 | 120 | self.patch_name = 'ObjectOnlyPaper' 121 | self.max_tv = 0.165 122 | 123 | self.loss_target = lambda obj, cls: obj 124 | 125 | 126 | patch_configs = { 127 | "base": BaseConfig, 128 | "exp1": Experiment1, 129 | "exp1_des": Experiment1Desktop, 130 | "exp2_high_res": Experiment2HighRes, 131 | "exp3_low_res": Experiment3LowRes, 132 | "exp4_class_only": Experiment4ClassOnly, 133 | "paper_obj": ReproducePaperObj 134 | } 135 | -------------------------------------------------------------------------------- /configs/config_advPatch.yaml: -------------------------------------------------------------------------------- 1 | # data parameters 2 | use_augmentation: True 3 | use_ohem: False 4 | ohem_ratio: 0.5 5 | # similar to data augmentation 6 | use_EOT: False 7 | 8 | # patch related parameters 9 | #adv_patch_size: [416, 416, 3] 10 | adv_patch_size: [252, 150, 3] 11 | apply_border_mask: False 12 | border_mask_ratio: 0.05769 # 24/416 13 | border_value: 0.75 # white T-shirt 14 | tv_loss_weight: 2.5 15 | 16 | # Detector information 17 | #YOLO: YOLO_V2, YOLO_V3 18 | #SSD: SSD300_VGG16, SSD512_VGG16 19 | #Faster_RCNN: Faster_RCNN_VGG16, Faster_RCNN_R50, Faster_RCNN_R101 20 | #DETECTRON2: DFaster_RCNN_R50, DFaster_RCNN_R101, DRCNN_FPN_R50, DRCNN_FPN_R101, DRetinaNet_R50, DRetinaNet_R101 21 | 22 | detector_impl: YOLO 23 | detector_name: YOLO_V2 24 | detector_input_size: [416, 416] 25 | #detector_input_size: [1080, 1920] 26 | #detector_input_size: [540, 960] 27 | detector_test_size: [-1,-1] 28 | object_dataset: COCO # COCO or VOC 29 | target_object_id: -1 30 | train_nms_thresh: 0.8 31 | train_conf_thresh: 0.2999 32 | val_nms_thresh: 0.4 33 | val_conf_thresh: 0.7 34 | val_iou_threshold: 0.1 # 0.5 35 | 36 | template_shape: [252, 150, 3] # H, W 37 | template_resize: False 38 | #template_scaling_factor: -1 # how much blurring to apply on the template 39 | 40 | # geometric transformation --- STN parameters 41 | learnableSTN: False # learn STN or fix it 42 | STN_loss: L1Mask # L1, L2, SIMMMask, L1Mask 43 | STN: tps # affine or tps 44 | loc_backbone: resnet18 45 | loc_downsample_dim: 128 46 | loc_fc_dim: 256 47 | adjust_patch_size: False # learn to adjust the patch size for pasting 48 | TPS_localizer: bounded_stn #bounded_stn or unbounded_stn 49 | TPS_range: [0.999, 0.999] 50 | TPS_grid: [20, 10] 51 | 52 | # printer color transformation (PCT) 53 | use_PCT: False 54 | PrinterCT: PCTLinear # PCT or PCTLinear or PCTLinearBias or PCTNeural 55 | PCT_loss: L1 56 | color_transformation_path: 'kaidi_color_model/weights2_digital2new_0_1.npz' 57 | 58 | use_LCT: False 59 | LCT_loss: L1Mask 60 | LightingCT: gen #cc (color constancy, i.e. cc_fc4) or gen (image generator) 61 | lct_backbone: alexnet #alextnet, resnet18 62 | lct_input_size: [256, 256] 63 | #generator_input_size: [1024, 1024] 64 | #generator_input_size: [512, 512] 65 | generator_input_size: [384, 384] 66 | #generator_input_size: [256, 256] 67 | #generator_input_size: [288, 288] 68 | #generator_input_size: [320, 320] 69 | #generator_input_size: [352, 352] 70 | 71 | patch_size_median: 0.2519 # i.e. (150-50+1) / (450-50+1) 72 | #patch_size_range: [60, 400] #[min_height, max_height] 73 | patch_size_range: [50, 450] #[min_height, max_height] 74 | 75 | #collaborative_learning: False 76 | #patch_size_median: 0.17 # i.e. 
(100-60+1) / (300-60+1) 77 | #kd_type: margin #margin (our proposed) |mutual (deep mutual Learning) |one (online knowledge disttillation) 78 | #kd_norm: 2 # 1: L1 2: L2 79 | #patch_size_range: [60, 300] #[min_height, max_height] 80 | #CL_pretrained: False 81 | #near_patch_path: 82 | #far_patch_path: 83 | 84 | #image_shape: [256, 256, 3] 85 | #mask_shape: [128, 128] 86 | 87 | # log 88 | log_dir: 89 | log_file: log.log 90 | model_checkpoint: checkpoint.pth.tar 91 | model_best: model_best.pth.tar 92 | adv_patch_img: adv_patch.png 93 | adv_patch_img_best: best_adv_patch.png 94 | 95 | 96 | # training parameters 97 | cuda: True 98 | gpu_ids: [0,1,2,3,4,5] # set the GPU ids to use, e.g. [0] or [1, 2] 99 | num_workers: 24 100 | compute_dsr: False 101 | visualize: False 102 | epochs: 1000 103 | batch_size: 72 104 | lr: 0.1 105 | beta1: 0.5 106 | beta2: 0.9 107 | print_iter: 20 108 | # scheduler 109 | scheduler_patience: 25 110 | scheduler_factor: 0.5 111 | -------------------------------------------------------------------------------- /nets/STNet/tps_grid_gen.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # credit to https://github.com/WarBean/tps_stn_pytorch/blob/master/tps_grid_gen.py 3 | 4 | import torch 5 | import itertools 6 | import torch.nn as nn 7 | from torch.autograd import Function, Variable 8 | 9 | # phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2 10 | def compute_partial_repr(input_points, control_points): 11 | N = input_points.size(0) 12 | M = control_points.size(0) 13 | pairwise_diff = input_points.view(N, 1, 2) - control_points.view(1, M, 2) 14 | # original implementation, very slow 15 | # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance 16 | pairwise_diff_square = pairwise_diff * pairwise_diff 17 | pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, 1] 18 | repr_matrix = 0.5 * pairwise_dist * torch.log(pairwise_dist) 19 | # fix numerical error for 0 * log(0), substitute all nan with 0 20 | mask = repr_matrix != repr_matrix 21 | repr_matrix.masked_fill_(mask, 0) 22 | return repr_matrix 23 | 24 | class TPSGridGen(nn.Module): 25 | 26 | def __init__(self, target_height, target_width, target_control_points): 27 | super(TPSGridGen, self).__init__() 28 | assert target_control_points.ndimension() == 2 29 | assert target_control_points.size(1) == 2 30 | N = target_control_points.size(0) 31 | self.num_points = N 32 | target_control_points = target_control_points.float() 33 | 34 | # create padded kernel matrix 35 | forward_kernel = torch.zeros(N + 3, N + 3) 36 | target_control_partial_repr = compute_partial_repr(target_control_points, target_control_points) 37 | forward_kernel[:N, :N].copy_(target_control_partial_repr) 38 | forward_kernel[:N, -3].fill_(1) 39 | forward_kernel[-3, :N].fill_(1) 40 | forward_kernel[:N, -2:].copy_(target_control_points) 41 | forward_kernel[-2:, :N].copy_(target_control_points.transpose(0, 1)) 42 | # compute inverse matrix 43 | inverse_kernel = torch.inverse(forward_kernel) 44 | 45 | # create target cordinate matrix 46 | HW = target_height * target_width 47 | target_coordinate = list(itertools.product(range(target_height), range(target_width))) 48 | target_coordinate = torch.Tensor(target_coordinate) # HW x 2 49 | Y, X = target_coordinate.split(1, dim = 1) 50 | Y = Y * 2 / (target_height - 1) - 1 51 | X = X * 2 / (target_width - 1) - 1 52 | target_coordinate = torch.cat([X, Y], dim = 1) # convert from (y, x) to (x, y) 53 | 
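        # TPS radial basis between every target pixel and every control point: phi(p, c) = ||p - c||^2 * log ||p - c||; together with the affine terms [1, x, y] this forms the representation that maps control-point offsets to a dense sampling grid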
target_coordinate_partial_repr = compute_partial_repr(target_coordinate, target_control_points) 54 | target_coordinate_repr = torch.cat([ 55 | target_coordinate_partial_repr, torch.ones(HW, 1), target_coordinate 56 | ], dim = 1) 57 | 58 | # register precomputed matrices 59 | self.register_buffer('inverse_kernel', inverse_kernel) 60 | self.register_buffer('padding_matrix', torch.zeros(3, 2)) 61 | self.register_buffer('target_coordinate_repr', target_coordinate_repr) 62 | 63 | def forward(self, source_control_points): 64 | assert source_control_points.ndimension() == 3 65 | assert source_control_points.size(1) == self.num_points 66 | assert source_control_points.size(2) == 2 67 | batch_size = source_control_points.size(0) 68 | 69 | Y = torch.cat([source_control_points, Variable(self.padding_matrix.expand(batch_size, 3, 2))], 1) 70 | mapping_matrix = torch.matmul(Variable(self.inverse_kernel), Y) 71 | source_coordinate = torch.matmul(Variable(self.target_coordinate_repr), mapping_matrix) 72 | return source_coordinate -------------------------------------------------------------------------------- /configs/config_collaborative_advPatch.yaml: -------------------------------------------------------------------------------- 1 | # data parameters 2 | use_augmentation: True 3 | use_ohem: False 4 | ohem_ratio: 0.5 5 | # similar to data augmentation 6 | use_EOT: False 7 | 8 | # patch related parameters 9 | #adv_patch_size: [416, 416, 3] 10 | adv_patch_size: [252, 150, 3] 11 | apply_border_mask: False 12 | border_mask_ratio: 0.05769 # 24/416 13 | border_value: 0.75 # white T-shirt 14 | tv_loss_weight: 2.5 15 | 16 | # Detector information 17 | #YOLO: YOLO_V2, YOLO_V3 18 | #SSD: SSD300_VGG16, SSD512_VGG16 19 | #Faster_RCNN: Faster_RCNN_VGG16, Faster_RCNN_R50, Faster_RCNN_R101 20 | #DETECTRON2: DFaster_RCNN_R50, DFaster_RCNN_R101, DRCNN_FPN_R50, DRCNN_FPN_R101, DRetinaNet_R50, DRetinaNet_R101 21 | 22 | detector_impl: Faster_RCNN 23 | detector_name: Faster_RCNN_VGG16 24 | detector_input_size: [1080, 1920] 25 | #detector_input_size: [540, 960] 26 | #detector_input_size: [416, 416] 27 | detector_test_size: [-1,-1] 28 | object_dataset: COCO # COCO or VOC 29 | target_object_id: -1 30 | train_nms_thresh: 0.8 31 | train_conf_thresh: 0.2999 32 | val_nms_thresh: 0.4 33 | val_conf_thresh: 0.7 34 | val_iou_threshold: 0.1 # 0.5 35 | 36 | template_shape: [252, 150, 3] # H, W 37 | #template_scaling_factor: -1 # how much blurring to apply on the template 38 | 39 | # geometric transformation --- STN parameters 40 | learnableSTN: False # learn STN or fix it 41 | STN_loss: SIMMMask # L1, L2, SIMMMask, L1Mask 42 | STN: tps # affine or tps 43 | loc_backbone: resnet18 44 | loc_downsample_dim: 128 45 | loc_fc_dim: 256 46 | adjust_patch_size: False # learn to adjust the patch size for pasting 47 | TPS_localizer: bounded_stn #bounded_stn or unbounded_stn 48 | TPS_range: [0.999, 0.999] 49 | TPS_grid: [20, 10] 50 | 51 | # printer color transformation (PCT) 52 | use_PCT: False 53 | PrinterCT: PCTLinear # PCT or PCTLinear or PCTLinearBias or PCTNeural 54 | PCT_loss: L1 55 | color_transformation_path: 'kaidi_color_model/weights2_digital2new_0_1.npz' 56 | 57 | use_LCT: False 58 | LCT_loss: L1 59 | LightingCT: gen #cc (color constancy, i.e. 
cc_fc4) or gen (image generator) 60 | lct_backbone: alexnet #alextnet, resnet18 61 | lct_input_size: [256, 256] 62 | #generator_input_size: [512, 512] 63 | generator_input_size: [384, 384] 64 | #generator_input_size: [256, 256] 65 | #generator_input_size: [272, 272] 66 | 67 | collaborative_learning: False 68 | CL_pretrained: False 69 | collaborative_weight: False 70 | 71 | half_patches: False 72 | kd_type: MSE #margin , MSE , L1, SmoothL1 or mutual, one, SmoothL1Mask 73 | kd_norm: 2 # 1: L1 2: L2 74 | 75 | # for non-colloaborative_learning 76 | near_patch_path: test_collaborative/YOLO_V2_COCO_adv252_neu_near_tps_PCTLinear_cc_alexnet_p256_L1Mask_tv25_bs72_e500/best_adv_patch.png 77 | far_patch_path: test_collaborative/YOLO_V2_COCO_adv252_neu_far_tps_PCTLinear_cc_alexnet_p256_L1Mask_tv25_bs72_e500/best_adv_patch.png 78 | 79 | # advT_data 80 | patch_size_median: 0.2519 # i.e. (150-50+1) / (450-50+1) 81 | #patch_size_range: [60, 400] #[min_height, max_height] 82 | patch_size_range: [50, 450] #[min_height, max_height] 83 | 84 | #image_shape: [256, 256, 3] 85 | #mask_shape: [128, 128] 86 | 87 | # log 88 | log_dir: 89 | log_file: log.log 90 | model_checkpoint: checkpoint.pth.tar 91 | model_best: model_best.pth.tar 92 | adv_patch_img: adv_patch.png 93 | adv_patch_img_best: best_adv_patch.png 94 | 95 | 96 | # training parameters 97 | cuda: True 98 | gpu_ids: [0,1,2,3,4,5] # set the GPU ids to use, e.g. [0] or [1, 2] 99 | num_workers: 24 100 | compute_dsr: False 101 | visualize: False 102 | epochs: 1000 103 | batch_size: 72 104 | lr: 0.1 105 | beta1: 0.5 106 | beta2: 0.9 107 | print_iter: 20 108 | # scheduler 109 | scheduler_patience: 25 110 | scheduler_factor: 0.5 111 | -------------------------------------------------------------------------------- /nets/AdvPatch/advPatch_model_builder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | from PIL import Image 4 | from nets.AdvPatch.advPatch_net import AdvPatchNet 5 | from nets.AdvPatch.collaborative_advPatch_net import CollaborativeAdvPatchNet 6 | from nets.AdvPatch.hybrid_advPatch import HybridAdvPatch 7 | from utils.tools import normalize 8 | import os 9 | 10 | def get_adv_model_path(config): 11 | model_name = config['detector_name'] 12 | model_name += '_' + config['object_dataset'] 13 | if config['collaborative_learning'] is True: 14 | model_name += '_CL' 15 | if config['half_patches'] is True: 16 | model_name += 'half' 17 | if config['collaborative_weight']: 18 | model_name += '_weighted' 19 | if config['CL_pretrained'] is True: 20 | model_name += '_pretrained' 21 | model_name += '_adv%d' % (config['adv_patch_size'][0]) 22 | 23 | model_name += '_' + config['dataset'] 24 | # if config['use_augmentation']: 25 | # model_name += '_aug' 26 | if config['use_ohem']: 27 | model_name += '_ohem%d' % (int(config['ohem_ratio']*100)) 28 | # model_name += '_ohem' 29 | if config['apply_border_mask']: 30 | model_name += '_border' 31 | 32 | #model_name += '_STN' if config['learnableSTN'] else '_fixedSTN' 33 | 34 | # model_name += '_p%d' % (config['person_crop_size'][0]) 35 | #if config['template_scaling_factor'] > 0: 36 | # model_name += '_blur%d' % (config['template_scaling_factor']) 37 | #model_name += '_' + config['loc_backbone'] 38 | #model_name += '_ds%d'% (config['loc_downsample_dim']) 39 | #model_name += '_fc%d'% (config['loc_fc_dim']) 40 | model_name += '_' + config['STN'] 41 | #if config['STN'] == 'tps': 42 | # if config['TPS_localizer'] == 'bounded_stn': 43 | 
# model_name += '_bounded' 44 | # else: 45 | # model_name += '_unbounded' 46 | # model_name += '%dx%d'% (config['TPS_grid'][0], config['TPS_grid'][1]) 47 | 48 | if config['use_PCT']: 49 | model_name += '_' + config['PrinterCT'] 50 | 51 | if config['use_LCT']: 52 | model_name += '_' + config['LightingCT'] 53 | if config['LightingCT'] == 'cc': 54 | model_name += '_' + config['lct_backbone'] 55 | model_name += '_p%d' % (config['lct_input_size'][0]) 56 | 57 | if config['use_LCT']: 58 | model_name += '_' + config['LCT_loss'] 59 | elif config['use_PCT']: 60 | model_name += '_' + config['PCT_loss'] 61 | 62 | if config['use_EOT']: 63 | model_name += '_EOT' 64 | 65 | # model_name += '_%s_loss' % (config['MaxProbExtractor_loss']) 66 | model_name += '_tv%d' % (int(config['tv_loss_weight']*10)) 67 | model_name += '_bs%d' % (config['batch_size']) 68 | model_name += '_e%d' % (config['epochs']) 69 | 70 | return model_name 71 | 72 | def build_advPatch_model(config): 73 | return AdvPatchNet(config) if not config['collaborative_learning'] else \ 74 | CollaborativeAdvPatchNet(config) 75 | 76 | def build_advPatch_model_from_checkpoint_file(model, model_path): 77 | print ('loading advPatch model: %s' % (model_path)) 78 | checkpoint = torch.load(model_path, map_location='cpu') 79 | y1 = torch.sigmoid(checkpoint['adv_patch']['blending']) 80 | import torchvision.transforms as transforms 81 | img = transforms.ToPILImage()(y1) 82 | img.save('blend_mask.png') 83 | print (y1) 84 | 85 | model.adv_patch_model.load_state_dict(checkpoint['adv_patch']) 86 | return model, checkpoint['epoch'], checkpoint['iteration'], checkpoint['lr'], checkpoint['best_error'] 87 | 88 | def build_advPatch_model_from_image_file(model, model_path): 89 | model.adv_patch_model.load_patch(model_path) 90 | return model 91 | 92 | #def set_gradient_false(model): 93 | # for p in model.parameters(): 94 | # p.requires_grad = False 95 | -------------------------------------------------------------------------------- /detector/yolov2/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | 
filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /utils/gaussian_blur.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.functional import conv2d 6 | 7 | 8 | def gaussian(window_size, sigma): 9 | def gauss_fcn(x): 10 | return -(x - window_size // 2)**2 / float(2 * sigma**2) 11 | gauss = torch.stack( 12 | [torch.exp(torch.tensor(gauss_fcn(x))) for x in range(window_size)]) 13 | return gauss / gauss.sum() 14 | 15 | 16 | def get_gaussian_kernel(ksize, sigma): 17 | window_1d = gaussian(ksize, sigma) 18 | return window_1d 19 | 20 | 21 | def get_gaussian_kernel2d(ksize, sigma): 22 | ksize_x, ksize_y = ksize 23 | sigma_x, sigma_y = sigma 24 | kernel_x = get_gaussian_kernel(ksize_x, sigma_x) 25 | kernel_y = 
get_gaussian_kernel(ksize_y, sigma_y) 26 | kernel_2d = torch.matmul( 27 | kernel_x.unsqueeze(-1), kernel_y.unsqueeze(-1).t()) 28 | return kernel_2d 29 | 30 | class GaussianBlur(nn.Module): 31 | r"""Creates an operator that blurs a tensor using a Gaussian filter. 32 | 33 | The operator smooths the given tensor with a gaussian kernel by convolving 34 | it to each channel. It suports batched operation. 35 | 36 | Arguments: 37 | kernel_size (Tuple[int, int]): the size of the kernel. 38 | sigma (Tuple[float, float]): the standard deviation of the kernel. 39 | 40 | Returns: 41 | Tensor: the blurred tensor. 42 | 43 | Shape: 44 | - Input: :math:`(B, C, H, W)` 45 | - Output: :math:`(B, C, H, W)` 46 | 47 | Examples:: 48 | 49 | >>> input = torch.rand(2, 4, 5, 5) 50 | >>> gauss = tgm.image.GaussianBlur((3, 3), (1.5, 1.5)) 51 | >>> output = gauss(input) # 2x4x5x5 52 | """ 53 | 54 | def __init__(self, kernel_size, sigma): 55 | super(GaussianBlur, self).__init__() 56 | self.kernel_size = kernel_size 57 | self.sigma = sigma 58 | self._padding = self.compute_zero_padding(kernel_size) 59 | self.kernel = self.create_gaussian_kernel(kernel_size, sigma) 60 | 61 | @staticmethod 62 | def create_gaussian_kernel(kernel_size, sigma): 63 | """Returns a 2D Gaussian kernel array.""" 64 | kernel = get_gaussian_kernel2d(kernel_size, sigma) 65 | return kernel 66 | 67 | @staticmethod 68 | def compute_zero_padding(kernel_size): 69 | """Computes zero padding tuple.""" 70 | computed = [(k - 1) // 2 for k in kernel_size] 71 | return computed[0], computed[1] 72 | 73 | def forward(self, x): 74 | if not torch.is_tensor(x): 75 | raise TypeError("Input x type is not a torch.Tensor. Got {}" 76 | .format(type(x))) 77 | if not len(x.shape) == 4: 78 | raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}" 79 | .format(x.shape)) 80 | # prepare kernel 81 | b, c, h, w = x.shape 82 | tmp_kernel = self.kernel.to(x.device).to(x.dtype) 83 | kernel = tmp_kernel.repeat(c, 1, 1, 1) 84 | 85 | # convolve tensor with gaussian kernel 86 | return conv2d(x, kernel, padding=self._padding, stride=1, groups=c) 87 | 88 | 89 | 90 | ###################### 91 | # functional interface 92 | ###################### 93 | 94 | 95 | def gaussian_blur(src, kernel_size, sigma): 96 | r"""Function that blurs a tensor using a Gaussian filter. 97 | 98 | The operator smooths the given tensor with a gaussian kernel by convolving 99 | it to each channel. It suports batched operation. 100 | 101 | Arguments: 102 | src (Tensor): the input tensor. 103 | kernel_size (Tuple[int, int]): the size of the kernel. 104 | sigma (Tuple[float, float]): the standard deviation of the kernel. 105 | 106 | Returns: 107 | Tensor: the blurred tensor. 108 | 109 | Shape: 110 | - Input: :math:`(B, C, H, W)` 111 | - Output: :math:`(B, C, H, W)` 112 | 113 | Examples:: 114 | 115 | >>> input = torch.rand(2, 4, 5, 5) 116 | >>> output = tgm.image.gaussian_blur(input, (3, 3), (1.5, 1.5)) 117 | """ 118 | return GaussianBlur(kernel_size, sigma)(src) 119 | -------------------------------------------------------------------------------- /nets/AdvPatch/advPatch_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def generate_patch(type, size=(416, 416)): 5 | """ 6 | Generate a random patch as a starting point for optimization. 7 | 8 | :param type: Can be 'gray' or 'random'. Whether or not generate a gray or a random patch. 
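    :param size: (height, width) of the generated patch.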
9 | :return: 10 | """ 11 | 12 | if type == 'gray': 13 | adv_patch = torch.full((3, size[0], size[1]), 0.5) 14 | elif type == 'random': 15 | adv_patch = torch.rand((3, size[0], size[1])) 16 | 17 | return adv_patch 18 | 19 | def generate_border_mask(patch_size, border_size): 20 | h = patch_size[0] 21 | w = patch_size[1] 22 | border_mask = torch.full((3, h, w), 0) 23 | bottom = border_size 24 | top = h - bottom 25 | border_mask[:, bottom:top, :] = 1.0 26 | 27 | return border_mask 28 | 29 | def paste_patch_to_frame(patch, patch_bb, img, img_bb): 30 | n, c, _, _ = patch.shape 31 | # create tensor 32 | img_h, img_w = img.shape[2:] 33 | x = torch.cuda.FloatTensor(n, c, img_h, img_w).fill_(0) 34 | for i, bbox in enumerate(patch_bb): 35 | pb, pl, ph, pw = bbox 36 | ib, il, ih, iw = img_bb[i] 37 | resized_tmpl = F.interpolate(patch[i, :, pb:pb + ph, pl:pl + pw].unsqueeze(0), size=(ih, iw), 38 | mode='bilinear', align_corners=False) 39 | x[i, :, ib:ib + ih, il:il + iw] = resized_tmpl.squeeze() 40 | 41 | return x 42 | 43 | def get_max_detection_score(output, obj_bbox, target_obj_id=0, min_detection_score=0.3, loss_type = 'max'): 44 | # output a list of (x1,y1,x2,y2, object_conf, class_pred) 45 | # obj_bbox: a list of (x1, y1, x2, y2) 46 | assert len(output) == obj_bbox.shape[0] 47 | 48 | # minimum prob. is set to 0.3 49 | max_prob = torch.zeros((obj_bbox.shape[0], 1)).cuda() 50 | #print ('max_prob_0', max_prob) 51 | for k in range(len(output)): 52 | detection = output[k] 53 | if isinstance(detection, list) and detection[0] is None: 54 | continue 55 | 56 | person_detection = detection[detection[:, -1] == target_obj_id] 57 | if person_detection.shape[0] == 0: 58 | continue 59 | 60 | bbox = obj_bbox[k] 61 | xc = (person_detection[:, 0] + person_detection[:, 2]) / 2.0 62 | yc = (person_detection[:, 1] + person_detection[:, 3]) / 2.0 63 | 64 | x_inside = (xc > bbox[0]) & (bbox[2] > xc) 65 | y_inside = (yc > bbox[1]) & (bbox[3] > yc) 66 | 67 | xy_inside = x_inside & y_inside 68 | # assert any(xy_inside>0), (xy_inside, xc, yc, detection, bbox, x_inside, y_inside, xy_inside) 69 | if loss_type == 'ce': 70 | prob = person_detection[xy_inside, 4:-1] 71 | if len(prob) > 0: 72 | max_prob[k] = torch.nn.functional.nll_loss(prob.log(), (prob.shape[-1] - 1) * torch.ones(len(prob), dtype=torch.long, device=prob.device)) 73 | min_detection_score = 0.0 74 | else: 75 | if any(xy_inside > 0): 76 | if loss_type == 'avg': 77 | max_prob[k] = torch.mean(person_detection[xy_inside, 4]) 78 | min_detection_score = 0.0 79 | else: 80 | max_prob[k] = torch.max(person_detection[xy_inside, 4]) 81 | max_prob = torch.clamp(max_prob, min=min_detection_score) 82 | return max_prob 83 | 84 | 85 | # total variation of the patch 86 | def get_totalVariation(adv_patch): 87 | tvcomp1 = torch.sum(torch.abs(adv_patch[:, :, 1:] - adv_patch[:, :, :-1] + 0.000001), 0) 88 | tvcomp1 = torch.sum(torch.sum(tvcomp1, 0), 0) 89 | tvcomp2 = torch.sum(torch.abs(adv_patch[:, 1:, :] - adv_patch[:, :-1, :] + 0.000001), 0) 90 | tvcomp2 = torch.sum(torch.sum(tvcomp2, 0), 0) 91 | tv = tvcomp1 + tvcomp2 92 | return tv / torch.numel(adv_patch) 93 | 94 | # adversarial loss 95 | def advsarial_loss(max_detection_score, loss_type): 96 | if loss_type == '': 97 | return None 98 | 99 | if loss_type == '': 100 | return None 101 | 102 | if loss_type == '': 103 | return None 104 | 105 | return max_detection_score -------------------------------------------------------------------------------- /detector/yolov3/test.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | from utils.utils import * 5 | from utils.datasets import * 6 | from utils.parse_config import * 7 | 8 | import os 9 | import sys 10 | import time 11 | import datetime 12 | import argparse 13 | import tqdm 14 | 15 | import torch 16 | from torch.utils.data import DataLoader 17 | from torchvision import datasets 18 | from torchvision import transforms 19 | from torch.autograd import Variable 20 | import torch.optim as optim 21 | 22 | 23 | def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size): 24 | model.eval() 25 | 26 | # Get dataloader 27 | dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False) 28 | dataloader = torch.utils.data.DataLoader( 29 | dataset, batch_size=batch_size, shuffle=False, num_workers=1, collate_fn=dataset.collate_fn 30 | ) 31 | 32 | Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 33 | 34 | labels = [] 35 | sample_metrics = [] # List of tuples (TP, confs, pred) 36 | for batch_i, (_, imgs, targets) in enumerate(tqdm.tqdm(dataloader, desc="Detecting objects")): 37 | 38 | # Extract labels 39 | labels += targets[:, 1].tolist() 40 | # Rescale target 41 | targets[:, 2:] = xywh2xyxy(targets[:, 2:]) 42 | targets[:, 2:] *= img_size 43 | 44 | imgs = Variable(imgs.type(Tensor), requires_grad=False) 45 | 46 | with torch.no_grad(): 47 | outputs = model(imgs) 48 | outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres) 49 | 50 | sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres) 51 | 52 | # Concatenate sample statistics 53 | true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*sample_metrics))] 54 | precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, labels) 55 | 56 | return precision, recall, AP, f1, ap_class 57 | 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch") 62 | parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") 63 | parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") 64 | parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") 65 | parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") 66 | parser.add_argument("--iou_thres", type=float, default=0.5, help="iou threshold required to qualify as detected") 67 | parser.add_argument("--conf_thres", type=float, default=0.001, help="object confidence threshold") 68 | parser.add_argument("--nms_thres", type=float, default=0.5, help="iou thresshold for non-maximum suppression") 69 | parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation") 70 | parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") 71 | opt = parser.parse_args() 72 | print(opt) 73 | 74 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 75 | 76 | data_config = parse_data_config(opt.data_config) 77 | valid_path = data_config["valid"] 78 | class_names = load_classes(data_config["names"]) 79 | 80 | # Initiate model 81 | model = Darknet(opt.model_def).to(device) 
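    # a .weights file holds original darknet binary weights; any other extension is treated as a PyTorch state_dict checkpoint (see the two branches below)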
82 | if opt.weights_path.endswith(".weights"): 83 | # Load darknet weights 84 | model.load_darknet_weights(opt.weights_path) 85 | else: 86 | # Load checkpoint weights 87 | model.load_state_dict(torch.load(opt.weights_path)) 88 | 89 | print("Compute mAP...") 90 | 91 | precision, recall, AP, f1, ap_class = evaluate( 92 | model, 93 | path=valid_path, 94 | iou_thres=opt.iou_thres, 95 | conf_thres=opt.conf_thres, 96 | nms_thres=opt.nms_thres, 97 | img_size=opt.img_size, 98 | batch_size=8, 99 | ) 100 | 101 | print("Average Precisions:") 102 | for i, c in enumerate(ap_class): 103 | print(f"+ Class '{c}' ({class_names[c]}) - AP: {AP[i]}") 104 | 105 | print(f"mAP: {AP.mean()}") 106 | -------------------------------------------------------------------------------- /nets/PatchTransformer/patchTransformer_model_builder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from nets.PatchTransformer.patchTransformer_network import PatchTransformerNetwork 3 | from utils.utils import fix_checkpoint_key 4 | 5 | def get_patchTransformer_model_path(config): 6 | model_name = 'PT' 7 | model_name += '_%s' % (config['dataset']) 8 | 9 | if config['use_PBM']: 10 | model_name += '_PBM' 11 | 12 | model_name += '_STN' if config['learnableSTN'] else '_fixedSTN' 13 | #if config['adjust_patch_size']: 14 | # model_name += '_APS' 15 | #if config['template_scaling_factor'] > 0: 16 | # model_name += '_blur%d' % (config['template_scaling_factor']) 17 | model_name += '_' + config['loc_backbone'] 18 | model_name += '_ds%d'% (config['loc_downsample_dim']) 19 | model_name += '_fc%d'% (config['loc_fc_dim']) 20 | model_name += '_' + config['STN'] 21 | if config['STN'] == 'tps': 22 | if config['TPS_localizer'] == 'bounded_stn': 23 | model_name += '_bounded' 24 | else: 25 | model_name += '_unbounded' 26 | model_name += '%dx%d'% (config['TPS_grid'][0], config['TPS_grid'][1]) 27 | 28 | if config['use_PCT']: 29 | model_name += '_' + config['PrinterCT'] 30 | 31 | if config['use_LCT']: 32 | model_name += '_' + config['LightingCT'] 33 | if config['LightingCT'] == 'cc': 34 | model_name += '_' + config['lct_backbone'] 35 | model_name += '_p%d' % (config['lct_input_size'][0]) 36 | 37 | if config['use_LCT']: 38 | model_name += '_' + config['LCT_loss'] 39 | elif config['use_PCT']: 40 | model_name += '_' + config['PCT_loss'] 41 | 42 | model_name += '_bs%d' % (config['batch_size']) 43 | model_name += '_e%d' % (config['epochs']) 44 | 45 | return model_name 46 | 47 | def build_patchTransformer_model(config): 48 | return PatchTransformerNetwork(config) 49 | 50 | def build_patchTransformer_from_checkpoint(model, model_path): 51 | checkpoint = torch.load(model_path, map_location='cpu') 52 | 53 | pt_modules = {'PBM':model.PBM, 'STN':model.STN, 'PCT':model.PCT, 'LCT':model.LCT} 54 | 55 | print ('Loading weights from %s' % (model_path)) 56 | for module_name in ['PBM', 'STN', 'PCT', 'LCT']: 57 | module = pt_modules[module_name] 58 | if module is not None: 59 | if checkpoint[module_name] is not None: 60 | try: 61 | module.load_state_dict(fix_checkpoint_key((checkpoint[module_name])), strict=True) 62 | print ("===== Finished loading %s module =====" % (module_name)) 63 | except Exception as e: 64 | print (e) 65 | print ("==== Woops, no %s module loaded ====" % (module_name)) 66 | else: 67 | print ('==== %s module is not available ====' % (module_name)) 68 | 69 | #print ("\nGenerator input size: {size}".format(size=model.generator_input_size)) 70 | #if 'generator_input_size' in checkpoint and 
model.generator_input_size != checkpoint['generator_input_size']: 71 | #print ("Change generator input size from {size1} to {size2}".format(size1=model.generator_input_size, size2=checkpoint['generator_input_size'])) 72 | #model.generator_input_size = checkpoint['generator_input_size'] 73 | # print ("Warnging: generator input size {size1} is different from the size {size2} in the loaded model.".format(size1=model.generator_input_size, size2=checkpoint['generator_input_size'])) 74 | 75 | ''' 76 | pct_key = 'PCT' 77 | if model.PCT is not None: 78 | try: 79 | if model.PCT is not None: 80 | model.PCT.load_state_dict(fix_checkpoint_key((checkpoint[pct_key])), strict=True) 81 | print ("===== Finished loading PCT module with key: %s =====" % (pct_key)) 82 | except Exception as e: 83 | print (e) 84 | print ("==== Woops, no PCT module loaded ====") 85 | 86 | lct_key = 'LCT' 87 | try: 88 | model.LCT.load_state_dict(fix_checkpoint_key((checkpoint[lct_key])), strict=True) 89 | print("===== Finished loading LCT module with key:%s =====" % (lct_key)) 90 | except Exception as e: 91 | print (e) 92 | print("==== Woops, no LCT module loaded ====") 93 | print ('\n') 94 | ''' 95 | # now load optimizer 96 | epoch = checkpoint['epoch'] if 'epoch' in checkpoint else 0 97 | iteration = checkpoint['iteration'] if 'iteration' in checkpoint else 0 98 | lr = checkpoint['lr'] if 'lr' in checkpoint else 0.0 99 | 100 | best_error = checkpoint['best_error'] if 'best_error' in checkpoint else 9999.0 101 | 102 | return model, epoch, iteration, lr, best_error 103 | -------------------------------------------------------------------------------- /detector/yolo_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def wrap_detection_results(outputs, scale_size, input_size, skip=True): 4 | # print ([item.shape for item in outputs]) 5 | new_outputs = [None for _ in outputs] 6 | for k in range(len(outputs)): 7 | if outputs[k] is not None: 8 | new_outputs[k] = resize_boxes(outputs[k], scale_size, input_size) 9 | if skip: 10 | new_outputs[k] = new_outputs[k][:, [0, 1, 2, 3, 4, 6]] 11 | else: 12 | # new_outputs[k] = torch.FloatTensor([[0.0, 0.0, 0.0, 0.0, 0.0, -1]]).cuda() 13 | new_outputs[k] = [None] 14 | 15 | # print (new_outputs) 16 | return new_outputs 17 | 18 | def resize_boxes(detection, scale_size, input_size): 19 | w, h = input_size 20 | rh, rw = float(h)/scale_size, float(w)/scale_size 21 | detection[:,0] *= rw 22 | detection[:,1] *= rh 23 | detection[:,2] *= rw 24 | detection[:,3] *= rh 25 | 26 | return detection 27 | 28 | def xywh2xyxy(x): 29 | y = x.new(x.shape) 30 | y[..., 0] = x[..., 0] - x[..., 2] / 2 31 | y[..., 1] = x[..., 1] - x[..., 3] / 2 32 | y[..., 2] = x[..., 0] + x[..., 2] / 2 33 | y[..., 3] = x[..., 1] + x[..., 3] / 2 34 | return y 35 | 36 | def bbox_iou(box1, box2, x1y1x2y2=True): 37 | """ 38 | Returns the IoU of two bounding boxes 39 | """ 40 | if not x1y1x2y2: 41 | # Transform from center and width to exact coordinates 42 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 43 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 44 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 45 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 46 | else: 47 | # Get the coordinates of bounding boxes 48 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 49 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 50 | 51 | 
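    # IoU below = intersection area / union area, with the +1 terms treating box coordinates as inclusive pixel indices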
# get the corrdinates of the intersection rectangle 52 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 53 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 54 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 55 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 56 | # Intersection area 57 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 58 | inter_rect_y2 - inter_rect_y1 + 1, min=0 59 | ) 60 | # Union Area 61 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 62 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 63 | 64 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 65 | 66 | return iou 67 | 68 | def nms(prediction, conf_thres=0.5, nms_thres=0.4): 69 | """ 70 | Removes detections with lower object confidence score than 'conf_thres' and performs 71 | Non-Maximum Suppression to further filter detections. 72 | Returns detections with shape: 73 | (x1, y1, x2, y2, object_conf, class_score, class_pred) 74 | """ 75 | 76 | ## From (center x, center y, width, height) to (x1, y1, x2, y2) 77 | #prediction[..., :4] = xywh2xyxy(prediction[..., :4]) 78 | output = [None for _ in range(len(prediction))] 79 | for image_i, image_pred in enumerate(prediction): 80 | # Filter out confidence scores below threshold 81 | image_pred = image_pred[image_pred[:, 4] >= conf_thres] 82 | # If none are remaining => process next image 83 | if not image_pred.size(0): 84 | continue 85 | # Object confidence times class confidence 86 | score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] 87 | # Sort by it 88 | image_pred = image_pred[(-score).argsort()] 89 | class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) 90 | 91 | detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) 92 | 93 | # Perform non-maximum suppression 94 | keep_boxes = [] 95 | while detections.size(0): 96 | large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) >= nms_thres 97 | label_match = detections[0, -1] == detections[:, -1] 98 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 99 | invalid = large_overlap & label_match 100 | weights = detections[invalid, 4:5] 101 | # Merge overlapping bboxes by order of confidence 102 | #detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() 103 | keep_boxes += [detections[0]] 104 | detections = detections[~invalid] 105 | if keep_boxes: 106 | output[image_i] = torch.stack(keep_boxes) 107 | 108 | return output 109 | 110 | -------------------------------------------------------------------------------- /demo_detector.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import argparse 4 | from detector.build_object_detector import build_object_detector 5 | from utils.tools import get_config 6 | from opts import arg_parser, merge_args 7 | import glob 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | import torch 11 | import numpy as np 12 | import json 13 | from json import encoder 14 | encoder.FLOAT_REPR = lambda o: format(o, '.2f') 15 | 16 | def load_detection(filename): 17 | with open(filename, 'r') as f: 18 | data = json.load(f) 19 | return data 20 | 21 | def prune_detection(detection): 22 | person_num = len(detection) 23 | if person_num <= 2: 24 | return detection 25 | 26 | areas =[(det[2] - det[0]) * (det[3] - det[1]) for det in detection] 27 | I = sorted(range(len(areas)), key=lambda k: areas[k], reverse=True) 28 | #print (areas, I) 29 | first_person = detection[I[0]] 30 | xc 
= (first_person[0] + first_person[2]) / 2.0 31 | second_person = None 32 | for k in range(1, len(I)): 33 | det = detection[I[k]] 34 | half_width = abs((first_person[2] - first_person[0])) / 2.0 35 | xc_temp = (det[0] + det[2]) / 2.0 36 | if abs(xc_temp - xc) > half_width: 37 | second_person = det 38 | break 39 | 40 | return [first_person, second_person] if second_person is not None else [first_person] 41 | 42 | def main(): 43 | global args 44 | parser = arg_parser() 45 | args = parser.parse_args() 46 | 47 | config = get_config(args.config) 48 | config = merge_args(args, config) 49 | #config['detector_input_size'] = [540, 960] 50 | detector = build_object_detector(config) 51 | 52 | device_ids = args.gpu_ids 53 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in device_ids) 54 | device_ids = list(range(len(device_ids))) 55 | detector.cuda(int(device_ids[0])) 56 | 57 | input_size = config['detector_input_size'] 58 | target_obj_id = detector.target_object_id 59 | results = {} 60 | 61 | video_name = args.test_dir.split('/')[-1] 62 | #gt_detections = load_detection('../../CVPR_detection_results/20201106/DFaster_RCNN_R101_COCO+' + video_name+'.json') 63 | with torch.no_grad(): 64 | for img_path in sorted(glob.glob(args.test_dir+"/*.png")): 65 | frame_img = Image.open(img_path) 66 | w, h = frame_img.size 67 | if w != input_size[0] or h != input_size[1]: 68 | frame_img = frame_img.resize(input_size, Image.BILINEAR) 69 | 70 | ''' 71 | frm_name = img_path.split('/')[-1] 72 | frm_name = frm_name.split('.')[0] 73 | gt_det = gt_detections[frm_name] 74 | pruned_det = prune_detection(gt_det) 75 | bb = list(map(int, pruned_det[0][:4])) 76 | ph = bb[3] - bb[1] + 1 77 | pw = bb[2] - bb[0] + 1 78 | bb = [bb[0]+int(pw*0.15), bb[1]+int(ph*0.15), bb[2]-int(pw*0.25), bb[3]-int(ph*0.4)] 79 | np_im = np.array(frame_img) 80 | #print (np_im.shape, bb) 81 | np_im[bb[1]:bb[3],bb[0]:bb[2],:] = 128 82 | frame_img = Image.fromarray(np_im, 'RGB') 83 | ''' 84 | frame_img = transforms.ToTensor()(frame_img) 85 | frame_img = frame_img.cuda() 86 | frame_img = torch.unsqueeze(frame_img, dim=0) 87 | #detections = detector.detect(frame_img, nms_thresh=config['val_nms_thresh'], conf_thresh=config['val_conf_thresh']) 88 | detections = detector.detector_detect(frame_img, nms_thresh=config['val_nms_thresh'], conf_thresh=config['val_conf_thresh']) 89 | person_detection = [] 90 | for idx, detection in enumerate(detections): 91 | for det in detection: 92 | if det is None: 93 | continue 94 | if det[-1] == target_obj_id: # only count the person 95 | det = det.detach().cpu().numpy().tolist() 96 | person_detection.append(det) 97 | filename = os.path.basename(img_path) 98 | print (filename) 99 | results[filename.split('.')[0]] = person_detection 100 | if not os.path.isdir(args.detection_output_dir): 101 | os.mkdir(args.detection_output_dir) 102 | output_filename = os.path.join(args.detection_output_dir, detector.name + '+' + os.path.basename(args.test_dir) + '.json') 103 | print ('Results are written to %s' % output_filename) 104 | with open(output_filename, 'w') as f: 105 | json.dump(results, f) 106 | 107 | ''' 108 | if __name__ == '__main__': 109 | main() 110 | 111 | config = merge_args(args, config) 112 | 113 | detector = build_object_detector(config).cuda() 114 | for img in glob.glob("Path/to/dir/*.jpg"): 115 | frame_img = Image.open(img) 116 | frame_img = transforms.ToTensor()(frame_img) 117 | frame_img = frame_img.cuda() 118 | torch.squeeze(frame_image, dim=0) 119 | detection_results = detector.detect(frame_img, 
nms_thresh=config['val_nms_thresh'], conf_thresh=config['val_conf_thresh']) 120 | print (detection_results) 121 | ''' 122 | 123 | if __name__ == '__main__': 124 | main() 125 | -------------------------------------------------------------------------------- /detector/yolov3/utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import sys 5 | import numpy as np 6 | from PIL import Image 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from utils.augmentations import horisontal_flip 11 | from torch.utils.data import Dataset 12 | import torchvision.transforms as transforms 13 | 14 | 15 | def pad_to_square(img, pad_value): 16 | c, h, w = img.shape 17 | dim_diff = np.abs(h - w) 18 | # (upper / left) padding and (lower / right) padding 19 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 20 | # Determine padding 21 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) 22 | # Add padding 23 | img = F.pad(img, pad, "constant", value=pad_value) 24 | 25 | return img, pad 26 | 27 | 28 | def resize(image, size): 29 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 30 | return image 31 | 32 | 33 | def random_resize(images, min_size=288, max_size=448): 34 | new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0] 35 | images = F.interpolate(images, size=new_size, mode="nearest") 36 | return images 37 | 38 | 39 | class ImageFolder(Dataset): 40 | def __init__(self, folder_path, img_size=416): 41 | self.files = sorted(glob.glob("%s/*.*" % folder_path)) 42 | self.img_size = img_size 43 | 44 | def __getitem__(self, index): 45 | img_path = self.files[index % len(self.files)] 46 | # Extract image as PyTorch tensor 47 | img = transforms.ToTensor()(Image.open(img_path)) 48 | # Pad to square resolution 49 | img, _ = pad_to_square(img, 0) 50 | # Resize 51 | img = resize(img, self.img_size) 52 | 53 | return img_path, img 54 | 55 | def __len__(self): 56 | return len(self.files) 57 | 58 | 59 | class ListDataset(Dataset): 60 | def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): 61 | with open(list_path, "r") as file: 62 | self.img_files = file.readlines() 63 | 64 | self.label_files = [ 65 | path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") 66 | for path in self.img_files 67 | ] 68 | self.img_size = img_size 69 | self.max_objects = 100 70 | self.augment = augment 71 | self.multiscale = multiscale 72 | self.normalized_labels = normalized_labels 73 | self.min_size = self.img_size - 3 * 32 74 | self.max_size = self.img_size + 3 * 32 75 | self.batch_count = 0 76 | 77 | def __getitem__(self, index): 78 | 79 | # --------- 80 | # Image 81 | # --------- 82 | 83 | img_path = self.img_files[index % len(self.img_files)].rstrip() 84 | 85 | # Extract image as PyTorch tensor 86 | img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) 87 | 88 | # Handle images with less than three channels 89 | if len(img.shape) != 3: 90 | img = img.unsqueeze(0) 91 | img = img.expand((3, img.shape[1:])) 92 | 93 | _, h, w = img.shape 94 | h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) 95 | # Pad to square resolution 96 | img, pad = pad_to_square(img, 0) 97 | _, padded_h, padded_w = img.shape 98 | 99 | # --------- 100 | # Label 101 | # --------- 102 | 103 | label_path = self.label_files[index % len(self.img_files)].rstrip() 104 | 105 | targets = None 106 | if 
os.path.exists(label_path): 107 | boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5)) 108 | # Extract coordinates for unpadded + unscaled image 109 | x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) 110 | y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) 111 | x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) 112 | y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) 113 | # Adjust for added padding 114 | x1 += pad[0] 115 | y1 += pad[2] 116 | x2 += pad[1] 117 | y2 += pad[3] 118 | # Returns (x, y, w, h) 119 | boxes[:, 1] = ((x1 + x2) / 2) / padded_w 120 | boxes[:, 2] = ((y1 + y2) / 2) / padded_h 121 | boxes[:, 3] *= w_factor / padded_w 122 | boxes[:, 4] *= h_factor / padded_h 123 | 124 | targets = torch.zeros((len(boxes), 6)) 125 | targets[:, 1:] = boxes 126 | 127 | # Apply augmentations 128 | if self.augment: 129 | if np.random.random() < 0.5: 130 | img, targets = horisontal_flip(img, targets) 131 | 132 | return img_path, img, targets 133 | 134 | def collate_fn(self, batch): 135 | paths, imgs, targets = list(zip(*batch)) 136 | # Remove empty placeholder targets 137 | targets = [boxes for boxes in targets if boxes is not None] 138 | # Add sample index to targets 139 | for i, boxes in enumerate(targets): 140 | boxes[:, 0] = i 141 | targets = torch.cat(targets, 0) 142 | # Selects new image size every tenth batch 143 | if self.multiscale and self.batch_count % 10 == 0: 144 | self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) 145 | # Resize images to input shape 146 | imgs = torch.stack([resize(img, self.img_size) for img in imgs]) 147 | self.batch_count += 1 148 | return paths, imgs, targets 149 | 150 | def __len__(self): 151 | return len(self.img_files) 152 | -------------------------------------------------------------------------------- /detector/yolov3/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | 5 | from detector.yolov3.utils.utils import rescale_boxes 6 | from utils.utils import * 7 | from utils.datasets import * 8 | 9 | import os 10 | import sys 11 | import time 12 | import datetime 13 | import argparse 14 | 15 | from PIL import Image 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | from torchvision import datasets 20 | from torch.autograd import Variable 21 | 22 | import matplotlib.pyplot as plt 23 | import matplotlib.patches as patches 24 | from matplotlib.ticker import NullLocator 25 | 26 | if __name__ == "__main__": 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument("--image_folder", type=str, default="data/samples", help="path to dataset") 29 | parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") 30 | parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") 31 | parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") 32 | parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold") 33 | parser.add_argument("--nms_thres", type=float, default=0.4, help="iou thresshold for non-maximum suppression") 34 | parser.add_argument("--batch_size", type=int, default=1, help="size of the batches") 35 | parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation") 36 | parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") 37 | 
parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model") 38 | opt = parser.parse_args() 39 | print(opt) 40 | 41 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 42 | 43 | os.makedirs("output", exist_ok=True) 44 | 45 | # Set up model 46 | model = Darknet(opt.model_def, img_size=opt.img_size).to(device) 47 | 48 | if opt.weights_path.endswith(".weights"): 49 | # Load darknet weights 50 | model.load_darknet_weights(opt.weights_path) 51 | else: 52 | # Load checkpoint weights 53 | model.load_state_dict(torch.load(opt.weights_path)) 54 | 55 | model.eval() # Set in evaluation mode 56 | 57 | dataloader = DataLoader( 58 | ImageFolder(opt.image_folder, img_size=opt.img_size), 59 | batch_size=opt.batch_size, 60 | shuffle=False, 61 | num_workers=opt.n_cpu, 62 | ) 63 | 64 | classes = load_classes(opt.class_path) # Extracts class labels from file 65 | 66 | Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 67 | 68 | imgs = [] # Stores image paths 69 | img_detections = [] # Stores detections for each image index 70 | 71 | print("\nPerforming object detection:") 72 | prev_time = time.time() 73 | for batch_i, (img_paths, input_imgs) in enumerate(dataloader): 74 | # Configure input 75 | input_imgs = Variable(input_imgs.type(Tensor)) 76 | 77 | # Get detections 78 | with torch.no_grad(): 79 | detections = model(input_imgs) 80 | detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres) 81 | 82 | # Log progress 83 | current_time = time.time() 84 | inference_time = datetime.timedelta(seconds=current_time - prev_time) 85 | prev_time = current_time 86 | print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time)) 87 | 88 | # Save image and detections 89 | imgs.extend(img_paths) 90 | img_detections.extend(detections) 91 | 92 | # Bounding-box colors 93 | cmap = plt.get_cmap("tab20b") 94 | colors = [cmap(i) for i in np.linspace(0, 1, 20)] 95 | 96 | print("\nSaving images:") 97 | # Iterate through images and save plot of detections 98 | for img_i, (path, detections) in enumerate(zip(imgs, img_detections)): 99 | 100 | print("(%d) Image: '%s'" % (img_i, path)) 101 | 102 | # Create plot 103 | img = np.array(Image.open(path)) 104 | plt.figure() 105 | fig, ax = plt.subplots(1) 106 | ax.imshow(img) 107 | 108 | # Draw bounding boxes and labels of detections 109 | if detections is not None: 110 | # Rescale boxes to original image 111 | detections = rescale_boxes(detections, opt.img_size, img.shape[:2]) 112 | unique_labels = detections[:, -1].cpu().unique() 113 | n_cls_preds = len(unique_labels) 114 | bbox_colors = random.sample(colors, n_cls_preds) 115 | for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections: 116 | 117 | print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item())) 118 | 119 | box_w = x2 - x1 120 | box_h = y2 - y1 121 | 122 | color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])] 123 | # Create a Rectangle patch 124 | bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none") 125 | # Add the bbox to the plot 126 | ax.add_patch(bbox) 127 | # Add label 128 | plt.text( 129 | x1, 130 | y1, 131 | s=classes[int(cls_pred)], 132 | color="white", 133 | verticalalignment="top", 134 | bbox={"color": color, "pad": 0}, 135 | ) 136 | 137 | # Save generated image with detections 138 | plt.axis("off") 139 | plt.gca().xaxis.set_major_locator(NullLocator()) 140 | plt.gca().yaxis.set_major_locator(NullLocator()) 141 | filename = 
path.split("/")[-1].split(".")[0] 142 | plt.savefig(f"output/{filename}.png", bbox_inches="tight", pad_inches=0.0) 143 | plt.close() 144 | -------------------------------------------------------------------------------- /nets/STNet/STNLocalizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torchvision.models as models 4 | import numpy as np 5 | import torch.nn.functional as F 6 | from nets.backbone.backbone_config import get_backbone, get_last_conv_dim 7 | 8 | #backbone_info = { 'resnet18': {'model': models.resnet18, 'last_conv_dim':512}, 9 | # 'resnet50': {'model': models.resnet50, 'last_conv_dim':2048}, 10 | # 'resnet101': {'model': models.resnet101, 'last_conv_dim': 2048} 11 | # } 12 | 13 | class BasicLocalizer(nn.Module): 14 | def __init__(self, backbone, downsample_dim=128, fc_dim=256, num_output=6): 15 | super(BasicLocalizer, self).__init__() 16 | 17 | # resnet_model = backbone_info[backbone]['model'](num_classes=10) 18 | # last_conv_dim = backbone_info[backbone]['last_conv_dim'] 19 | 20 | # self.backbone = nn.Sequential(*list(resnet_model.children())[0:-2]) 21 | self.backbone = get_backbone(backbone) 22 | last_conv_dim = get_last_conv_dim(backbone) 23 | 24 | 25 | self.downsample_dim = downsample_dim 26 | self.down_sampler = nn.Sequential( 27 | nn.Conv2d(last_conv_dim, self.downsample_dim, kernel_size=1, stride=1, padding=0), 28 | nn.ReLU(True), 29 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 30 | 31 | # Regressor for the 3 * 2 affine matrix 32 | self.fc_dim = fc_dim 33 | self.last_spatial_dim = 4 34 | self.fc_loc = nn.Sequential( 35 | nn.Dropout(0.3), 36 | nn.Linear(self.downsample_dim * self.last_spatial_dim * self.last_spatial_dim, self.fc_dim), 37 | nn.ReLU(True), 38 | nn.Linear(self.fc_dim, num_output) 39 | ) 40 | 41 | # localization 42 | def forward(self, x): 43 | xs = self.backbone(x) 44 | # print (xs.shape) 45 | xs = self.down_sampler(xs) 46 | # print (xs.shape) 47 | xs = xs.view(-1, self.downsample_dim * self.last_spatial_dim * self.last_spatial_dim) 48 | theta = self.fc_loc(xs) 49 | # theta = theta.view(-1, 2, 3) 50 | return theta 51 | 52 | class AffineLocalizer(nn.Module): 53 | def __init__(self, backbone, downsample_dim, fc_dim, predict_dimension=False): 54 | super(AffineLocalizer, self).__init__() 55 | self.predict_dimension=predict_dimension 56 | num_output = 6 57 | if self.predict_dimension: 58 | num_output += 1 59 | 60 | self.localizer = BasicLocalizer(backbone, downsample_dim=downsample_dim, fc_dim=fc_dim, num_output=num_output) 61 | 62 | # initialization 63 | self.localizer.fc_loc[-1].weight.data.zero_() 64 | if self.predict_dimension: 65 | self.localizer.fc_loc[-1].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0, 1], dtype=torch.float)) 66 | else: 67 | self.localizer.fc_loc[-1].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)) 68 | 69 | # localization 70 | def forward(self, x): 71 | x = self.localizer(x) 72 | if self.predict_dimension: 73 | return x[:,:6].view(-1, 2, 3), x[:,-1] 74 | else: 75 | return x.view(-1, 2, 3), None 76 | 77 | # based on https://github.com/WarBean/tps_stn_pytorch/blob/master/tps_grid_gen.py 78 | class BoundedTPSLocalizer(nn.Module): 79 | 80 | def __init__(self, backbone, downsample_dim, fc_dim, grid_height, grid_width, target_control_points, predict_dimension=False): 81 | super(BoundedTPSLocalizer, self).__init__() 82 | self.precit_dimension = predict_dimension 83 | num_output = grid_height * grid_width * 2 84 | if 
self.precit_dimension: 85 | num_output += 1 86 | self.cnn = BasicLocalizer(backbone, downsample_dim=downsample_dim, fc_dim=fc_dim, num_output=num_output) 87 | 88 | #bias = torch.from_numpy(np.arctanh(target_control_points.numpy())) 89 | #bias = bias.view(-1) 90 | bias = torch.zeros(num_output) 91 | if self.precit_dimension: 92 | bias[:-1] = torch.from_numpy(np.arctanh(target_control_points.numpy())).view(-1) 93 | bias[-1] = 1.0 94 | else: 95 | bias = torch.from_numpy(np.arctanh(target_control_points.numpy())).view(-1) 96 | 97 | self.cnn.fc_loc[-1].bias.data.copy_(bias) 98 | self.cnn.fc_loc[-1].weight.data.zero_() 99 | 100 | def forward(self, x): 101 | batch_size = x.size(0) 102 | points = self.cnn(x) 103 | if self.precit_dimension: 104 | return torch.tanh(points[:,:-1]).view(batch_size, -1, 2), points[:,-1] 105 | else: 106 | return torch.tanh(points).view(batch_size, -1, 2), None 107 | 108 | # based on https://github.com/WarBean/tps_stn_pytorch/blob/master/tps_grid_gen.py 109 | class UnBoundedTPSLocalizer(nn.Module): 110 | 111 | def __init__(self, backbone, downsample_dim, fc_dim, grid_height, grid_width, target_control_points, predict_dimension=False): 112 | super(UnBoundedTPSLocalizer, self).__init__() 113 | 114 | self.precit_dimension = predict_dimension 115 | num_output = grid_height * grid_width * 2 116 | if self.precit_dimension: 117 | num_output += 1 118 | 119 | self.cnn = BasicLocalizer(backbone, downsample_dim=downsample_dim, fc_dim=fc_dim, num_output=num_output) 120 | 121 | # bias = target_control_points.view(-1) 122 | bias = torch.zeros(num_output) 123 | if self.precit_dimension: 124 | bias[:-1] = target_control_points.view(-1) 125 | bias[-1] = 1.0 126 | else: 127 | bias = target_control_points.view(-1) 128 | 129 | self.cnn.fc_loc[-1].bias.data.copy_(bias) 130 | self.cnn.fc_loc[-1].weight.data.zero_() 131 | 132 | def forward(self, x): 133 | batch_size = x.size(0) 134 | points = self.cnn(x) 135 | if self.precit_dimension: 136 | return points[:, :-1].view(batch_size, -1, 2), points[:,-1] 137 | else: 138 | return points.view(batch_size, -1, 2), None 139 | -------------------------------------------------------------------------------- /nets/ColorNet/PCT_transformation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | ''' 6 | class ColorTransformation(nn.Module): 7 | def __init__(self, config): 8 | super(ColorTransformation, self).__init__() 9 | file = config['color_transformation_path'] 10 | self.W1 = torch.tensor(np.load(file)["weight1"], dtype=torch.float32).cuda() 11 | self.W1 = self.W1.unsqueeze(0).unsqueeze(0) 12 | self.W2 = torch.tensor(np.load(file)["weight2"], dtype=torch.float32).cuda() 13 | self.W2 = self.W2.unsqueeze(0).unsqueeze(0) 14 | self.b = torch.tensor(np.load(file)["bias"], dtype=torch.float32).cuda() 15 | 16 | def forward(self, x): 17 | x = x.transpose(1, -1) 18 | x = torch.matmul(x.pow(2), self.W2) + torch.matmul(x, self.W1) + self.b 19 | x = x.transpose(1, -1) 20 | return x 21 | ''' 22 | 23 | class PCTTransformation(nn.Module): 24 | def __init__(self, config): 25 | super(PCTTransformation, self).__init__() 26 | # use_cuda = config['cuda'] 27 | # device_ids = config['gpu_ids'] 28 | 29 | file = config['color_transformation_path'] 30 | W1 = torch.tensor(np.load(file)["weight1"], dtype=torch.float32) 31 | self.W1 = torch.nn.Parameter(W1.unsqueeze(0).unsqueeze(0)) 32 | W2 = torch.tensor(np.load(file)["weight2"], dtype=torch.float32) 33 | self.W2 = 
torch.nn.Parameter(W2.unsqueeze(0).unsqueeze(0)) 34 | b = torch.tensor(np.load(file)["bias"], dtype=torch.float32) 35 | self.b = torch.nn.Parameter(b) 36 | 37 | def forward(self, x): 38 | x = x.transpose(1, -1) 39 | x = torch.matmul(x.pow(2), self.W2) + torch.matmul(x, self.W1) + self.b 40 | x = x.transpose(1, -1) 41 | x = torch.clamp(x, 0., 1.) 42 | return x 43 | 44 | # linear PCT transformation 45 | class PCTLinearTransformation(nn.Module): 46 | def __init__(self, config): 47 | super(PCTLinearTransformation, self).__init__() 48 | # self.use_cuda = use_cuda 49 | # self.device_ids = device_ids 50 | self.color_mapping = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 51 | # transform the color 52 | def forward(self, x): 53 | # transform the input 54 | n, c, h, w = x.shape 55 | y = torch.matmul(self.color_mapping, x.view(n, c, -1)) 56 | 57 | #y = torch.clamp(y, -1., 1.) 58 | y = torch.clamp(y, 0., 1.) 59 | return y.view(n, c, h, w) 60 | 61 | # linear PCT transformation 62 | class PCTLinearBiasTransformation(nn.Module): 63 | def __init__(self, config): 64 | super(PCTLinearBiasTransformation, self).__init__() 65 | # self.use_cuda = use_cuda 66 | # self.device_ids = device_ids 67 | self.color_mapping = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 68 | self.b = torch.nn.Parameter(torch.tensor([[0.0, 0.0, 0.0]])) 69 | 70 | # transform the color 71 | def forward(self, x): 72 | # transform the input 73 | n, c, h, w = x.shape 74 | y = torch.matmul(self.color_mapping, x.view(n, c, -1)) 75 | #y += self.b.view(1, 3, 1) 76 | 77 | #y = torch.clamp(y, -1., 1.) 78 | min_y = torch.min(y) 79 | max_y = torch.max(y) 80 | y = (y -min_y ) / (max_y-min_y) 81 | # y = torch.clamp(y, 0.0, 1.) 82 | return y.view(n, c, h, w) 83 | 84 | # non-linear PCT transformation 85 | class PCTNeuralTransformation(nn.Module): 86 | def __init__(self, config): 87 | super(PCTNeuralTransformation, self).__init__() 88 | # self.use_cuda = use_cuda 89 | # self.device_ids = device_ids 90 | self.M1 = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 91 | self.M2 = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 92 | 93 | # transform the color 94 | def forward(self, x): 95 | # transform the input 96 | n, c, h, w = x.shape 97 | y = torch.matmul(self.M1, x.view(n, c, -1)) 98 | y = nn.functional.relu(y) 99 | y = torch.matmul(self.M2, y.view(n, c, -1)) 100 | y = torch.clamp(y, 0., 1.) 101 | # y = torch.clamp(y, 0.0, 1.) 
102 | return y.view(n, c, h, w) 103 | 104 | ''' 105 | # non-linear PCT transformation 106 | class PCTNeuralTransformation(nn.Module): 107 | def __init__(self, config): 108 | super(PCTNeuralTransformation, self).__init__() 109 | # self.use_cuda = use_cuda 110 | # self.device_ids = device_ids 111 | fc_dim = 100 112 | self.fc_transform = nn.Sequential( 113 | nn.Dropout(0.3), 114 | nn.Linear(3, fc_dim), 115 | nn.ReLU(True), 116 | nn.Linear(fc_dim, 3) 117 | ) 118 | 119 | # transform the color 120 | def forward(self, x): 121 | # transform the input 122 | n, c, h, w = x.shape 123 | output = self.fc_transform(x.view(-1,c)) 124 | #output = torch.tanh(output) 125 | #output = 0.5* (output + 1.0) # normalize to [0 1] 126 | #output = torch.clamp(output, 0.0, 1.0) 127 | return output.view(n,c,h,w) 128 | ''' 129 | 130 | class PCTTransformationOld2New(nn.Module): 131 | def __init__(self): 132 | super(PCTTransformationOld2New, self).__init__() 133 | # use_cuda = config['cuda'] 134 | # device_ids = config['gpu_ids'] 135 | 136 | file = 'weights2_old2new_.npz' 137 | W1 = torch.tensor(np.load(file)["weight1"], dtype=torch.float32) 138 | self.W1 = torch.nn.Parameter(W1.unsqueeze(0).unsqueeze(0)) 139 | print (self.W1) 140 | W2 = torch.tensor(np.load(file)["weight2"], dtype=torch.float32) 141 | self.W2 = torch.nn.Parameter(W2.unsqueeze(0).unsqueeze(0)) 142 | print (self.W2) 143 | b = torch.tensor(np.load(file)["bias"], dtype=torch.float32) 144 | self.b = torch.nn.Parameter(b) 145 | 146 | def forward(self, x): 147 | x = x.transpose(1, -1) 148 | x = torch.matmul(x.pow(2), self.W2) + torch.matmul(x, self.W1) + self.b 149 | x = x.transpose(1, -1) 150 | return x 151 | 152 | -------------------------------------------------------------------------------- /nets/PatchTransformer/patch_transformer_net.py: -------------------------------------------------------------------------------- 1 | from nets.STNet.affine_STN import AffineSTNNet 2 | from nets.STNet.tps_STN import TpsSTNNet 3 | from utils.tools import transform_template_input 4 | from nets.ColorNet.cc_f4 import CC_Alex_FCN4 5 | from nets.ColorNet.PCT_transformation import * 6 | 7 | PCT_INFO = {'PCT':PCTTransformation, 'PCTLinear': PCTLinearTransformation, 'PCTNeural': PCTNeuralTransformation} 8 | LCT_INFO = {'cc_fcn4':CC_Alex_FCN4} 9 | 10 | class PatchTransformerNet(nn.Module): 11 | def __init__(self, config): 12 | super(PatchTransformerNet, self).__init__() 13 | self.config = config 14 | self.use_cuda = self.config['cuda'] 15 | self.device_ids = self.config['gpu_ids'] 16 | 17 | if config['STN'] == 'affine': 18 | self.STN = AffineSTNNet(config) 19 | elif config['STN'] == 'tps': 20 | self.STN = TpsSTNNet(config) 21 | 22 | # printer color transformation (PCT) 23 | self.predefined_PCT = True if self.config['PrinterCT'] == 'PCT' else False 24 | self.PCT = PCT_INFO[self.config['PrinterCT']](self.config) if self.config['PrinterCT'] != 'None' else None 25 | # only applied PCT once. This demonstrates better performance 26 | self.apply_PCT_twice = config['use_double_PCT'] 27 | assert self.apply_PCT_twice == False 28 | 29 | # Lighting color transformation (LCT) 30 | self.use_LCT = self.config['use_LightingCT'] 31 | if self.use_LCT: 32 | assert self.PCT is not None 33 | self.LCT = LCT_INFO[self.config['LightingCT']]() 34 | 35 | # Note: 'x' is within [-1 1] while template_img and frame_img are within [0 1]. 
36 | # the output is within [0 1] 37 | def forward(self, x, bboxes, masks, template_img, frame_img): 38 | if self.apply_PCT_twice: 39 | new_template_img = self.PCT(template_img) 40 | x_stn, _ = self.STN(x, new_template_img) 41 | else: 42 | x_stn, _ = self.STN(x, template_img) 43 | 44 | # paste the transformed patch to the frame image 45 | x_stn = transform_template_input(x_stn, bboxes, frame_img.shape[2:]) 46 | 47 | # perform printer color transformation 48 | x_pct = self.PCT(x_stn) if self.PCT is not None else x_stn 49 | x_pct = x_pct * masks + frame_img * (1. - masks) 50 | x_pct = torch.clamp(x_pct, 0, 0.999) 51 | 52 | if self.use_LCT: 53 | lct = self.LCT.forward_template(frame_img) 54 | # print (lct) 55 | x_lct = x_pct * lct 56 | x_lct = x_lct * masks + frame_img * (1. - masks) 57 | x_lct = torch.clamp(x_lct, 0, 0.999) 58 | 59 | if self.use_LCT: 60 | return x_lct, x_pct, lct 61 | else: 62 | return x_pct 63 | 64 | ''' 65 | # Note that the range of output [0 1] is different from that of input [-1 1] because of the multiplication 66 | # of line 91 does not work with the input range. Also 'ground_truth' got changed. Good for now, but it's 67 | # better to REWRITE in the future for reducing confusion and potential issues. 68 | def forward(self, x, bboxes, masks, template_img, frame_img): 69 | # geometric 70 | new_template_img = self.PCT(template_img) 71 | x_stn, _ = self.STN(x, new_template_img) 72 | 73 | # x_stn, _ = self.STN(x, template_img) 74 | # x_stn = F.avg_pool2d(x_stn, 3, stride=1, padding=1) 75 | 76 | # transform the template to be the input image to the generator 77 | x_stn = transform_template_input(x_stn, bboxes, frame_img.shape[2:]) 78 | 79 | # self.vis_tensor(x_stn * masks + frame_img * (1. - masks), 'before_') 80 | 81 | x_stn = self.PCT(x_stn) 82 | 83 | x_pct = x_stn * masks + frame_img * (1. - masks) 84 | 85 | # self.vis_tensor(x_pct, 'after_') 86 | 87 | x_pct = torch.clamp(x_pct, -0.999, 0.999) 88 | 89 | # frame_img_in_lct = transform_frames_input(frame_img, coord_w_set, (256, 256)) 90 | # put the color in range [0 1] !!! critical 91 | x_pct.add_(1.0).div_(2.0) 92 | if self.use_LCT: 93 | frame_img.add_(1.0).div_(2.0) 94 | lct = self.LCT.forward_template(frame_img) 95 | lct = F.interpolate(lct, size=frame_img.shape[2:], mode='bilinear', align_corners=False) 96 | x_lct = x_pct * lct 97 | x_lct = x_lct * masks + frame_img * (1. 
- masks) 98 | # x_lct = torch.clamp(x_lct, -0.999, 0.999) 99 | x_lct = torch.clamp(x_lct, 0, 0.999) 100 | 101 | if self.use_LCT: 102 | return x_lct, lct 103 | else: 104 | return x_pct 105 | ''' 106 | 107 | def load_from_file(self, model_path): 108 | try: 109 | checkpoint = torch.load(model_path, map_location='cpu') 110 | #for key, _ in checkpoint.items(): 111 | # print (key) 112 | self.STN.load_state_dict(fix_checkpoint_key((checkpoint['stn'])), strict=True) 113 | print ('--- Finished loading STN module .....') 114 | 115 | if self.PCT is not None: 116 | if not self.predefined_PCT: 117 | self.PCT.load_state_dict(fix_checkpoint_key((checkpoint['color_transformer'])), strict=True) 118 | print (checkpoint['color_transformer']) 119 | print ('--- Finished loading Printer Color Transformation (%s) module' % (self.config['PrinterCT'])) 120 | else: 121 | print ('--- Printer Color Transformation (%s) module loaded from somewhere else' % (self.config['PrinterCT'])) 122 | 123 | if self.use_LCT: 124 | self.LCT.load_state_dict(fix_checkpoint_key((checkpoint['generator'])), strict=True) 125 | print ('--- Finished loading Lighting Color Transformation (%s) module' % (self.config['LightingCT'])) 126 | 127 | except Exception as e: 128 | print (e) 129 | raise IOError('Warning ---mostly this is because the model was trained using different names for its submodules. Please double check if the right model is used.') 130 | 131 | 132 | def fix_checkpoint_key(checkpoint): 133 | new_dict = {} 134 | for k, v in checkpoint.items(): 135 | # TODO: a better approach: 136 | new_dict[k.replace("module.", "")] = v 137 | return new_dict 138 | -------------------------------------------------------------------------------- /nets/AdvPatch/collaborative_advPatch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torchvision import transforms 4 | from .advPatch import AdvPatch 5 | from PIL import Image 6 | from .advPatch_util import generate_patch, generate_border_mask 7 | import os 8 | 9 | class CollaborativeAdvPatch(nn.Module): 10 | def __init__(self, config): 11 | super(CollaborativeAdvPatch, self).__init__() 12 | self.adv_patch_size = tuple(config['adv_patch_size']) 13 | self.apply_border_mask = config['apply_border_mask'] 14 | print(' ===== AdvPatch size: (%d %d %d) =======' % (self.adv_patch_size)) 15 | 16 | if self.apply_border_mask: 17 | self.border_value = config['border_value'] 18 | border_size = int(self.adv_patch_size[0] * config['border_mask_ratio'] + 0.5) 19 | print(' ===== Border mask size: %d Value: %d =======' % (border_size, self.border_value)) 20 | self.border_mask = nn.Parameter(generate_border_mask(self.adv_patch_size, border_size)) 21 | 22 | self.collaborative_learning = not config['CL_pretrained'] 23 | #self.adv_patch = nn.Parameter(generate_patch("gray", size=self.adv_patch_size[:2])) 24 | #self.adv_patch_near = nn.Parameter(generate_patch("gray", size=self.adv_patch_size[:2])) 25 | #self.adv_patch_far = nn.Parameter(generate_patch("gray", size=self.adv_patch_size[:2])) 26 | self.adv_patch = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 27 | self.adv_patch_near = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 28 | self.adv_patch_far = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 29 | 30 | # learnable weights 31 | if config.get('collaborative_weights', False): 32 | self.collaborative_weight = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid()) 33 | 
nn.init.constant_(self.collaborative_weight[0].weight, 10.0) 34 | nn.init.constant_(self.collaborative_weight[0].bias, -2.5) 35 | else: 36 | self.collaborative_weight = None 37 | 38 | @property 39 | def patch_size(self): 40 | return self.adv_patch_size 41 | 42 | @property 43 | def border_size(self): 44 | return self.border_size if self.apply_border_mask else 0 45 | 46 | def learnable(self): 47 | out = [self.adv_patch] if not self.collaborative_learning else \ 48 | [self.adv_patch, self.adv_patch_near, self.adv_patch_far] 49 | if self.collaborative_weight: 50 | out += [self.collaborative_weight[0].weight, self.collaborative_weight[0].bias] 51 | return out 52 | 53 | def clip(self): 54 | self.adv_patch.data.clamp_(0, 1) # keep patch in image range 55 | if self.collaborative_learning: 56 | self.adv_patch_near.data.clamp_(0, 1) 57 | self.adv_patch_far.data.clamp_(0, 1) 58 | 59 | if self.collaborative_weight: 60 | self.collaborative_weight[0].weight.data.clamp_(9.0, 11.0) 61 | self.collaborative_weight[0].bias.data.clamp_(-3.0, -2.0) 62 | 63 | def forward(self): 64 | if self.apply_border_mask: 65 | # note that nn.parameter cannot be assigned directly, so an internal change is needed 66 | self.adv_patch.data *= self.border_mask.data 67 | self.adv_patch.data += (1 - self.border_mask.data) * self.border_value 68 | 69 | if self.training: 70 | return self.adv_patch, self.adv_patch_near, self.adv_patch_far 71 | 72 | return self.adv_patch 73 | 74 | def save_patch(self, patch_path): 75 | adv_patch = self.adv_patch.detach().cpu() 76 | im = transforms.ToPILImage('RGB')(adv_patch) 77 | im.save(patch_path) 78 | 79 | if self.collaborative_learning: 80 | base_path, adv_file = os.path.split(patch_path) 81 | base_file, ext = adv_file.split('.') 82 | 83 | adv_patch_near = self.adv_patch_near.detach().cpu() 84 | im_near = transforms.ToPILImage('RGB')(adv_patch_near) 85 | im_near.save(os.path.join(base_path, base_file + '_near.' + ext)) 86 | 87 | adv_patch_far = self.adv_patch_far.detach().cpu() 88 | im_far = transforms.ToPILImage('RGB')(adv_patch_far) 89 | im_far.save(os.path.join(base_path, base_file + '_far.' 
+ ext)) 90 | 91 | def _load_patch_image(self, patch_path): 92 | patch_img = Image.open(patch_path).convert('RGB') 93 | w, h = patch_img.size 94 | # first dim is height 95 | adv_h, adv_w = self.adv_patch_size[:2] 96 | if w != adv_w or h != adv_h: 97 | patch_img = transforms.Resize((adv_h, adv_w), Image.BILINEAR)(patch_img) 98 | return patch_img 99 | 100 | def load_patch(self, patch_path): 101 | patch_img = self._load_patch_image(patch_path) 102 | self.adv_patch = torch.nn.Parameter(transforms.ToTensor()(patch_img)) 103 | 104 | if self.collaborative_learning: 105 | base_path, adv_file = os.path.split(patch_path) 106 | base_file, ext = adv_file.split('.') 107 | adv_near_file = os.path.join(base_path, base_file+'_near.'+ext) 108 | if os.path.isfile(adv_near_file): 109 | near_patch_img = self._load_patch_image(adv_near_file) 110 | self.adv_patch_near = torch.nn.Parameter(transforms.ToTensor()(near_patch_img)) 111 | 112 | adv_far_file = os.path.join(base_path, base_file+'_far.'+ext) 113 | if os.path.isfile(adv_far_file): 114 | far_patch_img = self._load_patch_image(adv_far_file) 115 | self.adv_patch_far = torch.nn.Parameter(transforms.ToTensor()(far_patch_img)) 116 | 117 | def load_pretrained_patch(self, near_patch_path, far_patch_path): 118 | assert not self.collaborative_learning 119 | if near_patch_path is not None: 120 | near_patch_img = self._load_patch_image(near_patch_path) 121 | self.adv_patch_near = torch.nn.Parameter(transforms.ToTensor()(near_patch_img)) 122 | print ('Loading near model from %s' % (near_patch_path)) 123 | 124 | if far_patch_path is not None: 125 | far_patch_img = self._load_patch_image(far_patch_path) 126 | self.adv_patch_far = torch.nn.Parameter(transforms.ToTensor()(far_patch_img)) 127 | print ('Loading far model from %s' % (far_patch_path)) 128 | 129 | def create_collaborative_advPatch_model(config): 130 | return CollaborativeAdvPatch(config) 131 | -------------------------------------------------------------------------------- /detector/yolov2_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | from detector.yolov2.darknet import Darknet 7 | from detector.yolov2.utils import do_detect_1 8 | from detector.object_detector import ObjectDetector 9 | from detector.yolo_util import nms, xywh2xyxy, wrap_detection_results 10 | 11 | 12 | class YOLOV2_Detector(ObjectDetector): 13 | def __init__(self, model_name, cfg_path, model_path, class_names, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 14 | # load darknet 15 | # model, class_names = self._load_model(cfg_path, model_path) 16 | super().__init__(model_name, cfg_path, model_path, class_names, input_size, test_size, target_object_id) 17 | 18 | # skip background i.e. 
0 19 | self.class_names = [name for k, name in enumerate(class_names) if k > 0] 20 | 21 | def load_model(self, cfg_path, model_path, class_names=None): 22 | darknet_model = Darknet(cfg_path) 23 | darknet_model.load_weights(model_path) 24 | darknet_model = darknet_model.eval() 25 | 26 | return darknet_model 27 | 28 | ''' 29 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 30 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 31 | outputs = self.model(scaled_images) 32 | boxes = get_region_boxes(outputs, conf_thresh, self.model.num_classes, self.model.anchors, self.model.num_anchors) 33 | if nms_thresh > 0: 34 | boxes = [nms(box, nms_thresh) for box in boxes] 35 | 36 | # convert it to coordinates with regards to the orginal sizes 37 | outputs = [] 38 | height, width = self.input_size 39 | for b in boxes: 40 | if len(b) == 0: 41 | #outputs += [torch.FloatTensor([[0.0, 0.0, 0.0, 0.0, 0.0, -1]]).cuda()] 42 | outputs += [None] 43 | else: 44 | t = torch.stack(b).cuda() 45 | t_new = t.clone() 46 | t_new[:,0] = (t[:,0] - t[:,2] / 2.0) * width 47 | t_new[:,1] = (t[:,1] - t[:,3] / 2.0) * height 48 | t_new[:,2] = (t[:,0] + t[:,2] / 2.0) * width 49 | t_new[:,3] = (t[:,1] + t[:,3] / 2.0) * height 50 | # print ('t_new', t_new) 51 | # skip classification score 52 | outputs += [t_new[:, [0,1,2,3,4,6]]] 53 | return outputs 54 | ''' 55 | 56 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 57 | _, h, w, _ = images.shape 58 | if self.test_size[0] == w and self.test_size[1] == h: 59 | scaled_images = images 60 | else: 61 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 62 | 63 | outputs = self.model(scaled_images) 64 | # print (outputs.shape) 65 | outputs = post_processing(outputs, self.model.num_classes, self.model.anchors, self.model.num_anchors, self.test_size) 66 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 67 | outputs[..., :4] = xywh2xyxy(outputs[..., :4]) 68 | 69 | outputs = nms(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh) 70 | results = wrap_detection_results(outputs, self.test_size[0], self.input_size) 71 | 72 | return results 73 | 74 | # The 'detect' method is implemented differently from the one in the original implmentation of yolov2. 75 | # 'detector_detect' attempts to keep the same implementation as the original one. 
76 | def detector_detect(self, img, conf_thresh, nms_thresh): 77 | batch, h, w, _ = img.shape 78 | if self.test_size[0] == w and self.test_size[1] == h: 79 | scaled_img = img 80 | else: 81 | scaled_img = F.interpolate(img, size=self.test_size, mode='bilinear', align_corners=False) 82 | 83 | outputs = do_detect_1(self.model, scaled_img, conf_thresh, nms_thresh) 84 | if not outputs: 85 | return [[None]] * batch 86 | 87 | for item in outputs: 88 | item[:4] = xywh2xyxy(item[:4]) 89 | item[:4] *= self.test_size[0] 90 | outputs = [torch.stack(outputs, dim=0)] 91 | results = wrap_detection_results(outputs, self.test_size[0], self.input_size) 92 | # resize 93 | return results 94 | 95 | def post_processing(output, num_classes, anchors, num_anchors, test_size): 96 | # anchor_step = len(anchors)/num_anchors 97 | FloatTensor = torch.cuda.FloatTensor if output.is_cuda else torch.FloatTensor 98 | 99 | anchor_step = len(anchors) // num_anchors 100 | if output.dim() == 3: 101 | output = output.unsqueeze(0) 102 | 103 | batch, _, h, w = output.shape 104 | assert (output.size(1) == (5 + num_classes) * num_anchors) 105 | 106 | #print(output.size()) 107 | output = output.view(batch * num_anchors, 5 + num_classes, h * w) 108 | #print(output.size()) 109 | output = output.transpose(1, 2).contiguous() 110 | #print(output.size()) 111 | output = output.view(batch * num_anchors * h * w, 5 + num_classes) 112 | #print(output.size()) 113 | 114 | # Get outputs 115 | x = torch.sigmoid(output[..., 0]) # Center x 116 | y = torch.sigmoid(output[..., 1]) # Center y 117 | pred_conf = torch.sigmoid(output[..., 4]) # Conf 118 | pred_cls = torch.sigmoid(output[..., 5:]) # Cls pred. 119 | 120 | # print(output.size()) 121 | grid_x = torch.linspace(0, w - 1, w).repeat(h, 1).repeat(batch * num_anchors, 1, 1).view( 122 | batch * num_anchors * h * w).cuda() 123 | grid_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().repeat(batch * num_anchors, 1, 1).view( 124 | batch * num_anchors * h * w).cuda() 125 | xs = x + grid_x 126 | ys = y + grid_y 127 | 128 | anchor_w = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([0])) 129 | anchor_h = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([1])) 130 | anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, h * w).view(batch * num_anchors * h * w).cuda() 131 | anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, h * w).view(batch * num_anchors * h * w).cuda() 132 | ws = torch.exp(output[..., 2]) * anchor_w 133 | hs = torch.exp(output[..., 3]) * anchor_h 134 | 135 | iw, ih = test_size 136 | output = torch.cat( 137 | ( 138 | xs.view(batch, -1, 1) / w * iw, 139 | ys.view(batch, -1, 1) / h * ih, 140 | ws.view(batch, -1, 1) / w * iw, 141 | hs.view(batch, -1, 1) / h * ih, 142 | pred_conf.view(batch, -1, 1), 143 | pred_cls.view(batch, -1, num_classes), 144 | ), 145 | dim=2, 146 | ) 147 | #print (output.shape) 148 | return output 149 | -------------------------------------------------------------------------------- /pytorch_msssim/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from math import exp 4 | import numpy as np 5 | 6 | 7 | def gaussian(window_size, sigma): 8 | gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)]) 9 | return gauss/gauss.sum() 10 | 11 | 12 | def create_window(window_size, channel=1): 13 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 14 | _2D_window = 
_1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 15 | window = _2D_window.expand(channel, 1, window_size, window_size).contiguous() 16 | return window 17 | 18 | 19 | def ssim(img1, img2, window_size=11, window=None, size_average=True, full=False, val_range=None): 20 | # Value range can be different from 255. Other common ranges are 1 (sigmoid) and 2 (tanh). 21 | if val_range is None: 22 | if torch.max(img1) > 128: 23 | max_val = 255 24 | else: 25 | max_val = 1 26 | 27 | if torch.min(img1) < -0.5: 28 | min_val = -1 29 | else: 30 | min_val = 0 31 | L = max_val - min_val 32 | else: 33 | L = val_range 34 | 35 | padd = 0 36 | (_, channel, height, width) = img1.size() 37 | if window is None: 38 | real_size = min(window_size, height, width) 39 | window = create_window(real_size, channel=channel).to(img1.device) 40 | 41 | mu1 = F.conv2d(img1, window, padding=padd, groups=channel) 42 | mu2 = F.conv2d(img2, window, padding=padd, groups=channel) 43 | 44 | mu1_sq = mu1.pow(2) 45 | mu2_sq = mu2.pow(2) 46 | mu1_mu2 = mu1 * mu2 47 | 48 | sigma1_sq = F.conv2d(img1 * img1, window, padding=padd, groups=channel) - mu1_sq 49 | sigma2_sq = F.conv2d(img2 * img2, window, padding=padd, groups=channel) - mu2_sq 50 | sigma12 = F.conv2d(img1 * img2, window, padding=padd, groups=channel) - mu1_mu2 51 | 52 | C1 = (0.01 * L) ** 2 53 | C2 = (0.03 * L) ** 2 54 | 55 | v1 = 2.0 * sigma12 + C2 56 | v2 = sigma1_sq + sigma2_sq + C2 57 | cs = torch.mean(v1 / v2) # contrast sensitivity 58 | 59 | ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2) 60 | 61 | if size_average: 62 | ret = ssim_map.mean() 63 | else: 64 | ret = ssim_map.mean(1).mean(1).mean(1) 65 | 66 | if full: 67 | return ret, cs 68 | return ret 69 | 70 | 71 | def msssim(img1, img2, window_size=11, size_average=True, val_range=None, normalize=False): 72 | device = img1.device 73 | weights = torch.FloatTensor([0.0448, 0.2856, 0.3001, 0.2363, 0.1333]).to(device) 74 | levels = weights.size()[0] 75 | mssim = [] 76 | mcs = [] 77 | for _ in range(levels): 78 | sim, cs = ssim(img1, img2, window_size=window_size, size_average=size_average, full=True, val_range=val_range) 79 | mssim.append(sim) 80 | mcs.append(cs) 81 | 82 | img1 = F.avg_pool2d(img1, (2, 2)) 83 | img2 = F.avg_pool2d(img2, (2, 2)) 84 | 85 | mssim = torch.stack(mssim) 86 | mcs = torch.stack(mcs) 87 | 88 | # Normalize (to avoid NaNs during training unstable models, not compliant with original definition) 89 | if normalize: 90 | mssim = (mssim + 1) / 2 91 | mcs = (mcs + 1) / 2 92 | 93 | pow1 = mcs ** weights 94 | pow2 = mssim ** weights 95 | # From Matlab implementation https://ece.uwaterloo.ca/~z70wang/research/iwssim/ 96 | output = torch.prod(pow1[:-1] * pow2[-1]) 97 | return output 98 | 99 | 100 | # normalized cross correlation 101 | def ncc(img1, img2, window_size=11, window=None, size_average=True): 102 | padd = 0 103 | (_, channel, height, width) = img1.size() 104 | if window is None: 105 | real_size = min(window_size, height, width) 106 | window = create_window(real_size, channel=channel).to(img1.device) 107 | 108 | mu1 = F.conv2d(img1, window, padding=padd, groups=channel) 109 | mu2 = F.conv2d(img2, window, padding=padd, groups=channel) 110 | 111 | mu1_sq = mu1.pow(2) 112 | mu2_sq = mu2.pow(2) 113 | mu1_mu2 = mu1 * mu2 114 | 115 | sigma1_sq = F.conv2d(img1 * img1, window, padding=padd, groups=channel) - mu1_sq 116 | sigma2_sq = F.conv2d(img2 * img2, window, padding=padd, groups=channel) - mu2_sq 117 | sigma12 = F.conv2d(img1 * img2, window, padding=padd, groups=channel) - 
mu1_mu2 118 | print (sigma1_sq.shape) 119 | print (sigma1_sq.mean(), sigma2_sq.mean(), sigma12.mean()) 120 | ncc = sigma12 / (torch.sqrt(sigma1_sq * sigma2_sq) + 0.0000001) 121 | print (ncc) 122 | if size_average: 123 | ret = ncc.mean() 124 | print (ret) 125 | else: 126 | ret = ncc.mean(1).mean(1).mean(1) 127 | 128 | return ret 129 | 130 | # Classes to re-use window 131 | class SSIM(torch.nn.Module): 132 | def __init__(self, window_size=11, size_average=True, val_range=None): 133 | super(SSIM, self).__init__() 134 | self.window_size = window_size 135 | self.size_average = size_average 136 | self.val_range = val_range 137 | 138 | # Assume 1 channel for SSIM 139 | self.channel = 1 140 | self.window = create_window(window_size) 141 | 142 | def forward(self, img1, img2): 143 | (_, channel, _, _) = img1.size() 144 | 145 | if channel == self.channel and self.window.dtype == img1.dtype: 146 | window = self.window 147 | else: 148 | window = create_window(self.window_size, channel).to(img1.device).type(img1.dtype) 149 | self.window = window 150 | self.channel = channel 151 | 152 | return ssim(img1, img2, window=window, window_size=self.window_size, size_average=self.size_average) 153 | 154 | class MSSSIM(torch.nn.Module): 155 | def __init__(self, window_size=11, size_average=True, channel=3): 156 | super(MSSSIM, self).__init__() 157 | self.window_size = window_size 158 | self.size_average = size_average 159 | self.channel = channel 160 | 161 | def forward(self, img1, img2): 162 | # TODO: store window between calls if possible 163 | return msssim(img1, img2, window_size=self.window_size, size_average=self.size_average) 164 | 165 | # Classes to re-use window 166 | class NCC(torch.nn.Module): 167 | def __init__(self, window_size=11, size_average=True): 168 | super(NCC, self).__init__() 169 | self.window_size = window_size 170 | self.size_average = size_average 171 | 172 | # Assume 1 channel for SSIM 173 | self.channel = 1 174 | self.window = create_window(window_size) 175 | 176 | def forward(self, img1, img2): 177 | (_, channel, _, _) = img1.size() 178 | 179 | if channel == self.channel and self.window.dtype == img1.dtype: 180 | window = self.window 181 | else: 182 | window = create_window(self.window_size, channel).to(img1.device).type(img1.dtype) 183 | self.window = window 184 | self.channel = channel 185 | 186 | return ncc(img1, img2, window=window, window_size=self.window_size, size_average=self.size_average) 187 | -------------------------------------------------------------------------------- /detector/yolov3/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-YOLOv3 2 | A minimal PyTorch implementation of YOLOv3, with support for training, inference and evaluation. 3 | 4 | ## Installation 5 | ##### Clone and install requirements 6 | $ git clone https://github.com/eriklindernoren/PyTorch-YOLOv3 7 | $ cd PyTorch-YOLOv3/ 8 | $ sudo pip3 install -r requirements.txt 9 | 10 | ##### Download pretrained weights 11 | $ cd weights/ 12 | $ bash download_weights.sh 13 | 14 | ##### Download COCO 15 | $ cd data/ 16 | $ bash get_coco_dataset.sh 17 | 18 | ## Test 19 | Evaluates the model on COCO test. 20 | 21 | $ python3 test.py --weights_path weights/yolov3.weights 22 | 23 | | Model | mAP (min. 50 IoU) | 24 | | ----------------------- |:-----------------:| 25 | | YOLOv3 608 (paper) | 57.9 | 26 | | YOLOv3 608 (this impl.) | 57.3 | 27 | | YOLOv3 416 (paper) | 55.3 | 28 | | YOLOv3 416 (this impl.) 
| 55.5 | 29 | 30 | ## Inference 31 | Uses pretrained weights to make predictions on images. Below table displays the inference times when using as inputs images scaled to 256x256. The ResNet backbone measurements are taken from the YOLOv3 paper. The Darknet-53 measurement marked shows the inference time of this implementation on my 1080ti card. 32 | 33 | | Backbone | GPU | FPS | 34 | | ----------------------- |:--------:|:--------:| 35 | | ResNet-101 | Titan X | 53 | 36 | | ResNet-152 | Titan X | 37 | 37 | | Darknet-53 (paper) | Titan X | 76 | 38 | | Darknet-53 (this impl.) | 1080ti | 74 | 39 | 40 | $ python3 detect.py --image_folder data/samples/ 41 | 42 |

43 | *(sample detection result images)*
44 |
45 |
46 | 47 | ## Train 48 | ``` 49 | $ train.py [-h] [--epochs EPOCHS] [--batch_size BATCH_SIZE] 50 | [--gradient_accumulations GRADIENT_ACCUMULATIONS] 51 | [--model_def MODEL_DEF] [--data_config DATA_CONFIG] 52 | [--pretrained_weights PRETRAINED_WEIGHTS] [--n_cpu N_CPU] 53 | [--img_size IMG_SIZE] 54 | [--checkpoint_interval CHECKPOINT_INTERVAL] 55 | [--evaluation_interval EVALUATION_INTERVAL] 56 | [--compute_map COMPUTE_MAP] 57 | [--multiscale_training MULTISCALE_TRAINING] 58 | ``` 59 | 60 | #### Example (COCO) 61 | To train on COCO using a Darknet-53 backend pretrained on ImageNet run: 62 | ``` 63 | $ python3 train.py --data_config config/coco.data --pretrained_weights weights/darknet53.conv.74 64 | ``` 65 | 66 | #### Training log 67 | ``` 68 | ---- [Epoch 7/100, Batch 7300/14658] ---- 69 | +------------+--------------+--------------+--------------+ 70 | | Metrics | YOLO Layer 0 | YOLO Layer 1 | YOLO Layer 2 | 71 | +------------+--------------+--------------+--------------+ 72 | | grid_size | 16 | 32 | 64 | 73 | | loss | 1.554926 | 1.446884 | 1.427585 | 74 | | x | 0.028157 | 0.044483 | 0.051159 | 75 | | y | 0.040524 | 0.035687 | 0.046307 | 76 | | w | 0.078980 | 0.066310 | 0.027984 | 77 | | h | 0.133414 | 0.094540 | 0.037121 | 78 | | conf | 1.234448 | 1.165665 | 1.223495 | 79 | | cls | 0.039402 | 0.040198 | 0.041520 | 80 | | cls_acc | 44.44% | 43.59% | 32.50% | 81 | | recall50 | 0.361111 | 0.384615 | 0.300000 | 82 | | recall75 | 0.222222 | 0.282051 | 0.300000 | 83 | | precision | 0.520000 | 0.300000 | 0.070175 | 84 | | conf_obj | 0.599058 | 0.622685 | 0.651472 | 85 | | conf_noobj | 0.003778 | 0.004039 | 0.004044 | 86 | +------------+--------------+--------------+--------------+ 87 | Total Loss 4.429395 88 | ---- ETA 0:35:48.821929 89 | ``` 90 | 91 | #### Tensorboard 92 | Track training progress in Tensorboard: 93 | * Initialize training 94 | * Run the command below 95 | * Go to http://localhost:6006/ 96 | 97 | ``` 98 | $ tensorboard --logdir='logs' --port=6006 99 | ``` 100 | 101 | ## Train on Custom Dataset 102 | 103 | #### Custom model 104 | Run the commands below to create a custom model definition, replacing `` with the number of classes in your dataset. 105 | 106 | ``` 107 | $ cd config/ # Navigate to config dir 108 | $ bash create_custom_model.sh # Will create custom model 'yolov3-custom.cfg' 109 | ``` 110 | 111 | #### Classes 112 | Add class names to `data/custom/classes.names`. This file should have one row per class name. 113 | 114 | #### Image Folder 115 | Move the images of your dataset to `data/custom/images/`. 116 | 117 | #### Annotation Folder 118 | Move your annotations to `data/custom/labels/`. The dataloader expects that the annotation file corresponding to the image `data/custom/images/train.jpg` has the path `data/custom/labels/train.txt`. Each row in the annotation file should define one bounding box, using the syntax `label_idx x_center y_center width height`. The coordinates should be scaled `[0, 1]`, and the `label_idx` should be zero-indexed and correspond to the row number of the class name in `data/custom/classes.names`. 119 | 120 | #### Define Train and Validation Sets 121 | In `data/custom/train.txt` and `data/custom/valid.txt`, add paths to images that will be used as train and validation data respectively. 
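The annotation rows described under *Annotation Folder* above can be produced from ordinary pixel coordinates. A minimal sketch (illustrative only, not part of this repository; the helper name and the example numbers are made up):

```
# Hypothetical helper: convert a pixel-space box (x1, y1, x2, y2) into the
# `label_idx x_center y_center width height` row format used in
# data/custom/labels/*.txt, with all values normalized to [0, 1].
def to_yolo_row(label_idx, x1, y1, x2, y2, img_w, img_h):
    x_center = (x1 + x2) / 2.0 / img_w
    y_center = (y1 + y2) / 2.0 / img_h
    width = (x2 - x1) / img_w
    height = (y2 - y1) / img_h
    return f"{label_idx} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"

# Example: a box spanning (50, 80) to (150, 280) in a 640x480 image, class 0
print(to_yolo_row(0, 50, 80, 150, 280, 640, 480))
# -> 0 0.156250 0.375000 0.156250 0.416667
```

Each image listed in `train.txt`/`valid.txt` then needs a label file containing one such row per object.
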
122 | 123 | #### Train 124 | To train on the custom dataset run: 125 | 126 | ``` 127 | $ python3 train.py --model_def config/yolov3-custom.cfg --data_config config/custom.data 128 | ``` 129 | 130 | Add `--pretrained_weights weights/darknet53.conv.74` to train using a backend pretrained on ImageNet. 131 | 132 | 133 | ## Credit 134 | 135 | ### YOLOv3: An Incremental Improvement 136 | _Joseph Redmon, Ali Farhadi_
137 | 138 | **Abstract**
139 | We present some updates to YOLO! We made a bunch 140 | of little design changes to make it better. We also trained 141 | this new network that’s pretty swell. It’s a little bigger than 142 | last time but more accurate. It’s still fast though, don’t 143 | worry. At 320 × 320 YOLOv3 runs in 22 ms at 28.2 mAP, 144 | as accurate as SSD but three times faster. When we look 145 | at the old .5 IOU mAP detection metric YOLOv3 is quite 146 | good. It achieves 57.9 AP50 in 51 ms on a Titan X, compared 147 | to 57.5 AP50 in 198 ms by RetinaNet, similar performance 148 | but 3.8× faster. As always, all the code is online at 149 | https://pjreddie.com/yolo/. 150 | 151 | [[Paper]](https://pjreddie.com/media/files/papers/YOLOv3.pdf) [[Project Webpage]](https://pjreddie.com/darknet/yolo/) [[Authors' Implementation]](https://github.com/pjreddie/darknet) 152 | 153 | ``` 154 | @article{yolov3, 155 | title={YOLOv3: An Incremental Improvement}, 156 | author={Redmon, Joseph and Farhadi, Ali}, 157 | journal = {arXiv}, 158 | year={2018} 159 | } 160 | ``` 161 | -------------------------------------------------------------------------------- /losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import torch 4 | from torch import nn 5 | from torch import Tensor 6 | 7 | class _Loss(nn.Module): 8 | reduction: str 9 | 10 | def __init__(self, size_average=None, reduce=None, reduction: str = 'mean') -> None: 11 | super(_Loss, self).__init__() 12 | if size_average is not None or reduce is not None: 13 | self.reduction = _Reduction.legacy_get_string(size_average, reduce) 14 | else: 15 | self.reduction = reduction 16 | 17 | class SmoothL1Loss(_Loss): 18 | """Creates a criterion that uses a squared term if the absolute 19 | element-wise error falls below beta and an L1 term otherwise. 20 | It is less sensitive to outliers than the `MSELoss` and in some cases 21 | prevents exploding gradients (e.g. see `Fast R-CNN` paper by Ross Girshick). 22 | Also known as the Huber loss: 23 | 24 | .. math:: 25 | \text{loss}(x, y) = \frac{1}{n} \sum_{i} z_{i} 26 | 27 | where :math:`z_{i}` is given by: 28 | 29 | .. math:: 30 | z_{i} = 31 | \begin{cases} 32 | 0.5 (x_i - y_i)^2 / beta, & \text{if } |x_i - y_i| < beta \\ 33 | |x_i - y_i| - 0.5 * beta, & \text{otherwise } 34 | \end{cases} 35 | 36 | :math:`x` and :math:`y` arbitrary shapes with a total of :math:`n` elements each 37 | the sum operation still operates over all the elements, and divides by :math:`n`. 38 | 39 | beta is an optional parameter that defaults to 1. 40 | 41 | Note: When beta is set to 0, this is equivalent to :class:`L1Loss`. 42 | Passing a negative value in for beta will result in an exception. 43 | 44 | The division by :math:`n` can be avoided if sets ``reduction = 'sum'``. 45 | 46 | Args: 47 | size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 48 | the losses are averaged over each loss element in the batch. Note that for 49 | some losses, there are multiple elements per sample. If the field :attr:`size_average` 50 | is set to ``False``, the losses are instead summed for each minibatch. Ignored 51 | when reduce is ``False``. Default: ``True`` 52 | reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 53 | losses are averaged or summed over observations for each minibatch depending 54 | on :attr:`size_average`. 
When :attr:`reduce` is ``False``, returns a loss per 55 | batch element instead and ignores :attr:`size_average`. Default: ``True`` 56 | reduction (string, optional): Specifies the reduction to apply to the output: 57 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 58 | ``'mean'``: the sum of the output will be divided by the number of 59 | elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 60 | and :attr:`reduce` are in the process of being deprecated, and in the meantime, 61 | specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 62 | beta (float, optional): Specifies the threshold at which to change between L1 and L2 loss. 63 | This value defaults to 1.0. 64 | 65 | Shape: 66 | - Input: :math:`(N, *)` where :math:`*` means, any number of additional 67 | dimensions 68 | - Target: :math:`(N, *)`, same shape as the input 69 | - Output: scalar. If :attr:`reduction` is ``'none'``, then 70 | :math:`(N, *)`, same shape as the input 71 | 72 | """ 73 | 74 | __constants__ = ['reduction'] 75 | 76 | def __init__(self, size_average=None, reduce=None, reduction: str = 'mean', beta: float = 1.0) -> None: 77 | super(SmoothL1Loss, self).__init__(size_average, reduce, reduction) 78 | self.beta = beta 79 | 80 | def forward(self, input: Tensor, target: Tensor) -> Tensor: 81 | return smooth_l1_loss(input, target, reduction=self.reduction, beta=self.beta) 82 | 83 | 84 | def smooth_l1_loss( 85 | input: torch.Tensor, target: torch.Tensor, beta: float, reduction: str = "none" 86 | ) -> torch.Tensor: 87 | """ 88 | Smooth L1 loss defined in the Fast R-CNN paper as: 89 | | 0.5 * x ** 2 / beta if abs(x) < beta 90 | smoothl1(x) = | 91 | | abs(x) - 0.5 * beta otherwise, 92 | where x = input - target. 93 | Smooth L1 loss is related to Huber loss, which is defined as: 94 | | 0.5 * x ** 2 if abs(x) < beta 95 | huber(x) = | 96 | | beta * (abs(x) - 0.5 * beta) otherwise 97 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 98 | differences: 99 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 100 | converges to a constant 0 loss. 101 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 102 | converges to L2 loss. 103 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 104 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 105 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 106 | portion replaced with a quadratic function such that at abs(x) = beta, its 107 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 108 | Args: 109 | input (Tensor): input tensor of any shape 110 | target (Tensor): target value tensor with the same shape as input 111 | beta (float): L1 to L2 change point. 112 | For beta values < 1e-5, L1 loss is computed. 113 | reduction: 'none' | 'mean' | 'sum' 114 | 'none': No reduction will be applied to the output. 115 | 'mean': The output will be averaged. 116 | 'sum': The output will be summed. 117 | Returns: 118 | The loss with the reduction option applied. 119 | Note: 120 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 121 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 122 | the special case of both in which they are equal (beta=1). 123 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 
124 | """ 125 | if beta < 1e-5: 126 | # if beta == 0, then torch.where will result in nan gradients when 127 | # the chain rule is applied due to pytorch implementation details 128 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 129 | # zeros, rather than "no gradient"). To avoid this issue, we define 130 | # small values of beta to be exactly l1 loss. 131 | loss = torch.abs(input - target) 132 | else: 133 | n = torch.abs(input - target) 134 | cond = n < beta 135 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 136 | 137 | if reduction == "mean": 138 | loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() 139 | elif reduction == "sum": 140 | loss = loss.sum() 141 | return loss 142 | -------------------------------------------------------------------------------- /detector/yolov3/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | from utils.logger import * 5 | from utils.utils import * 6 | from utils.datasets import * 7 | from utils.parse_config import * 8 | from test import evaluate 9 | 10 | from terminaltables import AsciiTable 11 | 12 | import os 13 | import sys 14 | import time 15 | import datetime 16 | import argparse 17 | 18 | import torch 19 | from torch.utils.data import DataLoader 20 | from torchvision import datasets 21 | from torchvision import transforms 22 | from torch.autograd import Variable 23 | import torch.optim as optim 24 | 25 | if __name__ == "__main__": 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument("--epochs", type=int, default=100, help="number of epochs") 28 | parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch") 29 | parser.add_argument("--gradient_accumulations", type=int, default=2, help="number of gradient accums before step") 30 | parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") 31 | parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") 32 | parser.add_argument("--pretrained_weights", type=str, help="if specified starts from checkpoint model") 33 | parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation") 34 | parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") 35 | parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between saving model weights") 36 | parser.add_argument("--evaluation_interval", type=int, default=1, help="interval evaluations on validation set") 37 | parser.add_argument("--compute_map", default=False, help="if True computes mAP every tenth batch") 38 | parser.add_argument("--multiscale_training", default=True, help="allow for multi-scale training") 39 | opt = parser.parse_args() 40 | print(opt) 41 | 42 | logger = Logger("logs") 43 | 44 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 45 | 46 | os.makedirs("output", exist_ok=True) 47 | os.makedirs("checkpoints", exist_ok=True) 48 | 49 | # Get data configuration 50 | data_config = parse_data_config(opt.data_config) 51 | train_path = data_config["train"] 52 | valid_path = data_config["valid"] 53 | class_names = load_classes(data_config["names"]) 54 | 55 | # Initiate model 56 | model = Darknet(opt.model_def).to(device) 57 | model.apply(weights_init_normal) 58 | 59 | # If specified we start from checkpoint 60 | if 
opt.pretrained_weights: 61 | if opt.pretrained_weights.endswith(".pth"): 62 | model.load_state_dict(torch.load(opt.pretrained_weights)) 63 | else: 64 | model.load_darknet_weights(opt.pretrained_weights) 65 | 66 | # Get dataloader 67 | dataset = ListDataset(train_path, augment=True, multiscale=opt.multiscale_training) 68 | dataloader = torch.utils.data.DataLoader( 69 | dataset, 70 | batch_size=opt.batch_size, 71 | shuffle=True, 72 | num_workers=opt.n_cpu, 73 | pin_memory=True, 74 | collate_fn=dataset.collate_fn, 75 | ) 76 | 77 | optimizer = torch.optim.Adam(model.parameters()) 78 | 79 | metrics = [ 80 | "grid_size", 81 | "loss", 82 | "x", 83 | "y", 84 | "w", 85 | "h", 86 | "conf", 87 | "cls", 88 | "cls_acc", 89 | "recall50", 90 | "recall75", 91 | "precision", 92 | "conf_obj", 93 | "conf_noobj", 94 | ] 95 | 96 | for epoch in range(opt.epochs): 97 | model.train() 98 | start_time = time.time() 99 | for batch_i, (_, imgs, targets) in enumerate(dataloader): 100 | batches_done = len(dataloader) * epoch + batch_i 101 | 102 | imgs = Variable(imgs.to(device)) 103 | targets = Variable(targets.to(device), requires_grad=False) 104 | 105 | loss, outputs = model(imgs, targets) 106 | loss.backward() 107 | 108 | if batches_done % opt.gradient_accumulations: 109 | # Accumulates gradient before each step 110 | optimizer.step() 111 | optimizer.zero_grad() 112 | 113 | # ---------------- 114 | # Log progress 115 | # ---------------- 116 | 117 | log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, opt.epochs, batch_i, len(dataloader)) 118 | 119 | metric_table = [["Metrics", *[f"YOLO Layer {i}" for i in range(len(model.yolo_layers))]]] 120 | 121 | # Log metrics at each YOLO layer 122 | for i, metric in enumerate(metrics): 123 | formats = {m: "%.6f" for m in metrics} 124 | formats["grid_size"] = "%2d" 125 | formats["cls_acc"] = "%.2f%%" 126 | row_metrics = [formats[metric] % yolo.metrics.get(metric, 0) for yolo in model.yolo_layers] 127 | metric_table += [[metric, *row_metrics]] 128 | 129 | # Tensorboard logging 130 | tensorboard_log = [] 131 | for j, yolo in enumerate(model.yolo_layers): 132 | for name, metric in yolo.metrics.items(): 133 | if name != "grid_size": 134 | tensorboard_log += [(f"{name}_{j+1}", metric)] 135 | tensorboard_log += [("loss", loss.item())] 136 | logger.list_of_scalars_summary(tensorboard_log, batches_done) 137 | 138 | log_str += AsciiTable(metric_table).table 139 | log_str += f"\nTotal loss {loss.item()}" 140 | 141 | # Determine approximate time left for epoch 142 | epoch_batches_left = len(dataloader) - (batch_i + 1) 143 | time_left = datetime.timedelta(seconds=epoch_batches_left * (time.time() - start_time) / (batch_i + 1)) 144 | log_str += f"\n---- ETA {time_left}" 145 | 146 | print(log_str) 147 | 148 | model.seen += imgs.size(0) 149 | 150 | if epoch % opt.evaluation_interval == 0: 151 | print("\n---- Evaluating Model ----") 152 | # Evaluate the model on the validation set 153 | precision, recall, AP, f1, ap_class = evaluate( 154 | model, 155 | path=valid_path, 156 | iou_thres=0.5, 157 | conf_thres=0.5, 158 | nms_thres=0.5, 159 | img_size=opt.img_size, 160 | batch_size=8, 161 | ) 162 | evaluation_metrics = [ 163 | ("val_precision", precision.mean()), 164 | ("val_recall", recall.mean()), 165 | ("val_mAP", AP.mean()), 166 | ("val_f1", f1.mean()), 167 | ] 168 | logger.list_of_scalars_summary(evaluation_metrics, epoch) 169 | 170 | # Print class APs and mAP 171 | ap_table = [["Index", "Class name", "AP"]] 172 | for i, c in enumerate(ap_class): 173 | ap_table += [[c, 
class_names[c], "%.5f" % AP[i]]] 174 | print(AsciiTable(ap_table).table) 175 | print(f"---- mAP {AP.mean()}") 176 | 177 | if epoch % opt.checkpoint_interval == 0: 178 | torch.save(model.state_dict(), f"checkpoints/yolov3_ckpt_%d.pth" % epoch) 179 | -------------------------------------------------------------------------------- /eval_advPath_oneperson.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import argparse 4 | import json 5 | import numpy as np 6 | import glob 7 | 8 | # change this to your own robust detector (i.e. a model with 100% detection accuracy) 9 | ROBUST_DETECTOR_NAME = 'DFaster_RCNN_R101_COCO' 10 | 11 | def single_bbox_iou(box1, box2, x1y1x2y2=True): 12 | """ 13 | Returns the IoU of two bounding boxes 14 | """ 15 | if not x1y1x2y2: 16 | # Transform from center and width to exact coordinates 17 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 18 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 19 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 20 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 21 | else: 22 | # Get the coordinates of bounding boxes 23 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 24 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 25 | 26 | # get the corrdinates of the intersection rectangle 27 | inter_rect_x1 = max(b1_x1, b2_x1) 28 | inter_rect_y1 = max(b1_y1, b2_y1) 29 | inter_rect_x2 = min(b1_x2, b2_x2) 30 | inter_rect_y2 = min(b1_y2, b2_y2) 31 | 32 | # Intersection area 33 | inter_area = max(inter_rect_x2 - inter_rect_x1 + 1, 0.0) * max(inter_rect_y2 - inter_rect_y1 + 1, 0.0) 34 | # Union Area 35 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 36 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 37 | 38 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 39 | 40 | return iou 41 | 42 | #remove false alarms 43 | def prune_detection(detection): 44 | person_num = len(detection) 45 | if person_num == 0: 46 | return detection 47 | 48 | if person_num == 1: 49 | return detection[0] 50 | 51 | areas =[(det[2] - det[0]) * (det[3] - det[1]) for det in detection] 52 | I = sorted(range(len(areas)), key=lambda k: areas[k], reverse=True) 53 | #print (areas, I) 54 | return detection[I[0]] 55 | 56 | def is_matched(gt_bb, detections, det_thresh, match_thresh): 57 | for det in detections: 58 | if det[4] > det_thresh and single_bbox_iou(gt_bb, det[:4]) >= match_thresh: 59 | return True 60 | 61 | return False 62 | 63 | def match_results(gt_detections, detections, det_thresh, match_thresh, skip_num, skip_info=False): 64 | success = 0 65 | total_cnt = 0 66 | cut_skip = 0 67 | detection_skip = 0 68 | frame_cnt = 0 69 | for frame_num, det in gt_detections.items(): 70 | frame_cnt += 1 71 | if int(frame_num.split('_')[-1]) < skip_num: # exclude it 72 | cut_skip += 1 73 | if skip_info: 74 | print('skip %, index < %d ' % (frame_num, skip_num)) 75 | continue 76 | 77 | # det is a list of list 78 | pruned_det = prune_detection(det) 79 | person_num = len(pruned_det) 80 | if person_num <= 0: 81 | detection_skip += 1 82 | if skip_info: 83 | print ('skip %s, person_num: %d' % (frame_num, person_num)) 84 | continue 85 | 86 | if pruned_det and is_matched(pruned_det[:4], detections[frame_num], det_thresh, match_thresh): 87 | success += 1 88 | 89 | total_cnt += 1 90 | return success, total_cnt, cut_skip, detection_skip, frame_cnt 91 | 92 | def evaluate_adv_model(data_dir, data_list, attack_model, det_thresh=0.7, 
match_thresh=0.1, skip_num=0, skip_info=False): 93 | gt_files = get_file_list(data_dir, ROBUST_DETECTOR_NAME, data_list) 94 | gt_detections = [ load_detection(item) for item in gt_files ] 95 | 96 | detection_files = get_file_list(data_dir, attack_model, data_list) 97 | detections = [load_detection(item) for item in detection_files] 98 | 99 | matching_results = [match_results(gt, detection, det_thresh, match_thresh, skip_num=skip_num, \ 100 | skip_info=skip_info) for gt, detection in zip(gt_detections, detections)] 101 | 102 | return matching_results 103 | 104 | def load_detection(filename): 105 | with open(filename, 'r') as f: 106 | data = json.load(f) 107 | return data 108 | 109 | def get_file_list(data_dir, model, data_list): 110 | return [os.path.join(data_dir, model +'+'+ item + '.json') for item in data_list] 111 | 112 | def get_dataset_list(data_dir, detector): 113 | # print (adv_patches) 114 | dataset_list = glob.glob(data_dir+'/*.json') 115 | dataset_list = [os.path.basename(item).split('.')[0] for item in dataset_list if detector in item] 116 | dataset_list = [item.split('+')[-1] for item in dataset_list] 117 | #for adv_patch in adv_patches: 118 | # dataset_list = [item for item in dataset_list if adv_patch in item.split('_')] 119 | # dataset_list = [item for item in dataset_list if adv_patch in item.split('_')] 120 | return dataset_list 121 | 122 | def arg_parser(): 123 | parser = argparse.ArgumentParser(description='PyTorch evaluation') 124 | parser.add_argument('--data_list', help="a list of video files") 125 | parser.add_argument('--data_dir', type=str, default='../../ICLR_detection_results', help="where are the detection results") 126 | parser.add_argument('--adv_patch', help='adversarial patch') 127 | parser.add_argument('--victim_model', type=str, help='victim model') 128 | parser.add_argument('--skip_num', type=int, default=0, help='how many frames to be skipped') 129 | parser.add_argument('--skip_info', dest='skip_info', action='store_true', help='print skip info') 130 | parser.add_argument('--detection_thresh', dest='detection_thresh', type=float, default=0.7, help='threshold for detection_score') 131 | 132 | return parser 133 | 134 | def main(): 135 | global args 136 | parser = arg_parser() 137 | args = parser.parse_args() 138 | adv_patches = args.adv_patch.split(',') 139 | all_dataset_list = get_dataset_list(args.data_dir, args.victim_model) if args.data_list is None else args.data_list.split(',') 140 | #print (all_dataset_list) 141 | #dataset_list = [item for item in dataset_list if 'PCTN' not in item] 142 | for adv_patch in sorted(adv_patches): 143 | print('\n======== %s =================' % (adv_patch)) 144 | 145 | #dataset_list = get_dataset_list(args.data_dir, args.victim_model, adv_patch) 146 | dataset_list = [item for item in all_dataset_list if adv_patch in item.split('_')] 147 | 148 | assert dataset_list, 'no dataset found. please check the detetor name and results directory!' 
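        # NOTE: dataset names are expected to carry the patch name as one of their
        # underscore-separated tokens (see the filter above); an empty list usually
        # means the --adv_patch / --victim_model pair or --data_dir is wrong.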
149 | 150 | # determine the model to be evaluated for the data list 151 | MATCH_THRESH = 0.1 152 | results = evaluate_adv_model(args.data_dir, dataset_list, args.victim_model, det_thresh=args.detection_thresh, match_thresh=MATCH_THRESH, \ 153 | skip_num=args.skip_num, skip_info=args.skip_info) 154 | results = np.array(results) 155 | tot_results = np.sum(results, axis=0) 156 | for dataset, r in sorted(zip(dataset_list, results), key=lambda t: t[0]): 157 | print ('%10s ASR %4.2f Detected: %3d Processed: %3d Cut skip: %3d Detection skip: %3d Total: %3d' % \ 158 | (dataset, (1.0 - r[0]/r[1]), r[0], r[1], r[2], r[3], r[4])) 159 | print ('----------------------------------------------') 160 | print ('%10s ASR %4.2f Detected: %3d Processed: %3d Cut skip: %3d Detection skip: %3d Total: %3d' % \ 161 | ('All', (1.0 - tot_results[0]/tot_results[1]), tot_results[0], tot_results[1], tot_results[2], tot_results[3], tot_results[4])) 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /detector/faster_rcnn_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .object_detector import ObjectDetector 4 | from .faster_rcnn.lib.model.utils.config import cfg, cfg_from_file 5 | from .faster_rcnn.lib.model.faster_rcnn.vgg16 import vgg16 6 | from .faster_rcnn.lib.model.faster_rcnn.resnet import resnet 7 | from .faster_rcnn.lib.model.roi_layers import nms 8 | from .faster_rcnn.lib.model.rpn.bbox_transform import bbox_transform_inv 9 | from .faster_rcnn.lib.model.rpn.bbox_transform import clip_boxes 10 | import numpy as np 11 | 12 | class Faster_RCNN_Detector(ObjectDetector): 13 | def __init__(self, model_name, cfg_path, model_path, class_names, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 14 | # load SSD 15 | super().__init__(model_name, cfg_path, model_path, class_names, input_size, test_size, target_object_id) 16 | 17 | self.mean = cfg.PIXEL_MEANS[0][0].tolist() 18 | # self.test_size = cfg.TEST.SCALES 19 | self.cfg = cfg 20 | 21 | def load_model(self, cfg_path, model_path, class_names): 22 | cfg_from_file(cfg_path) 23 | # fixed 24 | cfg.POOLING_MODE = 'align' 25 | cfg.class_agnostic = False 26 | 27 | obj_classes = np.asarray(class_names) 28 | # initilize the network here. 
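        # NOTE: the backbone is selected by the EXP_DIR field of the loaded config;
        # only 'vgg16', 'res50', 'res101' and 'res152' are handled below, anything
        # else raises NameError.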
29 | if cfg.EXP_DIR == 'vgg16': 30 | fasterRCNN = vgg16(obj_classes, pretrained=False, class_agnostic=cfg.class_agnostic, anchor_scales=cfg.ANCHOR_SCALES, anchor_ratios=cfg.ANCHOR_RATIOS) 31 | elif cfg.EXP_DIR == 'res50': 32 | fasterRCNN = resnet(obj_classes, 50, pretrained=False, class_agnostic=cfg.class_agnostic, anchor_scales=cfg.ANCHOR_SCALES, anchor_ratios=cfg.ANCHOR_RATIOS) 33 | elif cfg.EXP_DIR == 'res101': 34 | fasterRCNN = resnet(obj_classes, 101, pretrained=False, class_agnostic=cfg.class_agnostic, anchor_scales=cfg.ANCHOR_SCALES, anchor_ratios=cfg.ANCHOR_RATIOS) 35 | elif cfg.EXP_DIR == 'res152': 36 | fasterRCNN = resnet(obj_classes, 152, pretrained=False, class_agnostic=cfg.class_agnostic, anchor_scales=cfg.ANCHOR_SCALES, anchor_ratios=cfg.ANCHOR_RATIOS) 37 | else: 38 | raise NameError("network %s is not defined" % (cfg.EXP_DIR) ) 39 | 40 | fasterRCNN.create_architecture() 41 | 42 | checkpoint = torch.load(model_path, map_location=(lambda storage, loc: storage)) 43 | fasterRCNN.load_state_dict(checkpoint['model']) 44 | 45 | fasterRCNN.eval() 46 | 47 | return fasterRCNN 48 | 49 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 50 | 51 | input_imgs, im_scale = self.preprocess(images) 52 | 53 | batch_size = input_imgs.size(0) 54 | with torch.no_grad(): 55 | im_info = np.array([[input_imgs.shape[2], input_imgs.shape[3], im_scale[0]]], dtype=np.float32) 56 | im_info = np.repeat(im_info, batch_size, axis=0) 57 | im_info = torch.from_numpy(im_info).cuda(device=images.device) 58 | num_boxes = torch.zeros(batch_size).cuda(device=images.device) 59 | gt_boxes = torch.zeros(batch_size, 1, 5).cuda(device=images.device) 60 | 61 | rois, cls_prob, bbox_pred, \ 62 | rpn_loss_cls, rpn_loss_box, \ 63 | RCNN_loss_cls, RCNN_loss_bbox, \ 64 | rois_label = self.model(input_imgs, im_info, gt_boxes, num_boxes) 65 | 66 | scores = cls_prob 67 | boxes = rois[:, :, 1:5] 68 | results = self.post_process(im_info, bbox_pred, scores, boxes, im_scale, conf_thresh, nms_thresh) 69 | return results 70 | 71 | def preprocess(self, images): 72 | batch_size, _, h, w = images.shape 73 | im_size_min = min(h,w) 74 | im_size_max = max(h,w) 75 | im_scale = float(self.test_size[0]) / im_size_min 76 | if np.round(im_scale * im_size_max) > self.cfg.TEST.MAX_SIZE: 77 | im_scale = float(self.cfg.TEST.MAX_SIZE) / float(im_size_max) 78 | 79 | # scale the image 80 | test_size = (round(h*im_scale), round(w*im_scale)) 81 | scaled_imgs = F.interpolate(images, size=test_size, mode='bilinear', align_corners=False) 82 | scaled_imgs *= 255.0 83 | 84 | ''' 85 | import torchvision.transforms as transforms 86 | from utils.utils import visualize_detections 87 | import os 88 | for i in range(scaled_imgs.shape[0]): 89 | train_img = transforms.ToPILImage()(images[i].detach().cpu()) 90 | train_img.save(os.path.join('tmp', '%d.jpg' % (int(100*np.random.rand())))) 91 | ''' 92 | # normalize the image 93 | mean = torch.tensor(self.mean).view(1, len(self.mean), 1, 1).cuda(device=images.device) 94 | input_imgs = scaled_imgs - mean 95 | 96 | return input_imgs, (im_scale, im_scale) 97 | 98 | def do_nms(self, scores, pred_boxes, conf_thresh, nms_thresh): 99 | results = list() 100 | for j in range(1, len(self.class_names)): 101 | inds = torch.nonzero(scores[:, j] > conf_thresh).view(-1) 102 | #print (inds) 103 | # if there is det 104 | if inds.numel() > 0: 105 | cls_scores = scores[:, j][inds] 106 | _, order = torch.sort(cls_scores, 0, True) 107 | if self.cfg.class_agnostic: 108 | cls_boxes = pred_boxes[inds, :] 109 | else: 110 | cls_boxes = 
pred_boxes[inds][:, j * 4:(j + 1) * 4] 111 | 112 | cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) 113 | # cls_dets = torch.cat((cls_boxes, cls_scores), 1) 114 | cls_dets = cls_dets[order] 115 | # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) 116 | keep = nms(cls_boxes[order, :], cls_scores[order], nms_thresh) 117 | cls_dets = cls_dets[keep.view(-1).long()] 118 | label_ids = j * torch.ones(cls_dets.size(0), 1).cuda(device=cls_dets.device) 119 | #print (cls_dets.shape, label_ids.shape) 120 | results.append(torch.cat((cls_dets, label_ids), 1)) 121 | #print (results) 122 | return torch.cat(results, dim = 0) if len(results) > 0 else [None] 123 | 124 | def post_process(self, im_info, bbox_pred, scores, boxes, im_scale, conf_thresh, nms_thresh): 125 | batch_size = bbox_pred.size(0) 126 | if self.cfg.TEST.BBOX_REG: 127 | # Apply bounding-box regression deltas 128 | box_deltas = bbox_pred 129 | if self.cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 130 | # Optionally normalize targets by a precomputed mean and stdev 131 | if self.cfg.class_agnostic: 132 | box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ 133 | + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() 134 | box_deltas = box_deltas.view(batch_size, -1, 4) 135 | else: 136 | box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ 137 | + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() 138 | box_deltas = box_deltas.view(batch_size, -1, 4 * len(self.class_names)) 139 | 140 | pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size) 141 | pred_boxes = clip_boxes(pred_boxes, im_info, batch_size) 142 | else: 143 | # Simply repeat the boxes, once for each class 144 | pred_boxes = np.tile(boxes, (1, scores.shape[1])) 145 | 146 | pred_boxes /= im_scale[0] 147 | # scores = scores.squeeze() 148 | # pred_boxes = pred_boxes.squeeze() 149 | 150 | results = [self.do_nms(scores[k], pred_boxes[k], conf_thresh, nms_thresh) for k in range(batch_size)] 151 | 152 | return results 153 | -------------------------------------------------------------------------------- /opts.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from nets.LightingNet import LIGHTINGNET_REGISTRY 4 | 5 | def arg_parser(): 6 | parser = argparse.ArgumentParser(description='PyTorch Action recognition Training') 7 | parser.add_argument('--config', type=str, default='configs/config.yaml', help="training configuration") 8 | parser.add_argument('--seed', type=int, help='manual seed') 9 | parser.add_argument('--dataset', help='path to dataset file list') 10 | parser.add_argument('--datadir', metavar='DIR', help='path to dataset file list') 11 | parser.add_argument('--logdir', dest='logdir', help='where to save the model') 12 | parser.add_argument('--train_list_file', type=str, help='training file') 13 | parser.add_argument('--val_list_file', type=str, help='validation file') 14 | parser.add_argument('--no_flip', dest='no_flip', action='store_true', help='do not flip data') 15 | parser.add_argument('--template_resize', dest='template_resize', action='store_true', help='resize template') 16 | parser.add_argument('--mask_loss', dest='mask_loss', action='store_true', help='use L1 masked loss') 17 | 18 | parser.add_argument('--loc_backbone', dest='loc_backbone', choices=['resnet18', 'resnet50', 'resnet101'], help='which backbone to use') 19 | 20 | parser.add_argument('--resume', type=str, default='', 
metavar='PATH', help='path to the model for resuming') 21 | parser.add_argument('--auto_resume', action='store_true', help='use the last checkpoint in the logdir for resume') 22 | parser.add_argument('--pretrained', dest='pretrained', type=str, metavar='PATH', help='use pre-trained model') 23 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='use this flag to validate without training') 24 | parser.add_argument('--batch_size', type=int, metavar='N', help='mini-batch size') 25 | parser.add_argument('--epochs', default=1000, type=int, metavar='N', help='number of total epochs to run') 26 | parser.add_argument('--disable_cudnn_benchmark', dest='cudnn_benchmark', action='store_false', 27 | help='Disable the cudnn benchmark mode that searches for the best algorithm (helps avoid OOM)') 28 | parser.add_argument('--optimizer', type=str, help='optimizer (Default: Adam)', choices=['Adam', 'SGD'], default='Adam') 29 | parser.add_argument('--scheduler', type=str, help='Learning Rate scheduler (Default: ReduceLROnPlateau)', choices=['ReduceLROnPlateau', 'Cosine'], default='ReduceLROnPlateau') 30 | parser.add_argument('--use_val_loss', action='store_true', help='When using ReduceLROnPlateau, use val loss to change learning rate') 31 | parser.add_argument('--name_suffix', type=str, help='suffix of model name, used for creating log folder', default='') 32 | parser.add_argument('--gpu', dest='gpu_ids', help='comma-separated list of GPU(s) to use.') 33 | parser.add_argument('--compute_dsr', action='store_true', help='Compute the detection success rate during validation; automatically turned on when the evaluate flag is set.') 34 | parser.add_argument('--obj_loss_type', type=str, default='max', choices=['max', 'avg', 'ce'], help='how the objectness loss is computed') 35 | parser.add_argument('--show_dsr_hist', action='store_true', help='Show the histogram of detection accuracy w.r.t.
the height of person.') 36 | 37 | # data-related 38 | parser.add_argument('-j', '--num_workers', type=int, metavar='N', 39 | help='number of data loading workers (default: 4)') 40 | parser.add_argument('--lr', type=float, metavar='N', help='learning rate') 41 | 42 | parser.add_argument('--use_PBM', dest='use_PBM', action='store_true', help='use patch blurring') 43 | 44 | parser.add_argument('--STN', type=str, help='affine or tps') 45 | parser.add_argument('--learnableSTN', dest='learnableSTN', action='store_true', help='learn STN') 46 | parser.add_argument('--TPS_localizer', dest='TPS_localizer', type=str, help='tps localizer') 47 | 48 | parser.add_argument('--use_PCT', dest='use_PCT', action='store_true', help='use lighting color transformation') 49 | parser.add_argument('--PrinterCT', type=str, help='PCT or LinearPCT') 50 | 51 | parser.add_argument('--use_LCT', dest='use_LCT', action='store_true', help='use lighting color transformation') 52 | parser.add_argument('--LightingCT', type=str, help='cc (color constancy) or gen (image generator)', choices=LIGHTINGNET_REGISTRY._obj_map.keys()) 53 | parser.add_argument('--lct_backbone', type=str, help='set the backbone of lightning net', default=None) 54 | 55 | parser.add_argument('--target_patch_path', dest='target_patch_path', type=str, help='target patch to be transformed') 56 | 57 | parser.add_argument('--patch_transformer_path', dest='patch_transformer_path', type=str, help='stn model') 58 | 59 | parser.add_argument('--tv_loss_weight', type=float, metavar='N', help='tv_loss_weight range[0,10]') 60 | 61 | #parser.add_argument('--use_augmentation', dest='use_augmentation', action='store_true', help='use augmentation') 62 | parser.add_argument('--use_ohem', dest='use_ohem', action='store_true', help='use ohem') 63 | parser.add_argument('--ohem_ratio', type=float, metavar='N', help='ohem ratio [0.1-1.0]') 64 | parser.add_argument('--use_EOT', dest='use_EOT', action='store_true', help='use augmentation') 65 | 66 | # parser.add_argument('--MaxProbExtractor_loss', dest='MaxProbExtractor_loss', type=str, help='type of max prob extractor') 67 | 68 | parser.add_argument('--visualize', dest='visualize', action='store_true', help='store adversarial images') 69 | parser.add_argument('--test_dir', dest='test_dir', type=str, help='test directory with images') 70 | parser.add_argument('--detection_output_dir', dest='detection_output_dir', type=str, help='output directory') 71 | 72 | parser.add_argument('--detector_impl', dest='detector_impl', type=str, help='implementation') 73 | parser.add_argument('--detector_name', dest='detector_name', type=str, help='detector name') 74 | parser.add_argument('--object_dataset', dest='object_dataset', type=str, help='object dataset: COCO or PASCAL') 75 | 76 | parser.add_argument('--collaborative_learning', '--CL', action='store_true', help='collaborative learning') 77 | parser.add_argument('--CL_pretrained', '--CLPretrain', action='store_true', help='use pretrained models collaborative learning') 78 | parser.add_argument('--collaborative_weights', '--CW', action='store_true', help='using learnable weights in collaborative learning ') 79 | parser.add_argument('--kd_norm', type=float, metavar='N', help='margin loss norm') 80 | parser.add_argument('--kd_type', type=str, metavar='N', help='loss type: margin (proposed) | mutual | one') 81 | 82 | # for distributed learning 83 | parser.add_argument('--sync-bn', action='store_true', 84 | help='sync BN across GPUs') 85 | parser.add_argument('--world-size', default=1, type=int, 
86 | help='number of nodes for distributed training') 87 | parser.add_argument('--rank', default=0, type=int, 88 | help='node rank for distributed training') 89 | parser.add_argument('--hostfile', default='', type=str, 90 | help='hostfile distributed learning') 91 | parser.add_argument('--dist-url', default='tcp://127.0.0.1:23456', type=str, 92 | help='url used to set up distributed training') 93 | parser.add_argument('--dist-backend', default='nccl', type=str, 94 | help='distributed backend') 95 | parser.add_argument('--multiprocessing-distributed', '--ddp', action='store_true', 96 | help='Use multi-processing distributed training to launch ' 97 | 'N processes per node, which has N GPUs. This is the ' 98 | 'fastest way to use PyTorch for either single node or ' 99 | 'multi node data parallel training') 100 | 101 | return parser 102 | 103 | def merge_args(args, config): 104 | for key, value in vars(args).items(): 105 | if value is not None: 106 | config[key] = value 107 | 108 | return config 109 | -------------------------------------------------------------------------------- /nets/LightingNet/cc_f4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from nets.backbone.backbone_config import get_backbone, get_last_conv_dim 5 | 6 | from . import LIGHTINGNET_REGISTRY 7 | 8 | class FCN32s(nn.Module): 9 | def __init__(self, model_name, n_class): 10 | super().__init__() 11 | self.n_class = n_class 12 | self.pretrained_net = get_backbone(model_name) 13 | last_dim = get_last_conv_dim(model_name) 14 | 15 | self.relu = nn.ReLU(inplace=True) 16 | self.deconv1 = nn.ConvTranspose2d(last_dim, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 17 | self.bn1 = nn.BatchNorm2d(512) 18 | self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 19 | self.bn2 = nn.BatchNorm2d(256) 20 | self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 21 | self.bn3 = nn.BatchNorm2d(128) 22 | self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 23 | self.bn4 = nn.BatchNorm2d(64) 24 | self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 25 | self.bn5 = nn.BatchNorm2d(32) 26 | self.classifier = nn.Conv2d(32, n_class, kernel_size=1) 27 | 28 | def forward(self, x): 29 | output = self.pretrained_net(x) # size = (N, 512, x.H/32, x.W/32) 30 | 31 | score = self.bn1(self.relu(self.deconv1(output))) # size=(N, 512, x.H/16, x.W/16) 32 | score = self.bn2(self.relu(self.deconv2(score))) # size=(N, 256, x.H/8, x.W/8) 33 | score = self.bn3(self.relu(self.deconv3(score))) # size=(N, 128, x.H/4, x.W/4) 34 | score = self.bn4(self.relu(self.deconv4(score))) # size=(N, 64, x.H/2, x.W/2) 35 | score = self.bn5(self.relu(self.deconv5(score))) # size=(N, 32, x.H, x.W) 36 | score = self.classifier(score) # size=(N, n_class, x.H/1, x.W/1) 37 | 38 | return score # size=(N, n_class, x.H/1, x.W/1) 39 | 40 | # class CC_FCN4(nn.Module): 41 | # def __init__(self, model_name): 42 | # super().__init__() 43 | # self.fcn = FCN32s(model_name, 4) 44 | # self.relu = nn.ReLU(inplace=True) 45 | # 46 | # def forward(self, x): 47 | # score = self.fcn(x) 48 | # # score = self.relu(score) 49 | # rgb = self.relu(score[:,:3,:,:]) 50 | # #rgb = F.normalize(rgb, p=2, dim=1) 51 | # _, _, h, w = score.shape 52 | # 
confidence = score[:,3:4,:,:].view(-1, h*w) 53 | # confidence = F.softmax(confidence, dim=1) 54 | # rgb = rgb * confidence.view(-1, 1, h, w) 55 | # # average pool 56 | # # rgb = F.normalize(rgb, p=2, dim=1) 57 | # return rgb 58 | 59 | class CC_Alex_FCN4(nn.Module): 60 | def __init__(self, config=None): 61 | super().__init__() 62 | FC1_SIZE = 64 63 | FC1_KERNEL_SIZE = 6 64 | self.backbone = get_backbone('alexnet') 65 | last_conv_dim = get_last_conv_dim('alexnet') 66 | self.fc1 = nn.Conv2d(last_conv_dim, FC1_SIZE, kernel_size=FC1_KERNEL_SIZE, padding=3, bias=False) 67 | self.relu = nn.ReLU(inplace=True) 68 | self.dropout = nn.Dropout(0.5) 69 | self.fc2 = nn.Conv2d(FC1_SIZE, 4, kernel_size=1, stride=1, bias=True) 70 | #self.fc_pool = nn.Conv2d(3, 3, kernel_size=8, padding=3, bias=False) 71 | self.fc_pool = nn.Conv2d(3, 3, kernel_size=8, bias=True) 72 | 73 | ''' 74 | for m in self.modules(): 75 | if isinstance(m, nn.Conv2d): 76 | m.weight.data.zero_() 77 | if m.bias is not None: 78 | m.bias.data.zero_() 79 | if isinstance(m, nn.ConvTranspose2d): 80 | assert m.kernel_size[0] == m.kernel_size[1] 81 | initial_weight = get_upsampling_weight( 82 | m.in_channels, m.out_channels, m.kernel_size[0]) 83 | m.weight.data.copy_(initial_weight) 84 | ''' 85 | 86 | # transform the template 87 | def forward(self, x): 88 | return self.forward_template(x) 89 | 90 | # the normalized output is NOT required as we need to learn the lighting condition 91 | # changes in the environment 92 | def forward_template(self, x): 93 | y = self.backbone(x) 94 | y = self.fc1(y) 95 | y = self.relu(y) 96 | y = self.dropout(y) 97 | y = self.fc2(y) 98 | y = self.relu(y) 99 | _, _, h, w = y.shape 100 | 101 | rgb = y[:, :3, :, :] 102 | rgb = F.normalize(rgb, p=2, dim=1) 103 | confidence = y[:,3:4,:,:].view(-1, h*w) 104 | confidence = F.softmax(confidence, dim=1) 105 | confidence = confidence.view(-1, 1, h, w) 106 | # rgb = F.adaptive_avg_pool2d(rgb, (1, 1)) 107 | # rgb = F.normalize(rgb, p=2, dim=1) 108 | rgb *= confidence 109 | rgb = self.relu(self.fc_pool(rgb)) 110 | print (rgb) 111 | #rgb = F.normalize(rgb, p=2, dim=1) 112 | rgb = F.interpolate(rgb, x.size()[2:], mode='bilinear', align_corners=False) 113 | return rgb #, confidence 114 | 115 | def generate(self, src_img, frame_img): 116 | rgb = self.forward_template(frame_img) 117 | return src_img * rgb 118 | 119 | 120 | @LIGHTINGNET_REGISTRY.register() 121 | class CC_FCN4(nn.Module): 122 | def __init__(self, config=None): 123 | super().__init__() 124 | FC1_OUTPUT_SIZE = 64 125 | FC1_KERNEL_SIZE = 6 126 | FC2_OUTPUT_SIZE = 4 127 | POOL_SIZE = 8 128 | backbone_name =config['lct_backbone'] 129 | self.backbone = get_backbone(backbone_name) 130 | last_conv_dim = get_last_conv_dim(backbone_name) 131 | 132 | if backbone_name == 'resnet18': 133 | POOL_SIZE = 9 134 | 135 | self.fc1 = nn.Conv2d(last_conv_dim, FC1_OUTPUT_SIZE, kernel_size=FC1_KERNEL_SIZE, padding=3, bias=False) 136 | self.relu = nn.ReLU(inplace=True) 137 | self.dropout = nn.Dropout(0.5) 138 | self.fc2 = nn.Conv2d(FC1_OUTPUT_SIZE, FC2_OUTPUT_SIZE, kernel_size=1, stride=1, bias=True) 139 | #self.fc_pool = nn.Conv2d(3, 3, kernel_size=8, padding=3, bias=False) 140 | self.fc_pool = nn.Conv2d(3, 3, kernel_size=POOL_SIZE, bias=True) 141 | 142 | ''' 143 | for m in self.modules(): 144 | if isinstance(m, nn.Conv2d): 145 | m.weight.data.zero_() 146 | if m.bias is not None: 147 | m.bias.data.zero_() 148 | if isinstance(m, nn.ConvTranspose2d): 149 | assert m.kernel_size[0] == m.kernel_size[1] 150 | initial_weight = get_upsampling_weight( 
151 | m.in_channels, m.out_channels, m.kernel_size[0]) 152 | m.weight.data.copy_(initial_weight) 153 | ''' 154 | 155 | # transform the template 156 | def forward(self, x): 157 | return self.forward_template(x) 158 | 159 | # the normalized output is NOT required as we need to learn the lighting condition 160 | # changes in the environment 161 | def forward_template(self, x): 162 | y = self.backbone(x) 163 | y = self.fc1(y) 164 | y = self.relu(y) 165 | y = self.dropout(y) 166 | y = self.fc2(y) 167 | y = self.relu(y) 168 | _, _, h, w = y.shape 169 | 170 | rgb = y[:, :3, :, :] 171 | rgb = F.normalize(rgb, p=2, dim=1) 172 | confidence = y[:,3:4,:,:].view(-1, h*w) 173 | confidence = F.softmax(confidence, dim=1) 174 | confidence = confidence.view(-1, 1, h, w) 175 | # rgb = F.adaptive_avg_pool2d(rgb, (1, 1)) 176 | # rgb = F.normalize(rgb, p=2, dim=1) 177 | #print ('-----', x.shape, y.shape, confidence.shape, rgb.shape) 178 | rgb *= confidence 179 | rgb = self.relu(self.fc_pool(rgb)) 180 | #rgb = F.normalize(rgb, p=2, dim=1) 181 | #print (rgb) 182 | # rgb = F.interpolate(rgb, x.size()[2:], mode='bilinear', align_corners=False) 183 | return rgb #, confidence 184 | 185 | def generate(self, src_img, frame_img): 186 | rgb = self.forward_template(frame_img) 187 | return src_img * rgb 188 | -------------------------------------------------------------------------------- /nets/AdvPatch/hybrid_advPatch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torchvision import transforms 4 | from .advPatch import AdvPatch 5 | from PIL import Image 6 | from .advPatch_util import generate_patch, generate_border_mask 7 | import os 8 | from utils.gaussian_blur import gaussian_blur 9 | import cv2 10 | import numpy as np 11 | 12 | def get_gaussian_kernel(kernel_size=3, sigma=2, channels=3): 13 | # Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2) 14 | ''' 15 | x_coord = torch.arange(kernel_size) 16 | x_grid = x_coord.repeat(kernel_size).view(kernel_size, kernel_size) 17 | y_grid = x_grid.t() 18 | xy_grid = torch.stack([x_grid, y_grid], dim=-1).float() 19 | 20 | mean = (kernel_size - 1)/2. 21 | variance = sigma**2. 22 | 23 | # Calculate the 2-dimensional gaussian kernel which is 24 | # the product of two gaussian distributions for two different 25 | # variables (in this case called x and y) 26 | gaussian_kernel = (1./(2.*math.pi*variance)) *\ 27 | torch.exp( 28 | -torch.sum((xy_grid - mean)**2., dim=-1) /\ 29 | (2*variance) 30 | ) 31 | 32 | # Make sure sum of values in gaussian kernel equals 1. 
33 | gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel) 34 | 35 | # Reshape to 2d depthwise convolutional weight 36 | gaussian_kernel = gaussian_kernel.view(1, 1, kernel_size, kernel_size) 37 | gaussian_kernel = gaussian_kernel.repeat(channels, 1, 1, 1) 38 | ''' 39 | 40 | filter = cv2.getGaussianKernel(kernel_size, sigma=sigma) 41 | gaussian_kernel = np.dot(filter, filter.T) 42 | gaussian_kernel = torch.from_numpy(gaussian_kernel).float() 43 | gaussian_kernel = gaussian_kernel.repeat(channels, 1, 1, 1) 44 | 45 | gaussian_filter = nn.Conv2d(in_channels=channels, out_channels=channels, 46 | kernel_size=kernel_size, groups=channels, bias=False, padding=kernel_size // 2) 47 | gaussian_filter.weight.data = gaussian_kernel 48 | gaussian_filter.weight.requires_grad = False 49 | 50 | return gaussian_filter 51 | 52 | 53 | class PatchBlurringModule(nn.Module): 54 | def __init__(self, kernel_size=3): 55 | super(PatchBlurringModule, self).__init__() 56 | self.kernel_size = kernel_size 57 | self.weights = nn.Parameter(torch.ones(3, 1, self.kernel_size, self.kernel_size) / (self.kernel_size * self.kernel_size)) 58 | 59 | def forward(self, x): 60 | 61 | weights = torch.sigmoid(self.weights) 62 | normalized_w = torch.cat([item/item.sum() for item in weights]) 63 | normalized_w.unsqueeze_(1) 64 | #print (normalized_w) 65 | return torch.conv2d(x, normalized_w, bias=None, padding=self.kernel_size//2, groups=3) 66 | 67 | class HybridAdvPatch(nn.Module): 68 | def __init__(self, config): 69 | super(HybridAdvPatch, self).__init__() 70 | self.adv_patch_size = tuple(config['adv_patch_size']) 71 | self.apply_border_mask = config['apply_border_mask'] 72 | print(' ===== AdvPatch size: (%d %d %d) =======' % (self.adv_patch_size)) 73 | 74 | if self.apply_border_mask: 75 | self.border_value = config['border_value'] 76 | border_size = int(self.adv_patch_size[0] * config['border_mask_ratio'] + 0.5) 77 | print(' ===== Border mask size: %d Value: %d =======' % (border_size, self.border_value)) 78 | self.border_mask = nn.Parameter(generate_border_mask(self.adv_patch_size, border_size)) 79 | 80 | #self.adv_patch = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 81 | self.adv_patch_near = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 82 | self.adv_patch_far = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 83 | 84 | # self.high_filter_size = nn.Parameter(torch.tensor(5.0)) 85 | # self.low_filter_size = nn.Parameter(torch.tensor(5.0)) 86 | # self.high_filter_sigma = nn.Parameter(torch.tensor(0.5)) 87 | # self.low_filter_sigma = nn.Parameter(torch.tensor(0.5)) 88 | 89 | # self.lfilter = get_gaussian_kernel(kernel_size=5, sigma=2, channels=3) 90 | # self.hfilter = get_gaussian_kernel(kernel_size=5, sigma=2, channels=3) 91 | #self.lfilter = PatchBlurringModule(kernel_size=5) 92 | #self.hfilter = PatchBlurringModule(kernel_size=5) 93 | self.blending = nn.Parameter(torch.ones(self.adv_patch_size[0], self.adv_patch_size[1]) * 0.5) 94 | 95 | self.collaborative_learning = not config['CL_pretrained'] 96 | self.collaborative_weight = None 97 | 98 | @property 99 | def patch_size(self): 100 | return self.adv_patch_size 101 | 102 | @property 103 | def border_size(self): 104 | return self.border_size if self.apply_border_mask else 0 105 | 106 | def learnable(self): 107 | #return [self.adv_patch_near, self.adv_patch_far, self.adv_patch] 108 | #return [self.adv_patch_near, self.adv_patch_far] + list(self.lfilter.parameters()) + \ 109 | # 
list(self.hfilter.parameters()) 110 | return [self.adv_patch_near, self.adv_patch_far, self.blending] 111 | 112 | def clip(self): 113 | self.adv_patch.data.clamp_(0, 1) 114 | self.adv_patch_near.data.clamp_(0, 1) 115 | self.adv_patch_far.data.clamp_(0, 1) 116 | # self.high_filter_size.data.clamp_(3.0, 7.0) 117 | # self.low_filter_size.data.clamp_(3.0, 7.0) 118 | # self.high_filter_sigma.data.clamp_(0.3, 0.8) 119 | # self.low_filter_sigma.data.clamp_(0.3, 0.8) 120 | #self.high_filter_size = torch.round(self.high_filter_size) 121 | #self.low_filter_size = torch.round(self.low_filter_size) 122 | #print (self.low_filter_size, self.high_filter_size) 123 | 124 | def forward(self): 125 | ''' 126 | lf_advT_patch = self.lfilter(self.adv_patch_far.unsqueeze(0)) 127 | lf_advT_patch = lf_advT_patch.squeeze(0) 128 | hf_advT_patch = self.hfilter(self.adv_patch_near.unsqueeze(0)) 129 | hf_advT_patch = hf_advT_patch.squeeze(0) 130 | hf_advT_patch = self.adv_patch_near - hf_advT_patch 131 | 132 | advT_patch = lf_advT_patch + hf_advT_patch 133 | advT_patch.data.clamp_(0,1) 134 | ''' 135 | blending = torch.sigmoid(self.blending) 136 | self.adv_patch = self.adv_patch_far * blending + self.adv_patch_near * ( 1.0 - blending) 137 | 138 | if self.training: 139 | return self.adv_patch, self.adv_patch_near, self.adv_patch_far 140 | 141 | return self.adv_patch 142 | 143 | def save_patch(self, patch_path): 144 | adv_patch = self.adv_patch.detach().cpu() 145 | im = transforms.ToPILImage('RGB')(adv_patch) 146 | im.save(patch_path) 147 | 148 | base_path, adv_file = os.path.split(patch_path) 149 | base_file, ext = adv_file.split('.') 150 | 151 | adv_patch_near = self.adv_patch_near.detach().cpu() 152 | im_near = transforms.ToPILImage('RGB')(adv_patch_near) 153 | im_near.save(os.path.join(base_path, base_file + '_near.' + ext)) 154 | 155 | adv_patch_far = self.adv_patch_far.detach().cpu() 156 | im_far = transforms.ToPILImage('RGB')(adv_patch_far) 157 | im_far.save(os.path.join(base_path, base_file + '_far.' + ext)) 158 | 159 | def _load_patch_image(self, patch_path): 160 | patch_img = Image.open(patch_path).convert('RGB') 161 | w, h = patch_img.size 162 | # first dim is height 163 | adv_h, adv_w = self.adv_patch_size[:2] 164 | if w != adv_w or h != adv_h: 165 | patch_img = transforms.Resize((adv_h, adv_w), Image.BILINEAR)(patch_img) 166 | return patch_img 167 | 168 | def load_patch(self, patch_path): 169 | patch_img = self._load_patch_image(patch_path) 170 | self.adv_patch = torch.nn.Parameter(transforms.ToTensor()(patch_img)) 171 | 172 | base_path, adv_file = os.path.split(patch_path) 173 | base_file, ext = adv_file.split('.') 174 | adv_near_file = os.path.join(base_path, base_file+'_near.'+ext) 175 | if os.path.isfile(adv_near_file): 176 | near_patch_img = self._load_patch_image(adv_near_file) 177 | self.adv_patch_near = torch.nn.Parameter(transforms.ToTensor()(near_patch_img)) 178 | 179 | adv_far_file = os.path.join(base_path, base_file+'_far.'+ext) 180 | if os.path.isfile(adv_far_file): 181 | far_patch_img = self._load_patch_image(adv_far_file) 182 | self.adv_patch_far = torch.nn.Parameter(transforms.ToTensor()(far_patch_img)) 183 | 184 | def create_hybrid_advPatch_model(config): 185 | return HybridAdvPatch(config) 186 | --------------------------------------------------------------------------------