├── dataset └── __init__.py ├── nets ├── STNet │ ├── __init__.py │ ├── grid_sample.py │ ├── affine_STN.py │ ├── tps_STN.py │ ├── AffineSTN.py │ ├── tps_grid_gen.py │ └── STNLocalizer.py ├── AdvPatch │ ├── __init__.py │ ├── advPatch.py │ ├── advPatch_model_builder.py │ ├── advPatch_util.py │ ├── collaborative_advPatch.py │ └── hybrid_advPatch.py ├── ColorNet │ ├── __init__.py │ ├── color_transformer.py │ └── PCT_transformation.py ├── backbone │ ├── __init__.py │ └── backbone_config.py ├── PatchTransformer │ ├── __init__.py │ ├── patch_blurring.py │ ├── patchTransformer_model_builder.py │ └── patch_transformer_net.py ├── LightingNet │ ├── __init__.py │ ├── cc_gen.py │ ├── cc_drn.py │ └── cc_f4.py └── EOTTransformer │ └── EOT_transformer.py ├── trainer └── __init__.py ├── detector ├── yolov2 │ ├── __init__.py │ ├── arial.ttf │ ├── coco.names │ ├── patch_config.py │ └── yolo.cfg ├── yolov3 │ ├── utils │ │ ├── __init__.py │ │ ├── augmentations.py │ │ ├── logger.py │ │ ├── parse_config.py │ │ └── datasets.py │ ├── data │ │ ├── custom │ │ │ ├── classes.names │ │ │ ├── train.txt │ │ │ ├── valid.txt │ │ │ ├── labels │ │ │ │ └── train.txt │ │ │ └── images │ │ │ │ └── train.jpg │ │ ├── samples │ │ │ ├── dog.jpg │ │ │ ├── eagle.jpg │ │ │ ├── field.jpg │ │ │ ├── messi.jpg │ │ │ ├── room.jpg │ │ │ ├── giraffe.jpg │ │ │ ├── person.jpg │ │ │ ├── street.jpg │ │ │ └── herd_of_horses.jpg │ │ ├── coco.names │ │ └── get_coco_dataset.sh │ ├── assets │ │ ├── dog.png │ │ ├── messi.png │ │ ├── giraffe.png │ │ └── traffic.png │ ├── config │ │ ├── custom.data │ │ ├── coco.data │ │ └── yolov3-tiny.cfg │ ├── requirements.txt │ ├── weights │ │ └── download_weights.sh │ ├── test.py │ ├── detect.py │ ├── README.md │ └── train.py ├── build_object_detector.py ├── object_detector.py ├── SSD_detector.py ├── yolov3_detector.py ├── yolo_util.py ├── yolov2_detector.py └── faster_rcnn_detector.py ├── .gitignore ├── requirement.txt ├── utils ├── arial.ttf ├── logger.py └── gaussian_blur.py ├── kaidi_color_model ├── weights2_0_1.npz ├── weights2__1_1.npz └── weights2_digital2new_0_1.npz ├── .gitmodules ├── configs ├── config_patchTransformer.yaml ├── config_advPatch_detectron2.yaml ├── config_advPatch.yaml └── config_collaborative_advPatch.yaml ├── losses ├── ohem_loss.py ├── mask_losses.py └── smooth_l1_loss.py ├── README.md ├── demo_detector.py ├── pytorch_msssim └── __init__.py ├── eval_advPath_oneperson.py └── opts.py /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nets/STNet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /trainer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /detector/yolov2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nets/AdvPatch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nets/ColorNet/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /nets/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ -------------------------------------------------------------------------------- /detector/yolov3/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nets/PatchTransformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /detector/yolov3/data/custom/classes.names: -------------------------------------------------------------------------------- 1 | train 2 | -------------------------------------------------------------------------------- /detector/yolov3/data/custom/train.txt: -------------------------------------------------------------------------------- 1 | data/custom/images/train.jpg 2 | -------------------------------------------------------------------------------- /detector/yolov3/data/custom/valid.txt: -------------------------------------------------------------------------------- 1 | data/custom/images/train.jpg 2 | -------------------------------------------------------------------------------- /detector/yolov3/data/custom/labels/train.txt: -------------------------------------------------------------------------------- 1 | 0 0.515 0.5 0.21694873 0.18286777 2 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | yacs 4 | cython 5 | packaging 6 | easydict -------------------------------------------------------------------------------- /utils/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/utils/arial.ttf -------------------------------------------------------------------------------- /detector/yolov2/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov2/arial.ttf -------------------------------------------------------------------------------- /detector/yolov3/assets/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/assets/dog.png -------------------------------------------------------------------------------- /detector/yolov3/assets/messi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/assets/messi.png -------------------------------------------------------------------------------- /detector/yolov3/assets/giraffe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/assets/giraffe.png -------------------------------------------------------------------------------- /detector/yolov3/assets/traffic.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/assets/traffic.png -------------------------------------------------------------------------------- /kaidi_color_model/weights2_0_1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/kaidi_color_model/weights2_0_1.npz -------------------------------------------------------------------------------- /kaidi_color_model/weights2__1_1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/kaidi_color_model/weights2__1_1.npz -------------------------------------------------------------------------------- /detector/yolov3/data/samples/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/dog.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/eagle.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/field.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/field.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/messi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/messi.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/room.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/room.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/giraffe.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/person.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/samples/street.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/street.jpg -------------------------------------------------------------------------------- /detector/yolov3/data/custom/images/train.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/custom/images/train.jpg 
-------------------------------------------------------------------------------- /detector/yolov3/config/custom.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train=data/custom/train.txt 3 | valid=data/custom/valid.txt 4 | names=data/custom/classes.names 5 | -------------------------------------------------------------------------------- /detector/yolov3/data/samples/herd_of_horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/detector/yolov3/data/samples/herd_of_horses.jpg -------------------------------------------------------------------------------- /kaidi_color_model/weights2_digital2new_0_1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/advPatNet-pytorch/main/kaidi_color_model/weights2_digital2new_0_1.npz -------------------------------------------------------------------------------- /detector/yolov3/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch>=1.0 3 | torchvision 4 | matplotlib 5 | tensorflow 6 | tensorboard 7 | terminaltables 8 | pillow 9 | tqdm 10 | -------------------------------------------------------------------------------- /detector/yolov3/config/coco.data: -------------------------------------------------------------------------------- 1 | classes= 80 2 | train=data/coco/trainvalno5k.txt 3 | valid=data/coco/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "detector/detectron2"] 2 | path = detector/detectron2 3 | url = https://github.com/facebookresearch/detectron2.git 4 | [submodule "detector/SSD"] 5 | path = detector/SSD 6 | url = https://github.com/lufficc/SSD 7 | -------------------------------------------------------------------------------- /detector/yolov3/utils/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | def horisontal_flip(images, targets): 7 | images = torch.flip(images, [-1]) 8 | targets[:, 2] = 1 - targets[:, 2] 9 | return images, targets 10 | -------------------------------------------------------------------------------- /detector/yolov3/weights/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Download weights for vanilla YOLOv3 3 | wget -c https://pjreddie.com/media/files/yolov3.weights 4 | # # Download weights for tiny YOLOv3 5 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 6 | # Download weights for backbone network 7 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 8 | -------------------------------------------------------------------------------- /nets/PatchTransformer/patch_blurring.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | class PatchBlurringModule(nn.Module): 7 | def __init__(self): 8 | super(PatchBlurringModule, self).__init__() 9 | self.blurring_factor = torch.nn.Parameter(torch.tensor([1.0])) 10 | 11 | def forward(self, x): 12 | blurring_factor = 
torch.clamp(self.blurring_factor, min=0.1, max=1.0) 13 | print (self.blurring_factor, blurring_factor) 14 | return F.interpolate(x, scale_factor= blurring_factor.item(), mode='bilinear', align_corners=False) 15 | -------------------------------------------------------------------------------- /nets/STNet/grid_sample.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # credit to https://github.com/WarBean/tps_stn_pytorch/blob/master/tps_grid_gen.py 3 | 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | def grid_sample(input, grid, canvas = None): 8 | output = F.grid_sample(input, grid) 9 | 10 | if canvas is None: 11 | return output 12 | else: 13 | input_mask = Variable(input.data.new(input.size()).fill_(1)) 14 | output_mask = F.grid_sample(input_mask, grid) 15 | padded_output = output * output_mask + canvas * (1 - output_mask) 16 | return padded_output 17 | -------------------------------------------------------------------------------- /detector/yolov3/utils/logger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Logger(object): 5 | def __init__(self, log_dir): 6 | """Create a summary writer logging to log_dir.""" 7 | self.writer = tf.summary.FileWriter(log_dir) 8 | 9 | def scalar_summary(self, tag, value, step): 10 | """Log a scalar variable.""" 11 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 12 | self.writer.add_summary(summary, step) 13 | 14 | def list_of_scalars_summary(self, tag_value_pairs, step): 15 | """Log scalar variables.""" 16 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs]) 17 | self.writer.add_summary(summary, step) 18 | -------------------------------------------------------------------------------- /nets/LightingNet/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any 2 | 3 | import torch.nn as nn 4 | from fvcore.common.registry import Registry 5 | 6 | LIGHTINGNET_REGISTRY = Registry('LIGHTINGNET') 7 | LIGHTINGNET_REGISTRY.__doc__ = """ 8 | Registry for lightning model. 9 | The registered object will be called with `obj(config)`. 10 | The call should return a `torch.nn.Module` object. 
11 | """ 12 | 13 | 14 | def lighting_net_builder(config: Dict[str, Any]) -> nn.Module: 15 | 16 | return LIGHTINGNET_REGISTRY.get(config['LightingCT'])(config) 17 | 18 | 19 | from .cc_drn import CCDRN 20 | from .cc_f4 import CC_FCN4 21 | from .fine_generator import Generator 22 | from .cc_gen import CCGenerator 23 | 24 | 25 | # TODO: support deprecated name 26 | LIGHTINGNET_REGISTRY._do_register('cc', CC_FCN4) 27 | LIGHTINGNET_REGISTRY._do_register('gen', Generator) 28 | -------------------------------------------------------------------------------- /detector/yolov2/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /detector/yolov3/data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /detector/yolov3/data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 4 | 5 | # Clone COCO API 6 | git clone https://github.com/pdollar/coco 7 | cd coco 8 | 9 | mkdir images 10 | cd images 11 | 12 | # Download Images 13 | wget -c https://pjreddie.com/media/files/train2014.zip 14 | wget -c https://pjreddie.com/media/files/val2014.zip 15 | 16 | # Unzip 17 | unzip -q train2014.zip 18 | unzip -q 
val2014.zip 19 | 20 | cd .. 21 | 22 | # Download COCO Metadata 23 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 24 | wget -c https://pjreddie.com/media/files/coco/5k.part 25 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 26 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 27 | tar xzf labels.tgz 28 | unzip -q instances_train-val2014.zip 29 | 30 | # Set Up Image Lists 31 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 32 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt 33 | -------------------------------------------------------------------------------- /detector/yolov3/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def parse_model_config(path): 4 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 5 | file = open(path, 'r') 6 | lines = file.read().split('\n') 7 | lines = [x for x in lines if x and not x.startswith('#')] 8 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 9 | module_defs = [] 10 | for line in lines: 11 | if line.startswith('['): # This marks the start of a new block 12 | module_defs.append({}) 13 | module_defs[-1]['type'] = line[1:-1].rstrip() 14 | if module_defs[-1]['type'] == 'convolutional': 15 | module_defs[-1]['batch_normalize'] = 0 16 | else: 17 | key, value = line.split("=") 18 | value = value.strip() 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | def parse_data_config(path): 24 | """Parses the data configuration file""" 25 | options = dict() 26 | options['gpus'] = '0,1,2,3' 27 | options['num_workers'] = '10' 28 | with open(path, 'r') as fp: 29 | lines = fp.readlines() 30 | for line in lines: 31 | line = line.strip() 32 | if line == '' or line.startswith('#'): 33 | continue 34 | key, value = line.split('=') 35 | options[key.strip()] = value.strip() 36 | return options 37 | -------------------------------------------------------------------------------- /nets/STNet/affine_STN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | from nets.STNet.STNLocalizer import AffineLocalizer 6 | 7 | def init_module(module): 8 | for m in module.modules(): 9 | if isinstance(m, nn.Conv2d): 10 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 11 | if m.bias is not None: 12 | nn.init.zeros_(m.bias) 13 | elif isinstance(m, nn.BatchNorm2d): 14 | nn.init.ones_(m.weight) 15 | nn.init.zeros_(m.bias) 16 | elif isinstance(m, nn.Linear): 17 | nn.init.normal_(m.weight, 0, 0.01) 18 | if m.bias is not None: 19 | nn.init.zeros_(m.bias) 20 | 21 | 22 | class AffineSTNNet(nn.Module): 23 | def __init__(self, config): 24 | super(AffineSTNNet, self).__init__() 25 | self.localizer = AffineLocalizer(backbone=config['loc_backbone'], 26 | downsample_dim=config['loc_downsample_dim'], 27 | fc_dim=config['loc_fc_dim'], 28 | predict_dimension=config['adjust_patch_size']) 29 | 30 | # transform the template 31 | def forward(self, x, template): 32 | # transform the input 33 | theta, output_scale = self.localizer(x) 34 | grid = F.affine_grid(theta, template.size()) 35 | y = F.grid_sample(template, grid) 36 | 37 | return y, theta, output_scale 38 | -------------------------------------------------------------------------------- /detector/build_object_detector.py: 
-------------------------------------------------------------------------------- 1 | from .detector_info import * 2 | 3 | def build_object_detector(config): 4 | detector_impl = config['detector_impl'] 5 | detector_name = config['detector_name'] 6 | input_size = config['detector_input_size'] 7 | test_size = config['detector_test_size'] 8 | target_object_id = config['target_object_id'] 9 | object_dataset = config['object_dataset'] 10 | 11 | model_name = detector_name + '_' + object_dataset 12 | detector_info = DETECTOR_INFO[detector_impl][model_name] 13 | detector= detector_info['detector'] 14 | cfg_path = detector_info['cfg_path'] 15 | model_path = detector_info['model_path'] 16 | 17 | if test_size[0] < 0: 18 | test_size = detector_info['test_size'] 19 | 20 | if target_object_id < 0: 21 | target_object_id = detector_info['target_object_id'] 22 | 23 | print ('====== Object Detector Information ========') 24 | print ('Detector: %s: %s...' % (detector_impl, detector_name)) 25 | print ('CFG path: %s Model path: %s ' % (cfg_path, model_path)) 26 | print ('Input_size: (%d %d) test_size (%d %d)' % (input_size[0], input_size[1], test_size[0], test_size[1])) 27 | print ('Dataset: %s' % (object_dataset)) 28 | print ('Target object ID: %d\n' % (target_object_id)) 29 | 30 | return detector(model_name=model_name, 31 | cfg_path=cfg_path, 32 | model_path= model_path, 33 | class_names = OBJECT_CLASS_NAMES[object_dataset], 34 | input_size=input_size, 35 | test_size=test_size, 36 | target_object_id = target_object_id) 37 | -------------------------------------------------------------------------------- /nets/LightingNet/cc_gen.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | 5 | from . 
import LIGHTINGNET_REGISTRY 6 | from .fine_generator import FineGenerator 7 | 8 | @LIGHTINGNET_REGISTRY.register() 9 | class CCGenerator(nn.Module): 10 | 11 | def __init__(self, config=None): 12 | super().__init__() 13 | self.input_dim = config['input_dim'] 14 | self.cnum = config['ngf'] 15 | self.use_cuda = config['cuda'] 16 | self.device_ids = config['gpu_ids'] 17 | 18 | self.backbone = FineGenerator(self.input_dim, self.cnum, self.use_cuda, self.device_ids, output_dim=4) 19 | 20 | # self.pool = nn.Conv2d(3, 3, kernel_size=9, stride=8, bias=True) 21 | self.pool = nn.Conv2d(3, 3, kernel_size=11, stride=10, bias=True) 22 | 23 | # transform the template 24 | def forward(self, x): 25 | return self.forward_template(x) 26 | 27 | # the normalized output is NOT required as we need to learn the lighting condition 28 | # changes in the environment 29 | def forward_template(self, x): 30 | y = self.backbone(x, add_input_back=False) 31 | _, _, h, w = y.shape 32 | 33 | rgb = y[:, :3, :, :] 34 | rgb = F.normalize(rgb, p=2, dim=1) 35 | confidence = y[:, -1, :, :].view(-1, h * w) 36 | confidence = F.softmax(confidence, dim=1) 37 | confidence = confidence.view(-1, 1, h, w) 38 | 39 | rgb = rgb * confidence 40 | #rgb = F.relu(rgb) 41 | rgb = F.relu(self.pool(rgb)) 42 | 43 | return rgb 44 | 45 | def generate(self, src_img, frame_img): 46 | rgb = self.forward_template(frame_img) 47 | return src_img * rgb 48 | -------------------------------------------------------------------------------- /configs/config_patchTransformer.yaml: -------------------------------------------------------------------------------- 1 | # data parameters 2 | use_augmentation: True 3 | 4 | template_shape: [252, 150, 3] # H, W 5 | template_resize: False 6 | 7 | # geometric transformation --- STN parameters 8 | learnableSTN: False # learn STN or fix it 9 | STN_loss: L1Mask # L1, L2, SIMMMask, L1Mask 10 | STN: tps # affine or tps 11 | loc_backbone: resnet18 12 | loc_downsample_dim: 128 13 | loc_fc_dim: 256 14 | adjust_patch_size: False # learn to adjust the patch size for pasting 15 | TPS_localizer: bounded_stn #bounded_stn or unbounded_stn 16 | TPS_range: [0.999, 0.999] 17 | TPS_grid: [20, 10] 18 | 19 | # patch blurring module 20 | use_PBM: False 21 | 22 | # printer color transformation (PCT) 23 | use_PCT: False 24 | PrinterCT: PCTLinear # PCT or PCTLinear or PCTLinearBias or PCTNeural 25 | PCT_loss: L1 26 | color_transformation_path: 'kaidi_color_model/weights2_digital2new_0_1.npz' 27 | 28 | use_LCT: False 29 | LCT_loss: L1Mask 30 | LightingCT: gen #cc (color constancy, i.e. cc_fc4) or gen (image generator) 31 | lct_backbone: alexnet #alextnet, resnet18 32 | lct_input_size: [256, 256] 33 | #generator_input_size: [768, 768] 34 | #generator_input_size: [384, 384] 35 | generator_input_size: [256, 256] 36 | 37 | epochs: 1000 38 | batch_size: 72 39 | input_dim: 3 40 | ngf: 48 41 | image_shape: [256, 256, 3] 42 | 43 | # log 44 | model_checkpoint: checkpoint.pth.tar 45 | model_best: model_best.pth.tar 46 | 47 | # training parameters 48 | cuda: True 49 | gpu_ids: [0, 1, 2, 3, 4, 5] # set the GPU ids to use, e.g. 
[0] or [1, 2] 50 | num_workers: 24 51 | lr: 0.0001 52 | beta1: 0.5 53 | beta2: 0.9 54 | print_iter: 20 55 | 56 | # scheduler 57 | scheduler_patience: 25 58 | scheduler_factor: 0.5 59 | -------------------------------------------------------------------------------- /losses/ohem_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | ''' 5 | class NLL_OHEM(th.nn.NLLLoss): 6 | """ Online hard example mining. 7 | Needs input from nn.LogSotmax() """ 8 | 9 | def __init__(self, ratio): 10 | super(NLL_OHEM, self).__init__(None, True) 11 | self.ratio = ratio 12 | 13 | def forward(self, x, y, ratio=None): 14 | if ratio is not None: 15 | self.ratio = ratio 16 | num_inst = x.size(0) 17 | num_hns = int(self.ratio * num_inst) 18 | x_ = x.clone() 19 | inst_losses = th.autograd.Variable(th.zeros(num_inst)).cuda() 20 | for idx, label in enumerate(y.data): 21 | inst_losses[idx] = -x_.data[idx, label] 22 | # loss_incs = -x_.sum(1) 23 | _, idxs = inst_losses.topk(num_hns) 24 | x_hn = x.index_select(0, idxs) 25 | y_hn = y.index_select(0, idxs) 26 | return th.nn.functional.nll_loss(x_hn, y_hn) 27 | ''' 28 | 29 | class Adv_OHEM(nn.Module): 30 | """ Online hard example mining. 31 | Needs the max probability of the bboxes in each image """ 32 | 33 | def __init__(self, ratio): 34 | super(Adv_OHEM, self).__init__() 35 | self.ratio = ratio 36 | 37 | def forward(self, x, ratio=None): 38 | if ratio is not None: 39 | self.ratio = ratio 40 | num_inst = x.size(0) 41 | num_hns = int(self.ratio * num_inst) 42 | #x_ = x.clone() 43 | inst_losses = torch.autograd.Variable(torch.zeros(num_inst)).cuda() 44 | for idx, prob in enumerate(x.data): 45 | inst_losses[idx] = x.data[idx] 46 | # loss_incs = -x_.sum(1) 47 | 48 | _, idxs = inst_losses.topk(num_hns) 49 | x_hn = x.index_select(0, idxs) 50 | return torch.mean(x_hn) 51 | -------------------------------------------------------------------------------- /nets/LightingNet/cc_drn.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | from nets.backbone.backbone_config import get_backbone, get_last_conv_dim 5 | 6 | from . 
import LIGHTINGNET_REGISTRY 7 | 8 | 9 | @LIGHTINGNET_REGISTRY.register() 10 | class CCDRN(nn.Module): 11 | 12 | def __init__(self, config=None): 13 | super().__init__() 14 | 15 | backbone_name = config['lct_backbone'] 16 | self.backbone = get_backbone(backbone_name) 17 | last_conv_dim = get_last_conv_dim(backbone_name) 18 | ori_output_size = 256 // 4 19 | 20 | self.projection = nn.Sequential( 21 | nn.Conv2d(last_conv_dim, 64, kernel_size=1, padding=0, bias=False), 22 | nn.BatchNorm2d(64), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(64, 4, kernel_size=1, padding=0, bias=False), 25 | nn.Dropout(0.5), 26 | nn.ReLU(inplace=True) 27 | ) 28 | 29 | self.pool = nn.Conv2d(3, 3, kernel_size=9, stride=ori_output_size // 8, bias=True) 30 | 31 | # transform the template 32 | def forward(self, x): 33 | return self.forward_template(x) 34 | 35 | # the normalized output is NOT required as we need to learn the lighting condition 36 | # changes in the environment 37 | def forward_template(self, x): 38 | y = self.backbone(x) 39 | _, _, h, w = y.shape 40 | 41 | rgb = y[:, :3, :, :] 42 | rgb = F.normalize(rgb, p=2, dim=1) 43 | confidence = y[:, -1, :, :].view(-1, h * w) 44 | confidence = F.softmax(confidence, dim=1) 45 | confidence = confidence.view(-1, 1, h, w) 46 | 47 | rgb = rgb * confidence 48 | rgb = F.relu(self.pool(rgb)) 49 | return rgb 50 | 51 | def generate(self, src_img, frame_img): 52 | rgb = self.forward_template(frame_img) 53 | return src_img * rgb 54 | -------------------------------------------------------------------------------- /nets/backbone/backbone_config.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torchvision.models as models 3 | from functools import partial 4 | 5 | try: 6 | from torch.hub import load_state_dict_from_url 7 | except ImportError: 8 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 9 | 10 | 11 | def dilated_resnet(depth, num_classes=1000, pretrained: bool = False): 12 | block = models.resnet.BasicBlock if depth < 50 else models.resnet.Bottleneck 13 | layers = { 14 | 18: [2, 2, 2, 2], 15 | 34: [3, 4, 6, 3], 16 | 50: [3, 4, 6, 3], 17 | 101: [3, 4, 23, 3], 18 | 152: [3, 8, 36, 3]}[depth] 19 | 20 | model = models.ResNet(block, layers, num_classes=num_classes, replace_stride_with_dilation=[True, True, True]) 21 | if pretrained: 22 | state_dict = load_state_dict_from_url(models.resnet.model_urls[f'resnet{depth}'], map_location='cpu') 23 | model.load_state_dict(state_dict) 24 | return model 25 | 26 | 27 | backbone_info = { 28 | 'resnet18': {'model': models.resnet18, 'last_conv_dim': 512}, 29 | 'resnet50': {'model': models.resnet50, 'last_conv_dim': 2048}, 30 | 'resnet101': {'model': models.resnet101, 'last_conv_dim': 2048}, 31 | 'alexnet': {'model': models.alexnet, 'last_conv_dim': 256}, 32 | 'vgg11_bn': {'model': models.vgg11_bn, 'last_conv_dim': 512}, 33 | 'vgg11': {'model': models.vgg11, 'last_conv_dim': 512}, 34 | 'vgg19_bn': {'model': models.vgg19_bn, 'last_conv_dim': 512}, 35 | 'vgg19': {'model': models.vgg19, 'last_conv_dim': 512}, 36 | 'dresnet18': {'model': partial(dilated_resnet, depth=18), 'last_conv_dim': 512}, 37 | 'dresnet50': {'model': partial(dilated_resnet, depth=50), 'last_conv_dim': 512} 38 | } 39 | 40 | 41 | def get_backbone(name: str, pretrained: bool = True) -> nn.Module: 42 | model = backbone_info[name]['model'](num_classes=1000, pretrained=pretrained) 43 | return nn.Sequential(*list(model.children())[0:-2]) 44 | 45 | 46 | def get_last_conv_dim(name: str) -> 
int: 47 | return backbone_info[name]['last_conv_dim'] 48 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import datetime 4 | import logging 5 | import shutil 6 | 7 | 8 | def date_uid(): 9 | """Generate a unique id based on date. 10 | 11 | Returns: 12 | str: Return uid string, e.g. '20171122171307111552'. 13 | 14 | """ 15 | return str(datetime.datetime.now()).replace('-', '') \ 16 | .replace(' ', '').replace(':', '').replace('.', '') 17 | 18 | 19 | def get_logger(checkpoint_path, filename, filemode='w'): 20 | """ 21 | Get the root logger 22 | :param checkpoint_path: only specify this when the first time call it 23 | :return: the root logger 24 | """ 25 | if filemode == 'w' and os.path.exists(os.path.join(checkpoint_path, filename)): 26 | print ("\n**************************************************", flush=True) 27 | print("Found old results, copying it to avoid for overwritten.", flush=True) 28 | target_path = checkpoint_path.rstrip('/') 29 | i = 0 30 | curr_target_path = target_path + f".{i}" 31 | while True: 32 | if os.path.exists(curr_target_path): 33 | i += 1 34 | curr_target_path = target_path + f".{i}" 35 | else: 36 | break 37 | print(f"Copying old log folder to {curr_target_path}", flush=True) 38 | print ("**************************************************\n", flush=True) 39 | shutil.copytree(checkpoint_path, curr_target_path) 40 | 41 | if checkpoint_path: 42 | logger = logging.getLogger() 43 | formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 44 | stream_hdlr = logging.StreamHandler(sys.stdout) 45 | # log_filename = date_uid() 46 | file_hdlr = logging.FileHandler(os.path.join(checkpoint_path, filename), mode=filemode) 47 | stream_hdlr.setFormatter(formatter) 48 | file_hdlr.setFormatter(formatter) 49 | logger.addHandler(stream_hdlr) 50 | logger.addHandler(file_hdlr) 51 | logger.setLevel(logging.INFO) 52 | else: 53 | logger = logging.getLogger() 54 | return logger 55 | -------------------------------------------------------------------------------- /nets/EOTTransformer/EOT_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | class EOTTransformer(nn.Module): 7 | def __init__(self, contrast=(0.9, 1.1), brightness=(-0.1, 0.1), rotation=8.0, scale=(0.85, 1.15)): 8 | #def __init__(self, contrast=(0.8, 1.2), brightness=(-0.2, 0.2), rotation=8.0, scale=(1.0, 1.0)): 9 | super(EOTTransformer, self).__init__() 10 | self.contrast_min, self.contrast_max = contrast 11 | self.brightness_min, self.brightness_max = brightness 12 | self.rotation_min, self.rotation_max = -rotation, rotation 13 | self.scale_min, self.scale_max = scale 14 | #self.theta = nn.Parameter(torch.tensor([[1, 0, 0], [0, 1, 0]], dtype=torch.float)) 15 | #self.theta.cuda() 16 | 17 | #self.compose = transforms.Compose([transforms.ColorJitter(brightness, contrast), 18 | # transforms.RandomAffine(rotation, scale=scale, fillcolor=0.0)]) 19 | 20 | # x in range [0 1] 21 | def forward(self, x, do_rotate=True): 22 | num_batch= x.shape[0] 23 | contrast = torch.FloatTensor(num_batch, 1, 1, 1).uniform_(self.contrast_min, self.contrast_max).cuda() 24 | brightness = torch.FloatTensor(num_batch, 1, 1, 1).uniform_(-self.brightness_min, self.brightness_max).cuda() 25 | y = torch.clamp(x * contrast + brightness, 
0, 1) 26 | 27 | # do affine transformation 28 | a = np.random.uniform(self.rotation_min, self.rotation_max, num_batch) / 180 * np.pi 29 | s = np.random.uniform(self.scale_min, self.scale_max, num_batch) 30 | 31 | t = np.stack ((np.cos(a)*s, -np.sin(a)*s, np.zeros(num_batch), np.sin(a)*s, np.cos(a)*s, np.zeros(num_batch)), axis=1) 32 | t = t.reshape(num_batch, 2, 3) 33 | # t = np.array([[np.cos(angle), -1.0*np.sin(angle), 0], [np.sin(angle), np.cos(angle), 0]], dtype=np.float)*scale 34 | # self.theta.data.copy_(torch.from_numpy(t)) 35 | #print (angle/np.pi*180, scale, t, self.theta) 36 | t = torch.tensor(t, dtype=torch.float).cuda() 37 | grid = F.affine_grid(t, y.size(), align_corners=False) 38 | y = F.grid_sample(y, grid) 39 | y = torch.clamp(y, 0, 1) 40 | 41 | return y 42 | -------------------------------------------------------------------------------- /nets/STNet/tps_STN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import itertools 6 | from nets.STNet.STNLocalizer import BoundedTPSLocalizer, UnBoundedTPSLocalizer 7 | from nets.STNet.tps_grid_gen import TPSGridGen 8 | from nets.STNet.grid_sample import grid_sample 9 | 10 | class TpsSTNNet(nn.Module): 11 | def __init__(self, config): 12 | super(TpsSTNNet, self).__init__() 13 | 14 | r1, r2 = config['TPS_range'] # height and width 15 | grid_height, grid_width = config['TPS_grid'] 16 | assert r1 < 1 and r2 < 1 # if >= 1, arctanh will cause error in BoundedGridLocNet 17 | target_control_points = torch.Tensor(list(itertools.product( 18 | np.arange(-r1, r1 + 0.00001, 2.0 * r1 / (grid_height - 1)), 19 | np.arange(-r2, r2 + 0.00001, 2.0 * r2 / (grid_width - 1)), 20 | ))) 21 | 22 | Y, X = target_control_points.split(1, dim=1) 23 | target_control_points = torch.cat([X, Y], dim=1) 24 | 25 | GridLocNet = { 26 | 'unbounded_stn': UnBoundedTPSLocalizer, 27 | 'bounded_stn': BoundedTPSLocalizer, 28 | }[config['TPS_localizer']] 29 | 30 | backbone = config['loc_backbone'] 31 | # img_height, img_width, _ = config['image_shape'] 32 | #img_height, img_width, _ = config['image_shape'] if config['template_resize'] else config['template_shape'] 33 | img_height, img_width, _ = config['template_shape'] 34 | downsample_dim = config['loc_downsample_dim'] 35 | fc_dim = config['loc_fc_dim'] 36 | adjust_patchDim = config['adjust_patch_size'] 37 | self.loc_net = GridLocNet(backbone, downsample_dim, fc_dim, grid_height, grid_width, target_control_points, predict_dimension=adjust_patchDim) 38 | self.tps = TPSGridGen(img_height, img_width, target_control_points) 39 | 40 | # transform the template 41 | def forward(self, x, template): 42 | # transform the input 43 | batch_size = x.size(0) 44 | source_control_points, output_scale = self.loc_net(x) 45 | source_coordinate = self.tps(source_control_points) 46 | _, _, H, W = template.size() 47 | grid = source_coordinate.view(batch_size, H, W, 2) 48 | y = grid_sample(template, grid) 49 | 50 | return y, source_control_points, output_scale 51 | -------------------------------------------------------------------------------- /configs/config_advPatch_detectron2.yaml: -------------------------------------------------------------------------------- 1 | # data parameters 2 | use_augmentation: True 3 | use_ohem: False 4 | ohem_ratio: 0.5 5 | # similar to data augmentation 6 | use_EOT: False 7 | 8 | # patch related parameters 9 | adv_patch_size: [416, 416, 3] 10 | apply_border_mask: False 11 | 
border_mask_ratio: 0.05769 # 24/416 12 | border_value: 0.75 # white T-shirt 13 | tv_loss_weight: 2.5 14 | 15 | # Detector information 16 | detector_name: Detectron2 # YOLO_V2|YOLO_V3|Detectron2 17 | #detector_model_path: ./detector/yolov3 18 | detector_model_path: ./detector/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml # set to different yaml for different model 19 | detector_input_size: [1080, 1920] 20 | detector_scale_size: [416, 416] 21 | target_obj_id: 0 22 | train_nms_thresh: 0.8 23 | train_conf_thresh: 0.2999 24 | val_nms_thresh: 0.4 25 | val_conf_thresh: 0.7 26 | val_iou_threshold: 0.1 # 0.5 27 | 28 | template_shape: [252, 150, 3] # H, W print size 29 | template_resize: False # resize template and place it in the input image 30 | use_loc_net: True # use localization net 31 | 32 | 33 | # geometric transformation --- STN parameters 34 | STN: tps # affine or tps 35 | use_STN_loss: True 36 | #STN_loss_weight: 1.0 37 | loc_backbone: resnet18 38 | loc_downsample_dim: 128 39 | loc_fc_dim: 256 40 | TPS_localizer: bounded_stn #bounded_stn or unbounded_stn 41 | TPS_range: [0.999, 0.999] 42 | TPS_grid: [20, 10] 43 | 44 | # printer color transformation (PCT) 45 | color_transformation_path: 'kaidi_color_model/weights2_0_1.npz' 46 | PrinterCT: PCT # PCT, PCTLinear (linear), PCTNeural (non-linear) or None (no PCT applied) 47 | use_double_PCT: False 48 | 49 | # lighting color transformation 50 | use_LightingCT: False 51 | LightingCT: cc_fcn4 #cc_fcn4 or generator 52 | 53 | image_shape: [256, 256, 3] 54 | mask_shape: [128, 128] 55 | 56 | # log 57 | log_dir: 58 | log_file: log.log 59 | model_checkpoint: checkpoint.pth.tar 60 | model_best: model_best.pth.tar 61 | adv_patch_img: adv_patch.png 62 | adv_patch_img_best: best_adv_patch.png 63 | 64 | 65 | # training parameters 66 | cuda: True 67 | gpu_ids: [0, 1, 2, 3, 4, 5] # set the GPU ids to use, e.g. 
[0] or [1, 2] 68 | num_workers: 24 69 | compute_dsr: False 70 | visualize: False 71 | epochs: 1000 72 | batch_size: 72 73 | lr: 0.1 74 | beta1: 0.5 75 | beta2: 0.9 76 | print_iter: 20 77 | # scheduler 78 | scheduler_patience: 25 79 | scheduler_factor: 0.5 80 | -------------------------------------------------------------------------------- /nets/AdvPatch/advPatch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training code for Adversarial patch training 3 | 4 | """ 5 | from torchvision import transforms 6 | import torch 7 | from torch import nn 8 | from PIL import Image 9 | from .advPatch_util import generate_patch, generate_border_mask 10 | 11 | class AdvPatch(nn.Module): 12 | def __init__(self, config): 13 | super(AdvPatch, self).__init__() 14 | self.adv_patch_size = tuple(config['adv_patch_size']) 15 | self.apply_border_mask = config['apply_border_mask'] 16 | print(' ===== AdvPatch size: (%d %d %d) =======' % (self.adv_patch_size)) 17 | 18 | if self.apply_border_mask: 19 | self.border_value = config['border_value'] 20 | self._border_size = int(self.adv_patch_size[0] * config['border_mask_ratio'] + 0.5) 21 | print(' ===== Border mask size: %d Value: %.2f =======' % (self._border_size, self.border_value)) 22 | self.border_mask = nn.Parameter(generate_border_mask(self.adv_patch_size, self._border_size)) 23 | 24 | self.adv_patch = nn.Parameter(generate_patch("gray", size=self.adv_patch_size[:2])) 25 | 26 | @property 27 | def patch_size(self): 28 | return self.adv_patch_size 29 | 30 | @property 31 | def border_size(self): 32 | return self._border_size if self.apply_border_mask else 0 33 | 34 | def learnable(self): 35 | return [self.adv_patch] 36 | 37 | def clip(self): 38 | self.adv_patch.data.clamp_(0, 1) # keep patch in image range 39 | 40 | def forward(self): 41 | if self.apply_border_mask: 42 | # note that nn.parameter cannot be assigned directly, so an internal change is needed 43 | self.adv_patch.data *= self.border_mask.data 44 | self.adv_patch.data += (1 - self.border_mask.data) * self.border_value 45 | 46 | return self.adv_patch 47 | 48 | def save_patch(self, patch_path): 49 | adv_patch = self.adv_patch.detach().cpu() 50 | im = transforms.ToPILImage('RGB')(adv_patch) 51 | im.save(patch_path) 52 | 53 | def load_patch(self, patch_path): 54 | patch_img = Image.open(patch_path).convert('RGB') 55 | w, h = patch_img.size 56 | adv_h, adv_w = self.adv_patch_size[:2] 57 | if w != adv_w or h != adv_h: 58 | patch_img = transforms.Resize((adv_h, adv_w), Image.BILINEAR)(patch_img) 59 | 60 | self.adv_patch = torch.nn.Parameter(transforms.ToTensor()(patch_img)) 61 | 62 | def create_advPatch_model(config): 63 | return AdvPatch(config) 64 | -------------------------------------------------------------------------------- /nets/STNet/AffineSTN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | 6 | 7 | def init_module(module): 8 | for m in module.modules(): 9 | if isinstance(m, nn.Conv2d): 10 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 11 | if m.bias is not None: 12 | nn.init.zeros_(m.bias) 13 | elif isinstance(m, nn.BatchNorm2d): 14 | nn.init.ones_(m.weight) 15 | nn.init.zeros_(m.bias) 16 | elif isinstance(m, nn.Linear): 17 | nn.init.normal_(m.weight, 0, 0.01) 18 | if m.bias is not None: 19 | nn.init.zeros_(m.bias) 20 | 21 | 22 | class AffineSTNNet_depreciated(nn.Module): 23 | def __init__(self, backbone): 24 | 
super(AffineSTNNet_depreciated, self).__init__() 25 | if backbone == 'resnet18': 26 | resnet_model = models.resnet18(num_classes=10) 27 | elif backbone == 'resnet50': 28 | resnet_model = models.resnet50(num_classes=10) 29 | self.localizer = nn.Sequential(*list(resnet_model.children())[0:8]) 30 | 31 | self.last_conv_dim = 128 32 | self.down_sampler = nn.Sequential( 33 | nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0), 34 | nn.ReLU(True), 35 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 36 | 37 | # Regressor for the 3 * 2 affine matrix 38 | self.fc_loc = nn.Sequential( 39 | nn.Linear(128 * 4 * 4, 256), 40 | nn.ReLU(True), 41 | nn.Dropout(0.3), 42 | nn.Linear(256, 3 * 2) 43 | ) 44 | 45 | # weight initialization 46 | # init_module(self.down_sampler) 47 | # init_module(self.fc_loc) 48 | 49 | # Initialize the weights/bias with identity transformation 50 | self.fc_loc[-1].weight.data.zero_() 51 | self.fc_loc[-1].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)) 52 | 53 | # localization 54 | def localization(self, x): 55 | xs = self.localizer(x) 56 | # print (xs.shape) 57 | xs = self.down_sampler(xs) 58 | # print (xs.shape) 59 | xs = xs.view(-1, 128 * 4 * 4) 60 | theta = self.fc_loc(xs) 61 | theta = theta.view(-1, 2, 3) 62 | return theta 63 | 64 | # transform the template 65 | def forward(self, x, template): 66 | # transform the input 67 | theta = self.localization(x) 68 | grid = F.affine_grid(theta, template.size(), align_corners=False) 69 | try: 70 | y = F.grid_sample(template, grid, align_corners=False) 71 | except: 72 | y = F.grid_sample(template, grid) 73 | 74 | return y, theta 75 | -------------------------------------------------------------------------------- /detector/object_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class ObjectDetector(nn.Module): 5 | def __init__(self, detector_name, cfg_path, model_path, class_names=None, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 6 | super().__init__() 7 | self.name = detector_name 8 | self.model = self.load_model(cfg_path, model_path, class_names) 9 | self.class_names = class_names 10 | self.input_size = input_size 11 | self.test_size = test_size 12 | self.target_object_id = target_object_id 13 | 14 | def load_model(self, cfg_path, model_path, class_names=None): 15 | raise NotImplementedError('base class not implemented') 16 | 17 | def forward(self, x, *args, **kwargs): 18 | return self.detect(x, *args, **kwargs) 19 | 20 | # used for training/val 21 | def detect(self, images, conf_thresh=0.1, nms_thresh=0): 22 | raise NotImplementedError('base class not implemented') 23 | 24 | # used for test. Most times it is the same as 'detect', but in some cases such as YOLO_V2, it might be implemented differently from 'detect'. 
25 | def detector_detect(self, images, conf_thresh=0.1, nms_thresh=0): 26 | raise NotImplementedError('base class not implemented') 27 | 28 | def detect_train(self, images, conf_thresh=0.1, nms_thresh=0): 29 | """Only use for FasterRCNN type of model since we attack of FasterRCNN at RPN not final output.""" 30 | return self.detect(images, conf_thresh, nms_thresh) 31 | 32 | @property 33 | def name(self): 34 | return self._name 35 | 36 | @property 37 | def model(self): 38 | return self._model 39 | 40 | @property 41 | def class_names(self): 42 | return self._class_names 43 | 44 | @property 45 | def input_size(self): 46 | return self._input_size 47 | 48 | @property 49 | def test_size(self): 50 | return self._test_size 51 | 52 | @property 53 | def target_object_id(self): 54 | return self._target_object_id 55 | 56 | @name.setter 57 | def name(self,val): 58 | self._name = val 59 | 60 | @model.setter 61 | def model(self,val): 62 | self._model = val 63 | 64 | @class_names.setter 65 | def class_names(self, val): 66 | self._class_names = val 67 | 68 | @input_size.setter 69 | def input_size(self, val): 70 | self._input_size = val 71 | 72 | @test_size.setter 73 | def test_size(self, val): 74 | self._test_size = val 75 | 76 | @target_object_id.setter 77 | def target_object_id(self, val): 78 | self._target_object_id = val 79 | 80 | def cuda(self, device): 81 | self.model.cuda(device) 82 | 83 | def eval(self): 84 | self.model.eval() 85 | 86 | def training(self): 87 | self.model.train() 88 | -------------------------------------------------------------------------------- /detector/SSD_detector.py: -------------------------------------------------------------------------------- 1 | from detector.SSD.ssd.modeling.detector import build_detection_model 2 | from detector.SSD.ssd.config import cfg 3 | from detector.SSD.ssd.utils.checkpoint import CheckPointer 4 | from detector.yolo_util import wrap_detection_results, nms 5 | 6 | from .object_detector import ObjectDetector 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | class SSD_Detector(ObjectDetector): 11 | def __init__(self, model_name, cfg_path, model_path, class_names, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 12 | # load SSD 13 | super().__init__(model_name, cfg_path, model_path, class_names, input_size, test_size, target_object_id) 14 | data_mean = cfg.INPUT.PIXEL_MEAN 15 | data_mean[0], data_mean[1], data_mean[2] = data_mean[2], data_mean[0], data_mean[1] 16 | self.mean = data_mean 17 | 18 | if test_size[0] != cfg.INPUT.IMAGE_SIZE or test_size[1] != cfg.INPUT.IMAGE_SIZE: 19 | raise Warning('Scale size (%d, %d) is different from the default (%d %d)!' \ 20 | % (test_size[0], test_size[1], cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)) 21 | 22 | # skip background i.e. 
0 23 | self.class_names = [name for k, name in enumerate(class_names) if k > 0] 24 | 25 | def load_model(self, cfg_path, model_path, class_names=None): 26 | cfg.merge_from_file(cfg_path) 27 | cfg.freeze() 28 | 29 | ssd_model = build_detection_model(cfg) 30 | checkpointer = CheckPointer(ssd_model, save_dir=cfg.OUTPUT_DIR) 31 | checkpointer.load(model_path, use_latest=False) 32 | ssd_model.eval() 33 | 34 | return ssd_model 35 | 36 | ''' 37 | def _gpu_normalize(self, x_batch): 38 | x_batch *= 255.0 39 | mean = torch.tensor(self.mean).view(1, len(self.mean), 1, 1).cuda() 40 | std = torch.tensor(self.std).view(1, len(self.std), 1, 1).cuda() 41 | return (x_batch - mean) / std 42 | ''' 43 | 44 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 45 | _, h, w, _ = images.shape 46 | if self.test_size[0] == w and self.test_size[1] == h: 47 | scaled_images = images 48 | else: 49 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 50 | 51 | scaled_images *= 255.0 52 | mean = torch.tensor(self.mean).view(1, len(self.mean), 1, 1).cuda(device=images.device) 53 | inputs = scaled_images - mean 54 | outputs = self.model(inputs) 55 | outputs = [torch.cat((o['boxes'], o['scores'].unsqueeze_(-1), o['labels'].unsqueeze_(-1).float()-1.0), dim=-1) for o in outputs] 56 | outputs = nms(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh) 57 | 58 | results = wrap_detection_results(outputs, self.test_size[0], self.input_size, skip=False) 59 | return results 60 | 61 | def detector_detect(self, img, conf_thresh, nms_thresh): 62 | with torch.no_grad(): 63 | output = self.detect(img, conf_thresh, nms_thresh) 64 | 65 | return output -------------------------------------------------------------------------------- /losses/mask_losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from pytorch_msssim import ssim 5 | from utils.tools import tensor_to_grey 6 | from .smooth_l1_loss import SmoothL1Loss 7 | 8 | class MaskLoss(nn.Module): 9 | def __init__(self, loss_func): 10 | super().__init__() 11 | self.loss_func = loss_func 12 | 13 | ''' 14 | def forward(self, pred, label, mask_bb): 15 | # pred: reconstructed or transformed 2d images 16 | # label: ground-truth images 17 | l1_losses = [] 18 | for i, bbox in enumerate(mask_bb): 19 | b, l, h, w = bbox 20 | crop_pred = pred[i, :, b:b+h, l:l+w] 21 | crop_label = label[i, :, b:b+h, l:l+w] 22 | #crop_label = Variable(crop_label.data.cuda(),requires_grad=False) 23 | l1_losses.append(F.l1_loss(crop_pred, crop_label)) 24 | return torch.mean(torch.stack(l1_losses, dim=0)) 25 | ''' 26 | 27 | def forward(self, pred, label, mask_bb): 28 | # pred: reconstructed or transformed 2d images 29 | # label: ground-truth images 30 | n, _, h, w = pred.shape 31 | l1_losses = self.loss_func(pred, label) 32 | l1_losses = torch.mean(l1_losses.view(n, -1), dim=1) 33 | # mask_size = mask_bb[:,2:4].clone(device=mask_bb.device).detach() 34 | w_loss = torch.prod(mask_bb[:,2:4].float().detach(), 1) / (h * w) 35 | l1_losses /= w_loss 36 | l1_losses = torch.mean(l1_losses) 37 | return l1_losses 38 | 39 | class L1MaskLoss(MaskLoss): 40 | def __init__(self): 41 | super().__init__(nn.L1Loss(reduction='none')) 42 | 43 | class L2MaskLoss(MaskLoss): 44 | def __init__(self): 45 | super().__init__(nn.MSELoss(reduction='none')) 46 | 47 | class SmoothL1MaskLoss(MaskLoss): 48 | def __init__(self, beta=0.5): 49 | 
super().__init__(SmoothL1Loss(reduction='none', beta=beta)) 50 | 51 | class SIMMMaskLoss(nn.Module): 52 | def __init__(self, val_range=None): 53 | super(SIMMMaskLoss, self).__init__() 54 | self.val_range = val_range 55 | 56 | def forward(self, pred, label, mask_bb): 57 | # pred: reconstructed or transformed 2d images 58 | # label: ground-truth images 59 | ssim_losses = [] 60 | for i, bbox in enumerate(mask_bb): 61 | b, l, h, w = bbox 62 | #pred_grey = tensor_to_grey(pred[i, :, b:b+h, l:l+w]).view(1,1,h,w) 63 | #label_grey = tensor_to_grey(label[i, :, b:b+h, l:l+w]).view(1,1,h,w) 64 | pred_grey = pred[i, :, b:b+h, l:l+w] 65 | label_grey = label[i, :, b:b+h, l:l+w] 66 | pred_grey = pred_grey.view(1,-1,h,w) 67 | label_grey = label_grey.view(1,-1,h,w) 68 | # label_grey = Variable(label_grey.data.cuda(),requires_grad=False) 69 | 70 | #print (pred_grey.shape, label_grey.shape) 71 | ssim_val = 1.0 - ssim(pred_grey, label_grey, val_range=self.val_range) 72 | ssim_losses.append(ssim_val) 73 | #print (ssim_losses) 74 | return torch.mean(torch.stack(ssim_losses, dim=0)) 75 | -------------------------------------------------------------------------------- /nets/ColorNet/color_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from nets.backbone.backbone_config import get_backbone, get_last_conv_dim 5 | 6 | class PatternColorTransformer(nn.Module): 7 | def __init__(self, use_cuda, device_ids): 8 | super(PatternColorTransformer, self).__init__() 9 | self.use_cuda = use_cuda 10 | self.device_ids = device_ids 11 | self.color_mapping = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 12 | 13 | # transform the color 14 | def forward(self, x): 15 | # transform the input 16 | n, c, h, w = x.shape 17 | y = torch.matmul(self.color_mapping, x.view(n, c, -1)) 18 | y = torch.clamp(y, -1., 1.) 
19 | return y.view(n, c, h, w) 20 | 21 | class ColorMapEstimator(nn.Module): 22 | def __init__(self, backbone, fc_dim=256, num_output=9): 23 | super(ColorMapEstimator, self).__init__() 24 | 25 | resnet_model = get_backbone(backbone)(num_classes=10) 26 | last_conv_dim = get_last_conv_dim(backbone) 27 | 28 | self.backbone = nn.Sequential(*list(resnet_model.children())[0:-2]) 29 | self.fc_dim = fc_dim 30 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 31 | 32 | # Regressor for the 3 * 2 affine matrix 33 | self.fc_loc = nn.Sequential( 34 | nn.Dropout(0.3), 35 | nn.Linear(last_conv_dim, self.fc_dim), 36 | nn.ReLU(True), 37 | nn.Linear(self.fc_dim, num_output) 38 | ) 39 | 40 | def forward(self, x): 41 | x = self.backbone(x) 42 | x = self.avgpool(x) 43 | theta = self.fc_loc(x.squeeze()) 44 | return theta 45 | 46 | 47 | class ColorMapNet(nn.Module): 48 | def __init__(self, backbone, downsample_dim, fc_dim): 49 | super(ColorMapNet, self).__init__() 50 | self.color_map = ColorMapEstimator(backbone, fc_dim=fc_dim, num_output=9) 51 | 52 | # initialization 53 | self.color_map.fc_loc[-1].weight.data.zero_() 54 | self.color_map.fc_loc[-1].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0, 0, 0, 1], dtype=torch.float)) 55 | 56 | # localization 57 | def forward(self, x): 58 | x = self.color_map(x) 59 | x = F.relu(x) 60 | return x.view(-1, 3, 3) 61 | 62 | 63 | class LightingColorTransformer(nn.Module): 64 | def __init__(self, config): 65 | super(LightingColorTransformer, self).__init__() 66 | self.color_map = ColorMapNet(backbone=config['loc_backbone'], 67 | downsample_dim=config['loc_downsample_dim'], 68 | fc_dim=config['loc_fc_dim']) 69 | 70 | # transform the template 71 | def forward(self, x, template): 72 | # transform the input 73 | c_map = self.color_map(x) 74 | n, c, h, w = template.shape 75 | y = torch.matmul(c_map, template.view(n, c, -1)) 76 | y = torch.clamp(y, -1., 1.) 
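        # c_map is a per-sample 3x3 color matrix predicted from the input crop; multiplying it into the flattened template models how the scene lighting re-colors the printed patch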
77 | y = y.view(n, c, h, w) 78 | return y, c_map 79 | 80 | def forward_template(self, x, template): 81 | y, _= self.forward(x, template) 82 | return y 83 | -------------------------------------------------------------------------------- /detector/yolov3/config/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | # 0 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # 1 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | # 2 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | # 3 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | # 4 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | # 5 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | # 6 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | # 7 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | # 8 82 | [convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | # 9 91 | [maxpool] 92 | size=2 93 | stride=2 94 | 95 | # 10 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | # 11 105 | [maxpool] 106 | size=2 107 | stride=1 108 | 109 | # 12 110 | [convolutional] 111 | batch_normalize=1 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | ########### 119 | 120 | # 13 121 | [convolutional] 122 | batch_normalize=1 123 | filters=256 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | # 14 130 | [convolutional] 131 | batch_normalize=1 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | # 15 139 | [convolutional] 140 | size=1 141 | stride=1 142 | pad=1 143 | filters=255 144 | activation=linear 145 | 146 | 147 | 148 | # 16 149 | [yolo] 150 | mask = 3,4,5 151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 152 | classes=80 153 | num=6 154 | jitter=.3 155 | ignore_thresh = .7 156 | truth_thresh = 1 157 | random=1 158 | 159 | # 17 160 | [route] 161 | layers = -4 162 | 163 | # 18 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | # 19 173 | [upsample] 174 | stride=2 175 | 176 | # 20 177 | [route] 178 | layers = -1, 8 179 | 180 | # 21 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | # 22 190 | [convolutional] 191 | size=1 192 | stride=1 193 | pad=1 194 | filters=255 195 | activation=linear 196 | 197 | # 23 198 | [yolo] 199 | mask = 1,2,3 200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 201 | classes=80 202 | num=6 203 | jitter=.3 204 | ignore_thresh = .7 205 | truth_thresh = 1 206 | random=1 207 | 
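The cfg above is plain darknet INI-style text: bracketed section headers such as [net], [convolutional] and [yolo], each followed by key=value lines. The repo parses these files through `detector/yolov3/utils/parse_config.py`; the sketch below is only a generic illustration of the idea (a hypothetical `parse_darknet_cfg` helper), not that module's exact API.

```python
# Minimal sketch (illustrative only): turn a darknet-style .cfg such as
# yolov3-tiny.cfg above into an ordered list of {key: value} dicts, one per section.
def parse_darknet_cfg(path):
    blocks = []
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            if line.startswith('[') and line.endswith(']'):
                # start of a new section, e.g. [net], [convolutional], [yolo]
                blocks.append({'type': line[1:-1].strip()})
            else:
                key, value = line.split('=', 1)
                blocks[-1][key.strip()] = value.strip()
    return blocks

# Example use: count the convolutional layers declared in the tiny config.
# blocks = parse_darknet_cfg('detector/yolov3/config/yolov3-tiny.cfg')
# print(sum(b['type'] == 'convolutional' for b in blocks))
```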
-------------------------------------------------------------------------------- /detector/yolov3_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch.nn.functional as F 4 | import torch 5 | 6 | from detector.yolov3.models import Darknet 7 | from detector.yolov3.utils.utils import non_max_suppression 8 | from detector.object_detector import ObjectDetector 9 | from detector.yolo_util import nms, wrap_detection_results 10 | 11 | 12 | class YOLOV3_Detector(ObjectDetector): 13 | def __init__(self, model_name, cfg_path, model_path, class_names, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 14 | # load darknet 15 | super().__init__(model_name, cfg_path, model_path, class_names, input_size, test_size, target_object_id) 16 | 17 | # skip background i.e. 0 18 | self.class_names = [name for k, name in enumerate(class_names) if k > 0] 19 | 20 | def load_model(self, cfg_path, model_path, class_names=None): 21 | # Initiate model 22 | model = Darknet(cfg_path) 23 | model.load_darknet_weights(model_path) 24 | model = model.eval() 25 | return model 26 | 27 | ''' 28 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 29 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 30 | outputs = self.model(scaled_images) 31 | outputs = test_size_suppression(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh) 32 | 33 | #print ([item.shape for item in outputs]) 34 | new_outputs = [None for _ in outputs] 35 | for k in range(len(outputs)): 36 | if outputs[k] is not None: 37 | new_outputs[k] = resize_boxes(outputs[k], self.test_size[0], self.input_size) 38 | new_outputs[k] = new_outputs[k][:, [0,1,2,3,4,6]] 39 | else: 40 | #new_outputs[k] = torch.FloatTensor([[0.0, 0.0, 0.0, 0.0, 0.0, -1]]).cuda() 41 | new_outputs[k] = [None] 42 | #print ('------', k, new_outputs[k].shape, new_outputs[k]) 43 | #print (k, outputs[k], new_outputs[k]) 44 | 45 | #print (new_outputs) 46 | return new_outputs 47 | ''' 48 | 49 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 50 | _, h, w, _ = images.shape 51 | if self.test_size[0] == w and self.test_size[1] == h: 52 | scaled_images = images 53 | else: 54 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 55 | 56 | outputs = self.model(scaled_images) 57 | outputs = non_max_suppression(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh) 58 | results = wrap_detection_results(outputs, self.test_size[0], self.input_size) 59 | return results 60 | 61 | def detector_detect(self, img, conf_thresh, nms_thresh): 62 | with torch.no_grad(): 63 | output = self.detect(img, conf_thresh, nms_thresh) 64 | 65 | return output 66 | 67 | ''' 68 | def resize_boxes(detection, test_size, input_size): 69 | h, w = input_size 70 | rh, rw = float(h)/test_size, float(w)/test_size 71 | detection[:,0] *= rw 72 | detection[:,1] *= rh 73 | detection[:,2] *= rw 74 | detection[:,3] *= rh 75 | 76 | return detection 77 | ''' 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # advPatch-pytorch 2 | Code for generating adversarial patches. 3 | 4 | # Required packages 5 | `pip install pytorch-msssim` 6 | 7 | 8 | # Installation 9 | 10 | Clone the project with the submodules. 11 | ```bash 12 | git clone --recursive URL 13 | ``` 14 | 15 | This repo requires Python >= 3.6.
17 | To install the dependent packages listed in `requirement.txt`, run 18 | ```bash 19 | pip install -r requirement.txt 20 | ``` 21 | 22 | To use the object detectors from `SSD` or `Detectron2`, you will need to install them from the corresponding submodules. 23 | 24 | To install SSD (more details [here](https://github.com/lufficc/SSD#installation)): 25 | 26 | ```bash 27 | cd REPO/detector/SSD 28 | pip install -e . 29 | ``` 30 | 31 | To install Detectron2 (more details [here](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md)): 32 | 33 | ```bash 34 | cd REPO/detector/detectron2 35 | pip install -e . 36 | ``` 37 | 38 | 39 | 40 | 41 | # Usage 42 | 43 | ## 1. Learning STN with Generator 44 | 1) Training 45 | 46 | ```bash 47 | python train_patchTransformer.py --config configs/config_patchTransformer.yaml --logdir STN-results --dataset neu_color \ 48 | --datadir ../../adv_data/neu_data --epochs 600 --STN tps --learnableSTN --use_LCT --LightingCT gen --batch_size 72 49 | ``` 50 | 2) Evaluation 51 | 52 | ```bash 53 | python train_patchTransformer.py --config configs/config_patchTransformer.yaml --logdir thinklab-STN-results --dataset neu_color \ 54 | --datadir ../../adv_data/neu_data --epochs 600 --STN tps --learnableSTN --use_LCT --LightingCT gen --batch_size 60 \ 55 | --patch_transformer_path thinklab-STN-results/PT_neu_color_STN_resnet18_ds128_fc256_tps_bounded20x10_gen_p256_L1Mask_bs60_e600/model_best.pth.tar \ 56 | --visualize --evaluate 57 | ``` 58 | 59 | - `--visualize`: save intermediate results into the folder `vis_output` under `patch_transformer_path` 60 | - `--val_list_file`: specify which subset to evaluate (train or validation) 61 | 62 | ## 2. Learning Printer Color (PCT) and Lighting Transformation (LCT) 63 | 1) Training 64 | 65 | ```bash 66 | python train_patchTransformer.py --config configs/config_patchTransformer.yaml --logdir PatNet-results --dataset neu_color \ 67 | --datadir ../../adv_data/neu_data --epochs 600 --STN tps --use_PCT --PrinterCT PCTLinear --use_LCT --LightingCT cc --batch_size 72 \ 68 | --pretrained STN-results/PT_neu_color_STN_resnet18_ds128_fc256_tps_bounded20x10_gen_bs72_e600/model_best.pth.tar 69 | ``` 70 | ## 3. Learning Adversarial Attack Model 71 | 1) Training 72 | 73 | ```bash 74 | python train_advPatch.py --config configs/config_advPatch.yaml --logdir AdvNet-results --dataset neu_color --datadir ../../adv_data/neu_data \ 75 | --epochs 600 --STN tps --use_PCT --PrinterCT PCTLinear --use_LCT --LightingCT cc --batch_size 72 \ 76 | --patch_transformer_path PatNet-results/PatNet/PT_neu_color_fixedSTN_blur6_resnet18_ds128_fc256_tps_bounded20x10_PCTLinear_cc_alexnet_bs72_e600_pretrained_nopctloss_blur/model_best.pth.tar 77 | ``` 78 | 79 | 2) Evaluation 80 | 81 | ```bash 82 | python train_advPatch.py --config configs/config_advPatch.yaml --logdir AdvNet-results --dataset neu_color --datadir ../../adv_data/neu_data \ 83 | --epochs 600 --STN tps --use_PCT --PrinterCT PCTLinear --use_LCT --LightingCT cc --batch_size 72 \ 84 | --patch_transformer_path PatNet-results/PatNet/PT_neu_color_fixedSTN_blur6_resnet18_ds128_fc256_tps_bounded20x10_PCTLinear_cc_alexnet_bs72_e600_pretrained_nopctloss_blur/model_best.pth.tar \ 85 | --evaluate 86 | ``` 87 | -------------------------------------------------------------------------------- /detector/yolov2/patch_config.py: -------------------------------------------------------------------------------- 1 | from torch import optim 2 | 3 | 4 | class BaseConfig(object): 5 | """ 6 | Default
parameters for all config files. 7 | """ 8 | 9 | def __init__(self): 10 | """ 11 | Set the defaults. 12 | """ 13 | self.img_dir = "inria/Train/pos" 14 | self.lab_dir = "inria/Train/pos/yolo-labels" 15 | self.cfgfile = "model/darknet/yolo.cfg" 16 | self.weightfile = "model/darknet/yolo.weights" 17 | self.printfile = "non_printability/30values.txt" 18 | self.class_name = 'model/darknet/coco.names' 19 | self.patch_size = 300 20 | 21 | self.start_learning_rate = 0.0001 22 | 23 | self.patch_name = 'base' 24 | 25 | self.scheduler_factory = lambda x: optim.lr_scheduler.ReduceLROnPlateau(x, 'min', patience=50, factor=0.5) 26 | self.max_tv = 0 27 | 28 | self.batch_size = 20 29 | 30 | self.loss_target = lambda obj, cls: obj * cls 31 | 32 | 33 | class Experiment1(BaseConfig): 34 | """ 35 | Model that uses a maximum total variation, tv cannot go below this point. 36 | """ 37 | 38 | def __init__(self): 39 | """ 40 | Change stuff... 41 | """ 42 | super().__init__() 43 | 44 | self.patch_name = 'Experiment1' 45 | self.max_tv = 0.165 46 | 47 | 48 | class Experiment2HighRes(Experiment1): 49 | """ 50 | Higher res 51 | """ 52 | 53 | def __init__(self): 54 | """ 55 | Change stuff... 56 | """ 57 | super().__init__() 58 | 59 | self.max_tv = 0.165 60 | self.patch_size = 400 61 | self.patch_name = 'Exp2HighRes' 62 | 63 | class Experiment3LowRes(Experiment1): 64 | """ 65 | Lower res 66 | """ 67 | 68 | def __init__(self): 69 | """ 70 | Change stuff... 71 | """ 72 | super().__init__() 73 | 74 | self.max_tv = 0.165 75 | self.patch_size = 100 76 | self.patch_name = "Exp3LowRes" 77 | 78 | class Experiment4ClassOnly(Experiment1): 79 | """ 80 | Only minimise class score. 81 | """ 82 | 83 | def __init__(self): 84 | """ 85 | Change stuff... 86 | """ 87 | super().__init__() 88 | 89 | self.patch_name = 'Experiment4ClassOnly' 90 | self.loss_target = lambda obj, cls: cls 91 | 92 | 93 | 94 | 95 | class Experiment1Desktop(Experiment1): 96 | """ 97 | """ 98 | 99 | def __init__(self): 100 | """ 101 | Change batch size. 102 | """ 103 | super().__init__() 104 | 105 | self.batch_size = 8 106 | self.patch_size = 400 107 | 108 | 109 | class ReproducePaperObj(BaseConfig): 110 | """ 111 | Reproduce the results from the paper: Generate a patch that minimises object score. 
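    (Presumably Thys et al., "Fooling automated surveillance cameras: adversarial patches to attack person detection", CVPR Workshops 2019, from which this yolov2 patch-training code appears to be adapted.)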
112 | """ 113 | 114 | def __init__(self): 115 | super().__init__() 116 | 117 | self.batch_size = 12 118 | self.patch_size = 416 119 | 120 | self.patch_name = 'ObjectOnlyPaper' 121 | self.max_tv = 0.165 122 | 123 | self.loss_target = lambda obj, cls: obj 124 | 125 | 126 | patch_configs = { 127 | "base": BaseConfig, 128 | "exp1": Experiment1, 129 | "exp1_des": Experiment1Desktop, 130 | "exp2_high_res": Experiment2HighRes, 131 | "exp3_low_res": Experiment3LowRes, 132 | "exp4_class_only": Experiment4ClassOnly, 133 | "paper_obj": ReproducePaperObj 134 | } 135 | -------------------------------------------------------------------------------- /configs/config_advPatch.yaml: -------------------------------------------------------------------------------- 1 | # data parameters 2 | use_augmentation: True 3 | use_ohem: False 4 | ohem_ratio: 0.5 5 | # similar to data augmentation 6 | use_EOT: False 7 | 8 | # patch related parameters 9 | #adv_patch_size: [416, 416, 3] 10 | adv_patch_size: [252, 150, 3] 11 | apply_border_mask: False 12 | border_mask_ratio: 0.05769 # 24/416 13 | border_value: 0.75 # white T-shirt 14 | tv_loss_weight: 2.5 15 | 16 | # Detector information 17 | #YOLO: YOLO_V2, YOLO_V3 18 | #SSD: SSD300_VGG16, SSD512_VGG16 19 | #Faster_RCNN: Faster_RCNN_VGG16, Faster_RCNN_R50, Faster_RCNN_R101 20 | #DETECTRON2: DFaster_RCNN_R50, DFaster_RCNN_R101, DRCNN_FPN_R50, DRCNN_FPN_R101, DRetinaNet_R50, DRetinaNet_R101 21 | 22 | detector_impl: YOLO 23 | detector_name: YOLO_V2 24 | detector_input_size: [416, 416] 25 | #detector_input_size: [1080, 1920] 26 | #detector_input_size: [540, 960] 27 | detector_test_size: [-1,-1] 28 | object_dataset: COCO # COCO or VOC 29 | target_object_id: -1 30 | train_nms_thresh: 0.8 31 | train_conf_thresh: 0.2999 32 | val_nms_thresh: 0.4 33 | val_conf_thresh: 0.7 34 | val_iou_threshold: 0.1 # 0.5 35 | 36 | template_shape: [252, 150, 3] # H, W 37 | template_resize: False 38 | #template_scaling_factor: -1 # how much blurring to apply on the template 39 | 40 | # geometric transformation --- STN parameters 41 | learnableSTN: False # learn STN or fix it 42 | STN_loss: L1Mask # L1, L2, SIMMMask, L1Mask 43 | STN: tps # affine or tps 44 | loc_backbone: resnet18 45 | loc_downsample_dim: 128 46 | loc_fc_dim: 256 47 | adjust_patch_size: False # learn to adjust the patch size for pasting 48 | TPS_localizer: bounded_stn #bounded_stn or unbounded_stn 49 | TPS_range: [0.999, 0.999] 50 | TPS_grid: [20, 10] 51 | 52 | # printer color transformation (PCT) 53 | use_PCT: False 54 | PrinterCT: PCTLinear # PCT or PCTLinear or PCTLinearBias or PCTNeural 55 | PCT_loss: L1 56 | color_transformation_path: 'kaidi_color_model/weights2_digital2new_0_1.npz' 57 | 58 | use_LCT: False 59 | LCT_loss: L1Mask 60 | LightingCT: gen #cc (color constancy, i.e. cc_fc4) or gen (image generator) 61 | lct_backbone: alexnet #alextnet, resnet18 62 | lct_input_size: [256, 256] 63 | #generator_input_size: [1024, 1024] 64 | #generator_input_size: [512, 512] 65 | generator_input_size: [384, 384] 66 | #generator_input_size: [256, 256] 67 | #generator_input_size: [288, 288] 68 | #generator_input_size: [320, 320] 69 | #generator_input_size: [352, 352] 70 | 71 | patch_size_median: 0.2519 # i.e. (150-50+1) / (450-50+1) 72 | #patch_size_range: [60, 400] #[min_height, max_height] 73 | patch_size_range: [50, 450] #[min_height, max_height] 74 | 75 | #collaborative_learning: False 76 | #patch_size_median: 0.17 # i.e. 
(100-60+1) / (300-60+1) 77 | #kd_type: margin #margin (our proposed) |mutual (deep mutual Learning) |one (online knowledge disttillation) 78 | #kd_norm: 2 # 1: L1 2: L2 79 | #patch_size_range: [60, 300] #[min_height, max_height] 80 | #CL_pretrained: False 81 | #near_patch_path: 82 | #far_patch_path: 83 | 84 | #image_shape: [256, 256, 3] 85 | #mask_shape: [128, 128] 86 | 87 | # log 88 | log_dir: 89 | log_file: log.log 90 | model_checkpoint: checkpoint.pth.tar 91 | model_best: model_best.pth.tar 92 | adv_patch_img: adv_patch.png 93 | adv_patch_img_best: best_adv_patch.png 94 | 95 | 96 | # training parameters 97 | cuda: True 98 | gpu_ids: [0,1,2,3,4,5] # set the GPU ids to use, e.g. [0] or [1, 2] 99 | num_workers: 24 100 | compute_dsr: False 101 | visualize: False 102 | epochs: 1000 103 | batch_size: 72 104 | lr: 0.1 105 | beta1: 0.5 106 | beta2: 0.9 107 | print_iter: 20 108 | # scheduler 109 | scheduler_patience: 25 110 | scheduler_factor: 0.5 111 | -------------------------------------------------------------------------------- /nets/STNet/tps_grid_gen.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # credit to https://github.com/WarBean/tps_stn_pytorch/blob/master/tps_grid_gen.py 3 | 4 | import torch 5 | import itertools 6 | import torch.nn as nn 7 | from torch.autograd import Function, Variable 8 | 9 | # phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2 10 | def compute_partial_repr(input_points, control_points): 11 | N = input_points.size(0) 12 | M = control_points.size(0) 13 | pairwise_diff = input_points.view(N, 1, 2) - control_points.view(1, M, 2) 14 | # original implementation, very slow 15 | # pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance 16 | pairwise_diff_square = pairwise_diff * pairwise_diff 17 | pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :, 1] 18 | repr_matrix = 0.5 * pairwise_dist * torch.log(pairwise_dist) 19 | # fix numerical error for 0 * log(0), substitute all nan with 0 20 | mask = repr_matrix != repr_matrix 21 | repr_matrix.masked_fill_(mask, 0) 22 | return repr_matrix 23 | 24 | class TPSGridGen(nn.Module): 25 | 26 | def __init__(self, target_height, target_width, target_control_points): 27 | super(TPSGridGen, self).__init__() 28 | assert target_control_points.ndimension() == 2 29 | assert target_control_points.size(1) == 2 30 | N = target_control_points.size(0) 31 | self.num_points = N 32 | target_control_points = target_control_points.float() 33 | 34 | # create padded kernel matrix 35 | forward_kernel = torch.zeros(N + 3, N + 3) 36 | target_control_partial_repr = compute_partial_repr(target_control_points, target_control_points) 37 | forward_kernel[:N, :N].copy_(target_control_partial_repr) 38 | forward_kernel[:N, -3].fill_(1) 39 | forward_kernel[-3, :N].fill_(1) 40 | forward_kernel[:N, -2:].copy_(target_control_points) 41 | forward_kernel[-2:, :N].copy_(target_control_points.transpose(0, 1)) 42 | # compute inverse matrix 43 | inverse_kernel = torch.inverse(forward_kernel) 44 | 45 | # create target cordinate matrix 46 | HW = target_height * target_width 47 | target_coordinate = list(itertools.product(range(target_height), range(target_width))) 48 | target_coordinate = torch.Tensor(target_coordinate) # HW x 2 49 | Y, X = target_coordinate.split(1, dim = 1) 50 | Y = Y * 2 / (target_height - 1) - 1 51 | X = X * 2 / (target_width - 1) - 1 52 | target_coordinate = torch.cat([X, Y], dim = 1) # convert from (y, x) to (x, y) 53 | 
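        # TPS radial basis between every target pixel and every control point: phi(p, c) = ||p - c||^2 * log ||p - c||; together with the affine terms [1, x, y] this forms the representation that maps control-point offsets to a dense sampling grid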
target_coordinate_partial_repr = compute_partial_repr(target_coordinate, target_control_points) 54 | target_coordinate_repr = torch.cat([ 55 | target_coordinate_partial_repr, torch.ones(HW, 1), target_coordinate 56 | ], dim = 1) 57 | 58 | # register precomputed matrices 59 | self.register_buffer('inverse_kernel', inverse_kernel) 60 | self.register_buffer('padding_matrix', torch.zeros(3, 2)) 61 | self.register_buffer('target_coordinate_repr', target_coordinate_repr) 62 | 63 | def forward(self, source_control_points): 64 | assert source_control_points.ndimension() == 3 65 | assert source_control_points.size(1) == self.num_points 66 | assert source_control_points.size(2) == 2 67 | batch_size = source_control_points.size(0) 68 | 69 | Y = torch.cat([source_control_points, Variable(self.padding_matrix.expand(batch_size, 3, 2))], 1) 70 | mapping_matrix = torch.matmul(Variable(self.inverse_kernel), Y) 71 | source_coordinate = torch.matmul(Variable(self.target_coordinate_repr), mapping_matrix) 72 | return source_coordinate -------------------------------------------------------------------------------- /configs/config_collaborative_advPatch.yaml: -------------------------------------------------------------------------------- 1 | # data parameters 2 | use_augmentation: True 3 | use_ohem: False 4 | ohem_ratio: 0.5 5 | # similar to data augmentation 6 | use_EOT: False 7 | 8 | # patch related parameters 9 | #adv_patch_size: [416, 416, 3] 10 | adv_patch_size: [252, 150, 3] 11 | apply_border_mask: False 12 | border_mask_ratio: 0.05769 # 24/416 13 | border_value: 0.75 # white T-shirt 14 | tv_loss_weight: 2.5 15 | 16 | # Detector information 17 | #YOLO: YOLO_V2, YOLO_V3 18 | #SSD: SSD300_VGG16, SSD512_VGG16 19 | #Faster_RCNN: Faster_RCNN_VGG16, Faster_RCNN_R50, Faster_RCNN_R101 20 | #DETECTRON2: DFaster_RCNN_R50, DFaster_RCNN_R101, DRCNN_FPN_R50, DRCNN_FPN_R101, DRetinaNet_R50, DRetinaNet_R101 21 | 22 | detector_impl: Faster_RCNN 23 | detector_name: Faster_RCNN_VGG16 24 | detector_input_size: [1080, 1920] 25 | #detector_input_size: [540, 960] 26 | #detector_input_size: [416, 416] 27 | detector_test_size: [-1,-1] 28 | object_dataset: COCO # COCO or VOC 29 | target_object_id: -1 30 | train_nms_thresh: 0.8 31 | train_conf_thresh: 0.2999 32 | val_nms_thresh: 0.4 33 | val_conf_thresh: 0.7 34 | val_iou_threshold: 0.1 # 0.5 35 | 36 | template_shape: [252, 150, 3] # H, W 37 | #template_scaling_factor: -1 # how much blurring to apply on the template 38 | 39 | # geometric transformation --- STN parameters 40 | learnableSTN: False # learn STN or fix it 41 | STN_loss: SIMMMask # L1, L2, SIMMMask, L1Mask 42 | STN: tps # affine or tps 43 | loc_backbone: resnet18 44 | loc_downsample_dim: 128 45 | loc_fc_dim: 256 46 | adjust_patch_size: False # learn to adjust the patch size for pasting 47 | TPS_localizer: bounded_stn #bounded_stn or unbounded_stn 48 | TPS_range: [0.999, 0.999] 49 | TPS_grid: [20, 10] 50 | 51 | # printer color transformation (PCT) 52 | use_PCT: False 53 | PrinterCT: PCTLinear # PCT or PCTLinear or PCTLinearBias or PCTNeural 54 | PCT_loss: L1 55 | color_transformation_path: 'kaidi_color_model/weights2_digital2new_0_1.npz' 56 | 57 | use_LCT: False 58 | LCT_loss: L1 59 | LightingCT: gen #cc (color constancy, i.e. 
cc_fc4) or gen (image generator) 60 | lct_backbone: alexnet #alextnet, resnet18 61 | lct_input_size: [256, 256] 62 | #generator_input_size: [512, 512] 63 | generator_input_size: [384, 384] 64 | #generator_input_size: [256, 256] 65 | #generator_input_size: [272, 272] 66 | 67 | collaborative_learning: False 68 | CL_pretrained: False 69 | collaborative_weight: False 70 | 71 | half_patches: False 72 | kd_type: MSE #margin , MSE , L1, SmoothL1 or mutual, one, SmoothL1Mask 73 | kd_norm: 2 # 1: L1 2: L2 74 | 75 | # for non-colloaborative_learning 76 | near_patch_path: test_collaborative/YOLO_V2_COCO_adv252_neu_near_tps_PCTLinear_cc_alexnet_p256_L1Mask_tv25_bs72_e500/best_adv_patch.png 77 | far_patch_path: test_collaborative/YOLO_V2_COCO_adv252_neu_far_tps_PCTLinear_cc_alexnet_p256_L1Mask_tv25_bs72_e500/best_adv_patch.png 78 | 79 | # advT_data 80 | patch_size_median: 0.2519 # i.e. (150-50+1) / (450-50+1) 81 | #patch_size_range: [60, 400] #[min_height, max_height] 82 | patch_size_range: [50, 450] #[min_height, max_height] 83 | 84 | #image_shape: [256, 256, 3] 85 | #mask_shape: [128, 128] 86 | 87 | # log 88 | log_dir: 89 | log_file: log.log 90 | model_checkpoint: checkpoint.pth.tar 91 | model_best: model_best.pth.tar 92 | adv_patch_img: adv_patch.png 93 | adv_patch_img_best: best_adv_patch.png 94 | 95 | 96 | # training parameters 97 | cuda: True 98 | gpu_ids: [0,1,2,3,4,5] # set the GPU ids to use, e.g. [0] or [1, 2] 99 | num_workers: 24 100 | compute_dsr: False 101 | visualize: False 102 | epochs: 1000 103 | batch_size: 72 104 | lr: 0.1 105 | beta1: 0.5 106 | beta2: 0.9 107 | print_iter: 20 108 | # scheduler 109 | scheduler_patience: 25 110 | scheduler_factor: 0.5 111 | -------------------------------------------------------------------------------- /nets/AdvPatch/advPatch_model_builder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | from PIL import Image 4 | from nets.AdvPatch.advPatch_net import AdvPatchNet 5 | from nets.AdvPatch.collaborative_advPatch_net import CollaborativeAdvPatchNet 6 | from nets.AdvPatch.hybrid_advPatch import HybridAdvPatch 7 | from utils.tools import normalize 8 | import os 9 | 10 | def get_adv_model_path(config): 11 | model_name = config['detector_name'] 12 | model_name += '_' + config['object_dataset'] 13 | if config['collaborative_learning'] is True: 14 | model_name += '_CL' 15 | if config['half_patches'] is True: 16 | model_name += 'half' 17 | if config['collaborative_weight']: 18 | model_name += '_weighted' 19 | if config['CL_pretrained'] is True: 20 | model_name += '_pretrained' 21 | model_name += '_adv%d' % (config['adv_patch_size'][0]) 22 | 23 | model_name += '_' + config['dataset'] 24 | # if config['use_augmentation']: 25 | # model_name += '_aug' 26 | if config['use_ohem']: 27 | model_name += '_ohem%d' % (int(config['ohem_ratio']*100)) 28 | # model_name += '_ohem' 29 | if config['apply_border_mask']: 30 | model_name += '_border' 31 | 32 | #model_name += '_STN' if config['learnableSTN'] else '_fixedSTN' 33 | 34 | # model_name += '_p%d' % (config['person_crop_size'][0]) 35 | #if config['template_scaling_factor'] > 0: 36 | # model_name += '_blur%d' % (config['template_scaling_factor']) 37 | #model_name += '_' + config['loc_backbone'] 38 | #model_name += '_ds%d'% (config['loc_downsample_dim']) 39 | #model_name += '_fc%d'% (config['loc_fc_dim']) 40 | model_name += '_' + config['STN'] 41 | #if config['STN'] == 'tps': 42 | # if config['TPS_localizer'] == 'bounded_stn': 43 | 
# model_name += '_bounded' 44 | # else: 45 | # model_name += '_unbounded' 46 | # model_name += '%dx%d'% (config['TPS_grid'][0], config['TPS_grid'][1]) 47 | 48 | if config['use_PCT']: 49 | model_name += '_' + config['PrinterCT'] 50 | 51 | if config['use_LCT']: 52 | model_name += '_' + config['LightingCT'] 53 | if config['LightingCT'] == 'cc': 54 | model_name += '_' + config['lct_backbone'] 55 | model_name += '_p%d' % (config['lct_input_size'][0]) 56 | 57 | if config['use_LCT']: 58 | model_name += '_' + config['LCT_loss'] 59 | elif config['use_PCT']: 60 | model_name += '_' + config['PCT_loss'] 61 | 62 | if config['use_EOT']: 63 | model_name += '_EOT' 64 | 65 | # model_name += '_%s_loss' % (config['MaxProbExtractor_loss']) 66 | model_name += '_tv%d' % (int(config['tv_loss_weight']*10)) 67 | model_name += '_bs%d' % (config['batch_size']) 68 | model_name += '_e%d' % (config['epochs']) 69 | 70 | return model_name 71 | 72 | def build_advPatch_model(config): 73 | return AdvPatchNet(config) if not config['collaborative_learning'] else \ 74 | CollaborativeAdvPatchNet(config) 75 | 76 | def build_advPatch_model_from_checkpoint_file(model, model_path): 77 | print ('loading advPatch model: %s' % (model_path)) 78 | checkpoint = torch.load(model_path, map_location='cpu') 79 | y1 = torch.sigmoid(checkpoint['adv_patch']['blending']) 80 | import torchvision.transforms as transforms 81 | img = transforms.ToPILImage()(y1) 82 | img.save('blend_mask.png') 83 | print (y1) 84 | 85 | model.adv_patch_model.load_state_dict(checkpoint['adv_patch']) 86 | return model, checkpoint['epoch'], checkpoint['iteration'], checkpoint['lr'], checkpoint['best_error'] 87 | 88 | def build_advPatch_model_from_image_file(model, model_path): 89 | model.adv_patch_model.load_patch(model_path) 90 | return model 91 | 92 | #def set_gradient_false(model): 93 | # for p in model.parameters(): 94 | # p.requires_grad = False 95 | -------------------------------------------------------------------------------- /detector/yolov2/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | 
filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /utils/gaussian_blur.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.functional import conv2d 6 | 7 | 8 | def gaussian(window_size, sigma): 9 | def gauss_fcn(x): 10 | return -(x - window_size // 2)**2 / float(2 * sigma**2) 11 | gauss = torch.stack( 12 | [torch.exp(torch.tensor(gauss_fcn(x))) for x in range(window_size)]) 13 | return gauss / gauss.sum() 14 | 15 | 16 | def get_gaussian_kernel(ksize, sigma): 17 | window_1d = gaussian(ksize, sigma) 18 | return window_1d 19 | 20 | 21 | def get_gaussian_kernel2d(ksize, sigma): 22 | ksize_x, ksize_y = ksize 23 | sigma_x, sigma_y = sigma 24 | kernel_x = get_gaussian_kernel(ksize_x, sigma_x) 25 | kernel_y = 
get_gaussian_kernel(ksize_y, sigma_y) 26 | kernel_2d = torch.matmul( 27 | kernel_x.unsqueeze(-1), kernel_y.unsqueeze(-1).t()) 28 | return kernel_2d 29 | 30 | class GaussianBlur(nn.Module): 31 | r"""Creates an operator that blurs a tensor using a Gaussian filter. 32 | 33 | The operator smooths the given tensor with a gaussian kernel by convolving 34 | it to each channel. It suports batched operation. 35 | 36 | Arguments: 37 | kernel_size (Tuple[int, int]): the size of the kernel. 38 | sigma (Tuple[float, float]): the standard deviation of the kernel. 39 | 40 | Returns: 41 | Tensor: the blurred tensor. 42 | 43 | Shape: 44 | - Input: :math:`(B, C, H, W)` 45 | - Output: :math:`(B, C, H, W)` 46 | 47 | Examples:: 48 | 49 | >>> input = torch.rand(2, 4, 5, 5) 50 | >>> gauss = tgm.image.GaussianBlur((3, 3), (1.5, 1.5)) 51 | >>> output = gauss(input) # 2x4x5x5 52 | """ 53 | 54 | def __init__(self, kernel_size, sigma): 55 | super(GaussianBlur, self).__init__() 56 | self.kernel_size = kernel_size 57 | self.sigma = sigma 58 | self._padding = self.compute_zero_padding(kernel_size) 59 | self.kernel = self.create_gaussian_kernel(kernel_size, sigma) 60 | 61 | @staticmethod 62 | def create_gaussian_kernel(kernel_size, sigma): 63 | """Returns a 2D Gaussian kernel array.""" 64 | kernel = get_gaussian_kernel2d(kernel_size, sigma) 65 | return kernel 66 | 67 | @staticmethod 68 | def compute_zero_padding(kernel_size): 69 | """Computes zero padding tuple.""" 70 | computed = [(k - 1) // 2 for k in kernel_size] 71 | return computed[0], computed[1] 72 | 73 | def forward(self, x): 74 | if not torch.is_tensor(x): 75 | raise TypeError("Input x type is not a torch.Tensor. Got {}" 76 | .format(type(x))) 77 | if not len(x.shape) == 4: 78 | raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}" 79 | .format(x.shape)) 80 | # prepare kernel 81 | b, c, h, w = x.shape 82 | tmp_kernel = self.kernel.to(x.device).to(x.dtype) 83 | kernel = tmp_kernel.repeat(c, 1, 1, 1) 84 | 85 | # convolve tensor with gaussian kernel 86 | return conv2d(x, kernel, padding=self._padding, stride=1, groups=c) 87 | 88 | 89 | 90 | ###################### 91 | # functional interface 92 | ###################### 93 | 94 | 95 | def gaussian_blur(src, kernel_size, sigma): 96 | r"""Function that blurs a tensor using a Gaussian filter. 97 | 98 | The operator smooths the given tensor with a gaussian kernel by convolving 99 | it to each channel. It suports batched operation. 100 | 101 | Arguments: 102 | src (Tensor): the input tensor. 103 | kernel_size (Tuple[int, int]): the size of the kernel. 104 | sigma (Tuple[float, float]): the standard deviation of the kernel. 105 | 106 | Returns: 107 | Tensor: the blurred tensor. 108 | 109 | Shape: 110 | - Input: :math:`(B, C, H, W)` 111 | - Output: :math:`(B, C, H, W)` 112 | 113 | Examples:: 114 | 115 | >>> input = torch.rand(2, 4, 5, 5) 116 | >>> output = tgm.image.gaussian_blur(input, (3, 3), (1.5, 1.5)) 117 | """ 118 | return GaussianBlur(kernel_size, sigma)(src) 119 | -------------------------------------------------------------------------------- /nets/AdvPatch/advPatch_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def generate_patch(type, size=(416, 416)): 5 | """ 6 | Generate a random patch as a starting point for optimization. 7 | 8 | :param type: Can be 'gray' or 'random'. Whether or not generate a gray or a random patch. 
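    :param size: (height, width) of the generated patch.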
9 | :return: 10 | """ 11 | 12 | if type == 'gray': 13 | adv_patch = torch.full((3, size[0], size[1]), 0.5) 14 | elif type == 'random': 15 | adv_patch = torch.rand((3, size[0], size[1])) 16 | 17 | return adv_patch 18 | 19 | def generate_border_mask(patch_size, border_size): 20 | h = patch_size[0] 21 | w = patch_size[1] 22 | border_mask = torch.full((3, h, w), 0) 23 | bottom = border_size 24 | top = h - bottom 25 | border_mask[:, bottom:top, :] = 1.0 26 | 27 | return border_mask 28 | 29 | def paste_patch_to_frame(patch, patch_bb, img, img_bb): 30 | n, c, _, _ = patch.shape 31 | # create tensor 32 | img_h, img_w = img.shape[2:] 33 | x = torch.cuda.FloatTensor(n, c, img_h, img_w).fill_(0) 34 | for i, bbox in enumerate(patch_bb): 35 | pb, pl, ph, pw = bbox 36 | ib, il, ih, iw = img_bb[i] 37 | resized_tmpl = F.interpolate(patch[i, :, pb:pb + ph, pl:pl + pw].unsqueeze(0), size=(ih, iw), 38 | mode='bilinear', align_corners=False) 39 | x[i, :, ib:ib + ih, il:il + iw] = resized_tmpl.squeeze() 40 | 41 | return x 42 | 43 | def get_max_detection_score(output, obj_bbox, target_obj_id=0, min_detection_score=0.3, loss_type = 'max'): 44 | # output a list of (x1,y1,x2,y2, object_conf, class_pred) 45 | # obj_bbox: a list of (x1, y1, x2, y2) 46 | assert len(output) == obj_bbox.shape[0] 47 | 48 | # minimum prob. is set to 0.3 49 | max_prob = torch.zeros((obj_bbox.shape[0], 1)).cuda() 50 | #print ('max_prob_0', max_prob) 51 | for k in range(len(output)): 52 | detection = output[k] 53 | if isinstance(detection, list) and detection[0] is None: 54 | continue 55 | 56 | person_detection = detection[detection[:, -1] == target_obj_id] 57 | if person_detection.shape[0] == 0: 58 | continue 59 | 60 | bbox = obj_bbox[k] 61 | xc = (person_detection[:, 0] + person_detection[:, 2]) / 2.0 62 | yc = (person_detection[:, 1] + person_detection[:, 3]) / 2.0 63 | 64 | x_inside = (xc > bbox[0]) & (bbox[2] > xc) 65 | y_inside = (yc > bbox[1]) & (bbox[3] > yc) 66 | 67 | xy_inside = x_inside & y_inside 68 | # assert any(xy_inside>0), (xy_inside, xc, yc, detection, bbox, x_inside, y_inside, xy_inside) 69 | if loss_type == 'ce': 70 | prob = person_detection[xy_inside, 4:-1] 71 | if len(prob) > 0: 72 | max_prob[k] = torch.nn.functional.nll_loss(prob.log(), (prob.shape[-1] - 1) * torch.ones(len(prob), dtype=torch.long, device=prob.device)) 73 | min_detection_score = 0.0 74 | else: 75 | if any(xy_inside > 0): 76 | if loss_type == 'avg': 77 | max_prob[k] = torch.mean(person_detection[xy_inside, 4]) 78 | min_detection_score = 0.0 79 | else: 80 | max_prob[k] = torch.max(person_detection[xy_inside, 4]) 81 | max_prob = torch.clamp(max_prob, min=min_detection_score) 82 | return max_prob 83 | 84 | 85 | # total variation of the patch 86 | def get_totalVariation(adv_patch): 87 | tvcomp1 = torch.sum(torch.abs(adv_patch[:, :, 1:] - adv_patch[:, :, :-1] + 0.000001), 0) 88 | tvcomp1 = torch.sum(torch.sum(tvcomp1, 0), 0) 89 | tvcomp2 = torch.sum(torch.abs(adv_patch[:, 1:, :] - adv_patch[:, :-1, :] + 0.000001), 0) 90 | tvcomp2 = torch.sum(torch.sum(tvcomp2, 0), 0) 91 | tv = tvcomp1 + tvcomp2 92 | return tv / torch.numel(adv_patch) 93 | 94 | # adversarial loss 95 | def advsarial_loss(max_detection_score, loss_type): 96 | if loss_type == '': 97 | return None 98 | 99 | if loss_type == '': 100 | return None 101 | 102 | if loss_type == '': 103 | return None 104 | 105 | return max_detection_score -------------------------------------------------------------------------------- /detector/yolov3/test.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | from utils.utils import * 5 | from utils.datasets import * 6 | from utils.parse_config import * 7 | 8 | import os 9 | import sys 10 | import time 11 | import datetime 12 | import argparse 13 | import tqdm 14 | 15 | import torch 16 | from torch.utils.data import DataLoader 17 | from torchvision import datasets 18 | from torchvision import transforms 19 | from torch.autograd import Variable 20 | import torch.optim as optim 21 | 22 | 23 | def evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size): 24 | model.eval() 25 | 26 | # Get dataloader 27 | dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False) 28 | dataloader = torch.utils.data.DataLoader( 29 | dataset, batch_size=batch_size, shuffle=False, num_workers=1, collate_fn=dataset.collate_fn 30 | ) 31 | 32 | Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 33 | 34 | labels = [] 35 | sample_metrics = [] # List of tuples (TP, confs, pred) 36 | for batch_i, (_, imgs, targets) in enumerate(tqdm.tqdm(dataloader, desc="Detecting objects")): 37 | 38 | # Extract labels 39 | labels += targets[:, 1].tolist() 40 | # Rescale target 41 | targets[:, 2:] = xywh2xyxy(targets[:, 2:]) 42 | targets[:, 2:] *= img_size 43 | 44 | imgs = Variable(imgs.type(Tensor), requires_grad=False) 45 | 46 | with torch.no_grad(): 47 | outputs = model(imgs) 48 | outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres) 49 | 50 | sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres) 51 | 52 | # Concatenate sample statistics 53 | true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*sample_metrics))] 54 | precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, labels) 55 | 56 | return precision, recall, AP, f1, ap_class 57 | 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch") 62 | parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") 63 | parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") 64 | parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") 65 | parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") 66 | parser.add_argument("--iou_thres", type=float, default=0.5, help="iou threshold required to qualify as detected") 67 | parser.add_argument("--conf_thres", type=float, default=0.001, help="object confidence threshold") 68 | parser.add_argument("--nms_thres", type=float, default=0.5, help="iou thresshold for non-maximum suppression") 69 | parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation") 70 | parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") 71 | opt = parser.parse_args() 72 | print(opt) 73 | 74 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 75 | 76 | data_config = parse_data_config(opt.data_config) 77 | valid_path = data_config["valid"] 78 | class_names = load_classes(data_config["names"]) 79 | 80 | # Initiate model 81 | model = Darknet(opt.model_def).to(device) 
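    # a .weights file holds original darknet binary weights; any other extension is treated as a PyTorch state_dict checkpoint (see the two branches below)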
82 | if opt.weights_path.endswith(".weights"): 83 | # Load darknet weights 84 | model.load_darknet_weights(opt.weights_path) 85 | else: 86 | # Load checkpoint weights 87 | model.load_state_dict(torch.load(opt.weights_path)) 88 | 89 | print("Compute mAP...") 90 | 91 | precision, recall, AP, f1, ap_class = evaluate( 92 | model, 93 | path=valid_path, 94 | iou_thres=opt.iou_thres, 95 | conf_thres=opt.conf_thres, 96 | nms_thres=opt.nms_thres, 97 | img_size=opt.img_size, 98 | batch_size=8, 99 | ) 100 | 101 | print("Average Precisions:") 102 | for i, c in enumerate(ap_class): 103 | print(f"+ Class '{c}' ({class_names[c]}) - AP: {AP[i]}") 104 | 105 | print(f"mAP: {AP.mean()}") 106 | -------------------------------------------------------------------------------- /nets/PatchTransformer/patchTransformer_model_builder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from nets.PatchTransformer.patchTransformer_network import PatchTransformerNetwork 3 | from utils.utils import fix_checkpoint_key 4 | 5 | def get_patchTransformer_model_path(config): 6 | model_name = 'PT' 7 | model_name += '_%s' % (config['dataset']) 8 | 9 | if config['use_PBM']: 10 | model_name += '_PBM' 11 | 12 | model_name += '_STN' if config['learnableSTN'] else '_fixedSTN' 13 | #if config['adjust_patch_size']: 14 | # model_name += '_APS' 15 | #if config['template_scaling_factor'] > 0: 16 | # model_name += '_blur%d' % (config['template_scaling_factor']) 17 | model_name += '_' + config['loc_backbone'] 18 | model_name += '_ds%d'% (config['loc_downsample_dim']) 19 | model_name += '_fc%d'% (config['loc_fc_dim']) 20 | model_name += '_' + config['STN'] 21 | if config['STN'] == 'tps': 22 | if config['TPS_localizer'] == 'bounded_stn': 23 | model_name += '_bounded' 24 | else: 25 | model_name += '_unbounded' 26 | model_name += '%dx%d'% (config['TPS_grid'][0], config['TPS_grid'][1]) 27 | 28 | if config['use_PCT']: 29 | model_name += '_' + config['PrinterCT'] 30 | 31 | if config['use_LCT']: 32 | model_name += '_' + config['LightingCT'] 33 | if config['LightingCT'] == 'cc': 34 | model_name += '_' + config['lct_backbone'] 35 | model_name += '_p%d' % (config['lct_input_size'][0]) 36 | 37 | if config['use_LCT']: 38 | model_name += '_' + config['LCT_loss'] 39 | elif config['use_PCT']: 40 | model_name += '_' + config['PCT_loss'] 41 | 42 | model_name += '_bs%d' % (config['batch_size']) 43 | model_name += '_e%d' % (config['epochs']) 44 | 45 | return model_name 46 | 47 | def build_patchTransformer_model(config): 48 | return PatchTransformerNetwork(config) 49 | 50 | def build_patchTransformer_from_checkpoint(model, model_path): 51 | checkpoint = torch.load(model_path, map_location='cpu') 52 | 53 | pt_modules = {'PBM':model.PBM, 'STN':model.STN, 'PCT':model.PCT, 'LCT':model.LCT} 54 | 55 | print ('Loading weights from %s' % (model_path)) 56 | for module_name in ['PBM', 'STN', 'PCT', 'LCT']: 57 | module = pt_modules[module_name] 58 | if module is not None: 59 | if checkpoint[module_name] is not None: 60 | try: 61 | module.load_state_dict(fix_checkpoint_key((checkpoint[module_name])), strict=True) 62 | print ("===== Finished loading %s module =====" % (module_name)) 63 | except Exception as e: 64 | print (e) 65 | print ("==== Woops, no %s module loaded ====" % (module_name)) 66 | else: 67 | print ('==== %s module is not available ====' % (module_name)) 68 | 69 | #print ("\nGenerator input size: {size}".format(size=model.generator_input_size)) 70 | #if 'generator_input_size' in checkpoint and 
model.generator_input_size != checkpoint['generator_input_size']: 71 | #print ("Change generator input size from {size1} to {size2}".format(size1=model.generator_input_size, size2=checkpoint['generator_input_size'])) 72 | #model.generator_input_size = checkpoint['generator_input_size'] 73 | # print ("Warnging: generator input size {size1} is different from the size {size2} in the loaded model.".format(size1=model.generator_input_size, size2=checkpoint['generator_input_size'])) 74 | 75 | ''' 76 | pct_key = 'PCT' 77 | if model.PCT is not None: 78 | try: 79 | if model.PCT is not None: 80 | model.PCT.load_state_dict(fix_checkpoint_key((checkpoint[pct_key])), strict=True) 81 | print ("===== Finished loading PCT module with key: %s =====" % (pct_key)) 82 | except Exception as e: 83 | print (e) 84 | print ("==== Woops, no PCT module loaded ====") 85 | 86 | lct_key = 'LCT' 87 | try: 88 | model.LCT.load_state_dict(fix_checkpoint_key((checkpoint[lct_key])), strict=True) 89 | print("===== Finished loading LCT module with key:%s =====" % (lct_key)) 90 | except Exception as e: 91 | print (e) 92 | print("==== Woops, no LCT module loaded ====") 93 | print ('\n') 94 | ''' 95 | # now load optimizer 96 | epoch = checkpoint['epoch'] if 'epoch' in checkpoint else 0 97 | iteration = checkpoint['iteration'] if 'iteration' in checkpoint else 0 98 | lr = checkpoint['lr'] if 'lr' in checkpoint else 0.0 99 | 100 | best_error = checkpoint['best_error'] if 'best_error' in checkpoint else 9999.0 101 | 102 | return model, epoch, iteration, lr, best_error 103 | -------------------------------------------------------------------------------- /detector/yolo_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def wrap_detection_results(outputs, scale_size, input_size, skip=True): 4 | # print ([item.shape for item in outputs]) 5 | new_outputs = [None for _ in outputs] 6 | for k in range(len(outputs)): 7 | if outputs[k] is not None: 8 | new_outputs[k] = resize_boxes(outputs[k], scale_size, input_size) 9 | if skip: 10 | new_outputs[k] = new_outputs[k][:, [0, 1, 2, 3, 4, 6]] 11 | else: 12 | # new_outputs[k] = torch.FloatTensor([[0.0, 0.0, 0.0, 0.0, 0.0, -1]]).cuda() 13 | new_outputs[k] = [None] 14 | 15 | # print (new_outputs) 16 | return new_outputs 17 | 18 | def resize_boxes(detection, scale_size, input_size): 19 | w, h = input_size 20 | rh, rw = float(h)/scale_size, float(w)/scale_size 21 | detection[:,0] *= rw 22 | detection[:,1] *= rh 23 | detection[:,2] *= rw 24 | detection[:,3] *= rh 25 | 26 | return detection 27 | 28 | def xywh2xyxy(x): 29 | y = x.new(x.shape) 30 | y[..., 0] = x[..., 0] - x[..., 2] / 2 31 | y[..., 1] = x[..., 1] - x[..., 3] / 2 32 | y[..., 2] = x[..., 0] + x[..., 2] / 2 33 | y[..., 3] = x[..., 1] + x[..., 3] / 2 34 | return y 35 | 36 | def bbox_iou(box1, box2, x1y1x2y2=True): 37 | """ 38 | Returns the IoU of two bounding boxes 39 | """ 40 | if not x1y1x2y2: 41 | # Transform from center and width to exact coordinates 42 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 43 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 44 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 45 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 46 | else: 47 | # Get the coordinates of bounding boxes 48 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 49 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 50 | 51 | 
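    # IoU below = intersection area / union area, with the +1 terms treating box coordinates as inclusive pixel indices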
# get the corrdinates of the intersection rectangle 52 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 53 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 54 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 55 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 56 | # Intersection area 57 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 58 | inter_rect_y2 - inter_rect_y1 + 1, min=0 59 | ) 60 | # Union Area 61 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 62 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 63 | 64 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 65 | 66 | return iou 67 | 68 | def nms(prediction, conf_thres=0.5, nms_thres=0.4): 69 | """ 70 | Removes detections with lower object confidence score than 'conf_thres' and performs 71 | Non-Maximum Suppression to further filter detections. 72 | Returns detections with shape: 73 | (x1, y1, x2, y2, object_conf, class_score, class_pred) 74 | """ 75 | 76 | ## From (center x, center y, width, height) to (x1, y1, x2, y2) 77 | #prediction[..., :4] = xywh2xyxy(prediction[..., :4]) 78 | output = [None for _ in range(len(prediction))] 79 | for image_i, image_pred in enumerate(prediction): 80 | # Filter out confidence scores below threshold 81 | image_pred = image_pred[image_pred[:, 4] >= conf_thres] 82 | # If none are remaining => process next image 83 | if not image_pred.size(0): 84 | continue 85 | # Object confidence times class confidence 86 | score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] 87 | # Sort by it 88 | image_pred = image_pred[(-score).argsort()] 89 | class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) 90 | 91 | detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) 92 | 93 | # Perform non-maximum suppression 94 | keep_boxes = [] 95 | while detections.size(0): 96 | large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) >= nms_thres 97 | label_match = detections[0, -1] == detections[:, -1] 98 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 99 | invalid = large_overlap & label_match 100 | weights = detections[invalid, 4:5] 101 | # Merge overlapping bboxes by order of confidence 102 | #detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() 103 | keep_boxes += [detections[0]] 104 | detections = detections[~invalid] 105 | if keep_boxes: 106 | output[image_i] = torch.stack(keep_boxes) 107 | 108 | return output 109 | 110 | -------------------------------------------------------------------------------- /demo_detector.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import argparse 4 | from detector.build_object_detector import build_object_detector 5 | from utils.tools import get_config 6 | from opts import arg_parser, merge_args 7 | import glob 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | import torch 11 | import numpy as np 12 | import json 13 | from json import encoder 14 | encoder.FLOAT_REPR = lambda o: format(o, '.2f') 15 | 16 | def load_detection(filename): 17 | with open(filename, 'r') as f: 18 | data = json.load(f) 19 | return data 20 | 21 | def prune_detection(detection): 22 | person_num = len(detection) 23 | if person_num <= 2: 24 | return detection 25 | 26 | areas =[(det[2] - det[0]) * (det[3] - det[1]) for det in detection] 27 | I = sorted(range(len(areas)), key=lambda k: areas[k], reverse=True) 28 | #print (areas, I) 29 | first_person = detection[I[0]] 30 | xc 
= (first_person[0] + first_person[2]) / 2.0 31 | second_person = None 32 | for k in range(1, len(I)): 33 | det = detection[I[k]] 34 | half_width = abs((first_person[2] - first_person[0])) / 2.0 35 | xc_temp = (det[0] + det[2]) / 2.0 36 | if abs(xc_temp - xc) > half_width: 37 | second_person = det 38 | break 39 | 40 | return [first_person, second_person] if second_person is not None else [first_person] 41 | 42 | def main(): 43 | global args 44 | parser = arg_parser() 45 | args = parser.parse_args() 46 | 47 | config = get_config(args.config) 48 | config = merge_args(args, config) 49 | #config['detector_input_size'] = [540, 960] 50 | detector = build_object_detector(config) 51 | 52 | device_ids = args.gpu_ids 53 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in device_ids) 54 | device_ids = list(range(len(device_ids))) 55 | detector.cuda(int(device_ids[0])) 56 | 57 | input_size = config['detector_input_size'] 58 | target_obj_id = detector.target_object_id 59 | results = {} 60 | 61 | video_name = args.test_dir.split('/')[-1] 62 | #gt_detections = load_detection('../../CVPR_detection_results/20201106/DFaster_RCNN_R101_COCO+' + video_name+'.json') 63 | with torch.no_grad(): 64 | for img_path in sorted(glob.glob(args.test_dir+"/*.png")): 65 | frame_img = Image.open(img_path) 66 | w, h = frame_img.size 67 | if w != input_size[0] or h != input_size[1]: 68 | frame_img = frame_img.resize(input_size, Image.BILINEAR) 69 | 70 | ''' 71 | frm_name = img_path.split('/')[-1] 72 | frm_name = frm_name.split('.')[0] 73 | gt_det = gt_detections[frm_name] 74 | pruned_det = prune_detection(gt_det) 75 | bb = list(map(int, pruned_det[0][:4])) 76 | ph = bb[3] - bb[1] + 1 77 | pw = bb[2] - bb[0] + 1 78 | bb = [bb[0]+int(pw*0.15), bb[1]+int(ph*0.15), bb[2]-int(pw*0.25), bb[3]-int(ph*0.4)] 79 | np_im = np.array(frame_img) 80 | #print (np_im.shape, bb) 81 | np_im[bb[1]:bb[3],bb[0]:bb[2],:] = 128 82 | frame_img = Image.fromarray(np_im, 'RGB') 83 | ''' 84 | frame_img = transforms.ToTensor()(frame_img) 85 | frame_img = frame_img.cuda() 86 | frame_img = torch.unsqueeze(frame_img, dim=0) 87 | #detections = detector.detect(frame_img, nms_thresh=config['val_nms_thresh'], conf_thresh=config['val_conf_thresh']) 88 | detections = detector.detector_detect(frame_img, nms_thresh=config['val_nms_thresh'], conf_thresh=config['val_conf_thresh']) 89 | person_detection = [] 90 | for idx, detection in enumerate(detections): 91 | for det in detection: 92 | if det is None: 93 | continue 94 | if det[-1] == target_obj_id: # only count the person 95 | det = det.detach().cpu().numpy().tolist() 96 | person_detection.append(det) 97 | filename = os.path.basename(img_path) 98 | print (filename) 99 | results[filename.split('.')[0]] = person_detection 100 | if not os.path.isdir(args.detection_output_dir): 101 | os.mkdir(args.detection_output_dir) 102 | output_filename = os.path.join(args.detection_output_dir, detector.name + '+' + os.path.basename(args.test_dir) + '.json') 103 | print ('Results are written to %s' % output_filename) 104 | with open(output_filename, 'w') as f: 105 | json.dump(results, f) 106 | 107 | ''' 108 | if __name__ == '__main__': 109 | main() 110 | 111 | config = merge_args(args, config) 112 | 113 | detector = build_object_detector(config).cuda() 114 | for img in glob.glob("Path/to/dir/*.jpg"): 115 | frame_img = Image.open(img) 116 | frame_img = transforms.ToTensor()(frame_img) 117 | frame_img = frame_img.cuda() 118 | torch.squeeze(frame_image, dim=0) 119 | detection_results = detector.detect(frame_img, 
nms_thresh=config['val_nms_thresh'], conf_thresh=config['val_conf_thresh']) 120 | print (detection_results) 121 | ''' 122 | 123 | if __name__ == '__main__': 124 | main() 125 | -------------------------------------------------------------------------------- /detector/yolov3/utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import sys 5 | import numpy as np 6 | from PIL import Image 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from utils.augmentations import horisontal_flip 11 | from torch.utils.data import Dataset 12 | import torchvision.transforms as transforms 13 | 14 | 15 | def pad_to_square(img, pad_value): 16 | c, h, w = img.shape 17 | dim_diff = np.abs(h - w) 18 | # (upper / left) padding and (lower / right) padding 19 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 20 | # Determine padding 21 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) 22 | # Add padding 23 | img = F.pad(img, pad, "constant", value=pad_value) 24 | 25 | return img, pad 26 | 27 | 28 | def resize(image, size): 29 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 30 | return image 31 | 32 | 33 | def random_resize(images, min_size=288, max_size=448): 34 | new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0] 35 | images = F.interpolate(images, size=new_size, mode="nearest") 36 | return images 37 | 38 | 39 | class ImageFolder(Dataset): 40 | def __init__(self, folder_path, img_size=416): 41 | self.files = sorted(glob.glob("%s/*.*" % folder_path)) 42 | self.img_size = img_size 43 | 44 | def __getitem__(self, index): 45 | img_path = self.files[index % len(self.files)] 46 | # Extract image as PyTorch tensor 47 | img = transforms.ToTensor()(Image.open(img_path)) 48 | # Pad to square resolution 49 | img, _ = pad_to_square(img, 0) 50 | # Resize 51 | img = resize(img, self.img_size) 52 | 53 | return img_path, img 54 | 55 | def __len__(self): 56 | return len(self.files) 57 | 58 | 59 | class ListDataset(Dataset): 60 | def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): 61 | with open(list_path, "r") as file: 62 | self.img_files = file.readlines() 63 | 64 | self.label_files = [ 65 | path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") 66 | for path in self.img_files 67 | ] 68 | self.img_size = img_size 69 | self.max_objects = 100 70 | self.augment = augment 71 | self.multiscale = multiscale 72 | self.normalized_labels = normalized_labels 73 | self.min_size = self.img_size - 3 * 32 74 | self.max_size = self.img_size + 3 * 32 75 | self.batch_count = 0 76 | 77 | def __getitem__(self, index): 78 | 79 | # --------- 80 | # Image 81 | # --------- 82 | 83 | img_path = self.img_files[index % len(self.img_files)].rstrip() 84 | 85 | # Extract image as PyTorch tensor 86 | img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) 87 | 88 | # Handle images with less than three channels 89 | if len(img.shape) != 3: 90 | img = img.unsqueeze(0) 91 | img = img.expand((3, img.shape[1:])) 92 | 93 | _, h, w = img.shape 94 | h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) 95 | # Pad to square resolution 96 | img, pad = pad_to_square(img, 0) 97 | _, padded_h, padded_w = img.shape 98 | 99 | # --------- 100 | # Label 101 | # --------- 102 | 103 | label_path = self.label_files[index % len(self.img_files)].rstrip() 104 | 105 | targets = None 106 | if 
os.path.exists(label_path): 107 | boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5)) 108 | # Extract coordinates for unpadded + unscaled image 109 | x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) 110 | y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) 111 | x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) 112 | y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) 113 | # Adjust for added padding 114 | x1 += pad[0] 115 | y1 += pad[2] 116 | x2 += pad[1] 117 | y2 += pad[3] 118 | # Returns (x, y, w, h) 119 | boxes[:, 1] = ((x1 + x2) / 2) / padded_w 120 | boxes[:, 2] = ((y1 + y2) / 2) / padded_h 121 | boxes[:, 3] *= w_factor / padded_w 122 | boxes[:, 4] *= h_factor / padded_h 123 | 124 | targets = torch.zeros((len(boxes), 6)) 125 | targets[:, 1:] = boxes 126 | 127 | # Apply augmentations 128 | if self.augment: 129 | if np.random.random() < 0.5: 130 | img, targets = horisontal_flip(img, targets) 131 | 132 | return img_path, img, targets 133 | 134 | def collate_fn(self, batch): 135 | paths, imgs, targets = list(zip(*batch)) 136 | # Remove empty placeholder targets 137 | targets = [boxes for boxes in targets if boxes is not None] 138 | # Add sample index to targets 139 | for i, boxes in enumerate(targets): 140 | boxes[:, 0] = i 141 | targets = torch.cat(targets, 0) 142 | # Selects new image size every tenth batch 143 | if self.multiscale and self.batch_count % 10 == 0: 144 | self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) 145 | # Resize images to input shape 146 | imgs = torch.stack([resize(img, self.img_size) for img in imgs]) 147 | self.batch_count += 1 148 | return paths, imgs, targets 149 | 150 | def __len__(self): 151 | return len(self.img_files) 152 | -------------------------------------------------------------------------------- /detector/yolov3/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | 5 | from detector.yolov3.utils.utils import rescale_boxes 6 | from utils.utils import * 7 | from utils.datasets import * 8 | 9 | import os 10 | import sys 11 | import time 12 | import datetime 13 | import argparse 14 | 15 | from PIL import Image 16 | 17 | import torch 18 | from torch.utils.data import DataLoader 19 | from torchvision import datasets 20 | from torch.autograd import Variable 21 | 22 | import matplotlib.pyplot as plt 23 | import matplotlib.patches as patches 24 | from matplotlib.ticker import NullLocator 25 | 26 | if __name__ == "__main__": 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument("--image_folder", type=str, default="data/samples", help="path to dataset") 29 | parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") 30 | parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file") 31 | parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file") 32 | parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold") 33 | parser.add_argument("--nms_thres", type=float, default=0.4, help="iou thresshold for non-maximum suppression") 34 | parser.add_argument("--batch_size", type=int, default=1, help="size of the batches") 35 | parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation") 36 | parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") 37 | 
parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model") 38 | opt = parser.parse_args() 39 | print(opt) 40 | 41 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 42 | 43 | os.makedirs("output", exist_ok=True) 44 | 45 | # Set up model 46 | model = Darknet(opt.model_def, img_size=opt.img_size).to(device) 47 | 48 | if opt.weights_path.endswith(".weights"): 49 | # Load darknet weights 50 | model.load_darknet_weights(opt.weights_path) 51 | else: 52 | # Load checkpoint weights 53 | model.load_state_dict(torch.load(opt.weights_path)) 54 | 55 | model.eval() # Set in evaluation mode 56 | 57 | dataloader = DataLoader( 58 | ImageFolder(opt.image_folder, img_size=opt.img_size), 59 | batch_size=opt.batch_size, 60 | shuffle=False, 61 | num_workers=opt.n_cpu, 62 | ) 63 | 64 | classes = load_classes(opt.class_path) # Extracts class labels from file 65 | 66 | Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 67 | 68 | imgs = [] # Stores image paths 69 | img_detections = [] # Stores detections for each image index 70 | 71 | print("\nPerforming object detection:") 72 | prev_time = time.time() 73 | for batch_i, (img_paths, input_imgs) in enumerate(dataloader): 74 | # Configure input 75 | input_imgs = Variable(input_imgs.type(Tensor)) 76 | 77 | # Get detections 78 | with torch.no_grad(): 79 | detections = model(input_imgs) 80 | detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres) 81 | 82 | # Log progress 83 | current_time = time.time() 84 | inference_time = datetime.timedelta(seconds=current_time - prev_time) 85 | prev_time = current_time 86 | print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time)) 87 | 88 | # Save image and detections 89 | imgs.extend(img_paths) 90 | img_detections.extend(detections) 91 | 92 | # Bounding-box colors 93 | cmap = plt.get_cmap("tab20b") 94 | colors = [cmap(i) for i in np.linspace(0, 1, 20)] 95 | 96 | print("\nSaving images:") 97 | # Iterate through images and save plot of detections 98 | for img_i, (path, detections) in enumerate(zip(imgs, img_detections)): 99 | 100 | print("(%d) Image: '%s'" % (img_i, path)) 101 | 102 | # Create plot 103 | img = np.array(Image.open(path)) 104 | plt.figure() 105 | fig, ax = plt.subplots(1) 106 | ax.imshow(img) 107 | 108 | # Draw bounding boxes and labels of detections 109 | if detections is not None: 110 | # Rescale boxes to original image 111 | detections = rescale_boxes(detections, opt.img_size, img.shape[:2]) 112 | unique_labels = detections[:, -1].cpu().unique() 113 | n_cls_preds = len(unique_labels) 114 | bbox_colors = random.sample(colors, n_cls_preds) 115 | for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections: 116 | 117 | print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item())) 118 | 119 | box_w = x2 - x1 120 | box_h = y2 - y1 121 | 122 | color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])] 123 | # Create a Rectangle patch 124 | bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none") 125 | # Add the bbox to the plot 126 | ax.add_patch(bbox) 127 | # Add label 128 | plt.text( 129 | x1, 130 | y1, 131 | s=classes[int(cls_pred)], 132 | color="white", 133 | verticalalignment="top", 134 | bbox={"color": color, "pad": 0}, 135 | ) 136 | 137 | # Save generated image with detections 138 | plt.axis("off") 139 | plt.gca().xaxis.set_major_locator(NullLocator()) 140 | plt.gca().yaxis.set_major_locator(NullLocator()) 141 | filename = 
path.split("/")[-1].split(".")[0] 142 | plt.savefig(f"output/{filename}.png", bbox_inches="tight", pad_inches=0.0) 143 | plt.close() 144 | -------------------------------------------------------------------------------- /nets/STNet/STNLocalizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torchvision.models as models 4 | import numpy as np 5 | import torch.nn.functional as F 6 | from nets.backbone.backbone_config import get_backbone, get_last_conv_dim 7 | 8 | #backbone_info = { 'resnet18': {'model': models.resnet18, 'last_conv_dim':512}, 9 | # 'resnet50': {'model': models.resnet50, 'last_conv_dim':2048}, 10 | # 'resnet101': {'model': models.resnet101, 'last_conv_dim': 2048} 11 | # } 12 | 13 | class BasicLocalizer(nn.Module): 14 | def __init__(self, backbone, downsample_dim=128, fc_dim=256, num_output=6): 15 | super(BasicLocalizer, self).__init__() 16 | 17 | # resnet_model = backbone_info[backbone]['model'](num_classes=10) 18 | # last_conv_dim = backbone_info[backbone]['last_conv_dim'] 19 | 20 | # self.backbone = nn.Sequential(*list(resnet_model.children())[0:-2]) 21 | self.backbone = get_backbone(backbone) 22 | last_conv_dim = get_last_conv_dim(backbone) 23 | 24 | 25 | self.downsample_dim = downsample_dim 26 | self.down_sampler = nn.Sequential( 27 | nn.Conv2d(last_conv_dim, self.downsample_dim, kernel_size=1, stride=1, padding=0), 28 | nn.ReLU(True), 29 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 30 | 31 | # Regressor for the 3 * 2 affine matrix 32 | self.fc_dim = fc_dim 33 | self.last_spatial_dim = 4 34 | self.fc_loc = nn.Sequential( 35 | nn.Dropout(0.3), 36 | nn.Linear(self.downsample_dim * self.last_spatial_dim * self.last_spatial_dim, self.fc_dim), 37 | nn.ReLU(True), 38 | nn.Linear(self.fc_dim, num_output) 39 | ) 40 | 41 | # localization 42 | def forward(self, x): 43 | xs = self.backbone(x) 44 | # print (xs.shape) 45 | xs = self.down_sampler(xs) 46 | # print (xs.shape) 47 | xs = xs.view(-1, self.downsample_dim * self.last_spatial_dim * self.last_spatial_dim) 48 | theta = self.fc_loc(xs) 49 | # theta = theta.view(-1, 2, 3) 50 | return theta 51 | 52 | class AffineLocalizer(nn.Module): 53 | def __init__(self, backbone, downsample_dim, fc_dim, predict_dimension=False): 54 | super(AffineLocalizer, self).__init__() 55 | self.predict_dimension=predict_dimension 56 | num_output = 6 57 | if self.predict_dimension: 58 | num_output += 1 59 | 60 | self.localizer = BasicLocalizer(backbone, downsample_dim=downsample_dim, fc_dim=fc_dim, num_output=num_output) 61 | 62 | # initialization 63 | self.localizer.fc_loc[-1].weight.data.zero_() 64 | if self.predict_dimension: 65 | self.localizer.fc_loc[-1].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0, 1], dtype=torch.float)) 66 | else: 67 | self.localizer.fc_loc[-1].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)) 68 | 69 | # localization 70 | def forward(self, x): 71 | x = self.localizer(x) 72 | if self.predict_dimension: 73 | return x[:,:6].view(-1, 2, 3), x[:,-1] 74 | else: 75 | return x.view(-1, 2, 3), None 76 | 77 | # based on https://github.com/WarBean/tps_stn_pytorch/blob/master/tps_grid_gen.py 78 | class BoundedTPSLocalizer(nn.Module): 79 | 80 | def __init__(self, backbone, downsample_dim, fc_dim, grid_height, grid_width, target_control_points, predict_dimension=False): 81 | super(BoundedTPSLocalizer, self).__init__() 82 | self.precit_dimension = predict_dimension 83 | num_output = grid_height * grid_width * 2 84 | if 
self.precit_dimension: 85 | num_output += 1 86 | self.cnn = BasicLocalizer(backbone, downsample_dim=downsample_dim, fc_dim=fc_dim, num_output=num_output) 87 | 88 | #bias = torch.from_numpy(np.arctanh(target_control_points.numpy())) 89 | #bias = bias.view(-1) 90 | bias = torch.zeros(num_output) 91 | if self.precit_dimension: 92 | bias[:-1] = torch.from_numpy(np.arctanh(target_control_points.numpy())).view(-1) 93 | bias[-1] = 1.0 94 | else: 95 | bias = torch.from_numpy(np.arctanh(target_control_points.numpy())).view(-1) 96 | 97 | self.cnn.fc_loc[-1].bias.data.copy_(bias) 98 | self.cnn.fc_loc[-1].weight.data.zero_() 99 | 100 | def forward(self, x): 101 | batch_size = x.size(0) 102 | points = self.cnn(x) 103 | if self.precit_dimension: 104 | return torch.tanh(points[:,:-1]).view(batch_size, -1, 2), points[:,-1] 105 | else: 106 | return torch.tanh(points).view(batch_size, -1, 2), None 107 | 108 | # based on https://github.com/WarBean/tps_stn_pytorch/blob/master/tps_grid_gen.py 109 | class UnBoundedTPSLocalizer(nn.Module): 110 | 111 | def __init__(self, backbone, downsample_dim, fc_dim, grid_height, grid_width, target_control_points, predict_dimension=False): 112 | super(UnBoundedTPSLocalizer, self).__init__() 113 | 114 | self.precit_dimension = predict_dimension 115 | num_output = grid_height * grid_width * 2 116 | if self.precit_dimension: 117 | num_output += 1 118 | 119 | self.cnn = BasicLocalizer(backbone, downsample_dim=downsample_dim, fc_dim=fc_dim, num_output=num_output) 120 | 121 | # bias = target_control_points.view(-1) 122 | bias = torch.zeros(num_output) 123 | if self.precit_dimension: 124 | bias[:-1] = target_control_points.view(-1) 125 | bias[-1] = 1.0 126 | else: 127 | bias = target_control_points.view(-1) 128 | 129 | self.cnn.fc_loc[-1].bias.data.copy_(bias) 130 | self.cnn.fc_loc[-1].weight.data.zero_() 131 | 132 | def forward(self, x): 133 | batch_size = x.size(0) 134 | points = self.cnn(x) 135 | if self.precit_dimension: 136 | return points[:, :-1].view(batch_size, -1, 2), points[:,-1] 137 | else: 138 | return points.view(batch_size, -1, 2), None 139 | -------------------------------------------------------------------------------- /nets/ColorNet/PCT_transformation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | ''' 6 | class ColorTransformation(nn.Module): 7 | def __init__(self, config): 8 | super(ColorTransformation, self).__init__() 9 | file = config['color_transformation_path'] 10 | self.W1 = torch.tensor(np.load(file)["weight1"], dtype=torch.float32).cuda() 11 | self.W1 = self.W1.unsqueeze(0).unsqueeze(0) 12 | self.W2 = torch.tensor(np.load(file)["weight2"], dtype=torch.float32).cuda() 13 | self.W2 = self.W2.unsqueeze(0).unsqueeze(0) 14 | self.b = torch.tensor(np.load(file)["bias"], dtype=torch.float32).cuda() 15 | 16 | def forward(self, x): 17 | x = x.transpose(1, -1) 18 | x = torch.matmul(x.pow(2), self.W2) + torch.matmul(x, self.W1) + self.b 19 | x = x.transpose(1, -1) 20 | return x 21 | ''' 22 | 23 | class PCTTransformation(nn.Module): 24 | def __init__(self, config): 25 | super(PCTTransformation, self).__init__() 26 | # use_cuda = config['cuda'] 27 | # device_ids = config['gpu_ids'] 28 | 29 | file = config['color_transformation_path'] 30 | W1 = torch.tensor(np.load(file)["weight1"], dtype=torch.float32) 31 | self.W1 = torch.nn.Parameter(W1.unsqueeze(0).unsqueeze(0)) 32 | W2 = torch.tensor(np.load(file)["weight2"], dtype=torch.float32) 33 | self.W2 = 
torch.nn.Parameter(W2.unsqueeze(0).unsqueeze(0)) 34 | b = torch.tensor(np.load(file)["bias"], dtype=torch.float32) 35 | self.b = torch.nn.Parameter(b) 36 | 37 | def forward(self, x): 38 | x = x.transpose(1, -1) 39 | x = torch.matmul(x.pow(2), self.W2) + torch.matmul(x, self.W1) + self.b 40 | x = x.transpose(1, -1) 41 | x = torch.clamp(x, 0., 1.) 42 | return x 43 | 44 | # linear PCT transformation 45 | class PCTLinearTransformation(nn.Module): 46 | def __init__(self, config): 47 | super(PCTLinearTransformation, self).__init__() 48 | # self.use_cuda = use_cuda 49 | # self.device_ids = device_ids 50 | self.color_mapping = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 51 | # transform the color 52 | def forward(self, x): 53 | # transform the input 54 | n, c, h, w = x.shape 55 | y = torch.matmul(self.color_mapping, x.view(n, c, -1)) 56 | 57 | #y = torch.clamp(y, -1., 1.) 58 | y = torch.clamp(y, 0., 1.) 59 | return y.view(n, c, h, w) 60 | 61 | # linear PCT transformation 62 | class PCTLinearBiasTransformation(nn.Module): 63 | def __init__(self, config): 64 | super(PCTLinearBiasTransformation, self).__init__() 65 | # self.use_cuda = use_cuda 66 | # self.device_ids = device_ids 67 | self.color_mapping = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 68 | self.b = torch.nn.Parameter(torch.tensor([[0.0, 0.0, 0.0]])) 69 | 70 | # transform the color 71 | def forward(self, x): 72 | # transform the input 73 | n, c, h, w = x.shape 74 | y = torch.matmul(self.color_mapping, x.view(n, c, -1)) 75 | #y += self.b.view(1, 3, 1) 76 | 77 | #y = torch.clamp(y, -1., 1.) 78 | min_y = torch.min(y) 79 | max_y = torch.max(y) 80 | y = (y -min_y ) / (max_y-min_y) 81 | # y = torch.clamp(y, 0.0, 1.) 82 | return y.view(n, c, h, w) 83 | 84 | # non-linear PCT transformation 85 | class PCTNeuralTransformation(nn.Module): 86 | def __init__(self, config): 87 | super(PCTNeuralTransformation, self).__init__() 88 | # self.use_cuda = use_cuda 89 | # self.device_ids = device_ids 90 | self.M1 = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 91 | self.M2 = torch.nn.Parameter(torch.tensor([[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]])) # 3x3 92 | 93 | # transform the color 94 | def forward(self, x): 95 | # transform the input 96 | n, c, h, w = x.shape 97 | y = torch.matmul(self.M1, x.view(n, c, -1)) 98 | y = nn.functional.relu(y) 99 | y = torch.matmul(self.M2, y.view(n, c, -1)) 100 | y = torch.clamp(y, 0., 1.) 101 | # y = torch.clamp(y, 0.0, 1.) 
102 | return y.view(n, c, h, w) 103 | 104 | ''' 105 | # non-linear PCT transformation 106 | class PCTNeuralTransformation(nn.Module): 107 | def __init__(self, config): 108 | super(PCTNeuralTransformation, self).__init__() 109 | # self.use_cuda = use_cuda 110 | # self.device_ids = device_ids 111 | fc_dim = 100 112 | self.fc_transform = nn.Sequential( 113 | nn.Dropout(0.3), 114 | nn.Linear(3, fc_dim), 115 | nn.ReLU(True), 116 | nn.Linear(fc_dim, 3) 117 | ) 118 | 119 | # transform the color 120 | def forward(self, x): 121 | # transform the input 122 | n, c, h, w = x.shape 123 | output = self.fc_transform(x.view(-1,c)) 124 | #output = torch.tanh(output) 125 | #output = 0.5* (output + 1.0) # normalize to [0 1] 126 | #output = torch.clamp(output, 0.0, 1.0) 127 | return output.view(n,c,h,w) 128 | ''' 129 | 130 | class PCTTransformationOld2New(nn.Module): 131 | def __init__(self): 132 | super(PCTTransformationOld2New, self).__init__() 133 | # use_cuda = config['cuda'] 134 | # device_ids = config['gpu_ids'] 135 | 136 | file = 'weights2_old2new_.npz' 137 | W1 = torch.tensor(np.load(file)["weight1"], dtype=torch.float32) 138 | self.W1 = torch.nn.Parameter(W1.unsqueeze(0).unsqueeze(0)) 139 | print (self.W1) 140 | W2 = torch.tensor(np.load(file)["weight2"], dtype=torch.float32) 141 | self.W2 = torch.nn.Parameter(W2.unsqueeze(0).unsqueeze(0)) 142 | print (self.W2) 143 | b = torch.tensor(np.load(file)["bias"], dtype=torch.float32) 144 | self.b = torch.nn.Parameter(b) 145 | 146 | def forward(self, x): 147 | x = x.transpose(1, -1) 148 | x = torch.matmul(x.pow(2), self.W2) + torch.matmul(x, self.W1) + self.b 149 | x = x.transpose(1, -1) 150 | return x 151 | 152 | -------------------------------------------------------------------------------- /nets/PatchTransformer/patch_transformer_net.py: -------------------------------------------------------------------------------- 1 | from nets.STNet.affine_STN import AffineSTNNet 2 | from nets.STNet.tps_STN import TpsSTNNet 3 | from utils.tools import transform_template_input 4 | from nets.ColorNet.cc_f4 import CC_Alex_FCN4 5 | from nets.ColorNet.PCT_transformation import * 6 | 7 | PCT_INFO = {'PCT':PCTTransformation, 'PCTLinear': PCTLinearTransformation, 'PCTNeural': PCTNeuralTransformation} 8 | LCT_INFO = {'cc_fcn4':CC_Alex_FCN4} 9 | 10 | class PatchTransformerNet(nn.Module): 11 | def __init__(self, config): 12 | super(PatchTransformerNet, self).__init__() 13 | self.config = config 14 | self.use_cuda = self.config['cuda'] 15 | self.device_ids = self.config['gpu_ids'] 16 | 17 | if config['STN'] == 'affine': 18 | self.STN = AffineSTNNet(config) 19 | elif config['STN'] == 'tps': 20 | self.STN = TpsSTNNet(config) 21 | 22 | # printer color transformation (PCT) 23 | self.predefined_PCT = True if self.config['PrinterCT'] == 'PCT' else False 24 | self.PCT = PCT_INFO[self.config['PrinterCT']](self.config) if self.config['PrinterCT'] != 'None' else None 25 | # only applied PCT once. This demonstrates better performance 26 | self.apply_PCT_twice = config['use_double_PCT'] 27 | assert self.apply_PCT_twice == False 28 | 29 | # Lighting color transformation (LCT) 30 | self.use_LCT = self.config['use_LightingCT'] 31 | if self.use_LCT: 32 | assert self.PCT is not None 33 | self.LCT = LCT_INFO[self.config['LightingCT']]() 34 | 35 | # Note: 'x' is within [-1 1] while template_img and frame_img are within [0 1]. 
36 | # the output is within [0 1] 37 | def forward(self, x, bboxes, masks, template_img, frame_img): 38 | if self.apply_PCT_twice: 39 | new_template_img = self.PCT(template_img) 40 | x_stn, _ = self.STN(x, new_template_img) 41 | else: 42 | x_stn, _ = self.STN(x, template_img) 43 | 44 | # paste the transformed patch to the frame image 45 | x_stn = transform_template_input(x_stn, bboxes, frame_img.shape[2:]) 46 | 47 | # perform printer color transformation 48 | x_pct = self.PCT(x_stn) if self.PCT is not None else x_stn 49 | x_pct = x_pct * masks + frame_img * (1. - masks) 50 | x_pct = torch.clamp(x_pct, 0, 0.999) 51 | 52 | if self.use_LCT: 53 | lct = self.LCT.forward_template(frame_img) 54 | # print (lct) 55 | x_lct = x_pct * lct 56 | x_lct = x_lct * masks + frame_img * (1. - masks) 57 | x_lct = torch.clamp(x_lct, 0, 0.999) 58 | 59 | if self.use_LCT: 60 | return x_lct, x_pct, lct 61 | else: 62 | return x_pct 63 | 64 | ''' 65 | # Note that the range of output [0 1] is different from that of input [-1 1] because of the multiplication 66 | # of line 91 does not work with the input range. Also 'ground_truth' got changed. Good for now, but it's 67 | # better to REWRITE in the future for reducing confusion and potential issues. 68 | def forward(self, x, bboxes, masks, template_img, frame_img): 69 | # geometric 70 | new_template_img = self.PCT(template_img) 71 | x_stn, _ = self.STN(x, new_template_img) 72 | 73 | # x_stn, _ = self.STN(x, template_img) 74 | # x_stn = F.avg_pool2d(x_stn, 3, stride=1, padding=1) 75 | 76 | # transform the template to be the input image to the generator 77 | x_stn = transform_template_input(x_stn, bboxes, frame_img.shape[2:]) 78 | 79 | # self.vis_tensor(x_stn * masks + frame_img * (1. - masks), 'before_') 80 | 81 | x_stn = self.PCT(x_stn) 82 | 83 | x_pct = x_stn * masks + frame_img * (1. - masks) 84 | 85 | # self.vis_tensor(x_pct, 'after_') 86 | 87 | x_pct = torch.clamp(x_pct, -0.999, 0.999) 88 | 89 | # frame_img_in_lct = transform_frames_input(frame_img, coord_w_set, (256, 256)) 90 | # put the color in range [0 1] !!! critical 91 | x_pct.add_(1.0).div_(2.0) 92 | if self.use_LCT: 93 | frame_img.add_(1.0).div_(2.0) 94 | lct = self.LCT.forward_template(frame_img) 95 | lct = F.interpolate(lct, size=frame_img.shape[2:], mode='bilinear', align_corners=False) 96 | x_lct = x_pct * lct 97 | x_lct = x_lct * masks + frame_img * (1. 
- masks) 98 | # x_lct = torch.clamp(x_lct, -0.999, 0.999) 99 | x_lct = torch.clamp(x_lct, 0, 0.999) 100 | 101 | if self.use_LCT: 102 | return x_lct, lct 103 | else: 104 | return x_pct 105 | ''' 106 | 107 | def load_from_file(self, model_path): 108 | try: 109 | checkpoint = torch.load(model_path, map_location='cpu') 110 | #for key, _ in checkpoint.items(): 111 | # print (key) 112 | self.STN.load_state_dict(fix_checkpoint_key((checkpoint['stn'])), strict=True) 113 | print ('--- Finished loading STN module .....') 114 | 115 | if self.PCT is not None: 116 | if not self.predefined_PCT: 117 | self.PCT.load_state_dict(fix_checkpoint_key((checkpoint['color_transformer'])), strict=True) 118 | print (checkpoint['color_transformer']) 119 | print ('--- Finished loading Printer Color Transformation (%s) module' % (self.config['PrinterCT'])) 120 | else: 121 | print ('--- Printer Color Transformation (%s) module loaded from somewhere else' % (self.config['PrinterCT'])) 122 | 123 | if self.use_LCT: 124 | self.LCT.load_state_dict(fix_checkpoint_key((checkpoint['generator'])), strict=True) 125 | print ('--- Finished loading Lighting Color Transformation (%s) module' % (self.config['LightingCT'])) 126 | 127 | except Exception as e: 128 | print (e) 129 | raise IOError('Warning ---mostly this is because the model was trained using different names for its submodules. Please double check if the right model is used.') 130 | 131 | 132 | def fix_checkpoint_key(checkpoint): 133 | new_dict = {} 134 | for k, v in checkpoint.items(): 135 | # TODO: a better approach: 136 | new_dict[k.replace("module.", "")] = v 137 | return new_dict 138 | -------------------------------------------------------------------------------- /nets/AdvPatch/collaborative_advPatch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torchvision import transforms 4 | from .advPatch import AdvPatch 5 | from PIL import Image 6 | from .advPatch_util import generate_patch, generate_border_mask 7 | import os 8 | 9 | class CollaborativeAdvPatch(nn.Module): 10 | def __init__(self, config): 11 | super(CollaborativeAdvPatch, self).__init__() 12 | self.adv_patch_size = tuple(config['adv_patch_size']) 13 | self.apply_border_mask = config['apply_border_mask'] 14 | print(' ===== AdvPatch size: (%d %d %d) =======' % (self.adv_patch_size)) 15 | 16 | if self.apply_border_mask: 17 | self.border_value = config['border_value'] 18 | border_size = int(self.adv_patch_size[0] * config['border_mask_ratio'] + 0.5) 19 | print(' ===== Border mask size: %d Value: %d =======' % (border_size, self.border_value)) 20 | self.border_mask = nn.Parameter(generate_border_mask(self.adv_patch_size, border_size)) 21 | 22 | self.collaborative_learning = not config['CL_pretrained'] 23 | #self.adv_patch = nn.Parameter(generate_patch("gray", size=self.adv_patch_size[:2])) 24 | #self.adv_patch_near = nn.Parameter(generate_patch("gray", size=self.adv_patch_size[:2])) 25 | #self.adv_patch_far = nn.Parameter(generate_patch("gray", size=self.adv_patch_size[:2])) 26 | self.adv_patch = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 27 | self.adv_patch_near = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 28 | self.adv_patch_far = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 29 | 30 | # learnable weights 31 | if config.get('collaborative_weights', False): 32 | self.collaborative_weight = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid()) 33 | 
nn.init.constant_(self.collaborative_weight[0].weight, 10.0) 34 | nn.init.constant_(self.collaborative_weight[0].bias, -2.5) 35 | else: 36 | self.collaborative_weight = None 37 | 38 | @property 39 | def patch_size(self): 40 | return self.adv_patch_size 41 | 42 | @property 43 | def border_size(self): 44 | return self.border_size if self.apply_border_mask else 0 45 | 46 | def learnable(self): 47 | out = [self.adv_patch] if not self.collaborative_learning else \ 48 | [self.adv_patch, self.adv_patch_near, self.adv_patch_far] 49 | if self.collaborative_weight: 50 | out += [self.collaborative_weight[0].weight, self.collaborative_weight[0].bias] 51 | return out 52 | 53 | def clip(self): 54 | self.adv_patch.data.clamp_(0, 1) # keep patch in image range 55 | if self.collaborative_learning: 56 | self.adv_patch_near.data.clamp_(0, 1) 57 | self.adv_patch_far.data.clamp_(0, 1) 58 | 59 | if self.collaborative_weight: 60 | self.collaborative_weight[0].weight.data.clamp_(9.0, 11.0) 61 | self.collaborative_weight[0].bias.data.clamp_(-3.0, -2.0) 62 | 63 | def forward(self): 64 | if self.apply_border_mask: 65 | # note that nn.parameter cannot be assigned directly, so an internal change is needed 66 | self.adv_patch.data *= self.border_mask.data 67 | self.adv_patch.data += (1 - self.border_mask.data) * self.border_value 68 | 69 | if self.training: 70 | return self.adv_patch, self.adv_patch_near, self.adv_patch_far 71 | 72 | return self.adv_patch 73 | 74 | def save_patch(self, patch_path): 75 | adv_patch = self.adv_patch.detach().cpu() 76 | im = transforms.ToPILImage('RGB')(adv_patch) 77 | im.save(patch_path) 78 | 79 | if self.collaborative_learning: 80 | base_path, adv_file = os.path.split(patch_path) 81 | base_file, ext = adv_file.split('.') 82 | 83 | adv_patch_near = self.adv_patch_near.detach().cpu() 84 | im_near = transforms.ToPILImage('RGB')(adv_patch_near) 85 | im_near.save(os.path.join(base_path, base_file + '_near.' + ext)) 86 | 87 | adv_patch_far = self.adv_patch_far.detach().cpu() 88 | im_far = transforms.ToPILImage('RGB')(adv_patch_far) 89 | im_far.save(os.path.join(base_path, base_file + '_far.' 
+ ext)) 90 | 91 | def _load_patch_image(self, patch_path): 92 | patch_img = Image.open(patch_path).convert('RGB') 93 | w, h = patch_img.size 94 | # first dim is height 95 | adv_h, adv_w = self.adv_patch_size[:2] 96 | if w != adv_w or h != adv_h: 97 | patch_img = transforms.Resize((adv_h, adv_w), Image.BILINEAR)(patch_img) 98 | return patch_img 99 | 100 | def load_patch(self, patch_path): 101 | patch_img = self._load_patch_image(patch_path) 102 | self.adv_patch = torch.nn.Parameter(transforms.ToTensor()(patch_img)) 103 | 104 | if self.collaborative_learning: 105 | base_path, adv_file = os.path.split(patch_path) 106 | base_file, ext = adv_file.split('.') 107 | adv_near_file = os.path.join(base_path, base_file+'_near.'+ext) 108 | if os.path.isfile(adv_near_file): 109 | near_patch_img = self._load_patch_image(adv_near_file) 110 | self.adv_patch_near = torch.nn.Parameter(transforms.ToTensor()(near_patch_img)) 111 | 112 | adv_far_file = os.path.join(base_path, base_file+'_far.'+ext) 113 | if os.path.isfile(adv_far_file): 114 | far_patch_img = self._load_patch_image(adv_far_file) 115 | self.adv_patch_far = torch.nn.Parameter(transforms.ToTensor()(far_patch_img)) 116 | 117 | def load_pretrained_patch(self, near_patch_path, far_patch_path): 118 | assert not self.collaborative_learning 119 | if near_patch_path is not None: 120 | near_patch_img = self._load_patch_image(near_patch_path) 121 | self.adv_patch_near = torch.nn.Parameter(transforms.ToTensor()(near_patch_img)) 122 | print ('Loading near model from %s' % (near_patch_path)) 123 | 124 | if far_patch_path is not None: 125 | far_patch_img = self._load_patch_image(far_patch_path) 126 | self.adv_patch_far = torch.nn.Parameter(transforms.ToTensor()(far_patch_img)) 127 | print ('Loading far model from %s' % (far_patch_path)) 128 | 129 | def create_collaborative_advPatch_model(config): 130 | return CollaborativeAdvPatch(config) 131 | -------------------------------------------------------------------------------- /detector/yolov2_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | from detector.yolov2.darknet import Darknet 7 | from detector.yolov2.utils import do_detect_1 8 | from detector.object_detector import ObjectDetector 9 | from detector.yolo_util import nms, xywh2xyxy, wrap_detection_results 10 | 11 | 12 | class YOLOV2_Detector(ObjectDetector): 13 | def __init__(self, model_name, cfg_path, model_path, class_names, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 14 | # load darknet 15 | # model, class_names = self._load_model(cfg_path, model_path) 16 | super().__init__(model_name, cfg_path, model_path, class_names, input_size, test_size, target_object_id) 17 | 18 | # skip background i.e. 
0 19 | self.class_names = [name for k, name in enumerate(class_names) if k > 0] 20 | 21 | def load_model(self, cfg_path, model_path, class_names=None): 22 | darknet_model = Darknet(cfg_path) 23 | darknet_model.load_weights(model_path) 24 | darknet_model = darknet_model.eval() 25 | 26 | return darknet_model 27 | 28 | ''' 29 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 30 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 31 | outputs = self.model(scaled_images) 32 | boxes = get_region_boxes(outputs, conf_thresh, self.model.num_classes, self.model.anchors, self.model.num_anchors) 33 | if nms_thresh > 0: 34 | boxes = [nms(box, nms_thresh) for box in boxes] 35 | 36 | # convert it to coordinates with regards to the orginal sizes 37 | outputs = [] 38 | height, width = self.input_size 39 | for b in boxes: 40 | if len(b) == 0: 41 | #outputs += [torch.FloatTensor([[0.0, 0.0, 0.0, 0.0, 0.0, -1]]).cuda()] 42 | outputs += [None] 43 | else: 44 | t = torch.stack(b).cuda() 45 | t_new = t.clone() 46 | t_new[:,0] = (t[:,0] - t[:,2] / 2.0) * width 47 | t_new[:,1] = (t[:,1] - t[:,3] / 2.0) * height 48 | t_new[:,2] = (t[:,0] + t[:,2] / 2.0) * width 49 | t_new[:,3] = (t[:,1] + t[:,3] / 2.0) * height 50 | # print ('t_new', t_new) 51 | # skip classification score 52 | outputs += [t_new[:, [0,1,2,3,4,6]]] 53 | return outputs 54 | ''' 55 | 56 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 57 | _, h, w, _ = images.shape 58 | if self.test_size[0] == w and self.test_size[1] == h: 59 | scaled_images = images 60 | else: 61 | scaled_images = F.interpolate(images, size=self.test_size, mode='bilinear', align_corners=False) 62 | 63 | outputs = self.model(scaled_images) 64 | # print (outputs.shape) 65 | outputs = post_processing(outputs, self.model.num_classes, self.model.anchors, self.model.num_anchors, self.test_size) 66 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 67 | outputs[..., :4] = xywh2xyxy(outputs[..., :4]) 68 | 69 | outputs = nms(outputs, conf_thres=conf_thresh, nms_thres=nms_thresh) 70 | results = wrap_detection_results(outputs, self.test_size[0], self.input_size) 71 | 72 | return results 73 | 74 | # The 'detect' method is implemented differently from the one in the original implmentation of yolov2. 75 | # 'detector_detect' attempts to keep the same implementation as the original one. 
76 | def detector_detect(self, img, conf_thresh, nms_thresh): 77 | batch, h, w, _ = img.shape 78 | if self.test_size[0] == w and self.test_size[1] == h: 79 | scaled_img = img 80 | else: 81 | scaled_img = F.interpolate(img, size=self.test_size, mode='bilinear', align_corners=False) 82 | 83 | outputs = do_detect_1(self.model, scaled_img, conf_thresh, nms_thresh) 84 | if not outputs: 85 | return [[None]] * batch 86 | 87 | for item in outputs: 88 | item[:4] = xywh2xyxy(item[:4]) 89 | item[:4] *= self.test_size[0] 90 | outputs = [torch.stack(outputs, dim=0)] 91 | results = wrap_detection_results(outputs, self.test_size[0], self.input_size) 92 | # resize 93 | return results 94 | 95 | def post_processing(output, num_classes, anchors, num_anchors, test_size): 96 | # anchor_step = len(anchors)/num_anchors 97 | FloatTensor = torch.cuda.FloatTensor if output.is_cuda else torch.FloatTensor 98 | 99 | anchor_step = len(anchors) // num_anchors 100 | if output.dim() == 3: 101 | output = output.unsqueeze(0) 102 | 103 | batch, _, h, w = output.shape 104 | assert (output.size(1) == (5 + num_classes) * num_anchors) 105 | 106 | #print(output.size()) 107 | output = output.view(batch * num_anchors, 5 + num_classes, h * w) 108 | #print(output.size()) 109 | output = output.transpose(1, 2).contiguous() 110 | #print(output.size()) 111 | output = output.view(batch * num_anchors * h * w, 5 + num_classes) 112 | #print(output.size()) 113 | 114 | # Get outputs 115 | x = torch.sigmoid(output[..., 0]) # Center x 116 | y = torch.sigmoid(output[..., 1]) # Center y 117 | pred_conf = torch.sigmoid(output[..., 4]) # Conf 118 | pred_cls = torch.sigmoid(output[..., 5:]) # Cls pred. 119 | 120 | # print(output.size()) 121 | grid_x = torch.linspace(0, w - 1, w).repeat(h, 1).repeat(batch * num_anchors, 1, 1).view( 122 | batch * num_anchors * h * w).cuda() 123 | grid_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().repeat(batch * num_anchors, 1, 1).view( 124 | batch * num_anchors * h * w).cuda() 125 | xs = x + grid_x 126 | ys = y + grid_y 127 | 128 | anchor_w = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([0])) 129 | anchor_h = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([1])) 130 | anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, h * w).view(batch * num_anchors * h * w).cuda() 131 | anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, h * w).view(batch * num_anchors * h * w).cuda() 132 | ws = torch.exp(output[..., 2]) * anchor_w 133 | hs = torch.exp(output[..., 3]) * anchor_h 134 | 135 | iw, ih = test_size 136 | output = torch.cat( 137 | ( 138 | xs.view(batch, -1, 1) / w * iw, 139 | ys.view(batch, -1, 1) / h * ih, 140 | ws.view(batch, -1, 1) / w * iw, 141 | hs.view(batch, -1, 1) / h * ih, 142 | pred_conf.view(batch, -1, 1), 143 | pred_cls.view(batch, -1, num_classes), 144 | ), 145 | dim=2, 146 | ) 147 | #print (output.shape) 148 | return output 149 | -------------------------------------------------------------------------------- /pytorch_msssim/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from math import exp 4 | import numpy as np 5 | 6 | 7 | def gaussian(window_size, sigma): 8 | gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)]) 9 | return gauss/gauss.sum() 10 | 11 | 12 | def create_window(window_size, channel=1): 13 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 14 | _2D_window = 
_1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 15 | window = _2D_window.expand(channel, 1, window_size, window_size).contiguous() 16 | return window 17 | 18 | 19 | def ssim(img1, img2, window_size=11, window=None, size_average=True, full=False, val_range=None): 20 | # Value range can be different from 255. Other common ranges are 1 (sigmoid) and 2 (tanh). 21 | if val_range is None: 22 | if torch.max(img1) > 128: 23 | max_val = 255 24 | else: 25 | max_val = 1 26 | 27 | if torch.min(img1) < -0.5: 28 | min_val = -1 29 | else: 30 | min_val = 0 31 | L = max_val - min_val 32 | else: 33 | L = val_range 34 | 35 | padd = 0 36 | (_, channel, height, width) = img1.size() 37 | if window is None: 38 | real_size = min(window_size, height, width) 39 | window = create_window(real_size, channel=channel).to(img1.device) 40 | 41 | mu1 = F.conv2d(img1, window, padding=padd, groups=channel) 42 | mu2 = F.conv2d(img2, window, padding=padd, groups=channel) 43 | 44 | mu1_sq = mu1.pow(2) 45 | mu2_sq = mu2.pow(2) 46 | mu1_mu2 = mu1 * mu2 47 | 48 | sigma1_sq = F.conv2d(img1 * img1, window, padding=padd, groups=channel) - mu1_sq 49 | sigma2_sq = F.conv2d(img2 * img2, window, padding=padd, groups=channel) - mu2_sq 50 | sigma12 = F.conv2d(img1 * img2, window, padding=padd, groups=channel) - mu1_mu2 51 | 52 | C1 = (0.01 * L) ** 2 53 | C2 = (0.03 * L) ** 2 54 | 55 | v1 = 2.0 * sigma12 + C2 56 | v2 = sigma1_sq + sigma2_sq + C2 57 | cs = torch.mean(v1 / v2) # contrast sensitivity 58 | 59 | ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2) 60 | 61 | if size_average: 62 | ret = ssim_map.mean() 63 | else: 64 | ret = ssim_map.mean(1).mean(1).mean(1) 65 | 66 | if full: 67 | return ret, cs 68 | return ret 69 | 70 | 71 | def msssim(img1, img2, window_size=11, size_average=True, val_range=None, normalize=False): 72 | device = img1.device 73 | weights = torch.FloatTensor([0.0448, 0.2856, 0.3001, 0.2363, 0.1333]).to(device) 74 | levels = weights.size()[0] 75 | mssim = [] 76 | mcs = [] 77 | for _ in range(levels): 78 | sim, cs = ssim(img1, img2, window_size=window_size, size_average=size_average, full=True, val_range=val_range) 79 | mssim.append(sim) 80 | mcs.append(cs) 81 | 82 | img1 = F.avg_pool2d(img1, (2, 2)) 83 | img2 = F.avg_pool2d(img2, (2, 2)) 84 | 85 | mssim = torch.stack(mssim) 86 | mcs = torch.stack(mcs) 87 | 88 | # Normalize (to avoid NaNs during training unstable models, not compliant with original definition) 89 | if normalize: 90 | mssim = (mssim + 1) / 2 91 | mcs = (mcs + 1) / 2 92 | 93 | pow1 = mcs ** weights 94 | pow2 = mssim ** weights 95 | # From Matlab implementation https://ece.uwaterloo.ca/~z70wang/research/iwssim/ 96 | output = torch.prod(pow1[:-1] * pow2[-1]) 97 | return output 98 | 99 | 100 | # normalized cross correlation 101 | def ncc(img1, img2, window_size=11, window=None, size_average=True): 102 | padd = 0 103 | (_, channel, height, width) = img1.size() 104 | if window is None: 105 | real_size = min(window_size, height, width) 106 | window = create_window(real_size, channel=channel).to(img1.device) 107 | 108 | mu1 = F.conv2d(img1, window, padding=padd, groups=channel) 109 | mu2 = F.conv2d(img2, window, padding=padd, groups=channel) 110 | 111 | mu1_sq = mu1.pow(2) 112 | mu2_sq = mu2.pow(2) 113 | mu1_mu2 = mu1 * mu2 114 | 115 | sigma1_sq = F.conv2d(img1 * img1, window, padding=padd, groups=channel) - mu1_sq 116 | sigma2_sq = F.conv2d(img2 * img2, window, padding=padd, groups=channel) - mu2_sq 117 | sigma12 = F.conv2d(img1 * img2, window, padding=padd, groups=channel) - 
mu1_mu2 118 | print (sigma1_sq.shape) 119 | print (sigma1_sq.mean(), sigma2_sq.mean(), sigma12.mean()) 120 | ncc = sigma12 / (torch.sqrt(sigma1_sq * sigma2_sq) + 0.0000001) 121 | print (ncc) 122 | if size_average: 123 | ret = ncc.mean() 124 | print (ret) 125 | else: 126 | ret = ncc.mean(1).mean(1).mean(1) 127 | 128 | return ret 129 | 130 | # Classes to re-use window 131 | class SSIM(torch.nn.Module): 132 | def __init__(self, window_size=11, size_average=True, val_range=None): 133 | super(SSIM, self).__init__() 134 | self.window_size = window_size 135 | self.size_average = size_average 136 | self.val_range = val_range 137 | 138 | # Assume 1 channel for SSIM 139 | self.channel = 1 140 | self.window = create_window(window_size) 141 | 142 | def forward(self, img1, img2): 143 | (_, channel, _, _) = img1.size() 144 | 145 | if channel == self.channel and self.window.dtype == img1.dtype: 146 | window = self.window 147 | else: 148 | window = create_window(self.window_size, channel).to(img1.device).type(img1.dtype) 149 | self.window = window 150 | self.channel = channel 151 | 152 | return ssim(img1, img2, window=window, window_size=self.window_size, size_average=self.size_average) 153 | 154 | class MSSSIM(torch.nn.Module): 155 | def __init__(self, window_size=11, size_average=True, channel=3): 156 | super(MSSSIM, self).__init__() 157 | self.window_size = window_size 158 | self.size_average = size_average 159 | self.channel = channel 160 | 161 | def forward(self, img1, img2): 162 | # TODO: store window between calls if possible 163 | return msssim(img1, img2, window_size=self.window_size, size_average=self.size_average) 164 | 165 | # Classes to re-use window 166 | class NCC(torch.nn.Module): 167 | def __init__(self, window_size=11, size_average=True): 168 | super(NCC, self).__init__() 169 | self.window_size = window_size 170 | self.size_average = size_average 171 | 172 | # Assume 1 channel for SSIM 173 | self.channel = 1 174 | self.window = create_window(window_size) 175 | 176 | def forward(self, img1, img2): 177 | (_, channel, _, _) = img1.size() 178 | 179 | if channel == self.channel and self.window.dtype == img1.dtype: 180 | window = self.window 181 | else: 182 | window = create_window(self.window_size, channel).to(img1.device).type(img1.dtype) 183 | self.window = window 184 | self.channel = channel 185 | 186 | return ncc(img1, img2, window=window, window_size=self.window_size, size_average=self.size_average) 187 | -------------------------------------------------------------------------------- /detector/yolov3/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-YOLOv3 2 | A minimal PyTorch implementation of YOLOv3, with support for training, inference and evaluation. 3 | 4 | ## Installation 5 | ##### Clone and install requirements 6 | $ git clone https://github.com/eriklindernoren/PyTorch-YOLOv3 7 | $ cd PyTorch-YOLOv3/ 8 | $ sudo pip3 install -r requirements.txt 9 | 10 | ##### Download pretrained weights 11 | $ cd weights/ 12 | $ bash download_weights.sh 13 | 14 | ##### Download COCO 15 | $ cd data/ 16 | $ bash get_coco_dataset.sh 17 | 18 | ## Test 19 | Evaluates the model on COCO test. 20 | 21 | $ python3 test.py --weights_path weights/yolov3.weights 22 | 23 | | Model | mAP (min. 50 IoU) | 24 | | ----------------------- |:-----------------:| 25 | | YOLOv3 608 (paper) | 57.9 | 26 | | YOLOv3 608 (this impl.) | 57.3 | 27 | | YOLOv3 416 (paper) | 55.3 | 28 | | YOLOv3 416 (this impl.) 
| 55.5 | 29 | 30 | ## Inference 31 | Uses pretrained weights to make predictions on images. Below table displays the inference times when using as inputs images scaled to 256x256. The ResNet backbone measurements are taken from the YOLOv3 paper. The Darknet-53 measurement marked shows the inference time of this implementation on my 1080ti card. 32 | 33 | | Backbone | GPU | FPS | 34 | | ----------------------- |:--------:|:--------:| 35 | | ResNet-101 | Titan X | 53 | 36 | | ResNet-152 | Titan X | 37 | 37 | | Darknet-53 (paper) | Titan X | 76 | 38 | | Darknet-53 (this impl.) | 1080ti | 74 | 39 | 40 | $ python3 detect.py --image_folder data/samples/ 41 | 42 |

43 | *(sample detection result images)*
44 |
45 |
46 | 47 | ## Train 48 | ``` 49 | $ train.py [-h] [--epochs EPOCHS] [--batch_size BATCH_SIZE] 50 | [--gradient_accumulations GRADIENT_ACCUMULATIONS] 51 | [--model_def MODEL_DEF] [--data_config DATA_CONFIG] 52 | [--pretrained_weights PRETRAINED_WEIGHTS] [--n_cpu N_CPU] 53 | [--img_size IMG_SIZE] 54 | [--checkpoint_interval CHECKPOINT_INTERVAL] 55 | [--evaluation_interval EVALUATION_INTERVAL] 56 | [--compute_map COMPUTE_MAP] 57 | [--multiscale_training MULTISCALE_TRAINING] 58 | ``` 59 | 60 | #### Example (COCO) 61 | To train on COCO using a Darknet-53 backend pretrained on ImageNet run: 62 | ``` 63 | $ python3 train.py --data_config config/coco.data --pretrained_weights weights/darknet53.conv.74 64 | ``` 65 | 66 | #### Training log 67 | ``` 68 | ---- [Epoch 7/100, Batch 7300/14658] ---- 69 | +------------+--------------+--------------+--------------+ 70 | | Metrics | YOLO Layer 0 | YOLO Layer 1 | YOLO Layer 2 | 71 | +------------+--------------+--------------+--------------+ 72 | | grid_size | 16 | 32 | 64 | 73 | | loss | 1.554926 | 1.446884 | 1.427585 | 74 | | x | 0.028157 | 0.044483 | 0.051159 | 75 | | y | 0.040524 | 0.035687 | 0.046307 | 76 | | w | 0.078980 | 0.066310 | 0.027984 | 77 | | h | 0.133414 | 0.094540 | 0.037121 | 78 | | conf | 1.234448 | 1.165665 | 1.223495 | 79 | | cls | 0.039402 | 0.040198 | 0.041520 | 80 | | cls_acc | 44.44% | 43.59% | 32.50% | 81 | | recall50 | 0.361111 | 0.384615 | 0.300000 | 82 | | recall75 | 0.222222 | 0.282051 | 0.300000 | 83 | | precision | 0.520000 | 0.300000 | 0.070175 | 84 | | conf_obj | 0.599058 | 0.622685 | 0.651472 | 85 | | conf_noobj | 0.003778 | 0.004039 | 0.004044 | 86 | +------------+--------------+--------------+--------------+ 87 | Total Loss 4.429395 88 | ---- ETA 0:35:48.821929 89 | ``` 90 | 91 | #### Tensorboard 92 | Track training progress in Tensorboard: 93 | * Initialize training 94 | * Run the command below 95 | * Go to http://localhost:6006/ 96 | 97 | ``` 98 | $ tensorboard --logdir='logs' --port=6006 99 | ``` 100 | 101 | ## Train on Custom Dataset 102 | 103 | #### Custom model 104 | Run the commands below to create a custom model definition, replacing `` with the number of classes in your dataset. 105 | 106 | ``` 107 | $ cd config/ # Navigate to config dir 108 | $ bash create_custom_model.sh # Will create custom model 'yolov3-custom.cfg' 109 | ``` 110 | 111 | #### Classes 112 | Add class names to `data/custom/classes.names`. This file should have one row per class name. 113 | 114 | #### Image Folder 115 | Move the images of your dataset to `data/custom/images/`. 116 | 117 | #### Annotation Folder 118 | Move your annotations to `data/custom/labels/`. The dataloader expects that the annotation file corresponding to the image `data/custom/images/train.jpg` has the path `data/custom/labels/train.txt`. Each row in the annotation file should define one bounding box, using the syntax `label_idx x_center y_center width height`. The coordinates should be scaled `[0, 1]`, and the `label_idx` should be zero-indexed and correspond to the row number of the class name in `data/custom/classes.names`. 119 | 120 | #### Define Train and Validation Sets 121 | In `data/custom/train.txt` and `data/custom/valid.txt`, add paths to images that will be used as train and validation data respectively. 
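The annotation rows described under *Annotation Folder* above can be produced from ordinary pixel coordinates. A minimal sketch (illustrative only, not part of this repository; the helper name and the example numbers are made up):

```
# Hypothetical helper: convert a pixel-space box (x1, y1, x2, y2) into the
# `label_idx x_center y_center width height` row format used in
# data/custom/labels/*.txt, with all values normalized to [0, 1].
def to_yolo_row(label_idx, x1, y1, x2, y2, img_w, img_h):
    x_center = (x1 + x2) / 2.0 / img_w
    y_center = (y1 + y2) / 2.0 / img_h
    width = (x2 - x1) / img_w
    height = (y2 - y1) / img_h
    return f"{label_idx} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"

# Example: a box spanning (50, 80) to (150, 280) in a 640x480 image, class 0
print(to_yolo_row(0, 50, 80, 150, 280, 640, 480))
# -> 0 0.156250 0.375000 0.156250 0.416667
```

Each image listed in `train.txt`/`valid.txt` then needs a label file containing one such row per object.
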
122 | 123 | #### Train 124 | To train on the custom dataset run: 125 | 126 | ``` 127 | $ python3 train.py --model_def config/yolov3-custom.cfg --data_config config/custom.data 128 | ``` 129 | 130 | Add `--pretrained_weights weights/darknet53.conv.74` to train using a backend pretrained on ImageNet. 131 | 132 | 133 | ## Credit 134 | 135 | ### YOLOv3: An Incremental Improvement 136 | _Joseph Redmon, Ali Farhadi_
137 | 138 | **Abstract**
139 | We present some updates to YOLO! We made a bunch 140 | of little design changes to make it better. We also trained 141 | this new network that’s pretty swell. It’s a little bigger than 142 | last time but more accurate. It’s still fast though, don’t 143 | worry. At 320 × 320 YOLOv3 runs in 22 ms at 28.2 mAP, 144 | as accurate as SSD but three times faster. When we look 145 | at the old .5 IOU mAP detection metric YOLOv3 is quite 146 | good. It achieves 57.9 AP50 in 51 ms on a Titan X, compared 147 | to 57.5 AP50 in 198 ms by RetinaNet, similar performance 148 | but 3.8× faster. As always, all the code is online at 149 | https://pjreddie.com/yolo/. 150 | 151 | [[Paper]](https://pjreddie.com/media/files/papers/YOLOv3.pdf) [[Project Webpage]](https://pjreddie.com/darknet/yolo/) [[Authors' Implementation]](https://github.com/pjreddie/darknet) 152 | 153 | ``` 154 | @article{yolov3, 155 | title={YOLOv3: An Incremental Improvement}, 156 | author={Redmon, Joseph and Farhadi, Ali}, 157 | journal = {arXiv}, 158 | year={2018} 159 | } 160 | ``` 161 | -------------------------------------------------------------------------------- /losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import torch 4 | from torch import nn 5 | from torch import Tensor 6 | 7 | class _Loss(nn.Module): 8 | reduction: str 9 | 10 | def __init__(self, size_average=None, reduce=None, reduction: str = 'mean') -> None: 11 | super(_Loss, self).__init__() 12 | if size_average is not None or reduce is not None: 13 | self.reduction = _Reduction.legacy_get_string(size_average, reduce) 14 | else: 15 | self.reduction = reduction 16 | 17 | class SmoothL1Loss(_Loss): 18 | """Creates a criterion that uses a squared term if the absolute 19 | element-wise error falls below beta and an L1 term otherwise. 20 | It is less sensitive to outliers than the `MSELoss` and in some cases 21 | prevents exploding gradients (e.g. see `Fast R-CNN` paper by Ross Girshick). 22 | Also known as the Huber loss: 23 | 24 | .. math:: 25 | \text{loss}(x, y) = \frac{1}{n} \sum_{i} z_{i} 26 | 27 | where :math:`z_{i}` is given by: 28 | 29 | .. math:: 30 | z_{i} = 31 | \begin{cases} 32 | 0.5 (x_i - y_i)^2 / beta, & \text{if } |x_i - y_i| < beta \\ 33 | |x_i - y_i| - 0.5 * beta, & \text{otherwise } 34 | \end{cases} 35 | 36 | :math:`x` and :math:`y` arbitrary shapes with a total of :math:`n` elements each 37 | the sum operation still operates over all the elements, and divides by :math:`n`. 38 | 39 | beta is an optional parameter that defaults to 1. 40 | 41 | Note: When beta is set to 0, this is equivalent to :class:`L1Loss`. 42 | Passing a negative value in for beta will result in an exception. 43 | 44 | The division by :math:`n` can be avoided if sets ``reduction = 'sum'``. 45 | 46 | Args: 47 | size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 48 | the losses are averaged over each loss element in the batch. Note that for 49 | some losses, there are multiple elements per sample. If the field :attr:`size_average` 50 | is set to ``False``, the losses are instead summed for each minibatch. Ignored 51 | when reduce is ``False``. Default: ``True`` 52 | reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 53 | losses are averaged or summed over observations for each minibatch depending 54 | on :attr:`size_average`. 
When :attr:`reduce` is ``False``, returns a loss per 55 | batch element instead and ignores :attr:`size_average`. Default: ``True`` 56 | reduction (string, optional): Specifies the reduction to apply to the output: 57 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 58 | ``'mean'``: the sum of the output will be divided by the number of 59 | elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 60 | and :attr:`reduce` are in the process of being deprecated, and in the meantime, 61 | specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 62 | beta (float, optional): Specifies the threshold at which to change between L1 and L2 loss. 63 | This value defaults to 1.0. 64 | 65 | Shape: 66 | - Input: :math:`(N, *)` where :math:`*` means, any number of additional 67 | dimensions 68 | - Target: :math:`(N, *)`, same shape as the input 69 | - Output: scalar. If :attr:`reduction` is ``'none'``, then 70 | :math:`(N, *)`, same shape as the input 71 | 72 | """ 73 | 74 | __constants__ = ['reduction'] 75 | 76 | def __init__(self, size_average=None, reduce=None, reduction: str = 'mean', beta: float = 1.0) -> None: 77 | super(SmoothL1Loss, self).__init__(size_average, reduce, reduction) 78 | self.beta = beta 79 | 80 | def forward(self, input: Tensor, target: Tensor) -> Tensor: 81 | return smooth_l1_loss(input, target, reduction=self.reduction, beta=self.beta) 82 | 83 | 84 | def smooth_l1_loss( 85 | input: torch.Tensor, target: torch.Tensor, beta: float, reduction: str = "none" 86 | ) -> torch.Tensor: 87 | """ 88 | Smooth L1 loss defined in the Fast R-CNN paper as: 89 | | 0.5 * x ** 2 / beta if abs(x) < beta 90 | smoothl1(x) = | 91 | | abs(x) - 0.5 * beta otherwise, 92 | where x = input - target. 93 | Smooth L1 loss is related to Huber loss, which is defined as: 94 | | 0.5 * x ** 2 if abs(x) < beta 95 | huber(x) = | 96 | | beta * (abs(x) - 0.5 * beta) otherwise 97 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 98 | differences: 99 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 100 | converges to a constant 0 loss. 101 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 102 | converges to L2 loss. 103 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 104 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 105 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 106 | portion replaced with a quadratic function such that at abs(x) = beta, its 107 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 108 | Args: 109 | input (Tensor): input tensor of any shape 110 | target (Tensor): target value tensor with the same shape as input 111 | beta (float): L1 to L2 change point. 112 | For beta values < 1e-5, L1 loss is computed. 113 | reduction: 'none' | 'mean' | 'sum' 114 | 'none': No reduction will be applied to the output. 115 | 'mean': The output will be averaged. 116 | 'sum': The output will be summed. 117 | Returns: 118 | The loss with the reduction option applied. 119 | Note: 120 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 121 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 122 | the special case of both in which they are equal (beta=1). 123 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 
124 | """ 125 | if beta < 1e-5: 126 | # if beta == 0, then torch.where will result in nan gradients when 127 | # the chain rule is applied due to pytorch implementation details 128 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 129 | # zeros, rather than "no gradient"). To avoid this issue, we define 130 | # small values of beta to be exactly l1 loss. 131 | loss = torch.abs(input - target) 132 | else: 133 | n = torch.abs(input - target) 134 | cond = n < beta 135 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 136 | 137 | if reduction == "mean": 138 | loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() 139 | elif reduction == "sum": 140 | loss = loss.sum() 141 | return loss 142 | -------------------------------------------------------------------------------- /detector/yolov3/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | from utils.logger import * 5 | from utils.utils import * 6 | from utils.datasets import * 7 | from utils.parse_config import * 8 | from test import evaluate 9 | 10 | from terminaltables import AsciiTable 11 | 12 | import os 13 | import sys 14 | import time 15 | import datetime 16 | import argparse 17 | 18 | import torch 19 | from torch.utils.data import DataLoader 20 | from torchvision import datasets 21 | from torchvision import transforms 22 | from torch.autograd import Variable 23 | import torch.optim as optim 24 | 25 | if __name__ == "__main__": 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument("--epochs", type=int, default=100, help="number of epochs") 28 | parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch") 29 | parser.add_argument("--gradient_accumulations", type=int, default=2, help="number of gradient accums before step") 30 | parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file") 31 | parser.add_argument("--data_config", type=str, default="config/coco.data", help="path to data config file") 32 | parser.add_argument("--pretrained_weights", type=str, help="if specified starts from checkpoint model") 33 | parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation") 34 | parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension") 35 | parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between saving model weights") 36 | parser.add_argument("--evaluation_interval", type=int, default=1, help="interval evaluations on validation set") 37 | parser.add_argument("--compute_map", default=False, help="if True computes mAP every tenth batch") 38 | parser.add_argument("--multiscale_training", default=True, help="allow for multi-scale training") 39 | opt = parser.parse_args() 40 | print(opt) 41 | 42 | logger = Logger("logs") 43 | 44 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 45 | 46 | os.makedirs("output", exist_ok=True) 47 | os.makedirs("checkpoints", exist_ok=True) 48 | 49 | # Get data configuration 50 | data_config = parse_data_config(opt.data_config) 51 | train_path = data_config["train"] 52 | valid_path = data_config["valid"] 53 | class_names = load_classes(data_config["names"]) 54 | 55 | # Initiate model 56 | model = Darknet(opt.model_def).to(device) 57 | model.apply(weights_init_normal) 58 | 59 | # If specified we start from checkpoint 60 | if 
opt.pretrained_weights: 61 | if opt.pretrained_weights.endswith(".pth"): 62 | model.load_state_dict(torch.load(opt.pretrained_weights)) 63 | else: 64 | model.load_darknet_weights(opt.pretrained_weights) 65 | 66 | # Get dataloader 67 | dataset = ListDataset(train_path, augment=True, multiscale=opt.multiscale_training) 68 | dataloader = torch.utils.data.DataLoader( 69 | dataset, 70 | batch_size=opt.batch_size, 71 | shuffle=True, 72 | num_workers=opt.n_cpu, 73 | pin_memory=True, 74 | collate_fn=dataset.collate_fn, 75 | ) 76 | 77 | optimizer = torch.optim.Adam(model.parameters()) 78 | 79 | metrics = [ 80 | "grid_size", 81 | "loss", 82 | "x", 83 | "y", 84 | "w", 85 | "h", 86 | "conf", 87 | "cls", 88 | "cls_acc", 89 | "recall50", 90 | "recall75", 91 | "precision", 92 | "conf_obj", 93 | "conf_noobj", 94 | ] 95 | 96 | for epoch in range(opt.epochs): 97 | model.train() 98 | start_time = time.time() 99 | for batch_i, (_, imgs, targets) in enumerate(dataloader): 100 | batches_done = len(dataloader) * epoch + batch_i 101 | 102 | imgs = Variable(imgs.to(device)) 103 | targets = Variable(targets.to(device), requires_grad=False) 104 | 105 | loss, outputs = model(imgs, targets) 106 | loss.backward() 107 | 108 | if batches_done % opt.gradient_accumulations: 109 | # Accumulates gradient before each step 110 | optimizer.step() 111 | optimizer.zero_grad() 112 | 113 | # ---------------- 114 | # Log progress 115 | # ---------------- 116 | 117 | log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, opt.epochs, batch_i, len(dataloader)) 118 | 119 | metric_table = [["Metrics", *[f"YOLO Layer {i}" for i in range(len(model.yolo_layers))]]] 120 | 121 | # Log metrics at each YOLO layer 122 | for i, metric in enumerate(metrics): 123 | formats = {m: "%.6f" for m in metrics} 124 | formats["grid_size"] = "%2d" 125 | formats["cls_acc"] = "%.2f%%" 126 | row_metrics = [formats[metric] % yolo.metrics.get(metric, 0) for yolo in model.yolo_layers] 127 | metric_table += [[metric, *row_metrics]] 128 | 129 | # Tensorboard logging 130 | tensorboard_log = [] 131 | for j, yolo in enumerate(model.yolo_layers): 132 | for name, metric in yolo.metrics.items(): 133 | if name != "grid_size": 134 | tensorboard_log += [(f"{name}_{j+1}", metric)] 135 | tensorboard_log += [("loss", loss.item())] 136 | logger.list_of_scalars_summary(tensorboard_log, batches_done) 137 | 138 | log_str += AsciiTable(metric_table).table 139 | log_str += f"\nTotal loss {loss.item()}" 140 | 141 | # Determine approximate time left for epoch 142 | epoch_batches_left = len(dataloader) - (batch_i + 1) 143 | time_left = datetime.timedelta(seconds=epoch_batches_left * (time.time() - start_time) / (batch_i + 1)) 144 | log_str += f"\n---- ETA {time_left}" 145 | 146 | print(log_str) 147 | 148 | model.seen += imgs.size(0) 149 | 150 | if epoch % opt.evaluation_interval == 0: 151 | print("\n---- Evaluating Model ----") 152 | # Evaluate the model on the validation set 153 | precision, recall, AP, f1, ap_class = evaluate( 154 | model, 155 | path=valid_path, 156 | iou_thres=0.5, 157 | conf_thres=0.5, 158 | nms_thres=0.5, 159 | img_size=opt.img_size, 160 | batch_size=8, 161 | ) 162 | evaluation_metrics = [ 163 | ("val_precision", precision.mean()), 164 | ("val_recall", recall.mean()), 165 | ("val_mAP", AP.mean()), 166 | ("val_f1", f1.mean()), 167 | ] 168 | logger.list_of_scalars_summary(evaluation_metrics, epoch) 169 | 170 | # Print class APs and mAP 171 | ap_table = [["Index", "Class name", "AP"]] 172 | for i, c in enumerate(ap_class): 173 | ap_table += [[c, 
class_names[c], "%.5f" % AP[i]]] 174 | print(AsciiTable(ap_table).table) 175 | print(f"---- mAP {AP.mean()}") 176 | 177 | if epoch % opt.checkpoint_interval == 0: 178 | torch.save(model.state_dict(), f"checkpoints/yolov3_ckpt_%d.pth" % epoch) 179 | -------------------------------------------------------------------------------- /eval_advPath_oneperson.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import argparse 4 | import json 5 | import numpy as np 6 | import glob 7 | 8 | # change this to your own robust detector (i.e. a model with 100% detection accuracy) 9 | ROBUST_DETECTOR_NAME = 'DFaster_RCNN_R101_COCO' 10 | 11 | def single_bbox_iou(box1, box2, x1y1x2y2=True): 12 | """ 13 | Returns the IoU of two bounding boxes 14 | """ 15 | if not x1y1x2y2: 16 | # Transform from center and width to exact coordinates 17 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 18 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 19 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 20 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 21 | else: 22 | # Get the coordinates of bounding boxes 23 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 24 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 25 | 26 | # get the corrdinates of the intersection rectangle 27 | inter_rect_x1 = max(b1_x1, b2_x1) 28 | inter_rect_y1 = max(b1_y1, b2_y1) 29 | inter_rect_x2 = min(b1_x2, b2_x2) 30 | inter_rect_y2 = min(b1_y2, b2_y2) 31 | 32 | # Intersection area 33 | inter_area = max(inter_rect_x2 - inter_rect_x1 + 1, 0.0) * max(inter_rect_y2 - inter_rect_y1 + 1, 0.0) 34 | # Union Area 35 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 36 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 37 | 38 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 39 | 40 | return iou 41 | 42 | #remove false alarms 43 | def prune_detection(detection): 44 | person_num = len(detection) 45 | if person_num == 0: 46 | return detection 47 | 48 | if person_num == 1: 49 | return detection[0] 50 | 51 | areas =[(det[2] - det[0]) * (det[3] - det[1]) for det in detection] 52 | I = sorted(range(len(areas)), key=lambda k: areas[k], reverse=True) 53 | #print (areas, I) 54 | return detection[I[0]] 55 | 56 | def is_matched(gt_bb, detections, det_thresh, match_thresh): 57 | for det in detections: 58 | if det[4] > det_thresh and single_bbox_iou(gt_bb, det[:4]) >= match_thresh: 59 | return True 60 | 61 | return False 62 | 63 | def match_results(gt_detections, detections, det_thresh, match_thresh, skip_num, skip_info=False): 64 | success = 0 65 | total_cnt = 0 66 | cut_skip = 0 67 | detection_skip = 0 68 | frame_cnt = 0 69 | for frame_num, det in gt_detections.items(): 70 | frame_cnt += 1 71 | if int(frame_num.split('_')[-1]) < skip_num: # exclude it 72 | cut_skip += 1 73 | if skip_info: 74 | print('skip %, index < %d ' % (frame_num, skip_num)) 75 | continue 76 | 77 | # det is a list of list 78 | pruned_det = prune_detection(det) 79 | person_num = len(pruned_det) 80 | if person_num <= 0: 81 | detection_skip += 1 82 | if skip_info: 83 | print ('skip %s, person_num: %d' % (frame_num, person_num)) 84 | continue 85 | 86 | if pruned_det and is_matched(pruned_det[:4], detections[frame_num], det_thresh, match_thresh): 87 | success += 1 88 | 89 | total_cnt += 1 90 | return success, total_cnt, cut_skip, detection_skip, frame_cnt 91 | 92 | def evaluate_adv_model(data_dir, data_list, attack_model, det_thresh=0.7, 
match_thresh=0.1, skip_num=0, skip_info=False): 93 | gt_files = get_file_list(data_dir, ROBUST_DETECTOR_NAME, data_list) 94 | gt_detections = [ load_detection(item) for item in gt_files ] 95 | 96 | detection_files = get_file_list(data_dir, attack_model, data_list) 97 | detections = [load_detection(item) for item in detection_files] 98 | 99 | matching_results = [match_results(gt, detection, det_thresh, match_thresh, skip_num=skip_num, \ 100 | skip_info=skip_info) for gt, detection in zip(gt_detections, detections)] 101 | 102 | return matching_results 103 | 104 | def load_detection(filename): 105 | with open(filename, 'r') as f: 106 | data = json.load(f) 107 | return data 108 | 109 | def get_file_list(data_dir, model, data_list): 110 | return [os.path.join(data_dir, model +'+'+ item + '.json') for item in data_list] 111 | 112 | def get_dataset_list(data_dir, detector): 113 | # print (adv_patches) 114 | dataset_list = glob.glob(data_dir+'/*.json') 115 | dataset_list = [os.path.basename(item).split('.')[0] for item in dataset_list if detector in item] 116 | dataset_list = [item.split('+')[-1] for item in dataset_list] 117 | #for adv_patch in adv_patches: 118 | # dataset_list = [item for item in dataset_list if adv_patch in item.split('_')] 119 | # dataset_list = [item for item in dataset_list if adv_patch in item.split('_')] 120 | return dataset_list 121 | 122 | def arg_parser(): 123 | parser = argparse.ArgumentParser(description='PyTorch evaluation') 124 | parser.add_argument('--data_list', help="a list of video files") 125 | parser.add_argument('--data_dir', type=str, default='../../ICLR_detection_results', help="where are the detection results") 126 | parser.add_argument('--adv_patch', help='adversarial patch') 127 | parser.add_argument('--victim_model', type=str, help='victim model') 128 | parser.add_argument('--skip_num', type=int, default=0, help='how many frames to be skipped') 129 | parser.add_argument('--skip_info', dest='skip_info', action='store_true', help='print skip info') 130 | parser.add_argument('--detection_thresh', dest='detection_thresh', type=float, default=0.7, help='threshold for detection_score') 131 | 132 | return parser 133 | 134 | def main(): 135 | global args 136 | parser = arg_parser() 137 | args = parser.parse_args() 138 | adv_patches = args.adv_patch.split(',') 139 | all_dataset_list = get_dataset_list(args.data_dir, args.victim_model) if args.data_list is None else args.data_list.split(',') 140 | #print (all_dataset_list) 141 | #dataset_list = [item for item in dataset_list if 'PCTN' not in item] 142 | for adv_patch in sorted(adv_patches): 143 | print('\n======== %s =================' % (adv_patch)) 144 | 145 | #dataset_list = get_dataset_list(args.data_dir, args.victim_model, adv_patch) 146 | dataset_list = [item for item in all_dataset_list if adv_patch in item.split('_')] 147 | 148 | assert dataset_list, 'no dataset found. please check the detetor name and results directory!' 
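        # NOTE: dataset names are expected to carry the patch name as one of their
        # underscore-separated tokens (see the filter above); an empty list usually
        # means the --adv_patch / --victim_model pair or --data_dir is wrong.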
149 | 150 | # determine the model to be evaluated for the data list 151 | MATCH_THRESH = 0.1 152 | results = evaluate_adv_model(args.data_dir, dataset_list, args.victim_model, det_thresh=args.detection_thresh, match_thresh=MATCH_THRESH, \ 153 | skip_num=args.skip_num, skip_info=args.skip_info) 154 | results = np.array(results) 155 | tot_results = np.sum(results, axis=0) 156 | for dataset, r in sorted(zip(dataset_list, results), key=lambda t: t[0]): 157 | print ('%10s ASR %4.2f Detected: %3d Processed: %3d Cut skip: %3d Detection skip: %3d Total: %3d' % \ 158 | (dataset, (1.0 - r[0]/r[1]), r[0], r[1], r[2], r[3], r[4])) 159 | print ('----------------------------------------------') 160 | print ('%10s ASR %4.2f Detected: %3d Processed: %3d Cut skip: %3d Detection skip: %3d Total: %3d' % \ 161 | ('All', (1.0 - tot_results[0]/tot_results[1]), tot_results[0], tot_results[1], tot_results[2], tot_results[3], tot_results[4])) 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /detector/faster_rcnn_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .object_detector import ObjectDetector 4 | from .faster_rcnn.lib.model.utils.config import cfg, cfg_from_file 5 | from .faster_rcnn.lib.model.faster_rcnn.vgg16 import vgg16 6 | from .faster_rcnn.lib.model.faster_rcnn.resnet import resnet 7 | from .faster_rcnn.lib.model.roi_layers import nms 8 | from .faster_rcnn.lib.model.rpn.bbox_transform import bbox_transform_inv 9 | from .faster_rcnn.lib.model.rpn.bbox_transform import clip_boxes 10 | import numpy as np 11 | 12 | class Faster_RCNN_Detector(ObjectDetector): 13 | def __init__(self, model_name, cfg_path, model_path, class_names, input_size=(-1, -1), test_size=(-1, -1), target_object_id=-1): 14 | # load SSD 15 | super().__init__(model_name, cfg_path, model_path, class_names, input_size, test_size, target_object_id) 16 | 17 | self.mean = cfg.PIXEL_MEANS[0][0].tolist() 18 | # self.test_size = cfg.TEST.SCALES 19 | self.cfg = cfg 20 | 21 | def load_model(self, cfg_path, model_path, class_names): 22 | cfg_from_file(cfg_path) 23 | # fixed 24 | cfg.POOLING_MODE = 'align' 25 | cfg.class_agnostic = False 26 | 27 | obj_classes = np.asarray(class_names) 28 | # initilize the network here. 
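        # NOTE: the backbone is selected by the EXP_DIR field of the loaded config;
        # only 'vgg16', 'res50', 'res101' and 'res152' are handled below, anything
        # else raises NameError.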
29 | if cfg.EXP_DIR == 'vgg16': 30 | fasterRCNN = vgg16(obj_classes, pretrained=False, class_agnostic=cfg.class_agnostic, anchor_scales=cfg.ANCHOR_SCALES, anchor_ratios=cfg.ANCHOR_RATIOS) 31 | elif cfg.EXP_DIR == 'res50': 32 | fasterRCNN = resnet(obj_classes, 50, pretrained=False, class_agnostic=cfg.class_agnostic, anchor_scales=cfg.ANCHOR_SCALES, anchor_ratios=cfg.ANCHOR_RATIOS) 33 | elif cfg.EXP_DIR == 'res101': 34 | fasterRCNN = resnet(obj_classes, 101, pretrained=False, class_agnostic=cfg.class_agnostic, anchor_scales=cfg.ANCHOR_SCALES, anchor_ratios=cfg.ANCHOR_RATIOS) 35 | elif cfg.EXP_DIR == 'res152': 36 | fasterRCNN = resnet(obj_classes, 152, pretrained=False, class_agnostic=cfg.class_agnostic, anchor_scales=cfg.ANCHOR_SCALES, anchor_ratios=cfg.ANCHOR_RATIOS) 37 | else: 38 | raise NameError("network %s is not defined" % (cfg.EXP_DIR) ) 39 | 40 | fasterRCNN.create_architecture() 41 | 42 | checkpoint = torch.load(model_path, map_location=(lambda storage, loc: storage)) 43 | fasterRCNN.load_state_dict(checkpoint['model']) 44 | 45 | fasterRCNN.eval() 46 | 47 | return fasterRCNN 48 | 49 | def detect(self, images, conf_thresh=0.2, nms_thresh=0.0): 50 | 51 | input_imgs, im_scale = self.preprocess(images) 52 | 53 | batch_size = input_imgs.size(0) 54 | with torch.no_grad(): 55 | im_info = np.array([[input_imgs.shape[2], input_imgs.shape[3], im_scale[0]]], dtype=np.float32) 56 | im_info = np.repeat(im_info, batch_size, axis=0) 57 | im_info = torch.from_numpy(im_info).cuda(device=images.device) 58 | num_boxes = torch.zeros(batch_size).cuda(device=images.device) 59 | gt_boxes = torch.zeros(batch_size, 1, 5).cuda(device=images.device) 60 | 61 | rois, cls_prob, bbox_pred, \ 62 | rpn_loss_cls, rpn_loss_box, \ 63 | RCNN_loss_cls, RCNN_loss_bbox, \ 64 | rois_label = self.model(input_imgs, im_info, gt_boxes, num_boxes) 65 | 66 | scores = cls_prob 67 | boxes = rois[:, :, 1:5] 68 | results = self.post_process(im_info, bbox_pred, scores, boxes, im_scale, conf_thresh, nms_thresh) 69 | return results 70 | 71 | def preprocess(self, images): 72 | batch_size, _, h, w = images.shape 73 | im_size_min = min(h,w) 74 | im_size_max = max(h,w) 75 | im_scale = float(self.test_size[0]) / im_size_min 76 | if np.round(im_scale * im_size_max) > self.cfg.TEST.MAX_SIZE: 77 | im_scale = float(self.cfg.TEST.MAX_SIZE) / float(im_size_max) 78 | 79 | # scale the image 80 | test_size = (round(h*im_scale), round(w*im_scale)) 81 | scaled_imgs = F.interpolate(images, size=test_size, mode='bilinear', align_corners=False) 82 | scaled_imgs *= 255.0 83 | 84 | ''' 85 | import torchvision.transforms as transforms 86 | from utils.utils import visualize_detections 87 | import os 88 | for i in range(scaled_imgs.shape[0]): 89 | train_img = transforms.ToPILImage()(images[i].detach().cpu()) 90 | train_img.save(os.path.join('tmp', '%d.jpg' % (int(100*np.random.rand())))) 91 | ''' 92 | # normalize the image 93 | mean = torch.tensor(self.mean).view(1, len(self.mean), 1, 1).cuda(device=images.device) 94 | input_imgs = scaled_imgs - mean 95 | 96 | return input_imgs, (im_scale, im_scale) 97 | 98 | def do_nms(self, scores, pred_boxes, conf_thresh, nms_thresh): 99 | results = list() 100 | for j in range(1, len(self.class_names)): 101 | inds = torch.nonzero(scores[:, j] > conf_thresh).view(-1) 102 | #print (inds) 103 | # if there is det 104 | if inds.numel() > 0: 105 | cls_scores = scores[:, j][inds] 106 | _, order = torch.sort(cls_scores, 0, True) 107 | if self.cfg.class_agnostic: 108 | cls_boxes = pred_boxes[inds, :] 109 | else: 110 | cls_boxes = 
pred_boxes[inds][:, j * 4:(j + 1) * 4] 111 | 112 | cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) 113 | # cls_dets = torch.cat((cls_boxes, cls_scores), 1) 114 | cls_dets = cls_dets[order] 115 | # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) 116 | keep = nms(cls_boxes[order, :], cls_scores[order], nms_thresh) 117 | cls_dets = cls_dets[keep.view(-1).long()] 118 | label_ids = j * torch.ones(cls_dets.size(0), 1).cuda(device=cls_dets.device) 119 | #print (cls_dets.shape, label_ids.shape) 120 | results.append(torch.cat((cls_dets, label_ids), 1)) 121 | #print (results) 122 | return torch.cat(results, dim = 0) if len(results) > 0 else [None] 123 | 124 | def post_process(self, im_info, bbox_pred, scores, boxes, im_scale, conf_thresh, nms_thresh): 125 | batch_size = bbox_pred.size(0) 126 | if self.cfg.TEST.BBOX_REG: 127 | # Apply bounding-box regression deltas 128 | box_deltas = bbox_pred 129 | if self.cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 130 | # Optionally normalize targets by a precomputed mean and stdev 131 | if self.cfg.class_agnostic: 132 | box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ 133 | + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() 134 | box_deltas = box_deltas.view(batch_size, -1, 4) 135 | else: 136 | box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ 137 | + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() 138 | box_deltas = box_deltas.view(batch_size, -1, 4 * len(self.class_names)) 139 | 140 | pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size) 141 | pred_boxes = clip_boxes(pred_boxes, im_info, batch_size) 142 | else: 143 | # Simply repeat the boxes, once for each class 144 | pred_boxes = np.tile(boxes, (1, scores.shape[1])) 145 | 146 | pred_boxes /= im_scale[0] 147 | # scores = scores.squeeze() 148 | # pred_boxes = pred_boxes.squeeze() 149 | 150 | results = [self.do_nms(scores[k], pred_boxes[k], conf_thresh, nms_thresh) for k in range(batch_size)] 151 | 152 | return results 153 | -------------------------------------------------------------------------------- /opts.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from nets.LightingNet import LIGHTINGNET_REGISTRY 4 | 5 | def arg_parser(): 6 | parser = argparse.ArgumentParser(description='PyTorch Action recognition Training') 7 | parser.add_argument('--config', type=str, default='configs/config.yaml', help="training configuration") 8 | parser.add_argument('--seed', type=int, help='manual seed') 9 | parser.add_argument('--dataset', help='path to dataset file list') 10 | parser.add_argument('--datadir', metavar='DIR', help='path to dataset file list') 11 | parser.add_argument('--logdir', dest='logdir', help='where to save the model') 12 | parser.add_argument('--train_list_file', type=str, help='training file') 13 | parser.add_argument('--val_list_file', type=str, help='validation file') 14 | parser.add_argument('--no_flip', dest='no_flip', action='store_true', help='do not flip data') 15 | parser.add_argument('--template_resize', dest='template_resize', action='store_true', help='resize template') 16 | parser.add_argument('--mask_loss', dest='mask_loss', action='store_true', help='use L1 masked loss') 17 | 18 | parser.add_argument('--loc_backbone', dest='loc_backbone', choices=['resnet18', 'resnet50', 'resnet101'], help='which backbone to use') 19 | 20 | parser.add_argument('--resume', type=str, default='', 
metavar='PATH', help='path to the model for resuming') 21 | parser.add_argument('--auto_resume', action='store_true', help='use the last checkpoint in the logdir for resume') 22 | parser.add_argument('--pretrained', dest='pretrained', type=str, metavar='PATH', help='use pre-trained model') 23 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='use this flag to validate without training') 24 | parser.add_argument('--batch_size', type=int, metavar='N', help='mini-batch size') 25 | parser.add_argument('--epochs', default=1000, type=int, metavar='N', help='number of total epochs to run') 26 | parser.add_argument('--disable_cudnn_benchmark', dest='cudnn_benchmark', action='store_false', 27 | help='Disable the cudnn benchmark mode that searches for the best algorithm (helps avoid OOM)') 28 | parser.add_argument('--optimizer', type=str, help='optimizer (Default: Adam)', choices=['Adam', 'SGD'], default='Adam') 29 | parser.add_argument('--scheduler', type=str, help='Learning Rate scheduler (Default: ReduceLROnPlateau)', choices=['ReduceLROnPlateau', 'Cosine'], default='ReduceLROnPlateau') 30 | parser.add_argument('--use_val_loss', action='store_true', help='When using ReduceLROnPlateau, use val loss to change learning rate') 31 | parser.add_argument('--name_suffix', type=str, help='suffix of model name, used for creating log folder', default='') 32 | parser.add_argument('--gpu', dest='gpu_ids', help='comma-separated list of GPU(s) to use.') 33 | parser.add_argument('--compute_dsr', action='store_true', help='Compute the detection success rate during validation; automatically turned on when the evaluate flag is set.') 34 | parser.add_argument('--obj_loss_type', type=str, default='max', choices=['max', 'avg', 'ce'], help='how the objectness loss is computed') 35 | parser.add_argument('--show_dsr_hist', action='store_true', help='Show the histogram of detection accuracy w.r.t.
the height of person.') 36 | 37 | # data-related 38 | parser.add_argument('-j', '--num_workers', type=int, metavar='N', 39 | help='number of data loading workers (default: 4)') 40 | parser.add_argument('--lr', type=float, metavar='N', help='learning rate') 41 | 42 | parser.add_argument('--use_PBM', dest='use_PBM', action='store_true', help='use patch blurring') 43 | 44 | parser.add_argument('--STN', type=str, help='affine or tps') 45 | parser.add_argument('--learnableSTN', dest='learnableSTN', action='store_true', help='learn STN') 46 | parser.add_argument('--TPS_localizer', dest='TPS_localizer', type=str, help='tps localizer') 47 | 48 | parser.add_argument('--use_PCT', dest='use_PCT', action='store_true', help='use lighting color transformation') 49 | parser.add_argument('--PrinterCT', type=str, help='PCT or LinearPCT') 50 | 51 | parser.add_argument('--use_LCT', dest='use_LCT', action='store_true', help='use lighting color transformation') 52 | parser.add_argument('--LightingCT', type=str, help='cc (color constancy) or gen (image generator)', choices=LIGHTINGNET_REGISTRY._obj_map.keys()) 53 | parser.add_argument('--lct_backbone', type=str, help='set the backbone of lightning net', default=None) 54 | 55 | parser.add_argument('--target_patch_path', dest='target_patch_path', type=str, help='target patch to be transformed') 56 | 57 | parser.add_argument('--patch_transformer_path', dest='patch_transformer_path', type=str, help='stn model') 58 | 59 | parser.add_argument('--tv_loss_weight', type=float, metavar='N', help='tv_loss_weight range[0,10]') 60 | 61 | #parser.add_argument('--use_augmentation', dest='use_augmentation', action='store_true', help='use augmentation') 62 | parser.add_argument('--use_ohem', dest='use_ohem', action='store_true', help='use ohem') 63 | parser.add_argument('--ohem_ratio', type=float, metavar='N', help='ohem ratio [0.1-1.0]') 64 | parser.add_argument('--use_EOT', dest='use_EOT', action='store_true', help='use augmentation') 65 | 66 | # parser.add_argument('--MaxProbExtractor_loss', dest='MaxProbExtractor_loss', type=str, help='type of max prob extractor') 67 | 68 | parser.add_argument('--visualize', dest='visualize', action='store_true', help='store adversarial images') 69 | parser.add_argument('--test_dir', dest='test_dir', type=str, help='test directory with images') 70 | parser.add_argument('--detection_output_dir', dest='detection_output_dir', type=str, help='output directory') 71 | 72 | parser.add_argument('--detector_impl', dest='detector_impl', type=str, help='implementation') 73 | parser.add_argument('--detector_name', dest='detector_name', type=str, help='detector name') 74 | parser.add_argument('--object_dataset', dest='object_dataset', type=str, help='object dataset: COCO or PASCAL') 75 | 76 | parser.add_argument('--collaborative_learning', '--CL', action='store_true', help='collaborative learning') 77 | parser.add_argument('--CL_pretrained', '--CLPretrain', action='store_true', help='use pretrained models collaborative learning') 78 | parser.add_argument('--collaborative_weights', '--CW', action='store_true', help='using learnable weights in collaborative learning ') 79 | parser.add_argument('--kd_norm', type=float, metavar='N', help='margin loss norm') 80 | parser.add_argument('--kd_type', type=str, metavar='N', help='loss type: margin (proposed) | mutual | one') 81 | 82 | # for distributed learning 83 | parser.add_argument('--sync-bn', action='store_true', 84 | help='sync BN across GPUs') 85 | parser.add_argument('--world-size', default=1, type=int, 
86 | help='number of nodes for distributed training') 87 | parser.add_argument('--rank', default=0, type=int, 88 | help='node rank for distributed training') 89 | parser.add_argument('--hostfile', default='', type=str, 90 | help='hostfile distributed learning') 91 | parser.add_argument('--dist-url', default='tcp://127.0.0.1:23456', type=str, 92 | help='url used to set up distributed training') 93 | parser.add_argument('--dist-backend', default='nccl', type=str, 94 | help='distributed backend') 95 | parser.add_argument('--multiprocessing-distributed', '--ddp', action='store_true', 96 | help='Use multi-processing distributed training to launch ' 97 | 'N processes per node, which has N GPUs. This is the ' 98 | 'fastest way to use PyTorch for either single node or ' 99 | 'multi node data parallel training') 100 | 101 | return parser 102 | 103 | def merge_args(args, config): 104 | for key, value in vars(args).items(): 105 | if value is not None: 106 | config[key] = value 107 | 108 | return config 109 | -------------------------------------------------------------------------------- /nets/LightingNet/cc_f4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from nets.backbone.backbone_config import get_backbone, get_last_conv_dim 5 | 6 | from . import LIGHTINGNET_REGISTRY 7 | 8 | class FCN32s(nn.Module): 9 | def __init__(self, model_name, n_class): 10 | super().__init__() 11 | self.n_class = n_class 12 | self.pretrained_net = get_backbone(model_name) 13 | last_dim = get_last_conv_dim(model_name) 14 | 15 | self.relu = nn.ReLU(inplace=True) 16 | self.deconv1 = nn.ConvTranspose2d(last_dim, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 17 | self.bn1 = nn.BatchNorm2d(512) 18 | self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 19 | self.bn2 = nn.BatchNorm2d(256) 20 | self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 21 | self.bn3 = nn.BatchNorm2d(128) 22 | self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 23 | self.bn4 = nn.BatchNorm2d(64) 24 | self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 25 | self.bn5 = nn.BatchNorm2d(32) 26 | self.classifier = nn.Conv2d(32, n_class, kernel_size=1) 27 | 28 | def forward(self, x): 29 | output = self.pretrained_net(x) # size = (N, 512, x.H/32, x.W/32) 30 | 31 | score = self.bn1(self.relu(self.deconv1(output))) # size=(N, 512, x.H/16, x.W/16) 32 | score = self.bn2(self.relu(self.deconv2(score))) # size=(N, 256, x.H/8, x.W/8) 33 | score = self.bn3(self.relu(self.deconv3(score))) # size=(N, 128, x.H/4, x.W/4) 34 | score = self.bn4(self.relu(self.deconv4(score))) # size=(N, 64, x.H/2, x.W/2) 35 | score = self.bn5(self.relu(self.deconv5(score))) # size=(N, 32, x.H, x.W) 36 | score = self.classifier(score) # size=(N, n_class, x.H/1, x.W/1) 37 | 38 | return score # size=(N, n_class, x.H/1, x.W/1) 39 | 40 | # class CC_FCN4(nn.Module): 41 | # def __init__(self, model_name): 42 | # super().__init__() 43 | # self.fcn = FCN32s(model_name, 4) 44 | # self.relu = nn.ReLU(inplace=True) 45 | # 46 | # def forward(self, x): 47 | # score = self.fcn(x) 48 | # # score = self.relu(score) 49 | # rgb = self.relu(score[:,:3,:,:]) 50 | # #rgb = F.normalize(rgb, p=2, dim=1) 51 | # _, _, h, w = score.shape 52 | # 
confidence = score[:,3:4,:,:].view(-1, h*w) 53 | # confidence = F.softmax(confidence, dim=1) 54 | # rgb = rgb * confidence.view(-1, 1, h, w) 55 | # # average pool 56 | # # rgb = F.normalize(rgb, p=2, dim=1) 57 | # return rgb 58 | 59 | class CC_Alex_FCN4(nn.Module): 60 | def __init__(self, config=None): 61 | super().__init__() 62 | FC1_SIZE = 64 63 | FC1_KERNEL_SIZE = 6 64 | self.backbone = get_backbone('alexnet') 65 | last_conv_dim = get_last_conv_dim('alexnet') 66 | self.fc1 = nn.Conv2d(last_conv_dim, FC1_SIZE, kernel_size=FC1_KERNEL_SIZE, padding=3, bias=False) 67 | self.relu = nn.ReLU(inplace=True) 68 | self.dropout = nn.Dropout(0.5) 69 | self.fc2 = nn.Conv2d(FC1_SIZE, 4, kernel_size=1, stride=1, bias=True) 70 | #self.fc_pool = nn.Conv2d(3, 3, kernel_size=8, padding=3, bias=False) 71 | self.fc_pool = nn.Conv2d(3, 3, kernel_size=8, bias=True) 72 | 73 | ''' 74 | for m in self.modules(): 75 | if isinstance(m, nn.Conv2d): 76 | m.weight.data.zero_() 77 | if m.bias is not None: 78 | m.bias.data.zero_() 79 | if isinstance(m, nn.ConvTranspose2d): 80 | assert m.kernel_size[0] == m.kernel_size[1] 81 | initial_weight = get_upsampling_weight( 82 | m.in_channels, m.out_channels, m.kernel_size[0]) 83 | m.weight.data.copy_(initial_weight) 84 | ''' 85 | 86 | # transform the template 87 | def forward(self, x): 88 | return self.forward_template(x) 89 | 90 | # the normalized output is NOT required as we need to learn the lighting condition 91 | # changes in the environment 92 | def forward_template(self, x): 93 | y = self.backbone(x) 94 | y = self.fc1(y) 95 | y = self.relu(y) 96 | y = self.dropout(y) 97 | y = self.fc2(y) 98 | y = self.relu(y) 99 | _, _, h, w = y.shape 100 | 101 | rgb = y[:, :3, :, :] 102 | rgb = F.normalize(rgb, p=2, dim=1) 103 | confidence = y[:,3:4,:,:].view(-1, h*w) 104 | confidence = F.softmax(confidence, dim=1) 105 | confidence = confidence.view(-1, 1, h, w) 106 | # rgb = F.adaptive_avg_pool2d(rgb, (1, 1)) 107 | # rgb = F.normalize(rgb, p=2, dim=1) 108 | rgb *= confidence 109 | rgb = self.relu(self.fc_pool(rgb)) 110 | print (rgb) 111 | #rgb = F.normalize(rgb, p=2, dim=1) 112 | rgb = F.interpolate(rgb, x.size()[2:], mode='bilinear', align_corners=False) 113 | return rgb #, confidence 114 | 115 | def generate(self, src_img, frame_img): 116 | rgb = self.forward_template(frame_img) 117 | return src_img * rgb 118 | 119 | 120 | @LIGHTINGNET_REGISTRY.register() 121 | class CC_FCN4(nn.Module): 122 | def __init__(self, config=None): 123 | super().__init__() 124 | FC1_OUTPUT_SIZE = 64 125 | FC1_KERNEL_SIZE = 6 126 | FC2_OUTPUT_SIZE = 4 127 | POOL_SIZE = 8 128 | backbone_name =config['lct_backbone'] 129 | self.backbone = get_backbone(backbone_name) 130 | last_conv_dim = get_last_conv_dim(backbone_name) 131 | 132 | if backbone_name == 'resnet18': 133 | POOL_SIZE = 9 134 | 135 | self.fc1 = nn.Conv2d(last_conv_dim, FC1_OUTPUT_SIZE, kernel_size=FC1_KERNEL_SIZE, padding=3, bias=False) 136 | self.relu = nn.ReLU(inplace=True) 137 | self.dropout = nn.Dropout(0.5) 138 | self.fc2 = nn.Conv2d(FC1_OUTPUT_SIZE, FC2_OUTPUT_SIZE, kernel_size=1, stride=1, bias=True) 139 | #self.fc_pool = nn.Conv2d(3, 3, kernel_size=8, padding=3, bias=False) 140 | self.fc_pool = nn.Conv2d(3, 3, kernel_size=POOL_SIZE, bias=True) 141 | 142 | ''' 143 | for m in self.modules(): 144 | if isinstance(m, nn.Conv2d): 145 | m.weight.data.zero_() 146 | if m.bias is not None: 147 | m.bias.data.zero_() 148 | if isinstance(m, nn.ConvTranspose2d): 149 | assert m.kernel_size[0] == m.kernel_size[1] 150 | initial_weight = get_upsampling_weight( 
151 | m.in_channels, m.out_channels, m.kernel_size[0]) 152 | m.weight.data.copy_(initial_weight) 153 | ''' 154 | 155 | # transform the template 156 | def forward(self, x): 157 | return self.forward_template(x) 158 | 159 | # the normalized output is NOT required as we need to learn the lighting condition 160 | # changes in the environment 161 | def forward_template(self, x): 162 | y = self.backbone(x) 163 | y = self.fc1(y) 164 | y = self.relu(y) 165 | y = self.dropout(y) 166 | y = self.fc2(y) 167 | y = self.relu(y) 168 | _, _, h, w = y.shape 169 | 170 | rgb = y[:, :3, :, :] 171 | rgb = F.normalize(rgb, p=2, dim=1) 172 | confidence = y[:,3:4,:,:].view(-1, h*w) 173 | confidence = F.softmax(confidence, dim=1) 174 | confidence = confidence.view(-1, 1, h, w) 175 | # rgb = F.adaptive_avg_pool2d(rgb, (1, 1)) 176 | # rgb = F.normalize(rgb, p=2, dim=1) 177 | #print ('-----', x.shape, y.shape, confidence.shape, rgb.shape) 178 | rgb *= confidence 179 | rgb = self.relu(self.fc_pool(rgb)) 180 | #rgb = F.normalize(rgb, p=2, dim=1) 181 | #print (rgb) 182 | # rgb = F.interpolate(rgb, x.size()[2:], mode='bilinear', align_corners=False) 183 | return rgb #, confidence 184 | 185 | def generate(self, src_img, frame_img): 186 | rgb = self.forward_template(frame_img) 187 | return src_img * rgb 188 | -------------------------------------------------------------------------------- /nets/AdvPatch/hybrid_advPatch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torchvision import transforms 4 | from .advPatch import AdvPatch 5 | from PIL import Image 6 | from .advPatch_util import generate_patch, generate_border_mask 7 | import os 8 | from utils.gaussian_blur import gaussian_blur 9 | import cv2 10 | import numpy as np 11 | 12 | def get_gaussian_kernel(kernel_size=3, sigma=2, channels=3): 13 | # Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2) 14 | ''' 15 | x_coord = torch.arange(kernel_size) 16 | x_grid = x_coord.repeat(kernel_size).view(kernel_size, kernel_size) 17 | y_grid = x_grid.t() 18 | xy_grid = torch.stack([x_grid, y_grid], dim=-1).float() 19 | 20 | mean = (kernel_size - 1)/2. 21 | variance = sigma**2. 22 | 23 | # Calculate the 2-dimensional gaussian kernel which is 24 | # the product of two gaussian distributions for two different 25 | # variables (in this case called x and y) 26 | gaussian_kernel = (1./(2.*math.pi*variance)) *\ 27 | torch.exp( 28 | -torch.sum((xy_grid - mean)**2., dim=-1) /\ 29 | (2*variance) 30 | ) 31 | 32 | # Make sure sum of values in gaussian kernel equals 1. 
33 | gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel) 34 | 35 | # Reshape to 2d depthwise convolutional weight 36 | gaussian_kernel = gaussian_kernel.view(1, 1, kernel_size, kernel_size) 37 | gaussian_kernel = gaussian_kernel.repeat(channels, 1, 1, 1) 38 | ''' 39 | 40 | filter = cv2.getGaussianKernel(kernel_size, sigma=sigma) 41 | gaussian_kernel = np.dot(filter, filter.T) 42 | gaussian_kernel = torch.from_numpy(gaussian_kernel).float() 43 | gaussian_kernel = gaussian_kernel.repeat(channels, 1, 1, 1) 44 | 45 | gaussian_filter = nn.Conv2d(in_channels=channels, out_channels=channels, 46 | kernel_size=kernel_size, groups=channels, bias=False, padding=kernel_size // 2) 47 | gaussian_filter.weight.data = gaussian_kernel 48 | gaussian_filter.weight.requires_grad = False 49 | 50 | return gaussian_filter 51 | 52 | 53 | class PatchBlurringModule(nn.Module): 54 | def __init__(self, kernel_size=3): 55 | super(PatchBlurringModule, self).__init__() 56 | self.kernel_size = kernel_size 57 | self.weights = nn.Parameter(torch.ones(3, 1, self.kernel_size, self.kernel_size) / (self.kernel_size * self.kernel_size)) 58 | 59 | def forward(self, x): 60 | 61 | weights = torch.sigmoid(self.weights) 62 | normalized_w = torch.cat([item/item.sum() for item in weights]) 63 | normalized_w.unsqueeze_(1) 64 | #print (normalized_w) 65 | return torch.conv2d(x, normalized_w, bias=None, padding=self.kernel_size//2, groups=3) 66 | 67 | class HybridAdvPatch(nn.Module): 68 | def __init__(self, config): 69 | super(HybridAdvPatch, self).__init__() 70 | self.adv_patch_size = tuple(config['adv_patch_size']) 71 | self.apply_border_mask = config['apply_border_mask'] 72 | print(' ===== AdvPatch size: (%d %d %d) =======' % (self.adv_patch_size)) 73 | 74 | if self.apply_border_mask: 75 | self.border_value = config['border_value'] 76 | border_size = int(self.adv_patch_size[0] * config['border_mask_ratio'] + 0.5) 77 | print(' ===== Border mask size: %d Value: %d =======' % (border_size, self.border_value)) 78 | self.border_mask = nn.Parameter(generate_border_mask(self.adv_patch_size, border_size)) 79 | 80 | #self.adv_patch = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 81 | self.adv_patch_near = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 82 | self.adv_patch_far = nn.Parameter(generate_patch("random", size=self.adv_patch_size[:2])) 83 | 84 | # self.high_filter_size = nn.Parameter(torch.tensor(5.0)) 85 | # self.low_filter_size = nn.Parameter(torch.tensor(5.0)) 86 | # self.high_filter_sigma = nn.Parameter(torch.tensor(0.5)) 87 | # self.low_filter_sigma = nn.Parameter(torch.tensor(0.5)) 88 | 89 | # self.lfilter = get_gaussian_kernel(kernel_size=5, sigma=2, channels=3) 90 | # self.hfilter = get_gaussian_kernel(kernel_size=5, sigma=2, channels=3) 91 | #self.lfilter = PatchBlurringModule(kernel_size=5) 92 | #self.hfilter = PatchBlurringModule(kernel_size=5) 93 | self.blending = nn.Parameter(torch.ones(self.adv_patch_size[0], self.adv_patch_size[1]) * 0.5) 94 | 95 | self.collaborative_learning = not config['CL_pretrained'] 96 | self.collaborative_weight = None 97 | 98 | @property 99 | def patch_size(self): 100 | return self.adv_patch_size 101 | 102 | @property 103 | def border_size(self): 104 | return self.border_size if self.apply_border_mask else 0 105 | 106 | def learnable(self): 107 | #return [self.adv_patch_near, self.adv_patch_far, self.adv_patch] 108 | #return [self.adv_patch_near, self.adv_patch_far] + list(self.lfilter.parameters()) + \ 109 | # 
list(self.hfilter.parameters()) 110 | return [self.adv_patch_near, self.adv_patch_far, self.blending] 111 | 112 | def clip(self): 113 | self.adv_patch.data.clamp_(0, 1) 114 | self.adv_patch_near.data.clamp_(0, 1) 115 | self.adv_patch_far.data.clamp_(0, 1) 116 | # self.high_filter_size.data.clamp_(3.0, 7.0) 117 | # self.low_filter_size.data.clamp_(3.0, 7.0) 118 | # self.high_filter_sigma.data.clamp_(0.3, 0.8) 119 | # self.low_filter_sigma.data.clamp_(0.3, 0.8) 120 | #self.high_filter_size = torch.round(self.high_filter_size) 121 | #self.low_filter_size = torch.round(self.low_filter_size) 122 | #print (self.low_filter_size, self.high_filter_size) 123 | 124 | def forward(self): 125 | ''' 126 | lf_advT_patch = self.lfilter(self.adv_patch_far.unsqueeze(0)) 127 | lf_advT_patch = lf_advT_patch.squeeze(0) 128 | hf_advT_patch = self.hfilter(self.adv_patch_near.unsqueeze(0)) 129 | hf_advT_patch = hf_advT_patch.squeeze(0) 130 | hf_advT_patch = self.adv_patch_near - hf_advT_patch 131 | 132 | advT_patch = lf_advT_patch + hf_advT_patch 133 | advT_patch.data.clamp_(0,1) 134 | ''' 135 | blending = torch.sigmoid(self.blending) 136 | self.adv_patch = self.adv_patch_far * blending + self.adv_patch_near * ( 1.0 - blending) 137 | 138 | if self.training: 139 | return self.adv_patch, self.adv_patch_near, self.adv_patch_far 140 | 141 | return self.adv_patch 142 | 143 | def save_patch(self, patch_path): 144 | adv_patch = self.adv_patch.detach().cpu() 145 | im = transforms.ToPILImage('RGB')(adv_patch) 146 | im.save(patch_path) 147 | 148 | base_path, adv_file = os.path.split(patch_path) 149 | base_file, ext = adv_file.split('.') 150 | 151 | adv_patch_near = self.adv_patch_near.detach().cpu() 152 | im_near = transforms.ToPILImage('RGB')(adv_patch_near) 153 | im_near.save(os.path.join(base_path, base_file + '_near.' + ext)) 154 | 155 | adv_patch_far = self.adv_patch_far.detach().cpu() 156 | im_far = transforms.ToPILImage('RGB')(adv_patch_far) 157 | im_far.save(os.path.join(base_path, base_file + '_far.' + ext)) 158 | 159 | def _load_patch_image(self, patch_path): 160 | patch_img = Image.open(patch_path).convert('RGB') 161 | w, h = patch_img.size 162 | # first dim is height 163 | adv_h, adv_w = self.adv_patch_size[:2] 164 | if w != adv_w or h != adv_h: 165 | patch_img = transforms.Resize((adv_h, adv_w), Image.BILINEAR)(patch_img) 166 | return patch_img 167 | 168 | def load_patch(self, patch_path): 169 | patch_img = self._load_patch_image(patch_path) 170 | self.adv_patch = torch.nn.Parameter(transforms.ToTensor()(patch_img)) 171 | 172 | base_path, adv_file = os.path.split(patch_path) 173 | base_file, ext = adv_file.split('.') 174 | adv_near_file = os.path.join(base_path, base_file+'_near.'+ext) 175 | if os.path.isfile(adv_near_file): 176 | near_patch_img = self._load_patch_image(adv_near_file) 177 | self.adv_patch_near = torch.nn.Parameter(transforms.ToTensor()(near_patch_img)) 178 | 179 | adv_far_file = os.path.join(base_path, base_file+'_far.'+ext) 180 | if os.path.isfile(adv_far_file): 181 | far_patch_img = self._load_patch_image(adv_far_file) 182 | self.adv_patch_far = torch.nn.Parameter(transforms.ToTensor()(far_patch_img)) 183 | 184 | def create_hybrid_advPatch_model(config): 185 | return HybridAdvPatch(config) 186 | --------------------------------------------------------------------------------