├── .vscode
│   └── settings.json
├── ReadMe.md
├── TemporalTranslation.yml
├── __pycache__
│   ├── attack_methods.cpython-37.pyc
│   ├── base_attacks.cpython-37.pyc
│   ├── datasets.cpython-37.pyc
│   ├── scale_invariant.cpython-37.pyc
│   ├── utils.cpython-37.pyc
│   └── video_attacks.cpython-37.pyc
├── attack_kinetics.py
├── attack_methods.py
├── attack_ucf101.py
├── config
│   ├── i3d_nl5_resnet101_v1_kinetics400.yaml
│   ├── i3d_nl5_resnet50_v1_kinetics400.yaml
│   ├── slowfast_8x8_resnet101_kinetics400.yaml
│   ├── slowfast_8x8_resnet50_kinetics400.yaml
│   ├── tpn_resnet101_f32s2_kinetics400.yaml
│   └── tpn_resnet50_f32s2_kinetics400.yaml
├── dataset
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   ├── kinetics.cpython-37.pyc
│   │   ├── transforms.cpython-37.pyc
│   │   └── ucf101.cpython-37.pyc
│   ├── kinetics.py
│   ├── transforms.py
│   └── ucf101.py
├── kinetics400_attack_samples.csv
├── reference_kinetics.py
├── reference_ucf101.py
├── ucf_all_info.csv
├── used_idxs.pkl
└── utils.py

/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": "/home/zhipeng/anaconda3/envs/transfer/bin/python"
3 | }
--------------------------------------------------------------------------------
/ReadMe.md:
--------------------------------------------------------------------------------
1 | # AAAI-2022 Paper
2 | Boosting the Transferability of Video Adversarial Examples via Temporal Translation [pdf](https://ojs.aaai.org/index.php/AAAI/article/download/20168/19927)
3 | 
4 | # Environment
5 | Recreate the environment with:
6 | ```
7 | conda env create -f TemporalTranslation.yml
8 | ```
9 | ## GPU info
10 | ```
11 | NVIDIA GeForce RTX 2080TI
12 | NVIDIA-SMI 430.14 Driver Version: 430.14 CUDA Version: 10.2
13 | ```
14 | 
15 | # Attacked Dataset
16 | The attacked datasets are sampled from UCF101 and Kinetics-400. Download them from [here](https://drive.google.com/drive/folders/1O4XyLw37WqGKqFvWFaE2ps5IAD_shSpG?usp=sharing).
17 | Set **UCF_DATA_ROOT** and **Kinetic_DATA_ROOT** in utils.py to your dataset paths.
18 | 
19 | # Models
20 | We use Non-local, SlowFast, and TPN models with ResNet-50 and ResNet-101 backbones.
21 | ## UCF101
22 | We fine-tune the video models on UCF101.
23 | Download the checkpoint files from [here](https://drive.google.com/drive/folders/10KOlWdi5bsV9001uL4Bn1T48m9hkgsZ2?usp=sharing).
24 | Set **UCF_MODEL_ROOT** in utils.py to your checkpoint path.
25 | 
26 | ## Kinetics-400
27 | We use models pretrained on Kinetics-400 from [gluoncv](https://cv.gluon.ai/model_zoo/action_recognition.html).
28 | 
29 | # Attack
30 | Set **OPT_PATH** in utils.py to your output path.
31 | ## Generate adversarial examples
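Both attack scripts wire up the same pipeline. A minimal sketch of what `attack_kinetics.py` does (illustrative only, assuming the paths in utils.py are already configured; this is not a second entry point):

```
import attack_methods
from dataset.kinetics import get_dataset
from gluoncv.torch.model_zoo import get_model
from utils import CONFIG_PATHS, get_cfg_custom

cfg = get_cfg_custom(CONFIG_PATHS['slowfast_resnet101'], 1)  # config for the white-box model, batch size 1
model = get_model(cfg).cuda()                                # video model from gluoncv's model zoo
params = {'kernlen': 15, 'momentum': True,                   # kernlen = 2 * (shift length) + 1
          'move_type': 'adj', 'kernel_mode': 'gaussian'}
attack = attack_methods.TemporalTranslation(model, params=params, steps=10)
for videos, labels, *rest in get_dataset(cfg):
    adv_videos = attack(videos.cuda(), labels.cuda())        # adversarial clips, same normalization as inputs
```

The full command-line interface is: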
32 | ```
33 | python attack_kinetics.py --gpu 0 --batch_size 1 --model slowfast_resnet101 --attack_method TemporalTranslation --step 10 --file_prefix yours --momentum --kernlen 15 --move_type adj --kernel_mode gaussian
34 | ```
Use attack_ucf101.py with the same flags to attack the UCF101 models instead.
35 | * model: the white-box model used to generate adversarial examples
36 | * attack_method: TemporalTranslation (TT / TT-MI) or TemporalTranslation_TI (TT-TI)
37 | * step: the number of attack iterations
38 | * file_prefix: an additional suffix for the output directory name
39 | * momentum: enables the momentum term (TT-MI)
40 | * kernlen: 2 * (shift length) + 1
41 | * move_type: the frame-shifting strategy (adj | remote | random)
42 | * kernel_mode: the strategy for generating the weight matrix W (gaussian | linear | uniform)
43 | 
44 | ## Attack success rate
45 | ```
46 | python reference_kinetics.py --gpu 0 --adv_path your_adv_path
47 | ```
Use reference_ucf101.py for adversarial examples generated on UCF101.
48 | * adv_path: name of the directory (under OPT_PATH) that holds the generated adversarial examples
49 | 
50 | # Citation
51 | If you use our method in your research, please consider citing:
52 | ```
53 | @inproceedings{wei2022boosting,
54 | title={Boosting the Transferability of Video Adversarial Examples via Temporal Translation},
55 | author={Wei, Zhipeng and Chen, Jingjing and Wu, Zuxuan and Jiang, Yu-Gang},
56 | booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
57 | volume={36},
58 | number={3},
59 | pages={2659--2667},
60 | year={2022}
61 | }
62 | ```
63 | 
--------------------------------------------------------------------------------
/TemporalTranslation.yml:
--------------------------------------------------------------------------------
1 | name: transfer
2 | channels:
3 | - pytorch
4 | - defaults
5 | dependencies:
6 | - _libgcc_mutex=0.1=main
7 | - _openmp_mutex=4.5=1_gnu
8 | - argcomplete=1.12.3=pyhd3eb1b0_0
9 | - argon2-cffi=20.1.0=py37h27cfd23_1
10 | - async_generator=1.10=py37h28b3542_0
11 | - attrs=21.2.0=pyhd3eb1b0_0
12 | - backcall=0.2.0=pyhd3eb1b0_0
13 | - blas=1.0=mkl
14 | - bleach=4.0.0=pyhd3eb1b0_0
15 | - bzip2=1.0.8=h7b6447c_0
16 | - ca-certificates=2021.7.5=h06a4308_1
17 | - certifi=2021.5.30=py37h06a4308_0
18 | - cffi=1.14.6=py37h400218f_0
19 | - cudatoolkit=10.2.89=hfd86e86_1
20 | - dbus=1.13.18=hb2f20db_0
21 | - debugpy=1.4.1=py37h295c915_0
22 | - decorator=5.0.9=pyhd3eb1b0_0
23 | - defusedxml=0.7.1=pyhd3eb1b0_0
24 | - entrypoints=0.3=py37_0
25 | - expat=2.4.1=h2531618_2
26 | - ffmpeg=4.3=hf484d3e_0
27 | - fontconfig=2.13.1=h6c09931_0
28 | - freetype=2.10.4=h5ab3b9f_0
29 | - glib=2.69.1=h5202010_0
30 | - gmp=6.2.1=h2531618_2
31 | - gnutls=3.6.15=he1e5248_0
32 | - gst-plugins-base=1.14.0=h8213a91_2
33 | - gstreamer=1.14.0=h28cd5cc_2
34 | - icu=58.2=he6710b0_3
35 | - importlib-metadata=4.8.1=py37h06a4308_0
36 | - importlib_metadata=4.8.1=hd3eb1b0_0
37 | - intel-openmp=2021.3.0=h06a4308_3350
38 | - ipykernel=6.2.0=py37h06a4308_1
39 | - ipython=7.27.0=py37hb070fc8_0
40 | - ipython_genutils=0.2.0=pyhd3eb1b0_1
41 | - ipywidgets=7.6.3=pyhd3eb1b0_1
42 | - jedi=0.18.0=py37h06a4308_1
43 | - jinja2=3.0.1=pyhd3eb1b0_0
44 | - jpeg=9b=h024ee3a_2
45 | - jsonschema=3.2.0=pyhd3eb1b0_2
46 | - jupyter=1.0.0=py37_7
47 | - jupyter_client=7.0.1=pyhd3eb1b0_0
48 | - jupyter_console=6.4.0=pyhd3eb1b0_0
49 | - jupyter_core=4.7.1=py37h06a4308_0
50 | - jupyterlab_pygments=0.1.2=py_0
51 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
52 | - lame=3.100=h7b6447c_0
53 | - lcms2=2.12=h3be6417_0
54 | - ld_impl_linux-64=2.35.1=h7274673_9
55 | - libffi=3.3=he6710b0_2
56 | - libgcc-ng=9.3.0=h5101ec6_17
57 | - libgomp=9.3.0=h5101ec6_17
58 | - libiconv=1.15=h63c8f33_5
59 | - libidn2=2.3.2=h7f8727e_0
60 | - libpng=1.6.37=hbc83047_0
61 | - libsodium=1.0.18=h7b6447c_0
62 | - libstdcxx-ng=9.3.0=hd4cf53a_17
63 | - libtasn1=4.16.0=h27cfd23_0 64 | - libtiff=4.2.0=h85742a9_0 65 | - libunistring=0.9.10=h27cfd23_0 66 | - libuuid=1.0.3=h1bed415_2 67 | - libuv=1.40.0=h7b6447c_0 68 | - libwebp-base=1.2.0=h27cfd23_0 69 | - libxcb=1.14=h7b6447c_0 70 | - libxml2=2.9.12=h03d6c58_0 71 | - lz4-c=1.9.3=h295c915_1 72 | - markupsafe=2.0.1=py37h27cfd23_0 73 | - matplotlib-inline=0.1.2=pyhd3eb1b0_2 74 | - mistune=0.8.4=py37h14c3975_1001 75 | - mkl=2021.3.0=h06a4308_520 76 | - mkl-service=2.4.0=py37h7f8727e_0 77 | - mkl_fft=1.3.0=py37h42c9631_2 78 | - mkl_random=1.2.2=py37h51133e4_0 79 | - nbclient=0.5.3=pyhd3eb1b0_0 80 | - nbconvert=6.1.0=py37h06a4308_0 81 | - nbformat=5.1.3=pyhd3eb1b0_0 82 | - ncurses=6.2=he6710b0_1 83 | - nest-asyncio=1.5.1=pyhd3eb1b0_0 84 | - nettle=3.7.3=hbbd107a_1 85 | - ninja=1.10.2=hff7bd54_1 86 | - notebook=6.4.3=py37h06a4308_0 87 | - numpy=1.20.3=py37hf144106_0 88 | - numpy-base=1.20.3=py37h74d4b33_0 89 | - olefile=0.46=py37_0 90 | - openh264=2.1.0=hd408876_0 91 | - openjpeg=2.4.0=h3ad879b_0 92 | - openssl=1.1.1l=h7f8727e_0 93 | - packaging=21.0=pyhd3eb1b0_0 94 | - pandocfilters=1.4.3=py37h06a4308_1 95 | - parso=0.8.2=pyhd3eb1b0_0 96 | - pcre=8.45=h295c915_0 97 | - pexpect=4.8.0=pyhd3eb1b0_3 98 | - pickleshare=0.7.5=pyhd3eb1b0_1003 99 | - pillow=8.3.1=py37h2c7a002_0 100 | - pip=21.0.1=py37h06a4308_0 101 | - prometheus_client=0.11.0=pyhd3eb1b0_0 102 | - prompt-toolkit=3.0.17=pyhca03da5_0 103 | - prompt_toolkit=3.0.17=hd3eb1b0_0 104 | - ptyprocess=0.7.0=pyhd3eb1b0_2 105 | - pycparser=2.20=py_2 106 | - pygments=2.10.0=pyhd3eb1b0_0 107 | - pyparsing=2.4.7=pyhd3eb1b0_0 108 | - pyqt=5.9.2=py37h05f1152_2 109 | - pyrsistent=0.17.3=py37h7b6447c_0 110 | - python=3.7.11=h12debd9_0 111 | - python-dateutil=2.8.2=pyhd3eb1b0_0 112 | - pytorch=1.9.1=py3.7_cuda10.2_cudnn7.6.5_0 113 | - pyzmq=22.2.1=py37h295c915_1 114 | - qt=5.9.7=h5867ecd_1 115 | - qtconsole=5.1.0=pyhd3eb1b0_0 116 | - qtpy=1.10.0=pyhd3eb1b0_0 117 | - readline=8.1=h27cfd23_0 118 | - send2trash=1.5.0=pyhd3eb1b0_1 119 | - setuptools=58.0.4=py37h06a4308_0 120 | - sip=4.19.8=py37hf484d3e_0 121 | - six=1.16.0=pyhd3eb1b0_0 122 | - sqlite=3.36.0=hc218d9a_0 123 | - terminado=0.9.4=py37h06a4308_0 124 | - testpath=0.5.0=pyhd3eb1b0_0 125 | - tk=8.6.10=hbc83047_0 126 | - torchaudio=0.9.1=py37 127 | - torchvision=0.10.1=py37_cu102 128 | - tornado=6.1=py37h27cfd23_0 129 | - traitlets=5.0.5=pyhd3eb1b0_0 130 | - typing_extensions=3.10.0.2=pyh06a4308_0 131 | - wcwidth=0.2.5=pyhd3eb1b0_0 132 | - webencodings=0.5.1=py37_1 133 | - wheel=0.37.0=pyhd3eb1b0_1 134 | - widgetsnbextension=3.5.1=py37_0 135 | - xz=5.2.5=h7b6447c_0 136 | - zeromq=4.3.4=h2531618_0 137 | - zipp=3.5.0=pyhd3eb1b0_0 138 | - zlib=1.2.11=h7b6447c_3 139 | - zstd=1.4.9=haebb681_0 140 | - pip: 141 | - autocfg==0.0.8 142 | - charset-normalizer==2.0.6 143 | - cycler==0.10.0 144 | - decord==0.6.0 145 | - gluoncv==0.10.4.post4 146 | - idna==3.2 147 | - kiwisolver==1.3.2 148 | - matplotlib==3.4.3 149 | - opencv-contrib-python==4.5.3.56 150 | - pandas==1.3.3 151 | - portalocker==2.3.2 152 | - pytz==2021.1 153 | - pyyaml==5.4.1 154 | - requests==2.26.0 155 | - scipy==1.7.1 156 | - seaborn==0.11.2 157 | - timm==0.5.0 158 | - tqdm==4.62.3 159 | - urllib3==1.26.6 160 | - yacs==0.1.8 161 | prefix: /home/zhipeng/miniconda3/envs/transfer 162 | -------------------------------------------------------------------------------- /__pycache__/attack_methods.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/__pycache__/attack_methods.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/base_attacks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/__pycache__/base_attacks.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/datasets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/__pycache__/datasets.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/scale_invariant.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/__pycache__/scale_invariant.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/video_attacks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/__pycache__/video_attacks.cpython-37.pyc -------------------------------------------------------------------------------- /attack_kinetics.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import numpy as np 5 | import math 6 | 7 | import attack_methods 8 | 9 | from dataset.kinetics import get_dataset 10 | from gluoncv.torch.model_zoo import get_model 11 | from utils import CONFIG_PATHS, OPT_PATH, get_cfg_custom 12 | 13 | def arg_parse(): 14 | parser = argparse.ArgumentParser(description='') 15 | parser.add_argument('--gpu', type=str, default='0', help='gpu device.') 16 | parser.add_argument('--batch_size', type=int, default=1, metavar='N') 17 | parser.add_argument('--model', type=str, default='i3d_resnet101', help='i3d_resnet101 | slowfast_resnet101 | tpn_resnet101.') 18 | parser.add_argument('--attack_method', type=str, default='TemporalTranslation_TI', help='TemporalTranslation | TemporalTranslation_TI') 19 | parser.add_argument('--step', type=int, default=10, metavar='N', 20 | help='Multi-step or One-step.') 21 | 22 | parser.add_argument('--file_prefix', type=str, default='') 23 | 24 | # parameters in the paper 25 | parser.add_argument('--momentum', action='store_true', default=False, help='Use iterative momentum in MFFGSM.') 26 | parser.add_argument('--kernlen', type=int, default=15, metavar='N') 27 | parser.add_argument('--move_type', type=str, default='adj',help='adj | remote | random') 28 | parser.add_argument('--kernel_mode', type=str, default='gaussian') 29 | args = parser.parse_args() 30 | args.adv_path = os.path.join(OPT_PATH, 'Kinetics-{}-{}-{}-{}'.format(args.model, args.attack_method, args.step, args.file_prefix)) 31 | if not os.path.exists(args.adv_path): 32 | 
os.makedirs(args.adv_path) 33 | return args 34 | 35 | if __name__ == '__main__': 36 | args = arg_parse() 37 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 38 | print (args) 39 | # loading cfg. 40 | cfg_path = CONFIG_PATHS[args.model] 41 | cfg = get_cfg_custom(cfg_path, args.batch_size) 42 | 43 | # loading dataset and model. 44 | dataset_loader = get_dataset(cfg) 45 | model = get_model(cfg).cuda() 46 | 47 | # attack 48 | params = {'kernlen':args.kernlen, 49 | 'momentum':args.momentum, 50 | 'move_type':args.move_type, 51 | 'kernel_mode':args.kernel_mode} 52 | attack_method = getattr(attack_methods, args.attack_method)(model, params=params, steps=args.step) 53 | 54 | for step, data in enumerate(dataset_loader): 55 | if step %1 == 0: 56 | print ('Running {}, {}/{}'.format(args.attack_method, step+1, len(dataset_loader))) 57 | val_batch = data[0].cuda() 58 | val_label = data[1].cuda() 59 | adv_batches = attack_method(val_batch, val_label) 60 | val_batch = val_batch.detach() 61 | for ind,label in enumerate(val_label): 62 | ori = val_batch[ind].cpu().numpy() 63 | adv = adv_batches[ind].cpu().numpy() 64 | np.save(os.path.join(args.adv_path, '{}-adv'.format(label.item())), adv) 65 | np.save(os.path.join(args.adv_path, '{}-ori'.format(label.item())), ori) 66 | -------------------------------------------------------------------------------- /attack_methods.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import scipy.stats as st 4 | import numpy as np 5 | import torchvision 6 | from PIL import Image 7 | import random 8 | import time 9 | import math 10 | 11 | def norm_grads(grads, frame_level=True): 12 | # frame level norm 13 | # clip level norm 14 | assert len(grads.shape) == 5 and grads.shape[2] == 32 15 | if frame_level: 16 | norm = torch.mean(torch.abs(grads), [1,3,4], keepdim=True) 17 | else: 18 | norm = torch.mean(torch.abs(grads), [1,2,3,4], keepdim=True) 19 | # norm = torch.norm(grads, dim=[1,2,3,4], p=1) 20 | return grads / norm 21 | 22 | class Attack(object): 23 | """ 24 | # refer to https://github.com/Harry24k/adversarial-attacks-pytorch 25 | Base class for all attacks. 26 | .. note:: 27 | It automatically set device to the device where given model is. 28 | It temporarily changes the model's training mode to `test` 29 | by `.eval()` only during an attack process. 30 | """ 31 | def __init__(self, name, model): 32 | r""" 33 | Initializes internal attack state. 34 | Arguments: 35 | name (str) : name of an attack. 36 | model (torch.nn.Module): model to attack. 37 | """ 38 | self.attack = name 39 | self.model = model 40 | self.model_name = str(model).split("(")[0] 41 | 42 | self.training = model.training 43 | self.device = next(model.parameters()).device 44 | 45 | self._targeted = 1 46 | self._attack_mode = 'default' 47 | self._return_type = 'float' 48 | self._target_map_function = lambda images, labels:labels 49 | 50 | self.mean = [0.485, 0.456, 0.406] 51 | self.std = [0.229, 0.224, 0.225] 52 | 53 | def forward(self, *input): 54 | r""" 55 | It defines the computation performed at every call (attack forward). 56 | Should be overridden by all subclasses. 57 | """ 58 | raise NotImplementedError 59 | 60 | def set_attack_mode(self, mode, target_map_function=None): 61 | r""" 62 | Set the attack mode. 63 | 64 | Arguments: 65 | mode (str) : 'default' (DEFAULT) 66 | 'targeted' - Use input labels as targeted labels. 67 | 'least_likely' - Use least likely labels as targeted labels. 
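(illustrative call, mirroring the error message below: attack.set_attack_mode('targeted', target_map_function=lambda images, labels: (labels + 1) % 10))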
68 | 
69 | target_map_function (function) :
70 | """
71 | if self._attack_mode == 'only_default':
72 | raise ValueError("Changing attack mode is not supported in this attack method.")
73 | 
74 | if (mode == 'targeted') and (target_map_function is None):
75 | raise ValueError("Please give a target_map_function, e.g., lambda images, labels:(labels+1)%10.")
76 | 
77 | if mode=="default":
78 | self._attack_mode = "default"
79 | self._targeted = 1
80 | self._transform_label = self._get_label
81 | elif mode=="targeted":
82 | self._attack_mode = "targeted"
83 | self._targeted = -1
84 | self._target_map_function = target_map_function
85 | self._transform_label = self._get_target_label
86 | elif mode=="least_likely":
87 | self._attack_mode = "least_likely"
88 | self._targeted = -1
89 | self._transform_label = self._get_least_likely_label
90 | else:
91 | raise ValueError(mode + " is not a valid mode. [Options : default, targeted, least_likely]")
92 | 
93 | def set_return_type(self, type):
94 | r"""
95 | Set the return type of adversarial images: `int` or `float`.
96 | Arguments:
97 | type (str) : 'float' or 'int'. (DEFAULT : 'float')
98 | """
99 | if type == 'float':
100 | self._return_type = 'float'
101 | elif type == 'int':
102 | self._return_type = 'int'
103 | else:
104 | raise ValueError(type + " is not a valid type. [Options : float, int]")
105 | 
106 | def save(self, save_path, data_loader, verbose=True):
107 | r"""
108 | Save adversarial images as torch.tensor from given torch.utils.data.DataLoader.
109 | Arguments:
110 | save_path (str) : path where the adversarial tensors are saved.
111 | data_loader (torch.utils.data.DataLoader) : data loader.
112 | verbose (bool) : True for displaying detailed information. (DEFAULT : True)
113 | """
114 | self.model.eval()
115 | 
116 | image_list = []
117 | label_list = []
118 | 
119 | correct = 0
120 | total = 0
121 | 
122 | total_batch = len(data_loader)
123 | 
124 | for step, (images, labels) in enumerate(data_loader):
125 | adv_images = self.__call__(images, labels)
126 | 
127 | image_list.append(adv_images.cpu())
128 | label_list.append(labels.cpu())
129 | 
130 | if self._return_type == 'int':
131 | adv_images = adv_images.float()/255
132 | 
133 | if verbose:
134 | outputs = self.model(adv_images)
135 | _, predicted = torch.max(outputs.data, 1)
136 | total += labels.size(0)
137 | correct += (predicted == labels.to(self.device)).sum()
138 | 
139 | acc = 100 * float(correct) / total
140 | print('- Save Progress : %2.2f %% / Accuracy : %2.2f %%' % ((step+1)/total_batch*100, acc), end='\r')
141 | 
142 | x = torch.cat(image_list, 0)
143 | y = torch.cat(label_list, 0)
144 | torch.save((x, y), save_path)
145 | print('\n- Save Complete!')
146 | 
147 | self._switch_model()
148 | 
149 | def _transform_video(self, video, mode='forward'):
150 | r'''
151 | Normalize the video ('forward') or map it back to [0, 1] ('back').
152 | '''
153 | dtype = video.dtype
154 | mean = torch.as_tensor(self.mean, dtype=dtype, device=self.device)
155 | std = torch.as_tensor(self.std, dtype=dtype, device=self.device)
156 | if mode == 'forward':
157 | # [-mean/std, mean/std]
158 | video.sub_(mean[:, None, None, None]).div_(std[:, None, None, None])
159 | elif mode == 'back':
160 | # [0, 1]
161 | video.mul_(std[:, None, None, None]).add_(mean[:, None, None, None])
162 | return video
163 | 
164 | def _transform_label(self, images, labels):
165 | r"""
166 | Function for changing the attack mode.
167 | """
168 | return labels
169 | 
170 | def _get_label(self, images, labels):
171 | r"""
172 | Function for changing the attack mode.
173 | Return input labels.
174 | """ 175 | return labels 176 | 177 | def _get_target_label(self, images, labels): 178 | r""" 179 | Function for changing the attack mode. 180 | Return input labels. 181 | """ 182 | return self._target_map_function(images, labels) 183 | 184 | def _get_least_likely_label(self, images, labels): 185 | r""" 186 | Function for changing the attack mode. 187 | Return least likely labels. 188 | """ 189 | outputs = self.model(images) 190 | _, labels = torch.min(outputs.data, 1) 191 | labels = labels.detach_() 192 | return labels 193 | 194 | def _to_uint(self, images): 195 | r""" 196 | Function for changing the return type. 197 | Return images as int. 198 | """ 199 | return (images*255).type(torch.uint8) 200 | 201 | def _switch_model(self): 202 | r""" 203 | Function for changing the training mode of the model. 204 | """ 205 | if self.training: 206 | self.model.train() 207 | else: 208 | self.model.eval() 209 | 210 | def __str__(self): 211 | info = self.__dict__.copy() 212 | 213 | del_keys = ['model', 'attack'] 214 | 215 | for key in info.keys(): 216 | if key[0] == "_" : 217 | del_keys.append(key) 218 | 219 | for key in del_keys: 220 | del info[key] 221 | 222 | info['attack_mode'] = self._attack_mode 223 | if info['attack_mode'] == 'only_default' : 224 | info['attack_mode'] = 'default' 225 | 226 | info['return_type'] = self._return_type 227 | 228 | return self.attack + "(" + ', '.join('{}={}'.format(key, val) for key, val in info.items()) + ")" 229 | 230 | def __call__(self, *input, **kwargs): 231 | self.model.eval() 232 | images = self.forward(*input, **kwargs) 233 | self._switch_model() 234 | 235 | if self._return_type == 'int': 236 | images = self._to_uint(images) 237 | 238 | return images 239 | 240 | class TemporalTranslation(Attack): 241 | ''' 242 | TT and TT-MI 243 | model: a video model. 244 | params = { 245 | 'kernlen': shift length. int. 246 | 'momentum': True or False. 247 | 'move_type': three shifting strategies. adj or remote or random. 248 | 'kernel_mode': three strategies to generate W. gaussian or linear or uniform.} 249 | delay: hyper-parameter in momentum iterm. 
250 | ''' 251 | def __init__(self, model, params, epsilon=16/255, steps=10, delay=1.0): 252 | super(TemporalTranslation, self).__init__("TemporalTranslation", model) 253 | self.epsilon = epsilon 254 | self.steps = steps 255 | self.step_size = self.epsilon / self.steps 256 | self.delay = delay 257 | 258 | for name, value in params.items(): 259 | setattr(self, name, value) 260 | 261 | self.frames = 32 262 | self.cycle_move_list = self._move_info_generation() 263 | if self.kernel_mode == 'gaussian': 264 | kernel = self._initial_kernel_gaussian(self.kernlen).astype(np.float32) # (self.kernlen) 265 | elif self.kernel_mode == 'linear': 266 | kernel = self._initial_kernel_linear(self.kernlen).astype(np.float32) # (self.kernlen) 267 | elif self.kernel_mode == 'uniform': 268 | kernel = self._initial_kernel_uniform(self.kernlen).astype(np.float32) # (self.kernlen) 269 | 270 | self.kernel = torch.from_numpy(np.expand_dims(kernel, 0)).to(self.device) # 1,self.kernlen 271 | 272 | def _move_info_generation(self): 273 | max_move = int((self.kernlen - 1) / 2) 274 | lists = [i for i in range(-max_move, max_move+1)] 275 | return lists 276 | 277 | def _initial_kernel_linear(self, kernlen): 278 | k = int((kernlen - 1) / 2) 279 | kern1d = [] 280 | for i in range(k+1): 281 | kern1d.append(1 - i / (k+1)) 282 | kern1d = np.array(kern1d[::-1][:-1] + kern1d) 283 | kernel = kern1d / kern1d.sum() 284 | return kernel 285 | 286 | def _initial_kernel_uniform(self, kernlen): 287 | kern1d = np.ones(kernlen) 288 | kernel = kern1d / kern1d.sum() 289 | return kernel 290 | 291 | def _initial_kernel_gaussian(self, kernlen): 292 | assert kernlen%2 == 1 293 | k = (kernlen - 1) /2 294 | sigma = k/3 295 | k = int(k) 296 | def calculte_guassian(x, sigma): 297 | return (1/(sigma*np.sqrt(2*np.pi)) * np.math.exp(-(x**2)/(2* (sigma**2)))) 298 | kern1d = [] 299 | for i in range(-k, k+1): 300 | kern1d.append(calculte_guassian(i, sigma)) 301 | assert len(kern1d) == kernlen 302 | kern1d = np.array(kern1d) 303 | kernel = kern1d / kern1d.sum() 304 | return kernel 305 | 306 | def _conv1d_frame(self, grads): 307 | ''' 308 | grads: D, N, C, T, H, W 309 | ''' 310 | # cycle padding for grads 311 | D,N,C,T,H,W = grads.shape 312 | grads = grads.reshape(D, -1) 313 | 314 | grad = torch.matmul(self.kernel, grads) 315 | grad = grad.reshape(N,C,T,H,W) 316 | return grad 317 | 318 | def _cycle_move(self, adv_videos, cycle_move): 319 | if cycle_move < 0: 320 | direction = -1 321 | else: 322 | direction = 1 323 | cycle_move = abs(cycle_move) 324 | cycle_move = cycle_move % self.frames 325 | new_videos = torch.zeros_like(adv_videos) 326 | for i in range(self.frames): 327 | ori_ind = i 328 | new_ind = (ori_ind + direction * cycle_move) % self.frames 329 | new_videos[:,:,new_ind] = adv_videos[:,:,ori_ind] 330 | return new_videos 331 | 332 | def _cycle_move_remote(self, adv_videos, cycle_move): 333 | if cycle_move < 0: 334 | direction = -1 335 | else: 336 | direction = 1 337 | cycle_move = abs(cycle_move) 338 | if cycle_move == 0: 339 | cycle_move = cycle_move % self.frames 340 | else: 341 | cycle_move = (cycle_move + (int(self.frames/2)-1)) % self.frames 342 | new_videos = torch.zeros_like(adv_videos) 343 | for i in range(self.frames): 344 | ori_ind = i 345 | new_ind = (ori_ind + direction * cycle_move) % self.frames 346 | new_videos[:,:,new_ind] = adv_videos[:,:,ori_ind] 347 | return new_videos 348 | 349 | def _cycle_move_random(self, adv_videos, cycle_move): 350 | if cycle_move < 0: 351 | direction = -1 352 | else: 353 | direction = 1 354 | # cycle_move = 
abs(cycle_move) 355 | if cycle_move == 0: 356 | cycle_move = cycle_move % self.frames 357 | else: 358 | cycle_move = random.randint(0, 100) % self.frames 359 | # cycle_move = (cycle_move + int(self.frames/2)) % self.frames 360 | new_videos = torch.zeros_like(adv_videos) 361 | for i in range(self.frames): 362 | ori_ind = i 363 | new_ind = (ori_ind + direction * cycle_move) % self.frames 364 | new_videos[:,:,new_ind] = adv_videos[:,:,ori_ind] 365 | return new_videos 366 | 367 | def _exchange_move(self, adv_videos, exchange_lists): 368 | new_videos = adv_videos.clone() 369 | for exchange in exchange_lists: 370 | one_frame, ano_frame = exchange 371 | new_videos[:,:,one_frame] = adv_videos[:,:,ano_frame] 372 | new_videos[:,:,ano_frame] = adv_videos[:,:,one_frame] 373 | return new_videos 374 | 375 | def _get_grad(self, adv_videos, labels, loss): 376 | batch_size = adv_videos.shape[0] 377 | used_labels = torch.cat([labels]*batch_size, dim=0) 378 | adv_videos.requires_grad = True 379 | outputs = self.model(adv_videos) 380 | cost = self._targeted*loss(outputs, used_labels).to(self.device) 381 | grad = torch.autograd.grad(cost, adv_videos, 382 | retain_graph=False, create_graph=False)[0] 383 | return grad 384 | 385 | def _grad_augmentation(self, grads): 386 | ''' 387 | Input: 388 | grads: kernlen, grad.shape 389 | Return 390 | grad 391 | ''' 392 | diff_position_same_frame = torch.zeros_like(grads) 393 | for ind, cycle_move in enumerate(self.cycle_move_list): 394 | diff_position_same_frame[ind] = self._cycle_move(grads[ind], -cycle_move) 395 | d_conv_grad = self._conv1d_frame(diff_position_same_frame) 396 | return d_conv_grad 397 | 398 | def forward(self, videos, labels): 399 | r""" 400 | Overridden. 401 | """ 402 | videos = videos.to(self.device) 403 | momentum = torch.zeros_like(videos).to(self.device) 404 | labels = labels.to(self.device) 405 | loss = nn.CrossEntropyLoss() 406 | unnorm_videos = self._transform_video(videos.clone().detach(), mode='back') # [0, 1] 407 | adv_videos = videos.clone().detach() 408 | del videos 409 | 410 | start_time = time.time() 411 | for i in range(self.steps): 412 | # obtain grads of these variants 413 | batch_new_videos = [] 414 | for cycle_move in self.cycle_move_list: 415 | if self.move_type == 'adj': 416 | new_videos = self._cycle_move(adv_videos, cycle_move) 417 | elif self.move_type == 'remote': 418 | new_videos = self._cycle_move_remote(adv_videos, cycle_move) 419 | elif self.move_type == 'random': 420 | new_videos = self._cycle_move_random(adv_videos, cycle_move) 421 | batch_new_videos.append(new_videos) 422 | batch_inps = torch.cat(batch_new_videos, dim=0) 423 | grads = [] 424 | batch_times = 5 425 | length = len(self.cycle_move_list) 426 | if self.model_name == 'TPNet': 427 | batch_times = length 428 | print (self.model_name, batch_times) 429 | batch_size = math.ceil(length / batch_times) 430 | for i in range(batch_times): 431 | grad = self._get_grad(batch_inps[i*batch_size:min((i+1)*batch_size, length)], labels, loss) 432 | grads.append(grad) 433 | # grad augmentation 434 | grads = torch.cat(grads, dim=0) 435 | grads = torch.unsqueeze(grads, dim=1) 436 | grad = self._grad_augmentation(grads) 437 | 438 | # momentum 439 | if self.momentum: 440 | grad = norm_grads(grad) 441 | grad += momentum * self.delay 442 | momentum = grad 443 | else: 444 | pass 445 | 446 | adv_videos = self._transform_video(adv_videos.detach(), mode='back') # [0, 1] 447 | adv_videos = adv_videos + self.step_size*grad.sign() 448 | delta = torch.clamp(adv_videos - unnorm_videos, 
min=-self.epsilon, max=self.epsilon) 449 | adv_videos = torch.clamp(unnorm_videos + delta, min=0, max=1).detach() 450 | adv_videos = self._transform_video(adv_videos, mode='forward') # norm 451 | print ('now_time', time.time()-start_time) 452 | return adv_videos 453 | 454 | class TemporalTranslation_TI(Attack): 455 | ''' 456 | TT-TI 457 | model: a video model. 458 | params = { 459 | 'kernlen': shift length. int. 460 | 'momentum': True or False. 461 | 'move_type': three shifting strategies. adj or remote or random. 462 | 'kernel_mode': three strategies to generate W. gaussian or linear or uniform.} 463 | delay: hyper-parameter in momentum iterm. 464 | ''' 465 | def __init__(self, model, params, epsilon=16/255, steps=1, delay=1.0): 466 | super(TemporalTranslation_TI, self).__init__("TemporalTranslation_TI", model) 467 | self.epsilon = epsilon 468 | self.steps = steps 469 | self.step_size = self.epsilon / self.steps 470 | self.delay = delay 471 | 472 | for name, value in params.items(): 473 | setattr(self, name, value) 474 | 475 | self.frames = 32 476 | self.cycle_move_list = self._move_info_generation() 477 | if self.kernel_mode == 'gaussian': 478 | kernel = self._initial_kernel_gaussian(self.kernlen).astype(np.float32) # (self.kernlen) 479 | elif self.kernel_mode == 'linear': 480 | kernel = self._initial_kernel_linear(self.kernlen).astype(np.float32) # (self.kernlen) 481 | elif self.kernel_mode == 'uniform': 482 | kernel = self._initial_kernel_uniform(self.kernlen).astype(np.float32) # (self.kernlen) 483 | 484 | self.kernel = torch.from_numpy(np.expand_dims(kernel, 0)).to(self.device) # 1,self.kernlen 485 | 486 | # TI kernel 487 | ti_kernel = self._initial_kernel(15, 3).astype(np.float32) # (15,15) 488 | stack_kernel = np.stack([ti_kernel, ti_kernel, ti_kernel]) # (3,15,15) 489 | self.stack_kernel = torch.from_numpy(np.expand_dims(stack_kernel, 1)).to(self.device) # 3,1,15,15 490 | 491 | def _move_info_generation(self): 492 | max_move = int((self.kernlen - 1) / 2) 493 | lists = [i for i in range(-max_move, max_move+1)] 494 | return lists 495 | 496 | def _initial_kernel_linear(self, kernlen): 497 | k = int((kernlen - 1) / 2) 498 | kern1d = [] 499 | for i in range(k+1): 500 | kern1d.append(1 - i / (k+1)) 501 | kern1d = np.array(kern1d[::-1][:-1] + kern1d) 502 | kernel = kern1d / kern1d.sum() 503 | return kernel 504 | 505 | def _initial_kernel_uniform(self, kernlen): 506 | kern1d = np.ones(kernlen) 507 | kernel = kern1d / kern1d.sum() 508 | return kernel 509 | 510 | def _initial_kernel_gaussian(self, kernlen): 511 | assert kernlen%2 == 1 512 | k = (kernlen - 1) /2 513 | sigma = k/3 514 | k = int(k) 515 | def calculte_guassian(x, sigma): 516 | return (1/(sigma*np.sqrt(2*np.pi)) * np.math.exp(-(x**2)/(2* (sigma**2)))) 517 | kern1d = [] 518 | for i in range(-k, k+1): 519 | kern1d.append(calculte_guassian(i, sigma)) 520 | assert len(kern1d) == kernlen 521 | kern1d = np.array(kern1d) 522 | kernel = kern1d / kern1d.sum() 523 | return kernel 524 | 525 | def _conv1d_frame(self, grads): 526 | ''' 527 | grads: D, N, C, T, H, W 528 | ''' 529 | # cycle padding for grads 530 | D,N,C,T,H,W = grads.shape 531 | grads = grads.reshape(D, -1) 532 | 533 | grad = torch.matmul(self.kernel, grads) 534 | grad = grad.reshape(N,C,T,H,W) 535 | return grad 536 | 537 | def _cycle_move(self, adv_videos, cycle_move): 538 | if cycle_move < 0: 539 | direction = -1 540 | else: 541 | direction = 1 542 | cycle_move = abs(cycle_move) 543 | cycle_move = cycle_move % self.frames 544 | new_videos = torch.zeros_like(adv_videos) 545 | 
for i in range(self.frames): 546 | ori_ind = i 547 | new_ind = (ori_ind + direction * cycle_move) % self.frames 548 | new_videos[:,:,new_ind] = adv_videos[:,:,ori_ind] 549 | return new_videos 550 | 551 | def _cycle_move_remote(self, adv_videos, cycle_move): 552 | if cycle_move < 0: 553 | direction = -1 554 | else: 555 | direction = 1 556 | cycle_move = abs(cycle_move) 557 | if cycle_move == 0: 558 | cycle_move = cycle_move % self.frames 559 | else: 560 | cycle_move = (cycle_move + (int(self.frames/2)-1)) % self.frames 561 | new_videos = torch.zeros_like(adv_videos) 562 | for i in range(self.frames): 563 | ori_ind = i 564 | new_ind = (ori_ind + direction * cycle_move) % self.frames 565 | new_videos[:,:,new_ind] = adv_videos[:,:,ori_ind] 566 | return new_videos 567 | 568 | def _cycle_move_random(self, adv_videos, cycle_move): 569 | if cycle_move < 0: 570 | direction = -1 571 | else: 572 | direction = 1 573 | # cycle_move = abs(cycle_move) 574 | if cycle_move == 0: 575 | cycle_move = cycle_move % self.frames 576 | else: 577 | cycle_move = random.randint(0, 100) % self.frames 578 | # cycle_move = (cycle_move + int(self.frames/2)) % self.frames 579 | new_videos = torch.zeros_like(adv_videos) 580 | for i in range(self.frames): 581 | ori_ind = i 582 | new_ind = (ori_ind + direction * cycle_move) % self.frames 583 | new_videos[:,:,new_ind] = adv_videos[:,:,ori_ind] 584 | return new_videos 585 | 586 | def _exchange_move(self, adv_videos, exchange_lists): 587 | new_videos = adv_videos.clone() 588 | for exchange in exchange_lists: 589 | one_frame, ano_frame = exchange 590 | new_videos[:,:,one_frame] = adv_videos[:,:,ano_frame] 591 | new_videos[:,:,ano_frame] = adv_videos[:,:,one_frame] 592 | return new_videos 593 | 594 | def _get_grad(self, adv_videos, labels, loss): 595 | batch_size = adv_videos.shape[0] 596 | used_labels = torch.cat([labels]*batch_size, dim=0) 597 | adv_videos.requires_grad = True 598 | outputs = self.model(adv_videos) 599 | cost = self._targeted*loss(outputs, used_labels).to(self.device) 600 | grad = torch.autograd.grad(cost, adv_videos, 601 | retain_graph=False, create_graph=False)[0] 602 | return grad 603 | 604 | def _grad_augmentation(self, grads): 605 | ''' 606 | Input: 607 | grads: kernlen, grad.shape 608 | Return 609 | grad 610 | ''' 611 | diff_position_same_frame = torch.zeros_like(grads) 612 | for ind, cycle_move in enumerate(self.cycle_move_list): 613 | diff_position_same_frame[ind] = self._cycle_move(grads[ind], -cycle_move) 614 | d_conv_grad = self._conv1d_frame(diff_position_same_frame) 615 | return d_conv_grad 616 | 617 | # TI Function 618 | def _initial_kernel(self, kernlen, nsig): 619 | x = np.linspace(-nsig, nsig, kernlen) 620 | kern1d = st.norm.pdf(x) 621 | kernel_raw = np.outer(kern1d, kern1d) 622 | kernel = kernel_raw / kernel_raw.sum() 623 | return kernel 624 | 625 | def _conv2d_frame(self, grads): 626 | ''' 627 | grads: N, C, T, H, W 628 | ''' 629 | frames = grads.shape[2] 630 | out_grads = torch.zeros_like(grads) 631 | for i in range(frames): 632 | this_grads = grads[:,:,i] 633 | out_grad = nn.functional.conv2d(this_grads, self.stack_kernel, groups=3, stride=1, padding=7) 634 | out_grads[:,:,i] = out_grad 635 | out_grads = out_grads / torch.mean(torch.abs(out_grads), [1,2,3], True) 636 | return out_grads 637 | 638 | def forward(self, videos, labels): 639 | r""" 640 | Overridden. 
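videos: normalized clips of shape (N, C, 32, H, W) — self.frames is hard-coded to 32; labels: ground-truth labels.
Returns adversarial clips under the same normalization; the perturbation, measured in [0, 1] pixel space, is clamped to an L_inf ball of radius epsilon.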
641 | """ 642 | videos = videos.to(self.device) 643 | momentum = torch.zeros_like(videos).to(self.device) 644 | labels = labels.to(self.device) 645 | loss = nn.CrossEntropyLoss() 646 | unnorm_videos = self._transform_video(videos.clone().detach(), mode='back') # [0, 1] 647 | adv_videos = videos.clone().detach() 648 | del videos 649 | 650 | start_time = time.time() 651 | for i in range(self.steps): 652 | # obtain grads of these variants 653 | batch_new_videos = [] 654 | for cycle_move in self.cycle_move_list: 655 | if self.move_type == 'adj': 656 | new_videos = self._cycle_move(adv_videos, cycle_move) 657 | elif self.move_type == 'remote': 658 | new_videos = self._cycle_move_remote(adv_videos, cycle_move) 659 | elif self.move_type == 'random': 660 | new_videos = self._cycle_move_random(adv_videos, cycle_move) 661 | batch_new_videos.append(new_videos) 662 | batch_inps = torch.cat(batch_new_videos, dim=0) 663 | grads = [] 664 | batch_times = 5 665 | length = len(self.cycle_move_list) 666 | if self.model_name == 'TPNet': 667 | batch_times = length 668 | print (self.model_name, batch_times) 669 | batch_size = math.ceil(length / batch_times) 670 | for i in range(batch_times): 671 | grad = self._get_grad(batch_inps[i*batch_size:min((i+1)*batch_size, length)], labels, loss) 672 | grad = self._conv2d_frame(grad) 673 | grads.append(grad) 674 | # grad augmentation 675 | grads = torch.cat(grads, dim=0) 676 | grads = torch.unsqueeze(grads, dim=1) 677 | grad = self._grad_augmentation(grads) 678 | 679 | # momentum 680 | if self.momentum: 681 | grad = norm_grads(grad) 682 | grad += momentum * self.delay 683 | momentum = grad 684 | else: 685 | pass 686 | 687 | adv_videos = self._transform_video(adv_videos.detach(), mode='back') # [0, 1] 688 | adv_videos = adv_videos + self.step_size*grad.sign() 689 | delta = torch.clamp(adv_videos - unnorm_videos, min=-self.epsilon, max=self.epsilon) 690 | adv_videos = torch.clamp(unnorm_videos + delta, min=0, max=1).detach() 691 | adv_videos = self._transform_video(adv_videos, mode='forward') # norm 692 | print ('now_time', time.time()-start_time) 693 | return adv_videos -------------------------------------------------------------------------------- /attack_ucf101.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | import numpy as np 5 | import math 6 | 7 | import attack_methods 8 | from dataset.ucf101 import get_dataset 9 | from gluoncv.torch.model_zoo import get_model 10 | 11 | from utils import CONFIG_PATHS, OPT_PATH, get_cfg_custom, MODEL_TO_CKPTS 12 | 13 | def arg_parse(): 14 | parser = argparse.ArgumentParser(description='') 15 | parser.add_argument('--gpu', type=str, default='0', help='gpu device.') 16 | parser.add_argument('--batch_size', type=int, default=4, metavar='N') 17 | parser.add_argument('--model', type=str, default='i3d_resnet101', help='i3d_resnet101 | slowfast_resnet101 | tpn_resnet101.') 18 | parser.add_argument('--attack_method', type=str, default='TemporalTranslation', help='TemporalTranslation | TemporalTranslation_TI') 19 | parser.add_argument('--step', type=int, default=10, metavar='N', 20 | help='Multi-step or One-step.') 21 | 22 | parser.add_argument('--file_prefix', type=str, default='') 23 | 24 | # parameters in the paper 25 | parser.add_argument('--kernlen', type=int, default=15, metavar='N') 26 | parser.add_argument('--momentum', action='store_true', default=False, help='Use iterative momentum in MFFGSM.') 27 | parser.add_argument('--move_type', 
type=str, default='adj',help='adj | remote | random') 28 | parser.add_argument('--kernel_mode', type=str, default='gaussian') 29 | args = parser.parse_args() 30 | args.adv_path = os.path.join(OPT_PATH, 'UCF-{}-{}-{}-{}'.format(args.model, args.attack_method, args.step, args.file_prefix)) 31 | if not os.path.exists(args.adv_path): 32 | os.makedirs(args.adv_path) 33 | return args 34 | 35 | if __name__ == '__main__': 36 | args = arg_parse() 37 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 38 | print (args) 39 | 40 | # loading cfg 41 | cfg_path = CONFIG_PATHS[args.model] 42 | cfg = get_cfg_custom(cfg_path, args.batch_size) 43 | cfg.CONFIG.MODEL.PRETRAINED = False 44 | 45 | # loading model. 46 | ckpt_path = MODEL_TO_CKPTS[args.model] 47 | model = get_model(cfg) 48 | model.load_state_dict(torch.load(ckpt_path)['state_dict']) 49 | model.cuda() 50 | model.eval() 51 | 52 | # loading dataset 53 | dataset_loader = get_dataset('./ucf_all_info.csv', './used_idxs.pkl', args.batch_size) 54 | 55 | 56 | # attack 57 | params = {'kernlen':args.kernlen, 58 | 'momentum':args.momentum, 59 | 'move_type':args.move_type, 60 | 'kernel_mode':args.kernel_mode} 61 | attack_method = getattr(attack_methods, args.attack_method)(model, params=params, steps=args.step) 62 | 63 | for step, data in enumerate(dataset_loader): 64 | if step %1 == 0: 65 | print ('Running {}, {}/{}'.format(args.attack_method, step+1, len(dataset_loader))) 66 | val_batch = data[0].cuda() 67 | val_label = data[1].cuda() 68 | adv_batches = attack_method(val_batch, val_label) 69 | val_batch = val_batch.detach() 70 | for ind,label in enumerate(val_label): 71 | ori = val_batch[ind].cpu().numpy() 72 | adv = adv_batches[ind].cpu().numpy() 73 | np.save(os.path.join(args.adv_path, '{}-adv'.format(label.item())), adv) 74 | np.save(os.path.join(args.adv_path, '{}-ori'.format(label.item())), ori) 75 | -------------------------------------------------------------------------------- /config/i3d_nl5_resnet101_v1_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # i3d_nl5_resnet101_v1_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_nl5_resnet101_v1_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_nl5_resnet101_v1_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_nl5_resnet101_v1_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /config/i3d_nl5_resnet50_v1_kinetics400.yaml: 
-------------------------------------------------------------------------------- 1 | # i3d_nl5_resnet50_v1_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 100 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [40, 80] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'i3d_nl5_resnet50_v1_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/i3d_nl5_resnet50_v1_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/i3d_nl5_resnet50_v1_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /config/slowfast_8x8_resnet101_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # slowfast_8x8_resnet101_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | WARMUP_END_LR: 0.1 26 | RESUME_EPOCH: -1 27 | 28 | VAL: 29 | FREQ: 2 30 | BATCH_SIZE: 8 31 | 32 | DATA: 33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 37 | NUM_CLASSES: 400 38 | CLIP_LEN: 32 39 | FRAME_RATE: 2 40 | NUM_SEGMENT: 1 41 | NUM_CROP: 1 42 | TEST_NUM_SEGMENT: 10 43 | TEST_NUM_CROP: 3 44 | MULTIGRID: False 45 | KEEP_ASPECT_RATIO: False 46 | 47 | MODEL: 48 | NAME: 'slowfast_8x8_resnet101_kinetics400' 49 | PRETRAINED: False 50 | 51 | LOG: 52 | BASE_PATH: './logs/slowfast_8x8_resnet101_kinetics400' 53 | LOG_DIR: 'tb_log' 54 | SAVE_DIR: 'checkpoints' 55 | EVAL_DIR: './logs/slowfast_8x8_resnet101_kinetics400/eval' 56 | SAVE_FREQ: 2 57 | -------------------------------------------------------------------------------- /config/slowfast_8x8_resnet50_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # slowfast_8x8_resnet50_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 196 18 | 
BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | USE_WARMUP: True 23 | WARMUP_EPOCHS: 34 24 | LR_POLICY: 'Cosine' 25 | WARMUP_END_LR: 0.1 26 | RESUME_EPOCH: -1 27 | 28 | VAL: 29 | FREQ: 2 30 | BATCH_SIZE: 8 31 | 32 | DATA: 33 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 34 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 35 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 36 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 37 | NUM_CLASSES: 400 38 | CLIP_LEN: 32 39 | FRAME_RATE: 2 40 | NUM_SEGMENT: 1 41 | NUM_CROP: 1 42 | TEST_NUM_SEGMENT: 10 43 | TEST_NUM_CROP: 3 44 | MULTIGRID: False 45 | KEEP_ASPECT_RATIO: False 46 | 47 | MODEL: 48 | NAME: 'slowfast_8x8_resnet50_kinetics400' 49 | PRETRAINED: False 50 | 51 | LOG: 52 | BASE_PATH: './logs/slowfast_8x8_resnet50_kinetics400' 53 | LOG_DIR: 'tb_log' 54 | SAVE_DIR: 'checkpoints' 55 | EVAL_DIR: './logs/slowfast_8x8_resnet50_kinetics400/eval' 56 | SAVE_FREQ: 2 57 | -------------------------------------------------------------------------------- /config/tpn_resnet101_f32s2_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet101_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.252:23456' 9 | WOLRD_URLS: ['172.31.72.252'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: '/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet101_f32s2_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet101_f32s2_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet101_f32s2_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /config/tpn_resnet50_f32s2_kinetics400.yaml: -------------------------------------------------------------------------------- 1 | # tpn_resnet50_f32s2_kinetics400 2 | 3 | DDP_CONFIG: 4 | WORLD_SIZE: 1 5 | WORLD_RANK: 0 6 | GPU_WORLD_SIZE: 8 7 | GPU_WORLD_RANK: 0 8 | DIST_URL: 'tcp://172.31.72.195:23456' 9 | WOLRD_URLS: ['172.31.72.195'] 10 | AUTO_RANK_MATCH: True 11 | DIST_BACKEND: 'nccl' 12 | GPU: 0 13 | DISTRIBUTED: True 14 | 15 | CONFIG: 16 | TRAIN: 17 | EPOCH_NUM: 150 18 | BATCH_SIZE: 8 19 | LR: 0.01 20 | MOMENTUM: 0.9 21 | W_DECAY: 1e-5 22 | LR_POLICY: 'Step' 23 | USE_WARMUP: False 24 | LR_MILESTONE: [75, 125] 25 | STEP: 0.1 26 | 27 | VAL: 28 | FREQ: 2 29 | BATCH_SIZE: 8 30 | 31 | DATA: 32 | TRAIN_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_train.txt' 33 | VAL_ANNO_PATH: '/home/ubuntu/data/kinetics400/k400_val.txt' 34 | TRAIN_DATA_PATH: '/home/ubuntu/data/kinetics400/train_256/' 35 | VAL_DATA_PATH: 
'/home/ubuntu/data/kinetics400/val_256/' 36 | NUM_CLASSES: 400 37 | CLIP_LEN: 32 38 | FRAME_RATE: 2 39 | NUM_SEGMENT: 1 40 | NUM_CROP: 1 41 | TEST_NUM_SEGMENT: 10 42 | TEST_NUM_CROP: 3 43 | MULTIGRID: False 44 | KEEP_ASPECT_RATIO: False 45 | 46 | MODEL: 47 | NAME: 'tpn_resnet50_f32s2_kinetics400' 48 | PRETRAINED: False 49 | 50 | LOG: 51 | BASE_PATH: './logs/tpn_resnet50_f32s2_kinetics400' 52 | LOG_DIR: 'tb_log' 53 | SAVE_DIR: 'checkpoints' 54 | EVAL_DIR: './logs/tpn_resnet50_f32s2_kinetics400/eval' 55 | SAVE_FREQ: 2 56 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/dataset/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /dataset/__pycache__/kinetics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/dataset/__pycache__/kinetics.cpython-37.pyc -------------------------------------------------------------------------------- /dataset/__pycache__/transforms.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/dataset/__pycache__/transforms.cpython-37.pyc -------------------------------------------------------------------------------- /dataset/__pycache__/ucf101.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/dataset/__pycache__/ucf101.cpython-37.pyc -------------------------------------------------------------------------------- /dataset/kinetics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import warnings 4 | import numpy as np 5 | try: 6 | from decord import VideoReader, cpu 7 | except ImportError: 8 | VideoReader = None 9 | cpu = None 10 | 11 | import torch 12 | from torch.utils.data import Dataset 13 | from gluoncv.torch.data import video_transforms, volume_transforms, multiGridHelper, MultiGridBatchSampler 14 | 15 | class VideoClsDataset(Dataset): 16 | """Load your own video classification dataset.""" 17 | 18 | def __init__(self, anno_path, data_path, mode='train', clip_len=8, 19 | frame_sample_rate=2, crop_size=224, short_side_size=256, 20 | new_height=256, new_width=340, keep_aspect_ratio=False, 21 | num_segment=1, num_crop=1, test_num_segment=10, test_num_crop=3, 22 | use_multigrid=False): 23 | self.anno_path = anno_path 24 | self.data_path = data_path 25 | self.mode = mode 26 | self.clip_len = clip_len 27 | self.frame_sample_rate = frame_sample_rate 28 | self.crop_size = crop_size 29 | self.short_side_size = short_side_size 30 | self.new_height = new_height 31 | self.new_width = new_width 32 | self.keep_aspect_ratio = keep_aspect_ratio 33 | self.num_segment = num_segment 34 | self.test_num_segment = 
test_num_segment 35 | self.num_crop = num_crop 36 | self.test_num_crop = test_num_crop 37 | self.use_multigrid = use_multigrid and (mode == 'train') 38 | if VideoReader is None: 39 | raise ImportError("Unable to import `decord` which is required to read videos.") 40 | 41 | import pandas as pd 42 | # cleaned = pd.read_csv(self.anno_path, header=None, delimiter=' ') 43 | # self.dataset_samples = list(cleaned.values[:, 0]) 44 | # self.label_array = list(cleaned.values[:, 2]) 45 | cleaned = pd.read_csv(self.anno_path) 46 | self.dataset_samples = cleaned['path'].values.tolist() 47 | self.label_array = cleaned['gt_label'].values.tolist() 48 | self.clip_inds = cleaned['clip_index'].values.tolist() 49 | 50 | if (mode == 'train'): 51 | if self.use_multigrid: 52 | self.mg_helper = multiGridHelper() 53 | self.data_transform = [] 54 | for alpha in range(self.mg_helper.mod_long): 55 | tmp = [] 56 | for beta in range(self.mg_helper.mod_short): 57 | info = self.mg_helper.get_resize(alpha, beta) 58 | scale_s = info[1] 59 | tmp.append(video_transforms.Compose([ 60 | video_transforms.Resize(int(self.short_side_size / scale_s), 61 | interpolation='bilinear'), 62 | # TODO: multiscale corner cropping 63 | video_transforms.RandomResize(ratio=(1, 1.25), 64 | interpolation='bilinear'), 65 | video_transforms.RandomCrop(size=(int(self.crop_size / scale_s), 66 | int(self.crop_size / scale_s)))])) 67 | self.data_transform.append(tmp) 68 | else: 69 | self.data_transform = video_transforms.Compose([ 70 | video_transforms.Resize(int(self.short_side_size), 71 | interpolation='bilinear'), 72 | video_transforms.RandomResize(ratio=(1, 1.25), 73 | interpolation='bilinear'), 74 | video_transforms.RandomCrop(size=(int(self.crop_size), 75 | int(self.crop_size)))]) 76 | 77 | self.data_transform_after = video_transforms.Compose([ 78 | video_transforms.RandomHorizontalFlip(), 79 | volume_transforms.ClipToTensor(), 80 | video_transforms.Normalize(mean=[0.485, 0.456, 0.406], 81 | std=[0.229, 0.224, 0.225]) 82 | ]) 83 | elif (mode == 'validation'): 84 | self.data_transform = video_transforms.Compose([ 85 | video_transforms.Resize(self.short_side_size, interpolation='bilinear'), 86 | video_transforms.CenterCrop(size=(self.crop_size, self.crop_size)), 87 | volume_transforms.ClipToTensor(), 88 | video_transforms.Normalize(mean=[0.485, 0.456, 0.406], 89 | std=[0.229, 0.224, 0.225]) 90 | ]) 91 | elif mode == 'test': 92 | self.data_resize = video_transforms.Compose([ 93 | video_transforms.Resize(size=(short_side_size), interpolation='bilinear') 94 | ]) 95 | self.data_transform = video_transforms.Compose([ 96 | volume_transforms.ClipToTensor(), 97 | video_transforms.Normalize(mean=[0.485, 0.456, 0.406], 98 | std=[0.229, 0.224, 0.225]) 99 | ]) 100 | self.test_seg = [] 101 | self.test_dataset = [] 102 | self.test_label_array = [] 103 | for ck in range(self.test_num_segment): 104 | for cp in range(self.test_num_crop): 105 | for idx in range(len(self.label_array)): 106 | sample_label = self.label_array[idx] 107 | self.test_label_array.append(sample_label) 108 | self.test_dataset.append(self.dataset_samples[idx]) 109 | self.test_seg.append((ck, cp)) 110 | 111 | def __getitem__(self, index): 112 | if self.mode == 'train': 113 | if self.use_multigrid is True: 114 | index, alpha, beta = index 115 | info = self.mg_helper.get_resize(alpha, beta) 116 | scale_t = info[0] 117 | data_transform_func = self.data_transform[alpha][beta] 118 | else: 119 | scale_t = 1 120 | data_transform_func = self.data_transform 121 | 122 | sample = 
self.dataset_samples[index] 123 | buffer = self.loadvideo_decord(sample, sample_rate_scale=scale_t) 124 | if len(buffer) == 0: 125 | while len(buffer) == 0: 126 | warnings.warn("video {} not correctly loaded during training".format(sample)) 127 | index = np.random.randint(self.__len__()) 128 | sample = self.dataset_samples[index] 129 | buffer = self.loadvideo_decord(sample, sample_rate_scale=scale_t) 130 | 131 | buffer = data_transform_func(buffer) 132 | buffer = self.data_transform_after(buffer) 133 | return buffer, self.label_array[index], sample.split("/")[-1].split(".")[0] 134 | 135 | elif self.mode == 'validation': 136 | sample = self.dataset_samples[index] 137 | clip_ind = self.clip_inds[index] 138 | buffer = self.loadvideo_decord(sample, clip_ind) 139 | if len(buffer) == 0: 140 | while len(buffer) == 0: 141 | warnings.warn("video {} not correctly loaded during validation".format(sample)) 142 | index = np.random.randint(self.__len__()) 143 | sample = self.dataset_samples[index] 144 | buffer = self.loadvideo_decord(sample) 145 | buffer = self.data_transform(buffer) 146 | # return buffer, self.label_array[index], sample.split("/")[-1].split(".")[0] 147 | return buffer, self.label_array[index], sample.split(".")[0], clip_ind 148 | 149 | elif self.mode == 'test': 150 | sample = self.test_dataset[index] 151 | chunk_nb, split_nb = self.test_seg[index] 152 | buffer = self.loadvideo_decord(sample) 153 | 154 | while len(buffer) == 0: 155 | warnings.warn("video {}, temporal {}, spatial {} not found during testing".format(\ 156 | str(self.test_dataset[index]), chunk_nb, split_nb)) 157 | index = np.random.randint(self.__len__()) 158 | sample = self.test_dataset[index] 159 | chunk_nb, split_nb = self.test_seg[index] 160 | buffer = self.loadvideo_decord(sample) 161 | 162 | buffer = self.data_resize(buffer) 163 | if isinstance(buffer, list): 164 | buffer = np.stack(buffer, 0) 165 | 166 | spatial_step = 1.0 * (max(buffer.shape[1], buffer.shape[2]) - self.short_side_size) \ 167 | / (self.test_num_crop - 1) 168 | temporal_step = max(1.0 * (buffer.shape[0] - self.clip_len) \ 169 | / (self.test_num_segment - 1), 0) 170 | temporal_start = int(chunk_nb * temporal_step) 171 | spatial_start = int(split_nb * spatial_step) 172 | if buffer.shape[1] >= buffer.shape[2]: 173 | buffer = buffer[temporal_start:temporal_start + self.clip_len, \ 174 | spatial_start:spatial_start + self.short_side_size, :, :] 175 | else: 176 | buffer = buffer[temporal_start:temporal_start + self.clip_len, \ 177 | :, spatial_start:spatial_start + self.short_side_size, :] 178 | 179 | buffer = self.data_transform(buffer) 180 | return buffer, self.test_label_array[index], sample.split("/")[-1].split(".")[0], \ 181 | chunk_nb, split_nb 182 | else: 183 | raise NameError('mode {} unknown'.format(self.mode)) 184 | 185 | def loadvideo_decord(self, sample, clip_ind=-1, sample_rate_scale=1): 186 | """Load video content using Decord; `clip_ind` == -1 deterministically takes the last clip, any other value seeds a reproducible random clip.""" 187 | # pylint: disable=line-too-long, bare-except, unnecessary-comprehension 188 | fname = os.path.join(self.data_path, sample.replace(' ', '\ ').replace('(', '\(').replace(')', '\)')) # shell-style escaping of spaces/parentheses; assumes the stored filenames keep these escapes 189 | if not (os.path.exists(fname)): 190 | return [] 191 | 192 | # avoid hanging issue 193 | if os.path.getsize(fname) < 1 * 1024: 194 | print('SKIP: ', fname, " - ", os.path.getsize(fname)) 195 | return [] 196 | try: 197 | if self.keep_aspect_ratio: 198 | vr = VideoReader(fname, num_threads=1, ctx=cpu(0)) 199 | else: 200 | vr = VideoReader(fname, width=self.new_width, height=self.new_height, 201 | num_threads=1, ctx=cpu(0)) 202 | 
except: 203 | print("video cannot be loaded by decord: ", fname) 204 | return [] 205 | 206 | if self.mode == 'test': 207 | all_index = [x for x in range(0, len(vr), self.frame_sample_rate)] 208 | while len(all_index) < self.clip_len: 209 | all_index.append(all_index[-1]) 210 | vr.seek(0) 211 | buffer = vr.get_batch(all_index).asnumpy() 212 | return buffer 213 | 214 | # handle temporal segments 215 | converted_len = int(self.clip_len * self.frame_sample_rate) 216 | seg_len = len(vr) // self.num_segment 217 | 218 | all_index = [] 219 | for i in range(self.num_segment): 220 | if seg_len <= converted_len: 221 | index = np.linspace(0, seg_len, num=seg_len // self.frame_sample_rate) 222 | index = np.concatenate((index, np.ones(self.clip_len - seg_len // self.frame_sample_rate) * seg_len)) 223 | index = np.clip(index, 0, seg_len - 1).astype(np.int64) 224 | else: 225 | # end_idx = np.random.randint(converted_len, seg_len) 226 | if clip_ind == -1: 227 | end_idx = seg_len - 1 228 | else: 229 | np.random.seed(clip_ind) 230 | end_idx = np.random.randint(converted_len, seg_len) 231 | str_idx = end_idx - converted_len 232 | index = np.linspace(str_idx, end_idx, num=self.clip_len) 233 | index = np.clip(index, str_idx, end_idx - 1).astype(np.int64) 234 | index = index + i*seg_len 235 | all_index.extend(list(index)) 236 | 237 | all_index = all_index[::int(sample_rate_scale)] 238 | vr.seek(0) 239 | buffer = vr.get_batch(all_index).asnumpy() 240 | return buffer 241 | 242 | def __len__(self): 243 | if self.mode != 'test': 244 | return len(self.dataset_samples) 245 | else: 246 | return len(self.test_dataset) 247 | 248 | def get_dataset(cfg, loader=True): 249 | val_dataset = VideoClsDataset(anno_path=cfg.CONFIG.DATA.VAL_ANNO_PATH, 250 | data_path=cfg.CONFIG.DATA.VAL_DATA_PATH, 251 | mode='validation', 252 | use_multigrid=cfg.CONFIG.DATA.MULTIGRID, 253 | clip_len=cfg.CONFIG.DATA.CLIP_LEN, 254 | frame_sample_rate=cfg.CONFIG.DATA.FRAME_RATE, 255 | num_segment=cfg.CONFIG.DATA.NUM_SEGMENT, 256 | num_crop=cfg.CONFIG.DATA.NUM_CROP, 257 | keep_aspect_ratio=cfg.CONFIG.DATA.KEEP_ASPECT_RATIO, 258 | crop_size=cfg.CONFIG.DATA.CROP_SIZE, 259 | short_side_size=cfg.CONFIG.DATA.SHORT_SIDE_SIZE, 260 | new_height=cfg.CONFIG.DATA.NEW_HEIGHT, 261 | new_width=cfg.CONFIG.DATA.NEW_WIDTH) 262 | print ('The length of Dataset is {}.'.format(len(val_dataset))) 263 | if loader: 264 | val_loader = torch.utils.data.DataLoader( 265 | val_dataset, batch_size=cfg.CONFIG.VAL.BATCH_SIZE, shuffle=False, 266 | num_workers=9, sampler=None, pin_memory=True) 267 | return val_loader 268 | else: 269 | return val_dataset -------------------------------------------------------------------------------- /dataset/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import math 3 | 4 | import numbers 5 | import collections 6 | import numpy as np 7 | import torch 8 | from PIL import Image, ImageOps 9 | try: 10 | import accimage 11 | except ImportError: 12 | accimage = None 13 | 14 | # the code from 3D-ResNets-PyTorch-master/temporal_transforms.py, spatial_transforms.py and target_transforms.py 15 | 16 | #******************************************** 17 | # temporal_transforms 18 | #******************************************** 19 | # LoopPadding: frame_indices < size, loop 20 | # TemporalBeginCrop: frame_indices[:size] < size, loop, [1:] 21 | # TemporalCenterCrop: frame_indice[center-size/2:center+size/2] < size, loop, [1:] 22 | # TemporalRandomCrop: frame_indice[random_begin,:random_begin+size] < 
size, loop, [1:] 23 | class LoopPadding(object): 24 | """ 25 | `size` is the desired number of output frame indices. 26 | If fewer indices are given, the remainder is filled by looping over them 27 | (the first index is dropped: size=5 with [1, 2, 3] yields [2, 3, 2, 3, 2]). 28 | """ 29 | def __init__(self, size): 30 | self.size = size 31 | 32 | def __call__(self, frame_indices): 33 | out = frame_indices[1:self.size+1] 34 | 35 | for index in out: 36 | if len(out) >= self.size: 37 | break 38 | out.append(index) 39 | 40 | return out 41 | 42 | class TemporalBeginCrop(object): 43 | """ 44 | Temporally crop the given frame indices at the beginning. 45 | If the number of frames is less than the size, 46 | loop the indices as many times as necessary to satisfy the size. 47 | Args: 48 | size (int): Desired output size of the crop. 49 | """ 50 | 51 | def __init__(self, size): 52 | self.size = size 53 | 54 | def __call__(self, frame_indices): 55 | out = frame_indices[1:self.size+1] 56 | 57 | for index in out: 58 | if len(out) >= self.size: 59 | break 60 | out.append(index) 61 | return out 62 | 63 | class TemporalCenterCrop(object): 64 | """ 65 | Temporally crop the given frame indices at the center. 66 | If the number of frames is less than the size, 67 | loop the indices as many times as necessary to satisfy the size. 68 | Args: 69 | size (int): Desired output size of the crop. 70 | """ 71 | 72 | def __init__(self, size): 73 | self.size = size 74 | 75 | def __call__(self, frame_indices): 76 | """ 77 | Args: 78 | frame_indices (list): frame indices to be cropped. 79 | Returns: 80 | list: Cropped frame indices. 81 | """ 82 | 83 | center_index = len(frame_indices) // 2 84 | begin_index = max(1, center_index - (self.size // 2)) 85 | end_index = min(begin_index + self.size, len(frame_indices)) 86 | 87 | out = frame_indices[begin_index:end_index] 88 | 89 | for index in out: 90 | if len(out) >= self.size: 91 | break 92 | out.append(index) 93 | 94 | return out 95 | 96 | class TemporalRandomCrop(object): 97 | """ 98 | Temporally crop the given frame indices at a random location. 99 | If the number of frames is less than the size, 100 | loop the indices as many times as necessary to satisfy the size. 101 | Args: 102 | size (int): Desired output size of the crop. 103 | """ 104 | 105 | def __init__(self, size): 106 | self.size = size 107 | 108 | def __call__(self, frame_indices): 109 | """ 110 | Args: 111 | frame_indices (list): frame indices to be cropped. 112 | Returns: 113 | list: Cropped frame indices. 114 | """ 115 | 116 | rand_end = max(1, len(frame_indices) - self.size - 1) 117 | random.seed(1024) # fixed seed: the "random" crop is deterministic across runs 118 | begin_index = random.randint(0, rand_end) 119 | end_index = min(begin_index + self.size, len(frame_indices)) 120 | 121 | out = frame_indices[begin_index:end_index] 122 | 123 | for index in out: 124 | if len(out) >= self.size: 125 | break 126 | out.append(index) 127 | 128 | return out 129 | 130 | #******************************************** 131 | # spatial_transforms 132 | #******************************************** 133 | # spatial_Compose: composes multiple spatial transform functions. 134 | # ToTensor: np.array, Image.image to tensor, H*W*C(0-255) to C*H*W (0.0-1.0), pixel/norm_value, for tensor. 135 | # Normalize: (pixel-mean)/std, for tensor. 136 | # Scale: keep the aspect ratio and rescale the smaller side, for Image. 137 | # CenterCrop: for Image. 138 | # RandomHorizontalFlip: horizontally flip an image with probability 0.5. 139 | # MultiScaleCornerCrop: multiple scales from 4 corners and 1 center. 
140 | # MultiScaleRandomCrop: multiple scales from a random position. 141 | 142 | class spatial_Compose(object): 143 | """Composes several transforms together. 144 | Args: 145 | transforms (list of ``Transform`` objects): list of transforms to compose. 146 | Example: 147 | >>> transforms.Compose([ 148 | >>> transforms.CenterCrop(10), 149 | >>> transforms.ToTensor(), 150 | >>> ]) 151 | """ 152 | 153 | def __init__(self, transforms): 154 | self.transforms = transforms 155 | 156 | def __call__(self, img): 157 | for t in self.transforms: 158 | img = t(img) 159 | return img 160 | 161 | def randomize_parameters(self): 162 | for t in self.transforms: 163 | t.randomize_parameters() 164 | 165 | class ToTensor(object): 166 | """ 167 | Convert a ``PIL.Image`` or ``numpy.ndarray`` to tensor. 168 | Converts a PIL.Image or numpy.ndarray (H x W x C) in the range 169 | [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]. 170 | """ 171 | 172 | def __init__(self, norm_value=255): 173 | self.norm_value = norm_value 174 | 175 | def __call__(self, pic): 176 | """ 177 | Args: 178 | pic (PIL.Image or numpy.ndarray): Image to be converted to tensor. 179 | Returns: 180 | Tensor: Converted image. 181 | """ 182 | if isinstance(pic, np.ndarray): 183 | # handle numpy array 184 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 185 | # backward compatibility 186 | return img.float().div(self.norm_value) 187 | 188 | if accimage is not None and isinstance(pic, accimage.Image): 189 | nppic = np.zeros( 190 | [pic.channels, pic.height, pic.width], dtype=np.float32) 191 | pic.copyto(nppic) 192 | return torch.from_numpy(nppic) 193 | 194 | # handle PIL Image 195 | if pic.mode == 'I': 196 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 197 | elif pic.mode == 'I;16': 198 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 199 | else: 200 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) 201 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 202 | if pic.mode == 'YCbCr': 203 | nchannel = 3 204 | elif pic.mode == 'I;16': 205 | nchannel = 1 206 | else: 207 | nchannel = len(pic.mode) 208 | img = img.view(pic.size[1], pic.size[0], nchannel) 209 | # put it from HWC to CHW format 210 | # yikes, this transpose takes 80% of the loading time/CPU 211 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 212 | if isinstance(img, torch.ByteTensor): 213 | return img.float().div(self.norm_value) 214 | else: 215 | return img 216 | 217 | def randomize_parameters(self): 218 | pass 219 | 220 | class Normalize(object): 221 | """ 222 | Normalize a tensor image with mean and standard deviation. 223 | Given mean: (R, G, B) and std: (R, G, B), 224 | will normalize each channel of the torch.*Tensor, i.e. 225 | channel = (channel - mean) / std 226 | Args: 227 | mean (sequence): Sequence of means for R, G, B channels respectively. 228 | std (sequence): Sequence of standard deviations for R, G, B channels 229 | respectively. 230 | """ 231 | 232 | def __init__(self, mean, std): 233 | self.mean = mean 234 | self.std = std 235 | 236 | def __call__(self, tensor): 237 | """ 238 | Args: 239 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 240 | Returns: 241 | Tensor: Normalized image. 
242 | """ 243 | # TODO: make efficient 244 | for t, m, s in zip(tensor, self.mean, self.std): 245 | t.sub_(m).div_(s) 246 | return tensor 247 | 248 | def randomize_parameters(self): 249 | pass 250 | 251 | 252 | class Scale(object): 253 | """Rescale the input PIL.Image to the given size. 254 | Args: 255 | size (sequence or int): Desired output size. If size is a sequence like 256 | (w, h), output size will be matched to this. If size is an int, 257 | smaller edge of the image will be matched to this number. 258 | i.e, if height > width, then image will be rescaled to 259 | (size * height / width, size) 260 | interpolation (int, optional): Desired interpolation. Default is 261 | ``PIL.Image.BILINEAR`` 262 | """ 263 | 264 | def __init__(self, size, interpolation=Image.BILINEAR): 265 | assert isinstance(size, 266 | int) or (isinstance(size, collections.Iterable) and 267 | len(size) == 2) 268 | self.size = size 269 | self.interpolation = interpolation 270 | 271 | def __call__(self, img): 272 | """ 273 | Args: 274 | img (PIL.Image): Image to be scaled. 275 | Returns: 276 | PIL.Image: Rescaled image. 277 | """ 278 | if isinstance(self.size, int): 279 | w, h = img.size 280 | if (w <= h and w == self.size) or (h <= w and h == self.size): 281 | return img 282 | if w < h: 283 | ow = self.size 284 | oh = int(self.size * h / w) 285 | return img.resize((ow, oh), self.interpolation) 286 | else: 287 | oh = self.size 288 | ow = int(self.size * w / h) 289 | return img.resize((ow, oh), self.interpolation) 290 | else: 291 | return img.resize(self.size, self.interpolation) 292 | 293 | def randomize_parameters(self): 294 | pass 295 | 296 | 297 | class CenterCrop(object): 298 | """Crops the given PIL.Image at the center. 299 | Args: 300 | size (sequence or int): Desired output size of the crop. If size is an 301 | int instead of sequence like (h, w), a square crop (size, size) is 302 | made. 303 | """ 304 | 305 | def __init__(self, size): 306 | if isinstance(size, numbers.Number): 307 | self.size = (int(size), int(size)) 308 | else: 309 | self.size = size 310 | 311 | def __call__(self, img): 312 | """ 313 | Args: 314 | img (PIL.Image): Image to be cropped. 315 | Returns: 316 | PIL.Image: Cropped image. 
317 | """ 318 | w, h = img.size 319 | th, tw = self.size 320 | x1 = int(round((w - tw) / 2.)) 321 | y1 = int(round((h - th) / 2.)) 322 | return img.crop((x1, y1, x1 + tw, y1 + th)) 323 | 324 | def randomize_parameters(self): 325 | pass 326 | 327 | 328 | class CornerCrop(object): 329 | 330 | def __init__(self, size, crop_position=None): 331 | self.size = size 332 | if crop_position is None: 333 | self.randomize = True 334 | else: 335 | self.randomize = False 336 | self.crop_position = crop_position 337 | self.crop_positions = ['c', 'tl', 'tr', 'bl', 'br'] 338 | 339 | def __call__(self, img): 340 | image_width = img.size[0] 341 | image_height = img.size[1] 342 | 343 | if self.crop_position == 'c': 344 | th, tw = (self.size, self.size) 345 | x1 = int(round((image_width - tw) / 2.)) 346 | y1 = int(round((image_height - th) / 2.)) 347 | x2 = x1 + tw 348 | y2 = y1 + th 349 | elif self.crop_position == 'tl': 350 | x1 = 0 351 | y1 = 0 352 | x2 = self.size 353 | y2 = self.size 354 | elif self.crop_position == 'tr': 355 | x1 = image_width - self.size 356 | y1 = 0 357 | x2 = image_width 358 | y2 = self.size 359 | elif self.crop_position == 'bl': 360 | x1 = 0 361 | y1 = image_height - self.size 362 | x2 = self.size 363 | y2 = image_height 364 | elif self.crop_position == 'br': 365 | x1 = image_width - self.size 366 | y1 = image_height - self.size 367 | x2 = image_width 368 | y2 = image_height 369 | 370 | img = img.crop((x1, y1, x2, y2)) 371 | 372 | return img 373 | 374 | def randomize_parameters(self): 375 | if self.randomize: 376 | random.seed(1024) 377 | self.crop_position = self.crop_positions[random.randint( 378 | 0, 379 | len(self.crop_positions) - 1)] 380 | 381 | 382 | class RandomHorizontalFlip(object): 383 | """Horizontally flip the given PIL.Image randomly with a probability of 0.5.""" 384 | 385 | def __call__(self, img): 386 | """ 387 | Args: 388 | img (PIL.Image): Image to be flipped. 389 | Returns: 390 | PIL.Image: Randomly flipped image. 391 | """ 392 | if self.p < 0.5: 393 | return img.transpose(Image.FLIP_LEFT_RIGHT) 394 | return img 395 | 396 | def randomize_parameters(self): 397 | random.seed(1024) 398 | self.p = random.random() 399 | 400 | 401 | class MultiScaleCornerCrop(object): 402 | """Crop the given PIL.Image to randomly selected size. 403 | A crop of size is selected from scales of the original size. 404 | A position of cropping is randomly selected from 4 corners and 1 center. 405 | This crop is finally resized to given size. 
406 | Args: 407 | scales: cropping scales of the original size 408 | size: size of the smaller edge 409 | interpolation: Default: PIL.Image.BILINEAR 410 | """ 411 | 412 | def __init__(self, 413 | scales, 414 | size, 415 | interpolation=Image.BILINEAR, 416 | crop_positions=['c', 'tl', 'tr', 'bl', 'br']): 417 | self.scales = scales 418 | self.size = size 419 | self.interpolation = interpolation 420 | 421 | self.crop_positions = crop_positions 422 | 423 | def __call__(self, img): 424 | min_length = min(img.size[0], img.size[1]) 425 | crop_size = int(min_length * self.scale) 426 | 427 | image_width = img.size[0] 428 | image_height = img.size[1] 429 | 430 | if self.crop_position == 'c': 431 | center_x = image_width // 2 432 | center_y = image_height // 2 433 | box_half = crop_size // 2 434 | x1 = center_x - box_half 435 | y1 = center_y - box_half 436 | x2 = center_x + box_half 437 | y2 = center_y + box_half 438 | elif self.crop_position == 'tl': 439 | x1 = 0 440 | y1 = 0 441 | x2 = crop_size 442 | y2 = crop_size 443 | elif self.crop_position == 'tr': 444 | x1 = image_width - crop_size 445 | y1 = 0 446 | x2 = image_width 447 | y2 = crop_size 448 | elif self.crop_position == 'bl': 449 | x1 = 0 450 | y1 = image_height - crop_size 451 | x2 = crop_size 452 | y2 = image_height 453 | elif self.crop_position == 'br': 454 | x1 = image_width - crop_size 455 | y1 = image_height - crop_size 456 | x2 = image_width 457 | y2 = image_height 458 | 459 | img = img.crop((x1, y1, x2, y2)) 460 | 461 | return img.resize((self.size, self.size), self.interpolation) 462 | 463 | def randomize_parameters(self): 464 | random.seed(1024) 465 | self.scale = self.scales[random.randint(0, len(self.scales) - 1)] 466 | random.seed(1024) 467 | self.crop_position = self.crop_positions[random.randint( 468 | 0, 469 | len(self.crop_positions) - 1)] 470 | 471 | 472 | class MultiScaleRandomCrop(object): 473 | """ 474 | Crop the given PIL.Image to randomly selected size. 475 | A position of cropping is randomly selected. 
476 | """ 477 | def __init__(self, scales, size, interpolation=Image.BILINEAR): 478 | self.scales = scales 479 | self.size = size 480 | self.interpolation = interpolation 481 | 482 | def __call__(self, img): 483 | min_length = min(img.size[0], img.size[1]) 484 | crop_size = int(min_length * self.scale) 485 | 486 | image_width = img.size[0] 487 | image_height = img.size[1] 488 | 489 | x1 = self.tl_x * (image_width - crop_size) 490 | y1 = self.tl_y * (image_height - crop_size) 491 | x2 = x1 + crop_size 492 | y2 = y1 + crop_size 493 | 494 | img = img.crop((x1, y1, x2, y2)) 495 | 496 | return img.resize((self.size, self.size), self.interpolation) 497 | 498 | def randomize_parameters(self): 499 | self.scale = self.scales[random.randint(0, len(self.scales) - 1)] 500 | random.seed(1024) 501 | self.tl_x = random.random() 502 | random.seed(1024) 503 | self.tl_y = random.random() 504 | -------------------------------------------------------------------------------- /dataset/ucf101.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import pandas as pd 4 | import os 5 | import math 6 | import functools 7 | import json 8 | import copy 9 | import numpy as np 10 | from .transforms import * 11 | import pickle as pkl 12 | 13 | from utils import UCF_DATA_ROOT 14 | 15 | def pil_loader(path): 16 | # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) 17 | with open(path, 'rb') as f: 18 | with Image.open(f) as img: 19 | return img.convert('RGB') 20 | 21 | 22 | def accimage_loader(path): 23 | try: 24 | import accimage 25 | return accimage.Image(path) 26 | except IOError: 27 | # Potentially a decoding problem, fall back to PIL.Image 28 | return pil_loader(path) 29 | 30 | 31 | def get_default_image_loader(): 32 | from torchvision import get_image_backend 33 | if get_image_backend() == 'accimage': 34 | return accimage_loader 35 | else: 36 | return pil_loader 37 | 38 | 39 | def video_loader(video_dir_path, frame_indices, image_loader): 40 | video = [] 41 | for i in frame_indices: 42 | image_path = os.path.join(video_dir_path, 'image_{:05d}.jpg'.format(i)) 43 | if os.path.exists(image_path): 44 | video.append(image_loader(image_path)) 45 | else: 46 | return video 47 | 48 | return video 49 | 50 | 51 | def get_default_video_loader(): 52 | image_loader = get_default_image_loader() 53 | return functools.partial(video_loader, image_loader=image_loader) 54 | 55 | 56 | class attack_ucf101(data.Dataset): 57 | def __init__(self, setting_path, idx_path, spatial_transform=None, temporal_transform=None,get_loader=get_default_video_loader): 58 | setting = setting_path 59 | self.clips = self._make_dataset(setting) 60 | self.spatial_transform = spatial_transform 61 | self.temporal_transform = temporal_transform 62 | self.loader = get_loader() 63 | with open(idx_path, 'rb') as ipt: 64 | used_idxs = pkl.load(ipt) 65 | self.new_clips = [] 66 | for i in used_idxs: 67 | self.new_clips.append(self.clips[i]) 68 | self.clips = self.new_clips 69 | print ('length', len(self.clips)) 70 | 71 | def __getitem__(self, index): 72 | directory, duration, target = self.clips[index] 73 | frame_indices = list(range(1, duration + 1)) 74 | 75 | if self.temporal_transform is not None: 76 | frame_indices = self.temporal_transform(frame_indices) 77 | 78 | clip = self.loader(directory, frame_indices) 79 | 80 | if self.spatial_transform is not None: 81 | self.spatial_transform.randomize_parameters() 82 | clip = 
[self.spatial_transform(img) for img in clip] 83 | 84 | clip = torch.stack(clip, 0).permute(1, 0, 2, 3) 85 | 86 | return clip, target 87 | 88 | def _make_dataset(self, setting): 89 | if not os.path.exists(setting): 90 | raise(RuntimeError("Setting file %s doesn't exist. Check opt.train-list and opt.val-list. " % (setting))) 91 | clips = [] 92 | df = pd.read_csv(setting) 93 | for idx, row in df.iterrows(): 94 | clip_path = os.path.join(UCF_DATA_ROOT, row['path'].split('/')[-1]) 95 | duration = int(row['duration']) 96 | target = int(row['target']) 97 | item = (clip_path, duration, target) 98 | clips.append(item) 99 | return clips 100 | 101 | def __len__(self): 102 | return len(self.clips) 103 | 104 | def test_transform(): 105 | input_size = 224 106 | scale_ratios = '1.0, 0.8' 107 | scale_ratios = [float(i) for i in scale_ratios.split(',')] 108 | default_mean = [0.485, 0.456, 0.406] 109 | default_std = [0.229, 0.224, 0.225] 110 | norm_method = Normalize(default_mean, default_std) 111 | spatial_transform = spatial_Compose([ 112 | Scale(int(input_size / 1.0)), 113 | CornerCrop(input_size, 'c'), 114 | ToTensor(), norm_method 115 | ]) 116 | temporal_transform = LoopPadding(32) 117 | return spatial_transform, temporal_transform 118 | 119 | def get_dataset(setting_path, idx_path, test_batch_size, loader=True): 120 | test_spa_trans, test_temp_trans = test_transform() 121 | test_dataset = attack_ucf101(setting_path, idx_path, spatial_transform=test_spa_trans, temporal_transform=test_temp_trans) 122 | val_loader = torch.utils.data.DataLoader( 123 | test_dataset, batch_size=test_batch_size, shuffle=False, 124 | num_workers=9, pin_memory=True) 125 | return val_loader -------------------------------------------------------------------------------- /kinetics400_attack_samples.csv: -------------------------------------------------------------------------------- 1 | path,gt_label,clip_index 2 | abseiling/YqTT34PsD5c_000003_000013.mp4,0,-1 3 | air drumming/--nQbRBEz2s_000104_000114.mp4,1,-1 4 | answering questions/AqPeHqTDfGE_000068_000078.mp4,2,-1 5 | applauding/ieIq7ym_UXQ_000007_000017.mp4,3,-1 6 | applying cream/rFee2NCkWQE_000013_000023.mp4,4,-1 7 | archery/FzfqEd36YbY_001101_001111.mp4,5,-1 8 | arm wrestling/_Lo3hFbum_o_000005_000015.mp4,6,-1 9 | arranging flowers/cuOsRai-HCE_000126_000136.mp4,7,-1 10 | assembling computer/CB1iIWtDpSI_000431_000441.mp4,8,-1 11 | auctioning/6jcDGC4LF5s_001022_001032.mp4,9,-1 12 | baby waking up/X66nHKtYtt0_000008_000018.mp4,10,-1 13 | baking cookies/LG2hEf9ueAM_000178_000188.mp4,11,-1 14 | balloon blowing/qjyGo-e_d6I_000076_000086.mp4,12,-1 15 | bandaging/TwSUMZOrLyE_000139_000149.mp4,13,-1 16 | barbequing/_7mBVhDgiO8_000019_000029.mp4,14,-1 17 | bartending/TuCc2RwG2fM_000354_000364.mp4,15,-1 18 | beatboxing/rYnVViYbae0_000000_000010.mp4,16,-1 19 | bee keeping/EpbYRgIsQRg_000018_000028.mp4,17,-1 20 | belly dancing/cMVkWCb3fE8_000103_000113.mp4,18,-1 21 | bench pressing/ehxWC3nDZC8_000003_000013.mp4,19,-1 22 | bending back/DA8K3c4HgVo_000003_000013.mp4,20,-1 23 | bending metal/U-iQuIgd5ps_000478_000488.mp4,21,-1 24 | biking through snow/g3GJvDqtfys_000031_000041.mp4,22,-1 25 | blasting sand/NAqrwQ54ptY_000028_000038.mp4,23,-1 26 | blowing glass/p2NEs8gon0k_000381_000391.mp4,24,-1 27 | blowing leaves/wKgo6AS5C80_000044_000054.mp4,25,-1 28 | blowing nose/gu0QuD4zpzg_000030_000040.mp4,26,-1 29 | blowing out candles/9IzWImcF3hM_000032_000042.mp4,27,-1 30 | bobsledding/uftReOMM9-A_000063_000073.mp4,28,-1 31 | bookbinding/hmoPcSFBYPY_000222_000232.mp4,29,-1 32 | bouncing on 
trampoline/bekao5nG02M_000024_000034.mp4,30,-1 33 | bowling/8pBjZcOc8MY_000096_000106.mp4,31,-1 34 | braiding hair/-dLVSg5JvxY_000022_000032.mp4,32,-1 35 | breading or breadcrumbing/-_3E3GBXAUc_000010_000020.mp4,33,-1 36 | breakdancing/4T2F4PQ97GE_000008_000018.mp4,34,-1 37 | brush painting/YHUeGa8Eu70_000225_000235.mp4,35,-1 38 | brushing hair/9LnZrptwj6Q_000314_000324.mp4,36,-1 39 | brushing teeth/8NStNQyjIXI_000054_000064.mp4,37,-1 40 | building cabinet/CG9DKR4lPC0_001821_001831.mp4,38,-1 41 | building shed/6q-XsQgZ8_w_000044_000054.mp4,39,-1 42 | bungee jumping/7Goki93f5mo_000018_000028.mp4,40,-1 43 | busking/CU6MFCvEct0_000016_000026.mp4,41,-1 44 | canoeing or kayaking/cdDu63UKbu0_000164_000174.mp4,42,-1 45 | capoeira/lm6ibanrGK8_000000_000010.mp4,43,-1 46 | carrying baby/ztAfXKZ0ovM_000141_000151.mp4,44,-1 47 | cartwheeling/EiZvgwrHCMk_000000_000010.mp4,45,-1 48 | carving pumpkin/oPoLYdOTOt0_000000_000010.mp4,46,-1 49 | catching fish/dpfJTo3nywA_000028_000038.mp4,47,-1 50 | catching or throwing baseball/AsPjORZU-cU_000055_000065.mp4,48,-1 51 | catching or throwing frisbee/RxgW7Hdn4YM_000006_000016.mp4,49,-1 52 | catching or throwing softball/kU3qQGVRT-g_000011_000021.mp4,50,-1 53 | celebrating/2lBUaUBD9JE_000018_000028.mp4,51,-1 54 | changing oil/-aJHPlJTesM_000734_000744.mp4,52,-1 55 | changing wheel/EQNTFw62uh8_000251_000261.mp4,53,-1 56 | checking tires/wUQduZ3i-VM_000275_000285.mp4,54,-1 57 | cheerleading/6LOV6-dkNZE_000251_000261.mp4,55,-1 58 | chopping wood/nyFulYDEKFs_000017_000027.mp4,56,-1 59 | clapping/M9NORCUCrtE_000003_000013.mp4,57,-1 60 | clay pottery making/PP7MtP6BMkY_000193_000203.mp4,58,-1 61 | clean and jerk/R6pk7NDa7Mw_000015_000025.mp4,59,-1 62 | cleaning floor/vVlrGgL9dxk_000004_000014.mp4,60,-1 63 | cleaning gutters/22xdXMMq6XE_000040_000050.mp4,61,-1 64 | cleaning pool/AFvEYQkSmfk_000123_000133.mp4,62,-1 65 | cleaning shoes/WJEGNo9YETM_000203_000213.mp4,63,-1 66 | cleaning toilet/BjS2g1oZj_s_000065_000075.mp4,64,-1 67 | cleaning windows/OiN3AgBVB80_000003_000013.mp4,65,-1 68 | climbing a rope/NfH4FZhrtvE_000002_000012.mp4,66,-1 69 | climbing ladder/70Er7J3srS0_000001_000011.mp4,67,-1 70 | climbing tree/aM1AgHyvm4E_000017_000027.mp4,68,-1 71 | contact juggling/yymr4YWVFe4_000046_000056.mp4,69,-1 72 | cooking chicken/pj8TWS7KEeY_000024_000034.mp4,70,-1 73 | cooking egg/Ao3M2TPI3sQ_000294_000304.mp4,71,-1 74 | cooking on campfire/BQfDmW1Nodk_000002_000012.mp4,72,-1 75 | cooking sausages/52AOa09jJWs_000195_000205.mp4,73,-1 76 | counting money/kPCbWDyAcFE_000000_000010.mp4,74,-1 77 | country line dancing/suHCOVoGPMU_000475_000485.mp4,75,-1 78 | cracking neck/j_EiZph3YKE_000001_000011.mp4,76,-1 79 | crawling baby/GpPvqvsqGy0_000006_000016.mp4,77,-1 80 | crossing river/luTkBLIT6lU_000036_000046.mp4,78,-1 81 | crying/zCEEKnSB_RU_000000_000010.mp4,79,-1 82 | curling hair/gwNMVUlBUtY_000068_000078.mp4,80,-1 83 | cutting nails/es35biYvLRA_000020_000030.mp4,81,-1 84 | cutting pineapple/T5jQWQg2eNc_000000_000010.mp4,82,-1 85 | cutting watermelon/LBgRTCVwyik_000042_000052.mp4,83,-1 86 | dancing ballet/s_gGtYIrtsc_000118_000128.mp4,84,-1 87 | dancing charleston/FQpLIyAfbqI_000023_000033.mp4,85,-1 88 | dancing gangnam style/o_TIgx4gb_M_000023_000033.mp4,86,-1 89 | dancing macarena/dXIyWMidYa0_000008_000018.mp4,87,-1 90 | deadlifting/zvamd5T7yj8_000001_000011.mp4,88,-1 91 | decorating the christmas tree/kQDSa-xhsLY_000035_000045.mp4,89,-1 92 | digging/42Vx9FGzmkM_000075_000085.mp4,90,-1 93 | dining/-vOrVT1CiPQ_000080_000090.mp4,91,-1 94 | disc 
golfing/_owWHGvn_b0_000112_000122.mp4,92,-1 95 | diving cliff/1MmjE51PeIE_000015_000025.mp4,93,-1 96 | dodgeball/wFIuMu2w9pA_000010_000020.mp4,94,-1 97 | doing aerobics/-53DvfE42gE_001767_001777.mp4,95,-1 98 | doing laundry/qkd7laDeom0_000098_000108.mp4,96,-1 99 | doing nails/UixL7lHSHR8_000040_000050.mp4,97,-1 100 | drawing/IPmic5VRb7I_000066_000076.mp4,98,-1 101 | dribbling basketball/qoODmONT1a0_000019_000029.mp4,99,-1 102 | drinking/15FiZ48tTUU_000045_000055.mp4,100,-1 103 | drinking beer/382B3Q3xttk_000000_000010.mp4,101,-1 104 | drinking shots/o1hqepKau4A_000004_000014.mp4,102,-1 105 | driving car/NUG7kwJ-614_000400_000410.mp4,103,-1 106 | driving tractor/WtnQKvOuukE_000081_000091.mp4,104,-1 107 | drop kicking/pvuiN-G8-yc_000000_000010.mp4,105,-1 108 | drumming fingers/eap32WOJcAU_000108_000118.mp4,106,-1 109 | dunking basketball/WC2FOUSNyvE_000006_000016.mp4,107,-1 110 | dying hair/-7E9WiX7QfA_000053_000063.mp4,108,-1 111 | eating burger/w9G7CpkBBM0_000000_000010.mp4,109,-1 112 | eating cake/8QhblWHnNAY_000019_000029.mp4,110,-1 113 | eating carrots/V4IaThkaK6Y_000025_000035.mp4,111,-1 114 | eating chips/I5Y53-Q9KRo_000444_000454.mp4,112,-1 115 | eating doughnuts/HyUF0Uo0f2A_000077_000087.mp4,113,-1 116 | eating hotdog/FTOgHjhqlhU_000054_000064.mp4,114,-1 117 | eating ice cream/0fCDlKYkRxc_000081_000091.mp4,115,-1 118 | eating spaghetti/DiSP2oDGQ1Q_000014_000024.mp4,116,-1 119 | eating watermelon/pLA62YSoEoM_000002_000012.mp4,117,-1 120 | egg hunting/U9vSW3-zJ9s_000007_000017.mp4,118,-1 121 | exercising arm/0wZpjStZtUY_000001_000011.mp4,119,-1 122 | exercising with an exercise ball/oj7Qgyz5KK8_000143_000153.mp4,120,-1 123 | extinguishing fire/BVXG_JOh9jQ_000002_000012.mp4,121,-1 124 | faceplanting/petld-72OXM_000001_000011.mp4,122,-1 125 | feeding birds/QJSwBNxKYqg_000120_000130.mp4,123,-1 126 | feeding fish/ZtkTAHzih9Q_000084_000094.mp4,124,-1 127 | feeding goats/v5Bl68y5ra0_000006_000016.mp4,125,-1 128 | filling eyebrows/XycmcISYPA8_000045_000055.mp4,126,-1 129 | finger snapping/j6qYhS2W1fM_000001_000011.mp4,127,-1 130 | fixing hair/-65aI53dvdE_000022_000032.mp4,128,-1 131 | flipping pancake/HIBxq2P0BL0_000004_000014.mp4,129,-1 132 | flying kite/hAQJ9GHklS4_000004_000014.mp4,130,-1 133 | folding clothes/HvbmGxDuNxs_000035_000045.mp4,131,-1 134 | folding napkins/iCtT6ZadoOM_000052_000062.mp4,132,-1 135 | folding paper/soHl6SrXlEI_000105_000115.mp4,133,-1 136 | front raises/ObO_Gnw1nOQ_000005_000015.mp4,134,-1 137 | frying vegetables/1IDdvXnTI60_000123_000133.mp4,135,-1 138 | garbage collecting/KxTIEKllIzg_000114_000124.mp4,136,-1 139 | gargling/HAPBKE3Qo5A_000217_000227.mp4,137,-1 140 | getting a haircut/lVwFn9m8Q_Q_000053_000063.mp4,138,-1 141 | getting a tattoo/g8dOsqPBe7A_000657_000667.mp4,139,-1 142 | giving or receiving award/LmuS2GreXkc_000033_000043.mp4,140,-1 143 | golf chipping/NIf0bxodA9E_000120_000130.mp4,141,-1 144 | golf driving/1Q-E6UW1XE8_000011_000021.mp4,142,-1 145 | golf putting/VS9uEOvJhzg_000000_000010.mp4,143,-1 146 | grinding meat/SErnxQf4ONQ_000230_000240.mp4,144,-1 147 | grooming dog/Q9mt0lJjQUA_000105_000115.mp4,145,-1 148 | grooming horse/kaVWY-GyXcs_000063_000073.mp4,146,-1 149 | gymnastics tumbling/mlzx2bi9nwQ_000059_000069.mp4,147,-1 150 | hammer throw/WUrwglFhY64_000002_000012.mp4,148,-1 151 | headbanging/ZhDdQmHIM78_000044_000054.mp4,149,-1 152 | high jump/M2j1BTibIzs_000000_000010.mp4,151,-1 153 | high kick/NdjLKFhn9j0_000004_000014.mp4,152,-1 154 | hitting baseball/e8uB0GZsVOQ_000034_000044.mp4,153,-1 155 | hockey 
stop/Nrscg8fLYqY_000049_000059.mp4,154,-1 156 | holding snake/6cbXqLP0FHE_000002_000012.mp4,155,-1 157 | hopscotch/vxp0SOd2W1E_000002_000012.mp4,156,-1 158 | hoverboarding/E1Smsuf6cpE_000147_000157.mp4,157,-1 159 | hugging/xWyOTDxm9yQ_000009_000019.mp4,158,-1 160 | hula hooping/UjfYNVaZ39Y_000087_000097.mp4,159,-1 161 | hurdling/Xa6gI4yGLQo_000000_000010.mp4,160,-1 162 | hurling (sport)/ml2eBC_nXrw_000055_000065.mp4,161,-1 163 | ice climbing/UM1fUqvFnME_000048_000058.mp4,162,-1 164 | ice fishing/GO6YI36E_Do_000140_000150.mp4,163,-1 165 | ice skating/vMZLTP9MfZ4_000008_000018.mp4,164,-1 166 | ironing/ZgHZ0KgFOSc_000215_000225.mp4,165,-1 167 | javelin throw/E5xdkQvnhkc_000002_000012.mp4,166,-1 168 | jetskiing/Be59Cot2yGI_000233_000243.mp4,167,-1 169 | jogging/kBUt5duOHFU_000005_000015.mp4,168,-1 170 | juggling balls/YH801xSLkZM_000000_000010.mp4,169,-1 171 | juggling fire/TA2mmXre8HQ_000000_000010.mp4,170,-1 172 | juggling soccer ball/WAPctsQ-SwM_000000_000010.mp4,171,-1 173 | jumping into pool/kjzgLLaYO8w_000010_000020.mp4,172,-1 174 | jumpstyle dancing/QeG2HREr6m0_000003_000013.mp4,173,-1 175 | kicking field goal/sR0oOq-qOqs_000015_000025.mp4,174,-1 176 | kicking soccer ball/5PML0iLnBD8_000003_000013.mp4,175,-1 177 | kissing/LmPjkroyPcY_000739_000749.mp4,176,-1 178 | kitesurfing/KOOfe61BIyE_000023_000033.mp4,177,-1 179 | knitting/bCa_5xZa4Ug_002346_002356.mp4,178,-1 180 | krumping/3JxrK2Jt52Y_000754_000764.mp4,179,-1 181 | laughing/UpVXo5Q9JKk_000079_000089.mp4,180,-1 182 | laying bricks/N4HdEYIci0I_000037_000047.mp4,181,-1 183 | long jump/MrlWkj87rfU_000002_000012.mp4,182,-1 184 | lunge/g-XXUD65DyI_000003_000013.mp4,183,-1 185 | making a cake/bX6I6jVAQMI_000028_000038.mp4,184,-1 186 | making a sandwich/jofgWiVBwqo_000086_000096.mp4,185,-1 187 | making bed/yD42KW6cm-A_000820_000830.mp4,186,-1 188 | making jewelry/wMWkwQ7HXik_000616_000626.mp4,187,-1 189 | making pizza/wxgqu30nSLE_000000_000010.mp4,188,-1 190 | making snowman/8kN7EyPBmrI_000082_000092.mp4,189,-1 191 | making sushi/Ah2YqA7bmHY_000055_000065.mp4,190,-1 192 | making tea/hs2MVCM2LdY_000043_000053.mp4,191,-1 193 | marching/_h60EbUbh3I_000026_000036.mp4,192,-1 194 | massaging back/zsJ2PmhGM98_000215_000225.mp4,193,-1 195 | massaging feet/BwMKdpNAmy4_000090_000100.mp4,194,-1 196 | massaging legs/0EJXIQ1ltjo_000013_000023.mp4,195,-1 197 | massaging person's head/z-6l_dkR3vE_000299_000309.mp4,196,-1 198 | milking cow/DdUTLqyZ5b8_000044_000054.mp4,197,-1 199 | mopping floor/-F-aEPmjERo_000043_000053.mp4,198,-1 200 | motorcycling/kthzjAS1XS8_000009_000019.mp4,199,-1 201 | moving furniture/b9vF-F1LC5g_000003_000013.mp4,200,-1 202 | mowing lawn/t5SHfHDj0uw_000006_000016.mp4,201,-1 203 | news anchoring/xJMgxnXI0GY_000000_000010.mp4,202,-1 204 | opening bottle/gWd5AU5wP0k_000041_000051.mp4,203,-1 205 | opening present/vd4uGb1162o_000002_000012.mp4,204,-1 206 | paragliding/GF4WEdN_H0s_000191_000201.mp4,205,-1 207 | parasailing/GuClMEvE3gM_000055_000065.mp4,206,-1 208 | parkour/ptgKO940ISM_000042_000052.mp4,207,-1 209 | passing American football (in game)/ixMPVi3Zr9s_000001_000011.mp4,208,-1 210 | passing American football (not in game)/RxO7IEU7_I8_000391_000401.mp4,209,-1 211 | peeling apples/8qEAQXckcVw_000003_000013.mp4,210,-1 212 | peeling potatoes/_3CsQJ6XpHo_000015_000025.mp4,211,-1 213 | petting animal (not cat)/tlWjTLpoWLw_000000_000010.mp4,212,-1 214 | petting cat/q1GijBRBqjE_000203_000213.mp4,213,-1 215 | picking fruit/NTfCraM0XyM_000257_000267.mp4,214,-1 216 | planting trees/_WzkPBxP-5g_000096_000106.mp4,215,-1 217 | 
plastering/mdN9BDP0cVY_000032_000042.mp4,216,-1 218 | playing accordion/syp1O0cjens_000038_000048.mp4,217,-1 219 | playing badminton/tJz980bJ3UI_000065_000075.mp4,218,-1 220 | playing bagpipes/fMeaggq0_rA_000032_000042.mp4,219,-1 221 | playing basketball/3mIvIgAlniY_000001_000011.mp4,220,-1 222 | playing bass guitar/HqsAvuo5XhA_000059_000069.mp4,221,-1 223 | playing cards/IVP8pO4Q8Hs_000084_000094.mp4,222,-1 224 | playing cello/rsN982-8cvg_000042_000052.mp4,223,-1 225 | playing chess/xFq-OJ8HDJs_000185_000195.mp4,224,-1 226 | playing clarinet/7g4aL1EX8EI_001210_001220.mp4,225,-1 227 | playing controller/gxbUZcsy4EA_000097_000107.mp4,226,-1 228 | playing cricket/lgPslaxBQt0_000000_000010.mp4,227,-1 229 | playing cymbals/--Y25nDn2Wk_000060_000070.mp4,228,-1 230 | playing didgeridoo/2ezT7E6g8Ew_000044_000054.mp4,229,-1 231 | playing drums/kXhnTK9TVsU_000076_000086.mp4,230,-1 232 | playing flute/wqYzrDwV_o4_000047_000057.mp4,231,-1 233 | playing guitar/ysjCIR7SkJU_000141_000151.mp4,232,-1 234 | playing harmonica/DpJQShJs2kI_000036_000046.mp4,233,-1 235 | playing harp/Ud-INZAw5Ik_000163_000173.mp4,234,-1 236 | playing ice hockey/kRWk_-5d5bs_000010_000020.mp4,235,-1 237 | playing keyboard/vxVoptVwZp4_000027_000037.mp4,236,-1 238 | playing kickball/d5TMlt6P-ug_000317_000327.mp4,237,-1 239 | playing monopoly/SsAtR4oD7WY_000000_000010.mp4,238,-1 240 | playing organ/b9TfeDnfemw_000047_000057.mp4,239,-1 241 | playing paintball/DOL1_JLWeoo_000321_000331.mp4,240,-1 242 | playing piano/l4zZtMgNPvU_000009_000019.mp4,241,-1 243 | playing poker/-0NQHRndkPI_000004_000014.mp4,242,-1 244 | playing recorder/Zl_ey-UqwpY_000001_000011.mp4,243,-1 245 | playing saxophone/K06EmNd6t_I_000006_000016.mp4,244,-1 246 | playing squash or racquetball/-yUM3WwKQHM_000032_000042.mp4,245,-1 247 | playing tennis/VoAJFfutNlg_000060_000070.mp4,246,-1 248 | playing trombone/vqNbapex1kU_000015_000025.mp4,247,-1 249 | playing trumpet/-BtzVCzSnLk_000073_000083.mp4,248,-1 250 | playing ukulele/vE6Cnt7XJrg_000026_000036.mp4,249,-1 251 | playing violin/t2XntpSO4Yo_000105_000115.mp4,250,-1 252 | playing volleyball/5Wle9ClW4q0_000170_000180.mp4,251,-1 253 | playing xylophone/N586DnjSCxo_000000_000010.mp4,252,-1 254 | pole vault/9g4Sf8aWIx8_000003_000013.mp4,253,-1 255 | presenting weather forecast/lVSiCfeBP8I_000152_000162.mp4,254,-1 256 | pull ups/yLVMDD7b0xM_000020_000030.mp4,255,-1 257 | pumping fist/V-IqR1THKr4_000015_000025.mp4,256,-1 258 | pumping gas/eanhmmKIolc_000044_000054.mp4,257,-1 259 | punching bag/3baFNAxC2YI_000012_000022.mp4,258,-1 260 | punching person (boxing)/D5iLGttoHr4_000022_000032.mp4,259,-1 261 | push up/-B2oGkg1qSI_000012_000022.mp4,260,-1 262 | pushing car/-46DNkpyApI_000045_000055.mp4,261,-1 263 | pushing cart/p9CIcEEaSEk_000001_000011.mp4,262,-1 264 | pushing wheelchair/5gQlgNS5qfY_000023_000033.mp4,263,-1 265 | reading book/XoO1uEVNgjM_000058_000068.mp4,264,-1 266 | reading newspaper/gKqKWn6Nl0A_000035_000045.mp4,265,-1 267 | recording music/864rV9vdAK4_000577_000587.mp4,266,-1 268 | riding a bike/Ig-eRsgi6CU_000339_000349.mp4,267,-1 269 | riding camel/bGzjObGU_qM_000014_000024.mp4,268,-1 270 | riding elephant/j06vowPye30_000009_000019.mp4,269,-1 271 | riding mechanical bull/eJpkgBaykQ8_000029_000039.mp4,270,-1 272 | riding mountain bike/O95dOpT9T-c_000039_000049.mp4,271,-1 273 | riding mule/azD58bwAe7E_000003_000013.mp4,272,-1 274 | riding or walking with horse/C9pFs8sDARw_000218_000228.mp4,273,-1 275 | riding scooter/FGCNMNjanO4_000013_000023.mp4,274,-1 276 | riding 
unicycle/9RN16I79P9U_000000_000010.mp4,275,-1 277 | ripping paper/-Ovwq0kVUx4_000002_000012.mp4,276,-1 278 | robot dancing/5hQW4BHjWvM_000061_000071.mp4,277,-1 279 | rock climbing/2jXlO2nzHGE_000026_000036.mp4,278,-1 280 | rock scissors paper/Kxbdg32t6bU_000001_000011.mp4,279,-1 281 | roller skating/_vjX5nPwTBs_000072_000082.mp4,280,-1 282 | running on treadmill/BrKuhHIHccg_000049_000059.mp4,281,-1 283 | sailing/h1SM1ArgB0E_000034_000044.mp4,282,-1 284 | salsa dancing/WW4N7GToB5I_000313_000323.mp4,283,-1 285 | sanding floor/EbXC4bGpZ4M_000034_000044.mp4,284,-1 286 | scrambling eggs/ojJpJZpACdE_000245_000255.mp4,285,-1 287 | scuba diving/64BhyrIZkz0_000002_000012.mp4,286,-1 288 | setting table/tSQqcJqGplA_000011_000021.mp4,287,-1 289 | shaking hands/lCQ17mGZeVE_000029_000039.mp4,288,-1 290 | shaking head/WUOxNQKdRMM_000065_000075.mp4,289,-1 291 | sharpening knives/iaQPoVg8Xtw_000468_000478.mp4,290,-1 292 | sharpening pencil/ZMMCn1JE0Vc_000001_000011.mp4,291,-1 293 | shaving head/K1C_jI8z1F8_000261_000271.mp4,292,-1 294 | shaving legs/zvjNnDhUTxE_000034_000044.mp4,293,-1 295 | shearing sheep/Vaff3l43A40_000018_000028.mp4,294,-1 296 | shining shoes/HhW13wPky1U_000556_000566.mp4,295,-1 297 | shooting basketball/Y3oHAIylSrg_000031_000041.mp4,296,-1 298 | shooting goal (soccer)/ezJdtQzJ7qI_000021_000031.mp4,297,-1 299 | shot put/SQddPtgoQGE_000007_000017.mp4,298,-1 300 | shoveling snow/SZRDWgGOpXY_000062_000072.mp4,299,-1 301 | shredding paper/KXyOXrWiJGY_000022_000032.mp4,300,-1 302 | shuffling cards/_k0w_3JFfmE_000026_000036.mp4,301,-1 303 | side kick/sZ8JiPfAoWc_000005_000015.mp4,302,-1 304 | sign language interpreting/fKvqQEGGf6E_000031_000041.mp4,303,-1 305 | singing/FZrg29zsAe8_000023_000033.mp4,304,-1 306 | situp/jTMZX30XTXA_000072_000082.mp4,305,-1 307 | skateboarding/kIzdzzMLCJI_000199_000209.mp4,306,-1 308 | ski jumping/XQUsRpJ1A_Y_000001_000011.mp4,307,-1 309 | skiing (not slalom or crosscountry)/fRiYQEVMcEc_000000_000010.mp4,308,-1 310 | skiing crosscountry/pfvt6iYSXXw_000764_000774.mp4,309,-1 311 | skiing slalom/Ch_wt_nV2k4_000702_000712.mp4,310,-1 312 | skipping rope/xzXJJIni2hQ_000238_000248.mp4,311,-1 313 | skydiving/5uw3m1tIvJ0_000057_000067.mp4,312,-1 314 | slacklining/iLj4i4fTzn0_000038_000048.mp4,313,-1 315 | sled dog racing/yMQMfdV-Fzs_000025_000035.mp4,315,-1 316 | smoking/xo_9xPRu7_4_000114_000124.mp4,316,-1 317 | smoking hookah/C_QP4vOVTrE_000164_000174.mp4,317,-1 318 | snatch weight lifting/827ciUyYK5k_000000_000010.mp4,318,-1 319 | sneezing/ce6aUvCKpbU_000000_000010.mp4,319,-1 320 | sniffing/u2s0kiGG7AU_000011_000021.mp4,320,-1 321 | snorkeling/rsDfe_ikY1I_000010_000020.mp4,321,-1 322 | snowboarding/4cjhTsZjNP8_000202_000212.mp4,322,-1 323 | snowkiting/ToDS3RIVybY_000025_000035.mp4,323,-1 324 | snowmobiling/EgkRnTkj8gc_000003_000013.mp4,324,-1 325 | somersaulting/hFYg1xqG5yk_000154_000164.mp4,325,-1 326 | spinning poi/by9gw0ipuUg_000002_000012.mp4,326,-1 327 | spray painting/zyR32Dm9yek_000019_000029.mp4,327,-1 328 | spraying/LJQX3Atdn4k_000043_000053.mp4,328,-1 329 | springboard diving/T3b1nxhG9Lo_000026_000036.mp4,329,-1 330 | squat/ENkU87uTdfU_000025_000035.mp4,330,-1 331 | sticking tongue out/E6ZgFC1L178_000041_000051.mp4,331,-1 332 | stomping grapes/gzAmaRypLyI_000062_000072.mp4,332,-1 333 | stretching arm/qKiTc6GGT4c_000036_000046.mp4,333,-1 334 | stretching leg/-hkrPB2YU50_000612_000622.mp4,334,-1 335 | strumming guitar/-2GJPqAglxU_000862_000872.mp4,335,-1 336 | surfing crowd/tpruGil1UCs_000038_000048.mp4,336,-1 337 | surfing 
water/-G_tgkmqChg_000072_000082.mp4,337,-1 338 | sweeping floor/bHy05OAiL1g_000027_000037.mp4,338,-1 339 | swimming backstroke/7aGVsi5ZgMI_000023_000033.mp4,339,-1 340 | swimming breast stroke/BX-dyfoGFsE_000168_000178.mp4,340,-1 341 | swimming butterfly stroke/aAYaI35qR5Q_000005_000015.mp4,341,-1 342 | swing dancing/LVyo14Q5PmY_000006_000016.mp4,342,-1 343 | swinging legs/W52Cl1ed1LU_000000_000010.mp4,343,-1 344 | swinging on something/ibRyH1Q1bbo_000066_000076.mp4,344,-1 345 | sword fighting/_kcVbo4E2JQ_000101_000111.mp4,345,-1 346 | tai chi/BbFbo987QEo_000057_000067.mp4,346,-1 347 | taking a shower/zW5Gt8bfZbc_000011_000021.mp4,347,-1 348 | tango dancing/dFDdr9zxfzc_000101_000111.mp4,348,-1 349 | tap dancing/dZ1EkA3BuQ4_000000_000010.mp4,349,-1 350 | tapping guitar/7Nqupt1WIn4_000031_000041.mp4,350,-1 351 | tapping pen/xcsH3jFtdSg_000026_000036.mp4,351,-1 352 | tasting beer/UcAAItCUJrk_000518_000528.mp4,352,-1 353 | tasting food/SQCsXDtiARU_000332_000342.mp4,353,-1 354 | testifying/O4ystlpCCxM_000010_000020.mp4,354,-1 355 | texting/V_LQXRQVrok_000116_000126.mp4,355,-1 356 | throwing axe/1YmkhTmmyRc_000002_000012.mp4,356,-1 357 | throwing ball/A6JeTfQqm0I_000000_000010.mp4,357,-1 358 | throwing discus/gjNdAGf_16Y_000084_000094.mp4,358,-1 359 | tickling/W9Ydqjoda9c_000000_000010.mp4,359,-1 360 | tobogganing/SWmBChx-7fI_000003_000013.mp4,360,-1 361 | tossing coin/Iwg-Had3-wE_000002_000012.mp4,361,-1 362 | tossing salad/PcbZKLvO6gc_000181_000191.mp4,362,-1 363 | training dog/am3gcomIUa4_000048_000058.mp4,363,-1 364 | trapezing/p-E3XWgf3Wk_000012_000022.mp4,364,-1 365 | trimming or shaving beard/X9mmMztC1Vo_000366_000376.mp4,365,-1 366 | trimming trees/Q_F85_VgKwM_000045_000055.mp4,366,-1 367 | triple jump/UergZFP-AdM_000002_000012.mp4,367,-1 368 | tying bow tie/c-t7EA00jj8_000016_000026.mp4,368,-1 369 | tying knot (not on a tie)/Vdx6g26ZOE0_000002_000012.mp4,369,-1 370 | tying tie/DIrYfnfogiA_000148_000158.mp4,370,-1 371 | unboxing/5lmmjOhih3U_000046_000056.mp4,371,-1 372 | unloading truck/-aKzhHxNXDo_000066_000076.mp4,372,-1 373 | using computer/5R1KJn3Pqa8_000066_000076.mp4,373,-1 374 | using remote controller (not gaming)/Bj7_KWKEXp8_000046_000056.mp4,374,-1 375 | using segway/QQTUQu4emh8_000145_000155.mp4,375,-1 376 | vault/A4U2LxAwIm4_000031_000041.mp4,376,-1 377 | waiting in line/5V_Ed93k2bI_000059_000069.mp4,377,-1 378 | walking the dog/3NlgmP6MDmY_000021_000031.mp4,378,-1 379 | washing dishes/oEkXkrSbFU8_000052_000062.mp4,379,-1 380 | washing feet/n3vpap_pQ-U_000076_000086.mp4,380,-1 381 | washing hair/BhU4HGJ2q4s_000004_000014.mp4,381,-1 382 | washing hands/-jtKtX9gGdY_000005_000015.mp4,382,-1 383 | water skiing/F1KYDfTyuEI_000040_000050.mp4,383,-1 384 | water sliding/N6lBqLeKs8I_000001_000011.mp4,384,-1 385 | watering plants/jZfXAIU4rZ4_000073_000083.mp4,385,-1 386 | waxing back/P5qR6CoGbk8_000035_000045.mp4,386,-1 387 | waxing chest/oRKbez1LpWU_000080_000090.mp4,387,-1 388 | waxing eyebrows/hjzI8c63hVo_000011_000021.mp4,388,-1 389 | waxing legs/dzeivZlP6tU_000024_000034.mp4,389,-1 390 | weaving basket/oD0wHopSNLU_000000_000010.mp4,390,-1 391 | welding/5hSYP2XxBGY_000204_000214.mp4,391,-1 392 | whistling/KFOWZBfLHrA_000084_000094.mp4,392,-1 393 | windsurfing/nDlR90yHqPY_000112_000122.mp4,393,-1 394 | wrapping present/zYjHJNadEj4_000246_000256.mp4,394,-1 395 | wrestling/UP_iRJv5mPU_000150_000160.mp4,395,-1 396 | writing/OrWjyz2bFJQ_000064_000074.mp4,396,-1 397 | yawning/SaJWnqViSLo_000023_000033.mp4,397,-1 398 | yoga/5NysTi21_D0_000003_000013.mp4,398,-1 399 | 
zumba/BvO4NNTw7Ks_000094_000104.mp4,399,-1 400 | headbutting/PzD2BkZye2U_000013_000023.mp4,150,6 401 | slapping/WGxSNBg_tl0_000075_000085.mp4,314,9 402 | -------------------------------------------------------------------------------- /reference_kinetics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import pandas as pd 5 | import json 6 | 7 | import torch 8 | from gluoncv.torch.model_zoo import get_model 9 | from utils import CONFIG_PATHS, get_cfg_custom, AverageMeter, OPT_PATH 10 | import argparse 11 | import math 12 | 13 | def arg_parse(): 14 | parser = argparse.ArgumentParser(description='') 15 | parser.add_argument('--adv_path', type=str, default='', help='the path of adversarial examples.') 16 | parser.add_argument('--gpu', type=str, default='0', help='gpu device.') 17 | parser.add_argument('--batch_size', type=int, default=16, metavar='N', 18 | help='input batch size for reference (default: 16)') 19 | args = parser.parse_args() 20 | args.adv_path = os.path.join(OPT_PATH, args.adv_path) 21 | return args 22 | 23 | def accuracy(output, target): 24 | batch_size = target.size(0) 25 | 26 | _, pred = output.topk(1, 1, True, True) 27 | pred = pred.t() # batch_size, 1 28 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 29 | 30 | correct_k = correct[:1].view(-1).float().sum(0) 31 | return correct_k.mul_(100.0 / batch_size), torch.squeeze(pred) 32 | 33 | def generate_batch(batch_files): 34 | batches = [] 35 | labels = [] 36 | for file in batch_files: 37 | batches.append(torch.from_numpy(np.load(os.path.join(args.adv_path, file))).cuda()) 38 | labels.append(int(file.split('-')[0])) 39 | labels = np.array(labels).astype(np.int32) 40 | labels = torch.from_numpy(labels) 41 | return torch.stack(batches), labels 42 | 43 | def reference(model, files_batch): 44 | data_time = AverageMeter() 45 | top1 = AverageMeter() 46 | batch_time = AverageMeter() 47 | 48 | predictions = [] 49 | labels = [] 50 | 51 | end = time.time() 52 | with torch.no_grad(): 53 | for step, batch in enumerate(files_batch): 54 | data_time.update(time.time() - end) 55 | val_batch, val_label = generate_batch(batch) 56 | 57 | val_batch = val_batch.cuda() 58 | val_label = val_label.cuda() 59 | 60 | batch_size = val_label.size(0) 61 | outputs = model(val_batch) 62 | 63 | prec1a, preds = accuracy(outputs.data, val_label) 64 | 65 | predictions += list(preds.cpu().numpy()) 66 | labels += list(val_label.cpu().numpy()) 67 | 68 | top1.update(prec1a.item(), val_batch.size(0)) 69 | batch_time.update(time.time() - end) 70 | end = time.time() 71 | 72 | if step % 5 == 0: 73 | print('----validation----') 74 | print_string = 'Process: [{0}/{1}]'.format(step + 1, len(files_batch)) 75 | print(print_string) 76 | print_string = 'data_time: {data_time:.3f}, batch time: {batch_time:.3f}'.format( 77 | data_time=data_time.val, 78 | batch_time=batch_time.val) 79 | print(print_string) 80 | print_string = 'top-1 accuracy: {top1_acc:.2f}%'.format(top1_acc = top1.avg) 81 | print (print_string) 82 | return predictions, labels, top1.avg 83 | 84 | if __name__ == '__main__': 85 | global args 86 | args = arg_parse() 87 | 88 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 89 | 90 | # loading adversarial examples. 
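# The filename convention matters here: generate_batch() above recovers the
# ground-truth label via int(file.split('-')[0]), so each saved example is a
# .npy tensor whose name starts with '<gt_label>-' and contains 'adv'; the
# rest of the name is produced by the attack scripts and is not shown here.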
91 | files = os.listdir(args.adv_path) 92 | files = [i for i in files if 'adv' in i] 93 | 94 | batch_times = math.ceil(len(files) / args.batch_size) 95 | files_batch = [] 96 | for i in range(batch_times): 97 | batch = files[i*args.batch_size: min((i+1)*args.batch_size, len(files))] 98 | files_batch.append(batch) 99 | 100 | model_val_acc = {} 101 | info_df = pd.DataFrame() 102 | info_df['gt_label'] = [i for i in range(400)] 103 | for model_name in CONFIG_PATHS.keys(): 104 | print ('Model-{}:'.format(model_name)) 105 | cfg_path = CONFIG_PATHS[model_name] 106 | cfg = get_cfg_custom(cfg_path) 107 | model = get_model(cfg).cuda() 108 | model.eval() 109 | preds, labels, top1_avg = reference(model, files_batch) 110 | 111 | predd = np.zeros_like(preds) 112 | inds = np.argsort(labels) 113 | for i,ind in enumerate(inds): 114 | predd[ind] = preds[i] 115 | 116 | info_df['{}-pre'.format(model_name)] = predd 117 | model_val_acc[model_name] = top1_avg 118 | del model 119 | torch.cuda.empty_cache() 120 | 121 | info_df.to_csv(os.path.join(args.adv_path, 'results_all_models_prediction.csv'), index=False) 122 | with open(os.path.join(args.adv_path, 'top1_acc_all_models.json'), 'w') as opt: 123 | json.dump(model_val_acc, opt) 124 | # delete the generated npy files. 125 | # command = os.path.join(args.adv_path, '*.npy') 126 | # os.system('rm {}'.format(command)) 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /reference_ucf101.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import pandas as pd 5 | import json 6 | import torch 7 | 8 | from utils import CONFIG_PATHS, get_cfg_custom, AverageMeter, OPT_PATH, MODEL_TO_CKPTS 9 | import argparse 10 | import math 11 | from gluoncv.torch.model_zoo import get_model 12 | 13 | def arg_parse(): 14 | parser = argparse.ArgumentParser(description='') 15 | parser.add_argument('--adv_path', type=str, default='', help='the path of adversarial examples.') 16 | parser.add_argument('--gpu', type=str, default='0', help='gpu device.') 17 | parser.add_argument('--batch_size', type=int, default=16, metavar='N', 18 | help='input batch size for reference (default: 16)') 19 | args = parser.parse_args() 20 | args.adv_path = os.path.join(OPT_PATH, args.adv_path) 21 | return args 22 | 23 | def accuracy(output, target): 24 | batch_size = target.size(0) 25 | 26 | _, pred = output.topk(1, 1, True, True) 27 | pred = pred.t() # batch_size, 1 28 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 29 | 30 | correct_k = correct[:1].view(-1).float().sum(0) 31 | return correct_k.mul_(100.0 / batch_size), torch.squeeze(pred) 32 | 33 | def generate_batch(batch_files): 34 | batches = [] 35 | labels = [] 36 | for file in batch_files: 37 | batches.append(torch.from_numpy(np.load(os.path.join(args.adv_path, file))).cuda()) 38 | labels.append(int(file.split('-')[0])) 39 | labels = np.array(labels).astype(np.int32) 40 | labels = torch.from_numpy(labels) 41 | return torch.stack(batches), labels 42 | 43 | def reference(model, files_batch): 44 | data_time = AverageMeter() 45 | top1 = AverageMeter() 46 | batch_time = AverageMeter() 47 | 48 | predictions = [] 49 | labels = [] 50 | 51 | end = time.time() 52 | with torch.no_grad(): 53 | for step, batch in enumerate(files_batch): 54 | data_time.update(time.time() - end) 55 | val_batch, val_label = generate_batch(batch) 56 | 57 | val_batch = val_batch.cuda() 58 | val_label = val_label.cuda() 59 | 60 | batch_size = 
val_label.size(0) 61 | outputs = model(val_batch) 62 | 63 | prec1a, preds = accuracy(outputs.data, val_label) 64 | 65 | predictions += list(preds.cpu().numpy()) 66 | labels += list(val_label.cpu().numpy()) 67 | 68 | top1.update(prec1a.item(), val_batch.size(0)) 69 | batch_time.update(time.time() - end) 70 | end = time.time() 71 | 72 | if step % 5 == 0: 73 | print('----validation----') 74 | print_string = 'Process: [{0}/{1}]'.format(step + 1, len(files_batch)) 75 | print(print_string) 76 | print_string = 'data_time: {data_time:.3f}, batch time: {batch_time:.3f}'.format( 77 | data_time=data_time.val, 78 | batch_time=batch_time.val) 79 | print(print_string) 80 | print_string = 'top-1 accuracy: {top1_acc:.2f}%'.format(top1_acc = top1.avg) 81 | print (print_string) 82 | return predictions, labels, top1.avg 83 | 84 | if __name__ == '__main__': 85 | global args 86 | args = arg_parse() 87 | 88 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu 89 | 90 | # loading adversarial examples: .npy files whose names start with the ground-truth label ('<label>-...') and contain 'adv'. 91 | files = os.listdir(args.adv_path) 92 | files = [i for i in files if 'adv' in i] 93 | 94 | batch_times = math.ceil(len(files) / args.batch_size) 95 | files_batch = [] 96 | for i in range(batch_times): 97 | batch = files[i*args.batch_size: min((i+1)*args.batch_size, len(files))] 98 | files_batch.append(batch) 99 | 100 | model_val_acc = {} 101 | info_df = pd.DataFrame() 102 | info_df['gt_label'] = [i for i in range(101)] 103 | for model_name in ['i3d_resnet101', 'i3d_resnet50', 'slowfast_resnet101', 'slowfast_resnet50', 'tpn_resnet101', 'tpn_resnet50']: 104 | print ('Model-{}:'.format(model_name)) 105 | cfg_path = CONFIG_PATHS[model_name] 106 | cfg = get_cfg_custom(cfg_path, args.batch_size) 107 | cfg.CONFIG.MODEL.PRETRAINED = False 108 | ckpt_path = MODEL_TO_CKPTS[model_name] 109 | model = get_model(cfg) 110 | model.load_state_dict(torch.load(ckpt_path)['state_dict']) 111 | model.cuda() 112 | model.eval() 113 | 114 | preds, labels, top1_avg = reference(model, files_batch) 115 | 116 | predd = np.zeros_like(preds) 117 | inds = np.argsort(labels) 118 | for i,ind in enumerate(inds): 119 | predd[ind] = preds[i] 120 | 121 | info_df['{}-pre'.format(model_name)] = predd 122 | model_val_acc[model_name] = top1_avg 123 | del model 124 | torch.cuda.empty_cache() 125 | 126 | info_df.to_csv(os.path.join(args.adv_path, 'results_all_models_prediction.csv'), index=False) 127 | with open(os.path.join(args.adv_path, 'top1_acc_all_models.json'), 'w') as opt: 128 | json.dump(model_val_acc, opt) 129 | # delete the generated npy files. 
130 | # command = os.path.join(args.adv_path, '*.npy') 131 | # os.system('rm {}'.format(command)) 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /used_idxs.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhipeng-wei/TT/cd2aec8dc6478b5e9444822bcb19ee7971847763/used_idxs.pkl -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gluoncv.torch.engine.config import get_cfg_defaults 3 | import torch 4 | 5 | # config info 6 | # refer to https://cv.gluon.ai/model_zoo/action_recognition.html 7 | CONFIG_ROOT = './config' # config paths 8 | CONFIG_PATHS = { 9 | 'i3d_resnet50': os.path.join(CONFIG_ROOT, 'i3d_nl5_resnet50_v1_kinetics400.yaml'), 10 | 'i3d_resnet101': os.path.join(CONFIG_ROOT, 'i3d_nl5_resnet101_v1_kinetics400.yaml'), 11 | 'slowfast_resnet50': os.path.join(CONFIG_ROOT, 'slowfast_8x8_resnet50_kinetics400.yaml'), 12 | 'slowfast_resnet101': os.path.join(CONFIG_ROOT, 'slowfast_8x8_resnet101_kinetics400.yaml'), 13 | 'tpn_resnet50': os.path.join(CONFIG_ROOT, 'tpn_resnet50_f32s2_kinetics400.yaml'), 14 | 'tpn_resnet101': os.path.join(CONFIG_ROOT, 'tpn_resnet101_f32s2_kinetics400.yaml') 15 | } 16 | 17 | # save info 18 | OPT_PATH = '' # output path 19 | 20 | # ucf model infos 21 | UCF_MODEL_ROOT = '' # ckpt file path of UCF101 22 | MODEL_TO_CKPTS = { 23 | 'i3d_resnet50': os.path.join(UCF_MODEL_ROOT, 'i3d_resnet50.pth'), 24 | 'i3d_resnet101': os.path.join(UCF_MODEL_ROOT, 'i3d_resnet101.pth'), 25 | 'slowfast_resnet50': os.path.join(UCF_MODEL_ROOT, 'slowfast_resnet50.pth'), 26 | 'slowfast_resnet101': os.path.join(UCF_MODEL_ROOT, 'slowfast_resnet101.pth'), 27 | 'tpn_resnet50': os.path.join(UCF_MODEL_ROOT, 'tpn_resnet50.pth'), 28 | 'tpn_resnet101': os.path.join(UCF_MODEL_ROOT, 'tpn_resnet101.pth') 29 | } 30 | # ucf dataset 31 | UCF_DATA_ROOT = '' # ucf101 dataset path 32 | Kinetic_DATA_ROOT = '' # kinetics dataset path 33 | 34 | def change_cfg(cfg, batch_size): 35 | # modify video paths and pretrain setting. 36 | cfg.CONFIG.DATA.VAL_DATA_PATH = Kinetic_DATA_ROOT 37 | cfg.CONFIG.DATA.VAL_ANNO_PATH = './kinetics400_attack_samples.csv' 38 | cfg.CONFIG.MODEL.PRETRAINED = True 39 | cfg.CONFIG.VAL.BATCH_SIZE = batch_size 40 | return cfg 41 | 42 | def get_cfg_custom(cfg_path, batch_size=16): 43 | cfg = get_cfg_defaults() 44 | cfg.merge_from_file(cfg_path) 45 | cfg = change_cfg(cfg, batch_size) 46 | return cfg 47 | 48 | class AverageMeter(object): 49 | """Computes and stores the average and current value""" 50 | 51 | def __init__(self): 52 | self.reset() 53 | 54 | def reset(self): 55 | self.val = 0 56 | self.avg = 0 57 | self.sum = 0 58 | self.count = 0 59 | 60 | def update(self, val, n=1): 61 | self.val = val 62 | self.sum += val * n 63 | self.count += n 64 | self.avg = self.sum / self.count --------------------------------------------------------------------------------
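For quick orientation, the helpers above compose into a short evaluation loop. The sketch below is illustrative only: it assumes **Kinetic_DATA_ROOT** in utils.py has been pointed at the downloaded Kinetics-400 clips and that the gluoncv pretrained weights can be fetched; every name it uses (CONFIG_PATHS, get_cfg_custom, dataset.kinetics.get_dataset, gluoncv's get_model) is defined or imported in the files above.
```
# smoke-test sketch: load one white-box model and score one clean validation batch
import torch
from gluoncv.torch.model_zoo import get_model
from utils import CONFIG_PATHS, get_cfg_custom
from dataset.kinetics import get_dataset

cfg = get_cfg_custom(CONFIG_PATHS['slowfast_resnet101'], batch_size=1)
model = get_model(cfg).cuda().eval()  # change_cfg() sets PRETRAINED = True
loader = get_dataset(cfg)  # validation loader over kinetics400_attack_samples.csv

with torch.no_grad():
    for clip, label, name, clip_ind in loader:  # validation mode yields 4 fields
        logits = model(clip.cuda())
        print(name[0], 'gt:', label.item(), 'pred:', logits.argmax(dim=1).item())
        break  # one batch is enough to confirm the pipeline runs
```
reference_kinetics.py and reference_ucf101.py then re-score the .npy files written by the attack scripts in the same fashion, one model at a time.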