├── .gitattributes ├── .gitignore ├── README.md ├── data └── download.py ├── deepeye ├── README.md ├── __init__.py ├── archs │ ├── __init__.py │ ├── densenet.py │ ├── lenet.py │ ├── resnet.py │ └── toynet.py ├── callbacks.py ├── datasets │ ├── __init__.py │ ├── cdnet.py │ ├── file.py │ ├── forest.py │ └── preprocessing.py ├── losses.py ├── metrics.py ├── model.py ├── transforms.py └── utils │ ├── __init__.py │ ├── arg_utils.py │ ├── config_utils.py │ ├── convert_torch.py │ ├── generic_utils.py │ └── img_utils.py ├── main.py ├── requirements.txt ├── src-torch ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── PATENTS ├── README.md ├── TRAINING.md ├── best-thresholds.txt ├── category-perf.lua ├── checkpoints.lua ├── dataloader.lua ├── datasets │ ├── .cdnet-gen.lua.swp │ ├── README.md │ ├── cdnet-gen.lua │ ├── cdnet.lua │ ├── cifar10-gen.lua │ ├── cifar10.lua │ ├── cifar100-gen.lua │ ├── cifar100.lua │ ├── imagenet-gen.lua │ ├── imagenet.lua │ ├── init.lua │ ├── sanity-gen.lua │ ├── sanity.lua │ ├── test-hdf5-gen.lua │ ├── test-hdf5.lua │ ├── transforms-test.lua │ └── transforms.lua ├── drawnet.lua ├── main.lua ├── models │ ├── init.lua │ ├── lenet5.lua │ ├── preresnet.lua │ ├── resnet-deep-subtraction │ ├── resnet-skip.lua │ ├── resnet.lua │ ├── unet-original.lua │ ├── unet.lua │ ├── unet.lua.1 │ └── unet1.lua ├── opts.lua ├── param-optim.py ├── plotting.lua ├── pretrained │ ├── CONVERGENCE.md │ ├── README.md │ ├── classify.lua │ ├── extract-features.lua │ └── imagenet.lua ├── run-tests.sh ├── sanity_test.lua ├── save_img.lua ├── test-cdnet-gen-correct ├── train.lua ├── tune-threshold.lua ├── tuning.sh └── visualization.py └── train-val-split.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | src-torch/* linguist-vendored 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/vim,linux,macos,video,images,python,windows,sublimetext,visualstudiocode 3 | 4 | ### Images ### 5 | # JPEG 6 | *.jpg 7 | *.jpeg 8 | *.jpe 9 | *.jif 10 | *.jfif 11 | *.jfi 12 | 13 | # JPEG 2000 14 | *.jp2 15 | *.j2k 16 | *.jpf 17 | *.jpx 18 | *.jpm 19 | *.mj2 20 | 21 | # JPEG XR 22 | *.jxr 23 | *.hdp 24 | *.wdp 25 | 26 | # Graphics Interchange Format 27 | *.gif 28 | 29 | # RAW 30 | *.raw 31 | 32 | # Web P 33 | *.webp 34 | 35 | # Portable Network Graphics 36 | *.png 37 | 38 | # Animated Portable Network Graphics 39 | *.apng 40 | 41 | # Multiple-image Network Graphics 42 | *.mng 43 | 44 | # Tagged Image File Format 45 | *.tiff 46 | *.tif 47 | 48 | # Scalable Vector Graphics 49 | *.svg 50 | *.svgz 51 | 52 | # Portable Document Format 53 | *.pdf 54 | 55 | # X BitMap 56 | *.xbm 57 | 58 | # BMP 59 | *.bmp 60 | *.dib 61 | 62 | # ICO 63 | *.ico 64 | 65 | # 3D Images 66 | *.3dm 67 | *.max 68 | 69 | ### Linux ### 70 | *~ 71 | 72 | # temporary files which can be created if a process still has a handle open of a deleted file 73 | .fuse_hidden* 74 | 75 | # KDE directory preferences 76 | .directory 77 | 78 | # Linux trash folder which might appear on any partition or disk 79 | .Trash-* 80 | 81 | # .nfs files are created when an open file is removed but is still being accessed 82 | .nfs* 83 | 84 | ### macOS ### 85 | *.DS_Store 86 | .AppleDouble 87 | .LSOverride 88 | 89 | # Icon must end with two \r 90 | Icon 91 | 92 | # Thumbnails 93 | ._* 94 | 95 | # Files that might appear in the root of a volume 96 | 
.DocumentRevisions-V100 97 | .fseventsd 98 | .Spotlight-V100 99 | .TemporaryItems 100 | .Trashes 101 | .VolumeIcon.icns 102 | .com.apple.timemachine.donotpresent 103 | 104 | # Directories potentially created on remote AFP share 105 | .AppleDB 106 | .AppleDesktop 107 | Network Trash Folder 108 | Temporary Items 109 | .apdisk 110 | 111 | ### Python ### 112 | # Byte-compiled / optimized / DLL files 113 | **/__pycache__/ 114 | *.py[cod] 115 | *$py.class 116 | 117 | # C extensions 118 | *.so 119 | 120 | # Distribution / packaging 121 | .Python 122 | build/ 123 | develop-eggs/ 124 | dist/ 125 | downloads/ 126 | eggs/ 127 | .eggs/ 128 | lib/ 129 | lib64/ 130 | parts/ 131 | sdist/ 132 | var/ 133 | wheels/ 134 | *.egg-info/ 135 | .installed.cfg 136 | *.egg 137 | 138 | # PyInstaller 139 | # Usually these files are written by a python script from a template 140 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 141 | *.manifest 142 | *.spec 143 | 144 | # Installer logs 145 | pip-log.txt 146 | pip-delete-this-directory.txt 147 | 148 | # Unit test / coverage reports 149 | htmlcov/ 150 | .tox/ 151 | .coverage 152 | .coverage.* 153 | .cache 154 | nosetests.xml 155 | coverage.xml 156 | *.cover 157 | .hypothesis/ 158 | 159 | # Translations 160 | *.mo 161 | *.pot 162 | 163 | # Django stuff: 164 | *.log 165 | local_settings.py 166 | 167 | # Flask stuff: 168 | instance/ 169 | .webassets-cache 170 | 171 | # Scrapy stuff: 172 | .scrapy 173 | 174 | # Sphinx documentation 175 | docs/_build/ 176 | 177 | # PyBuilder 178 | target/ 179 | 180 | # Jupyter Notebook 181 | .ipynb_checkpoints 182 | 183 | # pyenv 184 | .python-version 185 | 186 | # celery beat schedule file 187 | celerybeat-schedule.* 188 | 189 | # SageMath parsed files 190 | *.sage.py 191 | 192 | # Environments 193 | .env 194 | .venv 195 | env/ 196 | venv/ 197 | ENV/ 198 | env.bak/ 199 | venv.bak/ 200 | 201 | # Spyder project settings 202 | .spyderproject 203 | .spyproject 204 | 205 | # Rope project settings 206 | .ropeproject 207 | 208 | # mkdocs documentation 209 | /site 210 | 211 | # mypy 212 | .mypy_cache/ 213 | 214 | ### SublimeText ### 215 | # cache files for sublime text 216 | *.tmlanguage.cache 217 | *.tmPreferences.cache 218 | *.stTheme.cache 219 | 220 | # workspace files are user-specific 221 | *.sublime-workspace 222 | 223 | # project files should be checked into the repository, unless a significant 224 | # proportion of contributors will probably not be using SublimeText 225 | # *.sublime-project 226 | 227 | # sftp configuration file 228 | sftp-config.json 229 | 230 | # Package control specific files 231 | Package Control.last-run 232 | Package Control.ca-list 233 | Package Control.ca-bundle 234 | Package Control.system-ca-bundle 235 | Package Control.cache/ 236 | Package Control.ca-certs/ 237 | Package Control.merged-ca-bundle 238 | Package Control.user-ca-bundle 239 | oscrypto-ca-bundle.crt 240 | bh_unicode_properties.cache 241 | 242 | # Sublime-github package stores a github token in this file 243 | # https://packagecontrol.io/packages/sublime-github 244 | GitHub.sublime-settings 245 | 246 | ### Video ### 247 | *.3g2 248 | *.3gp 249 | *.asf 250 | *.asx 251 | *.avi 252 | *.flv 253 | *.mov 254 | *.mp4 255 | *.mpg 256 | *.rm 257 | *.swf 258 | *.vob 259 | *.wmv 260 | 261 | ### Vim ### 262 | # swap 263 | [._]*.s[a-v][a-z] 264 | [._]*.sw[a-p] 265 | [._]s[a-v][a-z] 266 | [._]sw[a-p] 267 | # session 268 | Session.vim 269 | # temporary 270 | .netrwhist 271 | # auto-generated tag files 272 | tags 273 | 274 | ### 
VisualStudioCode ### 275 | .vscode/* 276 | !.vscode/settings.json 277 | !.vscode/tasks.json 278 | !.vscode/launch.json 279 | !.vscode/extensions.json 280 | .history 281 | 282 | ### Windows ### 283 | # Windows thumbnail cache files 284 | Thumbs.db 285 | ehthumbs.db 286 | ehthumbs_vista.db 287 | 288 | # Folder config file 289 | Desktop.ini 290 | 291 | # Recycle Bin used on file shares 292 | $RECYCLE.BIN/ 293 | 294 | # Windows Installer files 295 | *.cab 296 | *.msi 297 | *.msm 298 | *.msp 299 | 300 | # Windows shortcuts 301 | *.lnk 302 | 303 | # End of https://www.gitignore.io/api/vim,linux,macos,video,images,python,windows,sublimetext,visualstudiocode 304 | 305 | # Custom ignore 306 | data/**/* 307 | .vscode 308 | models/* 309 | notebooks/**/* 310 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Foreground Segmentation for Anomaly Detection in Surveillance Videos 2 | 3 | PyTorch code for the **SBRT 2017** paper **Foreground Segmentation for Anomaly Detection in Surveillance Videos Using Deep Residual Networks**, available [here](https://www.researchgate.net/publication/319681001_Foreground_Segmentation_for_Anomaly_Detection_in_Surveillance_Videos_Using_Deep_Residual_Networks) 4 | 5 | The aim of this work (under the [deepeye](https://github.com/lpcinelli/foreground-segmentation/tree/master/deepeye) folder) is to detect and segment anomalies in a target video given a temporally aligned reference video (anomaly-free). The output segmentation map has the same resolution as the input video frame. 6 | 7 | ## Dataset 8 | 9 | ### CDNET 10 | 11 | For our experiments, we used the [CDNET](http://changedetection.net/) database, a database for identifying changing or moving areas in the field of view of a camera. It covers a wide range of detection challenges representative of typical indoor and outdoor visual data captured in today's surveillance scenarios: 12 | 13 | * Dynamic background 14 | * Camera jitter 15 | * Intermittent object motion 16 | * Shadows 17 | * Thermal signatures 18 | * Challenging weather 19 | * Low frame-rate 20 | * Acquisition at night 21 | * PTZ capture 22 | * Air turbulence 23 | 24 | In this preliminary work, instead of an entire reference video, we use a single still reference frame obtained by taking the median of each pixel over the first 150 frames of the considered target video. Although not ideal, this has little influence since videos in CDNET are recorded with a stationary camera (except for the PTZ class, for which the algorithm's performance is naturally worse). It is worth emphasizing that our algorithm allows the more general setting of using a whole video (with egomotion) as reference rather than a single still image, compared frame by frame with the target video. 25 | 26 | ### VDAO 27 | 28 | The idea is now to use it on [VDAO](http://www02.smt.ufrj.br/~tvdigital/database), a video database containing annotated videos of a cluttered industrial environment, in which the videos were captured by a camera on a moving platform. 29 | A set of useful tools for working with the VDAO database is available in the [VDAO_Access Project](https://github.com/rafaelpadilla/DeepLearning-VDAO/tree/master/VDAO_Access). 
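For illustration, the still-reference construction described above for CDNET amounts to a per-pixel temporal median over the first frames of the video. A minimal NumPy sketch (the `load_frame` helper is hypothetical, not part of this repo):

```python
import numpy as np

def median_background(load_frame, num_frames=150):
    """Builds a still reference frame as the per-pixel temporal median.

    load_frame(t) is a hypothetical helper returning frame t as an
    (H, W, C) uint8 array; 150 frames matches the setup described above.
    """
    frames = np.stack([load_frame(t) for t in range(num_frames)])
    # Median over time, computed independently per pixel and channel
    return np.median(frames, axis=0).astype(np.uint8)
```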
30 | 31 | ## Downloading the dataset 32 | Once you have installed Python, you can just run: 33 | ```bash 34 | $ cd data; python download.py 35 | ``` 36 | 37 | ## Training a model using the `main.py` script 38 | 39 | This script allows training all models using a command-line interface. The call should be something like: 40 | ```bash 41 | $ main.py --manifest TRAIN --img_path DIR --arch ARCH train \ 42 | --epochs N --lr LR 43 | ``` 44 | 45 | Example of a call that instantiates and trains a 20-layer ResNet with reconstruction by bilinear upsampling: 46 | ````bash 47 | python main.py --img-dir ~/Documents/database/cdnet2014/dataset --shape 2,192,256 --arch resnet20 --arch-params 'up_mode=upsample' --manifest data/manifest.train --loss bce -b 16 train --epochs 90 --aug --lr 0.01 --wd 0.0002 --val data/manifest.val --save models/resnet20-bilinear.pth.tar 48 | ```` 49 | 50 | For more details, you may run 51 | ```bash 52 | $ main.py --help 53 | ``` 54 | or just check out [main.py](../main.py). 55 | 56 | This script will automatically save the model at every epoch. 57 | 58 | 59 | ## Evaluating a model using the `main.py` script 60 | 61 | Evaluating a trained model can be done by simply running 62 | 63 | ```bash 64 | $ main.py --manifest EVAL --img_path DIR --arch ARCH \ 65 | --load PATH eval 66 | ``` 67 | 68 | ## Custom scripts 69 | 70 | ### Custom model 71 | 72 | Each model is defined by a class in the [models](codes/models/) package. A custom model can be defined as follows: 73 | 74 | ```python 75 | 76 | # Filename: codes/models/customnet.py 77 | from torch import nn 78 | # this line is necessary 79 | __all__ = ['CustomNet', 'customnet'] 80 | 81 | 82 | class CustomNet(nn.Module): 83 | 84 | def __init__(self): 85 | super(CustomNet, self).__init__() 86 | ... 87 | 88 | def forward(self, x): 89 | ... 90 | return out 91 | 92 | # This function is required by the main script 93 | def customnet(**kwargs): 94 | ... 95 | return CustomNet(**kwargs) 96 | ``` 97 | 98 | To make the `CustomNet` visible in `main.py`, we have to append the following code to the [init](models/__init__.py) script: 99 | 100 | ```python 101 | # Filename: codes/models/__init__.py 102 | 103 | from .customnet import * 104 | ``` 105 | 106 | ### Custom Callback 107 | 108 | All callbacks must inherit [Callback](codes/callbacks.py) and can optionally implement any of its 8 hooks. The default cycle is: 109 | 1. on_begin 110 | 2. on_epoch_begin 111 | 3. on_step_begin 112 | 4. on_batch_begin 113 | 5. on_batch_end 114 | 6. on_step_end 115 | 7. on_epoch_end 116 | 8. on_end 117 | 118 | A simple custom callback that prints at the beginning and at the end of each epoch: 119 | ```python 120 | 121 | class CustomCallback(Callback): 122 | 123 | def on_epoch_begin(self, epoch): 124 | print('epoch begin') 125 | 126 | def on_epoch_end(self, metrics): 127 | print('epoch end') 128 | 129 | ``` 130 | 131 | ## Requirements 132 | 133 | ### Software 134 | 135 | * Python 136 | * [7za](http://www.7-zip.org/download.html) 137 | * zip 138 | 139 | ### Python packages 140 | 141 | * pytorch 142 | * torchvision 143 | * numpy 144 | * pandas 145 | * matplotlib 146 | * pillow 147 | * glob2 148 | * inflection 149 | * tqdm 150 | * visdom 151 | 152 | ## Citation 153 | 154 | If you use this code in your research, please use the following BibTeX entry. 
155 | 156 | ```` 157 | @inproceedings{cinelli2017, 158 | title = {Foreground Segmentation for Anomaly Detection in Surveillance Videos Using Deep Residual Networks}, 159 | author = {Cinelli, Lucas P and Thomaz, Lucas A and da Silva, Allan F and da Silva, Eduardo AB and Netto, Sergio L}, 160 | booktitle = {Simpósio Brasileiro de Telecomunicações e Processamento de Sinais (SBRT)}, 161 | month = {September}, 162 | year = {2017} 163 | } 164 | ```` 165 | 166 | ## Acknowledgments 167 | 168 | The download script, the main[]().py structure, parts of this README, the callbacks, and much more were written by [Igor Macedo Quintanilha](https://igormq.github.io/about), a good friend and colleague. 169 | 170 | ## License 171 | 172 | See [LICENSE.md](LICENSE.md) 173 | 174 | -------------------------------------------------------------------------------- /deepeye/README.md: -------------------------------------------------------------------------------- 1 | # Foreground Segmentation for Anomaly Detection in Surveillance Videos 2 | 3 | ## Downloading the dataset 4 | Once you have installed Python, you can just run: 5 | ```bash 6 | $ cd data; python download.py 7 | ``` 8 | 9 | ## Training a model using the `main.py` script 10 | 11 | This script allows training all models using a command-line interface. The call should be something like: 12 | ```bash 13 | $ main.py --manifest TRAIN --img_path DIR --arch ARCH train \ 14 | --epochs N --lr LR 15 | ``` 16 | 17 | Example of a call that instantiates and trains a 20-layer ResNet with reconstruction by bilinear upsampling: 18 | ````bash 19 | python main.py --img-dir ~/Documents/database/cdnet2014/dataset --shape 2,192,256 --arch resnet20 --arch-params 'up_mode=upsample' --manifest data/manifest.train --loss bce -b 16 train --epochs 90 --aug --lr 0.01 --wd 0.0002 --val data/manifest.val --save models/resnet20-bilinear.pth.tar 20 | ```` 21 | 22 | For more details, you may run 23 | ```bash 24 | $ main.py --help 25 | ``` 26 | or just check out [main.py](../main.py). 27 | 28 | This script will automatically save the model at every epoch. 29 | 30 | 31 | ## Evaluating a model using the `main.py` script 32 | 33 | Evaluating a trained model can be done by simply running 34 | 35 | ```bash 36 | $ main.py --manifest EVAL --img_path DIR --arch ARCH \ 37 | --load PATH eval 38 | ``` 39 | 40 | 41 | ## Custom scripts 42 | 43 | ### Custom model 44 | 45 | Each model is defined by a class in the [models](codes/models/) package. A custom model can be defined as follows: 46 | 47 | ```python 48 | 49 | # Filename: codes/models/customnet.py 50 | from torch import nn 51 | # this line is necessary 52 | __all__ = ['CustomNet', 'customnet'] 53 | 54 | 55 | class CustomNet(nn.Module): 56 | 57 | def __init__(self): 58 | super(CustomNet, self).__init__() 59 | ... 60 | 61 | def forward(self, x): 62 | ... 63 | return out 64 | 65 | # This function is required by the main script 66 | def customnet(**kwargs): 67 | ... 68 | return CustomNet(**kwargs) 69 | ``` 70 | 71 | To make the `CustomNet` visible in `main.py`, we have to append the following code to the [init](models/__init__.py) script: 72 | 73 | ```python 74 | # Filename: codes/models/__init__.py 75 | 76 | from .customnet import * 77 | ``` 78 | 79 | ### Custom Callback 80 | 81 | All callbacks must inherit [Callback](codes/callbacks.py) and can optionally implement any of its 8 hooks. The default cycle is: 82 | 1. on_begin 83 | 2. on_epoch_begin 84 | 3. on_step_begin 85 | 4. on_batch_begin 86 | 5. on_batch_end 87 | 6. on_step_end 88 | 7. on_epoch_end 89 | 8. 
on_end 90 | 91 | A simple custom callback that prints at the beginning and at the end of each epoch: 92 | ```python 93 | 94 | class CustomCallback(Callback): 95 | 96 | def on_epoch_begin(self, epoch): 97 | print('epoch begin') 98 | 99 | def on_epoch_end(self, metrics): 100 | print('epoch end') 101 | 102 | ``` 103 | 104 | ## Requirements 105 | 106 | ### Software 107 | * Python 108 | * [7za](http://www.7-zip.org/download.html) 109 | * zip 110 | 111 | ### Python packages 112 | * pytorch 113 | * torchvision 114 | * numpy 115 | * pandas 116 | * matplotlib 117 | * pillow 118 | * glob2 119 | * inflection 120 | * tqdm 121 | * visdom 122 | 123 | ## Cite 124 | 125 | 126 | 127 | ## License 128 | See [LICENSE.md](LICENSE.md) 129 | -------------------------------------------------------------------------------- /deepeye/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lpcinelli/foreground-segmentation/25b34253547bde70c93e4b29668952d89b354273/deepeye/__init__.py -------------------------------------------------------------------------------- /deepeye/archs/__init__.py: -------------------------------------------------------------------------------- 1 | from .toynet import * 2 | from .resnet import * 3 | from .lenet import * -------------------------------------------------------------------------------- /deepeye/archs/toynet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | 4 | import torch.nn.functional as F 5 | from torch import nn 6 | from torch.nn import init 7 | 8 | __all__ = ['ToyNet', 'toynet'] 9 | 10 | 11 | class ToyNet(nn.Module): 12 | """ Toy network 13 | 14 | Hint: works best with 32x32 (W, H) input images 15 | """ 16 | 17 | def __init__(self, input_shape, num_classes=17): 18 | super(ToyNet, self).__init__() 19 | 20 | C, W, H = input_shape 21 | self.features = nn.Sequential( 22 | nn.Conv2d(C, 32, kernel_size=3, padding=1), 23 | nn.BatchNorm2d(32), 24 | nn.ReLU(inplace=True), 25 | nn.MaxPool2d(2), 26 | nn.Conv2d(32, 64, kernel_size=3, padding=1), 27 | nn.BatchNorm2d(64), 28 | nn.ReLU(inplace=True), 29 | nn.MaxPool2d(2)) 30 | 31 | self.classifier = nn.Sequential( 32 | nn.Linear(int((W * H * 64) / 16), 128), 33 | nn.ReLU(inplace=True), 34 | nn.Dropout(), 35 | nn.Linear(128, num_classes)) 36 | 37 | self._weights_init() 38 | 39 | def forward(self, x): 40 | # Extracting the features 41 | x = self.features(x) 42 | 43 | # Flatten Layer 44 | x = x.view(x.size(0), -1) 45 | 46 | # Projecting to num_classes 47 | x = self.classifier(x) 48 | 49 | return x 50 | 51 | def _weights_init(self): 52 | for m in self.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | init.kaiming_normal(m.weight) 55 | 56 | 57 | def toynet(pretrained=False, **kwargs): 58 | if pretrained: 59 | warnings.warn('No pretrained model available. 
' + 60 | 'Falling back to pretrained=False') 61 | input_shape = kwargs.pop('input_shape', None) 62 | if not input_shape: 63 | raise ValueError('input_shape is required') 64 | 65 | return ToyNet(input_shape, **kwargs) 66 | -------------------------------------------------------------------------------- /deepeye/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .cdnet import CDNetDataset 2 | from .cdnet import DEFAULT_SHAPE -------------------------------------------------------------------------------- /deepeye/datasets/cdnet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import warnings 4 | 5 | import glob2 as glob 6 | import numpy as np 7 | import pandas as pd 8 | import torch 9 | import torch.utils.data as data  # Torch dataset class 10 | 11 | import torchvision.transforms as vision_transforms 12 | import torchvision.transforms.functional as F 13 | from torchvision.datasets.folder import default_loader 14 | 15 | from ..transforms import * 16 | from ..utils.img_utils import IMG_EXTENSIONS 17 | from .file import DataFile 18 | 19 | DEFAULT_SHAPE = (2, 192, 256) 20 | 21 | 22 | class CDNetDataset(data.Dataset): 23 | def __init__(self, 24 | manifest_path, 25 | database_path, 26 | transform=None, 27 | training=False, 28 | shrink_data=False, 29 | input_shape=DEFAULT_SHAPE, 30 | augmentation=False): 31 | ''' 32 | Inits a CDNetDataset instance. 33 | 34 | Args: 35 | manifest_path: csv manifest listing the samples. 36 | database_path: CDNET database root directory. 37 | transform: images transforms. 38 | training, shrink_data, augmentation: training options. 39 | ''' 40 | 41 | # Loads data 42 | self.database_path = database_path 43 | 44 | 45 | data, self.names = self.from_file(manifest_path, training, shrink_data) 46 | 47 | if len(data) == 0: 48 | raise RuntimeError( 49 | "Found 0 images in path: " + manifest_path + "\n" + 50 | "Supported image extensions are: " + ",".join(IMG_EXTENSIONS)) 51 | 52 | # Main data 53 | self.data = data 54 | 55 | # Saving data 56 | self.manifest_path = manifest_path 57 | 58 | self.input_shape = input_shape 59 | if not transform: 60 | transform = transforms(self.input_shape, training, augmentation) 61 | self.transform = transform 62 | 63 | def __getitem__(self, index): 64 | ''' 65 | Returns image and target values for a given index. 66 | 67 | @param index Input index. 68 | 69 | @return The image and its respective target. 70 | ''' 71 | 72 | if isinstance(index, str): 73 | idxs = [ 74 | idx for idx, name in enumerate(self.names) if index in name 75 | ] 76 | if len(idxs) == 0: 77 | raise KeyError('Value not found') 78 | if len(idxs) > 1: 79 | raise KeyError('Non-unique key') 80 | 81 | data = self.data[idxs[0]] 82 | else: 83 | data = self.data[index] 84 | # Get single data 85 | (input_, bg_model), (target, roi) = data 86 | 87 | input_ = self.loader(input_) 88 | bg_model = self.loader(bg_model) 89 | target = self.loader(target, to_gray=True) 90 | roi = self.loader(roi, to_gray=True) 91 | 92 | # Transforming image 93 | if self.transform is not None: 94 | input_, target, roi = self.transform((input_, bg_model, target, 95 | roi)) 96 | # Return 97 | return input_, target, roi 98 | 99 | def __len__(self): 100 | ''' 101 | Returns the dataset size. 102 | 103 | @return Current number of samples. 
104 | ''' 105 | return len(self.data) 106 | 107 | def loader(self, path, to_gray=False): 108 | img = default_loader(path) 109 | 110 | if to_gray: 111 | img = F.to_grayscale(img) 112 | 113 | return img 114 | 115 | def from_file(self, csv_file, training, shrink_data): 116 | # Opening file 117 | dataset = pd.read_csv(csv_file) 118 | imgs, targets, names = [], [], [] 119 | 120 | for _, row in dataset.iterrows(): 121 | if training and shrink_data and row['negative_only']: 122 | continue 123 | 124 | input_path = os.path.join(self.database_path, row['video_type'], 125 | row['video_name'], 'input', 126 | row['input_frame']) 127 | 128 | bg_path = os.path.join(self.database_path, row['video_type'], 129 | row['video_name'], 'bg_model.jpg') 130 | 131 | target_path = os.path.join(self.database_path, row['video_type'], 132 | row['video_name'], 'groundtruth', 133 | row['target_frame']) 134 | 135 | roi_path = os.path.join(self.database_path, row['video_type'], 136 | row['video_name'], 'ROI.bmp') 137 | 138 | imgs.append((input_path, bg_path)) 139 | targets.append((target_path, roi_path)) 140 | names.append(input_path) 141 | 142 | return list(zip(imgs, targets)), names 143 | 144 | 145 | def transforms(input_shape, training=False, augmentation=False): 146 | 147 | if not training and augmentation is not False: 148 | raise ValueError('Combination of parameters not permitted. ' 149 | 'training=False, augmentation=True') 150 | 151 | C, H, W = input_shape 152 | 153 | compose = [BinarizeTarget()] 154 | 155 | if C < 3: 156 | compose = compose + [Grayscale(1)] 157 | 158 | compose = compose + [ 159 | Resize((H, W), (INTERPOLATION['BICUBIC'], INTERPOLATION['BICUBIC'], 160 | INTERPOLATION['NEAREST'], INTERPOLATION['NEAREST'])), 161 | ] 162 | if augmentation is True: 163 | compose = compose + [RandomHorizontalFlip()] 164 | 165 | compose = compose + [MergeChannels(), ToTensor(), RoiCrop()] 166 | 167 | return vision_transforms.Compose(compose) 168 | -------------------------------------------------------------------------------- /deepeye/datasets/file.py: -------------------------------------------------------------------------------- 1 | import numpy as np  # numpy library 2 | 3 | import torch 4 | import torch.utils.data as data  # Torch dataset class 5 | 6 | from ..utils.img_utils import IMG_EXTENSIONS 7 | 8 | 9 | class DataFile(data.Dataset): 10 | ''' 11 | Reads data from a file 12 | ''' 13 | 14 | def __init__(self, file_path, transform=None, target_transform=None): 15 | ''' 16 | Inits a DataFile instance. 17 | 18 | @param file_path data file path containing the samples. 19 | @param transform Images transforms. 20 | @param target_transform Labels transform. 21 | 22 | ''' 23 | 24 | # Loads data 25 | data = self.from_file(file_path) 26 | 27 | if len(data) == 0: 28 | raise RuntimeError("Found 0 images in path: " + file_path + "\n" + 29 | "Supported image extensions are: " + 30 | ",".join(IMG_EXTENSIONS)) 31 | 32 | # Saving data 33 | self.file_path = file_path 34 | self.transform = transform 35 | self.target_transform = target_transform 36 | 37 | # Main data 38 | self.data = data 39 | 40 | def __getitem__(self, index): 41 | ''' 42 | Returns image and target values for a given index. 43 | 44 | @param index Input index. 45 | 46 | @return The image and its respective target. 
47 | ''' 48 | 49 | # Get single data 50 | input_, target = self.data[index] 51 | 52 | input_ = self.loader(input_) 53 | 54 | # Transforming image 55 | if self.transform is not None: 56 | input_ = self.transform(input_) 57 | 58 | # Transforming target labels 59 | if self.target_transform is not None: 60 | target = self.target_transform(target) 61 | 62 | # Return 63 | return input_, target 64 | 65 | def __len__(self): 66 | ''' 67 | Returns the dataset size. 68 | 69 | @return Current number of samples. 70 | ''' 71 | return len(self.data) 72 | 73 | def loader(self, path): 74 | raise NotImplementedError 75 | 76 | def from_file(self, file_path): 77 | raise NotImplementedError 78 | -------------------------------------------------------------------------------- /deepeye/datasets/forest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import argparse 4 | import warnings 5 | import pandas as pd 6 | 7 | from sklearn.preprocessing import MultiLabelBinarizer 8 | from sklearn.exceptions import NotFittedError 9 | 10 | import torchvision.transforms as vision_transforms 11 | 12 | from .file import DataFile 13 | 14 | from ..utils.img_utils import default_loader 15 | from ..utils import img_utils 16 | from ..transforms import ToTensor 17 | 18 | MEAN3 = [0.3114, 0.3405, 0.2988] 19 | STD3 = [0.1672, 0.1438, 0.1373] 20 | MEAN4 = [0.0742, 0.0632, 0.0450, 0.0957] 21 | STD4 = [0.0278, 0.0246, 0.0258, 0.0286] 22 | W, H = 256, 256 23 | 24 | 25 | class ForestDataset(DataFile): 26 | ''' 27 | Dataset wrapping images and target labels for the Kaggle contest 28 | Planet: Understanding the Amazon from Space 29 | ''' 30 | 31 | def __init__(self, csv_file, imgs_dir, img_ext=None, exceptions=None, 32 | transform=None, target_transform=None, **kwargs): 33 | 34 | self.exceptions = exceptions or () 35 | self.binarizer = MultiLabelBinarizer(classes=self.classes) 36 | 37 | self.imgs_dir = imgs_dir 38 | self.img_ext = img_ext or ('.jpg' if 'jpg' in imgs_dir else '.tif') 39 | 40 | self.num_channels = 4 if '.tif' in self.img_ext else 3 41 | 42 | self.input_shape = kwargs.get('input_shape', 43 | (self.num_channels, None, None) 44 | ) 45 | 46 | if not transform: 47 | transform = transforms(self.num_channels, **kwargs) 48 | self.input_shape = default_input_shape(self.num_channels, 49 | **kwargs) 50 | 51 | super(ForestDataset, self).__init__(csv_file, 52 | transform, 53 | target_transform) 54 | 55 | def from_file(self, csv_file): 56 | # Opening file 57 | dataset = pd.read_csv(csv_file) 58 | imgs, targets = [], [] 59 | 60 | for i, row in dataset.iterrows(): 61 | img, target = row['image_name'], row['tags'] 62 | 63 | path = os.path.join(self.imgs_dir, img + self.img_ext) 64 | 65 | if img_utils.is_image_file(path) and os.path.isfile(path): 66 | target = [c for c in target.split(' ') \ 67 | if c not in self.exceptions] 68 | if target: 69 | imgs.append(path) 70 | targets.append(target) 71 | 72 | targets = self.binarizer.fit_transform(targets).astype(np.float32) 73 | 74 | return list(zip(np.array(imgs), targets)) 75 | 76 | def loader(self, path): 77 | return default_loader(path) 78 | 79 | @property 80 | def classes(self): 81 | return [c for c in ['agriculture', 'artisinal_mine', 'bare_ground', 'blooming', 82 | 'blow_down', 'clear', 'cloudy', 'conventional_mine', 83 | 'cultivation', 'habitation', 'haze', 'partly_cloudy', 84 | 'primary', 'road', 'selective_logging', 'slash_burn', 85 | 'water'] if c not in self.exceptions] 86 | 87 | 88 | def normalize(num_channels): 89 | if 
num_channels == 3: 90 | return vision_transforms.Normalize(MEAN3, STD3) 91 | elif num_channels == 4: 92 | return vision_transforms.Normalize(MEAN4, STD4) 93 | 94 | raise ValueError('Wrong number of channels: %d' % num_channels) 95 | 96 | 97 | def transforms(num_channels, training=False, augmentation=False): 98 | 99 | if not training and augmentation: 100 | raise ValueError('Combination of parameters not permitted. ' 101 | 'training=False, augmentation=True') 102 | 103 | compose = [ToTensor(), normalize(num_channels)] 104 | 105 | if not augmentation: 106 | compose = [vision_transforms.CenterCrop(224)] + compose 107 | else: 108 | compose = [vision_transforms.RandomSizedCrop(224), 109 | vision_transforms.RandomHorizontalFlip()] + compose 110 | 111 | if not training: 112 | compose = [vision_transforms.Scale(W)] + compose 113 | 114 | return vision_transforms.Compose(compose) 115 | 116 | 117 | def default_input_shape(num_channels, training=False, augmentation=False): 118 | return (num_channels, 224, 224) 119 | 120 | 121 | def split(args): 122 | rate = np.array(args.rate) 123 | 124 | if np.sum(rate) <= 0 or np.sum(rate) > 1: 125 | raise ValueError('rate sum must be in (0,1)') 126 | 127 | if np.alltrue(np.sort(rate) != rate): 128 | raise ValueError('rate must be in increasing order') 129 | 130 | filename, ext = os.path.splitext(args.csv_file) 131 | 132 | outputs = args.output or ['%s-split-%d%s' % (filename, i, ext) 133 | for i in range(rate.size + 1)] 134 | if rate.size + 1 != len(outputs): 135 | raise ValueError('len(rate) + 1 differs from len(output)') 136 | 137 | dataset = pd.read_csv(args.csv_file, index_col='image_name') 138 | print('Total samples: %d' % len(dataset)) 139 | 140 | indices = (rate*(len(dataset))).astype(np.int) 141 | 142 | datasets = np.split(dataset, indices) 143 | 144 | rate = np.concatenate([rate, [1.]]) 145 | 146 | for i, (dt, out) in enumerate(zip(datasets, outputs)): 147 | 148 | print('\tSaving to %s' % out) 149 | print('\t\tSplit rate: %f' % rate[i]) 150 | print('\t\tSamples: %d' % len(dt)) 151 | dt.to_csv(out) 152 | 153 | 154 | def stats(args): 155 | from .preprocessing import get_mean_and_std 156 | 157 | datasets = ForestDataset(args.csv_file, args.imgs_dir, 158 | transform=ToTensor()) 159 | 160 | mean, std = get_mean_and_std(datasets) 161 | 162 | print('Mean: %s\nStd: %s' % (mean, std)) 163 | 164 | 165 | if __name__ == '__main__': 166 | parser = argparse.ArgumentParser( 167 | description='Forest dataset preprocessing') 168 | parser.add_argument("csv_file", type=str) 169 | 170 | subparsers = parser.add_subparsers() 171 | 172 | parser_split = subparsers.add_parser('split') 173 | parser_split.add_argument("rate", nargs='+', type=float) 174 | parser_split.add_argument("--output", nargs='+', type=str, default=None) 175 | parser_split.set_defaults(func=split) 176 | 177 | parser_stats = subparsers.add_parser('stats') 178 | parser_stats.add_argument("--imgs-dir", type=str, 179 | default='data/train-tif') 180 | parser_stats.set_defaults(func=stats) 181 | 182 | args = parser.parse_args() 183 | args.func(args) 184 | -------------------------------------------------------------------------------- /deepeye/datasets/preprocessing.py: -------------------------------------------------------------------------------- 1 | '''Some helper functions for PyTorch 2 | ''' 3 | 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import DataLoader 7 | from torch.utils.data.sampler import Sampler 8 | 9 | def get_mean_and_std(dataset, axis=(0, 2, 3), batch_size=256, num_workers=2): 10 | 
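    # Chan et al.'s pairwise merge rule, applied batch by batch below: given
    # partial counts n_a, n_b, means mu_a, mu_b and sums of squared
    # deviations M_a, M_b,
    #   delta = mu_a - mu_b
    #   mu = (n_a*mu_a + n_b*mu_b) / (n_a + n_b)
    #   M = M_a + M_b + delta**2 * (n_a*n_b / (n_a + n_b))
    # so the statistics are accumulated without holding the whole dataset.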
''' 11 | Computes the mean and standard deviation with one pass. 12 | Uses Chan's pairwise algorithm. 13 | 14 | @param dataset Input data. 15 | @param axis Axis to be marginalized. 16 | @param batch_size Batch size. 17 | @param num_workers Number of parallel workers. 18 | 19 | @return Returns mean and standard deviation. 20 | ''' 21 | 22 | # Loading data 23 | data = DataLoader(dataset, batch_size=batch_size, 24 | num_workers=num_workers) 25 | 26 | # Setting initial values 27 | fll_cn = 0 28 | fll_mu = 0.0 29 | fll_ss = 0.0 30 | shape = None 31 | 32 | # Computing mean and std using Chan's algorithm 33 | for i, (images, _) in enumerate(data): 34 | 35 | # Testing for tensors 36 | if torch.is_tensor(images): 37 | images = images.numpy() 38 | 39 | # Testing sample 40 | if i == 0: 41 | 42 | # Initializing with the first batch statistics 43 | shape = np.array(images.shape) 44 | shape = shape[np.setdiff1d(np.arange(images.ndim), axis)] 45 | fll_cn = images.size/np.prod(shape) 46 | fll_mu = np.mean(images, axis=axis) 47 | fll_ss = np.var(images, axis=axis)*fll_cn 48 | 49 | else: 50 | 51 | # Computing current images statistics 52 | cur_cn = images.size/np.prod(shape) 53 | cur_mu = np.mean(images, axis=axis) 54 | cur_ss = np.var(images, axis=axis)*cur_cn 55 | 56 | # Using the parallel (pairwise) merge 57 | delta = fll_mu - cur_mu 58 | fll_mu = (fll_mu*fll_cn + cur_mu*cur_cn)/(fll_cn + cur_cn) 59 | fll_ss += cur_ss + (delta**2)*(cur_cn*fll_cn/(fll_cn + cur_cn)) 60 | fll_cn += cur_cn 61 | 62 | 63 | 64 | 65 | # Return 66 | return fll_mu, np.sqrt(fll_ss/fll_cn) 67 | -------------------------------------------------------------------------------- /deepeye/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn.modules.module import Module 4 | 5 | 6 | class MaskedBinaryCrossEntropy(torch.nn.BCEWithLogitsLoss): 7 | def forward(self, input, target, roi=None): 8 | if roi is None: 9 | roi = torch.ones_like(input) 10 | return (F.binary_cross_entropy_with_logits( 11 | input, target, reduce=False) * roi).sum() / roi.sum() 12 | 13 | 14 | mbce = MaskedBinaryCrossEntropy 15 | 16 | 17 | class BinaryCrossEntropy(torch.nn.BCEWithLogitsLoss): 18 | def forward(self, input, target, roi=None): 19 | return super().forward(input, target) 20 | 21 | 22 | bce = BinaryCrossEntropy 23 | 24 | 25 | class MaskedHardDiceLoss(Module): 26 | """Non-differentiable (due to the threshold operation on the input arg) 27 | Sørensen–Dice coefficient with mask for comparing the similarity in the 28 | region of interest of two batches of data. 29 | The coefficient ranges from 0 to 1, with 1 for a perfect match. 30 | Inputs: 31 | threshold (float): value above which a prediction is set to 1, else 0 32 | eps (float): small value to avoid division by zero 33 | smooth (float): smoothing value added to both num. and den. 
(laplacian/additive smoothing) 35 | per_img (bool): If True, computes loss per image and then 36 | averages, else computes it per batch 37 | Shape: 38 | input (tensor): `(N, *)` where `*` means, any number of 39 | additional dimensions 40 | target (tensor): `(N, *)`, same shape as the input 41 | roi (tensor): `(N, *)`, same shape as the input 42 | """ 43 | 44 | def __init__(self, threshold=0.5, smooth=1, eps=1e-12, per_img=False): 45 | super().__init__() 46 | self.threshold = threshold 47 | self.eps = eps 48 | self.smooth = smooth 49 | self.per_img = per_img 50 | 51 | def forward(self, input, target, roi=None): 52 | if roi is None: 53 | roi = torch.ones_like(input) 54 | input = (torch.sigmoid(input) > self.threshold).float() 55 | intersect = (input * target * roi).view(input.size(0), -1).sum(1) 56 | union = ((input + target) * roi).view(input.size(0), -1).sum(1) 57 | 58 | if self.per_img is True: 59 | return (1 - (2 * intersect + self.smooth) / 60 | ((union + self.eps) + self.smooth)).mean() 61 | 62 | return 1 - ((2 * intersect.sum() + self.smooth) / 63 | (union.sum() + self.eps + self.smooth)) 64 | 65 | 66 | harddice = MaskedHardDiceLoss 67 | 68 | 69 | class MaskedSoftDiceLoss(Module): 70 | """Differentiable (soft) Sørensen–Dice coefficient for comparing the 71 | similarity in the region of interest of two batches of data. 72 | The coefficient ranges from 0 to 1, with 1 for a perfect match. 73 | Inputs: 74 | eps (float): small value to avoid division by zero 75 | smooth (float): smoothing value added to both num. and den. 76 | (laplacian/additive smoothing) 77 | per_img (bool): If True, computes loss per image and then 78 | averages, else computes it per batch 79 | Shape: 80 | input (tensor): `(N, *)` where `*` means, any number of 81 | additional dimensions 82 | target (tensor): `(N, *)`, same shape as the input 83 | roi (tensor): `(N, *)`, same shape as the input 84 | """ 85 | 86 | def __init__(self, threshold=0.5, smooth=1, eps=1e-12, per_img=False): 87 | super().__init__() 88 | self.threshold = threshold 89 | self.eps = eps 90 | self.smooth = smooth 91 | self.per_img = per_img 92 | 93 | def forward(self, input, target, roi=None): 94 | if roi is None: 95 | roi = torch.ones_like(input) 96 | input = torch.sigmoid(input) 97 | intersect = (input * target * roi).view(input.size(0), -1).sum(1) 98 | union = ((input + target) * roi).view(input.size(0), -1).sum(1) 99 | 100 | if self.per_img is True: 101 | return (1 - (2 * intersect + self.smooth) / 102 | ((union + self.eps) + self.smooth)).mean() 103 | 104 | return 1 - ((2 * intersect.sum() + self.smooth) / 105 | (union.sum() + self.eps + self.smooth)) 106 | 107 | 108 | softdice = MaskedSoftDiceLoss 109 | -------------------------------------------------------------------------------- /deepeye/metrics.py: -------------------------------------------------------------------------------- 1 | """ These metrics are restricted to binary classification tasks 2 | 3 | TODO 4 | Add docstring for each method 5 | """ 6 | import torch 7 | 8 | 9 | def _sanitize(y_true, y_pred, roi): 10 | y_pred = y_pred.data 11 | y_true = y_true.data 12 | 13 | # Flatten 14 | y_pred = y_pred.view(y_pred.shape[0], -1) 15 | y_true = y_true.view(y_true.shape[0], -1) 16 | 17 | if y_pred.dtype is not torch.uint8: 18 | raise ValueError('y_pred must be torch.uint8, got {}'.format( 19 | y_pred.dtype)) 20 | 21 | if y_true.dtype is not torch.uint8: 22 | raise ValueError('y_true must be torch.uint8, got {}'.format( 23 | y_true.dtype)) 24 | 25 | if roi is None: 26 | roi = torch.ones_like(y_pred) 
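    # Otherwise an ROI mask was supplied: detach and flatten it below, the
    # same way as y_true and y_pred, so it can gate the per-pixel counts.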
27 | else: 28 | roi = roi.data 29 | 30 | # Flatten 31 | roi = roi.view(roi.shape[0], -1) 32 | 33 | if roi.dtype is not torch.uint8: 34 | raise ValueError('roi must be torch.uint8, got {}'.format( 35 | roi.dtype)) 36 | 37 | return y_true, y_pred, roi 38 | 39 | 40 | def _tn(y_true, y_pred, roi): 41 | return torch.sum((((y_pred == 0) & (y_true == 0)) * roi).float()) 42 | 43 | 44 | def _tp(y_true, y_pred, roi): 45 | return torch.sum((((y_pred == 1) & (y_true == 1)) * roi).float()) 46 | 47 | 48 | def _fp(y_true, y_pred, roi): 49 | return torch.sum((((y_pred == 1) & (y_true == 0)) * roi).float()) 50 | 51 | 52 | def _fn(y_true, y_pred, roi): 53 | return torch.sum((((y_pred == 0) & (y_true == 1)) * roi).float()) 54 | 55 | 56 | def tp_tn_fp_fn(y_true, y_pred, roi=None): 57 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 58 | 59 | return _tp(y_true, y_pred, roi), _tn(y_true, y_pred, roi), _fp( 60 | y_true, y_pred, roi), _fn(y_true, y_pred, roi) 61 | 62 | 63 | def acc_score(y_true, y_pred, roi=None, eps=1e-12): 64 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 65 | 66 | tp, tn = _tp(y_true, y_pred, roi), _tn(y_true, y_pred, roi) 67 | fp, fn = _fp(y_true, y_pred, roi), _fn(y_true, y_pred, roi) 68 | return _acc_score(tp, tn, fp, fn, eps) 69 | 70 | 71 | def _acc_score(tp, tn, fp, fn, eps=1e-12): 72 | return ((tp + tn) / (tp + tn + fp + fn + eps)) 73 | 74 | 75 | def prec_score(y_true, y_pred, roi=None, eps=1e-12): 76 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 77 | 78 | tp = _tp(y_true, y_pred, roi) 79 | fp = _fp(y_true, y_pred, roi) 80 | return _prec_score(tp, None, fp, None, eps) 81 | 82 | 83 | def _prec_score(tp, tn, fp, fn, eps=1e-12): 84 | return (tp / (tp + fp + eps)) 85 | 86 | 87 | def recall_score(y_true, y_pred, roi=None, eps=1e-12): 88 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 89 | 90 | tp = _tp(y_true, y_pred, roi) 91 | fn = _fn(y_true, y_pred, roi) 92 | return _recall_score(tp, None, None, fn, eps) 93 | 94 | 95 | def _recall_score(tp, tn, fp, fn, eps=1e-12): 96 | return (tp / (tp + fn + eps)) 97 | 98 | 99 | def f1_score(y_true, y_pred, roi=None, eps=1e-12): 100 | # Dice's index : 2*TP/(2*TP + FP + FN) 101 | return fbeta_score(y_true, y_pred, 1, roi, eps) 102 | 103 | 104 | def _f1_score(tp, tn, fp, fn, eps=1e-12): 105 | return _fbeta_score(tp, None, fp, fn, 1, eps) 106 | 107 | 108 | def fbeta_score(y_true, y_pred, beta, roi=None, eps=1e-12): 109 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 110 | 111 | tp = _tp(y_true, y_pred, roi) 112 | fp = _fp(y_true, y_pred, roi) 113 | fn = _fn(y_true, y_pred, roi) 114 | 115 | return _fbeta_score(tp, None, fp, fn, beta, eps) 116 | 117 | 118 | def _fbeta_score(tp, tn, fp, fn, beta, eps=1e-12): 119 | beta2 = beta**2 120 | return ((1 + beta2) * tp / ((1 + beta2) * tp + beta2 * fn + fp + eps)) 121 | 122 | 123 | def false_pos_rate(y_true, y_pred, roi=None, eps=1e-12): 124 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 125 | 126 | fp = _fp(y_true, y_pred, roi) 127 | tn = _tn(y_true, y_pred, roi) 128 | 129 | return _false_pos_rate(None, tn, fp, None, eps) 130 | 131 | 132 | def _false_pos_rate(tp, tn, fp, fn, eps=1e-12): 133 | return (fp / (fp + tn + eps)) 134 | 135 | 136 | def false_neg_rate(y_true, y_pred, roi=None, eps=1e-12): 137 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 138 | 139 | fn = _fn(y_true, y_pred, roi) 140 | tp = _tp(y_true, y_pred, roi) 141 | 142 | return _false_neg_rate(tp, None, None, fn, eps) 143 | 144 | 145 | def _false_neg_rate(tp, tn, fp, fn, eps=1e-12): 146 | return (fn / (fn + tp + eps)) 147 | 148 | 149 | def 
true_pos_rate(y_true, y_pred, roi=None, eps=1e-12): 150 | # or sensitivity 151 | return 1 - false_neg_rate(y_true, y_pred, roi, eps) 152 | 153 | 154 | def _true_pos_rate(tp, tn, fp, fn, eps=1e-12): 155 | return 1 - _false_neg_rate(tp, None, None, fn, eps) 156 | 157 | 158 | def true_neg_rate(y_true, y_pred, roi=None, eps=1e-12): 159 | # or specificity 160 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 161 | 162 | fp = _fp(y_true, y_pred, roi) 163 | tn = _tn(y_true, y_pred, roi) 164 | 165 | return _true_neg_rate(None, tn, fp, None, eps) 166 | 167 | 168 | def _true_neg_rate(tp, tn, fp, fn, eps=1e-12): 169 | return (tn / (fp + tn + eps)) 170 | 171 | 172 | def IoU_score(y_true, y_pred, roi=None, eps=1e-12): 173 | # Jaccard's index : TP/(TP + FP + FN) 174 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 175 | 176 | tp = _tp(y_true, y_pred, roi) 177 | fp = _fp(y_true, y_pred, roi) 178 | fn = _fn(y_true, y_pred, roi) 179 | 180 | return _IoU_score(tp, None, fp, fn, eps) 181 | 182 | 183 | def _IoU_score(tp, tn, fp, fn, eps=1e-12): 184 | return (tp / (tp + fp + fn + eps)) 185 | 186 | 187 | def total_error(y_true, y_pred, roi=None, eps=1e-12): 188 | # Total error: (FN + FP)/(TP + FP + TN + FN) 189 | # https://stats.stackexchange.com/questions/273537/f1-dice-score-vs-iou 190 | y_true, y_pred, roi = _sanitize(y_true, y_pred, roi) 191 | 192 | tp = _tp(y_true, y_pred, roi) 193 | tn = _tn(y_true, y_pred, roi) 194 | fp = _fp(y_true, y_pred, roi) 195 | fn = _fn(y_true, y_pred, roi) 196 | 197 | return _total_error(tp, tn, fp, fn, eps) 198 | 199 | 200 | def _total_error(tp, tn, fp, fn, eps=1e-12): 201 | return ((fn + fp) / (tp + fp + tn + fn + eps)) 202 | -------------------------------------------------------------------------------- /deepeye/model.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .callbacks import Callback, Progbar 8 | from .utils.generic_utils import AverageMeter, CMMeter, Meter 9 | from .metrics import tp_tn_fp_fn 10 | 11 | 12 | class Model(object): 13 | def __init__(self, arch, criterion=None, optimizer=None, threshold=0.5): 14 | if not isinstance(arch, nn.Module): 15 | raise ValueError('Arch should be an instance of torch.nn.Module') 16 | 17 | self.arch = arch 18 | self.set_criterion(criterion) 19 | self.set_optimizer(optimizer) 20 | self.set_threshold(threshold) 21 | 22 | self.cuda = False 23 | if next(self.arch.parameters()).is_cuda: 24 | self.cuda = True 25 | 26 | def set_criterion(self, criterion): 27 | if not isinstance(criterion, nn.Module) and criterion: 28 | raise ValueError('Criterion should be an instance of ' 29 | 'torch.nn.Module') 30 | 31 | self._criterion = criterion 32 | 33 | def set_optimizer(self, optimizer): 34 | if not isinstance(optimizer, torch.optim.Optimizer) and optimizer: 35 | raise ValueError('Optimizer should be an instance of ' 36 | 'torch.optim.Optimizer') 37 | self._optimizer = optimizer 38 | 39 | def set_threshold(self, threshold): 40 | if not isinstance(threshold, float) or threshold > 1 or threshold < 0: 41 | raise ValueError('Threshold should be a float in the ' 42 | 'interval [0,1]') 43 | if hasattr(self._criterion, 'threshold'): 44 | self._threshold = self._criterion.threshold 45 | else: 46 | self._threshold = threshold 47 | 48 | @property 49 | def optimizer(self): 50 | if not self._optimizer: 51 | raise ValueError('optimizer was not set') 52 | return self._optimizer 53 | 54 | @property 
55 | def criterion(self): 56 | if not self._criterion: 57 | raise ValueError('criterion was not set') 58 | return self._criterion 59 | 60 | def fit_loader(self, 61 | loader, 62 | epochs, 63 | val_loader=None, 64 | metrics={}, 65 | callback=None, 66 | start_epoch=0): 67 | if not isinstance(loader, torch.utils.data.DataLoader): 68 | raise ValueError('loader should be an instance of ' 69 | 'torch.utils.data.DataLoader') 70 | 71 | if val_loader and not isinstance(val_loader, 72 | torch.utils.data.DataLoader): 73 | raise ValueError('val_loader should be an instance of ' 74 | 'torch.utils.data.DataLoader') 75 | 76 | if not isinstance(callback, Callback): 77 | raise ValueError('callback should be an instance of Callback') 78 | 79 | names = [ 80 | '{{}}_{}'.format(name.replace('_', '-')) 81 | for name in ['loss'] + list(metrics.keys()) 82 | ] 83 | metrics_name = [n.format('train') for n in names] 84 | if val_loader: 85 | metrics_name += [n.format('val') for n in names] 86 | 87 | callback.set_params( 88 | arch=self.arch, optimizer=self.optimizer, criterion=self.criterion) 89 | 90 | callback.on_begin(start_epoch, epochs, metrics_name) 91 | 92 | for epoch in range(start_epoch, epochs): 93 | 94 | callback.on_epoch_begin(epoch) 95 | 96 | # train for one epoch 97 | train_metrics = self._step_loader( 98 | loader, callback, metrics=metrics, mode='train') 99 | 100 | if val_loader: 101 | # evaluate 102 | val_metrics = self._step_loader( 103 | val_loader, callback, metrics=metrics, mode='val') 104 | train_metrics.update(val_metrics) 105 | 106 | callback.on_epoch_end(train_metrics) 107 | 108 | callback.on_end() 109 | 110 | def eval_loader(self, loader, metrics={}, callback=None): 111 | if not isinstance(loader, torch.utils.data.DataLoader): 112 | raise ValueError('loader should be an instance of ' 113 | 'torch.utils.data.DataLoader') 114 | 115 | callback = callback or Progbar(print_freq=len(loader) - 1) 116 | 117 | callback.set_params(arch=self.arch, criterion=self.criterion) 118 | 119 | names = [ 120 | '{}'.format(name.replace('_', '-')) 121 | for name in ['loss'] + list(metrics.keys()) 122 | ] 123 | 124 | callback.on_begin(metrics_name=names) 125 | callback.on_epoch_begin(0) 126 | metrics = self._step_loader( 127 | loader, callback, metrics=metrics, mode='test') 128 | callback.on_epoch_end(metrics) 129 | callback.on_end() 130 | 131 | return metrics 132 | 133 | def predict_loader(self, loader, callback=None): 134 | if not isinstance(loader, torch.utils.data.DataLoader): 135 | raise ValueError('loader should be an instance of ' 136 | 'torch.utils.data.DataLoader') 137 | 138 | callback = callback or Progbar(print_freq=len(loader) - 1) 139 | 140 | callback.set_params(arch=self.arch) 141 | 142 | callback.on_begin() 143 | callback.on_epoch_begin(0) 144 | outputs = self._step_loader(loader, callback, mode='predict') 145 | callback.on_epoch_end({}) 146 | callback.on_end() 147 | 148 | return outputs 149 | 150 | def _step_loader(self, 151 | loader, 152 | callback, 153 | metrics={}, 154 | mode='train', 155 | non_blocking=True): 156 | 157 | meters = OrderedDict() 158 | if mode == 'predict': 159 | outputs = np.zeros((len(loader.dataset), len( 160 | loader.dataset.classes))) 161 | seen = 0 162 | else: 163 | meters['{}_loss'.format(mode)] = AverageMeter() 164 | for name in metrics: 165 | meters['{}_{}'.format(mode, name)] = Meter() 166 | 167 | cm_meter = CMMeter() 168 | 169 | if mode == 'train': 170 | # switch to train mode 171 | self.arch.train() 172 | volatile = False 173 | else: 174 | self.arch.eval() 175 | volatile = True 
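        # Note: 'volatile' keeps the old pre-0.4 torch naming; it is consumed
        # by torch.set_grad_enabled() below, so gradients are only tracked
        # while in train mode.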
176 | 177 | callback.on_step_begin(len(loader), mode=mode) 178 | 179 | for batch, (input, target, roi) in enumerate(loader): 180 | batch_size = input.size(0) 181 | callback.on_batch_begin(batch, batch_size) 182 | 183 | if self.cuda: input = input.cuda(non_blocking=non_blocking) 184 | input.requires_grad_(True) 185 | 186 | if mode != 'predict': 187 | if self.cuda: 188 | target = target.cuda(non_blocking=non_blocking) 189 | roi = roi.cuda(non_blocking=non_blocking) 190 | target.requires_grad_(True) 191 | roi.requires_grad_(True) 192 | 193 | torch.set_grad_enabled(not volatile) 194 | 195 | # Compute output 196 | output = self.arch(input) 197 | if mode == 'predict': 198 | outputs[seen:seen + batch_size, ...] =\ 199 | output.data.cpu().numpy() 200 | seen += batch_size 201 | else: 202 | loss = self.criterion(output, target, roi=roi) 203 | # Updating meters 204 | meters['{}_loss'.format(mode)].update(loss.data.item(), 205 | batch_size) 206 | 207 | output = (torch.sigmoid(output) > self._threshold) 208 | 209 | # Updating the confusion matrix 210 | cm_meter.update( 211 | *tp_tn_fp_fn(target.byte(), output, roi=roi.byte())) 212 | 213 | for name, metric in metrics.items(): 214 | meters['{}_{}'.format(mode, name)].update( 215 | metric(cm_meter.tp.item(), cm_meter.tn.item(), 216 | cm_meter.fp.item(), cm_meter.fn.item())) 217 | 218 | if mode == 'train': 219 | # compute gradient and do SGD step 220 | self.optimizer.zero_grad() 221 | loss.backward() 222 | self.optimizer.step() 223 | 224 | callback.on_batch_end(meters) 225 | 226 | callback.on_step_end() 227 | 228 | return meters or outputs 229 | -------------------------------------------------------------------------------- /deepeye/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lpcinelli/foreground-segmentation/25b34253547bde70c93e4b29668952d89b354273/deepeye/utils/__init__.py -------------------------------------------------------------------------------- /deepeye/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | arg_utils.py 3 | 4 | Argument treatment utilities. 5 | ''' 6 | 7 | import json 8 | 9 | def cast_arg(arg): 10 | ''' 11 | Cast args to the correct type. 12 | 13 | @param arg argument to be cast. 14 | 15 | @return arg with the correctly parsed type. 16 | ''' 17 | 18 | # Test each type 19 | try: 20 | # int 21 | return int(arg) 22 | except ValueError: 23 | pass 24 | try: 25 | # float 26 | return float(arg) 27 | except ValueError: 28 | pass 29 | if arg in ['True', 'False']: 30 | # boolean 31 | return arg == 'True' 32 | # A string 33 | return arg 34 | 35 | 36 | def parse_kwparams(kwlst): 37 | ''' 38 | Parses key-worded parameters. 39 | 40 | @param kwlst key-worded parameters list to be parsed. 41 | 42 | @return dictionary with the key-worded parameters. 43 | ''' 44 | 45 | # Set in dictionary form 46 | kwparams = {} 47 | for param in kwlst: 48 | k, v = param.split('=') 49 | try: 50 | kwparams[k] = json.loads(v) 51 | except json.JSONDecodeError: 52 | kwparams[k] = v 53 | return kwparams 54 | -------------------------------------------------------------------------------- /deepeye/utils/config_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | config_utils.py 3 | 4 | Configuration file treatment utilities. 5 | ''' 6 | 7 | import yaml 8 | 9 | 10 | class Bunch(object): 11 | ''' 12 | Converts dict to namespace 13 | ''' 14 | 15 | def __init__(self, adict): 16 | ''' 17 | Initializes a namespace given a dictionary. 
18 | 19 | @param adict target dictionary. 20 | 21 | @return converted object. 22 | ''' 23 | self.__dict__.update(adict) 24 | 25 | 26 | def create_config_file(path, args=None): 27 | ''' 28 | Creates a configuration file. 29 | 30 | @param path file path. 31 | @param args configuration arguments. 32 | ''' 33 | 34 | if args: 35 | # Use provided arguments. 36 | args = vars(args) 37 | else: 38 | # Set parameters dictionary with arguments 39 | args = {} 40 | args['img_path'] = ''  # path to dataset 41 | args['augmentation'] = ''  #TODO use data augmentation 42 | args['shrink-negatives'] = False  # suppress imgs w/ neg pixels only 43 | args['train'] = ''  # path to train.csv 44 | args['val'] = ''  # path to val.csv 45 | args['arch'] = 'toynet'  # model architecture 46 | args['arch_params'] = []  # model architecture params 47 | args['loss'] = 'bce'  # losses 48 | args['shape'] = ''  # input shape 49 | args['workers'] = 4  # number of data loading workers 50 | args['epochs'] = 90  # number of total epochs to run 51 | args['start_epoch'] = 0  # manual epoch number (useful on restarts) 52 | args['batch_size'] = 32  # mini-batch size 53 | args['optim'] = 'adam'  # algorithm for model optimization 54 | args['learning_rate'] = 0.1  # initial learning rate 55 | args['momentum'] = 0.9  # momentum 56 | args['weight_decay'] = 1e-4  # weight decay 57 | args['print_freq'] = 10  # print frequency 58 | args['resume'] = ''  # path to latest checkpoint 59 | args['evaluate'] = True  # evaluate model on validation set 60 | args['cuda'] = True  # use GPU 61 | args['visdom'] = True  # use visdom 62 | args['save_folder'] = 'models/'  # Location to save models 63 | 64 | # Saving on yaml 65 | with open(path, 'w') as yaml_file: 66 | yaml.dump(args, stream=yaml_file, default_flow_style=False) 67 | 68 | 69 | def load_config_file(path): 70 | ''' 71 | Loads configuration file. 72 | 73 | @param path file path. 74 | 75 | @return model arguments. 
76 | ''' 77 | 78 | # Open yaml file 79 | with open(path, 'r') as yaml_file: 80 | args = yaml.load(yaml_file) 81 | 82 | # Converting to namespace and returning 83 | return Bunch(args) 84 | 85 | -------------------------------------------------------------------------------- /deepeye/utils/generic_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Meter(object): 5 | """ Stores the last value and the cumulative sum 6 | """ 7 | 8 | def __init__(self): 9 | self.reset() 10 | 11 | def reset(self): 12 | self.val = 0 13 | self.sum = 0 14 | self.avg = 0 15 | 16 | def update(self, val, n=1): 17 | self.val = val 18 | self.sum += val * n 19 | self.avg = val 20 | 21 | 22 | class AverageMeter(Meter): 23 | """Computes and stores the average and current value""" 24 | 25 | def reset(self): 26 | super().reset() 27 | self.count = 0 28 | 29 | def update(self, val, n=1): 30 | super().update(val, n) 31 | self.count += n 32 | self.avg = self.sum / self.count 33 | 34 | 35 | class CMMeter(object): 36 | """ Confusion matrix meter, i.e., computes TP, TN, FP, and FN globally 37 | """ 38 | 39 | def __init__(self): 40 | self.reset() 41 | 42 | def reset(self): 43 | self.tp = 0 44 | self.tn = 0 45 | self.fp = 0 46 | self.fn = 0 47 | 48 | def update(self, tp, tn, fp, fn, n=1): 49 | self.tp += tp * n 50 | self.tn += tn * n 51 | self.fp += fp * n 52 | self.fn += fn * n 53 | 54 | 55 | class History(AverageMeter): 56 | """ Stores values and computes some metrics """ 57 | 58 | def reset(self): 59 | super().reset() 60 | self.vals = [] 61 | 62 | def update(self, val, n=1): 63 | super().update(val, n) 64 | self.vals.append(val * n) 65 | 66 | 67 | def conv2_out_size(input_size, 68 | kernel_size, 69 | stride=1, 70 | padding=0, 71 | dilation=1, 72 | ceil_mode=False): 73 | """ Computes height and width size of the output tensor 74 | Inputs: 75 | input_size: tuple with height h and width w of the input tensor 76 | kernel_size: tuple with kernel dimensions 77 | stride: int or tuple with kernel's stride along each dim 78 | padding: int or tuple with the (half the) amount of padding 79 | on each dim 80 | dilation: int or tuple that controls the spacing between 81 | kernel points 82 | Returns: 83 | (output height, output width) 84 | 85 | """ 86 | if isinstance(kernel_size, (int, float)): 87 | kernel_size = (int(kernel_size), int(kernel_size)) 88 | 89 | if isinstance(stride, (int, float)): 90 | stride = (int(stride), int(stride)) 91 | 92 | if isinstance(padding, (int, float)): 93 | padding = (int(padding), int(padding)) 94 | 95 | if isinstance(dilation, (int, float)): 96 | dilation = (int(dilation), int(dilation)) 97 | 98 | return ( 99 | int((input_size[0] + 2 * padding[0] - dilation[0] * 100 | (kernel_size[0] - 1) - 1) / stride[0] + 1 + int(ceil_mode) * 0.5), 101 | int((input_size[1] + 2 * padding[1] - dilation[1] * 102 | (kernel_size[1] - 1) - 1) / stride[1] + 1 + int(ceil_mode) * 0.5)) 103 | 104 | 105 | def find_threshold(y_pred, y_true, metric, min_val=0, max_val=1.0, eps=1e-6): 106 | ''' 107 | Finds the best threshold considering a metric and a training set. 108 | 109 | @param y_pred Current predicted values to be thresholded. 110 | @param y_true Target labels. 111 | @param metric Metric to be maximized. 112 | @param min_val Minimum possible predicted value. 113 | @param max_val Maximum possible predicted value. 114 | @param eps Minimum interval 115 | 116 | @return Best threshold and metric value. 
112 | 
113 | 
114 | def find_threshold(y_pred, y_true, metric, min_val=0, max_val=1.0, eps=1e-6):
115 |     '''
116 |     Finds the best threshold considering a metric and a training set.
117 | 
118 |     @param y_pred Current predicted values to be thresholded.
119 |     @param y_true Target labels.
120 |     @param metric Metric to be maximized, called as metric(y_true, y_pred, threshold).
121 |     @param min_val Minimum possible predicted value.
122 |     @param max_val Maximum possible predicted value.
123 |     @param eps Minimum interval.
124 | 
125 |     @return Best threshold and its metric value.
126 |     '''
127 | 
128 |     # Bisection-style search: probe the metric on each half of the
129 |     # current interval and keep the more promising half.
130 |     while True:
131 | 
132 |         # Midpoint of the current interval and a probe on each half
133 |         thrs_cur = (min_val + max_val) / 2.0
134 |         thrs_low = (thrs_cur + min_val) / 2.0
135 |         thrs_hgh = (thrs_cur + max_val) / 2.0
136 | 
137 |         # Scores
138 |         scr_low = metric(y_true, y_pred, thrs_low)
139 |         scr_hgh = metric(y_true, y_pred, thrs_hgh)
140 | 
141 |         # Keep the half whose probe scored better
142 |         if scr_low >= scr_hgh:
143 |             max_val = thrs_hgh
144 |         else:
145 |             min_val = thrs_low
146 | 
147 |         # End condition
148 |         if abs(max_val - min_val) <= eps:
149 |             break
150 | 
151 |     # Re-evaluate at the final midpoint so the returned score matches the
152 |     # returned threshold (inside the loop, scr_low refers to thrs_low)
153 |     thrs_cur = (min_val + max_val) / 2.0
154 |     return thrs_cur, metric(y_true, y_pred, thrs_cur)
155 | 
156 | 
157 | def rgb2gray(weights):
158 |     """ Converts weights pretrained on RGB to grayscale
159 |     Args:
160 |         weights (torch.Tensor): model's weights of size (?, 3, ?, ?)
161 |     Returns:
162 |         torch.Tensor of size (?, 1, ?, ?)
163 |     """
164 |     return (0.2989 * weights[:, 0, :, :] + 0.5870 * weights[:, 1, :, :] +
165 |             0.1140 * weights[:, 2, :, :]).unsqueeze(1)
--------------------------------------------------------------------------------
/deepeye/utils/img_utils.py:
--------------------------------------------------------------------------------
1 | from PIL import ImageFile # To solve load problems
2 | 
3 | # Valid images extensions
4 | IMG_EXTENSIONS = [
5 |     '.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.ppm'
6 | ]
7 | ImageFile.LOAD_TRUNCATED_IMAGES = True
8 | 
9 | 
10 | def is_image_file(filename):
11 |     '''
12 |     Tests whether the current file is a valid image.
13 | 
14 |     @param filename Input file name.
15 | 
16 |     @return True if file is an image, else False.
17 |     '''
18 |     return filename.endswith(tuple(IMG_EXTENSIONS))
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.13.3
2 | inflection==0.3.1
3 | pandas==0.20.3
4 | torch==0.3.1.post2
5 | matplotlib==2.0.2
6 | Pillow==5.1.0
7 | glob2==0.6
8 | scikit_learn==0.19.1
9 | torchvision==0.2.1
10 | tqdm==4.23.4
11 | visdom==0.1.8.3
12 | PyYAML==3.12
--------------------------------------------------------------------------------
/src-torch/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to fb.resnet.torch
2 | We want to make contributing to this project as easy and transparent as
3 | possible.
4 | 
5 | ## Pull Requests
6 | We actively welcome your pull requests.
7 | 
8 | 1. Fork the repo and create your branch from `master`.
9 | 2. If you haven't already, complete the Contributor License Agreement ("CLA").
10 | 
11 | ## Contributor License Agreement ("CLA")
12 | In order to accept your pull request, we need you to submit a CLA. You only need
13 | to do this once to work on any of Facebook's open source projects.
14 | 
15 | Complete your CLA here: 
16 | 
17 | ## Issues
18 | We use GitHub issues to track public bugs. Please ensure your description is
19 | clear and has sufficient instructions to be able to reproduce the issue.
20 | 
21 | ## Coding Style
22 | * Use three spaces for indentation rather than tabs
23 | * 80 character line length
24 | 
25 | ## License
26 | By contributing to fb.resnet.torch, you agree that your contributions will be
27 | licensed under its BSD license.
28 | 
--------------------------------------------------------------------------------
/src-torch/INSTALL.md:
--------------------------------------------------------------------------------
1 | Torch ResNet Installation
2 | =========================
3 | 
4 | This is the suggested way to install the Torch ResNet dependencies on [Ubuntu 14.04+](http://www.ubuntu.com/):
5 | * NVIDIA CUDA 7.0+
6 | * NVIDIA cuDNN v4
7 | * Torch
8 | * ImageNet dataset
9 | 
10 | ## Requirements
11 | * NVIDIA GPU with compute capability 3.5 or above
12 | 
13 | ## Install CUDA
14 | 1. Install the `build-essential` package:
15 | ```bash
16 | sudo apt-get install build-essential
17 | ```
18 | 
19 | 2. If you are using a Virtual Machine (like Amazon EC2 instances), install:
20 | ```bash
21 | sudo apt-get update
22 | sudo apt-get install linux-generic
23 | ```
24 | 
25 | 3. Download the CUDA .deb file for Linux Ubuntu 14.04 64-bit from: https://developer.nvidia.com/cuda-downloads.
26 | The file will be named something like `cuda-repo-ubuntu1404-7-5-local_7.5-18_amd64.deb`
27 | 
28 | 4. Install CUDA from the .deb file:
29 | ```bash
30 | sudo dpkg -i cuda-repo-ubuntu1404-7-5-local_7.5-18_amd64.deb
31 | sudo apt-get update
32 | sudo apt-get install cuda
33 | echo "export PATH=/usr/local/cuda/bin/:\$PATH; export LD_LIBRARY_PATH=/usr/local/cuda/lib64/:\$LD_LIBRARY_PATH; " >>~/.bashrc && source ~/.bashrc
34 | ```
35 | 
36 | 5. Restart your computer
37 | 
38 | ## Install cuDNN v4
39 | 1. Download cuDNN v4 from https://developer.nvidia.com/cuDNN (requires registration).
40 | The file will be named something like `cudnn-7.0-linux-x64-v4.0-rc.tgz`.
41 | 
42 | 2. Extract the file to `/usr/local/cuda`:
43 | ```bash
44 | tar -xvf cudnn-7.0-linux-x64-v4.0-rc.tgz
45 | sudo cp cuda/include/*.h /usr/local/cuda/include
46 | sudo cp cuda/lib64/*.so* /usr/local/cuda/lib64
47 | ```
48 | 
49 | ## Install Torch
50 | 1. Install the Torch dependencies:
51 | ```bash
52 | curl -sk https://raw.githubusercontent.com/torch/ezinstall/master/install-deps | bash -e
53 | ```
54 | 
55 | 2. Install Torch in a local folder:
56 | ```bash
57 | git clone https://github.com/torch/distro.git ~/torch --recursive
58 | cd ~/torch; ./install.sh
59 | ```
60 | 
61 | If you want to uninstall torch, you can use the command: `rm -rf ~/torch`
62 | 
63 | ## Install the Torch cuDNN v4 bindings
64 | ```bash
65 | git clone -b R4 https://github.com/soumith/cudnn.torch.git
66 | cd cudnn.torch; luarocks make
67 | ```
68 | 
69 | ## Download the ImageNet dataset
70 | The ImageNet Large Scale Visual Recognition Challenge (ILSVRC) dataset has 1000 categories and 1.2 million images. The images do not need to be preprocessed or packaged in any database, but the validation images need to be moved into appropriate subfolders.
71 | 
72 | 1. Download the images from http://image-net.org/download-images
73 | 
74 | 2. Extract the training data:
75 | ```bash
76 | mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
77 | tar -xvf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
78 | find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done
79 | cd ..
80 | ```
81 | 
82 | 3. 
Extract the validation data and move images to subfolders: 83 | ```bash 84 | mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xvf ILSVRC2012_img_val.tar 85 | wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash 86 | ``` 87 | 88 | ## Download Torch ResNet 89 | ```bash 90 | git clone https://github.com/facebook/fb.resnet.torch.git 91 | cd fb.resnet.torch 92 | ``` 93 | -------------------------------------------------------------------------------- /src-torch/LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For fb.resnet.torch software 4 | 5 | Copyright (c) 2016, Facebook, Inc. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Facebook nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /src-torch/PATENTS: -------------------------------------------------------------------------------- 1 | Additional Grant of Patent Rights Version 2 2 | 3 | "Software" means the fb.resnet.torch software distributed by Facebook, Inc. 4 | 5 | Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software 6 | ("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable 7 | (subject to the termination provision below) license under any Necessary 8 | Claims, to make, have made, use, sell, offer to sell, import, and otherwise 9 | transfer the Software. For avoidance of doubt, no license is granted under 10 | Facebook’s rights in any patent claims that are infringed by (i) modifications 11 | to the Software made by you or any third party or (ii) the Software in 12 | combination with any software or other technology. 
13 | 
14 | The license granted hereunder will terminate, automatically and without notice,
15 | if you (or any of your subsidiaries, corporate affiliates or agents) initiate
16 | directly or indirectly, or take a direct financial interest in, any Patent
17 | Assertion: (i) against Facebook or any of its subsidiaries or corporate
18 | affiliates, (ii) against any party if such Patent Assertion arises in whole or
19 | in part from any software, technology, product or service of Facebook or any of
20 | its subsidiaries or corporate affiliates, or (iii) against any party relating
21 | to the Software. Notwithstanding the foregoing, if Facebook or any of its
22 | subsidiaries or corporate affiliates files a lawsuit alleging patent
23 | infringement against you in the first instance, and you respond by filing a
24 | patent infringement counterclaim in that lawsuit against that party that is
25 | unrelated to the Software, the license granted hereunder will not terminate
26 | under section (i) of this paragraph due to such counterclaim.
27 | 
28 | A "Necessary Claim" is a claim of a patent owned by Facebook that is
29 | necessarily infringed by the Software standing alone.
30 | 
31 | A "Patent Assertion" is any lawsuit or other action alleging direct, indirect,
32 | or contributory infringement or inducement to infringe any patent, including a
33 | cross-claim or counterclaim.
34 | 
--------------------------------------------------------------------------------
/src-torch/README.md:
--------------------------------------------------------------------------------
1 | ResNet training in Torch
2 | ============================
3 | 
4 | This implements training of residual networks from [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385) by Kaiming He et al.
5 | 
6 | [We wrote a more verbose blog post discussing this code, and ResNets in general here.](http://torch.ch/blog/2016/02/04/resnets.html)
7 | 
8 | 
9 | ## Requirements
10 | See the [installation instructions](INSTALL.md) for a step-by-step guide.
11 | - Install [Torch](http://torch.ch/docs/getting-started.html) on a machine with CUDA GPU
12 | - Install [cuDNN v4 or v5](https://developer.nvidia.com/cudnn) and the Torch [cuDNN bindings](https://github.com/soumith/cudnn.torch/tree/R4)
13 | - Download the [ImageNet](http://image-net.org/download-images) dataset and [move validation images](https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset) to labeled subfolders
14 | 
15 | If you already have Torch installed, update `nn`, `cunn`, and `cudnn`.
16 | 
17 | ## Training
18 | See the [training recipes](TRAINING.md) for additional examples.
19 | 
20 | The training scripts come with several options, which can be listed with the `--help` flag.
21 | ```bash
22 | th main.lua --help
23 | ```
24 | 
25 | To run the training, simply run main.lua. By default, the script runs ResNet-34 on ImageNet with 1 GPU and 2 data-loader threads.
26 | ```bash
27 | th main.lua -data [imagenet-folder with train and val folders]
28 | ```
29 | 
30 | To train ResNet-50 on 4 GPUs:
31 | ```bash
32 | th main.lua -depth 50 -batchSize 256 -nGPU 4 -nThreads 8 -shareGradInput true -data [imagenet-folder]
33 | ```
34 | 
35 | ## Trained models
36 | 
37 | Trained ResNet 18, 34, 50, 101, 152, and 200 models are [available for download](pretrained). 
We include instructions for [using a custom dataset](pretrained/README.md#fine-tuning-on-a-custom-dataset), [classifying an image and getting the model's top5 predictions](pretrained/README.md#classification), and for [extracting image features](pretrained/README.md#extracting-image-features) using a pre-trained model. 38 | 39 | The trained models achieve better error rates than the [original ResNet models](https://github.com/KaimingHe/deep-residual-networks). 40 | 41 | #### Single-crop (224x224) validation error rate 42 | 43 | | Network | Top-1 error | Top-5 error | 44 | | ------------- | ----------- | ----------- | 45 | | ResNet-18 | 30.43 | 10.76 | 46 | | ResNet-34 | 26.73 | 8.74 | 47 | | ResNet-50 | 24.01 | 7.02 | 48 | | ResNet-101 | 22.44 | 6.21 | 49 | | ResNet-152 | 22.16 | 6.16 | 50 | | ResNet-200 | 21.66 | 5.79 | 51 | 52 | ## Notes 53 | 54 | This implementation differs from the ResNet paper in a few ways: 55 | 56 | **Scale augmentation**: We use the [scale and aspect ratio augmentation](datasets/transforms.lua#L130) from [Going Deeper with Convolutions](http://arxiv.org/abs/1409.4842), instead of [scale augmentation](datasets/transforms.lua#L113) used in the ResNet paper. We find this gives a better validation error. 57 | 58 | **Color augmentation**: We use the photometric distortions from [Andrew Howard](http://arxiv.org/abs/1312.5402) in addition to the [AlexNet](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)-style color augmentation used in the ResNet paper. 59 | 60 | **Weight decay**: We apply weight decay to all weights and biases instead of just the weights of the convolution layers. 61 | 62 | **Strided convolution**: When using the bottleneck architecture, we use stride 2 in the 3x3 convolution, instead of the first 1x1 convolution. 63 | -------------------------------------------------------------------------------- /src-torch/TRAINING.md: -------------------------------------------------------------------------------- 1 | Training recipes 2 | ---------------- 3 | 4 | ### CIFAR-10 5 | 6 | To train ResNet-20 on CIFAR-10 with 2 GPUs: 7 | 8 | ```bash 9 | th main.lua -dataset cifar10 -nGPU 2 -batchSize 128 -depth 20 10 | ``` 11 | 12 | To train ResNet-110 instead just change the `-depth` flag: 13 | 14 | ```bash 15 | th main.lua -dataset cifar10 -nGPU 2 -batchSize 128 -depth 110 16 | ``` 17 | 18 | To fit ResNet-1202 on two GPUs, you will need to use the [`-shareGradInput`](#sharegradinput) flag: 19 | 20 | ```bash 21 | th main.lua -dataset cifar10 -nGPU 2 -batchSize 128 -depth 1202 -shareGradInput true 22 | ``` 23 | 24 | ### ImageNet 25 | 26 | See the [installation instructions](INSTALL.md#download-the-imagenet-dataset) for ImageNet data setup. 
27 | 
28 | To train ResNet-18 on ImageNet with 4 GPUs and 8 data loading threads:
29 | 
30 | ```bash
31 | th main.lua -depth 18 -nGPU 4 -nThreads 8 -batchSize 256 -data [imagenet-folder]
32 | ```
33 | 
34 | To train ResNet-34 instead just change the `-depth` flag:
35 | 
36 | ```bash
37 | th main.lua -depth 34 -nGPU 4 -nThreads 8 -batchSize 256 -data [imagenet-folder]
38 | ```
39 | 
40 | To train ResNet-50 on 4 GPUs, you will need to use the [`-shareGradInput`](#sharegradinput) flag:
41 | 
42 | ```bash
43 | th main.lua -depth 50 -nGPU 4 -nThreads 8 -batchSize 256 -shareGradInput true -data [imagenet-folder]
44 | ```
45 | 
46 | To train ResNet-101 or ResNet-152 with batch size 256, you may need 8 GPUs:
47 | 
48 | ```bash
49 | th main.lua -depth 152 -nGPU 8 -nThreads 12 -batchSize 256 -shareGradInput true -data [imagenet-folder]
50 | ```
51 | 
52 | ## Useful flags
53 | 
54 | For a complete list of flags, run `th main.lua --help`.
55 | 
56 | ### shareGradInput
57 | 
58 | The `-shareGradInput` flag enables sharing of `gradInput` tensors between modules of the same type. This reduces
59 | memory usage. It works correctly with the included ResNet models, but may not work for other network architectures. See
60 | [models/init.lua](models/init.lua#L42-L60) for the implementation.
61 | 
62 | The `shareGradInput` implementation may not work with older versions of the `nn` package. Update your `nn` package by running `luarocks install nn`.
63 | 
64 | ### shortcutType
65 | 
66 | The `-shortcutType` flag selects the type of shortcut connection. The [ResNet paper](http://arxiv.org/abs/1512.03385) describes three different shortcut types:
67 | - `A`: identity shortcut with zero-padding for increasing dimensions. This is used for all CIFAR-10 experiments.
68 | - `B`: identity shortcut with 1x1 convolutions for increasing dimensions. This is used for most ImageNet experiments.
69 | - `C`: 1x1 convolutions for all shortcut connections.
70 | 
--------------------------------------------------------------------------------
/src-torch/category-perf.lua:
--------------------------------------------------------------------------------
1 | require 'torch'
2 | require 'paths'
3 | require 'optim'
4 | require 'nn'
5 | local DataLoader = require 'dataloader'
6 | local models = require 'models/init'
7 | local Trainer = require 'train'
8 | local checkpoints = require 'checkpoints'
9 | 
10 | torch.setdefaulttensortype('torch.FloatTensor')
11 | torch.setnumthreads(1)
12 | 
13 | function copyInputs(sample, opt)
14 |    -- Copies the input to a CUDA tensor, if using 1 GPU, or to pinned memory,
15 |    -- if using DataParallelTable. 
-- The target is always copied to a CUDA tensor
16 |    inputImg = inputImg or (opt.nGPU == 1
17 |       and torch.CudaTensor()
18 |       or cutorch.createCudaHostTensor())
19 |    targetImg = targetImg or (opt.nGPU == 1
20 |       and torch.CudaTensor()
21 |       or cutorch.createCudaHostTensor())
22 | 
23 |    inputImg:resize(sample.input:size()):copy(sample.input)
24 |    targetImg:resize(sample.target:size()):copy(sample.target)
25 | end
26 | 
27 | -- new computeScore function used for the segmentation task
28 | function computeScore(output, target, threshold)
29 |    -- threshold the segmentation mask: a pixel is FG if prob >= threshold
30 |    threshold = threshold == nil and 0.5 or threshold
31 |    output[output:ge(threshold)] = 1
32 |    output[output:lt(threshold)] = 0
33 | 
34 |    local targetArea = target:sum() -- TP + FN
35 |    local outputArea = output:sum() -- TP + FP
36 |    local intersection = torch.cmul(output,target):sum() -- TP
37 |    local trueNeg = (output + target):eq(0):float():sum() -- TN
38 | 
39 |    local function treatNaN(a)
40 |       if outputArea == 0 and targetArea == 0 then
41 |          a = 1
42 |       elseif outputArea == 0 or targetArea == 0 then
43 |          a = 0
44 |       end
45 |       return a
46 |    end
47 | 
48 |    local function precision() -- TP/(TP+FP)
49 |       return treatNaN(intersection/outputArea)
50 |    end
51 |    local function recall() -- TP/(TP+FN)
52 |       return treatNaN(intersection/targetArea)
53 |    end
54 |    local function f1Score(a, b)
55 |       return treatNaN(2*torch.cdiv(torch.cmul(a, b), (a+b)))
56 |    end
57 |    local function f1Direct() -- 2*TP/(2*TP+FN+FP)
58 |       return treatNaN(2*intersection/(outputArea + targetArea))
59 |    end
60 |    local function IoU() -- TP/(TP+FN+FP)
61 |       local IoUPerImage = torch.cdiv( (output + target):eq(2):float():sum(3):sum(4), -- overlap regions have 1+1 values (output+target)
62 |                            (output + target):ge(1):float():sum(3):sum(4)) -- union region is either 1 or 2 (output+target)
63 |       local nanPos = IoUPerImage:ne(IoUPerImage)
64 |       IoUPerImage[nanPos] = 0
65 |       return IoUPerImage:sum()/(IoUPerImage:size(1) - nanPos:sum())
66 |    end
67 |    local function accuracy() -- (TP+TN)/(TP+TN+FN+FP)
68 |       return (intersection + trueNeg)/(target:view(-1):size(1))
69 |    end
70 |    local function specificity() -- TN/(TN+FP)
71 |       return trueNeg/(trueNeg + (outputArea - intersection))
72 |    end
73 |    local function falsePosRate() -- FP/(FP+TN)
74 |       return (outputArea - intersection)/((outputArea - intersection) + trueNeg)
75 |    end
76 |    local function falseNegRate() -- FN/(TP+FN)
77 |       return (targetArea - intersection)/targetArea
78 |    end
79 |    local function classifErr() -- (FN+FP)/(TP+TN+FN+FP)
80 |       return 1 - accuracy()
81 |    end
82 | 
83 |    local recallVal = recall()
84 |    local precisionVal = precision()
85 |    local IoUVal = IoU()
86 |    local f1 = f1Direct()
87 |    local fnrVal = falseNegRate()
88 | 
89 |    -- specificity()
90 |    -- falseNegRate()
91 |    -- falsePosRate()
92 |    -- classifErr()
93 | 
94 |    return f1, precisionVal, recallVal, fnrVal, IoUVal
95 | end
96 | 
97 | function parse(arg)
98 |    local cmd = torch.CmdLine()
99 |    cmd:text()
100 |    cmd:text('Threshold finetuning script')
101 |    cmd:text('Options:')
102 |    cmd:option('-data', '', 'Path to dataset')
103 |    cmd:option('-dataset', '', 'Dataset name')
104 |    cmd:option('-nGPU', 1, 'Number of GPUs to use by default')
105 |    cmd:option('-backend', 'cudnn', 'Options: cudnn | cunn')
106 |    cmd:option('-cudnn', 'fastest', 'Options: fastest | default | deterministic')
107 |    cmd:option('-nThreads', 1, 'Number of data loading threads')
108 |    cmd:option('-batchSize', 32, 'Mini-batch size (1 = pure stochastic)')
109 |    cmd:option('-model', 'none', 
'Path to model') 110 | cmd:option('-trials', 100, 'Nb of trials') 111 | cmd:text() 112 | 113 | local opt = cmd:parse(arg or {}) 114 | 115 | if not paths.filep(opt.model) then 116 | cmd:error('error: unable to find model: ' .. opt.model .. '\n') 117 | end 118 | 119 | if not paths.dirp(opt.data) then 120 | cmd:error('error: unable to find path to dataset: ' .. opt.data .. '\n') 121 | end 122 | 123 | return opt 124 | end 125 | 126 | local opt = parse(arg) 127 | -- torch.manualSeed(opt.manualSeed) 128 | -- cutorch.manualSeedAll(opt.manualSeed) 129 | opt.manualSeed = 0 130 | opt.gen = 'gen/' 131 | 132 | -- Load model 133 | local model = torch.load(opt.model) 134 | local criterion = nn.BCECriterion():cuda() 135 | 136 | -- Data loading 137 | local trainLoader, valLoader = DataLoader.create(opt) 138 | 139 | -- Logger 140 | dirPath = paths.dirname(opt.model) 141 | local logger = optim.Logger(paths.concat(dirPath,'threshold-tuning.log')) 142 | logger:setNames{"Thres", "F1", "Prec", "Rec", "Fnr", "IoU"} 143 | -- local dummyLogger = optim.Logger(nil) 144 | 145 | -- The trainer handles the training loop and evaluation on validation set 146 | -- local trainer = Trainer(model, criterion, opt, nil, dummyLogger) 147 | 148 | model:cuda() 149 | model:evaluate() 150 | 151 | local thresholdList = torch.FloatTensor(opt.trials):random(20,50)/100 152 | local stats = torch.FloatTensor(opt.trials,5):zero() 153 | -- for trial = 1, opt.trials do 154 | 155 | local size = valLoader:size() 156 | local N = 0 157 | 158 | for n, sample in valLoader:run() do 159 | 160 | print(string.format('Iter %d/%d', n, size)) 161 | 162 | -- Copy input and target to the GPU 163 | copyInputs(sample, opt) 164 | 165 | local output = model:forward(inputImg):float() 166 | local batchSize = output:size(1) 167 | 168 | for i=1,thresholdList:size(1) do 169 | local f1, precision, recall, fnr, IoU = computeScore(output, sample.target, thresholdList[i]) 170 | 171 | stats[i][1] = stats[i][1] + f1*batchSize 172 | stats[i][2] = stats[i][2] + precision*batchSize 173 | stats[i][3] = stats[i][3] + recall*batchSize 174 | stats[i][4] = stats[i][4] + fnr*batchSize 175 | stats[i][5] = stats[i][5] + IoU*batchSize 176 | end 177 | 178 | N = N + batchSize 179 | end 180 | 181 | stats = stats/N 182 | 183 | -- Update logger 184 | for i=1,thresholdList:size(1) do 185 | local F1 = stats[i][1] 186 | local Prec = stats[i][2] 187 | local Rec = stats[i][3] 188 | local Fnr = stats[i][4] 189 | local IoU = stats[i][5] 190 | logger:add{thresholdList[i], F1, Prec, Rec, Fnr, IoU} 191 | end 192 | 193 | local val, ind = torch.max(stats[{ {}, {1} }], 1) 194 | ind = ind:squeeze() 195 | 196 | logger:add{} 197 | logger:add{thresholdList[ind], stats[ind][1], stats[ind][2], stats[ind][3], stats[ind][4], stats[ind][5]} 198 | print(string.format(' * Finished:: Thres %.2f F1 %.3f Prec %.3f Rec %.3f Fnr %.3f IoU %.3f', 199 | thresholdList[ind], stats[ind][1], stats[ind][2], stats[ind][3], stats[ind][4], stats[ind][5])) 200 | 201 | -------------------------------------------------------------------------------- /src-torch/checkpoints.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
8 | -- 9 | local checkpoint = {} 10 | 11 | local function deepCopy(tbl) 12 | -- creates a copy of a network with new modules and the same tensors 13 | local copy = {} 14 | for k, v in pairs(tbl) do 15 | if type(v) == 'table' then 16 | copy[k] = deepCopy(v) 17 | else 18 | copy[k] = v 19 | end 20 | end 21 | if torch.typename(tbl) then 22 | torch.setmetatable(copy, torch.typename(tbl)) 23 | end 24 | return copy 25 | end 26 | 27 | function checkpoint.latest(opt) 28 | if opt.resume == 'none' then 29 | return nil 30 | end 31 | 32 | -- local latestPath = paths.concat(opt.resume, 'latest.t7') 33 | -- if not paths.filep(latestPath) then 34 | if not paths.filep(opt.resume) then 35 | print('resume file not found') 36 | return nil 37 | end 38 | 39 | print('=> Loading checkpoint ' .. opt.resume) 40 | -- local latest = torch.load(opt.resume) 41 | local optimFile = 'optimState_' .. paths.basename(opt.resume):match('%d+') .. '.t7' 42 | print(optimFile) 43 | local optimState = torch.load(paths.concat(paths.dirname(opt.resume), optimFile)) 44 | -- local optimState = torch.load(paths.concat(paths.dirname(opt.resume), latest.optimFile)) 45 | 46 | return opt.resume, optimState 47 | end 48 | 49 | function checkpoint.save(epoch, model, optimState, isBestModel, opt) 50 | -- don't save the DataParallelTable for easier loading on other machines 51 | if torch.type(model) == 'nn.DataParallelTable' then 52 | model = model:get(1) 53 | end 54 | 55 | -- create a clean copy on the CPU without modifying the original network 56 | model = deepCopy(model):float():clearState() 57 | 58 | local modelFile = 'model_' .. epoch .. '.t7' 59 | local optimFile = 'optimState_' .. epoch .. '.t7' 60 | if opt.checkpoint == 'true' then 61 | torch.save(paths.concat(opt.save, modelFile), model) 62 | torch.save(paths.concat(opt.save, optimFile), optimState) 63 | torch.save(paths.concat(opt.save, 'latest.t7'), { 64 | epoch = epoch, 65 | modelFile = modelFile, 66 | optimFile = optimFile, 67 | }) 68 | end 69 | 70 | if isBestModel then 71 | torch.save(paths.concat(opt.save, 'model_best.t7'), model) 72 | torch.save(paths.concat(opt.save, 'model_best_optimState.t7'), optimState) 73 | end 74 | end 75 | 76 | return checkpoint 77 | -------------------------------------------------------------------------------- /src-torch/dataloader.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Multi-threaded data loader 10 | -- 11 | 12 | 13 | -- require 'hdf5' 14 | local datasets = require 'datasets/init' 15 | local Threads = require 'threads' 16 | Threads.serialization('threads.sharedserialize') 17 | 18 | local M = {} 19 | local DataLoader = torch.class('resnet.DataLoader', M) 20 | 21 | function DataLoader.create(opt) 22 | -- The train and val loader 23 | local loaders = {} 24 | 25 | for i, split in ipairs{'train', 'val'} do 26 | local dataset = datasets.create(opt, split) 27 | loaders[i] = M.DataLoader(dataset, opt, split) 28 | end 29 | return table.unpack(loaders) 30 | end 31 | 32 | function DataLoader:__init(dataset, opt, split) 33 | local manualSeed = opt.manualSeed 34 | local function init() 35 | require('datasets/' .. 
opt.dataset) 36 | end 37 | local function main(idx) 38 | if manualSeed ~= 0 then 39 | torch.manualSeed(manualSeed + idx) 40 | end 41 | torch.setnumthreads(1) 42 | 43 | _G.dataset = dataset 44 | _G.preprocess = dataset:preprocess() 45 | return dataset:size() 46 | end 47 | local threads, sizes = Threads(opt.nThreads, init, main) 48 | -- self.nCrops = (split == 'val' and opt.tenCrop) and 10 or 1 49 | self.nCrops = 1 50 | self.threads = threads 51 | self.__size = sizes[1][1] 52 | self.batchSize = math.floor(opt.batchSize / self.nCrops) 53 | end 54 | 55 | function DataLoader:size() 56 | return math.ceil(self.__size / self.batchSize) 57 | end 58 | 59 | function DataLoader:run() 60 | local threads = self.threads 61 | local size, batchSize = self.__size, self.batchSize 62 | local perm = torch.randperm(size) 63 | local idx, sample = 1, nil 64 | local function enqueue() 65 | while idx <= size and threads:acceptsjob() do 66 | local indices = perm:narrow(1, idx, math.min(batchSize, size - idx + 1)) 67 | threads:addjob( 68 | function(indices, nCrops) 69 | local sz = indices:size(1) 70 | local inputBatch, targetBatch, imageSize 71 | for i, idx in ipairs(indices:totable()) do 72 | local sample = _G.dataset:get(idx) 73 | local input, target = _G.preprocess(sample.input,sample.target) 74 | if not inputBatch then 75 | imageSize = input[1]:size():totable() 76 | if nCrops > 1 then table.remove(imageSize, 1) end 77 | inputBatch = torch.FloatTensor(sz, nCrops, 2, table.unpack(imageSize)) 78 | targetBatch = torch.FloatTensor(sz, nCrops,1, table.unpack(imageSize)) 79 | end 80 | inputBatch[i]:copy(input) 81 | targetBatch[i]:copy(target) 82 | end 83 | collectgarbage() 84 | return { 85 | input = inputBatch:view(sz * nCrops, 2, table.unpack(imageSize)), 86 | target = targetBatch:view(sz * nCrops, 1, table.unpack(imageSize)), 87 | } 88 | end, 89 | function(_sample_) 90 | sample = _sample_ 91 | end, 92 | indices, 93 | self.nCrops 94 | ) 95 | idx = idx + batchSize 96 | end 97 | end 98 | 99 | local n = 0 100 | local function loop() 101 | enqueue() 102 | if not threads:hasjob() then 103 | return nil 104 | end 105 | threads:dojob() 106 | if threads:haserror() then 107 | threads:synchronize() 108 | end 109 | enqueue() 110 | n = n + 1 111 | return n, sample 112 | end 113 | 114 | return loop 115 | end 116 | 117 | return M.DataLoader 118 | -------------------------------------------------------------------------------- /src-torch/datasets/.cdnet-gen.lua.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lpcinelli/foreground-segmentation/25b34253547bde70c93e4b29668952d89b354273/src-torch/datasets/.cdnet-gen.lua.swp -------------------------------------------------------------------------------- /src-torch/datasets/README.md: -------------------------------------------------------------------------------- 1 | ## Datasets 2 | 3 | Each dataset consist of two files: `dataset-gen.lua` and `dataset.lua`. The `dataset-gen.lua` is responsible for one-time setup, while 4 | the `dataset.lua` handles the actual data loading. 5 | 6 | If you want to be able to use the new dataset from main.lua, you should also modify `opts.lua` to handle the new dataset name. 7 | 8 | ### `dataset-gen.lua` 9 | 10 | The `dataset-gen.lua` performs any necessary one-time setup. For example, the [`cifar10-gen.lua`](cifar10-gen.lua) file downloads the CIFAR-10 dataset, and the [`imagenet-gen.lua`](imagenet-gen.lua) file indexes all the training and validation data. 
11 | 12 | The module should have a single function `exec(opt, cacheFile)`. 13 | - `opt`: the command line options 14 | - `cacheFile`: path to output 15 | 16 | ```lua 17 | local M = {} 18 | function M.exec(opt, cacheFile) 19 | local imageInfo = {} 20 | -- preprocess dataset, store results in imageInfo, save to cacheFile 21 | torch.save(cacheFile, imageInfo) 22 | end 23 | return M 24 | ``` 25 | 26 | ### `dataset.lua` 27 | 28 | The `dataset.lua` should return a class that implements three functions: 29 | - `get(i)`: returns a table containing two entries, `input` and `target` 30 | - `input`: the training or validation image as a Torch tensor 31 | - `target`: the image category as a number 1-N 32 | - `size()`: returns the number of entries in the dataset 33 | - `preprocess()`: returns a function that transforms the `input` for data augmentation or input normalization 34 | 35 | ```lua 36 | local M = {} 37 | local FakeDataset = torch.class('resnet.FakeDataset', M) 38 | 39 | function FakeDataset:__init(imageInfo, opt, split) 40 | -- imageInfo: result from dataset-gen.lua 41 | -- opt: command-line arguments 42 | -- split: "train" or "val" 43 | end 44 | 45 | function FakeDataset:get(i) 46 | return { 47 | input = torch.Tensor(3, 800, 600):uniform(), 48 | target = 42, 49 | } 50 | end 51 | 52 | function FakeDataset:size() 53 | -- size of dataset 54 | return 2000 55 | end 56 | 57 | function FakeDataset:preprocess() 58 | -- Scale smaller side to 256 and take 224x224 center-crop 59 | return t.Compose{ 60 | t.Scale(256), 61 | t.CenterCrop(224), 62 | } 63 | end 64 | 65 | return M.FakeDataset 66 | ``` 67 | -------------------------------------------------------------------------------- /src-torch/datasets/cdnet.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- ImageNet dataset loader 10 | 11 | local image = require 'image' 12 | local paths = require 'paths' 13 | local t = require 'datasets/transforms' 14 | local ffi = require 'ffi' 15 | 16 | local M = {} 17 | local CDNetDataset = torch.class('resnet.CDNetDataset', M) 18 | 19 | function CDNetDataset:__init(imageInfo, opt, split) 20 | -- self.imageInfo = torch.load(imageInfo[split].processedImgs) 21 | self.imageInfo = imageInfo[split] 22 | self.opt = opt 23 | self.split = split 24 | self.dir = paths.concat(opt.data, split) 25 | self.bgDir = paths.concat(opt.data, 'background') 26 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. 
self.dir) 27 | end 28 | 29 | function CDNetDataset:get(i) 30 | local imagePath = ffi.string(self.imageInfo.imagePath[i]:data()) 31 | -- local gtPath = ffi.string(self.imageInfo.gtPath[i]:data()) 32 | -- local bgModelPath = ffi.string(self.imageInfo.bgModelPath[i]:data()) 33 | -- local ROIPath = ffi.string(self.imageInfo.ROIPath[i]:data()) 34 | 35 | -- local img = self:_loadImage(paths.concat(self.dir, imagePath), 3) 36 | -- local gtImage = self:_loadImage(paths.concat(self.dir, gtPath), 1) 37 | -- local bgModel = self:_loadImage(paths.concat(self.bgDir, bgModelPath), 3) 38 | -- local videoROI = self:_loadImage(paths.concat(self.bgDir, ROIPath), 3) 39 | 40 | -- gtImage = gtImage:eq(1):float() 41 | 42 | -- return { 43 | -- input = torch.cat({videoROI:expandAs(img), bgModel:expandAs(img), img}, 4), 44 | -- target = gtImage, 45 | -- } 46 | return torch.load(imagePath) 47 | end 48 | 49 | function CDNetDataset:_loadImage(path, channels) 50 | channels = channels == nil and 3 or channels 51 | local ok, input = pcall(function() 52 | return image.load(path, channels, 'float') 53 | end) 54 | 55 | -- Sometimes image.load fails because the file extension does not match the 56 | -- image format. In that case, use image.decompress on a ByteTensor. 57 | if not ok then 58 | local f = io.open(path, 'r') 59 | assert(f, 'Error reading: ' .. tostring(path)) 60 | local data = f:read('*a') 61 | f:close() 62 | 63 | local b = torch.ByteTensor(string.len(data)) 64 | ffi.copy(b:data(), data, b:size(1)) 65 | 66 | input = image.decompress(b, channels, 'float') 67 | end 68 | 69 | return input 70 | end 71 | 72 | function CDNetDataset:size() 73 | -- return #self.imageInfo 74 | return self.imageInfo.imagePath:size(1) 75 | end 76 | 77 | 78 | -- Computed from random subset of ImageNet training images 79 | local meanstd = { 80 | mean = { 0.485, 0.456, 0.406 }, 81 | std = { 0.229, 0.224, 0.225 }, 82 | } 83 | local pca = { 84 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 85 | eigvec = torch.Tensor{ 86 | { -0.5675, 0.7192, 0.4009 }, 87 | { -0.5808, -0.0045, -0.8140 }, 88 | { -0.5836, -0.6948, 0.4203 }, 89 | }, 90 | } 91 | 92 | function CDNetDataset:preprocess() 93 | if self.split == 'train' then 94 | return t.Compose{ 95 | -- t.getImageROI(), 96 | -- t.OneGrayscaleImagePerChannel(), 97 | -- t.ScaleDim(256,'w'), 98 | -- t.ScaleDim(192,'h'), -- previously 192 99 | -- t.Lighting(0.1, pca.eigval, pca.eigvec), 100 | -- t.ColorNormalize(meanstd), 101 | t.HorizontalFlip(0.5), 102 | } 103 | elseif self.split == 'val' then 104 | return t.Compose{ 105 | -- t.getImageROI(), 106 | -- t.OneGrayscaleImagePerChannel(), 107 | -- t.ScaleDim(256,'w'), 108 | -- t.ScaleDim(192,'h'), 109 | -- t.ColorNormalize(meanstd), 110 | -- Crop(224), 111 | } 112 | else 113 | error('invalid split: ' .. self.split) 114 | end 115 | end 116 | 117 | return M.CDNetDataset 118 | -------------------------------------------------------------------------------- /src-torch/datasets/cifar10-gen.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
8 | -- 9 | -- Script to compute list of ImageNet filenames and classes 10 | -- 11 | -- This automatically downloads the CIFAR-10 dataset from 12 | -- http://torch7.s3-website-us-east-1.amazonaws.com/data/cifar-10-torch.tar.gz 13 | -- 14 | 15 | local URL = 'http://torch7.s3-website-us-east-1.amazonaws.com/data/cifar-10-torch.tar.gz' 16 | 17 | local M = {} 18 | 19 | local function convertToTensor(files) 20 | local data, labels 21 | 22 | for _, file in ipairs(files) do 23 | local m = torch.load(file, 'ascii') 24 | if not data then 25 | data = m.data:t() 26 | labels = m.labels:squeeze() 27 | else 28 | data = torch.cat(data, m.data:t(), 1) 29 | labels = torch.cat(labels, m.labels:squeeze()) 30 | end 31 | end 32 | 33 | -- This is *very* important. The downloaded files have labels 0-9, which do 34 | -- not work with CrossEntropyCriterion 35 | labels:add(1) 36 | 37 | return { 38 | data = data:contiguous():view(-1, 3, 32, 32), 39 | labels = labels, 40 | } 41 | end 42 | 43 | function M.exec(opt, cacheFile) 44 | print("=> Downloading CIFAR-10 dataset from " .. URL) 45 | local ok = os.execute('curl ' .. URL .. ' | tar xz -C gen/') 46 | assert(ok == true or ok == 0, 'error downloading CIFAR-10') 47 | 48 | print(" | combining dataset into a single file") 49 | local trainData = convertToTensor({ 50 | 'gen/cifar-10-batches-t7/data_batch_1.t7', 51 | 'gen/cifar-10-batches-t7/data_batch_2.t7', 52 | 'gen/cifar-10-batches-t7/data_batch_3.t7', 53 | 'gen/cifar-10-batches-t7/data_batch_4.t7', 54 | 'gen/cifar-10-batches-t7/data_batch_5.t7', 55 | }) 56 | local testData = convertToTensor({ 57 | 'gen/cifar-10-batches-t7/test_batch.t7', 58 | }) 59 | 60 | print(" | saving CIFAR-10 dataset to " .. cacheFile) 61 | torch.save(cacheFile, { 62 | train = trainData, 63 | val = testData, 64 | }) 65 | end 66 | 67 | return M 68 | -------------------------------------------------------------------------------- /src-torch/datasets/cifar10.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- CIFAR-10 dataset loader 10 | -- 11 | 12 | local t = require 'datasets/transforms' 13 | 14 | local M = {} 15 | local CifarDataset = torch.class('resnet.CifarDataset', M) 16 | 17 | function CifarDataset:__init(imageInfo, opt, split) 18 | assert(imageInfo[split], split) 19 | self.imageInfo = imageInfo[split] 20 | self.split = split 21 | end 22 | 23 | function CifarDataset:get(i) 24 | local image = self.imageInfo.data[i]:float() 25 | local label = self.imageInfo.labels[i] 26 | 27 | return { 28 | input = image, 29 | target = label, 30 | } 31 | end 32 | 33 | function CifarDataset:size() 34 | return self.imageInfo.data:size(1) 35 | end 36 | 37 | -- Computed from entire CIFAR-10 training set 38 | local meanstd = { 39 | mean = {125.3, 123.0, 113.9}, 40 | std = {63.0, 62.1, 66.7}, 41 | } 42 | 43 | function CifarDataset:preprocess() 44 | if self.split == 'train' then 45 | return t.Compose{ 46 | t.ColorNormalize(meanstd), 47 | t.HorizontalFlip(0.5), 48 | t.RandomCrop(32, 4), 49 | } 50 | elseif self.split == 'val' then 51 | return t.ColorNormalize(meanstd) 52 | else 53 | error('invalid split: ' .. 
self.split) 54 | end 55 | end 56 | 57 | return M.CifarDataset 58 | -------------------------------------------------------------------------------- /src-torch/datasets/cifar100-gen.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | 10 | ------------ 11 | -- This file automatically downloads the CIFAR-100 dataset from 12 | -- http://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz 13 | -- It is based on cifar10-gen.lua 14 | -- Ludovic Trottier 15 | ------------ 16 | 17 | local URL = 'http://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz' 18 | 19 | local M = {} 20 | 21 | local function convertCifar100BinToTorchTensor(inputFname) 22 | local m=torch.DiskFile(inputFname, 'r'):binary() 23 | m:seekEnd() 24 | local length = m:position() - 1 25 | local nSamples = length / 3074 -- 1 coarse-label byte, 1 fine-label byte, 3072 pixel bytes 26 | 27 | assert(nSamples == math.floor(nSamples), 'expecting numSamples to be an exact integer') 28 | m:seek(1) 29 | 30 | local coarse = torch.ByteTensor(nSamples) 31 | local fine = torch.ByteTensor(nSamples) 32 | local data = torch.ByteTensor(nSamples, 3, 32, 32) 33 | for i=1,nSamples do 34 | coarse[i] = m:readByte() 35 | fine[i] = m:readByte() 36 | local store = m:readByte(3072) 37 | data[i]:copy(torch.ByteTensor(store)) 38 | end 39 | 40 | local out = {} 41 | out.data = data 42 | -- This is *very* important. The downloaded files have labels 0-9, which do 43 | -- not work with CrossEntropyCriterion 44 | out.labels = fine + 1 45 | 46 | return out 47 | end 48 | 49 | function M.exec(opt, cacheFile) 50 | print("=> Downloading CIFAR-100 dataset from " .. URL) 51 | 52 | local ok = os.execute('curl ' .. URL .. ' | tar xz -C gen/') 53 | assert(ok == true or ok == 0, 'error downloading CIFAR-100') 54 | 55 | print(" | combining dataset into a single file") 56 | 57 | local trainData = convertCifar100BinToTorchTensor('gen/cifar-100-binary/train.bin') 58 | local testData = convertCifar100BinToTorchTensor('gen/cifar-100-binary/test.bin') 59 | 60 | print(" | saving CIFAR-100 dataset to " .. cacheFile) 61 | torch.save(cacheFile, { 62 | train = trainData, 63 | val = testData, 64 | }) 65 | end 66 | 67 | return M 68 | -------------------------------------------------------------------------------- /src-torch/datasets/cifar100.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | 10 | ------------ 11 | -- This file is downloading and transforming CIFAR-100. 
12 | -- It is based on cifar10.lua 13 | -- Ludovic Trottier 14 | ------------ 15 | 16 | local t = require 'datasets/transforms' 17 | 18 | local M = {} 19 | local CifarDataset = torch.class('resnet.CifarDataset', M) 20 | 21 | function CifarDataset:__init(imageInfo, opt, split) 22 | assert(imageInfo[split], split) 23 | self.imageInfo = imageInfo[split] 24 | self.split = split 25 | end 26 | 27 | function CifarDataset:get(i) 28 | local image = self.imageInfo.data[i]:float() 29 | local label = self.imageInfo.labels[i] 30 | 31 | return { 32 | input = image, 33 | target = label, 34 | } 35 | end 36 | 37 | function CifarDataset:size() 38 | return self.imageInfo.data:size(1) 39 | end 40 | 41 | 42 | -- Computed from entire CIFAR-100 training set with this code: 43 | -- dataset = torch.load('cifar100.t7') 44 | -- tt = dataset.train.data:double(); 45 | -- tt = tt:transpose(2,4); 46 | -- tt = tt:reshape(50000*32*32, 3); 47 | -- tt:mean(1) 48 | -- tt:std(1) 49 | local meanstd = { 50 | mean = {129.3, 124.1, 112.4}, 51 | std = {68.2, 65.4, 70.4}, 52 | } 53 | 54 | function CifarDataset:preprocess() 55 | if self.split == 'train' then 56 | return t.Compose{ 57 | t.ColorNormalize(meanstd), 58 | t.HorizontalFlip(0.5), 59 | t.RandomCrop(32, 4), 60 | } 61 | elseif self.split == 'val' then 62 | return t.ColorNormalize(meanstd) 63 | else 64 | error('invalid split: ' .. self.split) 65 | end 66 | end 67 | 68 | return M.CifarDataset 69 | -------------------------------------------------------------------------------- /src-torch/datasets/imagenet-gen.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Script to compute list of ImageNet filenames and classes 10 | -- 11 | -- This generates a file gen/imagenet.t7 which contains the list of all 12 | -- ImageNet training and validation images and their classes. This script also 13 | -- works for other datasets arragned with the same layout. 14 | -- 15 | 16 | local sys = require 'sys' 17 | local ffi = require 'ffi' 18 | 19 | local M = {} 20 | 21 | local function findClasses(dir) 22 | local dirs = paths.dir(dir) 23 | table.sort(dirs) 24 | 25 | local classList = {} 26 | local classToIdx = {} 27 | for _ ,class in ipairs(dirs) do 28 | if not classToIdx[class] and class ~= '.' and class ~= '..' and class ~= '.DS_Store' then 29 | table.insert(classList, class) 30 | classToIdx[class] = #classList 31 | end 32 | end 33 | 34 | -- assert(#classList == 1000, 'expected 1000 ImageNet classes') 35 | return classList, classToIdx 36 | end 37 | 38 | local function findImages(dir, classToIdx) 39 | local imagePath = torch.CharTensor() 40 | local imageClass = torch.LongTensor() 41 | 42 | ---------------------------------------------------------------------- 43 | -- Options for the GNU and BSD find command 44 | local extensionList = {'jpg', 'png', 'jpeg', 'JPG', 'PNG', 'JPEG', 'ppm', 'PPM', 'bmp', 'BMP'} 45 | local findOptions = ' -iname "*.' .. extensionList[1] .. '"' 46 | for i=2,#extensionList do 47 | findOptions = findOptions .. ' -o -iname "*.' .. extensionList[i] .. '"' 48 | end 49 | 50 | -- Find all the images using the find command 51 | local f = io.popen('find -L ' .. dir .. 
findOptions) 52 | 53 | local maxLength = -1 54 | local imagePaths = {} 55 | local imageClasses = {} 56 | 57 | -- Generate a list of all the images and their class 58 | while true do 59 | local line = f:read('*line') 60 | if not line then break end 61 | 62 | local className = paths.basename(paths.dirname(line)) 63 | local filename = paths.basename(line) 64 | local path = className .. '/' .. filename 65 | 66 | local classId = classToIdx[className] 67 | assert(classId, 'class not found: ' .. className) 68 | 69 | table.insert(imagePaths, path) 70 | table.insert(imageClasses, classId) 71 | 72 | maxLength = math.max(maxLength, #path + 1) 73 | end 74 | 75 | f:close() 76 | 77 | -- Convert the generated list to a tensor for faster loading 78 | local nImages = #imagePaths 79 | local imagePath = torch.CharTensor(nImages, maxLength):zero() 80 | for i, path in ipairs(imagePaths) do 81 | ffi.copy(imagePath[i]:data(), path) 82 | end 83 | 84 | local imageClass = torch.LongTensor(imageClasses) 85 | return imagePath, imageClass 86 | end 87 | 88 | function M.exec(opt, cacheFile) 89 | -- find the image path names 90 | local imagePath = torch.CharTensor() -- path to each image in dataset 91 | local imageClass = torch.LongTensor() -- class index of each image (class index in self.classes) 92 | 93 | local trainDir = paths.concat(opt.data, 'train') 94 | local valDir = paths.concat(opt.data, 'val') 95 | assert(paths.dirp(trainDir), 'train directory not found: ' .. trainDir) 96 | assert(paths.dirp(valDir), 'val directory not found: ' .. valDir) 97 | 98 | print("=> Generating list of images") 99 | local classList, classToIdx = findClasses(trainDir) 100 | 101 | print(" | finding all validation images") 102 | local valImagePath, valImageClass = findImages(valDir, classToIdx) 103 | 104 | print(" | finding all training images") 105 | local trainImagePath, trainImageClass = findImages(trainDir, classToIdx) 106 | 107 | local info = { 108 | basedir = opt.data, 109 | classList = classList, 110 | train = { 111 | imagePath = trainImagePath, 112 | imageClass = trainImageClass, 113 | }, 114 | val = { 115 | imagePath = valImagePath, 116 | imageClass = valImageClass, 117 | }, 118 | } 119 | 120 | print(" | saving list of images to " .. cacheFile) 121 | torch.save(cacheFile, info) 122 | return info 123 | end 124 | 125 | return M 126 | -------------------------------------------------------------------------------- /src-torch/datasets/imagenet.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- ImageNet dataset loader 10 | -- 11 | 12 | local image = require 'image' 13 | local paths = require 'paths' 14 | local t = require 'datasets/transforms' 15 | local ffi = require 'ffi' 16 | 17 | local M = {} 18 | local ImagenetDataset = torch.class('resnet.ImagenetDataset', M) 19 | 20 | function ImagenetDataset:__init(imageInfo, opt, split) 21 | self.imageInfo = imageInfo[split] 22 | self.opt = opt 23 | self.split = split 24 | self.dir = paths.concat(opt.data, split) 25 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. 
self.dir) 26 | end 27 | 28 | function ImagenetDataset:get(i) 29 | local path = ffi.string(self.imageInfo.imagePath[i]:data()) 30 | 31 | local image = self:_loadImage(paths.concat(self.dir, path)) 32 | local class = self.imageInfo.imageClass[i] 33 | 34 | return { 35 | input = image, 36 | target = class, 37 | } 38 | end 39 | 40 | function ImagenetDataset:_loadImage(path) 41 | local ok, input = pcall(function() 42 | return image.load(path, 3, 'float') 43 | end) 44 | 45 | -- Sometimes image.load fails because the file extension does not match the 46 | -- image format. In that case, use image.decompress on a ByteTensor. 47 | if not ok then 48 | local f = io.open(path, 'r') 49 | assert(f, 'Error reading: ' .. tostring(path)) 50 | local data = f:read('*a') 51 | f:close() 52 | 53 | local b = torch.ByteTensor(string.len(data)) 54 | ffi.copy(b:data(), data, b:size(1)) 55 | 56 | input = image.decompress(b, 3, 'float') 57 | end 58 | 59 | return input 60 | end 61 | 62 | function ImagenetDataset:size() 63 | return self.imageInfo.imageClass:size(1) 64 | end 65 | 66 | -- Computed from random subset of ImageNet training images 67 | local meanstd = { 68 | mean = { 0.485, 0.456, 0.406 }, 69 | std = { 0.229, 0.224, 0.225 }, 70 | } 71 | local pca = { 72 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 73 | eigvec = torch.Tensor{ 74 | { -0.5675, 0.7192, 0.4009 }, 75 | { -0.5808, -0.0045, -0.8140 }, 76 | { -0.5836, -0.6948, 0.4203 }, 77 | }, 78 | } 79 | 80 | function ImagenetDataset:preprocess() 81 | if self.split == 'train' then 82 | return t.Compose{ 83 | t.RandomSizedCrop(224), 84 | t.ColorJitter({ 85 | brightness = 0.4, 86 | contrast = 0.4, 87 | saturation = 0.4, 88 | }), 89 | t.Lighting(0.1, pca.eigval, pca.eigvec), 90 | t.ColorNormalize(meanstd), 91 | t.HorizontalFlip(0.5), 92 | } 93 | elseif self.split == 'val' then 94 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 95 | return t.Compose{ 96 | t.Scale(256), 97 | t.ColorNormalize(meanstd), 98 | Crop(224), 99 | } 100 | else 101 | error('invalid split: ' .. self.split) 102 | end 103 | end 104 | 105 | return M.ImagenetDataset 106 | -------------------------------------------------------------------------------- /src-torch/datasets/init.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- ImageNet and CIFAR-10 datasets 10 | -- 11 | 12 | local M = {} 13 | 14 | local function isvalid(opt, cachePath) 15 | local imageInfo = torch.load(cachePath) 16 | if imageInfo.basedir and imageInfo.basedir ~= opt.data then 17 | return false 18 | end 19 | return true 20 | end 21 | 22 | function M.create(opt, split) 23 | local cachePath = paths.concat(opt.gen, opt.dataset .. '.t7') 24 | if not paths.filep(cachePath) or not isvalid(opt, cachePath) then 25 | paths.mkdir('gen') 26 | 27 | local script = paths.dofile(opt.dataset .. '-gen.lua') 28 | script.exec(opt, cachePath) 29 | end 30 | local imageInfo = torch.load(cachePath) 31 | 32 | local Dataset = require('datasets/' .. 
opt.dataset) 33 | return Dataset(imageInfo, opt, split) 34 | end 35 | 36 | return M 37 | -------------------------------------------------------------------------------- /src-torch/datasets/sanity.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- ImageNet dataset loader 10 | 11 | local image = require 'image' 12 | local paths = require 'paths' 13 | local t = require 'datasets/transforms' 14 | local ffi = require 'ffi' 15 | 16 | local M = {} 17 | local CDNetDataset = torch.class('resnet.CDNetDataset', M) 18 | 19 | function CDNetDataset:__init(imageInfo, opt, split) 20 | -- self.imageInfo = torch.load(imageInfo[split].processedImgs) 21 | self.imageInfo = imageInfo[split] 22 | self.opt = opt 23 | self.split = split 24 | self.dir = paths.concat(opt.data, split) 25 | self.bgDir = paths.concat(opt.data, 'background') 26 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir) 27 | end 28 | 29 | function CDNetDataset:get(i) 30 | local imagePath = ffi.string(self.imageInfo.imagePath[i]:data()) 31 | -- local gtPath = ffi.string(self.imageInfo.gtPath[i]:data()) 32 | -- local bgModelPath = ffi.string(self.imageInfo.bgModelPath[i]:data()) 33 | -- local ROIPath = ffi.string(self.imageInfo.ROIPath[i]:data()) 34 | 35 | -- local img = self:_loadImage(paths.concat(self.dir, imagePath), 3) 36 | -- local gtImage = self:_loadImage(paths.concat(self.dir, gtPath), 1) 37 | -- local bgModel = self:_loadImage(paths.concat(self.bgDir, bgModelPath), 3) 38 | -- local videoROI = self:_loadImage(paths.concat(self.bgDir, ROIPath), 3) 39 | 40 | -- gtImage = gtImage:eq(1):float() 41 | 42 | -- return { 43 | -- input = torch.cat({videoROI:expandAs(img), bgModel:expandAs(img), img}, 4), 44 | -- target = gtImage, 45 | -- } 46 | return torch.load(imagePath) 47 | end 48 | 49 | function CDNetDataset:_loadImage(path, channels) 50 | channels = channels == nil and 3 or channels 51 | local ok, input = pcall(function() 52 | return image.load(path, channels, 'float') 53 | end) 54 | 55 | -- Sometimes image.load fails because the file extension does not match the 56 | -- image format. In that case, use image.decompress on a ByteTensor. 57 | if not ok then 58 | local f = io.open(path, 'r') 59 | assert(f, 'Error reading: ' .. 
tostring(path)) 60 | local data = f:read('*a') 61 | f:close() 62 | 63 | local b = torch.ByteTensor(string.len(data)) 64 | ffi.copy(b:data(), data, b:size(1)) 65 | 66 | input = image.decompress(b, channels, 'float') 67 | end 68 | 69 | return input 70 | end 71 | 72 | function CDNetDataset:size() 73 | -- return #self.imageInfo 74 | return self.imageInfo.imagePath:size(1) 75 | end 76 | 77 | 78 | -- Computed from random subset of ImageNet training images 79 | local meanstd = { 80 | mean = { 0.485, 0.456, 0.406 }, 81 | std = { 0.229, 0.224, 0.225 }, 82 | } 83 | local pca = { 84 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 85 | eigvec = torch.Tensor{ 86 | { -0.5675, 0.7192, 0.4009 }, 87 | { -0.5808, -0.0045, -0.8140 }, 88 | { -0.5836, -0.6948, 0.4203 }, 89 | }, 90 | } 91 | 92 | function CDNetDataset:preprocess() 93 | if self.split == 'train' then 94 | return t.Compose{ 95 | -- t.getImageROI(), 96 | -- t.OneGrayscaleImagePerChannel(), 97 | -- t.ScaleDim(256,'w'), 98 | -- t.ScaleDim(192,'h'), -- previously 192 99 | -- t.Lighting(0.1, pca.eigval, pca.eigvec), 100 | -- t.ColorNormalize(meanstd), 101 | t.HorizontalFlip(0.5), 102 | } 103 | elseif self.split == 'val' then 104 | return t.Compose{ 105 | -- t.getImageROI(), 106 | -- t.OneGrayscaleImagePerChannel(), 107 | -- t.ScaleDim(256,'w'), 108 | -- t.ScaleDim(192,'h'), 109 | -- t.ColorNormalize(meanstd), 110 | -- Crop(224), 111 | } 112 | else 113 | error('invalid split: ' .. self.split) 114 | end 115 | end 116 | 117 | return M.CDNetDataset 118 | -------------------------------------------------------------------------------- /src-torch/datasets/test-hdf5-gen.lua: -------------------------------------------------------------------------------- 1 | local lfs = require 'lfs' 2 | local sys = require 'sys' 3 | local ffi = require 'ffi' 4 | local image = require 'image' 5 | require 'hdf5' 6 | -- local hdf5 = require 'hdf5' 7 | 8 | local URL = 'http://wordpress-jodoin.dmi.usherb.ca/static/dataset/dataset2014.zip' 9 | 10 | local M = {} 11 | 12 | -- Check whether dir is empty or net 13 | local function isemptydir(directory) 14 | for filename in lfs.dir(directory) do 15 | if filename ~= '.' and filename ~= '..' then 16 | return false 17 | end 18 | end 19 | return true 20 | end 21 | 22 | -- Convert the generated list to a tensor for faster loading 23 | local function list2tensor(imagePaths, maxLength) 24 | local nImages = #imagePaths 25 | local imagePath = torch.CharTensor(nImages, maxLength):zero() 26 | for i, path in ipairs(imagePaths) do 27 | ffi.copy(imagePath[i]:data(), path) 28 | end 29 | return imagePath 30 | end 31 | 32 | local function saveHdf5(hdf5File, dir, imgPath) 33 | -- local name = imgPath:gsub('%.[A-z]+','') 34 | local name = imgPath 35 | hdf5File:write(paths.concat(dir, name), image.load(paths.concat(dir, imgPath))) 36 | end 37 | 38 | local function bgMedianModel(path, hdf5Dataset) 39 | local nbElem = 150 40 | local modelDir = paths.concat(path, 'model') 41 | local referenceDir = paths.concat(path, 'reference') 42 | local bgSplitDir = paths.dirname(paths.dirname(path)) 43 | 44 | -- Is there a model already? 45 | if not paths.dirp(modelDir) then 46 | paths.mkdir(modelDir) 47 | -- elseif not isemptydir(modelDir) then 48 | -- return 49 | end 50 | 51 | paths.mkdir(modelDir) 52 | local refImgs 53 | 54 | for img in lfs.dir(referenceDir) do 55 | if img ~= "." and img ~= ".." then 56 | if refImgs == nil then 57 | refImgs = image.load(referenceDir .. '/' .. img, 3, 'float') 58 | else 59 | refImgs = torch.cat(refImgs, image.load(referenceDir .. 
'/' .. img, 3, 'float'), 4) 60 | end 61 | end 62 | end 63 | bgModel = torch.median(refImgs, 4):squeeze() 64 | bgModelPath = modelDir .. '/staticModel.jpg' 65 | image.save(bgModelPath,bgModel) 66 | saveHdf5(hdf5Dataset, bgSplitDir, bgModelPath) 67 | 68 | return 69 | end 70 | 71 | local function findImages(dir, hdf5Dataset, hdf5FilePath) 72 | local imagePath = torch.CharTensor() 73 | local datasetDir = paths.dirname(dir) 74 | ---------------------------------------------------------------------- 75 | 76 | -- Options for the GNU and BSD find command 77 | local extensionList = {'jpg', 'png', 'jpeg', 'JPG', 'PNG', 'JPEG', 'ppm', 'PPM', 'bmp', 'BMP'} 78 | local findOptions = ' -iname "*.' .. extensionList[1] .. '"' 79 | for i=2,#extensionList do 80 | findOptions = findOptions .. ' -o -iname "*.' .. extensionList[i] .. '"' 81 | end 82 | 83 | -- list of desired video situations 84 | local videoTypeList={ 85 | -- 'PTZ', 86 | -- 'badWeather', 87 | 'baseline', 88 | -- 'cameraJitter', 89 | -- 'dynamicBackground', 90 | -- 'intermittentObjectMotion', 91 | -- 'lowFramerate', 92 | -- 'nightVideos', 93 | -- 'shadow', 94 | -- 'thermal', 95 | -- 'turbulence' 96 | } 97 | -- list of undesireddisregarded videos 98 | local list_of_videos = {} 99 | 100 | local maxLength = -1 101 | local inputPaths = {} 102 | local gtPaths = {} 103 | local bgModelPaths = {} 104 | local ROIPaths = {} 105 | local f,g 106 | 107 | for videoType=1,#videoTypeList do -- get specific type of video 108 | local dir_path = dir .. '/' .. videoTypeList[videoType] 109 | for video in lfs.dir(dir_path) do -- get the list of the videos of that type 110 | if video~="." and video~=".." then 111 | 112 | local bgModelPath 113 | local ROIPath 114 | if paths.basename(dir) == 'train' then 115 | print(datasetDir .. '/background/' .. videoTypeList[videoType] .. '/' .. video) 116 | bgMedianModel(datasetDir .. '/background/' .. videoTypeList[videoType] .. '/' .. video, hdf5Dataset) 117 | 118 | bgModelPath = videoTypeList[videoType] .. '/' .. video .. '/model/' .. 'staticModel' 119 | ROIPath = videoTypeList[videoType] .. '/' .. video .. '/ROI/' .. 'ROI' 120 | 121 | saveHdf5(hdf5Dataset, datasetDir..'/background/', ROIPath .. '.jpg') 122 | end 123 | 124 | -- print(hdf5FilePath) 125 | -- if not paths.filep(hdf5FilePath) then 126 | -- print('saved ROI') 127 | -- saveHdf5(hdf5Dataset, datasetDir..'/background/', ROIPath .. '.jpg') 128 | -- end 129 | 130 | -- find all input frames for the current video 131 | f = io.popen('find -L ' .. dir_path .. '/' .. video .. '/input' .. findOptions) 132 | g = io.popen('find -L ' .. dir_path .. '/' .. video .. '/groundtruth/' .. findOptions) 133 | -- Generate a list of all the images and groundtruths 134 | while true do 135 | local line = f:read('*line') 136 | if not line then break end 137 | 138 | local inputFilename = paths.basename(line) 139 | local frame = inputFilename:match('%d+') -- get frame number 140 | local inputPath = videoTypeList[videoType] .. '/' .. video .. '/input/' .. inputFilename 141 | local inputExtension = inputPath:match('%.[A-z]+') -- get file extension type 142 | inputPath = inputPath:gsub('%.[A-z]+','') 143 | 144 | local gtExtension = g:read('*line'):match('%.[A-z]+') -- get file extension type 145 | local gtFilename = 'gt' .. frame --.. gtExtension 146 | local gtPath = videoTypeList[videoType] .. '/' .. video .. '/groundtruth/' .. gtFilename 147 | 148 | 149 | saveHdf5(hdf5Dataset, dir, inputPath .. inputExtension) 150 | saveHdf5(hdf5Dataset, dir, gtPath .. 
gtExtension) 151 | table.insert(inputPaths, inputPath) 152 | table.insert(gtPaths, gtPath) 153 | table.insert(ROIPaths, ROIPath) 154 | table.insert(bgModelPaths, bgModelPath) 155 | 156 | maxLength = math.max(maxLength, #inputPath + 1, #gtPath + 1) 157 | end 158 | 159 | end 160 | end 161 | end 162 | 163 | f:close() 164 | g:close() 165 | 166 | local inputPathTensor = list2tensor(inputPaths, maxLength) 167 | local gtPathTensor = list2tensor(gtPaths, maxLength) 168 | local bgModelPathTensor = list2tensor(bgModelPaths, maxLength) 169 | local ROIPathTensor = list2tensor(ROIPaths, maxLength) 170 | 171 | return inputPathTensor, gtPathTensor, bgModelPathTensor, ROIPathTensor 172 | end 173 | 174 | 175 | function M.exec(opt, cacheFile) 176 | -- find the image path names 177 | local imagePath = torch.CharTensor() -- path to each image in dataset 178 | local gtPath = torch.CharTensor() -- path to each groundtruth in dataset 179 | local bgModelPath = torch.CharTensor() -- path to each bg model in dataset 180 | 181 | local trainDir = paths.concat(opt.data, 'train') 182 | local valDir = paths.concat(opt.data, 'val') 183 | local bgModelDir = paths.concat(opt.data, 'background') 184 | assert(paths.dirp(trainDir), 'train directory not found: ' .. trainDir) 185 | assert(paths.dirp(valDir), 'val directory not found: ' .. valDir) 186 | assert(paths.dirp(bgModelDir), 'background directory not found: ' .. bgModelDir) 187 | 188 | local hdf5Dataset = hdf5.open(paths.concat(opt.data, opt.dataset .. '.h5'), 'w') 189 | 190 | print(" | finding all validation images") 191 | local valImagePath, valGtPath, valBgModelPath, valROIPath = findImages(valDir, hdf5Dataset, paths.concat(opt.data, opt.dataset .. '.h5')) 192 | 193 | print(" | finding all training images") 194 | local trainImagePath, trainGtPath, trainBgModelPath, trainROIPath = findImages(trainDir, hdf5Dataset, paths.concat(opt.data, opt.dataset .. '.h5')) 195 | 196 | hdf5Dataset:close() 197 | 198 | local info = { 199 | basedir = opt.data, 200 | train = { 201 | imagePath = trainImagePath, 202 | gtPath = trainGtPath, 203 | bgModelPath = trainBgModelPath, 204 | ROIPath = trainROIPath, 205 | 206 | }, 207 | val = { 208 | imagePath = valImagePath, 209 | gtPath = valGtPath, 210 | bgModelPath = valBgModelPath, 211 | ROIPath = valROIPath, 212 | }, 213 | } 214 | 215 | print(" | saving list of images to " .. cacheFile) 216 | torch.save(cacheFile, info) 217 | return info 218 | end 219 | 220 | return M 221 | -------------------------------------------------------------------------------- /src-torch/datasets/test-hdf5.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
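-- For reference, the .t7 cache written by test-hdf5-gen.lua (and read back via
-- torch.load in datasets/init.lua) has roughly this layout, per its exec():
--
--   info = {
--      basedir = opt.data,
--      train = { imagePath, gtPath, bgModelPath, ROIPath },  -- CharTensors of relative paths
--      val   = { imagePath, gtPath, bgModelPath, ROIPath },
--   }
--
-- while the pixel data itself is stored in <opt.data>/<opt.dataset>.h5.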
8 | --
9 | -- CDNet dataset loader (HDF5-backed variant)
10 | --
11 | 
12 | --TODO: add code to divide the image into many patches
13 | 
14 | local image = require 'image'
15 | local paths = require 'paths'
16 | local t = require 'datasets/transforms'
17 | local ffi = require 'ffi'
18 | require 'hdf5'
19 | 
20 | 
21 | local M = {}
22 | local CDNetDataset = torch.class('resnet.CDNetDataset', M)
23 | 
24 | function CDNetDataset:__init(imageInfo, opt, split)
25 |    self.imageInfo = imageInfo[split]
26 |    self.opt = opt
27 |    self.split = split
28 |    self.dir = paths.concat(opt.data, split)
29 |    self.bgDir = paths.concat(opt.data, 'background')
30 |    assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir)
31 |    self.hdf5File = hdf5.open(paths.concat(opt.data, opt.dataset .. '.h5'), 'r')
32 | end
33 | 
34 | function CDNetDataset:get(i)
35 | 
36 | 
37 |    local imagePath = ffi.string(self.imageInfo.imagePath[i]:data())
38 |    local gtPath = ffi.string(self.imageInfo.gtPath[i]:data())
39 |    local bgModelPath = ffi.string(self.imageInfo.bgModelPath[i]:data())
40 |    local ROIPath = ffi.string(self.imageInfo.ROIPath[i]:data())
41 | 
42 | 
43 |    local img = self:_loadImage(paths.concat(self.dir, imagePath), 3)
44 |    local gtImage = self:_loadImage(paths.concat(self.dir, gtPath), 1)
45 |    local bgModel = self:_loadImage(paths.concat(self.bgDir, bgModelPath), 3)
46 |    local videoROI = self:_loadImage(paths.concat(self.bgDir, ROIPath), 3)
47 | 
48 |    gtImage = gtImage:eq(1):float()
49 | 
50 |    return {
51 |       input = torch.cat({videoROI:expandAs(img),
52 |                          bgModel:expandAs(img),
53 |                          img}, 4),
54 |       target = gtImage,
55 |    }
56 | end
57 | 
58 | function CDNetDataset:_loadImage(path, channels)
59 |    -- HDF5 keys look like: baseline/highway/input/in000484
60 |    -- local data = myFile:read('/path/to/data'):all()
61 |    channels = channels == nil and 3 or channels
62 |    -- local ok, input = pcall(function()
63 |    --    return image.load(path, channels, 'float')
64 |    -- end)
65 | 
66 |    -- -- Sometimes image.load fails because the file extension does not match the
67 |    -- -- image format. In that case, use image.decompress on a ByteTensor.
68 |    -- if not ok then
69 |    --    local f = io.open(path, 'r')
70 |    --    assert(f, 'Error reading: ' ..
tostring(path)) 71 | -- local data = f:read('*a') 72 | -- f:close() 73 | 74 | -- local b = torch.ByteTensor(string.len(data)) 75 | -- ffi.copy(b:data(), data, b:size(1)) 76 | 77 | -- input = image.decompress(b, channels, 'float') 78 | -- end 79 | print(path) 80 | return self.hdf5File:read(path):all()[{{1,channels},{},{}}] 81 | -- return input 82 | end 83 | 84 | function CDNetDataset:size() 85 | return self.imageInfo.imagePath:size(1) 86 | end 87 | 88 | -- Computed from random subset of ImageNet training images 89 | local meanstd = { 90 | mean = { 0.485, 0.456, 0.406 }, 91 | std = { 0.229, 0.224, 0.225 }, 92 | } 93 | local pca = { 94 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 95 | eigvec = torch.Tensor{ 96 | { -0.5675, 0.7192, 0.4009 }, 97 | { -0.5808, -0.0045, -0.8140 }, 98 | { -0.5836, -0.6948, 0.4203 }, 99 | }, 100 | } 101 | 102 | function CDNetDataset:preprocess() 103 | if self.split == 'train' then 104 | return t.Compose{ 105 | t.getImageROI(), 106 | t.OneGrayscaleImagePerChannel(), 107 | t.ScaleDim(256,'w'), 108 | t.ScaleDim(192,'h'), 109 | -- t.Lighting(0.1, pca.eigval, pca.eigvec), 110 | -- t.ColorNormalize(meanstd), 111 | t.HorizontalFlip(0.5), 112 | } 113 | elseif self.split == 'val' then 114 | return t.Compose{ 115 | t.getImageROI(), 116 | t.OneGrayscaleImagePerChannel(), 117 | t.ScaleDim(256,'w'), 118 | t.ScaleDim(192,'h'), 119 | -- t.ColorNormalize(meanstd), 120 | -- Crop(224), 121 | } 122 | else 123 | error('invalid split: ' .. self.split) 124 | end 125 | end 126 | 127 | return M.CDNetDataset 128 | -------------------------------------------------------------------------------- /src-torch/datasets/transforms-test.lua: -------------------------------------------------------------------------------- 1 | require 'image' 2 | t = require 'datasets/transforms' 3 | 4 | imagePath = '/home/lpcinelli/Documents/cdnet2014/train-val-split/train/dynamicBackground/boats/input/in002000.jpg' 5 | gtPath = '/home/lpcinelli/Documents/cdnet2014/train-val-split/train/dynamicBackground/boats/groundtruth/gt002000.png' 6 | bgModelPath = '/home/lpcinelli/Documents/cdnet2014/train-val-split/background/dynamicBackground/boats/model/staticModel.jpg' 7 | ROIPath = '/home/lpcinelli/Documents/cdnet2014/train-val-split/background/dynamicBackground/boats/ROI/ROI.jpg' 8 | -- imagePath = '/home/lpcinelli/Documents/cdnet2014/deep-subtraction-split/train/baseline/highway/input/in000860.jpg' 9 | -- gtPath = '/home/lpcinelli/Documents/cdnet2014/deep-subtraction-split/train/baseline/highway/groundtruth/gt000860.png' 10 | -- bgModelPath = '/home/lpcinelli/Documents/cdnet2014/deep-subtraction-split/background/baseline/highway/model/staticModel.jpg' 11 | -- ROIPath = '/home/lpcinelli/Documents/cdnet2014/deep-subtraction-split/background/baseline/highway/ROI/ROI.jpg' 12 | 13 | 14 | img = image.load(imagePath, 3, 'float') 15 | bgModel = image.load(bgModelPath, 3, 'float') 16 | videoROI = image.load(ROIPath, 3, 'float') 17 | gt = image.load(gtPath, 1, 'float') 18 | gt[gt:lt(1)] = 0 19 | -- gt = gt:view(1,table.unpack(gt:size():totable())) 20 | 21 | -- sample = torch.cat({ videoROI:expandAs(img), 22 | -- bgModel:expandAs(img), 23 | -- img}, 4) 24 | 25 | -- preprocess1 = t.getImageROI() 26 | -- tmp = preprocess1(sample) 27 | -- bgModelROI = tmp[{{},{},{},{1}}] 28 | -- imgROI = tmp[{{},{},{},{2}}] 29 | 30 | -- print(bgModelROI:size()) 31 | -- print(imgROI:size()) 32 | 33 | -- image.save('bgModelROI.jpg',bgModelROI) 34 | -- image.save('imgROI.jpg',imgROI) 35 | 36 | -- preprocess2 = t.OneGrayscaleImagePerChannel() 37 | 
-- tmp2 = preprocess2(tmp) 38 | -- print(tmp:size()) 39 | -- print(img:size()) 40 | 41 | -- img_grey = tmp2[1]:view(1,table.unpack(tmp2[1]:size():totable())) 42 | -- bg_grey = tmp2[2]:view(1,table.unpack(tmp2[2]:size():totable())) 43 | -- image.save('img-grey.jpg', img_grey) 44 | -- image.save('bg-rey.jpg', bg_grey) 45 | 46 | -- preprocess3 = t.ScaleDim(100,'h') 47 | -- tmp3,tmp4 = preprocess3(img,img) 48 | 49 | -- function preprocess4() 50 | -- return t.Compose{ 51 | -- t.getImageROI(), 52 | -- t.OneGrayscaleImagePerChannel(), 53 | -- t.ScaleDim(256,'w'), 54 | -- t.ScaleDim(192,'h'), 55 | -- t.HorizontalFlip(0.5),} 56 | -- end 57 | 58 | -- preprocess = preprocess4() 59 | 60 | -- tmp5, tmp6 = preprocess(sample, gt) 61 | -- print(tmp5:size()) 62 | -- print(tmp6:size()) 63 | -- print(img:size()) 64 | 65 | -- flip = t.HorizontalFlip(1) 66 | -- flipImg, flipGt = flip(img,gt) 67 | -- print(flipImg:size()) 68 | -- print(flipGt:size()) 69 | 70 | -- image.save('flipImg.jpg',flipImg) 71 | -- image.save('flipGt.jpg',flipGt) 72 | 73 | 74 | scale = t.Compose{t.ScaleDim(256,'w'), t.ScaleDim(192,'h')} 75 | scaled_img, scaled_gt = scale(img, gt) 76 | image.save('scaled_img.jpg', scaled_img) 77 | image.save('scaled_gt.jpg', scaled_gt) 78 | image.save('gt.jpg', gt) -------------------------------------------------------------------------------- /src-torch/drawnet.lua: -------------------------------------------------------------------------------- 1 | require 'nn'; 2 | require 'cunn'; 3 | require 'cudnn'; 4 | local generateGraph = require 'optnet.graphgen' 5 | 6 | local modelPath = 'checkpoints/lenet5/dilation/ch-1/BN-layer/,LR=0.01,batchSize=16,dataset=cdnet,nEpochs=60,optimizer=adam,weightDecay=0.0002/Wed-Jan-25-00:11:07-2017/model_60.t7' 7 | local modelname = 'lenet5-dilation-2' 8 | 9 | local M = {} 10 | 11 | -- visual properties of the generated graph 12 | -- follows graphviz attributes 13 | local graphOpts = { 14 | displayProps = {shape='ellipse',fontsize=14, style='solid'}, 15 | -- nodeData = function(oldData, tensor) 16 | --return oldData .. '\n' .. 'Size: '.. tensor:numel() 17 | -- local text_sz = '' 18 | -- for i = 1,tensor:dim() do 19 | -- if i == 1 then 20 | -- text_sz = text_sz .. '' .. tensor:size(i) 21 | -- else 22 | -- text_sz = text_sz .. ', ' .. tensor:size(i) 23 | -- end 24 | -- end 25 | -- return oldData 26 | -- return oldData .. '\n' .. 'Size: {'.. text_sz .. '}\n' .. 'Mem size: ' .. tensor:numel() 27 | -- end 28 | } 29 | 30 | local function copyInputs(sample) 31 | -- Copies the input to a CUDA tensor, if using 1 GPU, or to pinned memory, 32 | -- if using DataParallelTable. The target is always copied to a CUDA tensor 33 | local input = torch.CudaTensor() 34 | --print('type of input: ' .. torch.type(input)) 35 | --print(sample:size()) 36 | input:resize(sample:size()):copy(sample) 37 | 38 | return input 39 | end 40 | 41 | function M.DrawModel(model, input, name) 42 | -- model: A network architecture 43 | -- input: The input for the given network architecture 44 | -- name: The model name (string). 45 | -- The files, '.dot' and '.svg' will be generated. 
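-- A usage sketch, mirroring the commented-out call at the bottom of this file
-- (modelPath and modelname are the locals defined above):
--   local model = torch.load(modelPath)
--   model:cuda():evaluate()
--   local g = M.DrawModel(model, torch.CudaTensor(2,256,192):normal(0,0.1), modelname)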
46 |    local input_
47 |    if torch.type(input) == 'table' then
48 |       input_ = {}
49 |       --print('table: ', #input)
50 |       for i = 1,#input do
51 |          input_[i] = copyInputs(input[i])
52 |          --print(torch.type(input_[i]))
53 |       end
54 |    else
55 |       input_ = copyInputs(input)
56 |       --print(torch.type(input_))
57 |    end
58 | 
59 |    local g = generateGraph(model, input_, graphOpts)
60 |    graph.dot(g, name, name)
61 | 
62 |    --print(torch.type(g))
63 |    --print(g)
64 |    --print(#g.nodes)
65 |    --print(g.nodes[#g.nodes]:label())
66 |    --print(g:leaves())
67 | 
68 |    return g
69 | end
70 | 
71 | -- local model = torch.load(modelPath)
72 | -- model:cuda():evaluate()
73 | -- local input = torch.CudaTensor(2,256,192):normal(0,0.1)
74 | -- drawModel(model, input, modelname)
75 | 
76 | return M
77 | 
-------------------------------------------------------------------------------- /src-torch/models/init.lua: --------------------------------------------------------------------------------
1 | --
2 | --  Copyright (c) 2016, Facebook, Inc.
3 | --  All rights reserved.
4 | --
5 | --  This source code is licensed under the BSD-style license found in the
6 | --  LICENSE file in the root directory of this source tree. An additional grant
7 | --  of patent rights can be found in the PATENTS file in the same directory.
8 | --
9 | --  Generic model creating code. For the specific ResNet model see
10 | --  models/resnet.lua
11 | --
12 | 
13 | require 'nn'
14 | require 'cunn'
15 | require 'cudnn'
16 | 
17 | local M = {}
18 | 
19 | function M.setup(opt, checkpoint)
20 |    local model
21 |    print('opt.params '..opt.params)
22 |    print(checkpoint)
23 |    if checkpoint then
24 |       local modelPath = opt.resume
25 |       -- local modelPath = paths.concat(opt.resume, checkpoint.modelFile)
26 |       assert(paths.filep(modelPath), 'Saved model not found: ' .. modelPath)
27 |       print('=> Resuming model from ' .. modelPath)
28 |       model = torch.load(modelPath):cuda()
29 |    elseif opt.retrain ~= 'none' then
30 |       assert(paths.filep(opt.retrain), 'File not found: ' .. opt.retrain)
31 |       print('Loading model from file: ' .. opt.retrain)
32 |       model = torch.load(opt.retrain):cuda()
33 |       model.__memoryOptimized = nil
34 |    elseif opt.params ~= 'none' then
35 |       assert(paths.filep(opt.params), 'File not found: ' .. opt.params)
36 |       print('=> Creating model from file: models/' .. opt.netType .. '.lua')
37 |       print('=> Getting weights from ' .. opt.params)
38 |       model = require('models/' .. opt.netType)(opt, opt.params)
39 |    else
40 |       print('=> Creating model from file: models/' .. opt.netType .. '.lua')
41 |       model = require('models/' .. opt.netType)(opt)
42 |    end
43 | 
44 |    -- First remove any DataParallelTable
45 |    if torch.type(model) == 'nn.DataParallelTable' then
46 |       model = model:get(1)
47 |    end
48 | 
49 |    -- optnet is a general library for reducing memory usage in neural networks
50 |    if opt.optnet then
51 |       local optnet = require 'optnet'
52 |       local imsize = opt.dataset == 'imagenet' and 224 or 32
53 |       local sampleInput = torch.zeros(4,3,imsize,imsize):cuda()
54 |       optnet.optimizeMemory(model, sampleInput, {inplace = false, mode = 'training'})
55 |    end
56 | 
57 |    -- This is useful for fitting ResNet-50 on 4 GPUs, but requires that all
58 |    -- containers override backwards to call backwards recursively on submodules
59 |    if opt.shareGradInput then
60 |       M.shareGradInput(model)
61 |    end
62 | 
63 |    -- For resetting the classifier when fine-tuning on a different dataset
64 |    if opt.resetClassifier and not checkpoint then
65 |       print(' => Replacing classifier with ' .. opt.nClasses .. '-way classifier')
66 | 
67 |       local orig = model:get(#model.modules)
68 |       assert(torch.type(orig) == 'nn.Linear',
69 |          'expected last layer to be fully connected')
70 | 
71 | 
72 |       local conv_fc = cudnn.SpatialConvolution(orig.weight:size(2),1, 1,1, 1,1)
73 |       conv_fc.bias:zero()
74 | 
75 |       model:remove(#model.modules) -- remove nn.Linear layer
76 |       model:remove(#model.modules) -- remove nn.View
77 |       if torch.type(model:get(#model.modules)) == 'cudnn.SpatialAveragePooling' or torch.type(model:get(#model.modules)) == 'nn.SpatialAveragePooling' then
78 |          model:remove(#model.modules)
79 |       end
80 | 
81 |       model:add(conv_fc:cuda())
82 |    end
83 | 
84 |    -- Set the CUDNN flags
85 |    if opt.cudnn == 'fastest' then
86 |       cudnn.fastest = true
87 |       cudnn.benchmark = true
88 |    elseif opt.cudnn == 'deterministic' then
89 |       -- Use a deterministic convolution implementation
90 |       model:apply(function(m)
91 |          if m.setMode then m:setMode(1, 1, 1) end
92 |       end)
93 |    end
94 | 
95 |    -- Wrap the model with DataParallelTable, if using more than one GPU
96 |    if opt.nGPU > 1 then
97 |       local gpus = torch.range(1, opt.nGPU):totable()
98 |       local fastest, benchmark = cudnn.fastest, cudnn.benchmark
99 | 
100 |       local dpt = nn.DataParallelTable(1, true, true)
101 |          :add(model, gpus)
102 |          :threads(function()
103 |             local cudnn = require 'cudnn'
104 |             cudnn.fastest, cudnn.benchmark = fastest, benchmark
105 |          end)
106 |       dpt.gradInput = nil
107 | 
108 |       model = dpt:cuda()
109 |    end
110 | 
111 |    local criterion
112 |    if opt.dataset == 'cdnet' or opt.dataset == 'sanity' then
113 |       print('BCECriterion')
114 |       -- criterion = cudnn.SpatialCrossEntropyCriterion():cuda()
115 |       -- unbalanced class: 80% BG vs. 20% FG
116 |       -- setting diff weights: 1.25 vs. 5
117 |       criterion = nn.BCECriterion():cuda()
118 |    else
119 |       criterion = nn.CrossEntropyCriterion():cuda()
120 |    end
121 | 
122 |    return model, criterion
123 | end
124 | 
125 | function M.shareGradInput(model)
126 |    local function sharingKey(m)
127 |       local key = torch.type(m)
128 |       if m.__shareGradInputKey then
129 |          key = key .. ':' .. m.__shareGradInputKey
130 |       end
131 |       return key
132 |    end
133 | 
134 |    -- Share gradInput for memory efficient backprop
135 |    local cache = {}
136 |    model:apply(function(m)
137 |       local moduleType = torch.type(m)
138 |       if torch.isTensor(m.gradInput) and moduleType ~= 'nn.ConcatTable' then
139 |          local key = sharingKey(m)
140 |          if cache[key] == nil then
141 |             cache[key] = torch.CudaStorage(1)
142 |          end
143 |          m.gradInput = torch.CudaTensor(cache[key], 1, 0)
144 |       end
145 |    end)
146 |    for i, m in ipairs(model:findModules('nn.ConcatTable')) do
147 |       if cache[i % 2] == nil then
148 |          cache[i % 2] = torch.CudaStorage(1)
149 |       end
150 |       m.gradInput = torch.CudaTensor(cache[i % 2], 1, 0)
151 |    end
152 | end
153 | 
154 | return M
155 | 
-------------------------------------------------------------------------------- /src-torch/models/preresnet.lua: --------------------------------------------------------------------------------
1 | --
2 | --  Copyright (c) 2016, Facebook, Inc.
3 | --  All rights reserved.
4 | --
5 | --  This source code is licensed under the BSD-style license found in the
6 | --  LICENSE file in the root directory of this source tree. An additional grant
7 | --  of patent rights can be found in the PATENTS file in the same directory.
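-- Why a 1x1 convolution can stand in for the removed nn.Linear in the
-- resetClassifier branch of models/init.lua above: applied to CxHxW feature
-- maps, a 1x1 conv computes at every spatial position the same dot product
-- that nn.Linear computes on a single C-dim feature vector. A minimal sketch
-- (nFeatures is an assumed example size):
--   local nFeatures = 512  -- e.g. orig.weight:size(2) for ResNet-18/34
--   local conv_fc = cudnn.SpatialConvolution(nFeatures, 1, 1,1, 1,1)
--   -- conv_fc.weight:view(1, nFeatures) matches the shape of the
--   -- nn.Linear(nFeatures, 1) weight it replaces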
8 | -- 9 | -- The full pre-activation ResNet variation from the technical report 10 | -- "Identity Mappings in Deep Residual Networks" (http://arxiv.org/abs/1603.05027) 11 | -- 12 | 13 | local nn = require 'nn' 14 | require 'cunn' 15 | 16 | local Convolution = cudnn.SpatialConvolution 17 | local Avg = cudnn.SpatialAveragePooling 18 | local ReLU = cudnn.ReLU 19 | local Max = nn.SpatialMaxPooling 20 | local SBatchNorm = nn.SpatialBatchNormalization 21 | 22 | local function createModel(opt) 23 | local depth = opt.depth 24 | local shortcutType = opt.shortcutType or 'B' 25 | local iChannels 26 | 27 | -- The shortcut layer is either identity or 1x1 convolution 28 | local function shortcut(nInputPlane, nOutputPlane, stride) 29 | local useConv = shortcutType == 'C' or 30 | (shortcutType == 'B' and nInputPlane ~= nOutputPlane) 31 | if useConv then 32 | -- 1x1 convolution 33 | return nn.Sequential() 34 | :add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride)) 35 | elseif nInputPlane ~= nOutputPlane then 36 | -- Strided, zero-padded identity shortcut 37 | return nn.Sequential() 38 | :add(nn.SpatialAveragePooling(1, 1, stride, stride)) 39 | :add(nn.Concat(2) 40 | :add(nn.Identity()) 41 | :add(nn.MulConstant(0))) 42 | else 43 | return nn.Identity() 44 | end 45 | end 46 | 47 | -- Typically shareGradInput uses the same gradInput storage for all modules 48 | -- of the same type. This is incorrect for some SpatialBatchNormalization 49 | -- modules in this network b/c of the in-place CAddTable. This marks the 50 | -- module so that it's shared only with other modules with the same key 51 | local function ShareGradInput(module, key) 52 | assert(key) 53 | module.__shareGradInputKey = key 54 | return module 55 | end 56 | 57 | -- The basic residual layer block for 18 and 34 layer network, and the 58 | -- CIFAR networks 59 | local function basicblock(n, stride, type) 60 | local nInputPlane = iChannels 61 | iChannels = n 62 | 63 | local block = nn.Sequential() 64 | local s = nn.Sequential() 65 | if type == 'both_preact' then 66 | block:add(ShareGradInput(SBatchNorm(nInputPlane), 'preact')) 67 | block:add(ReLU(true)) 68 | elseif type ~= 'no_preact' then 69 | s:add(SBatchNorm(nInputPlane)) 70 | s:add(ReLU(true)) 71 | end 72 | s:add(Convolution(nInputPlane,n,3,3,stride,stride,1,1)) 73 | s:add(SBatchNorm(n)) 74 | s:add(ReLU(true)) 75 | s:add(Convolution(n,n,3,3,1,1,1,1)) 76 | 77 | return block 78 | :add(nn.ConcatTable() 79 | :add(s) 80 | :add(shortcut(nInputPlane, n, stride))) 81 | :add(nn.CAddTable(true)) 82 | end 83 | 84 | -- The bottleneck residual layer for 50, 101, and 152 layer networks 85 | local function bottleneck(n, stride, type) 86 | local nInputPlane = iChannels 87 | iChannels = n * 4 88 | 89 | local block = nn.Sequential() 90 | local s = nn.Sequential() 91 | if type == 'both_preact' then 92 | block:add(ShareGradInput(SBatchNorm(nInputPlane), 'preact')) 93 | block:add(ReLU(true)) 94 | elseif type ~= 'no_preact' then 95 | s:add(SBatchNorm(nInputPlane)) 96 | s:add(ReLU(true)) 97 | end 98 | s:add(Convolution(nInputPlane,n,1,1,1,1,0,0)) 99 | s:add(SBatchNorm(n)) 100 | s:add(ReLU(true)) 101 | s:add(Convolution(n,n,3,3,stride,stride,1,1)) 102 | s:add(SBatchNorm(n)) 103 | s:add(ReLU(true)) 104 | s:add(Convolution(n,n*4,1,1,1,1,0,0)) 105 | 106 | return block 107 | :add(nn.ConcatTable() 108 | :add(s) 109 | :add(shortcut(nInputPlane, n * 4, stride))) 110 | :add(nn.CAddTable(true)) 111 | end 112 | 113 | -- Creates count residual blocks with specified number of features 114 | local function layer(block, 
features, count, stride, type) 115 | local s = nn.Sequential() 116 | if count < 1 then 117 | return s 118 | end 119 | s:add(block(features, stride, 120 | type == 'first' and 'no_preact' or 'both_preact')) 121 | for i=2,count do 122 | s:add(block(features, 1)) 123 | end 124 | return s 125 | end 126 | 127 | local model = nn.Sequential() 128 | if opt.dataset == 'imagenet' then 129 | -- Configurations for ResNet: 130 | -- num. residual blocks, num features, residual block function 131 | local cfg = { 132 | [18] = {{2, 2, 2, 2}, 512, basicblock}, 133 | [34] = {{3, 4, 6, 3}, 512, basicblock}, 134 | [50] = {{3, 4, 6, 3}, 2048, bottleneck}, 135 | [101] = {{3, 4, 23, 3}, 2048, bottleneck}, 136 | [152] = {{3, 8, 36, 3}, 2048, bottleneck}, 137 | [200] = {{3, 24, 36, 3}, 2048, bottleneck}, 138 | } 139 | 140 | assert(cfg[depth], 'Invalid depth: ' .. tostring(depth)) 141 | local def, nFeatures, block = table.unpack(cfg[depth]) 142 | iChannels = 64 143 | print(' | ResNet-' .. depth .. ' ImageNet') 144 | 145 | -- The ResNet ImageNet model 146 | model:add(Convolution(3,64,7,7,2,2,3,3)) 147 | model:add(SBatchNorm(64)) 148 | model:add(ReLU(true)) 149 | model:add(Max(3,3,2,2,1,1)) 150 | model:add(layer(block, 64, def[1], 1, 'first')) 151 | model:add(layer(block, 128, def[2], 2)) 152 | model:add(layer(block, 256, def[3], 2)) 153 | model:add(layer(block, 512, def[4], 2)) 154 | model:add(ShareGradInput(SBatchNorm(iChannels), 'last')) 155 | model:add(ReLU(true)) 156 | model:add(Avg(7, 7, 1, 1)) 157 | model:add(nn.View(nFeatures):setNumInputDims(3)) 158 | model:add(nn.Linear(nFeatures, 1000)) 159 | elseif opt.dataset == 'cifar10' then 160 | -- Model type specifies number of layers for CIFAR-10 model 161 | assert((depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202') 162 | local n = (depth - 2) / 6 163 | iChannels = 16 164 | print(' | ResNet-' .. depth .. ' CIFAR-10') 165 | 166 | -- The ResNet CIFAR-10 model 167 | model:add(Convolution(3,16,3,3,1,1,1,1)) 168 | model:add(layer(basicblock, 16, n, 1)) 169 | model:add(layer(basicblock, 32, n, 2)) 170 | model:add(layer(basicblock, 64, n, 2)) 171 | model:add(ShareGradInput(SBatchNorm(iChannels), 'last')) 172 | model:add(ReLU(true)) 173 | model:add(Avg(8, 8, 1, 1)) 174 | model:add(nn.View(64):setNumInputDims(3)) 175 | model:add(nn.Linear(64, 10)) 176 | elseif opt.dataset == 'cifar100' then 177 | -- Model type specifies number of layers for CIFAR-100 model 178 | assert((depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202') 179 | local n = (depth - 2) / 6 180 | iChannels = 16 181 | print(' | ResNet-' .. depth .. ' CIFAR-100') 182 | 183 | -- The ResNet CIFAR-100 model 184 | model:add(Convolution(3,16,3,3,1,1,1,1)) 185 | model:add(layer(basicblock, 16, n, 1)) 186 | model:add(layer(basicblock, 32, n, 2)) 187 | model:add(layer(basicblock, 64, n, 2)) 188 | model:add(ShareGradInput(SBatchNorm(iChannels), 'last')) 189 | model:add(ReLU(true)) 190 | model:add(Avg(8, 8, 1, 1)) 191 | model:add(nn.View(64):setNumInputDims(3)) 192 | model:add(nn.Linear(64, 100)) 193 | else 194 | error('invalid dataset: ' .. 
opt.dataset)
195 |    end
196 | 
197 |    local function ConvInit(name)
198 |       for k,v in pairs(model:findModules(name)) do
199 |          local n = v.kW*v.kH*v.nOutputPlane
200 |          v.weight:normal(0,math.sqrt(2/n))
201 |          if cudnn.version >= 4000 then
202 |             v.bias = nil
203 |             v.gradBias = nil
204 |          else
205 |             v.bias:zero()
206 |          end
207 |       end
208 |    end
209 |    local function BNInit(name)
210 |       for k,v in pairs(model:findModules(name)) do
211 |          v.weight:fill(1)
212 |          v.bias:zero()
213 |       end
214 |    end
215 | 
216 |    ConvInit('cudnn.SpatialConvolution')
217 |    ConvInit('nn.SpatialConvolution')
218 |    BNInit('fbnn.SpatialBatchNormalization')
219 |    BNInit('cudnn.SpatialBatchNormalization')
220 |    BNInit('nn.SpatialBatchNormalization')
221 |    for k,v in pairs(model:findModules('nn.Linear')) do
222 |       v.bias:zero()
223 |    end
224 |    model:cuda()
225 | 
226 |    if opt.cudnn == 'deterministic' then
227 |       model:apply(function(m)
228 |          if m.setMode then m:setMode(1,1,1) end
229 |       end)
230 |    end
231 | 
232 |    model:get(1).gradInput = nil
233 | 
234 |    return model
235 | end
236 | 
237 | return createModel
238 | 
-------------------------------------------------------------------------------- /src-torch/models/resnet-deep-subtraction: --------------------------------------------------------------------------------
1 | ResNet with deep subtraction
2 | 
3 | For the final 1x1 convolution scheme to work, a TxT input (patchSize x patchSize) must produce a single output value (a single pixel); in addition, zero-padding of (T-1)/2 must be applied to the initial image so that every pixel can sit at the center of its own patch
4 | 
5 | 
6 | Candidate configurations:
7 | patchSize = 27
8 | model:add(layer(basicblock, 16, n))
9 | model:add(layer(basicblock, 32, n, 3)) -- no padding (requires changing the basicblock function)
10 | model:add(layer(basicblock, 64, n))
11 | model:add(Avg(9, 9, 1, 1))
12 | 
13 | patchSize = 27
14 | model:add(layer(basicblock, 16, n))
15 | model:add(layer(basicblock, 64, n))
16 | model:add(layer(basicblock, 32, n, 3)) -- no padding (requires changing the basicblock function)
17 | model:add(Avg(9, 9, 1, 1))
18 | 
19 | patchSize = 32 -- problem: which pixel would be the central one?
20 | model:add(layer(basicblock, 16, n))
21 | model:add(layer(basicblock, 64, n, 2))
22 | model:add(layer(basicblock, 32, n, 2))
23 | model:add(Avg(8, 8, 1, 1))
24 | 
25 | patchSize = 28 -- problem: which pixel would be the central one?
26 | model:add(layer(basicblock, 16, n)) 27 | model:add(layer(basicblock, 64, n, 2)) 28 | model:add(layer(basicblock, 32, n, 2)) 29 | model:add(Avg(8, 8, 1, 1)) 30 | -------------------------------------------------------------------------------- /src-torch/models/unet-original.lua: -------------------------------------------------------------------------------- 1 | 2 | ------------------------------ 3 | -- library 4 | ------------------------------ 5 | 6 | require 'torch' 7 | require 'nn' 8 | require 'cunn' 9 | require 'cudnn' 10 | 11 | ------------------------------ 12 | -- function 13 | ------------------------------ 14 | 15 | function branch(insert) 16 | 17 | local block = nn.Sequential() 18 | local max_pooling = nn.SpatialMaxPooling(2,2,2,2) 19 | block:add(max_pooling) 20 | block:add(insert) 21 | -- block:add(nn.SpatialMaxUnpooling(max_pooling)) 22 | block:add(nn.SpatialUpSamplingNearest(2)) 23 | 24 | local parallel = nn.ConcatTable(2) 25 | parallel:add(nn.Identity()) 26 | parallel:add(block) 27 | 28 | local model = nn.Sequential() 29 | model:add(parallel) 30 | model:add(nn.JoinTable(2)) 31 | 32 | return model 33 | end 34 | 35 | function conv(n_input, n_middle, n_output, filtsize, out_bn) 36 | 37 | local model = nn.Sequential() 38 | 39 | model:add(cudnn.SpatialConvolution(n_input, n_middle, filtsize, filtsize, 1, 1, 1, 1)) 40 | model:add(nn.SpatialBatchNormalization(n_middle)) 41 | model:add(nn.LeakyReLU(0.1, true)) 42 | 43 | model:add(cudnn.SpatialConvolution(n_middle, n_output, filtsize, filtsize, 1, 1, 1, 1)) 44 | if out_bn == true then 45 | model:add(nn.SpatialBatchNormalization(n_output)) 46 | end 47 | 48 | return model 49 | 50 | end 51 | 52 | function newmodel() 53 | 54 | -- number of output 55 | local num_output = num_class or 1 56 | 57 | -- filter size 58 | local filtsize = 3 59 | 60 | local block0 = conv(512, 1024, 512, filtsize, true) 61 | 62 | local block1 = nn.Sequential() 63 | block1:add(conv(256, 512, 512, filtsize, true)) 64 | block1:add(branch(block0)) 65 | block1:add(conv(512*2, 512, 256, filtsize, true)) 66 | 67 | local block2 = nn.Sequential() 68 | block2:add(conv(128, 256, 256, filtsize, true)) 69 | block2:add(branch(block1)) 70 | block2:add(conv(256*2, 256, 128, filtsize, true)) 71 | 72 | local block3 = nn.Sequential() 73 | block3:add(conv(64, 128, 128, filtsize, true)) 74 | block3:add(branch(block2)) 75 | block3:add(conv(128*2, 128, 64, filtsize, true)) 76 | 77 | local model = nn.Sequential() 78 | model:add(conv(3, 64, 64, filtsize, true)) 79 | model:add(branch(block3)) 80 | model:add(conv(64*2, 64, num_output, filtsize, false)) 81 | 82 | model:add(nn.Sigmoid()) 83 | 84 | return model 85 | end 86 | 87 | --[[ 88 | <> 89 | [1] U-Net: Convolutional Networks for Biomedical Image Segmentation 90 | Olaf Ronneberger, Philipp Fischer, Thomas Brox 91 | https://arxiv.org/abs/1505.04597 92 | --]] 93 | -------------------------------------------------------------------------------- /src-torch/models/unet.lua: -------------------------------------------------------------------------------- 1 | ------------------------------ 2 | -- library 3 | ------------------------------ 4 | 5 | local nn = require 'nn' 6 | require 'cunn' 7 | 8 | local Convolution = cudnn.SpatialConvolution 9 | local Deconvolution = cudnn.SpatialFullConvolution 10 | local ReLU = cudnn.ReLU 11 | local MaxPool = cudnn.SpatialMaxPooling 12 | local BN = nn.SpatialBatchNormalization 13 | 14 | ------------------------------ 15 | -- function 16 | ------------------------------ 17 | local function createModel(opt) 
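   -- How the U is assembled below: each call to branch(insert, inPlanes,
   -- outPlanes) wraps an inner block with 2x2 max-pooling on the way down and
   -- a stride-2 deconvolution on the way up, then JoinTable concatenates that
   -- path with an nn.Identity() path along dim 2 (channels) -- the U-Net skip
   -- connection. Nesting block0..block3 this way yields a four-level
   -- encoder/decoder.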
18 | -- local depth = opt.depth 19 | local iChannels 20 | 21 | local function branch(insert, inPlanes, outPlanes) 22 | 23 | local block = nn.Sequential() 24 | block:add(MaxPool(2,2,2,2)) 25 | block:add(insert) 26 | -- block:add(nn.SpatialUpSamplingNearest(2)) 27 | block:add(Deconvolution(inPlanes, outPlanes, 2, 2, 2, 2)) 28 | 29 | local parallel = nn.ConcatTable(2) 30 | parallel:add(nn.Identity()) 31 | parallel:add(block) 32 | 33 | local model = nn.Sequential() 34 | model:add(parallel) 35 | model:add(nn.JoinTable(2)) 36 | 37 | return model 38 | end 39 | 40 | local function conv(n_input, n_middle, n_output, filtsize, out_bn) 41 | 42 | local model = nn.Sequential() 43 | 44 | model:add(cudnn.SpatialConvolution(n_input, n_middle, filtsize, filtsize, 1, 1, 1, 1)) 45 | model:add(BN(n_middle)) 46 | model:add(ReLU(true)) 47 | 48 | model:add(cudnn.SpatialConvolution(n_middle, n_output, filtsize, filtsize, 1, 1, 1, 1)) 49 | if out_bn == true then 50 | model:add(BN(n_output)) 51 | -- model:add(ReLU(true)) 52 | end 53 | 54 | return model 55 | 56 | end 57 | 58 | -- number of input channels 59 | local num_input = 2 60 | 61 | -- number of output 62 | local num_output = num_class or 1 63 | 64 | -- filter size 65 | local filtsize = 3 66 | 67 | local block0 = conv(512, 1024, 1024, filtsize, true) 68 | 69 | local block1 = nn.Sequential() 70 | block1:add(conv(256, 512, 512, filtsize, true)) 71 | block1:add(branch(block0, 1024, 512)) 72 | block1:add(conv(512*2, 512, 512, filtsize, true)) 73 | 74 | local block2 = nn.Sequential() 75 | block2:add(conv(128, 256, 256, filtsize, true)) 76 | block2:add(branch(block1, 512, 256)) 77 | block2:add(conv(256*2, 256, 256, filtsize, true)) 78 | 79 | local block3 = nn.Sequential() 80 | block3:add(conv(64, 128, 128, filtsize, true)) 81 | block3:add(branch(block2, 256, 128)) 82 | block3:add(conv(128*2, 128, 128, filtsize, true)) 83 | 84 | local model = nn.Sequential() 85 | model:add(conv(num_input, 64, 64, filtsize, true)) 86 | model:add(branch(block3, 128, 64)) 87 | model:add(conv(64*2, 64, 32, filtsize, true)) 88 | 89 | model:add(cudnn.SpatialConvolution(32, num_output, 1, 1, 1, 1)) 90 | model:add(nn.Sigmoid()) 91 | 92 | ------------------------------------------------------------------ 93 | 94 | local function ConvInit(name) 95 | local modules = model:findModules(name) 96 | for k,v in pairs(modules) do 97 | local n = v.kW*v.kH*v.nOutputPlane 98 | v.weight:normal(0,math.sqrt(2/n)) 99 | if cudnn.version >= 4000 and k < #modules then 100 | v.bias = nil 101 | v.gradBias = nil 102 | else 103 | v.bias:zero() 104 | end 105 | end 106 | end 107 | 108 | local function BNInit(name) 109 | for k,v in pairs(model:findModules(name)) do 110 | v.weight:fill(1) 111 | v.bias:zero() 112 | end 113 | end 114 | 115 | local function DeconvInit(name) 116 | for k,v in pairs(model:findModules(name)) do 117 | local n = v.kW*v.kH*v.nOutputPlane 118 | v.weight:normal(0,math.sqrt(2/n)) 119 | -- initialize with "homogeneuos interpolation filter of stride 2" 120 | local factor = (v.weight:size(3)+1)/2 121 | local kernel1d = torch.cat( torch.linspace(1,factor,factor), 122 | torch.linspace(factor-1,1,factor-1))/factor 123 | kernel1d:resize(2*factor-1,1) 124 | local kernel2d = kernel1d*kernel1d:t() 125 | for i=1,v.weight:size(2) do 126 | v.weight[{{i},{i},{},{}}] = kernel2d 127 | end 128 | if cudnn.version >= 4000 then 129 | v.bias = nil 130 | v.gradBias = nil 131 | else 132 | v.bias:zero() 133 | end 134 | end 135 | end 136 | 137 | -- ConvInit('cudnn.SpatialConvolution') 138 | -- 
ConvInit('cudnn.SpatialFullConvolution') 139 | -- -- DeconvInit('cudnn.SpatialFullConvolution') 140 | -- BNInit('nn.SpatialBatchNormalization') 141 | 142 | for k,v in pairs(model:findModules('nn.Linear')) do 143 | v.bias:zero() 144 | end 145 | 146 | model:cuda() 147 | 148 | if nn.SpatialBatchNormalization.cudnn == 'deterministic' then 149 | local newModules = model:findModules(newLayer) 150 | model:apply(function(m) 151 | if m.setMode then m:setMode(1,1,1) end 152 | end) 153 | end 154 | 155 | -- all architctures but cdnet begin with conv 156 | -- cudnn conv first layer should ignore gradInput 157 | if opt.dataset ~= 'cdnet' and opt.dataset ~= 'sanity' then 158 | model:get(1).gradInput = nil 159 | end 160 | 161 | return model 162 | 163 | end 164 | return createModel 165 | -------------------------------------------------------------------------------- /src-torch/models/unet.lua.1: -------------------------------------------------------------------------------- 1 | 2 | ------------------------------ 3 | -- library 4 | ------------------------------ 5 | 6 | require 'torch' 7 | require 'nn' 8 | require 'cunn' 9 | require 'cudnn' 10 | 11 | require 'loadcaffe' 12 | 13 | ------------------------------ 14 | -- function 15 | ------------------------------ 16 | 17 | function branch(insert) 18 | 19 | local block = nn.Sequential() 20 | local max_pooling = nn.SpatialMaxPooling(2,2,2,2) 21 | block:add(max_pooling) 22 | block:add(insert) 23 | -- block:add(nn.SpatialMaxUnpooling(max_pooling)) 24 | block:add(nn.SpatialUpSamplingNearest(2)) 25 | 26 | local parallel = nn.ConcatTable(2) 27 | parallel:add(nn.Identity()) 28 | parallel:add(block) 29 | 30 | local model = nn.Sequential() 31 | model:add(parallel) 32 | model:add(nn.JoinTable(2)) 33 | 34 | return model 35 | end 36 | 37 | function conv(n_input, n_middle, n_output, filtsize, out_bn) 38 | 39 | local model = nn.Sequential() 40 | 41 | model:add(cudnn.SpatialConvolution(n_input, n_middle, filtsize, filtsize, 1, 1, 1, 1)) 42 | model:add(nn.SpatialBatchNormalization(n_middle)) 43 | model:add(nn.LeakyReLU(0.1, true)) 44 | 45 | model:add(cudnn.SpatialConvolution(n_middle, n_output, filtsize, filtsize, 1, 1, 1, 1)) 46 | if out_bn == true then 47 | model:add(nn.SpatialBatchNormalization(n_output)) 48 | end 49 | 50 | return model 51 | 52 | end 53 | 54 | function newmodel() 55 | 56 | -- number of output 57 | local num_output = num_class or 1 58 | 59 | -- filter size 60 | local filtsize = 3 61 | 62 | local block0 = conv(512, 1024, 512, filtsize, true) 63 | 64 | local block1 = nn.Sequential() 65 | block1:add(conv(256, 512, 512, filtsize, true)) 66 | block1:add(branch(block0)) 67 | block1:add(conv(512*2, 512, 256, filtsize, true)) 68 | 69 | local block2 = nn.Sequential() 70 | block2:add(conv(128, 256, 256, filtsize, true)) 71 | block2:add(branch(block1)) 72 | block2:add(conv(256*2, 256, 128, filtsize, true)) 73 | 74 | local block3 = nn.Sequential() 75 | block3:add(conv(64, 128, 128, filtsize, true)) 76 | block3:add(branch(block2)) 77 | block3:add(conv(128*2, 128, 64, filtsize, true)) 78 | 79 | local model = nn.Sequential() 80 | model:add(conv(3, 64, 64, filtsize, true)) 81 | model:add(branch(block3)) 82 | model:add(conv(64*2, 64, num_output, filtsize, false)) 83 | 84 | model:add(nn.Sigmoid()) 85 | 86 | return model 87 | end 88 | 89 | --[[ 90 | <> 91 | [1] U-Net: Convolutional Networks for Biomedical Image Segmentation 92 | Olaf Ronneberger, Philipp Fischer, Thomas Brox 93 | https://arxiv.org/abs/1505.04597 94 | --]] 95 | 
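The ConvInit helper repeated across these model files implements the He et al. (MSRA) rule: weights drawn from a zero-mean normal with std sqrt(2/n), where n = kW*kH*nOutputPlane is the filter fan-out. A minimal standalone sketch of the same rule (layer sizes are assumed for illustration):

local nn = require 'nn'

-- a 3x3 convolution, 64 -> 128 planes, stride 1, pad 1
local conv = nn.SpatialConvolution(64, 128, 3, 3, 1, 1, 1, 1)
local n = conv.kW * conv.kH * conv.nOutputPlane   -- 3*3*128 = 1152
conv.weight:normal(0, math.sqrt(2 / n))           -- He initialization
conv.bias:zero()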
-------------------------------------------------------------------------------- /src-torch/models/unet1.lua: -------------------------------------------------------------------------------- 1 | -- This code was based on: 2 | -- https://gist.github.com/toshi-k/ca75e614f1ac12fa44f62014ac1d6465 3 | -- 4 | local nn = require 'nn' 5 | require 'cunn' 6 | 7 | local Convolution = cudnn.SpatialConvolution 8 | -- local Convolution = nn.SpatialConvolution 9 | local Deconvolution = cudnn.SpatialFullConvolution 10 | local DilatedConv = nn.SpatialDilatedConvolution 11 | local Avg = cudnn.SpatialAveragePooling 12 | local DilatedMax = nn.SpatialDilatedMaxPooling 13 | local Max = nn.SpatialMaxPooling 14 | local Unpool = nn.SpatialMaxUnpooling 15 | local ReLU = cudnn.ReLU 16 | local SBatchNorm = nn.SpatialBatchNormalization 17 | local Crop = nn.SpatialUniformCrop 18 | 19 | local function createModel(opt) 20 | 21 | function branch(insert, inPlanes, outPlanes) 22 | 23 | local block = nn.Sequential() 24 | local max_pooling = nn.SpatialMaxPooling(2,2,2,2) 25 | block:add(max_pooling) 26 | block:add(insert) 27 | -- block:add(nn.SpatialMaxUnpooling(max_pooling)) 28 | -- block:add(nn.SpatialUpSamplingNearest(2)) 29 | block:add(Deconvolution(inPlanes, outPlanes, 2, 2, 2, 2)) 30 | 31 | local parallel = nn.ConcatTable(2) 32 | parallel:add(nn.Identity()) 33 | parallel:add(block) 34 | 35 | local model = nn.Sequential() 36 | model:add(parallel) 37 | model:add(nn.JoinTable(2)) 38 | 39 | return model 40 | end 41 | 42 | function conv(n_input, n_middle, n_output, filtsize, out_bn) 43 | 44 | local s = nn.Sequential() 45 | 46 | s:add(cudnn.SpatialConvolution(n_input, n_middle, filtsize, filtsize, 1, 1, 1, 1)) 47 | s:add(nn.SpatialBatchNormalization(n_middle)) 48 | s:add(nn.LeakyReLU(0.1, true)) 49 | 50 | s:add(cudnn.SpatialConvolution(n_middle, n_output, filtsize, filtsize, 1, 1, 1, 1)) 51 | if out_bn == true then 52 | s:add(nn.SpatialBatchNormalization(n_output)) 53 | end 54 | 55 | return s 56 | 57 | end 58 | 59 | 60 | -- number of output 61 | local num_output = num_class or 1 62 | 63 | -- filter size 64 | local filtsize = 3 65 | 66 | local block0 = conv(512, 1024, 1024, filtsize, true) 67 | 68 | local block1 = nn.Sequential() 69 | block1:add(conv(256, 512, 512, filtsize, true)) 70 | block1:add(branch(block0,1024,512)) 71 | block1:add(conv(512*2, 512, 512, filtsize, true)) 72 | 73 | local block2 = nn.Sequential() 74 | block2:add(conv(128, 256, 256, filtsize, true)) 75 | block2:add(branch(block1,512,256)) 76 | block2:add(conv(256*2, 256, 256, filtsize, true)) 77 | 78 | local block3 = nn.Sequential() 79 | block3:add(conv(64, 128, 128, filtsize, true)) 80 | block3:add(branch(block2,256,128)) 81 | block3:add(conv(128*2, 128, 128, filtsize, true)) 82 | 83 | local model = nn.Sequential() 84 | model:add(conv(2, 64, 64, filtsize, true)) 85 | model:add(branch(block3,128,64)) 86 | model:add(conv(64*2, 64, num_output, filtsize, false)) 87 | 88 | model:add(nn.Sigmoid()) 89 | 90 | 91 | local function ConvInit(name) 92 | local modules = model:findModules(name) 93 | for k,v in pairs(modules) do 94 | local n = v.kW*v.kH*v.nOutputPlane 95 | v.weight:normal(0,math.sqrt(2/n)) 96 | if cudnn.version >= 4000 and k < #modules then 97 | v.bias = nil 98 | v.gradBias = nil 99 | else 100 | v.bias:zero() 101 | end 102 | -- end 103 | end 104 | end 105 | 106 | local function BNInit(name) 107 | for k,v in pairs(model:findModules(name)) do 108 | v.weight:fill(1) 109 | v.bias:zero() 110 | end 111 | end 112 | 113 | -- func returning error at torch.cat 114 | 
local function UpSampleInit(name) 115 | for k,v in pairs(model:findModules(name)) do 116 | local n = v.kW*v.kH*v.nOutputPlane 117 | v.weight:normal(0,math.sqrt(2/n)) 118 | -- initialize with "homogeneuos interpolation filter of stride 2" 119 | local factor = (v.weight:size(3)+1)/2 120 | -- factor = 1.5 for default values 121 | local kernel1d = torch.cat( torch.linspace(1,factor,factor), 122 | torch.linspace(factor-1,1,factor-1))/factor 123 | kernel1d:resize(2*factor-1,1) 124 | local kernel2d = kernel1d*kernel1d:t() 125 | for i=1,v.weight:size(2) do 126 | v.weight[{{i},{i},{},{}}] = kernel2d 127 | end 128 | if cudnn.version >= 4000 then 129 | v.bias = nil 130 | v.gradBias = nil 131 | else 132 | v.bias:zero() 133 | end 134 | end 135 | end 136 | ConvInit('cudnn.SpatialConvolution') 137 | ConvInit('cudnn.SpatialFullConvolution') 138 | -- UpSampleInit('cudnn.SpatialFullConvolution') 139 | BNInit('cudnn.SpatialBatchNormalization') 140 | 141 | for k,v in pairs(model:findModules('nn.Linear')) do 142 | v.bias:zero() 143 | end 144 | 145 | model:cuda() 146 | 147 | if nn.SpatialBatchNormalization.cudnn == 'deterministic' then 148 | local newModules = model:findModules(newLayer) 149 | model:apply(function(m) 150 | if m.setMode then m:setMode(1,1,1) end 151 | end) 152 | end 153 | 154 | -- cudnn conv first layer should ignore gradInput 155 | if opt.dataset ~= 'cdnet' and opt.dataset ~= 'sanity' then 156 | model:get(1):get(1).gradInput = nil 157 | end 158 | 159 | return model 160 | end 161 | 162 | return createModel 163 | -------------------------------------------------------------------------------- /src-torch/opts.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
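-- A typical invocation of main.lua with the options parsed below (a sketch;
-- the dataset path is one machine's local split, and the hyperparameters echo
-- the checkpoint names that appear elsewhere in this repo):
--
--   th main.lua -dataset cdnet -data ~/Documents/cdnet2014/train-val-split \
--      -netType lenet5 -nGPU 1 -nEpochs 60 -batchSize 16 -LR 0.01 -optimizer adam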
8 | -- 9 | local M = { } 10 | 11 | function M.parse(arg) 12 | local cmd = torch.CmdLine() 13 | cmd:text() 14 | cmd:text('Torch-7 ResNet Training script') 15 | cmd:text('See https://github.com/facebook/fb.resnet.torch/blob/master/TRAINING.md for examples') 16 | cmd:text() 17 | cmd:text('Options:') 18 | ------------ General options -------------------- 19 | cmd:option('-data', '', 'Path to dataset') 20 | cmd:option('-dataset', 'imagenet', 'Options: imagenet | cifar10 | cifar100 | cdnet | sanity | test-hdf5') 21 | cmd:option('-manualSeed', 0, 'Manually set RNG seed') 22 | cmd:option('-nGPU', 1, 'Number of GPUs to use by default') 23 | cmd:option('-backend', 'cudnn', 'Options: cudnn | cunn') 24 | cmd:option('-cudnn', 'fastest', 'Options: fastest | default | deterministic') 25 | cmd:option('-gen', 'gen', 'Path to save generated files') 26 | ------------- Data options ------------------------ 27 | cmd:option('-nThreads', 1, 'number of data loading threads') 28 | ------------- Training options -------------------- 29 | cmd:option('-nEpochs', 0, 'Number of total epochs to run') 30 | cmd:option('-epochNumber', 1, 'Manual epoch number (useful on restarts)') 31 | cmd:option('-batchSize', 32, 'mini-batch size (1 = pure stochastic)') 32 | cmd:option('-testOnly', 'false', 'Run on validation set only') 33 | cmd:option('-tenCrop', 'false', 'Ten-crop testing') 34 | ------------- Checkpointing options --------------- 35 | cmd:option('-save', 'checkpoints', 'Directory in which to save checkpoints') 36 | cmd:option('-checkpoint', 'true', 'Save model after each epoch: true | false (true)') 37 | cmd:option('-resume', 'none', 'Resume from the latest checkpoint in this directory') 38 | cmd:option('-trash', 'false', 'Discard all log and checkpoint: true | false (false)') 39 | ---------- Optimization options ---------------------- 40 | cmd:option('-LR', 0.1, 'initial learning rate') 41 | cmd:option('-momentum', 0.9, 'momentum') 42 | cmd:option('-weightDecay', 1e-4, 'weight decay') 43 | cmd:option('-model_init_LR', -1, 'Define a small LR to init model for 2 epochs. If it is below 0, ignored. 
(-1)') 44 | cmd:option('-LR_step', -1, 'Define number of epochs between each LR decay') 45 | cmd:option('-LR_factor', -1, 'Define factor by which LR will be decayed') 46 | cmd:option('-optimizer', 'sgd', 'Optimizer algorithm: sgd | adam | rmsprop (sgd)') 47 | cmd:option('-alpha', 0.99, 'RMSProp optimizer param alpha') 48 | ---------- Model options ---------------------------------- 49 | cmd:option('-netType', 'resnet', 'Options: resnet | preresnet | lenet5') 50 | cmd:option('-depth', 0, 'ResNet depth: 18 | 34 | 50 | 101 | ...', 'number') 51 | cmd:option('-shortcutType', 'B', 'Options: A | B | C') 52 | cmd:option('-base', '', 'Base on the archi for: cifar10 | cifar100 | imagenet') 53 | cmd:option('-params', 'none', 'Path to model from which to get the params') 54 | cmd:option('-retrain', 'none', 'Path to model to retrain with') 55 | cmd:option('-optimState', 'none', 'Path to an optimState to reload from') 56 | ---------- Model options ---------------------------------- 57 | cmd:option('-shareGradInput', 'false', 'Share gradInput tensors to reduce memory usage') 58 | cmd:option('-optnet', 'false', 'Use optnet to reduce memory usage') 59 | cmd:option('-resetClassifier', 'false', 'Reset the fully connected layer for fine-tuning') 60 | cmd:option('-nClasses', 0, 'Number of classes in the dataset') 61 | cmd:option('-patchSize', 0, 'Patch size for pixel evaluation during training') 62 | cmd:text() 63 | 64 | local opt = cmd:parse(arg or {}) 65 | 66 | opt.testOnly = opt.testOnly ~= 'false' 67 | opt.tenCrop = opt.tenCrop ~= 'false' 68 | opt.shareGradInput = opt.shareGradInput ~= 'false' 69 | opt.optnet = opt.optnet ~= 'false' 70 | opt.resetClassifier = opt.resetClassifier ~= 'false' 71 | opt.trash = opt.trash ~= 'false' 72 | opt.LR_step = opt.LR_step == -1 and 10 or opt.LR_step 73 | opt.LR_factor = opt.LR_factor == -1 and 0.5 or opt.LR_factor 74 | 75 | -- set folder name to save model checkpoints 76 | if opt.trash == false then 77 | if opt.resume ~= 'none' then 78 | opt.save = paths.concat(paths.dirname(opt.resume),'resume/') 79 | else 80 | opt.save = paths.concat(opt.save, 81 | cmd:string('', opt, 82 | {netType=true, optimState=true, gen=true, manualSeed=true, 83 | nThreads=true, checkpoint=true, data=true, retrain=true, 84 | save=true, shareGradInput=true, optnet=true, tenCrop=true, 85 | testOnly=true, resetClassifier=true,nClasses=true, trash=true, 86 | params=true, resume=true})) 87 | -- add date/time 88 | opt.save = paths.concat(opt.save, '' .. os.date():gsub(' ','-')) 89 | end 90 | if not paths.dirp(opt.save) and not paths.mkdir(opt.save) then 91 | cmd:error('error: unable to create checkpoint directory: ' .. opt.save .. '\n') 92 | end 93 | end 94 | 95 | if opt.dataset == 'imagenet' then 96 | -- Handle the most common case of missing -data flag 97 | local trainDir = paths.concat(opt.data, 'train') 98 | if not paths.dirp(opt.data) then 99 | cmd:error('error: missing ImageNet data directory') 100 | elseif not paths.dirp(trainDir) then 101 | cmd:error('error: ImageNet missing `train` directory: ' .. 
trainDir) 102 | end 103 | -- Default shortcutType=B and nEpochs=90 104 | opt.shortcutType = opt.shortcutType == '' and 'B' or opt.shortcutType 105 | opt.nEpochs = opt.nEpochs == 0 and 90 or opt.nEpochs 106 | 107 | elseif opt.dataset == 'cifar10' then 108 | -- Default shortcutType=A and nEpochs=164 109 | opt.shortcutType = opt.shortcutType == '' and 'A' or opt.shortcutType 110 | opt.nEpochs = opt.nEpochs == 0 and 164 or opt.nEpochs 111 | 112 | elseif opt.dataset == 'cifar100' then 113 | -- Default shortcutType=A and nEpochs=164 114 | opt.shortcutType = opt.shortcutType == '' and 'A' or opt.shortcutType 115 | opt.nEpochs = opt.nEpochs == 0 and 164 or opt.nEpochs 116 | 117 | elseif opt.dataset == 'cdnet' or opt.dataset == 'sanity' or opt.dataset == 'test-hdf5' then 118 | opt.shortcutType = opt.shortcutType == '' and 'B' or opt.shortcutType 119 | opt.nEpochs = opt.nEpochs == 0 and 120 or opt.nEpochs 120 | opt.tenCrop = 'false' 121 | opt.patchSize = opt.patchSize == 0 and 33 or opt.patchSize 122 | -- opt.batchSize = opt.batchSize == 32 and 100 or opt.batchSize 123 | -- opt.depth = opt.depth == 34 and 33 or opt.depth 124 | 125 | else 126 | cmd:error('unknown dataset: ' .. opt.dataset) 127 | end 128 | 129 | if opt.resetClassifier then 130 | if opt.nClasses == 0 then 131 | cmd:error('-nClasses required when resetClassifier is set') 132 | end 133 | end 134 | 135 | if opt.shareGradInput and opt.optnet then 136 | cmd:error('error: cannot use both -shareGradInput and -optnet') 137 | end 138 | 139 | -- set optimizer option 140 | print(" Use ".. opt.optimizer .. ' as the optimizer') 141 | 142 | return opt 143 | end 144 | 145 | return M 146 | -------------------------------------------------------------------------------- /src-torch/param-optim.py: -------------------------------------------------------------------------------- 1 | # this file cross-validate the hyper-parameters 2 | # it calls th main.lua 3 | 4 | import argparse 5 | import os 6 | import numpy as np 7 | import subprocess 8 | 9 | parser = argparse.ArgumentParser(description='Cross validates hyperparameters of the resnet model') 10 | 11 | # parameters for setting the learning rate 12 | parser.add_argument('--lr_min', type=float, help='LOWER bound of the LEARNING RATE search space', nargs=1) 13 | parser.add_argument('--lr_max', type=float, help='UPPER bound of the LEARNING RATE search space', nargs=1) 14 | parser.add_argument('--lt_decay', type=int, help='number of epochs the exp. 
9 | parser = argparse.ArgumentParser(description='Cross-validates hyperparameters of the resnet model')
10 |
11 | # parameters for setting the learning rate
12 | parser.add_argument('--lr_min', type=float, help='LOWER bound of the LEARNING RATE search space', nargs=1)
13 | parser.add_argument('--lr_max', type=float, help='UPPER bound of the LEARNING RATE search space', nargs=1)
14 | parser.add_argument('--lt_decay', type=int, help='number of epochs over which the exp. decay scheduler reduces the LR to 10%%', nargs=1)
15 |
16 | # parameters for setting the regularization (weight decay)
17 | parser.add_argument('--reg_min', type=float, help='LOWER bound of the REGULARIZATION STRENGTH search space', nargs=1)
18 | parser.add_argument('--reg_max', type=float, help='UPPER bound of the REGULARIZATION STRENGTH search space', nargs=1)
19 |
20 | # parameters for generic training settings
21 | parser.add_argument('--optim', type=str, help='The solver to use: SGD | Adam', nargs=1)
22 | parser.add_argument('--epoch', type=int, help='Number of epochs the net will be trained', nargs=1)
23 | parser.add_argument('--batch', type=int, help='Mini-batch size', nargs=1)
24 | parser.add_argument('--cuda', type=int, help='Which cuda device to use', nargs=1)
25 | parser.add_argument('--trials', type=int, help='Number of trials of this experiment', nargs=1)
26 |
27 | # network depth
28 | parser.add_argument('--depth', type=int, help='determines the depth of the network: 22 | 34 | 46 | 58 | 112 | 1204', nargs=1)
29 |
30 | # dir in which to save the results and models
31 | parser.add_argument('--save', type=str, help='determines the dir in which to save the results', nargs=1)
32 |
33 | args = parser.parse_args()
34 |
35 |
36 | print(args.trials)
37 |
38 | for trial in xrange(args.trials[0]):
39 | print('trial nb. ' + str(trial))
40 | # sample LR and weight decay log-uniformly from the given search ranges
41 | sampled_learning_rate = 10**np.random.uniform(np.log10(args.lr_min[0]), np.log10(args.lr_max[0]))
42 | sampled_reg = 10**np.random.uniform(np.log10(args.reg_min[0]), np.log10(args.reg_max[0]))
43 |
44 | command = 'CUDA_VISIBLE_DEVICES=' + str(args.cuda[0])
45 | command = command + ' th main.lua -dataset cdnet -data ~/Documents/cdnet2014/deep-subtraction-split/ -nGPU 1 -nEpochs ' + str(args.epoch[0])
46 | command = command + ' -depth ' + str(args.depth[0])
47 | command = command + ' -batchSize ' + str(args.batch[0])
48 | command = command + ' -LR ' + str(sampled_learning_rate)
49 | command = command + ' -weightDecay ' + str(sampled_reg)
50 | # command = command + ' -LR_decay_step ' + str(args.lt_decay[0])
51 | # command = command + ' -model_init_LR ' + str(2*sampled_learning_rate)
52 | command = command + ' -save ' + args.save[0]
53 | command = command + ' -optimizer ' + args.optim[0]
54 |
55 | print(command)
56 | #retval = subprocess.call('CUDA_VISIBLE_DEVICES=2 th main.lua -dataset cdnet -data ~/Documents/cdnet2014/sanity-check-split/ -nGPU 1 -nEpochs 15 -depth 34 -batchSize 20 -LR 0.1', shell=True)
57 |
58 | retval = subprocess.call(command, shell=True)
59 |
60 |
-------------------------------------------------------------------------------- /src-torch/plotting.lua: --------------------------------------------------------------------------------
1 | require 'gnuplot'
2 |
3 | local plotting = {}
4 |
5 | -- stats should be a 2D tensor
6 | function plotting.loss_curve(stats, opt)
7 | local fn = paths.concat(opt.save,'training_loss.eps')
8 | gnuplot.epsfigure(fn)
9 | gnuplot.title('Training loss\nBest Value : ' .. tostring(stats:min()))
10 | gnuplot.grid('off')
11 | local xs = torch.range(1, stats:size(1))
12 | gnuplot.plot(
13 | { 'train', xs, torch.Tensor(stats), '+' }
14 | )
15 | gnuplot.axis({ 1, stats:size(1), 0, stats:max()})
16 | gnuplot.xlabel('iteration')
17 | gnuplot.ylabel('loss')
18 | gnuplot.plotflush()
19 | end
20 |
21 | -- trainingStats and testingStats should be tables
22 | function plotting.curve(trainingStats, testingStats, title, filename, opt, ylabel, ylimit)
23 | local fn = paths.concat(opt.save, filename .. 
'.eps') 24 | gnuplot.epsfigure(fn)
25 | gnuplot.title(title .. '\nBest Test Value : ' .. tostring(torch.Tensor(testingStats):min()))
26 | gnuplot.grid('on')
27 | local xsTrain = torch.range(1, #trainingStats)
28 | local xsTest = torch.range(1, #testingStats)
29 | -- local xsTest = torch.range(1, testingStats:size(1))
30 |
31 | gnuplot.plot(
32 | { 'train', xsTrain, torch.Tensor(trainingStats), '-' },
33 | { 'test', xsTest, torch.Tensor(testingStats), '-' }
34 | )
35 | if ylabel == nil then ylabel = 'error' end
36 | if ylimit == nil then ylimit = 1 end
37 | gnuplot.axis({ 1, math.max(#testingStats, #trainingStats), 0, ylimit})
38 | -- gnuplot.axis({ 0, math.max(trainingStats:size(1),testingStats:size(1)), 0, ylimit})
39 | gnuplot.xlabel('epoch')
40 | gnuplot.ylabel(ylabel)
41 | gnuplot.plotflush()
42 | end
43 |
44 | return plotting
45 |
-------------------------------------------------------------------------------- /src-torch/pretrained/CONVERGENCE.md: --------------------------------------------------------------------------------
1 | These are plots of validation error, training error, and training loss on ImageNet measured after every epoch.
2 |
3 | - [Validation Error](#validation-error)
4 | - [Training Error](#training-error)
5 | - [Training Loss](#training-loss)
6 |
7 | ## Validation Error
8 | *(figure: validation error curve)*
9 |
10 | ## Training Error
11 | *(figure: training error curve)*
12 |
13 | ## Training Loss
14 | *(figure: training loss curve)*
15 |
-------------------------------------------------------------------------------- /src-torch/pretrained/README.md: --------------------------------------------------------------------------------
1 | Trained ResNet Torch models
2 | ============================
3 |
4 | These are ResNet models trained on ImageNet. Their accuracies on the ImageNet validation set are included below.
5 |
6 | - [ResNet-18](https://d2j0dndfm35trm.cloudfront.net/resnet-18.t7)
7 | - [ResNet-34](https://d2j0dndfm35trm.cloudfront.net/resnet-34.t7)
8 | - [ResNet-50](https://d2j0dndfm35trm.cloudfront.net/resnet-50.t7)
9 | - [ResNet-101](https://d2j0dndfm35trm.cloudfront.net/resnet-101.t7)
10 | - [ResNet-152](https://d2j0dndfm35trm.cloudfront.net/resnet-152.t7)
11 | - [ResNet-200](https://d2j0dndfm35trm.cloudfront.net/resnet-200.t7)
12 |
13 | The ResNet-50 model has a batch normalization layer after the addition, instead of immediately after the convolution layer. The ResNet-200 model is the full pre-activation variant from ["Identity Mappings in Deep Residual Networks"](http://arxiv.org/abs/1603.05027).
14 |
15 | ##### ImageNet 1-crop error rates (224x224)
16 |
17 | | Network | Top-1 error | Top-5 error |
18 | | ------------- | ----------- | ----------- |
19 | | ResNet-18 | 30.43 | 10.76 |
20 | | ResNet-34 | 26.73 | 8.74 |
21 | | ResNet-50 | 24.01 | 7.02 |
22 | | ResNet-101 | 22.44 | 6.21 |
23 | | ResNet-152 | 22.16 | 6.16 |
24 | | ResNet-200 | 21.66 [1](#notes) | 5.79 |
25 |
26 | ##### ImageNet 10-crop error rates
27 |
28 | | Network | Top-1 error | Top-5 error |
29 | | ------------- | ----------- | ----------- |
30 | | ResNet-18 | 28.22 | 9.42 |
31 | | ResNet-34 | 24.76 | 7.35 |
32 | | ResNet-50 | 22.24 | 6.08 |
33 | | ResNet-101 | 21.08 | 5.35 |
34 | | ResNet-152 | 20.69 | 5.21 |
35 | | ResNet-200 | 20.15 | 4.93 |
36 |
37 | ##### ImageNet charts
38 |
39 | See the [convergence plots](CONVERGENCE.md) for charts of training and validation error and training loss after every epoch.
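As a quick sanity check, a downloaded model can be loaded and run directly from Torch. A minimal sketch (assuming `resnet-18.t7` sits in the current directory and that cunn/cudnn are installed; the dummy input is purely illustrative):

```lua
require 'cudnn'
require 'cunn'

-- load the serialized model and switch to inference mode
local model = torch.load('resnet-18.t7'):cuda()
model:evaluate()

-- forward one dummy 3x224x224 image; real inputs should use the
-- normalization shown in classify.lua below
local output = model:forward(torch.CudaTensor(1, 3, 224, 224):uniform())
print(output:size())  -- 1x1000 ImageNet class scores
```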
40 |
41 | ### Fine-tuning on a custom dataset
42 |
43 | Your images don't need to be pre-processed or packaged in a database, but you need to arrange them so that your dataset contains a `train` and a `val` directory, each of which contains sub-directories for every label. For example:
44 |
45 | ```
46 | train/<class1>/
47 | train/<class2>/
48 | val/<class1>/
49 | val/<class2>/
50 | ```
51 |
52 | You can then use the included [ImageNet data loader](../datasets/imagenet.lua) with your dataset and train with the `-resetClassifier` and `-nClasses` options:
53 |
54 | ```bash
55 | th main.lua -retrain resnet-50.t7 -data [path-to-directory-with-train-and-val] -resetClassifier true -nClasses 80
56 | ```
57 |
58 | The labels will be sorted alphabetically. The first output of the network corresponds to the label that comes first alphabetically.
59 |
60 | You can find instructions for creating a custom data loader in the [datasets](../datasets) README.
61 |
62 | ### Classification
63 | To get the top 5 predictions of a model for a given input image, you can use the [`classify.lua`](classify.lua) script. For example:
64 | ```bash
65 | th classify.lua resnet-101.t7 img1.jpg img2.jpg ...
66 | ```
67 | Example output:
68 | ```
69 | Classes for cat.jpg
70 | 0.77302575111389 Egyptian cat
71 | 0.060410376638174 tabby, tabby cat
72 | 0.040622022002935 tiger cat
73 | 0.025837801396847 lynx, catamount
74 | 0.018691379576921 window screen
75 | ```
76 |
77 |
78 | ### Extracting image features
79 |
80 | The [`extract-features.lua`](extract-features.lua) script will extract the image features from an image and save them as a serialized Torch tensor. To use it, first download one of the trained models above. Next, run it using
81 |
82 | ```bash
83 | th extract-features.lua resnet-101.t7 img1.jpg img2.jpg ...
84 | ```
85 |
86 | This will save a file called `features.t7` in the current directory. You can then load the image features in Torch.
87 |
88 | ```lua
89 | local features = torch.load('features.t7')
90 | ```
91 |
92 | ### Notes
93 | 1. This is on a test crop of size 224x224. On a test crop of size 320x320, the error rate is 20.1/4.8.
94 |
-------------------------------------------------------------------------------- /src-torch/pretrained/classify.lua: --------------------------------------------------------------------------------
1 | --
2 | -- Copyright (c) 2016, Manuel Araoz
3 | -- Copyright (c) 2016, Facebook, Inc.
4 | -- All rights reserved.
5 | --
6 | -- This source code is licensed under the BSD-style license found in the
7 | -- LICENSE file in the root directory of this source tree. An additional grant
8 | -- of patent rights can be found in the PATENTS file in the same directory.
9 | --
10 | -- classifies an image using a trained model
11 | --
12 |
13 | require 'torch'
14 | require 'paths'
15 | require 'cudnn'
16 | require 'cunn'
17 | require 'image'
18 |
19 | local t = require '../datasets/transforms'
20 | local imagenetLabel = require './imagenet'
21 |
22 | if #arg < 2 then
23 | io.stderr:write('Usage: th classify.lua [MODEL] [FILE]...\n')
24 | os.exit(1)
25 | end
26 | for _, f in ipairs(arg) do
27 | if not paths.filep(f) then
28 | io.stderr:write('file not found: ' .. f .. 
'\n') 29 | os.exit(1) 30 | end 31 | end 32 | 33 | 34 | -- Load the model 35 | local model = torch.load(arg[1]):cuda() 36 | local softMaxLayer = cudnn.SoftMax():cuda() 37 | 38 | -- add Softmax layer 39 | model:add(softMaxLayer) 40 | 41 | -- Evaluate mode 42 | model:evaluate() 43 | 44 | -- The model was trained with this input normalization 45 | local meanstd = { 46 | mean = { 0.485, 0.456, 0.406 }, 47 | std = { 0.229, 0.224, 0.225 }, 48 | } 49 | 50 | local transform = t.Compose{ 51 | t.Scale(256), 52 | t.ColorNormalize(meanstd), 53 | t.CenterCrop(224), 54 | } 55 | 56 | local N = 5 57 | 58 | for i=2,#arg do 59 | -- load the image as a RGB float tensor with values 0..1 60 | local img = image.load(arg[i], 3, 'float') 61 | local name = arg[i]:match( "([^/]+)$" ) 62 | 63 | -- Scale, normalize, and crop the image 64 | img = transform(img) 65 | 66 | -- View as mini-batch of size 1 67 | local batch = img:view(1, table.unpack(img:size():totable())) 68 | 69 | -- Get the output of the softmax 70 | local output = model:forward(batch:cuda()):squeeze() 71 | 72 | -- Get the top 5 class indexes and probabilities 73 | local probs, indexes = output:topk(N, true, true) 74 | print('Classes for', arg[i]) 75 | for n=1,N do 76 | print(probs[n], imagenetLabel[indexes[n]]) 77 | end 78 | print('') 79 | 80 | end 81 | -------------------------------------------------------------------------------- /src-torch/pretrained/extract-features.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- extracts features from an image using a trained model 10 | -- 11 | 12 | -- USAGE 13 | -- SINGLE FILE MODE 14 | -- th extract-features.lua [MODEL] [FILE] ... 15 | -- 16 | -- BATCH MODE 17 | -- th extract-features.lua [MODEL] [BATCH_SIZE] [DIRECTORY_CONTAINING_IMAGES] 18 | -- 19 | 20 | 21 | require 'torch' 22 | require 'paths' 23 | require 'cudnn' 24 | require 'cunn' 25 | require 'image' 26 | local t = require '../datasets/transforms' 27 | 28 | 29 | if #arg < 2 then 30 | io.stderr:write('Usage (Single file mode): th extract-features.lua [MODEL] [FILE] ... \n') 31 | io.stderr:write('Usage (Batch mode) : th extract-features.lua [MODEL] [BATCH_SIZE] [DIRECTORY_CONTAINING_IMAGES] \n') 32 | os.exit(1) 33 | end 34 | 35 | 36 | -- get the list of files 37 | local list_of_filenames = {} 38 | local batch_size = 1 39 | 40 | if not paths.filep(arg[1]) then 41 | io.stderr:write('Model file not found at ' .. arg[1] .. '\n') 42 | os.exit(1) 43 | end 44 | 45 | 46 | if tonumber(arg[2]) ~= nil then -- batch mode ; collect file from directory 47 | 48 | local lfs = require 'lfs' 49 | batch_size = tonumber(arg[2]) 50 | dir_path = arg[3] 51 | 52 | for file in lfs.dir(dir_path) do -- get the list of the files 53 | if file~="." and file~=".." then 54 | table.insert(list_of_filenames, dir_path..'/'..file) 55 | end 56 | end 57 | 58 | else -- single file mode ; collect file from command line 59 | for i=2, #arg do 60 | f = arg[i] 61 | if not paths.filep(f) then 62 | io.stderr:write('file not found: ' .. f .. 
'\n') 63 | os.exit(1)
64 | else
65 | table.insert(list_of_filenames, f)
66 | end
67 | end
68 | end
69 |
70 | local number_of_files = #list_of_filenames
71 |
72 | if batch_size > number_of_files then batch_size = number_of_files end
73 |
74 | -- Load the model
75 | local model = torch.load(arg[1]):cuda()
76 |
77 | -- Remove the fully connected layer
78 | assert(torch.type(model:get(#model.modules)) == 'nn.Linear')
79 | model:remove(#model.modules)
80 |
81 | -- Evaluate mode
82 | model:evaluate()
83 |
84 | -- The model was trained with this input normalization
85 | local meanstd = {
86 | mean = { 0.485, 0.456, 0.406 },
87 | std = { 0.229, 0.224, 0.225 },
88 | }
89 |
90 | local transform = t.Compose{
91 | t.Scale(256),
92 | t.ColorNormalize(meanstd),
93 | t.CenterCrop(224),
94 | }
95 |
96 | local features
97 |
98 | for i=1,number_of_files,batch_size do
99 | local img_batch = torch.FloatTensor(batch_size, 3, 224, 224) -- a batch of 3-channel images at the transform's 224x224 output size
100 |
101 | -- preprocess the images for the batch
102 | local image_count = 0
103 | for j=1,batch_size do
104 | img_name = list_of_filenames[i+j-1]
105 |
106 | if img_name ~= nil then
107 | image_count = image_count + 1
108 | local img = image.load(img_name, 3, 'float')
109 | img = transform(img)
110 | img_batch[{j, {}, {}, {} }] = img
111 | end
112 | end
113 |
114 | -- if this is the last batch, it may be smaller than batch_size, so check for that
115 | if image_count ~= batch_size then
116 | img_batch = img_batch[{{1,image_count}, {}, {}, {} } ]
117 | end
118 |
119 | -- Get the output of the layer before the (removed) fully connected layer
120 | local output = model:forward(img_batch:cuda()):squeeze(1)
121 |
122 |
123 | -- this is necessary because the model output has a different number of dimensions depending on the input batch size
124 | if output:nDimension() == 1 then output = torch.reshape(output, 1, output:size(1)) end
125 |
126 | if not features then
127 | features = torch.FloatTensor(number_of_files, output:size(2)):zero()
128 | end
129 | features[{ {i, i-1+image_count}, {} } ]:copy(output)
130 |
131 | end
132 |
133 | torch.save('features.t7', {features=features, image_list=list_of_filenames})
134 | print('saved features to features.t7')
135 |
-------------------------------------------------------------------------------- /src-torch/run-tests.sh: --------------------------------------------------------------------------------
1 | #CUDA_VISIBLE_DEVICES=1 th main.lua -dataset 'cdnet' -data '/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean' -nGPU 1 -nEpochs 90 -batchSize 16 -LR 0.01 -weightDecay 2e-4 -LR_step 10 -LR_factor 0.5 -optimizer 'adam' -nThreads 1 -netType 'lenet5' -save 'checkpoints/lenet5/bilinear-upsample/ch-1/no-BN-layer/full-base'
2 |
3 | #CUDA_VISIBLE_DEVICES=1 th main.lua -dataset 'cdnet' -data '/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean' -nGPU 1 -nEpochs 90 -batchSize 16 -LR 0.01 -weightDecay 2e-4 -LR_step 10 -LR_factor 0.5 -optimizer 'adam' -nThreads 2 -netType 'resnet' -base 'cifar10' -shortcutType 'B' -depth 32 -save 'checkpoints/resnet/cifar-10/bilinear-upsample/ch1/full-base'
4 |
5 | #CUDA_VISIBLE_DEVICES=1 th main.lua -dataset 'cdnet' -data '/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean' -nGPU 1 -nEpochs 90 -batchSize 16 -LR 0.01 -weightDecay 2e-4 -LR_step 10 -LR_factor 0.5 -optimizer 'adam' -nThreads 2 -netType 'resnet' -base 'imagenet' -shortcutType 'B' -depth 34 -save 'checkpoints/resnet/imagenet/bilinear-upsample/full-base'
6 |
7 |
8 |
9 |
10 | #CUDA_VISIBLE_DEVICES=0 th category-perf.lua 
-dataset cdnet -data /local/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean -nGPU 1 -batchSize 32 -nThreads 2 -model checkpoints/resnet/cifar-10/deconv/ch-1/non-linear-deconv/better-init/full-base/,LR=0.01,LR_factor=0.5,LR_step=10,base=cifar10,batchSize=16,dataset=cdnet,depth=32,nEpochs=90,optimizer=adam,weightDecay=0.0002/Tue-Apr--4-23:59:21-2017/resume/model_78.t7 11 | 12 | #CUDA_VISIBLE_DEVICES=0 th category-perf.lua -dataset cdnet -data /local/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean -nGPU 1 -batchSize 32 -nThreads 2 -model checkpoints/resnet/cifar-10/decode/shallow/full-base/,LR=0.01,LR_factor=0.5,LR_step=10,base=cifar10,batchSize=16,dataset=cdnet,depth=32,nEpochs=80,optimizer=adam,weightDecay=0.0002/Mon-Apr-10-14:24:56-2017/model_80.t7 13 | 14 | #CUDA_VISIBLE_DEVICES=0 th category-perf.lua -dataset cdnet -data /local/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean -nGPU 1 -batchSize 32 -nThreads 2 -model checkpoints/resnet/cifar-10/decode/deep/full-base/,LR=0.01,LR_factor=0.5,LR_step=10,base=cifar10,batchSize=12,dataset=cdnet,depth=32,nEpochs=80,optimizer=adam,weightDecay=0.0002/Mon-Apr-10-14:33:08-2017/model_78.t7 15 | 16 | CUDA_VISIBLE_DEVICES=1 th main.lua -dataset 'cdnet' -data '/local/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean' -nGPU 1 -batchSize 32 -testOnly true -retrain checkpoints/resnet/cifar-10/bilinear-upsample/ch1/full-base/,LR=0.01,LR_factor=0.5,LR_step=10,base=cifar10,batchSize=16,dataset=cdnet,depth=32,nEpochs=90,optimizer=adam,weightDecay=0.0002/Fri-Mar-31-18:45:23-2017/resume/model_75.t7 -trash true 17 | 18 | CUDA_VISIBLE_DEVICES=1 th main.lua -dataset 'cdnet' -data '/local/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean' -nGPU 1 -batchSize 32 -testOnly true -retrain checkpoints/resnet/cifar-10/deconv/ch-1/linear-deconv/better-init/,LR=0.01,LR_factor=0.5,LR_step=10,base=cifar10,batchSize=16,dataset=cdnet,depth=32,nEpochs=90,optimizer=adam,weightDecay=0.0002/Fri-Mar-31-20:49:15-2017/model_80.t7 -trash true 19 | 20 | CUDA_VISIBLE_DEVICES=1 th main.lua -dataset 'cdnet' -data '/local/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean' -nGPU 1 -batchSize 32 -testOnly true -retrain checkpoints/resnet/cifar-10/deconv/ch-1/non-linear-deconv/better-init/full-base/,LR=0.01,LR_factor=0.5,LR_step=10,base=cifar10,batchSize=16,dataset=cdnet,depth=32,nEpochs=90,optimizer=adam,weightDecay=0.0002/Tue-Apr--4-23:59:21-2017/resume/model_78.t7 -trash true 21 | 22 | CUDA_VISIBLE_DEVICES=1 th main.lua -dataset 'cdnet' -data '/local/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean' -nGPU 1 -batchSize 32 -testOnly true -retrain checkpoints/resnet/cifar-10/decode/shallow/full-base/,LR=0.01,LR_factor=0.5,LR_step=10,base=cifar10,batchSize=16,dataset=cdnet,depth=32,nEpochs=80,optimizer=adam,weightDecay=0.0002/Mon-Apr-10-14:24:56-2017/model_80.t7 -trash true 23 | 24 | CUDA_VISIBLE_DEVICES=1 th main.lua -dataset 'cdnet' -data '/local/home/lpcinelli/Documents/cdnet2014/full-split-70-30-clean' -nGPU 1 -batchSize 32 -testOnly true -retrain checkpoints/resnet/cifar-10/decode/deep/full-base/,LR=0.01,LR_factor=0.5,LR_step=10,base=cifar10,batchSize=12,dataset=cdnet,depth=32,nEpochs=80,optimizer=adam,weightDecay=0.0002/Mon-Apr-10-14:33:08-2017/model_78.t7 -trash true 25 | -------------------------------------------------------------------------------- /src-torch/save_img.lua: -------------------------------------------------------------------------------- 1 | -- require 'torch' 2 | local image = require 'image' 3 | local lfs = 
require 'lfs'
4 | local ffi = require 'ffi'
5 | local paths = require 'paths'
6 |
7 | -- absolute paths
8 | srcDir = '/home/lpcinelli/repos/fb.resnet.torch.perso/gen/cdnet_70-30'
9 | dstDir = '/home/lpcinelli/repos/fb.resnet.torch.perso/gen/imgs/cdnet_70-30'
10 |
11 | function attrdir (srcPath, dstPath)
12 | for file in lfs.dir(srcPath) do
13 | if file ~= "." and file ~= ".." then
14 | local f = srcPath .. '/' .. file
15 | local g = dstPath .. '/' .. file
16 | print ("\t "..f)
17 | local attr = lfs.attributes (f)
18 | assert (type(attr) == "table")
19 | if attr.mode == "directory" then
20 | paths.mkdir(g)
21 | attrdir (f,g)
22 | else
23 | img = torch.load(f)
24 | if not paths.filep(paths.concat(paths.dirname(g),'bg.jpg')) then
25 | image.save(paths.concat(paths.dirname(g),'bg.jpg'),img.input[1]:view(1, table.unpack(img.input[1]:size():totable())))
26 | end
27 |
28 | image.save(g .. '.jpg', img.input[2]:view(1, table.unpack(img.input[2]:size():totable())))
29 | end
30 | end
31 | end
32 | end
33 |
34 | attrdir (srcDir, dstDir)
35 |
-------------------------------------------------------------------------------- /src-torch/tune-threshold.lua: --------------------------------------------------------------------------------
1 | require 'torch'
2 | require 'paths'
3 | require 'optim'
4 | require 'nn'
5 | local DataLoader = require 'dataloader'
6 | local models = require 'models/init'
7 | local Trainer = require 'train'
8 | local checkpoints = require 'checkpoints'
9 |
10 | torch.setdefaulttensortype('torch.FloatTensor')
11 | torch.setnumthreads(1)
12 |
13 | function copyInputs(sample, opt)
14 | -- Copies the input to a CUDA tensor, if using 1 GPU, or to pinned memory,
15 | -- if using DataParallelTable. The target is always copied to a CUDA tensor
16 | inputImg = inputImg or (opt.nGPU == 1
17 | and torch.CudaTensor()
18 | or cutorch.createCudaHostTensor())
19 | targetImg = targetImg or (opt.nGPU == 1
20 | and torch.CudaTensor()
21 | or cutorch.createCudaHostTensor())
22 |
23 | inputImg:resize(sample.input:size()):copy(sample.input)
24 | targetImg:resize(sample.target:size()):copy(sample.target)
25 | end
26 |
27 | -- new computeScore function used for the segmentation task
28 | function computeScore(output, target, threshold)
29 | -- threshold the segmentation mask: a pixel is FG if its prob >= threshold (default 0.5)
30 | threshold = threshold == nil and 0.5 or threshold
31 | output[output:ge(threshold)] = 1
32 | output[output:lt(threshold)] = 0
33 |
34 | local targetArea = target:sum() -- TP + FN
35 | local outputArea = output:sum() -- TP + FP
36 | local intersection = torch.cmul(output,target):sum() -- TP
37 | local trueNeg = (output + target):eq(0):float():sum() -- TN
38 |
39 | local function treatNaN(a)
40 | if outputArea == 0 and targetArea == 0 then
41 | a = 1
42 | elseif outputArea == 0 or targetArea == 0 then
43 | a = 0
44 | end
45 | return a
46 | end
47 |
48 | local function precision() -- TP/(TP+FP)
49 | return treatNaN(intersection/outputArea)
50 | end
51 | local function recall() -- TP/(TP+FN)
52 | return treatNaN(intersection/targetArea)
53 | end
54 | local function f1Score(a, b)
55 | return treatNaN(2*torch.cdiv(torch.cmul(a, b), (a+b)))
56 | end
57 | local function f1Direct() -- 2*TP/(2*TP+FN+FP)
58 | return treatNaN(2*intersection/(outputArea + targetArea))
59 | end
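-- Worked example for the helpers above (illustrative numbers): with
-- targetArea = 100 (TP+FN), outputArea = 80 (TP+FP) and intersection = 60 (TP),
-- precision = 60/80 = 0.75, recall = 60/100 = 0.60 and
-- f1Direct = 2*60/(80+100) = 0.667, which equals 2*precision*recall/(precision+recall).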
1 or 2 (output+target) 63 | local nanPos = IoUPerImage:ne(IoUPerImage) 64 | IoUPerImage[nanPos] = 0 65 | return IoUPerImage:sum()/(IoUPerImage:size(1) - nanPos:sum()) 66 | end 67 | local function accuracy() -- (TP+TN)/(TP+TN+FN+FP) 68 | return (intersection + trueNeg)/(target:view(-1):size(1)) 69 | end 70 | local function specifity() -- TN/(TN+FP) 71 | return trueNeg/(trueNeg + (outputArea - intersection)) 72 | end 73 | local function falsePosRate() -- FP/(FP+TN) 74 | return (outputArea - intersection)/((outputArea - intersection) + trueNeg) 75 | end 76 | local function falseNegRate() -- FN/(TP+FN) 77 | return (targetArea - intersection)/targetArea 78 | end 79 | local function classifErr() -- (FN+FP)/(TP+TN+FN+FP) 80 | return 1 - accuracy() 81 | end 82 | 83 | local recallVal = recall() 84 | local precisionVal = precision() 85 | local IoUVal = IoU() 86 | local f1 = f1Direct() 87 | local fnrVal = falseNegRate() 88 | 89 | -- specifity() 90 | -- falseNegRate() 91 | -- falsePosRate() 92 | -- classifErr() 93 | 94 | return f1, precisionVal, recallVal, fnrVal, IoUVal 95 | end 96 | 97 | function parse(arg) 98 | local cmd = torch.CmdLine() 99 | cmd:text() 100 | cmd:text('Threshold finetuning script') 101 | cmd:text('Options:') 102 | cmd:option('-data', '', 'Path to dataset') 103 | cmd:option('-dataset', '', 'Datase name') 104 | cmd:option('-nGPU', 1, 'Number of GPUs to use by default') 105 | cmd:option('-backend', 'cudnn', 'Options: cudnn | cunn') 106 | cmd:option('-cudnn', 'fastest', 'Options: fastest | default | deterministic') 107 | cmd:option('-nThreads', 1, 'Number of data loading threads') 108 | cmd:option('-batchSize', 32, 'Mini-batch size (1 = pure stochastic)') 109 | cmd:option('-model', 'none', 'Path to model') 110 | cmd:option('-trials', 100, 'Nb of trials') 111 | cmd:text() 112 | 113 | local opt = cmd:parse(arg or {}) 114 | 115 | if not paths.filep(opt.model) then 116 | cmd:error('error: unable to find model: ' .. opt.model .. '\n') 117 | end 118 | 119 | if not paths.dirp(opt.data) then 120 | cmd:error('error: unable to find path to dataset: ' .. opt.data .. 
'\n') 121 | end
122 |
123 | return opt
124 | end
125 |
126 | local opt = parse(arg)
127 | -- torch.manualSeed(opt.manualSeed)
128 | -- cutorch.manualSeedAll(opt.manualSeed)
129 | opt.manualSeed = 0
130 | opt.gen = 'gen/'
131 |
132 | -- Load model
133 | local model = torch.load(opt.model)
134 | local criterion = nn.BCECriterion():cuda()
135 |
136 | -- Data loading
137 | local trainLoader, valLoader = DataLoader.create(opt)
138 |
139 | -- Logger
140 | dirPath = paths.dirname(opt.model)
141 | local logger = optim.Logger(paths.concat(dirPath,'threshold-tuning.log'))
142 | logger:setNames{"Thres", "F1", "Prec", "Rec", "Fnr", "IoU"}
143 | -- local dummyLogger = optim.Logger(nil)
144 |
145 | -- The trainer handles the training loop and evaluation on the validation set
146 | -- local trainer = Trainer(model, criterion, opt, nil, dummyLogger)
147 |
148 | model:cuda()
149 | model:evaluate()
150 |
151 | local thresholdList = torch.FloatTensor(opt.trials):random(20,55)/100
152 | local stats = torch.FloatTensor(opt.trials,5):zero()
153 | -- for trial = 1, opt.trials do
154 |
155 | local size = valLoader:size()
156 | local N = 0
157 |
158 | for n, sample in valLoader:run() do
159 |
160 | print(string.format('Iter %d/%d', n, size))
161 |
162 | -- Copy input and target to the GPU
163 | copyInputs(sample, opt)
164 |
165 | local output = model:forward(inputImg):float()
166 | local batchSize = output:size(1)
167 |
168 | for i=1,thresholdList:size(1) do
169 | local f1, precision, recall, fnr, IoU = computeScore(output:clone(), sample.target, thresholdList[i]) -- clone: computeScore binarizes its input tensor in place
170 |
171 | stats[i][1] = stats[i][1] + f1*batchSize
172 | stats[i][2] = stats[i][2] + precision*batchSize
173 | stats[i][3] = stats[i][3] + recall*batchSize
174 | stats[i][4] = stats[i][4] + fnr*batchSize
175 | stats[i][5] = stats[i][5] + IoU*batchSize
176 | end
177 |
178 | N = N + batchSize
179 | end
180 |
181 | stats = stats/N
182 |
183 | -- Update logger
184 | for i=1,thresholdList:size(1) do
185 | local F1 = stats[i][1]
186 | local Prec = stats[i][2]
187 | local Rec = stats[i][3]
188 | local Fnr = stats[i][4]
189 | local IoU = stats[i][5]
190 | logger:add{thresholdList[i], F1, Prec, Rec, Fnr, IoU}
191 | end
192 |
193 | local val, ind = torch.max(stats[{ {}, {1} }], 1)
194 | ind = ind:squeeze()
195 |
196 | logger:add{}
197 | logger:add{thresholdList[ind], stats[ind][1], stats[ind][2], stats[ind][3], stats[ind][4], stats[ind][5]}
198 | print(string.format(' * Finished: Thres %.2f F1 %.3f Prec %.3f Rec %.3f Fnr %.3f IoU %.3f',
199 | thresholdList[ind], stats[ind][1], stats[ind][2], stats[ind][3], stats[ind][4], stats[ind][5]))
200 |
201 |
-------------------------------------------------------------------------------- /src-torch/tuning.sh: --------------------------------------------------------------------------------
1 | #1 - error in the max-unpooling
2 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/lenet5/deconv/ch-1/better-init/linear-deconv/BN-layer/,LR=0.01,batchSize=16,dataset=cdnet,nEpochs=60,optimizer=adam,weightDecay=0.0002/Thu-Jan-26-21:12:31-2017/resume/model_97.t7' -trials 60
3 |
4 | #2
5 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 
'checkpoints/resnet/cifar-10/deconv/ch-1/linear-deconv/bad-init/,LR=0.01,batchSize=16,dataset=cdnet,nEpochs=60,optimizer=adam,shortcutType=B,weightDecay=0.0002/Tue-Jan-24-13:00:13-2017/model_60.t7' -trials 60
6 |
7 | #3
8 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/cifar-10/deconv/ch-1/non-linear-deconv/bad-init/,LR=0.01,batchSize=16,dataset=cdnet,nEpochs=60,optimizer=adam,shortcutType=B,weightDecay=0.0002/Tue-Jan-24-22:11:42-2017/model_59.t7' -trials 60
9 |
10 | #4
11 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/imagenet/deconv/linear-deconv/,LR=0.01,base=imagenet,batchSize=16,dataset=cdnet,depth=34,nEpochs=60,optimizer=adam,weightDecay=0.0002/Fri-Feb-10-09:22:14-2017/model_58.t7' -trials 60
12 |
13 | #5
14 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/imagenet/deconv/non-linear-deconv/,LR=0.01,base=imagenet,batchSize=16,dataset=cdnet,depth=34,nEpochs=60,optimizer=adam,weightDecay=0.0002/Fri-Jan-27-01:04:18-2017/model_60.t7' -trials 60
15 |
16 | #6
17 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/imagenet/deconv/non-linear-deconv/,LR=0.01,base=imagenet,batchSize=16,dataset=cdnet,depth=34,nEpochs=60,optimizer=adam,weightDecay=0.0002/Thu-Jan-26-21:23:51-2017/model_60.t7' -trials 60
18 |
19 | #7
20 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/imagenet/deconv/non-linear-deconv/,LR=0.01,base=imagenet,batchSize=16,dataset=cdnet,depth=50,nEpochs=60,optimizer=adam,weightDecay=0.0002/Thu-Feb--9-19:41:31-2017/model_57.t7' -trials 60
21 |
22 | #8
23 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/imagenet/deconv/linear-deconv/,LR=0.01,base=imagenet,batchSize=16,dataset=cdnet,depth=50,nEpochs=60,optimizer=adam,weightDecay=0.0002/Thu-Feb--9-12:55:44-2017/model_60.t7' -trials 60
24 |
25 | #9
26 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/lenet5/dilation/ch-1/BN-layer/,LR=0.01,batchSize=16,dataset=cdnet,nEpochs=60,optimizer=adam,weightDecay=0.0002/Wed-Jan-25-00:11:07-2017/model-60.t7' -trials 60
27 |
28 | #10
29 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/cifar-10/bilinear-upsample/ch1/,LR=0.01,batchSize=16,dataset=cdnet,nEpochs=60,optimizer=adam,weightDecay=0.0002/Wed-Jan-25-19:50:57-2017/resume/model_97.t7' -trials 60
30 |
31 | #11
32 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 
'checkpoints/resnet/cifar-10/dilated/ch-1/,LR=0.01,base=cifar10,batchSize=16,dataset=cdnet,depth=34,nEpochs=60,optimizer=adam,weightDecay=0.0002/Fri-Jan-27-14:07:46-2017/resume/model_97.t7' -trials 60
33 |
34 | #12
35 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/cifar-10/dilated/ch-1/,LR=0.01,base=cifar10,batchSize=8,dataset=cdnet,depth=58,nEpochs=60,optimizer=adam,weightDecay=0.0002/Tue-Jan-31-00:33:22-2017/model_54.t7' -trials 60
36 |
37 | #13
38 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/imagenet/dilated/ch-1/,LR=0.01,base=imagenet,batchSize=16,dataset=cdnet,depth=34,nEpochs=60,optimizer=adam,weightDecay=0.0002/Sun-Jan-29-04:07:19-2017/model_55.t7' -trials 60
39 |
40 | #14
41 | CUDA_VISIBLE_DEVICES=1 th tune-threshold.lua -data '/home/lpcinelli/Documents/cdnet2014/braham-split-70-30' -dataset 'cdnet' -batchSize 32 -nThreads 2 -model 'checkpoints/resnet/imagenet/dilated/ch-1/pretrained/,LR=0.01,base=imagenet,batchSize=16,dataset=cdnet,depth=34,nEpochs=60,optimizer=adam,weightDecay=0.0002/Sat-Feb-11-15:15:25-2017/model_60.t7' -trials 60
42 |
43 |
44 |
-------------------------------------------------------------------------------- /src-torch/visualization.py: --------------------------------------------------------------------------------
1 | ##################################################################
2 | # AIM OF THE SCRIPT #
3 | ##################################################################
4 | # read through checkpoints/ #
5 | # get params from dir names #
6 | # get stats from each subfolder #
7 | # generate scattered plot of scores in param space #
8 | ##################################################################
9 |
10 | ##################################################################
11 | # IMPORTS SECTION #
12 | ##################################################################
13 | import argparse #
14 | import numpy as np #
15 | import pandas as pd #
16 | import os.path as path #
17 | import os #
18 | import subprocess #
19 | import matplotlib.pyplot as plt #
20 | ##################################################################
21 |
22 | def slowlyReadlines(fname):
23 | with open(fname, 'rb') as f:
24 | print('start reading')
25 | first = f.readline() # Read the first line
26 | f.seek(-2, 2) # Jump to the second last byte.
27 | print('read first line')
28 | while f.read(1) != b"\n":  # Until EOL is found...
29 | f.seek(-2, 1) # ...jump back the read byte plus one more.
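# At this point the cursor sits just past the newline that precedes the
# last line, so the readline() below returns exactly the last line without
# reading the whole file into memory.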
30 | last = f.readline() # Read the last line
31 | return first, last
32 |
33 | def readlines(fname):
34 | first = subprocess.check_output(['head', '-1', fname])
35 | last = subprocess.check_output(['tail', '-1', fname])
36 | return first, last
37 |
38 | def tonumber(value):
39 | try:
40 | return float(value)
41 | except (TypeError, ValueError):
42 | return value
43 |
44 | def replaceNan(df, cols, vals):
45 | if type(cols) == list and type(vals) == list and len(cols) != len(vals):
46 | return
47 | df.fillna({col: val for col, val in zip(cols, vals)}, axis=0, inplace=True)
48 | return
49 |
50 |
51 | def plotScattered(x, y, results, fig, xlabel, ylabel):
52 | x_scatter = np.asarray(x, dtype=np.float64)
53 | y_scatter = np.asarray(y, dtype=np.float64)
54 | axs = []
55 | im = []
56 | marker_size = 100
57 |
58 | # Set common labels
59 | fig.text(0.5, 0.04, xlabel, ha='center', va='center')
60 | fig.text(0.05, 0.5, ylabel, ha='center', va='center', rotation='vertical')
61 |
62 | try:
63 | for i in xrange(results.shape[-1]):
64 | colors = np.asarray(results.iloc[:,i])
65 | axs.append(fig.add_subplot(results.shape[-1], 1, i+1))
66 | im.append(axs[i].scatter(x_scatter, y_scatter, marker_size, c=colors))
67 | fig.colorbar(im[i], ax=axs[i])
68 | # axs[i].set_title(results.column[i])
69 | # ax[i+1].set_title('ax1 title')
70 |
71 | # if results is a single array/series
72 | except:
73 | colors = np.asarray(results)
74 | axs.append(fig.add_subplot(1, 1, 1))
75 | im.append(axs[0].scatter(x_scatter, y_scatter, marker_size, c=colors))
76 |
77 | return axs
78 |
79 | # create pandas dataframe from experimental data
80 | def getData(dataDir):
81 |
82 | experiments = []
83 | for (dirpath, dirnames, filenames) in os.walk(dataDir):
84 | if (dirnames is None) or (not filenames):
85 | continue
86 |
87 | d = {}
88 | paramsList = str.split(str.split(dirpath,'/')[-2], ',')
89 | d = dict(map(lambda x: str.split(x, '='), filter(lambda x: '=' in x , paramsList)))
90 |
91 | logs = list(filter(lambda x: x == 'training.log' or x == 'loss.log' ,filenames))
92 | resultLines = [readlines(path.join(dirpath, log)) for log in logs]
93 | resultLines = list(filter(lambda x: x[0] != x[1], resultLines))
94 | for resultLine in resultLines:
95 | d.update( {str(el): tonumber(val) for el,val in zip(str.split(resultLine[0].rstrip(),'\t'), str.split(resultLine[1].rstrip(), '\t'))} )
96 |
97 | d['path'] = dirpath
98 | experiments.append(d)
99 |
100 | return pd.DataFrame(experiments)
101 |
102 | def main():
103 | parser = argparse.ArgumentParser(description='Creates scattered plots of hyperparams vs scores')
104 | parser.add_argument('--dir', help='Specifies the dir to look into')
105 | args = parser.parse_args()
106 |
107 | data = getData(args.dir)
108 |
109 | # default config values don't show up in the log files
110 | replaceNan(data, ['LR','weightDecay','depth'], [0.1, 1e-4, 34])
111 |
112 | # filter useful columns only
113 | colsPerEpoch = ['Training Acc Error','Testing Acc Error','Training Recall Error','Testing Recall Error','Training F1 Error','Validation F1 Error', 'Training Loss', 'Validation Loss']
114 | colsPerIter = ['Loss']
115 | colsParam = ['LR', 'weightDecay', 'Epoch', 'dataset']
116 | # colsParam = ['LR', 'depth', 'weightDecay', 'Epoch', 'dataset']
117 | data = (data[colsPerEpoch + colsPerIter + colsParam]).dropna(axis=0)
118 |
119 | # filter out unwanted datasets
120 | data = data[ data['dataset'] == 'cdnet' ].reset_index()
121 | data = data[ data['Epoch'] == 1 ].reset_index()
122 |
123 | #plot every train/test error pair (acc/recall/f1/loss) scattered in log(LR) vs 
log(weightDecay) 124 | #plot every train/test error pair vs depth
125 | x_scatter = np.log10(data.loc[:,'LR'].astype(np.float64))
126 | y_scatter = np.log10(data['weightDecay'].astype(np.float64))
127 |
128 | for col in xrange(0,len(colsPerEpoch),2):
129 | fig = plt.figure(1)
130 | dataName = colsPerEpoch[col].split(' ')
131 | # use val and train errors as color intensity in scattered plot
132 | colors = data.loc[:, [colsPerEpoch[col],colsPerEpoch[col+1]] ].astype(np.float64)
133 | plotScattered(x_scatter, y_scatter, colors, fig, 'log learning rate', 'log reg strength')
134 | fig.suptitle('CDNet ' + dataName[1], fontsize=14)
135 | # plt.show()
136 | plt.savefig('_'.join(dataName[1:]) + '-param_space.eps', format='eps', dpi=1000)
137 | plt.savefig('_'.join(dataName[1:]) + '-param_space.png', format='png')
138 | plt.clf()
139 | # fig = plt.figure()
140 | # plt.plot(np.arange(len(data.loc[:, 'Loss'])),data.loc[:, 'Loss'].astype(np.float64))
141 | # plt.show()
142 |
143 | if __name__ == '__main__':
144 | main()
145 |
-------------------------------------------------------------------------------- /train-val-split.sh: --------------------------------------------------------------------------------
1 | # Script to partition the 'raw' cdnet dataset into 3 dirs: background, train, val.
2 | # The ratio variable defines the size of the train set relative to the whole
3 | # dataset. Ratio should be an integer in the interval [0,100].
4 |
5 | dataset=$1
6 | destination=$2
7 | ratio=$3
8 |
9 | for videoType in $dataset/*; do
10 | videoTypeName=${videoType##*/}
11 | for video in $videoType/*; do
12 | videoName=${video##*/}
13 |
14 | mkdir -p $destination/train/$videoTypeName/$videoName/input/
15 | mkdir -p $destination/val/$videoTypeName/$videoName/input/
16 |
17 | mkdir -p $destination/train/$videoTypeName/$videoName/groundtruth/
18 | mkdir -p $destination/val/$videoTypeName/$videoName/groundtruth/
19 |
20 | mkdir -p $destination/background/$videoTypeName/$videoName/ROI/
21 | mkdir -p $destination/background/$videoTypeName/$videoName/reference/
22 |
23 | ROI=$(<$video/temporalROI.txt)
24 | eval x=($ROI)
25 | begin=${x[0]}
26 | end=${x[1]}
27 |
28 | if [[ ("$videoTypeName" = "badWeather") || ("$videoTypeName" = "PTZ") || ("$videoTypeName" = "turbulence") || ("$videoTypeName" = "nightVideos") || ("$videoTypeName" = "lowFramerate") ]]; then
29 | (( end = ($end + $begin)/2 - 1 ))
30 | echo "fixed roi for "$videoName
31 | fi
32 |
33 | (( endTrain = $begin + ( $ratio*( $end - $begin ) + 50 )/100 )) # the +50 rounds the integer division to the nearest frame
34 | (( beginTest = $endTrain + 1))
35 |
36 | echo $videoTypeName"/"$videoName
37 | echo "begin - "$begin
38 | echo "endTrain - "$endTrain
39 | echo "beginTest - "$beginTest
40 | echo "end - "$end
41 | echo -e "\n"
42 |
43 | cp $video/ROI.jpg $destination/background/$videoTypeName/$videoName/ROI/
44 |
45 | for (( frame=1; frame<=150; frame++)); do
46 | printf -v name "%06d" $frame
47 | cp $video/input/in${name}.jpg $destination/background/$videoTypeName/$videoName/reference/
48 | done
49 |
50 | for (( frame=$begin; frame<=$endTrain; frame++)); do
51 | printf -v name "%06d" $frame
52 | cp $video/input/in${name}.jpg $destination/train/$videoTypeName/$videoName/input/
53 | cp $video/groundtruth/gt${name}.png $destination/train/$videoTypeName/$videoName/groundtruth/
54 | done
55 |
56 | for (( frame=$beginTest; frame<=$end; frame++)); do
57 | printf -v name "%06d" $frame
58 | cp $video/input/in${name}.jpg $destination/val/$videoTypeName/$videoName/input/
59 | cp $video/groundtruth/gt${name}.png 
$destination/val/$videoTypeName/$videoName/groundtruth/ 60 | done 61 | 62 | done 63 | done 64 | --------------------------------------------------------------------------------
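The split script above takes three positional arguments: the raw CDnet 2014 root, a destination directory, and the train percentage. A hypothetical invocation (paths are illustrative; the 70/30 ratio matches the split directories referenced elsewhere in this repo):

```bash
bash train-val-split.sh ~/Documents/cdnet2014/raw ~/Documents/cdnet2014/split-70-30 70
```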