├── Caffe_Models
│   ├── README.md
│   ├── bvlc_alexnet
│   │   └── deploy.prototxt
│   ├── googlenet
│   │   └── deploy.prototxt
│   ├── ilsvrc_2012_mean.npy
│   └── vgg network
│       └── VGG_ILSVRC_16_layers_deploy.prototxt
├── LICENSE
├── README.md
├── data
│   ├── ilsvrc_2012_labels.txt
│   ├── n01855672_8202.jpg
│   ├── n02488291_1177.jpg
│   └── n02917067_1599.jpg
├── experiments_imagenet.py
├── prediction_difference_analysis.py
├── sensitivity_analysis_caffe.py
├── utils_classifiers.py
├── utils_data.py
├── utils_sampling.py
└── utils_visualise.py
--------------------------------------------------------------------------------
/Caffe_Models/README.md:
--------------------------------------------------------------------------------
1 | The code currently supports three pretrained Caffe models. To run experiments with one of them, you first need to download the corresponding caffemodel file.
2 | 
3 | ### ALEXNET
4 | 
5 | Download the caffemodel file from the following link and save it as 'bvlc_alexnet.caffemodel' in the folder './bvlc_alexnet':
6 | 
7 | https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet
8 | 
9 | ### GOOGLENET
10 | 
11 | Download the caffemodel file from the following link and save it as 'bvlc_googlenet.caffemodel' in the folder './googlenet':
12 | 
13 | https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet
14 | 
15 | ### VGG (16 layers)
16 | 
17 | Download the caffemodel file from the following link and save it as 'VGG_ILSVRC_16_layers.caffemodel' in the folder './vgg network':
18 | 
19 | https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
20 | 
--------------------------------------------------------------------------------
/Caffe_Models/bvlc_alexnet/deploy.prototxt:
--------------------------------------------------------------------------------
1 | name: "AlexNet" 2 | input: "data" 3 | input_dim: 20 4 | input_dim: 3 5 | input_dim: 227 6 | input_dim: 227 7 | force_backward: true 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 0 20 | } 21 | convolution_param { 22 | num_output: 96 23 | kernel_size: 11 24 | stride: 4 25 | } 26 | } 27 | layer { 28 | name: "relu1" 29 | type: "ReLU" 30 | bottom: "conv1" 31 | top: "conv1" 32 | } 33 | layer { 34 | name: "norm1" 35 | type: "LRN" 36 | bottom: "conv1" 37 | top: "norm1" 38 | lrn_param { 39 | local_size: 5 40 | alpha: 0.0001 41 | beta: 0.75 42 | } 43 | } 44 | layer { 45 | name: "pool1" 46 | type: "Pooling" 47 | bottom: "norm1" 48 | top: "pool1" 49 | pooling_param { 50 | pool: MAX 51 | kernel_size: 3 52 | stride: 2 53 | } 54 | } 55 | layer { 56 | name: "conv2" 57 | type: "Convolution" 58 | bottom: "pool1" 59 | top: "conv2" 60 | param { 61 | lr_mult: 1 62 | decay_mult: 1 63 | } 64 | param { 65 | lr_mult: 2 66 | decay_mult: 0 67 | } 68 | convolution_param { 69 | num_output: 256 70 | pad: 2 71 | kernel_size: 5 72 | group: 2 73 | } 74 | } 75 | layer { 76 | name: "relu2" 77 | type: "ReLU" 78 | bottom: "conv2" 79 | top: "conv2" 80 | } 81 | layer { 82 | name: "norm2" 83 | type: "LRN" 84 | bottom: "conv2" 85 | top: "norm2" 86 | lrn_param { 87 | local_size: 5 88 | alpha: 0.0001 89 | beta: 0.75 90 | } 91 | } 92 | layer { 93 | name: "pool2" 94 | type: "Pooling" 95 | bottom: "norm2" 96 | top: "pool2" 97 | pooling_param { 98 | pool: MAX 99 | kernel_size: 3 100 | stride: 2 101 | } 102 | } 103 | layer { 104 | name: "conv3" 105 | type: "Convolution" 106 | bottom: "pool2" 107 | top: "conv3" 108 | param { 109 | lr_mult: 1 110 | decay_mult: 1 111 | } 112 | param { 113 | lr_mult: 2 114 | decay_mult: 0 115 | } 116 | 
convolution_param { 117 | num_output: 384 118 | pad: 1 119 | kernel_size: 3 120 | } 121 | } 122 | layer { 123 | name: "relu3" 124 | type: "ReLU" 125 | bottom: "conv3" 126 | top: "conv3" 127 | } 128 | layer { 129 | name: "conv4" 130 | type: "Convolution" 131 | bottom: "conv3" 132 | top: "conv4" 133 | param { 134 | lr_mult: 1 135 | decay_mult: 1 136 | } 137 | param { 138 | lr_mult: 2 139 | decay_mult: 0 140 | } 141 | convolution_param { 142 | num_output: 384 143 | pad: 1 144 | kernel_size: 3 145 | group: 2 146 | } 147 | } 148 | layer { 149 | name: "relu4" 150 | type: "ReLU" 151 | bottom: "conv4" 152 | top: "conv4" 153 | } 154 | layer { 155 | name: "conv5" 156 | type: "Convolution" 157 | bottom: "conv4" 158 | top: "conv5" 159 | param { 160 | lr_mult: 1 161 | decay_mult: 1 162 | } 163 | param { 164 | lr_mult: 2 165 | decay_mult: 0 166 | } 167 | convolution_param { 168 | num_output: 256 169 | pad: 1 170 | kernel_size: 3 171 | group: 2 172 | } 173 | } 174 | layer { 175 | name: "relu5" 176 | type: "ReLU" 177 | bottom: "conv5" 178 | top: "conv5" 179 | } 180 | layer { 181 | name: "pool5" 182 | type: "Pooling" 183 | bottom: "conv5" 184 | top: "pool5" 185 | pooling_param { 186 | pool: MAX 187 | kernel_size: 3 188 | stride: 2 189 | } 190 | } 191 | layer { 192 | name: "fc6" 193 | type: "InnerProduct" 194 | bottom: "pool5" 195 | top: "fc6" 196 | param { 197 | lr_mult: 1 198 | decay_mult: 1 199 | } 200 | param { 201 | lr_mult: 2 202 | decay_mult: 0 203 | } 204 | inner_product_param { 205 | num_output: 4096 206 | } 207 | } 208 | layer { 209 | name: "relu6" 210 | type: "ReLU" 211 | bottom: "fc6" 212 | top: "fc6" 213 | } 214 | layer { 215 | name: "drop6" 216 | type: "Dropout" 217 | bottom: "fc6" 218 | top: "fc6" 219 | dropout_param { 220 | dropout_ratio: 0.5 221 | } 222 | } 223 | layer { 224 | name: "fc7" 225 | type: "InnerProduct" 226 | bottom: "fc6" 227 | top: "fc7" 228 | param { 229 | lr_mult: 1 230 | decay_mult: 1 231 | } 232 | param { 233 | lr_mult: 2 234 | decay_mult: 0 235 | } 236 | inner_product_param { 237 | num_output: 4096 238 | } 239 | } 240 | layer { 241 | name: "relu7" 242 | type: "ReLU" 243 | bottom: "fc7" 244 | top: "fc7" 245 | } 246 | layer { 247 | name: "drop7" 248 | type: "Dropout" 249 | bottom: "fc7" 250 | top: "fc7" 251 | dropout_param { 252 | dropout_ratio: 0.5 253 | } 254 | } 255 | layer { 256 | name: "fc8" 257 | type: "InnerProduct" 258 | bottom: "fc7" 259 | top: "fc8" 260 | param { 261 | lr_mult: 1 262 | decay_mult: 1 263 | } 264 | param { 265 | lr_mult: 2 266 | decay_mult: 0 267 | } 268 | inner_product_param { 269 | num_output: 1000 270 | } 271 | } 272 | layer { 273 | name: "prob" 274 | type: "Softmax" 275 | bottom: "fc8" 276 | top: "prob" 277 | } 278 | -------------------------------------------------------------------------------- /Caffe_Models/googlenet/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "GoogleNet" 2 | input: "data" 3 | input_dim: 20 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | force_backward: true 8 | layer { 9 | name: "conv1/7x7_s2" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1/7x7_s2" 13 | param { 14 | lr_mult: 1 15 | decay_mult: 1 16 | } 17 | param { 18 | lr_mult: 2 19 | decay_mult: 0 20 | } 21 | convolution_param { 22 | num_output: 64 23 | pad: 3 24 | kernel_size: 7 25 | stride: 2 26 | weight_filler { 27 | type: "xavier" 28 | std: 0.1 29 | } 30 | bias_filler { 31 | type: "constant" 32 | value: 0.2 33 | } 34 | } 35 | } 36 | layer { 37 | name: "conv1/relu_7x7" 38 | type: 
"ReLU" 39 | bottom: "conv1/7x7_s2" 40 | top: "conv1/7x7_s2" 41 | } 42 | layer { 43 | name: "pool1/3x3_s2" 44 | type: "Pooling" 45 | bottom: "conv1/7x7_s2" 46 | top: "pool1/3x3_s2" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 3 50 | stride: 2 51 | } 52 | } 53 | layer { 54 | name: "pool1/norm1" 55 | type: "LRN" 56 | bottom: "pool1/3x3_s2" 57 | top: "pool1/norm1" 58 | lrn_param { 59 | local_size: 5 60 | alpha: 0.0001 61 | beta: 0.75 62 | } 63 | } 64 | layer { 65 | name: "conv2/3x3_reduce" 66 | type: "Convolution" 67 | bottom: "pool1/norm1" 68 | top: "conv2/3x3_reduce" 69 | param { 70 | lr_mult: 1 71 | decay_mult: 1 72 | } 73 | param { 74 | lr_mult: 2 75 | decay_mult: 0 76 | } 77 | convolution_param { 78 | num_output: 64 79 | kernel_size: 1 80 | weight_filler { 81 | type: "xavier" 82 | std: 0.1 83 | } 84 | bias_filler { 85 | type: "constant" 86 | value: 0.2 87 | } 88 | } 89 | } 90 | layer { 91 | name: "conv2/relu_3x3_reduce" 92 | type: "ReLU" 93 | bottom: "conv2/3x3_reduce" 94 | top: "conv2/3x3_reduce" 95 | } 96 | layer { 97 | name: "conv2/3x3" 98 | type: "Convolution" 99 | bottom: "conv2/3x3_reduce" 100 | top: "conv2/3x3" 101 | param { 102 | lr_mult: 1 103 | decay_mult: 1 104 | } 105 | param { 106 | lr_mult: 2 107 | decay_mult: 0 108 | } 109 | convolution_param { 110 | num_output: 192 111 | pad: 1 112 | kernel_size: 3 113 | weight_filler { 114 | type: "xavier" 115 | std: 0.03 116 | } 117 | bias_filler { 118 | type: "constant" 119 | value: 0.2 120 | } 121 | } 122 | } 123 | layer { 124 | name: "conv2/relu_3x3" 125 | type: "ReLU" 126 | bottom: "conv2/3x3" 127 | top: "conv2/3x3" 128 | } 129 | layer { 130 | name: "conv2/norm2" 131 | type: "LRN" 132 | bottom: "conv2/3x3" 133 | top: "conv2/norm2" 134 | lrn_param { 135 | local_size: 5 136 | alpha: 0.0001 137 | beta: 0.75 138 | } 139 | } 140 | layer { 141 | name: "pool2/3x3_s2" 142 | type: "Pooling" 143 | bottom: "conv2/norm2" 144 | top: "pool2/3x3_s2" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "inception_3a/1x1" 153 | type: "Convolution" 154 | bottom: "pool2/3x3_s2" 155 | top: "inception_3a/1x1" 156 | param { 157 | lr_mult: 1 158 | decay_mult: 1 159 | } 160 | param { 161 | lr_mult: 2 162 | decay_mult: 0 163 | } 164 | convolution_param { 165 | num_output: 64 166 | kernel_size: 1 167 | weight_filler { 168 | type: "xavier" 169 | std: 0.03 170 | } 171 | bias_filler { 172 | type: "constant" 173 | value: 0.2 174 | } 175 | } 176 | } 177 | layer { 178 | name: "inception_3a/relu_1x1" 179 | type: "ReLU" 180 | bottom: "inception_3a/1x1" 181 | top: "inception_3a/1x1" 182 | } 183 | layer { 184 | name: "inception_3a/3x3_reduce" 185 | type: "Convolution" 186 | bottom: "pool2/3x3_s2" 187 | top: "inception_3a/3x3_reduce" 188 | param { 189 | lr_mult: 1 190 | decay_mult: 1 191 | } 192 | param { 193 | lr_mult: 2 194 | decay_mult: 0 195 | } 196 | convolution_param { 197 | num_output: 96 198 | kernel_size: 1 199 | weight_filler { 200 | type: "xavier" 201 | std: 0.09 202 | } 203 | bias_filler { 204 | type: "constant" 205 | value: 0.2 206 | } 207 | } 208 | } 209 | layer { 210 | name: "inception_3a/relu_3x3_reduce" 211 | type: "ReLU" 212 | bottom: "inception_3a/3x3_reduce" 213 | top: "inception_3a/3x3_reduce" 214 | } 215 | layer { 216 | name: "inception_3a/3x3" 217 | type: "Convolution" 218 | bottom: "inception_3a/3x3_reduce" 219 | top: "inception_3a/3x3" 220 | param { 221 | lr_mult: 1 222 | decay_mult: 1 223 | } 224 | param { 225 | lr_mult: 2 226 | decay_mult: 0 227 | } 228 | 
convolution_param { 229 | num_output: 128 230 | pad: 1 231 | kernel_size: 3 232 | weight_filler { 233 | type: "xavier" 234 | std: 0.03 235 | } 236 | bias_filler { 237 | type: "constant" 238 | value: 0.2 239 | } 240 | } 241 | } 242 | layer { 243 | name: "inception_3a/relu_3x3" 244 | type: "ReLU" 245 | bottom: "inception_3a/3x3" 246 | top: "inception_3a/3x3" 247 | } 248 | layer { 249 | name: "inception_3a/5x5_reduce" 250 | type: "Convolution" 251 | bottom: "pool2/3x3_s2" 252 | top: "inception_3a/5x5_reduce" 253 | param { 254 | lr_mult: 1 255 | decay_mult: 1 256 | } 257 | param { 258 | lr_mult: 2 259 | decay_mult: 0 260 | } 261 | convolution_param { 262 | num_output: 16 263 | kernel_size: 1 264 | weight_filler { 265 | type: "xavier" 266 | std: 0.2 267 | } 268 | bias_filler { 269 | type: "constant" 270 | value: 0.2 271 | } 272 | } 273 | } 274 | layer { 275 | name: "inception_3a/relu_5x5_reduce" 276 | type: "ReLU" 277 | bottom: "inception_3a/5x5_reduce" 278 | top: "inception_3a/5x5_reduce" 279 | } 280 | layer { 281 | name: "inception_3a/5x5" 282 | type: "Convolution" 283 | bottom: "inception_3a/5x5_reduce" 284 | top: "inception_3a/5x5" 285 | param { 286 | lr_mult: 1 287 | decay_mult: 1 288 | } 289 | param { 290 | lr_mult: 2 291 | decay_mult: 0 292 | } 293 | convolution_param { 294 | num_output: 32 295 | pad: 2 296 | kernel_size: 5 297 | weight_filler { 298 | type: "xavier" 299 | std: 0.03 300 | } 301 | bias_filler { 302 | type: "constant" 303 | value: 0.2 304 | } 305 | } 306 | } 307 | layer { 308 | name: "inception_3a/relu_5x5" 309 | type: "ReLU" 310 | bottom: "inception_3a/5x5" 311 | top: "inception_3a/5x5" 312 | } 313 | layer { 314 | name: "inception_3a/pool" 315 | type: "Pooling" 316 | bottom: "pool2/3x3_s2" 317 | top: "inception_3a/pool" 318 | pooling_param { 319 | pool: MAX 320 | kernel_size: 3 321 | stride: 1 322 | pad: 1 323 | } 324 | } 325 | layer { 326 | name: "inception_3a/pool_proj" 327 | type: "Convolution" 328 | bottom: "inception_3a/pool" 329 | top: "inception_3a/pool_proj" 330 | param { 331 | lr_mult: 1 332 | decay_mult: 1 333 | } 334 | param { 335 | lr_mult: 2 336 | decay_mult: 0 337 | } 338 | convolution_param { 339 | num_output: 32 340 | kernel_size: 1 341 | weight_filler { 342 | type: "xavier" 343 | std: 0.1 344 | } 345 | bias_filler { 346 | type: "constant" 347 | value: 0.2 348 | } 349 | } 350 | } 351 | layer { 352 | name: "inception_3a/relu_pool_proj" 353 | type: "ReLU" 354 | bottom: "inception_3a/pool_proj" 355 | top: "inception_3a/pool_proj" 356 | } 357 | layer { 358 | name: "inception_3a/output" 359 | type: "Concat" 360 | bottom: "inception_3a/1x1" 361 | bottom: "inception_3a/3x3" 362 | bottom: "inception_3a/5x5" 363 | bottom: "inception_3a/pool_proj" 364 | top: "inception_3a/output" 365 | } 366 | layer { 367 | name: "inception_3b/1x1" 368 | type: "Convolution" 369 | bottom: "inception_3a/output" 370 | top: "inception_3b/1x1" 371 | param { 372 | lr_mult: 1 373 | decay_mult: 1 374 | } 375 | param { 376 | lr_mult: 2 377 | decay_mult: 0 378 | } 379 | convolution_param { 380 | num_output: 128 381 | kernel_size: 1 382 | weight_filler { 383 | type: "xavier" 384 | std: 0.03 385 | } 386 | bias_filler { 387 | type: "constant" 388 | value: 0.2 389 | } 390 | } 391 | } 392 | layer { 393 | name: "inception_3b/relu_1x1" 394 | type: "ReLU" 395 | bottom: "inception_3b/1x1" 396 | top: "inception_3b/1x1" 397 | } 398 | layer { 399 | name: "inception_3b/3x3_reduce" 400 | type: "Convolution" 401 | bottom: "inception_3a/output" 402 | top: "inception_3b/3x3_reduce" 403 | param { 404 | lr_mult: 
1 405 | decay_mult: 1 406 | } 407 | param { 408 | lr_mult: 2 409 | decay_mult: 0 410 | } 411 | convolution_param { 412 | num_output: 128 413 | kernel_size: 1 414 | weight_filler { 415 | type: "xavier" 416 | std: 0.09 417 | } 418 | bias_filler { 419 | type: "constant" 420 | value: 0.2 421 | } 422 | } 423 | } 424 | layer { 425 | name: "inception_3b/relu_3x3_reduce" 426 | type: "ReLU" 427 | bottom: "inception_3b/3x3_reduce" 428 | top: "inception_3b/3x3_reduce" 429 | } 430 | layer { 431 | name: "inception_3b/3x3" 432 | type: "Convolution" 433 | bottom: "inception_3b/3x3_reduce" 434 | top: "inception_3b/3x3" 435 | param { 436 | lr_mult: 1 437 | decay_mult: 1 438 | } 439 | param { 440 | lr_mult: 2 441 | decay_mult: 0 442 | } 443 | convolution_param { 444 | num_output: 192 445 | pad: 1 446 | kernel_size: 3 447 | weight_filler { 448 | type: "xavier" 449 | std: 0.03 450 | } 451 | bias_filler { 452 | type: "constant" 453 | value: 0.2 454 | } 455 | } 456 | } 457 | layer { 458 | name: "inception_3b/relu_3x3" 459 | type: "ReLU" 460 | bottom: "inception_3b/3x3" 461 | top: "inception_3b/3x3" 462 | } 463 | layer { 464 | name: "inception_3b/5x5_reduce" 465 | type: "Convolution" 466 | bottom: "inception_3a/output" 467 | top: "inception_3b/5x5_reduce" 468 | param { 469 | lr_mult: 1 470 | decay_mult: 1 471 | } 472 | param { 473 | lr_mult: 2 474 | decay_mult: 0 475 | } 476 | convolution_param { 477 | num_output: 32 478 | kernel_size: 1 479 | weight_filler { 480 | type: "xavier" 481 | std: 0.2 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.2 486 | } 487 | } 488 | } 489 | layer { 490 | name: "inception_3b/relu_5x5_reduce" 491 | type: "ReLU" 492 | bottom: "inception_3b/5x5_reduce" 493 | top: "inception_3b/5x5_reduce" 494 | } 495 | layer { 496 | name: "inception_3b/5x5" 497 | type: "Convolution" 498 | bottom: "inception_3b/5x5_reduce" 499 | top: "inception_3b/5x5" 500 | param { 501 | lr_mult: 1 502 | decay_mult: 1 503 | } 504 | param { 505 | lr_mult: 2 506 | decay_mult: 0 507 | } 508 | convolution_param { 509 | num_output: 96 510 | pad: 2 511 | kernel_size: 5 512 | weight_filler { 513 | type: "xavier" 514 | std: 0.03 515 | } 516 | bias_filler { 517 | type: "constant" 518 | value: 0.2 519 | } 520 | } 521 | } 522 | layer { 523 | name: "inception_3b/relu_5x5" 524 | type: "ReLU" 525 | bottom: "inception_3b/5x5" 526 | top: "inception_3b/5x5" 527 | } 528 | layer { 529 | name: "inception_3b/pool" 530 | type: "Pooling" 531 | bottom: "inception_3a/output" 532 | top: "inception_3b/pool" 533 | pooling_param { 534 | pool: MAX 535 | kernel_size: 3 536 | stride: 1 537 | pad: 1 538 | } 539 | } 540 | layer { 541 | name: "inception_3b/pool_proj" 542 | type: "Convolution" 543 | bottom: "inception_3b/pool" 544 | top: "inception_3b/pool_proj" 545 | param { 546 | lr_mult: 1 547 | decay_mult: 1 548 | } 549 | param { 550 | lr_mult: 2 551 | decay_mult: 0 552 | } 553 | convolution_param { 554 | num_output: 64 555 | kernel_size: 1 556 | weight_filler { 557 | type: "xavier" 558 | std: 0.1 559 | } 560 | bias_filler { 561 | type: "constant" 562 | value: 0.2 563 | } 564 | } 565 | } 566 | layer { 567 | name: "inception_3b/relu_pool_proj" 568 | type: "ReLU" 569 | bottom: "inception_3b/pool_proj" 570 | top: "inception_3b/pool_proj" 571 | } 572 | layer { 573 | name: "inception_3b/output" 574 | type: "Concat" 575 | bottom: "inception_3b/1x1" 576 | bottom: "inception_3b/3x3" 577 | bottom: "inception_3b/5x5" 578 | bottom: "inception_3b/pool_proj" 579 | top: "inception_3b/output" 580 | } 581 | layer { 582 | name: "pool3/3x3_s2" 583 
| type: "Pooling" 584 | bottom: "inception_3b/output" 585 | top: "pool3/3x3_s2" 586 | pooling_param { 587 | pool: MAX 588 | kernel_size: 3 589 | stride: 2 590 | } 591 | } 592 | layer { 593 | name: "inception_4a/1x1" 594 | type: "Convolution" 595 | bottom: "pool3/3x3_s2" 596 | top: "inception_4a/1x1" 597 | param { 598 | lr_mult: 1 599 | decay_mult: 1 600 | } 601 | param { 602 | lr_mult: 2 603 | decay_mult: 0 604 | } 605 | convolution_param { 606 | num_output: 192 607 | kernel_size: 1 608 | weight_filler { 609 | type: "xavier" 610 | std: 0.03 611 | } 612 | bias_filler { 613 | type: "constant" 614 | value: 0.2 615 | } 616 | } 617 | } 618 | layer { 619 | name: "inception_4a/relu_1x1" 620 | type: "ReLU" 621 | bottom: "inception_4a/1x1" 622 | top: "inception_4a/1x1" 623 | } 624 | layer { 625 | name: "inception_4a/3x3_reduce" 626 | type: "Convolution" 627 | bottom: "pool3/3x3_s2" 628 | top: "inception_4a/3x3_reduce" 629 | param { 630 | lr_mult: 1 631 | decay_mult: 1 632 | } 633 | param { 634 | lr_mult: 2 635 | decay_mult: 0 636 | } 637 | convolution_param { 638 | num_output: 96 639 | kernel_size: 1 640 | weight_filler { 641 | type: "xavier" 642 | std: 0.09 643 | } 644 | bias_filler { 645 | type: "constant" 646 | value: 0.2 647 | } 648 | } 649 | } 650 | layer { 651 | name: "inception_4a/relu_3x3_reduce" 652 | type: "ReLU" 653 | bottom: "inception_4a/3x3_reduce" 654 | top: "inception_4a/3x3_reduce" 655 | } 656 | layer { 657 | name: "inception_4a/3x3" 658 | type: "Convolution" 659 | bottom: "inception_4a/3x3_reduce" 660 | top: "inception_4a/3x3" 661 | param { 662 | lr_mult: 1 663 | decay_mult: 1 664 | } 665 | param { 666 | lr_mult: 2 667 | decay_mult: 0 668 | } 669 | convolution_param { 670 | num_output: 208 671 | pad: 1 672 | kernel_size: 3 673 | weight_filler { 674 | type: "xavier" 675 | std: 0.03 676 | } 677 | bias_filler { 678 | type: "constant" 679 | value: 0.2 680 | } 681 | } 682 | } 683 | layer { 684 | name: "inception_4a/relu_3x3" 685 | type: "ReLU" 686 | bottom: "inception_4a/3x3" 687 | top: "inception_4a/3x3" 688 | } 689 | layer { 690 | name: "inception_4a/5x5_reduce" 691 | type: "Convolution" 692 | bottom: "pool3/3x3_s2" 693 | top: "inception_4a/5x5_reduce" 694 | param { 695 | lr_mult: 1 696 | decay_mult: 1 697 | } 698 | param { 699 | lr_mult: 2 700 | decay_mult: 0 701 | } 702 | convolution_param { 703 | num_output: 16 704 | kernel_size: 1 705 | weight_filler { 706 | type: "xavier" 707 | std: 0.2 708 | } 709 | bias_filler { 710 | type: "constant" 711 | value: 0.2 712 | } 713 | } 714 | } 715 | layer { 716 | name: "inception_4a/relu_5x5_reduce" 717 | type: "ReLU" 718 | bottom: "inception_4a/5x5_reduce" 719 | top: "inception_4a/5x5_reduce" 720 | } 721 | layer { 722 | name: "inception_4a/5x5" 723 | type: "Convolution" 724 | bottom: "inception_4a/5x5_reduce" 725 | top: "inception_4a/5x5" 726 | param { 727 | lr_mult: 1 728 | decay_mult: 1 729 | } 730 | param { 731 | lr_mult: 2 732 | decay_mult: 0 733 | } 734 | convolution_param { 735 | num_output: 48 736 | pad: 2 737 | kernel_size: 5 738 | weight_filler { 739 | type: "xavier" 740 | std: 0.03 741 | } 742 | bias_filler { 743 | type: "constant" 744 | value: 0.2 745 | } 746 | } 747 | } 748 | layer { 749 | name: "inception_4a/relu_5x5" 750 | type: "ReLU" 751 | bottom: "inception_4a/5x5" 752 | top: "inception_4a/5x5" 753 | } 754 | layer { 755 | name: "inception_4a/pool" 756 | type: "Pooling" 757 | bottom: "pool3/3x3_s2" 758 | top: "inception_4a/pool" 759 | pooling_param { 760 | pool: MAX 761 | kernel_size: 3 762 | stride: 1 763 | pad: 1 764 | } 765 | 
} 766 | layer { 767 | name: "inception_4a/pool_proj" 768 | type: "Convolution" 769 | bottom: "inception_4a/pool" 770 | top: "inception_4a/pool_proj" 771 | param { 772 | lr_mult: 1 773 | decay_mult: 1 774 | } 775 | param { 776 | lr_mult: 2 777 | decay_mult: 0 778 | } 779 | convolution_param { 780 | num_output: 64 781 | kernel_size: 1 782 | weight_filler { 783 | type: "xavier" 784 | std: 0.1 785 | } 786 | bias_filler { 787 | type: "constant" 788 | value: 0.2 789 | } 790 | } 791 | } 792 | layer { 793 | name: "inception_4a/relu_pool_proj" 794 | type: "ReLU" 795 | bottom: "inception_4a/pool_proj" 796 | top: "inception_4a/pool_proj" 797 | } 798 | layer { 799 | name: "inception_4a/output" 800 | type: "Concat" 801 | bottom: "inception_4a/1x1" 802 | bottom: "inception_4a/3x3" 803 | bottom: "inception_4a/5x5" 804 | bottom: "inception_4a/pool_proj" 805 | top: "inception_4a/output" 806 | } 807 | layer { 808 | name: "inception_4b/1x1" 809 | type: "Convolution" 810 | bottom: "inception_4a/output" 811 | top: "inception_4b/1x1" 812 | param { 813 | lr_mult: 1 814 | decay_mult: 1 815 | } 816 | param { 817 | lr_mult: 2 818 | decay_mult: 0 819 | } 820 | convolution_param { 821 | num_output: 160 822 | kernel_size: 1 823 | weight_filler { 824 | type: "xavier" 825 | std: 0.03 826 | } 827 | bias_filler { 828 | type: "constant" 829 | value: 0.2 830 | } 831 | } 832 | } 833 | layer { 834 | name: "inception_4b/relu_1x1" 835 | type: "ReLU" 836 | bottom: "inception_4b/1x1" 837 | top: "inception_4b/1x1" 838 | } 839 | layer { 840 | name: "inception_4b/3x3_reduce" 841 | type: "Convolution" 842 | bottom: "inception_4a/output" 843 | top: "inception_4b/3x3_reduce" 844 | param { 845 | lr_mult: 1 846 | decay_mult: 1 847 | } 848 | param { 849 | lr_mult: 2 850 | decay_mult: 0 851 | } 852 | convolution_param { 853 | num_output: 112 854 | kernel_size: 1 855 | weight_filler { 856 | type: "xavier" 857 | std: 0.09 858 | } 859 | bias_filler { 860 | type: "constant" 861 | value: 0.2 862 | } 863 | } 864 | } 865 | layer { 866 | name: "inception_4b/relu_3x3_reduce" 867 | type: "ReLU" 868 | bottom: "inception_4b/3x3_reduce" 869 | top: "inception_4b/3x3_reduce" 870 | } 871 | layer { 872 | name: "inception_4b/3x3" 873 | type: "Convolution" 874 | bottom: "inception_4b/3x3_reduce" 875 | top: "inception_4b/3x3" 876 | param { 877 | lr_mult: 1 878 | decay_mult: 1 879 | } 880 | param { 881 | lr_mult: 2 882 | decay_mult: 0 883 | } 884 | convolution_param { 885 | num_output: 224 886 | pad: 1 887 | kernel_size: 3 888 | weight_filler { 889 | type: "xavier" 890 | std: 0.03 891 | } 892 | bias_filler { 893 | type: "constant" 894 | value: 0.2 895 | } 896 | } 897 | } 898 | layer { 899 | name: "inception_4b/relu_3x3" 900 | type: "ReLU" 901 | bottom: "inception_4b/3x3" 902 | top: "inception_4b/3x3" 903 | } 904 | layer { 905 | name: "inception_4b/5x5_reduce" 906 | type: "Convolution" 907 | bottom: "inception_4a/output" 908 | top: "inception_4b/5x5_reduce" 909 | param { 910 | lr_mult: 1 911 | decay_mult: 1 912 | } 913 | param { 914 | lr_mult: 2 915 | decay_mult: 0 916 | } 917 | convolution_param { 918 | num_output: 24 919 | kernel_size: 1 920 | weight_filler { 921 | type: "xavier" 922 | std: 0.2 923 | } 924 | bias_filler { 925 | type: "constant" 926 | value: 0.2 927 | } 928 | } 929 | } 930 | layer { 931 | name: "inception_4b/relu_5x5_reduce" 932 | type: "ReLU" 933 | bottom: "inception_4b/5x5_reduce" 934 | top: "inception_4b/5x5_reduce" 935 | } 936 | layer { 937 | name: "inception_4b/5x5" 938 | type: "Convolution" 939 | bottom: "inception_4b/5x5_reduce" 940 | 
top: "inception_4b/5x5" 941 | param { 942 | lr_mult: 1 943 | decay_mult: 1 944 | } 945 | param { 946 | lr_mult: 2 947 | decay_mult: 0 948 | } 949 | convolution_param { 950 | num_output: 64 951 | pad: 2 952 | kernel_size: 5 953 | weight_filler { 954 | type: "xavier" 955 | std: 0.03 956 | } 957 | bias_filler { 958 | type: "constant" 959 | value: 0.2 960 | } 961 | } 962 | } 963 | layer { 964 | name: "inception_4b/relu_5x5" 965 | type: "ReLU" 966 | bottom: "inception_4b/5x5" 967 | top: "inception_4b/5x5" 968 | } 969 | layer { 970 | name: "inception_4b/pool" 971 | type: "Pooling" 972 | bottom: "inception_4a/output" 973 | top: "inception_4b/pool" 974 | pooling_param { 975 | pool: MAX 976 | kernel_size: 3 977 | stride: 1 978 | pad: 1 979 | } 980 | } 981 | layer { 982 | name: "inception_4b/pool_proj" 983 | type: "Convolution" 984 | bottom: "inception_4b/pool" 985 | top: "inception_4b/pool_proj" 986 | param { 987 | lr_mult: 1 988 | decay_mult: 1 989 | } 990 | param { 991 | lr_mult: 2 992 | decay_mult: 0 993 | } 994 | convolution_param { 995 | num_output: 64 996 | kernel_size: 1 997 | weight_filler { 998 | type: "xavier" 999 | std: 0.1 1000 | } 1001 | bias_filler { 1002 | type: "constant" 1003 | value: 0.2 1004 | } 1005 | } 1006 | } 1007 | layer { 1008 | name: "inception_4b/relu_pool_proj" 1009 | type: "ReLU" 1010 | bottom: "inception_4b/pool_proj" 1011 | top: "inception_4b/pool_proj" 1012 | } 1013 | layer { 1014 | name: "inception_4b/output" 1015 | type: "Concat" 1016 | bottom: "inception_4b/1x1" 1017 | bottom: "inception_4b/3x3" 1018 | bottom: "inception_4b/5x5" 1019 | bottom: "inception_4b/pool_proj" 1020 | top: "inception_4b/output" 1021 | } 1022 | layer { 1023 | name: "inception_4c/1x1" 1024 | type: "Convolution" 1025 | bottom: "inception_4b/output" 1026 | top: "inception_4c/1x1" 1027 | param { 1028 | lr_mult: 1 1029 | decay_mult: 1 1030 | } 1031 | param { 1032 | lr_mult: 2 1033 | decay_mult: 0 1034 | } 1035 | convolution_param { 1036 | num_output: 128 1037 | kernel_size: 1 1038 | weight_filler { 1039 | type: "xavier" 1040 | std: 0.03 1041 | } 1042 | bias_filler { 1043 | type: "constant" 1044 | value: 0.2 1045 | } 1046 | } 1047 | } 1048 | layer { 1049 | name: "inception_4c/relu_1x1" 1050 | type: "ReLU" 1051 | bottom: "inception_4c/1x1" 1052 | top: "inception_4c/1x1" 1053 | } 1054 | layer { 1055 | name: "inception_4c/3x3_reduce" 1056 | type: "Convolution" 1057 | bottom: "inception_4b/output" 1058 | top: "inception_4c/3x3_reduce" 1059 | param { 1060 | lr_mult: 1 1061 | decay_mult: 1 1062 | } 1063 | param { 1064 | lr_mult: 2 1065 | decay_mult: 0 1066 | } 1067 | convolution_param { 1068 | num_output: 128 1069 | kernel_size: 1 1070 | weight_filler { 1071 | type: "xavier" 1072 | std: 0.09 1073 | } 1074 | bias_filler { 1075 | type: "constant" 1076 | value: 0.2 1077 | } 1078 | } 1079 | } 1080 | layer { 1081 | name: "inception_4c/relu_3x3_reduce" 1082 | type: "ReLU" 1083 | bottom: "inception_4c/3x3_reduce" 1084 | top: "inception_4c/3x3_reduce" 1085 | } 1086 | layer { 1087 | name: "inception_4c/3x3" 1088 | type: "Convolution" 1089 | bottom: "inception_4c/3x3_reduce" 1090 | top: "inception_4c/3x3" 1091 | param { 1092 | lr_mult: 1 1093 | decay_mult: 1 1094 | } 1095 | param { 1096 | lr_mult: 2 1097 | decay_mult: 0 1098 | } 1099 | convolution_param { 1100 | num_output: 256 1101 | pad: 1 1102 | kernel_size: 3 1103 | weight_filler { 1104 | type: "xavier" 1105 | std: 0.03 1106 | } 1107 | bias_filler { 1108 | type: "constant" 1109 | value: 0.2 1110 | } 1111 | } 1112 | } 1113 | layer { 1114 | name: 
"inception_4c/relu_3x3" 1115 | type: "ReLU" 1116 | bottom: "inception_4c/3x3" 1117 | top: "inception_4c/3x3" 1118 | } 1119 | layer { 1120 | name: "inception_4c/5x5_reduce" 1121 | type: "Convolution" 1122 | bottom: "inception_4b/output" 1123 | top: "inception_4c/5x5_reduce" 1124 | param { 1125 | lr_mult: 1 1126 | decay_mult: 1 1127 | } 1128 | param { 1129 | lr_mult: 2 1130 | decay_mult: 0 1131 | } 1132 | convolution_param { 1133 | num_output: 24 1134 | kernel_size: 1 1135 | weight_filler { 1136 | type: "xavier" 1137 | std: 0.2 1138 | } 1139 | bias_filler { 1140 | type: "constant" 1141 | value: 0.2 1142 | } 1143 | } 1144 | } 1145 | layer { 1146 | name: "inception_4c/relu_5x5_reduce" 1147 | type: "ReLU" 1148 | bottom: "inception_4c/5x5_reduce" 1149 | top: "inception_4c/5x5_reduce" 1150 | } 1151 | layer { 1152 | name: "inception_4c/5x5" 1153 | type: "Convolution" 1154 | bottom: "inception_4c/5x5_reduce" 1155 | top: "inception_4c/5x5" 1156 | param { 1157 | lr_mult: 1 1158 | decay_mult: 1 1159 | } 1160 | param { 1161 | lr_mult: 2 1162 | decay_mult: 0 1163 | } 1164 | convolution_param { 1165 | num_output: 64 1166 | pad: 2 1167 | kernel_size: 5 1168 | weight_filler { 1169 | type: "xavier" 1170 | std: 0.03 1171 | } 1172 | bias_filler { 1173 | type: "constant" 1174 | value: 0.2 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "inception_4c/relu_5x5" 1180 | type: "ReLU" 1181 | bottom: "inception_4c/5x5" 1182 | top: "inception_4c/5x5" 1183 | } 1184 | layer { 1185 | name: "inception_4c/pool" 1186 | type: "Pooling" 1187 | bottom: "inception_4b/output" 1188 | top: "inception_4c/pool" 1189 | pooling_param { 1190 | pool: MAX 1191 | kernel_size: 3 1192 | stride: 1 1193 | pad: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "inception_4c/pool_proj" 1198 | type: "Convolution" 1199 | bottom: "inception_4c/pool" 1200 | top: "inception_4c/pool_proj" 1201 | param { 1202 | lr_mult: 1 1203 | decay_mult: 1 1204 | } 1205 | param { 1206 | lr_mult: 2 1207 | decay_mult: 0 1208 | } 1209 | convolution_param { 1210 | num_output: 64 1211 | kernel_size: 1 1212 | weight_filler { 1213 | type: "xavier" 1214 | std: 0.1 1215 | } 1216 | bias_filler { 1217 | type: "constant" 1218 | value: 0.2 1219 | } 1220 | } 1221 | } 1222 | layer { 1223 | name: "inception_4c/relu_pool_proj" 1224 | type: "ReLU" 1225 | bottom: "inception_4c/pool_proj" 1226 | top: "inception_4c/pool_proj" 1227 | } 1228 | layer { 1229 | name: "inception_4c/output" 1230 | type: "Concat" 1231 | bottom: "inception_4c/1x1" 1232 | bottom: "inception_4c/3x3" 1233 | bottom: "inception_4c/5x5" 1234 | bottom: "inception_4c/pool_proj" 1235 | top: "inception_4c/output" 1236 | } 1237 | layer { 1238 | name: "inception_4d/1x1" 1239 | type: "Convolution" 1240 | bottom: "inception_4c/output" 1241 | top: "inception_4d/1x1" 1242 | param { 1243 | lr_mult: 1 1244 | decay_mult: 1 1245 | } 1246 | param { 1247 | lr_mult: 2 1248 | decay_mult: 0 1249 | } 1250 | convolution_param { 1251 | num_output: 112 1252 | kernel_size: 1 1253 | weight_filler { 1254 | type: "xavier" 1255 | std: 0.03 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0.2 1260 | } 1261 | } 1262 | } 1263 | layer { 1264 | name: "inception_4d/relu_1x1" 1265 | type: "ReLU" 1266 | bottom: "inception_4d/1x1" 1267 | top: "inception_4d/1x1" 1268 | } 1269 | layer { 1270 | name: "inception_4d/3x3_reduce" 1271 | type: "Convolution" 1272 | bottom: "inception_4c/output" 1273 | top: "inception_4d/3x3_reduce" 1274 | param { 1275 | lr_mult: 1 1276 | decay_mult: 1 1277 | } 1278 | param { 1279 | lr_mult: 2 1280 | 
decay_mult: 0 1281 | } 1282 | convolution_param { 1283 | num_output: 144 1284 | kernel_size: 1 1285 | weight_filler { 1286 | type: "xavier" 1287 | std: 0.09 1288 | } 1289 | bias_filler { 1290 | type: "constant" 1291 | value: 0.2 1292 | } 1293 | } 1294 | } 1295 | layer { 1296 | name: "inception_4d/relu_3x3_reduce" 1297 | type: "ReLU" 1298 | bottom: "inception_4d/3x3_reduce" 1299 | top: "inception_4d/3x3_reduce" 1300 | } 1301 | layer { 1302 | name: "inception_4d/3x3" 1303 | type: "Convolution" 1304 | bottom: "inception_4d/3x3_reduce" 1305 | top: "inception_4d/3x3" 1306 | param { 1307 | lr_mult: 1 1308 | decay_mult: 1 1309 | } 1310 | param { 1311 | lr_mult: 2 1312 | decay_mult: 0 1313 | } 1314 | convolution_param { 1315 | num_output: 288 1316 | pad: 1 1317 | kernel_size: 3 1318 | weight_filler { 1319 | type: "xavier" 1320 | std: 0.03 1321 | } 1322 | bias_filler { 1323 | type: "constant" 1324 | value: 0.2 1325 | } 1326 | } 1327 | } 1328 | layer { 1329 | name: "inception_4d/relu_3x3" 1330 | type: "ReLU" 1331 | bottom: "inception_4d/3x3" 1332 | top: "inception_4d/3x3" 1333 | } 1334 | layer { 1335 | name: "inception_4d/5x5_reduce" 1336 | type: "Convolution" 1337 | bottom: "inception_4c/output" 1338 | top: "inception_4d/5x5_reduce" 1339 | param { 1340 | lr_mult: 1 1341 | decay_mult: 1 1342 | } 1343 | param { 1344 | lr_mult: 2 1345 | decay_mult: 0 1346 | } 1347 | convolution_param { 1348 | num_output: 32 1349 | kernel_size: 1 1350 | weight_filler { 1351 | type: "xavier" 1352 | std: 0.2 1353 | } 1354 | bias_filler { 1355 | type: "constant" 1356 | value: 0.2 1357 | } 1358 | } 1359 | } 1360 | layer { 1361 | name: "inception_4d/relu_5x5_reduce" 1362 | type: "ReLU" 1363 | bottom: "inception_4d/5x5_reduce" 1364 | top: "inception_4d/5x5_reduce" 1365 | } 1366 | layer { 1367 | name: "inception_4d/5x5" 1368 | type: "Convolution" 1369 | bottom: "inception_4d/5x5_reduce" 1370 | top: "inception_4d/5x5" 1371 | param { 1372 | lr_mult: 1 1373 | decay_mult: 1 1374 | } 1375 | param { 1376 | lr_mult: 2 1377 | decay_mult: 0 1378 | } 1379 | convolution_param { 1380 | num_output: 64 1381 | pad: 2 1382 | kernel_size: 5 1383 | weight_filler { 1384 | type: "xavier" 1385 | std: 0.03 1386 | } 1387 | bias_filler { 1388 | type: "constant" 1389 | value: 0.2 1390 | } 1391 | } 1392 | } 1393 | layer { 1394 | name: "inception_4d/relu_5x5" 1395 | type: "ReLU" 1396 | bottom: "inception_4d/5x5" 1397 | top: "inception_4d/5x5" 1398 | } 1399 | layer { 1400 | name: "inception_4d/pool" 1401 | type: "Pooling" 1402 | bottom: "inception_4c/output" 1403 | top: "inception_4d/pool" 1404 | pooling_param { 1405 | pool: MAX 1406 | kernel_size: 3 1407 | stride: 1 1408 | pad: 1 1409 | } 1410 | } 1411 | layer { 1412 | name: "inception_4d/pool_proj" 1413 | type: "Convolution" 1414 | bottom: "inception_4d/pool" 1415 | top: "inception_4d/pool_proj" 1416 | param { 1417 | lr_mult: 1 1418 | decay_mult: 1 1419 | } 1420 | param { 1421 | lr_mult: 2 1422 | decay_mult: 0 1423 | } 1424 | convolution_param { 1425 | num_output: 64 1426 | kernel_size: 1 1427 | weight_filler { 1428 | type: "xavier" 1429 | std: 0.1 1430 | } 1431 | bias_filler { 1432 | type: "constant" 1433 | value: 0.2 1434 | } 1435 | } 1436 | } 1437 | layer { 1438 | name: "inception_4d/relu_pool_proj" 1439 | type: "ReLU" 1440 | bottom: "inception_4d/pool_proj" 1441 | top: "inception_4d/pool_proj" 1442 | } 1443 | layer { 1444 | name: "inception_4d/output" 1445 | type: "Concat" 1446 | bottom: "inception_4d/1x1" 1447 | bottom: "inception_4d/3x3" 1448 | bottom: "inception_4d/5x5" 1449 | bottom: 
"inception_4d/pool_proj" 1450 | top: "inception_4d/output" 1451 | } 1452 | layer { 1453 | name: "inception_4e/1x1" 1454 | type: "Convolution" 1455 | bottom: "inception_4d/output" 1456 | top: "inception_4e/1x1" 1457 | param { 1458 | lr_mult: 1 1459 | decay_mult: 1 1460 | } 1461 | param { 1462 | lr_mult: 2 1463 | decay_mult: 0 1464 | } 1465 | convolution_param { 1466 | num_output: 256 1467 | kernel_size: 1 1468 | weight_filler { 1469 | type: "xavier" 1470 | std: 0.03 1471 | } 1472 | bias_filler { 1473 | type: "constant" 1474 | value: 0.2 1475 | } 1476 | } 1477 | } 1478 | layer { 1479 | name: "inception_4e/relu_1x1" 1480 | type: "ReLU" 1481 | bottom: "inception_4e/1x1" 1482 | top: "inception_4e/1x1" 1483 | } 1484 | layer { 1485 | name: "inception_4e/3x3_reduce" 1486 | type: "Convolution" 1487 | bottom: "inception_4d/output" 1488 | top: "inception_4e/3x3_reduce" 1489 | param { 1490 | lr_mult: 1 1491 | decay_mult: 1 1492 | } 1493 | param { 1494 | lr_mult: 2 1495 | decay_mult: 0 1496 | } 1497 | convolution_param { 1498 | num_output: 160 1499 | kernel_size: 1 1500 | weight_filler { 1501 | type: "xavier" 1502 | std: 0.09 1503 | } 1504 | bias_filler { 1505 | type: "constant" 1506 | value: 0.2 1507 | } 1508 | } 1509 | } 1510 | layer { 1511 | name: "inception_4e/relu_3x3_reduce" 1512 | type: "ReLU" 1513 | bottom: "inception_4e/3x3_reduce" 1514 | top: "inception_4e/3x3_reduce" 1515 | } 1516 | layer { 1517 | name: "inception_4e/3x3" 1518 | type: "Convolution" 1519 | bottom: "inception_4e/3x3_reduce" 1520 | top: "inception_4e/3x3" 1521 | param { 1522 | lr_mult: 1 1523 | decay_mult: 1 1524 | } 1525 | param { 1526 | lr_mult: 2 1527 | decay_mult: 0 1528 | } 1529 | convolution_param { 1530 | num_output: 320 1531 | pad: 1 1532 | kernel_size: 3 1533 | weight_filler { 1534 | type: "xavier" 1535 | std: 0.03 1536 | } 1537 | bias_filler { 1538 | type: "constant" 1539 | value: 0.2 1540 | } 1541 | } 1542 | } 1543 | layer { 1544 | name: "inception_4e/relu_3x3" 1545 | type: "ReLU" 1546 | bottom: "inception_4e/3x3" 1547 | top: "inception_4e/3x3" 1548 | } 1549 | layer { 1550 | name: "inception_4e/5x5_reduce" 1551 | type: "Convolution" 1552 | bottom: "inception_4d/output" 1553 | top: "inception_4e/5x5_reduce" 1554 | param { 1555 | lr_mult: 1 1556 | decay_mult: 1 1557 | } 1558 | param { 1559 | lr_mult: 2 1560 | decay_mult: 0 1561 | } 1562 | convolution_param { 1563 | num_output: 32 1564 | kernel_size: 1 1565 | weight_filler { 1566 | type: "xavier" 1567 | std: 0.2 1568 | } 1569 | bias_filler { 1570 | type: "constant" 1571 | value: 0.2 1572 | } 1573 | } 1574 | } 1575 | layer { 1576 | name: "inception_4e/relu_5x5_reduce" 1577 | type: "ReLU" 1578 | bottom: "inception_4e/5x5_reduce" 1579 | top: "inception_4e/5x5_reduce" 1580 | } 1581 | layer { 1582 | name: "inception_4e/5x5" 1583 | type: "Convolution" 1584 | bottom: "inception_4e/5x5_reduce" 1585 | top: "inception_4e/5x5" 1586 | param { 1587 | lr_mult: 1 1588 | decay_mult: 1 1589 | } 1590 | param { 1591 | lr_mult: 2 1592 | decay_mult: 0 1593 | } 1594 | convolution_param { 1595 | num_output: 128 1596 | pad: 2 1597 | kernel_size: 5 1598 | weight_filler { 1599 | type: "xavier" 1600 | std: 0.03 1601 | } 1602 | bias_filler { 1603 | type: "constant" 1604 | value: 0.2 1605 | } 1606 | } 1607 | } 1608 | layer { 1609 | name: "inception_4e/relu_5x5" 1610 | type: "ReLU" 1611 | bottom: "inception_4e/5x5" 1612 | top: "inception_4e/5x5" 1613 | } 1614 | layer { 1615 | name: "inception_4e/pool" 1616 | type: "Pooling" 1617 | bottom: "inception_4d/output" 1618 | top: "inception_4e/pool" 1619 | 
pooling_param { 1620 | pool: MAX 1621 | kernel_size: 3 1622 | stride: 1 1623 | pad: 1 1624 | } 1625 | } 1626 | layer { 1627 | name: "inception_4e/pool_proj" 1628 | type: "Convolution" 1629 | bottom: "inception_4e/pool" 1630 | top: "inception_4e/pool_proj" 1631 | param { 1632 | lr_mult: 1 1633 | decay_mult: 1 1634 | } 1635 | param { 1636 | lr_mult: 2 1637 | decay_mult: 0 1638 | } 1639 | convolution_param { 1640 | num_output: 128 1641 | kernel_size: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | std: 0.1 1645 | } 1646 | bias_filler { 1647 | type: "constant" 1648 | value: 0.2 1649 | } 1650 | } 1651 | } 1652 | layer { 1653 | name: "inception_4e/relu_pool_proj" 1654 | type: "ReLU" 1655 | bottom: "inception_4e/pool_proj" 1656 | top: "inception_4e/pool_proj" 1657 | } 1658 | layer { 1659 | name: "inception_4e/output" 1660 | type: "Concat" 1661 | bottom: "inception_4e/1x1" 1662 | bottom: "inception_4e/3x3" 1663 | bottom: "inception_4e/5x5" 1664 | bottom: "inception_4e/pool_proj" 1665 | top: "inception_4e/output" 1666 | } 1667 | layer { 1668 | name: "pool4/3x3_s2" 1669 | type: "Pooling" 1670 | bottom: "inception_4e/output" 1671 | top: "pool4/3x3_s2" 1672 | pooling_param { 1673 | pool: MAX 1674 | kernel_size: 3 1675 | stride: 2 1676 | } 1677 | } 1678 | layer { 1679 | name: "inception_5a/1x1" 1680 | type: "Convolution" 1681 | bottom: "pool4/3x3_s2" 1682 | top: "inception_5a/1x1" 1683 | param { 1684 | lr_mult: 1 1685 | decay_mult: 1 1686 | } 1687 | param { 1688 | lr_mult: 2 1689 | decay_mult: 0 1690 | } 1691 | convolution_param { 1692 | num_output: 256 1693 | kernel_size: 1 1694 | weight_filler { 1695 | type: "xavier" 1696 | std: 0.03 1697 | } 1698 | bias_filler { 1699 | type: "constant" 1700 | value: 0.2 1701 | } 1702 | } 1703 | } 1704 | layer { 1705 | name: "inception_5a/relu_1x1" 1706 | type: "ReLU" 1707 | bottom: "inception_5a/1x1" 1708 | top: "inception_5a/1x1" 1709 | } 1710 | layer { 1711 | name: "inception_5a/3x3_reduce" 1712 | type: "Convolution" 1713 | bottom: "pool4/3x3_s2" 1714 | top: "inception_5a/3x3_reduce" 1715 | param { 1716 | lr_mult: 1 1717 | decay_mult: 1 1718 | } 1719 | param { 1720 | lr_mult: 2 1721 | decay_mult: 0 1722 | } 1723 | convolution_param { 1724 | num_output: 160 1725 | kernel_size: 1 1726 | weight_filler { 1727 | type: "xavier" 1728 | std: 0.09 1729 | } 1730 | bias_filler { 1731 | type: "constant" 1732 | value: 0.2 1733 | } 1734 | } 1735 | } 1736 | layer { 1737 | name: "inception_5a/relu_3x3_reduce" 1738 | type: "ReLU" 1739 | bottom: "inception_5a/3x3_reduce" 1740 | top: "inception_5a/3x3_reduce" 1741 | } 1742 | layer { 1743 | name: "inception_5a/3x3" 1744 | type: "Convolution" 1745 | bottom: "inception_5a/3x3_reduce" 1746 | top: "inception_5a/3x3" 1747 | param { 1748 | lr_mult: 1 1749 | decay_mult: 1 1750 | } 1751 | param { 1752 | lr_mult: 2 1753 | decay_mult: 0 1754 | } 1755 | convolution_param { 1756 | num_output: 320 1757 | pad: 1 1758 | kernel_size: 3 1759 | weight_filler { 1760 | type: "xavier" 1761 | std: 0.03 1762 | } 1763 | bias_filler { 1764 | type: "constant" 1765 | value: 0.2 1766 | } 1767 | } 1768 | } 1769 | layer { 1770 | name: "inception_5a/relu_3x3" 1771 | type: "ReLU" 1772 | bottom: "inception_5a/3x3" 1773 | top: "inception_5a/3x3" 1774 | } 1775 | layer { 1776 | name: "inception_5a/5x5_reduce" 1777 | type: "Convolution" 1778 | bottom: "pool4/3x3_s2" 1779 | top: "inception_5a/5x5_reduce" 1780 | param { 1781 | lr_mult: 1 1782 | decay_mult: 1 1783 | } 1784 | param { 1785 | lr_mult: 2 1786 | decay_mult: 0 1787 | } 1788 | convolution_param { 1789 | 
num_output: 32 1790 | kernel_size: 1 1791 | weight_filler { 1792 | type: "xavier" 1793 | std: 0.2 1794 | } 1795 | bias_filler { 1796 | type: "constant" 1797 | value: 0.2 1798 | } 1799 | } 1800 | } 1801 | layer { 1802 | name: "inception_5a/relu_5x5_reduce" 1803 | type: "ReLU" 1804 | bottom: "inception_5a/5x5_reduce" 1805 | top: "inception_5a/5x5_reduce" 1806 | } 1807 | layer { 1808 | name: "inception_5a/5x5" 1809 | type: "Convolution" 1810 | bottom: "inception_5a/5x5_reduce" 1811 | top: "inception_5a/5x5" 1812 | param { 1813 | lr_mult: 1 1814 | decay_mult: 1 1815 | } 1816 | param { 1817 | lr_mult: 2 1818 | decay_mult: 0 1819 | } 1820 | convolution_param { 1821 | num_output: 128 1822 | pad: 2 1823 | kernel_size: 5 1824 | weight_filler { 1825 | type: "xavier" 1826 | std: 0.03 1827 | } 1828 | bias_filler { 1829 | type: "constant" 1830 | value: 0.2 1831 | } 1832 | } 1833 | } 1834 | layer { 1835 | name: "inception_5a/relu_5x5" 1836 | type: "ReLU" 1837 | bottom: "inception_5a/5x5" 1838 | top: "inception_5a/5x5" 1839 | } 1840 | layer { 1841 | name: "inception_5a/pool" 1842 | type: "Pooling" 1843 | bottom: "pool4/3x3_s2" 1844 | top: "inception_5a/pool" 1845 | pooling_param { 1846 | pool: MAX 1847 | kernel_size: 3 1848 | stride: 1 1849 | pad: 1 1850 | } 1851 | } 1852 | layer { 1853 | name: "inception_5a/pool_proj" 1854 | type: "Convolution" 1855 | bottom: "inception_5a/pool" 1856 | top: "inception_5a/pool_proj" 1857 | param { 1858 | lr_mult: 1 1859 | decay_mult: 1 1860 | } 1861 | param { 1862 | lr_mult: 2 1863 | decay_mult: 0 1864 | } 1865 | convolution_param { 1866 | num_output: 128 1867 | kernel_size: 1 1868 | weight_filler { 1869 | type: "xavier" 1870 | std: 0.1 1871 | } 1872 | bias_filler { 1873 | type: "constant" 1874 | value: 0.2 1875 | } 1876 | } 1877 | } 1878 | layer { 1879 | name: "inception_5a/relu_pool_proj" 1880 | type: "ReLU" 1881 | bottom: "inception_5a/pool_proj" 1882 | top: "inception_5a/pool_proj" 1883 | } 1884 | layer { 1885 | name: "inception_5a/output" 1886 | type: "Concat" 1887 | bottom: "inception_5a/1x1" 1888 | bottom: "inception_5a/3x3" 1889 | bottom: "inception_5a/5x5" 1890 | bottom: "inception_5a/pool_proj" 1891 | top: "inception_5a/output" 1892 | } 1893 | layer { 1894 | name: "inception_5b/1x1" 1895 | type: "Convolution" 1896 | bottom: "inception_5a/output" 1897 | top: "inception_5b/1x1" 1898 | param { 1899 | lr_mult: 1 1900 | decay_mult: 1 1901 | } 1902 | param { 1903 | lr_mult: 2 1904 | decay_mult: 0 1905 | } 1906 | convolution_param { 1907 | num_output: 384 1908 | kernel_size: 1 1909 | weight_filler { 1910 | type: "xavier" 1911 | std: 0.03 1912 | } 1913 | bias_filler { 1914 | type: "constant" 1915 | value: 0.2 1916 | } 1917 | } 1918 | } 1919 | layer { 1920 | name: "inception_5b/relu_1x1" 1921 | type: "ReLU" 1922 | bottom: "inception_5b/1x1" 1923 | top: "inception_5b/1x1" 1924 | } 1925 | layer { 1926 | name: "inception_5b/3x3_reduce" 1927 | type: "Convolution" 1928 | bottom: "inception_5a/output" 1929 | top: "inception_5b/3x3_reduce" 1930 | param { 1931 | lr_mult: 1 1932 | decay_mult: 1 1933 | } 1934 | param { 1935 | lr_mult: 2 1936 | decay_mult: 0 1937 | } 1938 | convolution_param { 1939 | num_output: 192 1940 | kernel_size: 1 1941 | weight_filler { 1942 | type: "xavier" 1943 | std: 0.09 1944 | } 1945 | bias_filler { 1946 | type: "constant" 1947 | value: 0.2 1948 | } 1949 | } 1950 | } 1951 | layer { 1952 | name: "inception_5b/relu_3x3_reduce" 1953 | type: "ReLU" 1954 | bottom: "inception_5b/3x3_reduce" 1955 | top: "inception_5b/3x3_reduce" 1956 | } 1957 | layer { 1958 | 
name: "inception_5b/3x3" 1959 | type: "Convolution" 1960 | bottom: "inception_5b/3x3_reduce" 1961 | top: "inception_5b/3x3" 1962 | param { 1963 | lr_mult: 1 1964 | decay_mult: 1 1965 | } 1966 | param { 1967 | lr_mult: 2 1968 | decay_mult: 0 1969 | } 1970 | convolution_param { 1971 | num_output: 384 1972 | pad: 1 1973 | kernel_size: 3 1974 | weight_filler { 1975 | type: "xavier" 1976 | std: 0.03 1977 | } 1978 | bias_filler { 1979 | type: "constant" 1980 | value: 0.2 1981 | } 1982 | } 1983 | } 1984 | layer { 1985 | name: "inception_5b/relu_3x3" 1986 | type: "ReLU" 1987 | bottom: "inception_5b/3x3" 1988 | top: "inception_5b/3x3" 1989 | } 1990 | layer { 1991 | name: "inception_5b/5x5_reduce" 1992 | type: "Convolution" 1993 | bottom: "inception_5a/output" 1994 | top: "inception_5b/5x5_reduce" 1995 | param { 1996 | lr_mult: 1 1997 | decay_mult: 1 1998 | } 1999 | param { 2000 | lr_mult: 2 2001 | decay_mult: 0 2002 | } 2003 | convolution_param { 2004 | num_output: 48 2005 | kernel_size: 1 2006 | weight_filler { 2007 | type: "xavier" 2008 | std: 0.2 2009 | } 2010 | bias_filler { 2011 | type: "constant" 2012 | value: 0.2 2013 | } 2014 | } 2015 | } 2016 | layer { 2017 | name: "inception_5b/relu_5x5_reduce" 2018 | type: "ReLU" 2019 | bottom: "inception_5b/5x5_reduce" 2020 | top: "inception_5b/5x5_reduce" 2021 | } 2022 | layer { 2023 | name: "inception_5b/5x5" 2024 | type: "Convolution" 2025 | bottom: "inception_5b/5x5_reduce" 2026 | top: "inception_5b/5x5" 2027 | param { 2028 | lr_mult: 1 2029 | decay_mult: 1 2030 | } 2031 | param { 2032 | lr_mult: 2 2033 | decay_mult: 0 2034 | } 2035 | convolution_param { 2036 | num_output: 128 2037 | pad: 2 2038 | kernel_size: 5 2039 | weight_filler { 2040 | type: "xavier" 2041 | std: 0.03 2042 | } 2043 | bias_filler { 2044 | type: "constant" 2045 | value: 0.2 2046 | } 2047 | } 2048 | } 2049 | layer { 2050 | name: "inception_5b/relu_5x5" 2051 | type: "ReLU" 2052 | bottom: "inception_5b/5x5" 2053 | top: "inception_5b/5x5" 2054 | } 2055 | layer { 2056 | name: "inception_5b/pool" 2057 | type: "Pooling" 2058 | bottom: "inception_5a/output" 2059 | top: "inception_5b/pool" 2060 | pooling_param { 2061 | pool: MAX 2062 | kernel_size: 3 2063 | stride: 1 2064 | pad: 1 2065 | } 2066 | } 2067 | layer { 2068 | name: "inception_5b/pool_proj" 2069 | type: "Convolution" 2070 | bottom: "inception_5b/pool" 2071 | top: "inception_5b/pool_proj" 2072 | param { 2073 | lr_mult: 1 2074 | decay_mult: 1 2075 | } 2076 | param { 2077 | lr_mult: 2 2078 | decay_mult: 0 2079 | } 2080 | convolution_param { 2081 | num_output: 128 2082 | kernel_size: 1 2083 | weight_filler { 2084 | type: "xavier" 2085 | std: 0.1 2086 | } 2087 | bias_filler { 2088 | type: "constant" 2089 | value: 0.2 2090 | } 2091 | } 2092 | } 2093 | layer { 2094 | name: "inception_5b/relu_pool_proj" 2095 | type: "ReLU" 2096 | bottom: "inception_5b/pool_proj" 2097 | top: "inception_5b/pool_proj" 2098 | } 2099 | layer { 2100 | name: "inception_5b/output" 2101 | type: "Concat" 2102 | bottom: "inception_5b/1x1" 2103 | bottom: "inception_5b/3x3" 2104 | bottom: "inception_5b/5x5" 2105 | bottom: "inception_5b/pool_proj" 2106 | top: "inception_5b/output" 2107 | } 2108 | layer { 2109 | name: "pool5/7x7_s1" 2110 | type: "Pooling" 2111 | bottom: "inception_5b/output" 2112 | top: "pool5/7x7_s1" 2113 | pooling_param { 2114 | pool: AVE 2115 | kernel_size: 7 2116 | stride: 1 2117 | } 2118 | } 2119 | layer { 2120 | name: "pool5/drop_7x7_s1" 2121 | type: "Dropout" 2122 | bottom: "pool5/7x7_s1" 2123 | top: "pool5/7x7_s1" 2124 | dropout_param { 2125 | 
dropout_ratio: 0.4 2126 | } 2127 | } 2128 | layer { 2129 | name: "loss3/classifier" 2130 | type: "InnerProduct" 2131 | bottom: "pool5/7x7_s1" 2132 | top: "loss3/classifier" 2133 | param { 2134 | lr_mult: 1 2135 | decay_mult: 1 2136 | } 2137 | param { 2138 | lr_mult: 2 2139 | decay_mult: 0 2140 | } 2141 | inner_product_param { 2142 | num_output: 1000 2143 | weight_filler { 2144 | type: "xavier" 2145 | } 2146 | bias_filler { 2147 | type: "constant" 2148 | value: 0 2149 | } 2150 | } 2151 | } 2152 | layer { 2153 | name: "prob" 2154 | type: "Softmax" 2155 | bottom: "loss3/classifier" 2156 | top: "prob" 2157 | } 2158 | -------------------------------------------------------------------------------- /Caffe_Models/ilsvrc_2012_mean.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmzintgraf/DeepVis-PredDiff/02649f2d8847fc23c58f9f2e5bcd97542673293d/Caffe_Models/ilsvrc_2012_mean.npy -------------------------------------------------------------------------------- /Caffe_Models/vgg network/VGG_ILSVRC_16_layers_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "VGG_ILSVRC_16_layers" 2 | input: "data" 3 | input_dim: 20 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | force_backward: true 8 | layers { 9 | bottom: "data" 10 | top: "conv1_1" 11 | name: "conv1_1" 12 | type: CONVOLUTION 13 | convolution_param { 14 | num_output: 64 15 | pad: 1 16 | kernel_size: 3 17 | } 18 | } 19 | layers { 20 | bottom: "conv1_1" 21 | top: "conv1_1" 22 | name: "relu1_1" 23 | type: RELU 24 | } 25 | layers { 26 | bottom: "conv1_1" 27 | top: "conv1_2" 28 | name: "conv1_2" 29 | type: CONVOLUTION 30 | convolution_param { 31 | num_output: 64 32 | pad: 1 33 | kernel_size: 3 34 | } 35 | } 36 | layers { 37 | bottom: "conv1_2" 38 | top: "conv1_2" 39 | name: "relu1_2" 40 | type: RELU 41 | } 42 | layers { 43 | bottom: "conv1_2" 44 | top: "pool1" 45 | name: "pool1" 46 | type: POOLING 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 2 50 | stride: 2 51 | } 52 | } 53 | layers { 54 | bottom: "pool1" 55 | top: "conv2_1" 56 | name: "conv2_1" 57 | type: CONVOLUTION 58 | convolution_param { 59 | num_output: 128 60 | pad: 1 61 | kernel_size: 3 62 | } 63 | } 64 | layers { 65 | bottom: "conv2_1" 66 | top: "conv2_1" 67 | name: "relu2_1" 68 | type: RELU 69 | } 70 | layers { 71 | bottom: "conv2_1" 72 | top: "conv2_2" 73 | name: "conv2_2" 74 | type: CONVOLUTION 75 | convolution_param { 76 | num_output: 128 77 | pad: 1 78 | kernel_size: 3 79 | } 80 | } 81 | layers { 82 | bottom: "conv2_2" 83 | top: "conv2_2" 84 | name: "relu2_2" 85 | type: RELU 86 | } 87 | layers { 88 | bottom: "conv2_2" 89 | top: "pool2" 90 | name: "pool2" 91 | type: POOLING 92 | pooling_param { 93 | pool: MAX 94 | kernel_size: 2 95 | stride: 2 96 | } 97 | } 98 | layers { 99 | bottom: "pool2" 100 | top: "conv3_1" 101 | name: "conv3_1" 102 | type: CONVOLUTION 103 | convolution_param { 104 | num_output: 256 105 | pad: 1 106 | kernel_size: 3 107 | } 108 | } 109 | layers { 110 | bottom: "conv3_1" 111 | top: "conv3_1" 112 | name: "relu3_1" 113 | type: RELU 114 | } 115 | layers { 116 | bottom: "conv3_1" 117 | top: "conv3_2" 118 | name: "conv3_2" 119 | type: CONVOLUTION 120 | convolution_param { 121 | num_output: 256 122 | pad: 1 123 | kernel_size: 3 124 | } 125 | } 126 | layers { 127 | bottom: "conv3_2" 128 | top: "conv3_2" 129 | name: "relu3_2" 130 | type: RELU 131 | } 132 | layers { 133 | bottom: "conv3_2" 134 | top: "conv3_3" 135 | name: "conv3_3" 
136 | type: CONVOLUTION 137 | convolution_param { 138 | num_output: 256 139 | pad: 1 140 | kernel_size: 3 141 | } 142 | } 143 | layers { 144 | bottom: "conv3_3" 145 | top: "conv3_3" 146 | name: "relu3_3" 147 | type: RELU 148 | } 149 | layers { 150 | bottom: "conv3_3" 151 | top: "pool3" 152 | name: "pool3" 153 | type: POOLING 154 | pooling_param { 155 | pool: MAX 156 | kernel_size: 2 157 | stride: 2 158 | } 159 | } 160 | layers { 161 | bottom: "pool3" 162 | top: "conv4_1" 163 | name: "conv4_1" 164 | type: CONVOLUTION 165 | convolution_param { 166 | num_output: 512 167 | pad: 1 168 | kernel_size: 3 169 | } 170 | } 171 | layers { 172 | bottom: "conv4_1" 173 | top: "conv4_1" 174 | name: "relu4_1" 175 | type: RELU 176 | } 177 | layers { 178 | bottom: "conv4_1" 179 | top: "conv4_2" 180 | name: "conv4_2" 181 | type: CONVOLUTION 182 | convolution_param { 183 | num_output: 512 184 | pad: 1 185 | kernel_size: 3 186 | } 187 | } 188 | layers { 189 | bottom: "conv4_2" 190 | top: "conv4_2" 191 | name: "relu4_2" 192 | type: RELU 193 | } 194 | layers { 195 | bottom: "conv4_2" 196 | top: "conv4_3" 197 | name: "conv4_3" 198 | type: CONVOLUTION 199 | convolution_param { 200 | num_output: 512 201 | pad: 1 202 | kernel_size: 3 203 | } 204 | } 205 | layers { 206 | bottom: "conv4_3" 207 | top: "conv4_3" 208 | name: "relu4_3" 209 | type: RELU 210 | } 211 | layers { 212 | bottom: "conv4_3" 213 | top: "pool4" 214 | name: "pool4" 215 | type: POOLING 216 | pooling_param { 217 | pool: MAX 218 | kernel_size: 2 219 | stride: 2 220 | } 221 | } 222 | layers { 223 | bottom: "pool4" 224 | top: "conv5_1" 225 | name: "conv5_1" 226 | type: CONVOLUTION 227 | convolution_param { 228 | num_output: 512 229 | pad: 1 230 | kernel_size: 3 231 | } 232 | } 233 | layers { 234 | bottom: "conv5_1" 235 | top: "conv5_1" 236 | name: "relu5_1" 237 | type: RELU 238 | } 239 | layers { 240 | bottom: "conv5_1" 241 | top: "conv5_2" 242 | name: "conv5_2" 243 | type: CONVOLUTION 244 | convolution_param { 245 | num_output: 512 246 | pad: 1 247 | kernel_size: 3 248 | } 249 | } 250 | layers { 251 | bottom: "conv5_2" 252 | top: "conv5_2" 253 | name: "relu5_2" 254 | type: RELU 255 | } 256 | layers { 257 | bottom: "conv5_2" 258 | top: "conv5_3" 259 | name: "conv5_3" 260 | type: CONVOLUTION 261 | convolution_param { 262 | num_output: 512 263 | pad: 1 264 | kernel_size: 3 265 | } 266 | } 267 | layers { 268 | bottom: "conv5_3" 269 | top: "conv5_3" 270 | name: "relu5_3" 271 | type: RELU 272 | } 273 | layers { 274 | bottom: "conv5_3" 275 | top: "pool5" 276 | name: "pool5" 277 | type: POOLING 278 | pooling_param { 279 | pool: MAX 280 | kernel_size: 2 281 | stride: 2 282 | } 283 | } 284 | layers { 285 | bottom: "pool5" 286 | top: "fc6" 287 | name: "fc6" 288 | type: INNER_PRODUCT 289 | inner_product_param { 290 | num_output: 4096 291 | } 292 | } 293 | layers { 294 | bottom: "fc6" 295 | top: "fc6" 296 | name: "relu6" 297 | type: RELU 298 | } 299 | layers { 300 | bottom: "fc6" 301 | top: "fc6" 302 | name: "drop6" 303 | type: DROPOUT 304 | dropout_param { 305 | dropout_ratio: 0.5 306 | } 307 | } 308 | layers { 309 | bottom: "fc6" 310 | top: "fc7" 311 | name: "fc7" 312 | type: INNER_PRODUCT 313 | inner_product_param { 314 | num_output: 4096 315 | } 316 | } 317 | layers { 318 | bottom: "fc7" 319 | top: "fc7" 320 | name: "relu7" 321 | type: RELU 322 | } 323 | layers { 324 | bottom: "fc7" 325 | top: "fc7" 326 | name: "drop7" 327 | type: DROPOUT 328 | dropout_param { 329 | dropout_ratio: 0.5 330 | } 331 | } 332 | layers { 333 | bottom: "fc7" 334 | top: "fc8" 335 | name: 
"fc8" 336 | type: INNER_PRODUCT 337 | inner_product_param { 338 | num_output: 1000 339 | } 340 | } 341 | layers { 342 | bottom: "fc8" 343 | top: "prob" 344 | name: "prob" 345 | type: SOFTMAX 346 | } 347 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 lmzintgraf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visualizing Deep Neural Network Decisions 2 | 3 | This code implements the method from the paper 4 | 5 | "Visualizing Deep Neural Network Decisions: Prediction Difference Analysis" - Luisa M Zintgraf, Taco S Cohen, Tameem Adel, Max Welling 6 | 7 | which was accepted at ICLR2017, see 8 | 9 | https://openreview.net/forum?id=BJ5UeU9xx 10 | 11 | Note that we are only publishing the code for the ImageNet experiments, since we cannot publish the MRI scans. 12 | If you are interested in the MRI implementation, please contact me (lmzintgraf@gmail.com). 13 | 14 | ## ImageNet Experiments 15 | 16 | ### Dependencies: 17 | 18 | If you want to run the IMAGENET experiments using one of the predefined models (our code supports the alexnet, googlenet and vgg) you need to install caffe, see 19 | 20 | http://caffe.berkeleyvision.org/ 21 | 22 | You will also need to download the respective caffemodel files (they're quite large). Please see the readme file in the "./Caffe_Models" folder for further instructions. 23 | 24 | ### Running Experiments: 25 | 26 | The experiments can be run by executing "./IMAGENET Experiments/experiments_imagenet.py". 27 | Different settings can be adjusted here, please see the file for further information. 28 | 29 | ### Data: 30 | 31 | The above script will use images from the "./data" folder. Only RGB images in format .png and .jpg of a minimum size of 227x227 pixels will be considered. If the image is larger, it will be cut off at the sides. Note that there should be enough images in this folder, since the samplers need them (see paper for further information). 32 | 33 | The "./data" folder also contains a text file with the ImageNet class labels. 
34 | -------------------------------------------------------------------------------- /data/ilsvrc_2012_labels.txt: -------------------------------------------------------------------------------- 1 | tench 2 | goldfish 3 | great white shark 4 | tiger shark 5 | hammerhead 6 | electric ray 7 | stingray 8 | cock 9 | hen 10 | ostrich 11 | brambling 12 | goldfinch 13 | house finch 14 | junco 15 | indigo bunting 16 | robin 17 | bulbul 18 | jay 19 | magpie 20 | chickadee 21 | water ouzel 22 | kite 23 | bald eagle 24 | vulture 25 | great grey owl 26 | European fire salamander 27 | common newt 28 | eft 29 | spotted salamander 30 | axolotl 31 | bullfrog 32 | tree frog 33 | tailed frog 34 | loggerhead 35 | leatherback turtle 36 | mud turtle 37 | terrapin 38 | box turtle 39 | banded gecko 40 | common iguana 41 | American chameleon 42 | whiptail 43 | agama 44 | frilled lizard 45 | alligator lizard 46 | Gila monster 47 | green lizard 48 | African chameleon 49 | Komodo dragon 50 | African crocodile 51 | American alligator 52 | triceratops 53 | thunder snake 54 | ringneck snake 55 | hognose snake 56 | green snake 57 | king snake 58 | garter snake 59 | water snake 60 | vine snake 61 | night snake 62 | boa constrictor 63 | rock python 64 | Indian cobra 65 | green mamba 66 | sea snake 67 | horned viper 68 | diamondback 69 | sidewinder 70 | trilobite 71 | harvestman 72 | scorpion 73 | black and gold garden spider 74 | barn spider 75 | garden spider 76 | black widow 77 | tarantula 78 | wolf spider 79 | tick 80 | centipede 81 | black grouse 82 | ptarmigan 83 | ruffed grouse 84 | prairie chicken 85 | peacock 86 | quail 87 | partridge 88 | African grey 89 | macaw 90 | sulphur-crested cockatoo 91 | lorikeet 92 | coucal 93 | bee eater 94 | hornbill 95 | hummingbird 96 | jacamar 97 | toucan 98 | drake 99 | red-breasted merganser 100 | goose 101 | black swan 102 | tusker 103 | echidna 104 | platypus 105 | wallaby 106 | koala 107 | wombat 108 | jellyfish 109 | sea anemone 110 | brain coral 111 | flatworm 112 | nematode 113 | conch 114 | snail 115 | slug 116 | sea slug 117 | chiton 118 | chambered nautilus 119 | Dungeness crab 120 | rock crab 121 | fiddler crab 122 | king crab 123 | American lobster 124 | spiny lobster 125 | crayfish 126 | hermit crab 127 | isopod 128 | white stork 129 | black stork 130 | spoonbill 131 | flamingo 132 | little blue heron 133 | American egret 134 | bittern 135 | crane 136 | limpkin 137 | European gallinule 138 | American coot 139 | bustard 140 | ruddy turnstone 141 | red-backed sandpiper 142 | redshank 143 | dowitcher 144 | oystercatcher 145 | pelican 146 | king penguin 147 | albatross 148 | grey whale 149 | killer whale 150 | dugong 151 | sea lion 152 | Chihuahua 153 | Japanese spaniel 154 | Maltese dog 155 | Pekinese 156 | Shih-Tzu 157 | Blenheim spaniel 158 | papillon 159 | toy terrier 160 | Rhodesian ridgeback 161 | Afghan hound 162 | basset 163 | beagle 164 | bloodhound 165 | bluetick 166 | black-and-tan coonhound 167 | Walker hound 168 | English foxhound 169 | redbone 170 | borzoi 171 | Irish wolfhound 172 | Italian greyhound 173 | whippet 174 | Ibizan hound 175 | Norwegian elkhound 176 | otterhound 177 | Saluki 178 | Scottish deerhound 179 | Weimaraner 180 | Staffordshire bullterrier 181 | American Staffordshire terrier 182 | Bedlington terrier 183 | Border terrier 184 | Kerry blue terrier 185 | Irish terrier 186 | Norfolk terrier 187 | Norwich terrier 188 | Yorkshire terrier 189 | wire-haired fox terrier 190 | Lakeland terrier 191 | Sealyham terrier 192 | Airedale 193 | cairn 
194 | Australian terrier 195 | Dandie Dinmont 196 | Boston bull 197 | miniature schnauzer 198 | giant schnauzer 199 | standard schnauzer 200 | Scotch terrier 201 | Tibetan terrier 202 | silky terrier 203 | soft-coated wheaten terrier 204 | West Highland white terrier 205 | Lhasa 206 | flat-coated retriever 207 | curly-coated retriever 208 | golden retriever 209 | Labrador retriever 210 | Chesapeake Bay retriever 211 | German short-haired pointer 212 | vizsla 213 | English setter 214 | Irish setter 215 | Gordon setter 216 | Brittany spaniel 217 | clumber 218 | English springer 219 | Welsh springer spaniel 220 | cocker spaniel 221 | Sussex spaniel 222 | Irish water spaniel 223 | kuvasz 224 | schipperke 225 | groenendael 226 | malinois 227 | briard 228 | kelpie 229 | komondor 230 | Old English sheepdog 231 | Shetland sheepdog 232 | collie 233 | Border collie 234 | Bouvier des Flandres 235 | Rottweiler 236 | German shepherd 237 | Doberman 238 | miniature pinscher 239 | Greater Swiss Mountain dog 240 | Bernese mountain dog 241 | Appenzeller 242 | EntleBucher 243 | boxer 244 | bull mastiff 245 | Tibetan mastiff 246 | French bulldog 247 | Great Dane 248 | Saint Bernard 249 | Eskimo dog 250 | malamute 251 | Siberian husky 252 | dalmatian 253 | affenpinscher 254 | basenji 255 | pug 256 | Leonberg 257 | Newfoundland 258 | Great Pyrenees 259 | Samoyed 260 | Pomeranian 261 | chow 262 | keeshond 263 | Brabancon griffon 264 | Pembroke 265 | Cardigan 266 | toy poodle 267 | miniature poodle 268 | standard poodle 269 | Mexican hairless 270 | timber wolf 271 | white wolf 272 | red wolf 273 | coyote 274 | dingo 275 | dhole 276 | African hunting dog 277 | hyena 278 | red fox 279 | kit fox 280 | Arctic fox 281 | grey fox 282 | tabby 283 | tiger cat 284 | Persian cat 285 | Siamese cat 286 | Egyptian cat 287 | cougar 288 | lynx 289 | leopard 290 | snow leopard 291 | jaguar 292 | lion 293 | tiger 294 | cheetah 295 | brown bear 296 | American black bear 297 | ice bear 298 | sloth bear 299 | mongoose 300 | meerkat 301 | tiger beetle 302 | ladybug 303 | ground beetle 304 | long-horned beetle 305 | leaf beetle 306 | dung beetle 307 | rhinoceros beetle 308 | weevil 309 | fly 310 | bee 311 | ant 312 | grasshopper 313 | cricket 314 | walking stick 315 | cockroach 316 | mantis 317 | cicada 318 | leafhopper 319 | lacewing 320 | dragonfly 321 | damselfly 322 | admiral 323 | ringlet 324 | monarch 325 | cabbage butterfly 326 | sulphur butterfly 327 | lycaenid 328 | starfish 329 | sea urchin 330 | sea cucumber 331 | wood rabbit 332 | hare 333 | Angora 334 | hamster 335 | porcupine 336 | fox squirrel 337 | marmot 338 | beaver 339 | guinea pig 340 | sorrel 341 | zebra 342 | hog 343 | wild boar 344 | warthog 345 | hippopotamus 346 | ox 347 | water buffalo 348 | bison 349 | ram 350 | bighorn 351 | ibex 352 | hartebeest 353 | impala 354 | gazelle 355 | Arabian camel 356 | llama 357 | weasel 358 | mink 359 | polecat 360 | black-footed ferret 361 | otter 362 | skunk 363 | badger 364 | armadillo 365 | three-toed sloth 366 | orangutan 367 | gorilla 368 | chimpanzee 369 | gibbon 370 | siamang 371 | guenon 372 | patas 373 | baboon 374 | macaque 375 | langur 376 | colobus 377 | proboscis monkey 378 | marmoset 379 | capuchin 380 | howler monkey 381 | titi 382 | spider monkey 383 | squirrel monkey 384 | Madagascar cat 385 | indri 386 | Indian elephant 387 | African elephant 388 | lesser panda 389 | giant panda 390 | barracouta 391 | eel 392 | coho 393 | rock beauty 394 | anemone fish 395 | sturgeon 396 | gar 397 | lionfish 398 | puffer 399 
| abacus 400 | abaya 401 | academic gown 402 | accordion 403 | acoustic guitar 404 | aircraft carrier 405 | airliner 406 | airship 407 | altar 408 | ambulance 409 | amphibian 410 | analog clock 411 | apiary 412 | apron 413 | ashcan 414 | assault rifle 415 | backpack 416 | bakery 417 | balance beam 418 | balloon 419 | ballpoint 420 | Band Aid 421 | banjo 422 | bannister 423 | barbell 424 | barber chair 425 | barbershop 426 | barn 427 | barometer 428 | barrel 429 | barrow 430 | baseball 431 | basketball 432 | bassinet 433 | bassoon 434 | bathing cap 435 | bath towel 436 | bathtub 437 | beach wagon 438 | beacon 439 | beaker 440 | bearskin 441 | beer bottle 442 | beer glass 443 | bell cote 444 | bib 445 | bicycle-built-for-two 446 | bikini 447 | binder 448 | binoculars 449 | birdhouse 450 | boathouse 451 | bobsled 452 | bolo tie 453 | bonnet 454 | bookcase 455 | bookshop 456 | bottlecap 457 | bow 458 | bow tie 459 | brass 460 | brassiere 461 | breakwater 462 | breastplate 463 | broom 464 | bucket 465 | buckle 466 | bulletproof vest 467 | bullet train 468 | butcher shop 469 | cab 470 | caldron 471 | candle 472 | cannon 473 | canoe 474 | can opener 475 | cardigan 476 | car mirror 477 | carousel 478 | carpenter's kit 479 | carton 480 | car wheel 481 | cash machine 482 | cassette 483 | cassette player 484 | castle 485 | catamaran 486 | CD player 487 | cello 488 | cellular telephone 489 | chain 490 | chainlink fence 491 | chain mail 492 | chain saw 493 | chest 494 | chiffonier 495 | chime 496 | china cabinet 497 | Christmas stocking 498 | church 499 | cinema 500 | cleaver 501 | cliff dwelling 502 | cloak 503 | clog 504 | cocktail shaker 505 | coffee mug 506 | coffeepot 507 | coil 508 | combination lock 509 | computer keyboard 510 | confectionery 511 | container ship 512 | convertible 513 | corkscrew 514 | cornet 515 | cowboy boot 516 | cowboy hat 517 | cradle 518 | crane 519 | crash helmet 520 | crate 521 | crib 522 | Crock Pot 523 | croquet ball 524 | crutch 525 | cuirass 526 | dam 527 | desk 528 | desktop computer 529 | dial telephone 530 | diaper 531 | digital clock 532 | digital watch 533 | dining table 534 | dishrag 535 | dishwasher 536 | disk brake 537 | dock 538 | dogsled 539 | dome 540 | doormat 541 | drilling platform 542 | drum 543 | drumstick 544 | dumbbell 545 | Dutch oven 546 | electric fan 547 | electric guitar 548 | electric locomotive 549 | entertainment center 550 | envelope 551 | espresso maker 552 | face powder 553 | feather boa 554 | file 555 | fireboat 556 | fire engine 557 | fire screen 558 | flagpole 559 | flute 560 | folding chair 561 | football helmet 562 | forklift 563 | fountain 564 | fountain pen 565 | four-poster 566 | freight car 567 | French horn 568 | frying pan 569 | fur coat 570 | garbage truck 571 | gasmask 572 | gas pump 573 | goblet 574 | go-kart 575 | golf ball 576 | golfcart 577 | gondola 578 | gong 579 | gown 580 | grand piano 581 | greenhouse 582 | grille 583 | grocery store 584 | guillotine 585 | hair slide 586 | hair spray 587 | half track 588 | hammer 589 | hamper 590 | hand blower 591 | hand-held computer 592 | handkerchief 593 | hard disc 594 | harmonica 595 | harp 596 | harvester 597 | hatchet 598 | holster 599 | home theater 600 | honeycomb 601 | hook 602 | hoopskirt 603 | horizontal bar 604 | horse cart 605 | hourglass 606 | iPod 607 | iron 608 | jack-o'-lantern 609 | jean 610 | jeep 611 | jersey 612 | jigsaw puzzle 613 | jinrikisha 614 | joystick 615 | kimono 616 | knee pad 617 | knot 618 | lab coat 619 | ladle 620 | lampshade 621 | laptop 622 | 
lawn mower 623 | lens cap 624 | letter opener 625 | library 626 | lifeboat 627 | lighter 628 | limousine 629 | liner 630 | lipstick 631 | Loafer 632 | lotion 633 | loudspeaker 634 | loupe 635 | lumbermill 636 | magnetic compass 637 | mailbag 638 | mailbox 639 | maillot 640 | maillot 641 | manhole cover 642 | maraca 643 | marimba 644 | mask 645 | matchstick 646 | maypole 647 | maze 648 | measuring cup 649 | medicine chest 650 | megalith 651 | microphone 652 | microwave 653 | military uniform 654 | milk can 655 | minibus 656 | miniskirt 657 | minivan 658 | missile 659 | mitten 660 | mixing bowl 661 | mobile home 662 | Model T 663 | modem 664 | monastery 665 | monitor 666 | moped 667 | mortar 668 | mortarboard 669 | mosque 670 | mosquito net 671 | motor scooter 672 | mountain bike 673 | mountain tent 674 | mouse 675 | mousetrap 676 | moving van 677 | muzzle 678 | nail 679 | neck brace 680 | necklace 681 | nipple 682 | notebook 683 | obelisk 684 | oboe 685 | ocarina 686 | odometer 687 | oil filter 688 | organ 689 | oscilloscope 690 | overskirt 691 | oxcart 692 | oxygen mask 693 | packet 694 | paddle 695 | paddlewheel 696 | padlock 697 | paintbrush 698 | pajama 699 | palace 700 | panpipe 701 | paper towel 702 | parachute 703 | parallel bars 704 | park bench 705 | parking meter 706 | passenger car 707 | patio 708 | pay-phone 709 | pedestal 710 | pencil box 711 | pencil sharpener 712 | perfume 713 | Petri dish 714 | photocopier 715 | pick 716 | pickelhaube 717 | picket fence 718 | pickup 719 | pier 720 | piggy bank 721 | pill bottle 722 | pillow 723 | ping-pong ball 724 | pinwheel 725 | pirate 726 | pitcher 727 | plane 728 | planetarium 729 | plastic bag 730 | plate rack 731 | plow 732 | plunger 733 | Polaroid camera 734 | pole 735 | police van 736 | poncho 737 | pool table 738 | pop bottle 739 | pot 740 | potter's wheel 741 | power drill 742 | prayer rug 743 | printer 744 | prison 745 | projectile 746 | projector 747 | puck 748 | punching bag 749 | purse 750 | quill 751 | quilt 752 | racer 753 | racket 754 | radiator 755 | radio 756 | radio telescope 757 | rain barrel 758 | recreational vehicle 759 | reel 760 | reflex camera 761 | refrigerator 762 | remote control 763 | restaurant 764 | revolver 765 | rifle 766 | rocking chair 767 | rotisserie 768 | rubber eraser 769 | rugby ball 770 | rule 771 | running shoe 772 | safe 773 | safety pin 774 | saltshaker 775 | sandal 776 | sarong 777 | sax 778 | scabbard 779 | scale 780 | school bus 781 | schooner 782 | scoreboard 783 | screen 784 | screw 785 | screwdriver 786 | seat belt 787 | sewing machine 788 | shield 789 | shoe shop 790 | shoji 791 | shopping basket 792 | shopping cart 793 | shovel 794 | shower cap 795 | shower curtain 796 | ski 797 | ski mask 798 | sleeping bag 799 | slide rule 800 | sliding door 801 | slot 802 | snorkel 803 | snowmobile 804 | snowplow 805 | soap dispenser 806 | soccer ball 807 | sock 808 | solar dish 809 | sombrero 810 | soup bowl 811 | space bar 812 | space heater 813 | space shuttle 814 | spatula 815 | speedboat 816 | spider web 817 | spindle 818 | sports car 819 | spotlight 820 | stage 821 | steam locomotive 822 | steel arch bridge 823 | steel drum 824 | stethoscope 825 | stole 826 | stone wall 827 | stopwatch 828 | stove 829 | strainer 830 | streetcar 831 | stretcher 832 | studio couch 833 | stupa 834 | submarine 835 | suit 836 | sundial 837 | sunglass 838 | sunglasses 839 | sunscreen 840 | suspension bridge 841 | swab 842 | sweatshirt 843 | swimming trunks 844 | swing 845 | switch 846 | syringe 847 | table lamp 848 | 
tank 849 | tape player 850 | teapot 851 | teddy 852 | television 853 | tennis ball 854 | thatch 855 | theater curtain 856 | thimble 857 | thresher 858 | throne 859 | tile roof 860 | toaster 861 | tobacco shop 862 | toilet seat 863 | torch 864 | totem pole 865 | tow truck 866 | toyshop 867 | tractor 868 | trailer truck 869 | tray 870 | trench coat 871 | tricycle 872 | trimaran 873 | tripod 874 | triumphal arch 875 | trolleybus 876 | trombone 877 | tub 878 | turnstile 879 | typewriter keyboard 880 | umbrella 881 | unicycle 882 | upright 883 | vacuum 884 | vase 885 | vault 886 | velvet 887 | vending machine 888 | vestment 889 | viaduct 890 | violin 891 | volleyball 892 | waffle iron 893 | wall clock 894 | wallet 895 | wardrobe 896 | warplane 897 | washbasin 898 | washer 899 | water bottle 900 | water jug 901 | water tower 902 | whiskey jug 903 | whistle 904 | wig 905 | window screen 906 | window shade 907 | Windsor tie 908 | wine bottle 909 | wing 910 | wok 911 | wooden spoon 912 | wool 913 | worm fence 914 | wreck 915 | yawl 916 | yurt 917 | web site 918 | comic book 919 | crossword puzzle 920 | street sign 921 | traffic light 922 | book jacket 923 | menu 924 | plate 925 | guacamole 926 | consomme 927 | hot pot 928 | trifle 929 | ice cream 930 | ice lolly 931 | French loaf 932 | bagel 933 | pretzel 934 | cheeseburger 935 | hotdog 936 | mashed potato 937 | head cabbage 938 | broccoli 939 | cauliflower 940 | zucchini 941 | spaghetti squash 942 | acorn squash 943 | butternut squash 944 | cucumber 945 | artichoke 946 | bell pepper 947 | cardoon 948 | mushroom 949 | Granny Smith 950 | strawberry 951 | orange 952 | lemon 953 | fig 954 | pineapple 955 | banana 956 | jackfruit 957 | custard apple 958 | pomegranate 959 | hay 960 | carbonara 961 | chocolate sauce 962 | dough 963 | meat loaf 964 | pizza 965 | potpie 966 | burrito 967 | red wine 968 | espresso 969 | cup 970 | eggnog 971 | alp 972 | bubble 973 | cliff 974 | coral reef 975 | geyser 976 | lakeside 977 | promontory 978 | sandbar 979 | seashore 980 | valley 981 | volcano 982 | ballplayer 983 | groom 984 | scuba diver 985 | rapeseed 986 | daisy 987 | yellow lady's slipper 988 | corn 989 | acorn 990 | hip 991 | buckeye 992 | coral fungus 993 | agaric 994 | gyromitra 995 | stinkhorn 996 | earthstar 997 | hen-of-the-woods 998 | bolete 999 | ear 1000 | toilet tissue 1001 | -------------------------------------------------------------------------------- /data/n01855672_8202.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmzintgraf/DeepVis-PredDiff/02649f2d8847fc23c58f9f2e5bcd97542673293d/data/n01855672_8202.jpg -------------------------------------------------------------------------------- /data/n02488291_1177.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmzintgraf/DeepVis-PredDiff/02649f2d8847fc23c58f9f2e5bcd97542673293d/data/n02488291_1177.jpg -------------------------------------------------------------------------------- /data/n02917067_1599.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmzintgraf/DeepVis-PredDiff/02649f2d8847fc23c58f9f2e5bcd97542673293d/data/n02917067_1599.jpg -------------------------------------------------------------------------------- /experiments_imagenet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | From this script, 
experiments for ImageNet pictures can be started. 4 | See "configuration" below for the different possible settings. 5 | The results are saved automatically to the folder ./results 6 | 7 | It is recommended to run caffe in gpu mode when overlapping is set 8 | to True, otherwise the calculation will take a very long time. 9 | 10 | @author: Luisa M Zintgraf 11 | """ 12 | 13 | # the following is needed to avoid some error that can be thrown when 14 | # using matplotlib.pyplot in a linux shell 15 | import matplotlib 16 | matplotlib.use('Agg') 17 | 18 | # standard imports 19 | import numpy as np 20 | import time 21 | import os 22 | 23 | # most important script - relevance estimator 24 | from prediction_difference_analysis import PredDiffAnalyser 25 | 26 | # utilities 27 | import utils_classifiers as utlC 28 | import utils_data as utlD 29 | import utils_sampling as utlS 30 | import utils_visualise as utlV 31 | import sensitivity_analysis_caffe as SA 32 | 33 | 34 | # ------------------------ CONFIGURATION ------------------------ 35 | # -------------------> CHANGE SETTINGS HERE <-------------------- 36 | 37 | # pick neural network to run experiment for (alexnet, googlenet, vgg) 38 | netname = 'googlenet' 39 | 40 | # pick for which layers the explanations should be computed 41 | # (names depend on network, output layer is usually called 'prob') 42 | blobnames = ['prob'] 43 | #blobnames = ['conv1','conv2','conv3','conv4','conv5','fc6','fc7','fc8','prob'] # alexnet 44 | #blobnames = ['conv1/7x7_s2', 'conv2/3x3_reduce', 'conv2/3x3', 'conv2/norm2', 'inception_3a/output', 'inception_3b/output', 'inception_4a/output', 'inception_4b/output', 'inception_4c/output', 'inception_4d/output', 'inception_4e/output','inception_5a/output', 'inception_5b/output', 'loss3/classifier', 'prob'] # googlenet 45 | #blobnames = ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2', 'conv5_3', 'fc6', 'fc7', 'fc8', 'prob'] # vgg 46 | 47 | # is caffe running in gpu mode? 48 | gpu = True 49 | 50 | # pick image indices which are analysed (in alphabetical order as in the ./data folder) [0,1,2,...]
51 | # (if None, all images in './data' will be analysed) 52 | test_indices = None 53 | 54 | # window size (i.e., the size of the pixel patch that is marginalised out in each step) 55 | win_size = 10 # k in alg 1 (see paper) 56 | 57 | # indicate whether windows should be overlapping or not 58 | overlapping = True 59 | 60 | # settings for sampling 61 | sampl_style = 'conditional' # choose: conditional / marginal 62 | num_samples = 10 63 | padding_size = 2 # important for conditional sampling, 64 | # l = win_size+2*padding_size in alg 1 65 | # (see paper) 66 | 67 | # set the batch size - the larger, the faster computation will be 68 | # (if caffe crashes with memory error, reduce the batch size) 69 | batch_size = 128 70 | 71 | 72 | # ------------------------ SET-UP ------------------------ 73 | 74 | utlC.set_caffe_mode(gpu=gpu) 75 | 76 | net = utlC.get_caffenet(netname) 77 | 78 | # get the data 79 | X_test, X_test_im, X_filenames = utlD.get_imagenet_data(net=net) 80 | 81 | # get the label names of the 1000 ImageNet classes 82 | classnames = utlD.get_imagenet_classnames() 83 | 84 | if not test_indices: 85 | test_indices = [i for i in range(X_test.shape[0])] 86 | 87 | # make folder for saving the results if it doesn't exist 88 | path_results = './results/' 89 | if not os.path.exists(path_results): 90 | os.makedirs(path_results) 91 | 92 | # ------------------------ EXPERIMENTS ------------------------ 93 | 94 | # change the batch size of the network to the given value 95 | net.blobs['data'].reshape(batch_size, X_test.shape[1], X_test.shape[2], X_test.shape[3]) 96 | 97 | # target function (mapping input features to output probabilities) 98 | target_func = lambda x: utlC.forward_pass(net, x, blobnames) 99 | 100 | # for the given test indices, do the prediction difference analysis 101 | for test_idx in test_indices: 102 | 103 | # get the specific image (preprocessed, can be used as input to the target function) 104 | x_test = X_test[test_idx] 105 | # get the image for plotting (not preprocessed) 106 | x_test_im = X_test_im[test_idx] 107 | # prediction of the network 108 | y_pred = np.argmax(utlC.forward_pass(net, x_test, ['prob'])) 109 | y_pred_label = classnames[y_pred] 110 | 111 | # get the path for saving the results 112 | if sampl_style == 'conditional': 113 | save_path = path_results+'{}_{}_winSize{}_condSampl_numSampl{}_paddSize{}_{}'.format(X_filenames[test_idx],y_pred_label,win_size,num_samples,padding_size,netname) 114 | elif sampl_style == 'marginal': 115 | save_path = path_results+'{}_{}_winSize{}_margSampl_numSampl{}_{}'.format(X_filenames[test_idx],y_pred_label,win_size,num_samples,netname) 116 | 117 | if os.path.exists(save_path+'.npz'): 118 | print 'Results for ', X_filenames[test_idx], ' exist, will move to the next image. ' 119 | continue 120 | 121 | print "doing test...", "file :", X_filenames[test_idx], ", net:", netname, ", win_size:", win_size, ", sampling: ", sampl_style 122 | 123 | # compute the sensitivity map 124 | layer_name = net.blobs.keys()[-2] # look at penultimate layer (like in Simonyan et al. 
(2013)) 125 | sensMap = SA.get_sens_map(net, x_test[np.newaxis], layer_name, np.argmax(target_func(x_test)[-1][0])) 126 | 127 | start_time = time.time() 128 | 129 | if sampl_style == 'conditional': 130 | sampler = utlS.cond_sampler_imagenet(win_size=win_size, padding_size=padding_size, image_dims=net.crop_dims, netname=netname) 131 | elif sampl_style == 'marginal': 132 | sampler = utlS.marg_sampler_imagenet(X_test, net) 133 | 134 | pda = PredDiffAnalyser(x_test, target_func, sampler, num_samples=num_samples, batch_size=batch_size) 135 | pred_diff = pda.get_rel_vect(win_size=win_size, overlap=overlapping) 136 | 137 | # plot and save the results 138 | utlV.plot_results(x_test, x_test_im, sensMap, pred_diff[0], target_func, classnames, test_idx, save_path) 139 | np.savez(save_path, *pred_diff) 140 | print "--- Total computation took {:.4f} minutes ---".format((time.time() - start_time)/60) 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /prediction_difference_analysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: Luisa Zintgraf 4 | """ 5 | 6 | import numpy as np 7 | import time 8 | 9 | class PredDiffAnalyser: 10 | ''' 11 | This class implements the prediction difference analysis, i.e., a method 12 | which estimates how important the individual input features are to an 13 | (already trained) classifier, given a specific input to the classifier. 14 | To this end, a relevance is estimated which is of the same size as the 15 | input and reflects the importance of each feature. 16 | 17 | Note: this version implements the method for RGB-image classification! 18 | However, the method can be used with any kind of data. 19 | Also note that we assume that the color channels are along axis 0, as it 20 | is common with convolutional neural networks. 21 | ''' 22 | 23 | def __init__(self, x, tar_func, sampler, num_samples=10, batch_size=10, prob_tar=True): 24 | ''' 25 | Input: 26 | x the feature vector for which we want to make the analysis (can be a hidden layer!) 27 | Has to be a numpy array of a dimension that fits to tar_func 28 | tar_func the target function, can be the output of the classifier or an intermediate layer 29 | (must take x as input, keep this in mind when starting at intermediate layers!)
30 | num_samples the number of samples used for marginalising out features 31 | batch_size batch size for caffe network (in tar_func) 32 | prob_tar boolean, indicates if the target values are probabilities 33 | (not necessarily the case when we look at hidden nodes) 34 | ''' 35 | 36 | # inputs 37 | self.x = np.copy(x) 38 | self.tar_func = tar_func 39 | self.sampler = sampler 40 | self.num_samples = num_samples 41 | self.batch_size = batch_size 42 | self.prob_tar = prob_tar 43 | 44 | # some other useful values 45 | self.num_feats = len(self.x.ravel())/3 # we make the analysis not per color channel, 46 | # but for all channels at once, 47 | # therefore we divide the number of features by 3 48 | self.true_tar_val = self.tar_func(self.x) # true network state for the given input 49 | self.num_blobs = len(self.true_tar_val) 50 | self.num_metrics = 2 # the number of metrics we use for evaluating 51 | # the prediction difference (avg and max of 52 | # the weight of evidence per feature map) 53 | self.tests_per_batch = int(self.batch_size/self.num_samples) # rounds down 54 | 55 | # drop the first dimension of the elements in the true target value list, 56 | # since it is not necessary (since we only forwarded a single feature vector) 57 | self.true_tar_val = [t[0] for t in self.true_tar_val] 58 | 59 | 60 | #%% 61 | # -------------------- METHOD RETURNING EXPLANATIONS -------------------------- 62 | 63 | def get_rel_vect(self, win_size, overlap=True): 64 | """ 65 | Main method to use, will return a relevance vector. 66 | Input: win_size the window size (k in alg. 1) 67 | overlap whether the windows should be overlapping, default is True 68 | Output: rel_vects the relevance vectors, dimensions are: 69 | - number of features (input) 70 | - number of outputs (usually output layer, can be hidden layer) 71 | to interpret the result, look at one output (e.g., the predicted class) 72 | and visualise the input features in some way 73 | """ 74 | 75 | # create array for relevance vectors, each element has dimensions (num_feats)*blobdimension 76 | # where the relevance of each feature on the different activations in that blob is stored 77 | rel_vects = [np.zeros((self.num_feats, self.true_tar_val[b].shape[0]), dtype=np.float64) for b in xrange(self.num_blobs)] 78 | 79 | # a counts vector to keep track of how often a feature is marginalised out 80 | counts = np.zeros((self.num_feats), dtype=np.int) 81 | 82 | # a matrix where each entry reflects the index in the flattened input (image) 83 | all_feats = np.reshape([i for i in xrange(self.num_feats*3)], self.x.shape) 84 | 85 | if overlap: 86 | 87 | windows = np.zeros((self.tests_per_batch, win_size*win_size*3), dtype=int) 88 | win_idx = 0 89 | for i in range(self.x.shape[1]-win_size+1): # rows 90 | start_time = time.time() 91 | for j in range(self.x.shape[2]-win_size+1): # columns 92 | # get the window which we want to simulate as unknown 93 | window = all_feats[:,i:i+win_size,j:j+win_size].ravel() 94 | windows[win_idx] = window 95 | win_idx += 1 96 | if win_idx==self.tests_per_batch: 97 | # evaluate the prediction difference 98 | pred_diffs = self._get_rel_vect_subset(windows) 99 | for w in xrange(self.tests_per_batch): 100 | window = windows[w] 101 | for b in xrange(self.num_blobs): 102 | rel_vects[b][window[window= img_dim[0] and np_img.shape[1] >= img_dim[1]: 46 | o = int(0.5*np.array([np_img.shape[0]-img_dim[0], np_img.shape[1]-img_dim[1]])) 47 | X = np.vstack((X, np_img[o[0]:o[0]+img_dim[0], o[1]:o[1]+img_dim[1], :][np.newaxis])) 48 | 
X_filenames.append(img_list[i].replace(".","")) 49 | else: 50 | print("Skipped ",img_list[i],", image dimensions were too small.") 51 | 52 | # the number of images we found in the folder 53 | num_imgs = X.shape[0] 54 | 55 | # cast to image values that can be displayed directly with plt.imshow() 56 | X_im = np.uint8(X) 57 | 58 | # preprocess 59 | X_pre = np.zeros((X.shape[0], 3, img_dim[0], img_dim[1])) 60 | for i in range(num_imgs): 61 | X_pre[i] = net.transformer.preprocess('data', X[i]) 62 | X = X_pre 63 | 64 | return X, X_im, X_filenames 65 | 66 | 67 | def get_imagenet_classnames(): 68 | """ Returns the classnames of all 1000 ImageNet classes """ 69 | return np.loadtxt(open(path_data+'/ilsvrc_2012_labels.txt'), dtype=object, delimiter='\n') 70 | -------------------------------------------------------------------------------- /utils_sampling.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Classes for (1) marginal and (2) conditional sampling for the IMAGENET data. 4 | 5 | Note that every sampler has to implement the method 6 | get_samples(sampleIndices, featVect, numSamples) 7 | 8 | """ 9 | 10 | import numpy as np 11 | import random 12 | import scipy 13 | import os.path 14 | # utilities 15 | import utils_data as utlD 16 | import utils_classifiers as utlC 17 | 18 | 19 | class marg_sampler_imagenet: 20 | ''' 21 | Marginal sampler for image patches 22 | ''' 23 | 24 | def __init__(self, X, net): 25 | ''' 26 | Sampler to draw marginal samples 27 | Input: 28 | X imagenet data from which we will take our samples 29 | for the feature values 30 | ''' 31 | # we will use raveled arrays for each image 32 | self.X = X.reshape(X.shape[0],-1) 33 | 34 | def get_samples(self, sampleIndices, featVect=None, numSamples=10): 35 | ''' 36 | Draw marginal samples for the given indices 37 | Input: 38 | sampleIndices the indices in the image for which we want to 39 | draw samples; is expected to reflect the 40 | indices of the raveled (!) 
image 41 | featVect (irrelevant because we don't condition on it) 42 | numSamples the number of samples to return 43 | Output: 44 | samples the marginal samples, in a matrix of size 45 | (numSamples)*(number of sampleIndices) 46 | ''' 47 | return np.take(self.X[:numSamples], sampleIndices.ravel(), axis=1) 48 | 49 | 50 | class cond_sampler_imagenet: 51 | ''' 52 | Conditional sampler for image patches 53 | using a multivariate Gaussian distribution 54 | ''' 55 | 56 | def __init__(self, win_size, padding_size, image_dims, netname, num_samples_fit=20000): 57 | ''' 58 | Sampler to conditionally sample pixel patches using a gaussian model 59 | Input: 60 | win_size the size (=width=height) of the window which 61 | we want to sample values for 62 | padding_size the padding size that is added on each side of 63 | the window to condition the sampled values on 64 | image_dims the (2d) dimensions of the image, i.e., 65 | (width, height) 66 | netname the name of the convnet which is being used 67 | (needed because each net preprocesses the data 68 | differently) 69 | num_samples_fit number of samples used to fit the gaussian 70 | ''' 71 | 72 | # inputs 73 | self.win_size = win_size 74 | self.padding_size = padding_size 75 | self.image_dims = image_dims 76 | self.netname = netname 77 | self.num_samples_fit = num_samples_fit 78 | 79 | self.path_folder = './gaussians/' 80 | if not os.path.exists(self.path_folder): 81 | os.makedirs(self.path_folder) 82 | 83 | # the whole patch size whose pixel distribution we model with a gaussian 84 | self.patchSize = win_size+2*self.padding_size 85 | # the mean and covariance for the gaussian distribution on the whole patch 86 | self.meanVects, self.covMats = self._get_gauss_params() 87 | 88 | # the min/max values for the features seen in the data, so that we can cut off overflowing values 89 | if not os.path.exists(self.path_folder+'{}_minMaxVals.npy'.format(netname)): 90 | save_minmax_values(self.netname) 91 | self.minMaxVals = np.load(self.path_folder+'{}_minMaxVals.npy'.format(netname)) 92 | 93 | self.location = None 94 | self.dotProdForMean = None 95 | self.cond_cov = None 96 | 97 | def _get_gauss_params(self): 98 | ''' 99 | Returns the mean and covariance for the gaussian model on the whole 100 | patch (i.e., window to sample plus padding around it) 101 | ''' 102 | 103 | means = np.zeros((3, self.patchSize*self.patchSize)) 104 | covs = np.zeros((3, self.patchSize*self.patchSize, self.patchSize*self.patchSize)) 105 | 106 | path_mean = self.path_folder+'{}_means{}_indep'.format(self.netname, self.patchSize) 107 | path_cov = self.path_folder+'{}_covs{}_indep'.format(self.netname, self.patchSize) 108 | 109 | # check if values are already precomputed and saved; otherwise do so first 110 | if os.path.exists(path_mean+'.npy') and os.path.exists(path_cov+'.npy'): 111 | 112 | means = np.load(path_mean+'.npy') 113 | covs = np.load(path_cov+'.npy') 114 | 115 | else: 116 | 117 | for c in [0,1,2]: 118 | 119 | net = utlC.get_caffenet(self.netname) 120 | 121 | # get the imagenet data 122 | X, _, _ = utlD.get_imagenet_data(net) 123 | 124 | # get samples for fitting the distribution 125 | patchesMat = np.empty((0,self.patchSize*self.patchSize), dtype=np.float) 126 | for i in xrange(int(self.num_samples_fit/X.shape[0])+1): 127 | # get a random (upper left) position of the patch 128 | idx = random.sample(range((self.image_dims[0]-self.patchSize)*(self.image_dims[1]-self.patchSize)), 1)[0] 129 | idx = np.unravel_index(idx, (self.image_dims[0]-self.patchSize,
self.image_dims[1]-self.patchSize)) 130 | idx = [idx[0],idx[1]] 131 | # get the patch from all the images in X, from the given channel 132 | patch = X[:,c,idx[0]:idx[0]+self.patchSize,idx[1]:idx[1]+self.patchSize] 133 | patchesMat = np.vstack((patchesMat, patch.reshape((X.shape[0],self.patchSize*self.patchSize)))) 134 | 135 | # compute the mean and covariance of the collected samples 136 | means[c] = np.mean(patchesMat, axis=0) 137 | covs[c] = np.cov(patchesMat.T) 138 | 139 | # save the mean and the covariance 140 | np.save(path_mean, means) 141 | np.save(path_cov, covs) 142 | 143 | return means, covs 144 | 145 | 146 | def _get_cond_params(self, surrPatch, inPatchIdx, channel): 147 | ''' 148 | Input: 149 | surrPatch the variables over which we have a distribution 150 | inPatchIdx the index/indices of the values we want to sample 151 | Output: 152 | cond_mean the conditional mean of the inner patch, 153 | conditioned on the surrounding pixels 154 | cond_cov the conditional covariance 155 | ''' 156 | 157 | # get the part of the surrPatch vector which we use to condition the values on 158 | x2 = np.delete(surrPatch, inPatchIdx) 159 | # split the mean vector into mu1 and mu2 (matching what we want to sample/condition on) 160 | mu1 = np.take(self.meanVects[channel], inPatchIdx) 161 | mu2 = np.delete(self.meanVects[channel], inPatchIdx) 162 | 163 | path_dotProdForMean = self.path_folder+'{}_cov{}_win{}_dotProdForMean_{}_{}'.format(self.netname, self.patchSize, self.win_size, inPatchIdx[0], inPatchIdx[-1]) 164 | 165 | # get the dot product for the mean (check if precomputed, otherwise do this first) 166 | if not os.path.exists(path_dotProdForMean+'.npy'): 167 | cov11 = self.covMats[channel][inPatchIdx][:,inPatchIdx] 168 | cov12 = np.delete(self.covMats[channel][inPatchIdx,:], inPatchIdx, axis=1) 169 | cov21 = np.delete(self.covMats[channel][:,inPatchIdx], inPatchIdx, axis=0) 170 | cov22 = np.delete(np.delete(self.covMats[channel], inPatchIdx, axis=0), inPatchIdx, axis=1) 171 | # compute the conditional mean and covariance 172 | dotProdForMean = np.dot(cov12,scipy.linalg.inv(cov22)) 173 | np.save(path_dotProdForMean, dotProdForMean) 174 | else: 175 | dotProdForMean = np.load(path_dotProdForMean+'.npy') 176 | 177 | # with the dotproduct, we can now evaluate the conditional mean 178 | cond_mean = mu1 + np.dot(dotProdForMean, x2-mu2) 179 | 180 | path_condCov = self.path_folder+'{}_cov{}_win{}_cond_cov_{}_{}_indep'.format(self.netname, self.patchSize, self.win_size, inPatchIdx[0], inPatchIdx[-1]) 181 | 182 | # get the conditional covariance 183 | if not os.path.exists(path_condCov+'.npy'): 184 | cov11 = self.covMats[channel][inPatchIdx][:,inPatchIdx] 185 | cov12 = np.delete(self.covMats[channel][inPatchIdx,:], inPatchIdx, axis=1) 186 | cov21 = np.delete(self.covMats[channel][:,inPatchIdx], inPatchIdx, axis=0) 187 | cov22 = np.delete(np.delete(self.covMats[channel], inPatchIdx, axis=0), inPatchIdx, axis=1) 188 | cond_cov = cov11 - np.dot(np.dot(cov12,scipy.linalg.inv(cov22)),cov21) 189 | np.save(path_condCov, cond_cov) 190 | else: 191 | cond_cov = np.load(path_condCov+'.npy') 192 | 193 | return cond_mean, cond_cov 194 | 195 | 196 | def _get_surr_patch(self, x, sampleIndices): 197 | ''' 198 | Gets a patch around the sampleIndices 199 | Input: 200 | x the whole feature vector 201 | sampleIndices the (raveled) indices for which we want to 202 |
get samples 203 | ''' 204 | height = self.image_dims[0] 205 | width = self.image_dims[1] 206 | # get the 2d values of the sample indices (since we sample from all color channels at once anyway) 207 | subset3d = np.unravel_index(sampleIndices.ravel(), [3, self.image_dims[0], self.image_dims[1]]) 208 | subset2d = [subset3d[1],subset3d[2]] 209 | # we will need this to find the index of the sample inside the surrounding patch 210 | inPatchIdx = np.tile(np.array([i for i in xrange(self.patchSize*self.patchSize)]),3).reshape(3,self.patchSize,self.patchSize) 211 | # indices of the subset relative to the whole feature map x 212 | upperIdx = subset2d[0][0] 213 | lowerIdx = subset2d[0][-1] 214 | leftIdx = subset2d[1][0] 215 | rightIdx = subset2d[1][-1] 216 | # indices of the subset relative to the surrounding patch 217 | upperIdxPatch = upperIdx 218 | lowerIdxPatch = self.patchSize-(height-lowerIdx) 219 | leftIdxPatch = leftIdx 220 | rightIdxPatch = self.patchSize-(width-rightIdx) 221 | # get a surrounding patch, depending on where the subset lies 222 | # and find the indices inside the patch where the subset is 223 | self.location = None 224 | if leftIdx<self.padding_size and upperIdx<self.padding_size: # upper left 225 | surrPatch = x[:, :self.patchSize, :self.patchSize] 226 | inPatchIdx = inPatchIdx[:, upperIdxPatch:upperIdxPatch+self.win_size, leftIdxPatch:leftIdxPatch+self.win_size] 227 | elif leftIdx<self.padding_size and lowerIdx>(height-1-self.padding_size): # lower left 228 | surrPatch = x[:, -self.patchSize:, :self.patchSize] 229 | inPatchIdx = inPatchIdx[:, lowerIdxPatch-self.win_size+1:lowerIdxPatch+1, leftIdxPatch:leftIdxPatch+self.win_size] 230 | elif rightIdx>(width-1-self.padding_size) and lowerIdx>(height-1-self.padding_size): # lower right 231 | surrPatch = x[:, -self.patchSize:, -self.patchSize:] 232 | inPatchIdx = inPatchIdx[:, lowerIdxPatch-self.win_size+1:lowerIdxPatch+1, rightIdxPatch-self.win_size+1:rightIdxPatch+1] 233 | elif rightIdx>(width-1-self.padding_size) and upperIdx<self.padding_size: # upper right 234 | surrPatch = x[:, :self.patchSize, -self.patchSize:] 235 | inPatchIdx = inPatchIdx[:, upperIdxPatch:upperIdxPatch+self.win_size, rightIdxPatch-self.win_size+1:rightIdxPatch+1] 236 | elif leftIdx<self.padding_size: # left side 237 | surrPatch = x[:, upperIdx-self.padding_size:lowerIdx+self.padding_size+1, :self.patchSize] 238 | inPatchIdx = inPatchIdx[:, self.padding_size:-self.padding_size, leftIdxPatch:leftIdxPatch+self.win_size] 239 | elif rightIdx>(width-1-self.padding_size): # right side 240 | surrPatch = x[:, upperIdx-self.padding_size:lowerIdx+self.padding_size+1, -self.patchSize:] 241 | inPatchIdx = inPatchIdx[:, self.padding_size:-self.padding_size, rightIdxPatch-self.win_size+1:rightIdxPatch+1] 242 | elif upperIdx<self.padding_size: # upper side 243 | surrPatch = x[:, :self.patchSize, leftIdx-self.padding_size:rightIdx+self.padding_size+1] 244 | inPatchIdx = inPatchIdx[:, upperIdxPatch:upperIdxPatch+self.win_size, self.padding_size:-self.padding_size] 245 | elif lowerIdx>(height-1-self.padding_size): # lower side 246 | surrPatch = x[:, -self.patchSize:, leftIdx-self.padding_size:rightIdx+self.padding_size+1] 247 | inPatchIdx = inPatchIdx[:, lowerIdxPatch-self.win_size+1:lowerIdxPatch+1, self.padding_size:-self.padding_size] 248 | else: # somewhere in the middle 249 | self.location = 'middle' 250 | surrPatch = x[:, upperIdx-self.padding_size:lowerIdx+self.padding_size+1, leftIdx-self.padding_size:rightIdx+self.padding_size+1] 251 | inPatchIdx = inPatchIdx[:, self.padding_size:-self.padding_size, self.padding_size:-self.padding_size] 252 | # return the patch and the indices of the subset relative to that 253 | return surrPatch, inPatchIdx 254 | 255 | 256 | def get_samples(self, sampleIndices, featVect_orig, numSamples=100): 257 | ''' 258 | Input featVect_orig the complete feature vector 259 | sampleIndices the raveled(!)
indices which we want to sample 260 | numSamples how many samples to draw 261 | 262 | ''' 263 | 264 | featVect = np.copy(featVect_orig) 265 | 266 | # to avoid mistakes, remove the feature values of the part that we want to sample 267 | featVect.ravel()[sampleIndices.ravel()] = 0 268 | 269 | # reshape inputs if necessary 270 | if np.ndim(sampleIndices)==1: 271 | sampleIndices = sampleIndices.reshape(3, self.win_size, self.win_size) 272 | if np.ndim(featVect)==1: 273 | featVect = featVect.reshape([3, self.image_dims[0], self.image_dims[1]]) 274 | 275 | # get a patch surrounding the sample indices and the indices relative to that 276 | patch, patchIndices = self._get_surr_patch(featVect, sampleIndices) 277 | 278 | # For each color channel, we will conditionally sample pixel 279 | # values from a multivariate distribution 280 | 281 | samples = np.zeros((numSamples, 3, self.win_size*self.win_size)) 282 | 283 | for c in [0,1,2]: 284 | 285 | patch_c = patch[c].ravel() 286 | patchIndices_c = patchIndices[c].ravel() 287 | 288 | # get the conditional mean and covariance 289 | if self.padding_size==0: 290 | cond_mean = self.meanVects[c] 291 | cond_cov = self.covMats[c] 292 | else: 293 | cond_mean, cond_cov = self._get_cond_params(patch_c, patchIndices_c, c) 294 | 295 | # sample from the conditional distribution 296 | # samples = np.random.multivariate_normal(cond_mean, cond_cov, numSamples) 297 | # -- FASTER: 298 | dimGauss = self.win_size*self.win_size 299 | # --- (1) find real matrix A such that AA^T=Sigma --- 300 | A = np.linalg.cholesky(cond_cov) 301 | # --- (2) get (numSamples) samples from a standard normal --- 302 | z = np.random.normal(size=numSamples*dimGauss).reshape(dimGauss,numSamples) 303 | # --- (3) x=mu+Az --- 304 | samples[:,c] = cond_mean[np.newaxis,:] + np.dot(A,z).T 305 | 306 | samples = samples.reshape((numSamples, -1)) 307 | 308 | # get the min/max values for this particular sample 309 | # (since the data is preprocessed these can be different for each pixel!) 310 | minVals_sample = self.minMaxVals[0].ravel()[sampleIndices.ravel()] 311 | maxVals_sample = self.minMaxVals[1].ravel()[sampleIndices.ravel()] 312 | # clip the values 313 | for i in xrange(samples.shape[0]): 314 | samples[i][samples[i]<minVals_sample] = minVals_sample[samples[i]<minVals_sample] 315 | samples[i][samples[i]>maxVals_sample] = maxVals_sample[samples[i]>maxVals_sample] 316 | 317 | return samples 318 | 319 | 320 | #%% 321 | 322 | 323 | def save_minmax_values(netname): 324 | ''' 325 | When X.npy is updated, this can be executed to also update the min/max 326 | values of the data (which is being used to cut off the values in the 327 | sampler so that we don't have overflowing values) 328 | ''' 329 | net = utlC.get_caffenet(netname) 330 | X, _, _ = utlD.get_imagenet_data(net) 331 | minMaxVals = np.zeros((2,3,X.shape[-1],X.shape[-1])) 332 | minMaxVals[0] = np.min(X,axis=0) 333 | minMaxVals[1] = np.max(X,axis=0) 334 | path_folder = './gaussians/' 335 | if not os.path.exists(path_folder): 336 | os.makedirs(path_folder) 337 | np.save(path_folder+'{}_minMaxVals'.format(netname), minMaxVals) 338 | 339 | 340 | 341 | -------------------------------------------------------------------------------- /utils_visualise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Some utility functions for visualisation, not documented properly 4 | """ 5 | 6 | from skimage import color 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | import matplotlib.cm as cm 10 | import pylab 11 | 12 | 13 | def plot_results(x_test, x_test_im, sensMap,
predDiff, tarFunc, classnames, testIdx, save_path): 14 | ''' 15 | Plot the results of the relevance estimation 16 | ''' 17 | imsize = x_test.shape 18 | 19 | tarIdx = np.argmax(tarFunc(x_test)[-1]) 20 | tarClass = classnames[tarIdx] 21 | #tarIdx = 287 22 | 23 | plt.figure() 24 | plt.subplot(2,2,1) 25 | plt.imshow(x_test_im, interpolation='nearest') 26 | plt.title('original') 27 | frame = pylab.gca() 28 | frame.axes.get_xaxis().set_ticks([]) 29 | frame.axes.get_yaxis().set_ticks([]) 30 | plt.subplot(2,2,2) 31 | plt.imshow(sensMap, cmap=cm.Greys_r, interpolation='nearest') 32 | plt.title('sensitivity map') 33 | frame = pylab.gca() 34 | frame.axes.get_xaxis().set_ticks([]) 35 | frame.axes.get_yaxis().set_ticks([]) 36 | plt.subplot(2,2,3) 37 | p = predDiff.reshape((imsize[1],imsize[2],-1))[:,:,tarIdx] 38 | plt.imshow(p, cmap=cm.seismic, vmin=-np.max(np.abs(p)), vmax=np.max(np.abs(p)), interpolation='nearest') 39 | plt.colorbar() 40 | #plt.imshow(np.abs(p), cmap=cm.Greys_r) 41 | plt.title('weight of evidence') 42 | frame = pylab.gca() 43 | frame.axes.get_xaxis().set_ticks([]) 44 | frame.axes.get_yaxis().set_ticks([]) 45 | plt.subplot(2,2,4) 46 | plt.title('class: {}'.format(tarClass)) 47 | p = get_overlayed_image(x_test_im, p) 48 | #p = predDiff[0,:,:,np.argmax(netPred(net, x_test)[0]),1].reshape((224,224)) 49 | plt.imshow(p, cmap=cm.seismic, vmin=-np.max(np.abs(p)), vmax=np.max(np.abs(p)), interpolation='nearest') 50 | #plt.title('class entropy') 51 | frame = pylab.gca() 52 | frame.axes.get_xaxis().set_ticks([]) 53 | frame.axes.get_yaxis().set_ticks([]) 54 | 55 | fig = plt.gcf() 56 | fig.set_size_inches(np.array([12,12]), forward=True) 57 | plt.tight_layout() 58 | plt.tight_layout() 59 | plt.tight_layout() 60 | plt.savefig(save_path) 61 | plt.close() 62 | 63 | 64 | def get_overlayed_image(x, c, gray_factor_bg = 0.3): 65 | ''' 66 | For an image x and a relevance vector c, overlay the image with the 67 | relevance vector to visualise the influence of the image pixels. 68 | ''' 69 | imDim = x.shape[0] 70 | 71 | if np.ndim(c)==1: 72 | c = c.reshape((imDim,imDim)) 73 | if np.ndim(x)==2: # this happens with the MNIST Data 74 | x = 1-np.dstack((x, x, x))*gray_factor_bg # make it a bit grayish 75 | if np.ndim(x)==3: # this is what happens with cifar data 76 | x = color.rgb2gray(x) 77 | x = 1-(1-x)*0.5 78 | x = np.dstack((x,x,x)) 79 | 80 | alpha = 0.8 81 | 82 | # Construct a colour image to superimpose 83 | im = plt.imshow(c, cmap = cm.seismic, vmin=-np.max(np.abs(c)), vmax=np.max(np.abs(c)), interpolation='nearest') 84 | color_mask = im.to_rgba(c)[:,:,[0,1,2]] 85 | 86 | # Convert the input image and color mask to Hue Saturation Value (HSV) colorspace 87 | img_hsv = color.rgb2hsv(x) 88 | color_mask_hsv = color.rgb2hsv(color_mask) 89 | 90 | # Replace the hue and saturation of the original image 91 | # with that of the color mask 92 | img_hsv[..., 0] = color_mask_hsv[..., 0] 93 | img_hsv[..., 1] = color_mask_hsv[..., 1] * alpha 94 | 95 | img_masked = color.hsv2rgb(img_hsv) 96 | 97 | return img_masked 98 | 99 | 100 | 101 | --------------------------------------------------------------------------------
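For readers who want to follow the conditional sampling maths in utils_sampling.py without wading through the caching and patch-location logic, here is a minimal, self-contained numpy sketch. It is not part of the original code base, and the function and variable names are chosen for exposition only. It shows the two formulas that _get_cond_params and get_samples implement: the conditional mean mu1 + Sigma12 Sigma22^-1 (x2 - mu2), the conditional covariance Sigma11 - Sigma12 Sigma22^-1 Sigma21, and the fast Cholesky-based sampling x = mu + A z with A A^T = Sigma.

```python
# -*- coding: utf-8 -*-
# Illustrative sketch of the conditional multivariate Gaussian used by
# cond_sampler_imagenet (the names below are expository, not from the repo).
import numpy as np
import scipy.linalg


def sample_conditional_gaussian(mean, cov, inner_idx, observed, num_samples=10):
    '''
    mean, cov   parameters of the joint Gaussian over all pixels of a patch
    inner_idx   1d array of indices of the pixels to sample (the inner window)
    observed    values of the remaining (padding/border) pixels
    Returns a (num_samples, len(inner_idx)) matrix of conditional samples.
    Assumes the conditional covariance is positive definite (add a small
    ridge to cov if the Cholesky factorisation fails).
    '''
    # split the joint distribution into inner (1) and observed (2) parts
    mu1 = mean[inner_idx]
    mu2 = np.delete(mean, inner_idx)
    cov11 = cov[np.ix_(inner_idx, inner_idx)]
    cov12 = np.delete(cov[inner_idx, :], inner_idx, axis=1)
    cov22 = np.delete(np.delete(cov, inner_idx, axis=0), inner_idx, axis=1)
    # conditional mean and covariance (Sigma21 = Sigma12^T by symmetry)
    dot_prod = np.dot(cov12, scipy.linalg.inv(cov22))
    cond_mean = mu1 + np.dot(dot_prod, observed - mu2)
    cond_cov = cov11 - np.dot(dot_prod, cov12.T)
    # sample via the Cholesky factor, as in get_samples: x = mu + A z
    A = np.linalg.cholesky(cond_cov)
    z = np.random.normal(size=(len(inner_idx), num_samples))
    return (cond_mean[:, np.newaxis] + np.dot(A, z)).T
```

In the repository this computation runs once per colour channel, with the patch means and covariances fitted in _get_gauss_params; the Sigma12 Sigma22^-1 product and the conditional covariance are cached on disk because they depend only on the window position inside the patch, not on the image being explained.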