├── .gitignore ├── .gitmodules ├── FaceVis ├── DeepFace_set003_inception.prototxt ├── Inceptionism_face.m ├── LightenedCNN_B_deploy.prototxt ├── README.md ├── gallery │ ├── Anne Hathaway.png │ ├── Bingbing Fan.png │ ├── Bingbing Li.png │ ├── Bruce Lee.png │ ├── Mr Bean.png │ └── Yun-Fat Chow.png └── webface_mean.proto ├── FilterVis ├── GoogLeNet_Visualization_4c.m ├── GoogLeNet_Visualization_conv2.m ├── README.md ├── ShowMidFilter.m ├── demo.m ├── gallery │ ├── alexnet.png │ ├── googlenet-conv2.png │ ├── inception.png │ └── vgg.png ├── getReceptiveField.m ├── googlenet.prototxt ├── googlenet_receptive.prototxt └── googlenet_visualize.prototxt ├── Inceptionism ├── Inceptionism_big.m ├── README.md ├── big_compare.png ├── deploy.prototxt └── smoothL1.m ├── LICENSE ├── NNComplexity ├── CNNComplexity.m ├── CNNComplexitySave.m ├── Inception.prototxt ├── Lab2RGB.m ├── README.md ├── RGB2Lab.m ├── img │ ├── 10conv_relu.png │ ├── 10conv_sigmoid.png │ ├── 1conv_relu.png │ ├── 1conv_sigmoid.png │ ├── hori.png │ └── vert.png ├── maxout.prototxt ├── net_define.prototxt ├── output.prototxt ├── pooling.prototxt └── run_save.m ├── NeuralArt ├── MakeStylePrototxt.m ├── NeuralArtCost.m ├── NeuralArtDemo.m ├── NeuralArtFunc.m ├── NeuralArtLbfgs.m ├── README.md ├── VGG_16_nueralart.prototxt ├── VGG_16_nueralart_content.prototxt ├── VGG_16_nueralart_style.prototxt ├── VGG_16_nueralart_style_gen.prototxt ├── gallery │ └── star-tubingen.png ├── gaussian_net.prototxt ├── gaussian_net_template.prototxt ├── getColorPrior.m ├── googlenet_neuralart.prototxt └── smoothL1.m ├── PrototxtGen ├── 1x1conv.prototxt ├── 3x3conv.prototxt ├── Inception.prototxt ├── PrototxtGen.m ├── README.md ├── activation.prototxt ├── euclideanloss.prototxt ├── gram.prototxt ├── maxout.prototxt ├── net_define.prototxt ├── output.prototxt ├── pooling.prototxt ├── run_save.m └── smoothL1Loss.prototxt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | #google 
log 2 | *.INFO 3 | *.WARNING 4 | *.ERROR 5 | *.FATAL 6 | 7 | log 8 | 9 | *.asv 10 | 11 | material -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "NeuralArt/minFunc"] 2 | path = NeuralArt/minFunc 3 | url = https://github.com/ganguli-lab/minFunc 4 | -------------------------------------------------------------------------------- /FaceVis/DeepFace_set003_inception.prototxt: -------------------------------------------------------------------------------- 1 | name: "DeepFace_set003_net" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 1 5 | input_dim: 128 6 | input_dim: 128 7 | force_backward: true 8 | layers{ 9 | name: "conv1" 10 | type: CONVOLUTION 11 | 12 | convolution_param { 13 | num_output: 96 14 | kernel_size: 9 15 | stride: 1 16 | weight_filler { 17 | type: "xavier" 18 | } 19 | bias_filler { 20 | type: "constant" 21 | value: 0.1 22 | } 23 | } 24 | bottom: "data" 25 | top: "conv1" 26 | } 27 | 28 | layers{ 29 | name: "pool1" 30 | type: POOLING 31 | pooling_param { 32 | pool: MAX 33 | kernel_size: 2 34 | stride: 2 35 | } 36 | bottom: "conv1" 37 | top: "pool1" 38 | } 39 | 40 | layers{ 41 | name: "slice1" 42 | type:SLICE 43 | slice_param { 44 | slice_dim: 1 45 | } 46 | bottom: "pool1" 47 | top: "slice1_1" 48 | top: "slice1_2" 49 | } 50 | 51 | layers{ 52 | name: "etlwise1" 53 | type: ELTWISE 54 | bottom: "slice1_1" 55 | bottom: "slice1_2" 56 | top: "eltwise1" 57 | eltwise_param { 58 | operation: MAX 59 | } 60 | } 61 | 62 | layers{ 63 | name: "conv2" 64 | type: CONVOLUTION 65 | 66 | convolution_param { 67 | num_output: 192 68 | kernel_size: 5 69 | stride: 1 70 | weight_filler { 71 | type: "xavier" 72 | } 73 | bias_filler { 74 | type: "constant" 75 | value: 0.1 76 | } 77 | } 78 | bottom: "eltwise1" 79 | top: "conv2" 80 | } 81 | layers{ 82 | name: "pool2" 83 | type: POOLING 84 | pooling_param { 85 | pool: MAX 86 | kernel_size: 2 
87 | stride: 2 88 | } 89 | bottom: "conv2" 90 | top: "pool2" 91 | } 92 | 93 | 94 | layers{ 95 | name: "slice2" 96 | type:SLICE 97 | slice_param { 98 | slice_dim: 1 99 | } 100 | bottom: "pool2" 101 | top: "slice2_1" 102 | top: "slice2_2" 103 | } 104 | 105 | layers{ 106 | name: "etlwise2" 107 | type: ELTWISE 108 | bottom: "slice2_1" 109 | bottom: "slice2_2" 110 | top: "eltwise2" 111 | 112 | eltwise_param { 113 | operation: MAX 114 | } 115 | } 116 | 117 | layers{ 118 | name: "conv3" 119 | type: CONVOLUTION 120 | 121 | convolution_param { 122 | num_output: 256 123 | kernel_size: 5 124 | stride: 1 125 | weight_filler { 126 | type: "xavier" 127 | } 128 | bias_filler { 129 | type: "constant" 130 | value: 0.1 131 | } 132 | } 133 | bottom: "eltwise2" 134 | top: "conv3" 135 | } 136 | 137 | layers{ 138 | name: "pool3" 139 | type: POOLING 140 | pooling_param { 141 | pool: MAX 142 | kernel_size: 2 143 | stride: 2 144 | } 145 | bottom: "conv3" 146 | top: "pool3" 147 | } 148 | 149 | layers{ 150 | name: "slice3" 151 | type:SLICE 152 | slice_param { 153 | slice_dim: 1 154 | } 155 | bottom: "pool3" 156 | top: "slice3_1" 157 | top: "slice3_2" 158 | } 159 | 160 | layers{ 161 | name: "etlwise3" 162 | type: ELTWISE 163 | bottom: "slice3_1" 164 | bottom: "slice3_2" 165 | top: "eltwise3" 166 | eltwise_param { 167 | operation: MAX 168 | } 169 | } 170 | layers{ 171 | name: "conv4" 172 | type: CONVOLUTION 173 | 174 | convolution_param { 175 | num_output: 384 176 | kernel_size: 4 177 | stride: 1 178 | weight_filler { 179 | type: "xavier" 180 | } 181 | bias_filler { 182 | type: "constant" 183 | value: 0.1 184 | } 185 | } 186 | bottom: "eltwise3" 187 | top: "conv4" 188 | } 189 | 190 | layers{ 191 | name: "pool4" 192 | type: POOLING 193 | pooling_param { 194 | pool: MAX 195 | kernel_size: 2 196 | stride: 2 197 | } 198 | bottom: "conv4" 199 | top: "pool4" 200 | } 201 | 202 | layers{ 203 | name: "slice4" 204 | type:SLICE 205 | slice_param { 206 | slice_dim: 1 207 | } 208 | bottom: "pool4" 209 | 
top: "slice4_1" 210 | top: "slice4_2" 211 | } 212 | 213 | layers{ 214 | name: "etlwise4" 215 | type: ELTWISE 216 | bottom: "slice4_1" 217 | bottom: "slice4_2" 218 | top: "eltwise4" 219 | eltwise_param { 220 | operation: MAX 221 | } 222 | } 223 | 224 | layers{ 225 | name: "fc1" 226 | type: INNER_PRODUCT 227 | inner_product_param{ 228 | num_output: 512 229 | weight_filler { 230 | type: "xavier" 231 | } 232 | bias_filler { 233 | type: "constant" 234 | value: 0.1 235 | } 236 | } 237 | bottom: "eltwise4" 238 | top: "fc1" 239 | } 240 | 241 | layers{ 242 | name: "slice6" 243 | type:SLICE 244 | slice_param { 245 | slice_dim: 1 246 | } 247 | bottom: "fc1" 248 | top: "slice6_1" 249 | top: "slice6_2" 250 | } 251 | 252 | layers{ 253 | name: "eltwise6" 254 | type: ELTWISE 255 | bottom: "slice6_1" 256 | bottom: "slice6_2" 257 | top: "eltwise6" 258 | eltwise_param { 259 | operation: MAX 260 | } 261 | } 262 | layers{ 263 | name: "fc2" 264 | type: INNER_PRODUCT 265 | weight_decay: 10 266 | weight_decay: 10 267 | inner_product_param{ 268 | num_output: 10575 269 | weight_filler { 270 | type: "xavier" 271 | } 272 | bias_filler { 273 | type: "constant" 274 | value: 0.1 275 | } 276 | } 277 | bottom: "eltwise6" 278 | top: "fc2" 279 | } -------------------------------------------------------------------------------- /FaceVis/Inceptionism_face.m: -------------------------------------------------------------------------------- 1 | % Visualize faces in a face recognition model. 2 | % The model can be downloaded from 3 | % https://github.com/AlfredXiangWu/face_verification_experiment . 4 | % This model is trained with CASIA-webface, People_ID can be set from 1 5 | % to 10575, to get the memorized face of the corresponding person. 
% Based on paper:
% Feng Wang, Haijun Liu, Jian Cheng,
% Visualizing Deep Neural Network by Alternately Image Blurring and Deblurring
%
% For every identity (People_ID) the script starts from the dataset mean
% face and runs momentum gradient ascent on the input image. The diff that
% is back-propagated is +1 at the chosen output unit and -1 at every other
% unit. The resulting image is written to gallery/<People_ID>.png.

caffe.reset_all();
caffe.set_mode_gpu();
gpu_id = 0; % we will use the first gpu in this demo
caffe.set_device(gpu_id);

% net_model = 'DeepFace_set003_inception.prototxt';% Please, remember to add force_backward:true to this file.
% net_weights = 'D:\project\lfw_face_verification_experiment\model\DeepFace_set003_net_iter.caffemodel';
net_model = 'LightenedCNN_B_deploy.prototxt';% Please, remember to add force_backward:true to this file.
net_weights = 'D:\project\lfw_face_verification_experiment\model\LightenedCNN_B.caffemodel';

mean_file = [];

train_net = caffe.Net(net_model,net_weights,'test');
% mean_image is only used for its size (buffer allocation) below.
if isempty(mean_file)
    mean_image = zeros(128,128);
else
    mean_image = caffe.read_mean(mean_file);
end

% The starting image is identical for every identity, so it is read and
% resized once instead of once per People_ID.
mean_file = 'webface_mean.proto';
mean_face = caffe.read_mean(mean_file);
mean_face = mean_face / 256;
start_image = imresize(mean_face,[128,128]);

% ---- optional regularisers (all disabled in this demo) ----
use_clip = false;          % zero the gradient where |grad .* image| is below half its mean
use_cv_norm = false;       % smooth-L1 total-variation term; needs smoothL1.m on the path
use_weight_decay = false;  % shrink the image towards zero
use_image_blur = false;    % Gaussian-blur the image when mod(iter,20) == 10
use_image_deblur = false;  % Lucy-Richardson deblur when mod(iter,20) == 0
use_gradient_blur = false; % Gaussian-blur the gradient before it is used
use_maxGray = false;       % rescale the image to [0,1] after every step

% ---- optimisation constants ----
max_lr = 0.01;             % upper bound for the adaptive learning rate
min_lr = 0.000001;         % stop once the learning rate has decayed below this
lambda1 = 0.00001;         % weight-decay strength
lambda2 = 0.1;             % total-variation strength
momentum = 0.8;
max_iter = 200;            % iteration counter value at which an identity is finished
display_interval = 100;    % refresh the preview figure every this many iterations
% NOTE(review): the statements that adapt base_lr (original lines 170-178)
% were lost when this file was extracted; only the two conditions survived.
% The factors below are assumptions - confirm against the upstream file.
lr_decay = 0.99;
lr_growth = 1.01;

% imwrite does not create missing directories.
if ~exist('gallery','dir')
    mkdir('gallery');
end

for People_ID = 1:10575

    input_data = zeros(size(mean_image,1), size(mean_image,2), 1, 1, 'single');
    input_data(:,:,1,1) = start_image;
    n_channels = size(input_data,3);

    % H is overwritten with a random-width kernel inside the loop, so it
    % has to be reset for every identity.
    H = fspecial('gaussian',[7 7],1.2);
    prob = train_net.forward({input_data});
    max_idx = People_ID;
    this_prob = prob{1}(max_idx);

    % Diff fed to backward(): +1 for the target unit, -1 for all others.
    back_data = ones(size(prob{1}),'single') * -1;
    back_data(max_idx) = 1;
    back_cell = prob;
    back_cell{1} = back_data;

    base_lr = 0.01;
    last_prob = -999;
    lastgrad = zeros(size(mean_image));
    iter = 1;

    while 1
        lr = base_lr;% * sqrt(this_prob / (1 - this_prob));
        res = train_net.backward(back_cell);
        grad = res{1};

        if use_gradient_blur
            grad = imfilter(grad,H,'same');
        end

        % Image before this step; used by the clip, TV and weight-decay terms.
        I = input_data(:,:,:,1);

        if use_clip
            app_gradient = sum(abs(grad .* I),3);
            app_gradient = app_gradient < mean(app_gradient(:)) * 0.5;
            % Use the real channel count; the input of this net has one channel.
            clip_grad = reshape(grad,[size(mean_image,1)*size(mean_image,2) n_channels]);
            clip_grad(app_gradient==1,:) = 0;
            grad = reshape(clip_grad,size(input_data));
        end

        if use_cv_norm
            % Total-variation gradient; smoothL1 is applied to the
            % neighbouring-pixel differences (the earlier variant of this
            % code used sign() in its place).
            Gx = smoothL1(I(2:end-1,:,:) - I(1:end-2,:,:)) - smoothL1(I(3:end,:,:) - I(2:end-1,:,:));
            Gx = [smoothL1(I(1,:,:) - I(2,:,:)); Gx; smoothL1(I(end,:,:) - I(end-1,:,:))];
            Gy = smoothL1(I(:,2:end-1,:) - I(:,1:end-2,:)) - smoothL1(I(:,3:end,:) - I(:,2:end-1,:));
            Gy = [smoothL1(I(:,1,:) - I(:,2,:)) Gy smoothL1(I(:,end,:) - I(:,end-1,:))];
            grad = grad - lambda2 * (Gx + Gy);
        end

        % Momentum ascent step. It uses grad, so the clip and TV terms
        % above actually take effect when they are enabled.
        lastgrad = (1 - momentum) * lr * grad + momentum * lastgrad;%/ norm(res{1}(:))
        input_data(:,:,:,1) = input_data(:,:,:,1) + lastgrad;
        % Adam-style alternative kept for reference:
        % lastgrad = (1 - momentum) * grad + momentum * lastgrad;
        % lastgrad2 = (1 - momentum2) * grad.^2 + momentum2 * lastgrad2;
        % lg_correct = lastgrad ./ (1 - momentum^iter);
        % lg2_correct = lastgrad2 ./ (1 - momentum2^iter);
        % input_data(:,:,:,1) = input_data(:,:,:,1) + lr * lg_correct ./ (sqrt(lg2_correct) + 1e-8);

        if use_weight_decay
            input_data(:,:,:,1) = input_data(:,:,:,1) - lr * lambda1 * I;
        end
        if use_maxGray
            input_data(:,:,:,1) = input_data(:,:,:,1) - min(input_data(:));
            input_data(:,:,:,1) = input_data(:,:,:,1) / max(input_data(:));
        end

        % Alternate blur and deblur every 10 iterations. The kernel drawn
        % in the blur phase is reused by the following deblur phase, and
        % it is drawn even when blurring is disabled.
        if mod(iter,20) == 10
            H = fspecial('gaussian',[5 5],rand()/2+0.5);
            if use_image_blur
                input_data(:,:,:,1) = imfilter(input_data(:,:,:,1),H,'same');
            end
        elseif mod(iter,20) == 0
            if use_image_deblur
                input_data(:,:,:,1) = deconvlucy(input_data(:,:,:,1), H);
            end
        end

        prob = train_net.forward({input_data});

        this_prob = prob{1}(max_idx);
        fprintf('id=%d,iter=%d,lr=%f,prob1=%f,last_prob=%f\n',People_ID,iter,lr,prob{1}(max_idx),last_prob);
        iter = iter + 1;

        if mod(iter,display_interval)==0
            figure(2);
            output = input_data(:,:,:,1);
            output = output'; % presumably because matcaffe blobs are width-first - confirm
            imshow(output);
        end
        if iter >= max_iter
            break;
        end

        % Adaptive learning rate (reconstructed, see NOTE(review) above):
        % shrink when the target score dropped, grow again while it is
        % improving and the rate is still below max_lr.
        if this_prob < last_prob
            base_lr = base_lr * lr_decay;
        end
        if this_prob > last_prob && base_lr < max_lr
            base_lr = base_lr * lr_growth;
        end
        last_prob = this_prob;

        if lr < min_lr
            break;
        end
    end

    output = input_data(:,:,:,1);
    output = output';
    imwrite(output, ['gallery/' num2str(People_ID) '.png']);
end
-------------------------------------------------------------------------------- /FaceVis/LightenedCNN_B_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "DeepFace_set003_net" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 1 5 | input_dim: 128 6 | input_dim: 128 7 | force_backward: true 8 | layers{ 9 | name: "conv1" 10 | type: CONVOLUTION 11 | blobs_lr: 1 12 | blobs_lr: 2 13 | weight_decay: 1 14 | weight_decay: 0 15 | convolution_param { 16 | num_output: 96 17 | kernel_size: 5 18 | stride: 1 19 | pad: 2 20 | weight_filler { 21 | type: "xavier" 22 | } 23 | bias_filler { 24 | type: "constant" 25 | value: 0.1 26 | } 27 | } 28 | bottom: "data" 29 | top: "conv1" 30 | } 31 | layers{ 32 | name: "slice1" 33 | type:SLICE 34 | slice_param { 35 | slice_dim: 1 36 | } 37 | bottom: "conv1" 38 | top: "slice1_1" 39 | top: "slice1_2" 40 | } 41 | layers{ 42 | name: "etlwise1" 43 | type: ELTWISE 44 | bottom: "slice1_1" 45 | bottom: "slice1_2" 46 | top: "eltwise1" 47 | eltwise_param { 48 | operation: MAX 49 | } 50 | } 51 | layers{ 52 | name: "pool1" 53 | type: POOLING 54 | pooling_param { 55 | pool: MAX 56 | kernel_size: 2 57 | stride: 2 58 | } 59 | bottom: "eltwise1" 60 | top: "pool1" 61 | } 62 | 63 | layers{ 64 | name: 
"conv2a" 65 | type: CONVOLUTION 66 | blobs_lr: 1 67 | blobs_lr: 2 68 | weight_decay: 1 69 | weight_decay: 0 70 | convolution_param { 71 | num_output: 96 72 | kernel_size: 1 73 | stride: 1 74 | weight_filler { 75 | type: "xavier" 76 | } 77 | bias_filler { 78 | type: "constant" 79 | value: 0.1 80 | } 81 | } 82 | bottom: "pool1" 83 | top: "conv2a" 84 | } 85 | layers{ 86 | name: "slice2a" 87 | type:SLICE 88 | slice_param { 89 | slice_dim: 1 90 | } 91 | bottom: "conv2a" 92 | top: "slice2a_1" 93 | top: "slice2a_2" 94 | } 95 | layers{ 96 | name: "etlwise2a" 97 | type: ELTWISE 98 | bottom: "slice2a_1" 99 | bottom: "slice2a_2" 100 | top: "eltwise2a" 101 | eltwise_param { 102 | operation: MAX 103 | } 104 | } 105 | 106 | layers{ 107 | name: "conv2" 108 | type: CONVOLUTION 109 | blobs_lr: 1 110 | blobs_lr: 2 111 | weight_decay: 1 112 | weight_decay: 0 113 | convolution_param { 114 | num_output: 192 115 | kernel_size: 3 116 | stride: 1 117 | pad: 1 118 | weight_filler { 119 | type: "xavier" 120 | } 121 | bias_filler { 122 | type: "constant" 123 | value: 0.1 124 | } 125 | } 126 | bottom: "eltwise2a" 127 | top: "conv2" 128 | } 129 | layers{ 130 | name: "slice2" 131 | type:SLICE 132 | slice_param { 133 | slice_dim: 1 134 | } 135 | bottom: "conv2" 136 | top: "slice2_1" 137 | top: "slice2_2" 138 | } 139 | layers{ 140 | name: "etlwise2" 141 | type: ELTWISE 142 | bottom: "slice2_1" 143 | bottom: "slice2_2" 144 | top: "eltwise2" 145 | eltwise_param { 146 | operation: MAX 147 | } 148 | } 149 | layers{ 150 | name: "pool2" 151 | type: POOLING 152 | pooling_param { 153 | pool: MAX 154 | kernel_size: 2 155 | stride: 2 156 | } 157 | bottom: "eltwise2" 158 | top: "pool2" 159 | } 160 | 161 | layers{ 162 | name: "conv3a" 163 | type: CONVOLUTION 164 | blobs_lr: 1 165 | blobs_lr: 2 166 | weight_decay: 1 167 | weight_decay: 0 168 | convolution_param { 169 | num_output: 192 170 | kernel_size: 1 171 | stride: 1 172 | weight_filler { 173 | type: "xavier" 174 | } 175 | bias_filler { 176 | type: 
"constant" 177 | value: 0.1 178 | } 179 | } 180 | bottom: "pool2" 181 | top: "conv3a" 182 | } 183 | layers{ 184 | name: "slice3a" 185 | type:SLICE 186 | slice_param { 187 | slice_dim: 1 188 | } 189 | bottom: "conv3a" 190 | top: "slice3a_1" 191 | top: "slice3a_2" 192 | } 193 | layers{ 194 | name: "etlwise3a" 195 | type: ELTWISE 196 | bottom: "slice3a_1" 197 | bottom: "slice3a_2" 198 | top: "eltwise3a" 199 | eltwise_param { 200 | operation: MAX 201 | } 202 | } 203 | 204 | layers{ 205 | name: "conv3" 206 | type: CONVOLUTION 207 | blobs_lr: 1 208 | blobs_lr: 2 209 | weight_decay: 1 210 | weight_decay: 0 211 | convolution_param { 212 | num_output: 384 213 | kernel_size: 3 214 | stride: 1 215 | pad: 1 216 | weight_filler { 217 | type: "xavier" 218 | } 219 | bias_filler { 220 | type: "constant" 221 | value: 0.1 222 | } 223 | } 224 | bottom: "eltwise3a" 225 | top: "conv3" 226 | } 227 | layers{ 228 | name: "slice3" 229 | type:SLICE 230 | slice_param { 231 | slice_dim: 1 232 | } 233 | bottom: "conv3" 234 | top: "slice3_1" 235 | top: "slice3_2" 236 | } 237 | layers{ 238 | name: "etlwise3" 239 | type: ELTWISE 240 | bottom: "slice3_1" 241 | bottom: "slice3_2" 242 | top: "eltwise3" 243 | eltwise_param { 244 | operation: MAX 245 | } 246 | } 247 | layers{ 248 | name: "pool3" 249 | type: POOLING 250 | pooling_param { 251 | pool: MAX 252 | kernel_size: 2 253 | stride: 2 254 | } 255 | bottom: "eltwise3" 256 | top: "pool3" 257 | } 258 | 259 | layers{ 260 | name: "conv4a" 261 | type: CONVOLUTION 262 | blobs_lr: 1 263 | blobs_lr: 2 264 | weight_decay: 1 265 | weight_decay: 0 266 | convolution_param{ 267 | num_output: 384 268 | kernel_size: 1 269 | stride: 1 270 | weight_filler{ 271 | type:"xavier" 272 | } 273 | bias_filler{ 274 | type: "constant" 275 | value: 0.1 276 | } 277 | } 278 | bottom: "pool3" 279 | top: "conv4a" 280 | } 281 | layers{ 282 | name: "slice4a" 283 | type:SLICE 284 | slice_param { 285 | slice_dim: 1 286 | } 287 | bottom: "conv4a" 288 | top: "slice4a_1" 289 | top: 
"slice4a_2" 290 | } 291 | layers{ 292 | name: "etlwise4a" 293 | type: ELTWISE 294 | bottom: "slice4a_1" 295 | bottom: "slice4a_2" 296 | top: "eltwise4a" 297 | eltwise_param { 298 | operation: MAX 299 | } 300 | } 301 | layers{ 302 | name: "conv4" 303 | type: CONVOLUTION 304 | blobs_lr: 1 305 | blobs_lr: 2 306 | weight_decay: 1 307 | weight_decay: 0 308 | convolution_param{ 309 | num_output: 256 310 | kernel_size: 3 311 | stride: 1 312 | pad: 1 313 | weight_filler{ 314 | type:"xavier" 315 | } 316 | bias_filler{ 317 | type: "constant" 318 | value: 0.1 319 | } 320 | } 321 | bottom: "eltwise4a" 322 | top: "conv4" 323 | } 324 | layers{ 325 | name: "slice4" 326 | type:SLICE 327 | slice_param { 328 | slice_dim: 1 329 | } 330 | bottom: "conv4" 331 | top: "slice4_1" 332 | top: "slice4_2" 333 | } 334 | layers{ 335 | name: "etlwise4" 336 | type: ELTWISE 337 | bottom: "slice4_1" 338 | bottom: "slice4_2" 339 | top: "eltwise4" 340 | eltwise_param { 341 | operation: MAX 342 | } 343 | } 344 | 345 | layers{ 346 | name: "conv5a" 347 | type: CONVOLUTION 348 | blobs_lr: 1 349 | blobs_lr: 2 350 | weight_decay: 1 351 | weight_decay: 0 352 | convolution_param{ 353 | num_output: 256 354 | kernel_size: 1 355 | stride: 1 356 | weight_filler{ 357 | type:"xavier" 358 | } 359 | bias_filler{ 360 | type: "constant" 361 | value: 0.1 362 | } 363 | } 364 | bottom: "eltwise4" 365 | top: "conv5a" 366 | } 367 | layers{ 368 | name: "slice5a" 369 | type:SLICE 370 | slice_param { 371 | slice_dim: 1 372 | } 373 | bottom: "conv5a" 374 | top: "slice5a_1" 375 | top: "slice5a_2" 376 | } 377 | layers{ 378 | name: "etlwise5a" 379 | type: ELTWISE 380 | bottom: "slice5a_1" 381 | bottom: "slice5a_2" 382 | top: "eltwise5a" 383 | eltwise_param { 384 | operation: MAX 385 | } 386 | } 387 | layers{ 388 | name: "conv5" 389 | type: CONVOLUTION 390 | blobs_lr: 1 391 | blobs_lr: 2 392 | weight_decay: 1 393 | weight_decay: 0 394 | convolution_param{ 395 | num_output: 256 396 | kernel_size: 3 397 | stride: 1 398 | pad: 1 399 
| weight_filler{ 400 | type:"xavier" 401 | } 402 | bias_filler{ 403 | type: "constant" 404 | value: 0.1 405 | } 406 | } 407 | bottom: "eltwise5a" 408 | top: "conv5" 409 | } 410 | layers{ 411 | name: "slice5" 412 | type:SLICE 413 | slice_param { 414 | slice_dim: 1 415 | } 416 | bottom: "conv5" 417 | top: "slice5_1" 418 | top: "slice5_2" 419 | } 420 | layers{ 421 | name: "etlwise5" 422 | type: ELTWISE 423 | bottom: "slice5_1" 424 | bottom: "slice5_2" 425 | top: "eltwise5" 426 | eltwise_param { 427 | operation: MAX 428 | } 429 | } 430 | 431 | layers{ 432 | name: "pool4" 433 | type: POOLING 434 | pooling_param { 435 | pool: MAX 436 | kernel_size: 2 437 | stride: 2 438 | } 439 | bottom: "eltwise5" 440 | top: "pool4" 441 | } 442 | 443 | layers{ 444 | name: "fc1" 445 | type: INNER_PRODUCT 446 | blobs_lr: 1 447 | blobs_lr: 2 448 | weight_decay: 1 449 | weight_decay: 0 450 | inner_product_param { 451 | num_output: 512 452 | weight_filler { 453 | type: "xavier" 454 | } 455 | bias_filler { 456 | type: "constant" 457 | value: 0.1 458 | } 459 | } 460 | bottom: "pool4" 461 | top: "fc1" 462 | } 463 | layers{ 464 | name: "slice_fc1" 465 | type:SLICE 466 | slice_param { 467 | slice_dim: 1 468 | } 469 | bottom: "fc1" 470 | top: "slice_fc1_1" 471 | top: "slice_fc1_2" 472 | } 473 | layers{ 474 | name: "etlwise_fc1" 475 | type: ELTWISE 476 | bottom: "slice_fc1_1" 477 | bottom: "slice_fc1_2" 478 | top: "eltwise_fc1" 479 | eltwise_param { 480 | operation: MAX 481 | } 482 | } 483 | 484 | layers{ 485 | name: "drop1" 486 | type: DROPOUT 487 | dropout_param{ 488 | dropout_ratio: 0.75 489 | } 490 | bottom: "eltwise_fc1" 491 | top: "eltwise_fc1" 492 | } 493 | layers{ 494 | name: "fc2" 495 | type: INNER_PRODUCT 496 | 497 | inner_product_param{ 498 | num_output: 10575 499 | weight_filler { 500 | type: "xavier" 501 | } 502 | bias_filler { 503 | type: "constant" 504 | value: 0.1 505 | } 506 | } 507 | bottom: "eltwise_fc1" 508 | top: "fc2" 509 | } 
-------------------------------------------------------------------------------- /FaceVis/README.md: -------------------------------------------------------------------------------- 1 | Visualize faces in a face recognition model. 2 | 3 | The model can be downloaded from 4 | https://github.com/AlfredXiangWu/face_verification_experiment . 5 | This model is trained with CASIA-webface, People_ID can be set from 1 6 | to 10575, to get the memorized face of the corresponding person. 7 | 8 | Here are some generated faces memorized by the recognition model mentioned above: 9 | 10 | Bruce Lee: 11 | ![Bruce Lee](gallery/Bruce%20Lee.png) 12 | Mr Bean: 13 | ![Mr Bean](gallery/Mr%20Bean.png) 14 | 15 | Yun-Fat Chow: 16 | ![Yun-Fat Chow](gallery/Yun-Fat%20Chow.png) 17 | Anne Hathaway: 18 | ![Anne Hathaway](gallery/Anne%20Hathaway.png) 19 | 20 | Bingbing Li: 21 | ![Bingbing Li](gallery/Bingbing%20Li.png) 22 | Bingbing Fan: 23 | ![Bingbing Fan](gallery/Bingbing%20Fan.png) 24 | 25 | 26 | Yes, they are not as beautiful as they are in the real world. If you feel uncomfortable with these images, 27 | please contact me and I will remove them. 
28 | -------------------------------------------------------------------------------- /FaceVis/gallery/Anne Hathaway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FaceVis/gallery/Anne Hathaway.png -------------------------------------------------------------------------------- /FaceVis/gallery/Bingbing Fan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FaceVis/gallery/Bingbing Fan.png -------------------------------------------------------------------------------- /FaceVis/gallery/Bingbing Li.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FaceVis/gallery/Bingbing Li.png -------------------------------------------------------------------------------- /FaceVis/gallery/Bruce Lee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FaceVis/gallery/Bruce Lee.png -------------------------------------------------------------------------------- /FaceVis/gallery/Mr Bean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FaceVis/gallery/Mr Bean.png -------------------------------------------------------------------------------- /FaceVis/gallery/Yun-Fat Chow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FaceVis/gallery/Yun-Fat Chow.png 
-------------------------------------------------------------------------------- /FaceVis/webface_mean.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FaceVis/webface_mean.proto -------------------------------------------------------------------------------- /FilterVis/GoogLeNet_Visualization_4c.m: -------------------------------------------------------------------------------- 1 | % Visualize all the inception_4c filters in bvlc_googlenet. 2 | % Because of the size, I splitted the visualization map into 32*16 grids, 3 | % with each grid contains visualization of 4 filters. 4 | % This code is messay, I will refactor it in the future. 5 | % Based on paper: 6 | % Feng Wang, Haijun Liu, Jian Cheng, 7 | % Visualizing Deep Neural Network by Alternately Image Blurring and Deblurring 8 | caffe.reset_all(); 9 | caffe.set_mode_gpu(); 10 | gpu_id = 0; % we will use the first gpu in this demo 11 | caffe.set_device(gpu_id); 12 | original_prototxt = 'D:\deepLearning\caffe-windows\models\bvlc_googlenet\googlenet.prototxt'; 13 | net_weights = 'D:\deepLearning\caffe-windows\models\bvlc_googlenet\thinned.caffemodel'; 14 | layer_name = 'inception_4c/output'; 15 | pattern_index=400; 16 | channels = 512; 17 | initial_size = [400 400]; 18 | [field_size, field_stride] = getReceptiveField(original_prototxt, net_weights, layer_name, pattern_index, initial_size); 19 | field_size = ceil(field_size ./ field_stride) .* field_stride; 20 | border = field_stride(1); 21 | map_border = border ./ field_stride; 22 | if max(field_size) > 224 23 | % error('field_size:(%d,%d),field_stride:(%d,%d), please increase the initial size.',field_size(1),field_size(2),field_stride(1),field_stride(2)); 24 | field_size = [224 224]; 25 | field_size = ceil(field_size ./ field_stride) .* field_stride; 26 | map_stride = ceil((field_size + border) ./ field_stride); 27 | else 28 
| map_stride = ceil((field_size + border) ./ field_stride); 29 | end; 30 | 31 | vert_num = 32; 32 | hori_num = 16; 33 | total_map = reshape(1:vert_num*hori_num,hori_num,vert_num)'; 34 | vert_split = vert_num/16; 35 | hori_split = hori_num/8; 36 | for vert_ind = 1:16 37 | for hori_ind = 1:8 38 | % disp([layer_name '_' num2str(vert_ind) num2str(hori_ind) '.png']); 39 | % if exist([layer_name '_' num2str(vert_ind) num2str(hori_ind) '.png'],'file') 40 | % continue; 41 | % end; 42 | % vert_ind = 2; 43 | % hori_ind = 2; 44 | caffe.reset_all(); 45 | current_map = total_map((vert_ind-1)*vert_split+1:vert_ind*vert_split,(hori_ind-1)*hori_split+1:hori_ind*hori_split)'; 46 | 47 | height = vert_split * (field_size(1)+border(1)) + border(1); 48 | width = hori_split * (field_size(2)+border(1)) + border(1); 49 | 50 | vgg_mean = [103.939, 116.779, 123.68]; 51 | mean_image = permute(repmat(vgg_mean',[1,width,height]),[2,3,1]); 52 | input_data = randn(width, height, 3, 1, 'single')*50; 53 | 54 | for need_negative=1:1 55 | original_net_model = fileread(original_prototxt); 56 | original_net_model = strrep(original_net_model,'negative_slope:0#4c','negative_slope:1#4c'); 57 | 58 | visualize_prototxt = strrep(original_prototxt,'.prototxt','_visualize.prototxt'); 59 | 60 | fid = fopen(visualize_prototxt,'w'); 61 | proto_txt{1} = 'name: "Visualize"'; 62 | proto_txt{2} = 'input: "data"'; 63 | proto_txt{3} = 'input_dim: 1'; 64 | proto_txt{4} = 'input_dim: 3'; 65 | proto_txt{5} = ['input_dim: ' num2str(height)]; 66 | proto_txt{6} = ['input_dim: ' num2str(width)]; 67 | for i=1:6 68 | fprintf(fid,'%s\r\n',proto_txt{i}); 69 | end; 70 | 71 | fprintf(fid,'%s\r\n',original_net_model); 72 | fclose(fid); 73 | 74 | if need_negative == 0 75 | input_data = randn(width, height, 3, 1, 'single')*50; 76 | end; 77 | all_grad = zeros(width, height, 3, 1, 'single'); 78 | 79 | visualize_net = caffe.Net(visualize_prototxt,net_weights,'test'); 80 | 
visualize_net.blobs(visualize_net.inputs{1}).set_data(input_data); 81 | visualize_net.forward_to(layer_name); 82 | target_blob = visualize_net.blob_vec(visualize_net.name2blob_index(layer_name)); 83 | output_data = target_blob.get_data(); 84 | backward_mask = zeros(size(output_data,1), size(output_data,2),'uint16'); 85 | backward_label = zeros(size(output_data,1), size(output_data,2),'uint16'); 86 | backward_data = zeros(size(output_data,1), size(output_data,2),channels, 'uint16'); 87 | label_mat = repmat(1:channels,size(output_data,1) * size(output_data,2),1)'; 88 | if map_stride(1) > 0 89 | backward_mask(ceil(map_stride(1)/2+map_border(1)):map_stride(1):end, ceil(map_stride(2)/2+map_border(2)):map_stride(2):end) = 1; 90 | backward_label(backward_mask==1) = current_map(:); 91 | for i=1:channels 92 | backward_data(:,:,i) = (backward_label == i); 93 | end; 94 | else 95 | backward_mask(floor(size(backward_mask,1)/2),floor(size(backward_mask,2)/2)) = 1; 96 | end; 97 | item_num = sum(backward_mask(:)); 98 | target_blob = visualize_net.blob_vec(visualize_net.name2blob_index(layer_name)); 99 | data_blob = visualize_net.blob_vec(visualize_net.name2blob_index('data')); 100 | for i=1:channels 101 | if i>384 && i<=448 102 | backward_data(:,:,i) = backward_data(:,:,i) * 2; 103 | end; 104 | end; 105 | 106 | weight_decay = 0; 107 | use_color_prior = false; 108 | num_cluster=6; 109 | color_prior = 0.5; 110 | long_size = 512; 111 | if need_negative==0 112 | tv_norm = 0; 113 | use_image_blur = false; 114 | use_image_deblur = false; 115 | else 116 | tv_norm = 0; 117 | use_image_blur = true; 118 | use_image_deblur = true; 119 | end; 120 | 121 | if need_negative==0 122 | lr = 10; 123 | else 124 | lr=200; 125 | end; 126 | max_lr = 50; 127 | momentum = 0.8; 128 | momentum2 = 0.99; 129 | lastgrad = zeros(size(mean_image)); 130 | lastgrad2 = zeros(size(mean_image)); 131 | last_cost = -9999999999999; 132 | blurred = false; 133 | numLast = 0; 134 | 135 | for iter = 1:1000 136 | bak_data = 
input_data; 137 | bak_grad = lastgrad; 138 | visualize_net.blobs(visualize_net.inputs{1}).set_data(input_data); 139 | visualize_net.forward_to(layer_name); 140 | output_data = target_blob.get_data(); 141 | output_data(backward_data==0) = -999; 142 | output_map = max(output_data,[],3); 143 | min_cost = min(output_map(backward_mask==1)); 144 | if min_cost>300 && need_negative==0 145 | disp(min_cost); 146 | break; 147 | end; 148 | cost = sum(output_map(backward_mask==1)) / item_num; 149 | if need_negative == 0 150 | for i=1:channels 151 | if output_map(backward_label == i) > 300 152 | backward_data(:,:,i) = 0; 153 | backward_mask(backward_label == i) = 0; 154 | end; 155 | end; 156 | fprintf('iter=%d,lr=%f,this_cost=%f,last_cost=%f,min_cost=%f,min_num=%d\n',iter,lr,cost, last_cost,min_cost,sum(sum(backward_mask==1))); 157 | else 158 | fprintf('iter=%d,lr=%f,this_cost=%f,last_cost=%f,image_norm=%f,min_cost=%f\n',iter,lr,cost, last_cost,norm(input_data(:)),min_cost); 159 | end; 160 | % output_data(:) = 0; 161 | % output_data(:,:,pattern_index) = backward_mask; 162 | target_blob.set_diff(backward_data); 163 | visualize_net.backward_from(layer_name); 164 | grad = data_blob.get_diff(); 165 | all_grad = all_grad + grad; 166 | if iter==20 167 | input_data(all_grad==0) = 0; 168 | end; 169 | 170 | if tv_norm > 0 171 | I = input_data(:,:,:,1); 172 | Gx = (I(2:end-1,:,:) - I(1:end-2,:,:)) - (I(3:end,:,:) - I(2:end-1,:,:)); 173 | Gx = [(I(1,:,:) - I(2,:,:)); Gx; (I(end,:,:) - I(end-1,:,:))]; 174 | Gy = (I(:,2:end-1,:) - I(:,1:end-2,:)) - (I(:,3:end,:) - I(:,2:end-1,:)); 175 | Gy = [(I(:,1,:) - I(:,2,:)) Gy (I(:,end,:) - I(:,end-1,:))]; 176 | grad = grad - tv_norm * (Gx+Gy); 177 | end; 178 | 179 | if weight_decay > 0 180 | grad = grad - weight_decay * I; 181 | end; 182 | 183 | if use_color_prior 184 | gmm_prior = gaussian_net.forward({input_data}); 185 | sum_gp = zeros(size(mean_image,1),size(mean_image,2)); 186 | sum_prob_gradient = zeros(size(mean_image)); 187 | for 
i=1:num_cluster 188 | gp = bsxfun(@minus,input_data(:,:,:,1),reshape(colorObj.mu(i,:),[1 1 3])) .* gmm_prior{1}(:,:,(i-1)*3+1:i*3); 189 | gp = sum(gp,3); 190 | gp = colorObj.PComponents(i) * exp(-gp); 191 | sum_prob_gradient = sum_prob_gradient + bsxfun(@times,gp,gmm_prior{1}(:,:,(i-1)*3+1:i*3)); 192 | sum_gp = sum_gp + gp; 193 | end; 194 | sum_prob_gradient = bsxfun(@rdivide,sum_prob_gradient,sum_gp); 195 | sum_prob_gradient(isnan(sum_prob_gradient)) = 0; 196 | input_data(:,:,:,1) = input_data(:,:,:,1) - lr * color_prior * sum_prob_gradient; 197 | end; 198 | if need_negative==1 199 | %%%%%%%%%%%%%%%%%%%%%%%%gd linear search 200 | lastgrad = (1 - momentum) * lr * grad + momentum * lastgrad;%/ norm(res(:)) 201 | input_data(:,:,:,1) = input_data(:,:,:,1) + lastgrad; 202 | else 203 | %%%%%%%%%%%%%%%%%%%%%%%%adam 204 | lastgrad = (1 - momentum) * grad + momentum * lastgrad;%/ norm(res(:)) 205 | lastgrad2 = (1 - momentum2) * grad.^2 + momentum2 * lastgrad2;%/ norm(res(:)) 206 | lg_correct = lastgrad ./ (1 - momentum^iter); 207 | lg2_correct = lastgrad2 ./ (1 - momentum2^iter); 208 | input_data(:,:,:,1) = input_data(:,:,:,1) + lr * lg_correct ./ (sqrt(lg2_correct) + 1e-8); 209 | end; 210 | % input_data(:,:,:,1) = input_data(:,:,:,1) / norm(input_data(:)); 211 | 212 | k = mod(iter,10); 213 | if k==1 214 | H = fspecial('gaussian',[7 7],1.2); 215 | if use_image_blur 216 | input_data(:,:,:,1) = imfilter(input_data(:,:,:,1),H,'same'); 217 | blurred = true; 218 | end 219 | end; 220 | if k==6 221 | H = fspecial('gaussian',[7 7],1); 222 | if use_image_deblur 223 | input_data(:,:,:,1) = deconvlucy(input_data(:,:,:,1), H); 224 | blurred = false; 225 | end; 226 | end; 227 | 228 | if (need_negative==1 && cost>last_cost) 229 | last_cost = cost; 230 | numLast = 0; 231 | else 232 | numLast = numLast + 1; 233 | end; 234 | if (need_negative==0 && min_cost>last_cost) 235 | last_cost = min_cost; 236 | numLast = 0; 237 | else 238 | numLast = numLast + 1; 239 | end; 240 | if numLast>100 241 
| break; 242 | end; 243 | if lr<1e-5 244 | break; 245 | end; 246 | 247 | %%%%%%%%%%%%%%%%%%%%%%gd 248 | 249 | if mod(iter,100)==0 250 | output = mean_image + input_data(:,:,:,1); 251 | output = output(:, :, [3, 2, 1]); 252 | output = permute(output, [2 1 3]); 253 | figure(3); 254 | imshow(uint8(output)); 255 | % title('generated image'); 256 | I = output; 257 | end; 258 | end; 259 | end; 260 | % imwrite(uint8(output),[layer_name '_' num2str(vert_ind) num2str(hori_ind) '.png']); 261 | end; 262 | end; -------------------------------------------------------------------------------- /FilterVis/GoogLeNet_Visualization_conv2.m: -------------------------------------------------------------------------------- 1 | % Visualize all the conv2 filters in bvlc_googlenet. 2 | % This code is messy, I will refactor it in the future. 3 | % Based on the paper: 4 | % Feng Wang, Haijun Liu, Jian Cheng, 5 | % Visualizing Deep Neural Network by Alternately Image Blurring and Deblurring 6 | caffe.reset_all(); 7 | caffe.set_mode_gpu(); 8 | gpu_id = 1; % NOTE: Caffe device ids are 0-based, so 1 selects the second GPU; use 0 for the first 9 | caffe.set_device(gpu_id); 10 | % original_prototxt = 'googlenet_neuralart_conv2.prototxt'; 11 | % net_weights = 'D:\deeplearning\caffe-windows\examples\GoogLeNet\imagenet_googlenet.caffemodel'; 12 | % layer_name = 'conv2'; 13 | original_prototxt = 'D:\deepLearning\caffe-windows\models\bvlc_googlenet\googlenet.prototxt'; 14 | net_weights = 'D:\deepLearning\caffe-windows\models\bvlc_googlenet\thinned.caffemodel'; 15 | layer_name = 'conv2/3x3'; 16 | channels = 192; 17 | pattern_index = 1; 18 | initial_size = [256 256]; 19 | [field_size, field_stride] = getReceptiveField(original_prototxt, net_weights, layer_name, pattern_index, initial_size); 20 | if max(field_size) > initial_size(1)*2/3 21 | % error('field_size:(%d,%d),field_stride:(%d,%d), please increase the initial size.',field_size(1),field_size(2),field_stride(1),field_stride(2)); 22 | map_stride = zeros(1,2); 23 | else 24 | map_stride = 
ceil((field_size + 7) ./ field_stride); 25 | end; 26 | 27 | height = 230; 28 | width = 230 * 3; 29 | 30 | for need_negative=1:1 31 | original_net_model = fileread(original_prototxt); 32 | 33 | visualize_prototxt = strrep(original_prototxt,'.prototxt','_visualize.prototxt'); 34 | 35 | fid = fopen(visualize_prototxt,'w'); 36 | proto_txt{1} = 'name: "Visualize"'; 37 | proto_txt{2} = 'input: "data"'; 38 | proto_txt{3} = 'input_dim: 1'; 39 | proto_txt{4} = 'input_dim: 3'; 40 | proto_txt{5} = ['input_dim: ' num2str(height)]; 41 | proto_txt{6} = ['input_dim: ' num2str(width)]; 42 | for i=1:6 43 | fprintf(fid,'%s\r\n',proto_txt{i}); 44 | end; 45 | 46 | fprintf(fid,'%s\r\n',original_net_model); 47 | fclose(fid); 48 | 49 | vgg_mean = [103.939, 116.779, 123.68]; 50 | mean_image = permute(repmat(vgg_mean',[1,width,height]),[2,3,1]); 51 | input_data = randn(width, height, 3, 1, 'single'); 52 | all_grad = zeros(width, height, 3, 1, 'single'); 53 | 54 | visualize_net = caffe.Net(visualize_prototxt,net_weights,'test'); 55 | visualize_net.blobs(visualize_net.inputs{1}).set_data(input_data); 56 | visualize_net.forward_to(layer_name); 57 | target_blob = visualize_net.blob_vec(visualize_net.name2blob_index(layer_name)); 58 | output_data = target_blob.get_data(); 59 | backward_mask = zeros(size(output_data,1), size(output_data,2),'uint8'); 60 | backward_label = zeros(size(output_data,1), size(output_data,2),'uint8'); 61 | backward_data = zeros(size(output_data,1), size(output_data,2),channels, 'uint8'); 62 | label_mat = repmat(1:channels,size(output_data,1) * size(output_data,2),1)'; 63 | if map_stride(1) > 0 64 | backward_mask(floor(map_stride(1)/2+field_stride(1)/2):map_stride(1):end, floor(map_stride(2)/2+field_stride(2)/2):map_stride(2):end) = 1; 65 | backward_label(backward_mask==1) = label_mat(1:sum(sum(backward_mask==1))); 66 | for i=1:channels 67 | backward_data(:,:,i) = (backward_label == i); 68 | end; 69 | else 70 | 
backward_mask(floor(size(backward_mask,1)/2),floor(size(backward_mask,2)/2)) = 1; 71 | end; 72 | item_num = sum(backward_mask(:)); 73 | target_blob = visualize_net.blob_vec(visualize_net.name2blob_index(layer_name)); 74 | data_blob = visualize_net.blob_vec(visualize_net.name2blob_index('data')); 75 | 76 | target_layer = visualize_net.layers(layer_name); 77 | target_weight = target_layer.params(1).get_data(); 78 | target_weight_hist = sum(sum(sum(abs(target_weight),1),2),3); 79 | figure(1); 80 | bar(target_weight_hist(:)); 81 | 82 | weight_decay = 0; 83 | use_color_prior = false; 84 | num_cluster=6; 85 | color_prior = 0.5; 86 | long_size = 512; 87 | tv_norm = 0; 88 | use_image_blur = true; 89 | use_image_deblur = true; 90 | weight_decay = 0; 91 | lr=100; 92 | max_lr = 200; 93 | 94 | momentum = 0.8; 95 | momentum2 = 0.99; 96 | lastgrad = zeros(size(mean_image)); 97 | lastgrad2 = zeros(size(mean_image)); 98 | last_cost = -9999999999999; 99 | if need_negative==0 100 | maxiter = 1000; 101 | else 102 | maxiter = 100; 103 | end; 104 | 105 | for iter = 1:1000 106 | bak_data = input_data; 107 | bak_grad = lastgrad; 108 | visualize_net.blobs(visualize_net.inputs{1}).set_data(input_data); 109 | visualize_net.forward_to(layer_name); 110 | output_data = target_blob.get_data(); 111 | output_data(backward_data==0) = -999; 112 | output_map = max(output_data,[],3); 113 | min_cost = min(output_map(backward_mask==1)); 114 | if min_cost>100 && need_negative==0 115 | disp(min_cost); 116 | break; 117 | end; 118 | cost = sum(output_map(backward_mask==1)) / item_num; 119 | if need_negative == 0 120 | for i=1:channels 121 | if output_map(backward_label == i) > 100 122 | backward_data(:,:,i) = 0; 123 | backward_mask(backward_label == i) = 0; 124 | end; 125 | end; 126 | fprintf('iter=%d,lr=%f,this_cost=%f,last_cost=%f,min_cost=%f,min_num=%d\n',iter,lr,cost, last_cost,min_cost,sum(backward_mask(:))); 127 | else 128 | 
fprintf('iter=%d,lr=%f,this_cost=%f,last_cost=%f,image_norm=%f,min_cost=%f\n',iter,lr,cost, last_cost,norm(input_data(:)),min_cost); 129 | end; 130 | 131 | % output_data(:) = 0; 132 | % output_data(:,:,pattern_index) = backward_mask; 133 | target_blob.set_diff(backward_data); 134 | visualize_net.backward_from(layer_name); 135 | grad = data_blob.get_diff(); 136 | all_grad = all_grad + grad; 137 | if iter==20 138 | input_data(all_grad==0) = 0; 139 | end; 140 | 141 | if tv_norm > 0 142 | I = input_data(:,:,:,1); 143 | Gx = (I(2:end-1,:,:) - I(1:end-2,:,:)) - (I(3:end,:,:) - I(2:end-1,:,:)); 144 | Gx = [(I(1,:,:) - I(2,:,:)); Gx; (I(end,:,:) - I(end-1,:,:))]; 145 | Gy = (I(:,2:end-1,:) - I(:,1:end-2,:)) - (I(:,3:end,:) - I(:,2:end-1,:)); 146 | Gy = [(I(:,1,:) - I(:,2,:)) Gy (I(:,end,:) - I(:,end-1,:))]; 147 | grad = grad - tv_norm * (Gx+Gy); 148 | end; 149 | 150 | if weight_decay > 0 151 | grad = grad - weight_decay * input_data(:,:,:,1); 152 | end; 153 | 154 | if use_color_prior 155 | gmm_prior = gaussian_net.forward({input_data}); 156 | sum_gp = zeros(size(mean_image,1),size(mean_image,2)); 157 | sum_prob_gradient = zeros(size(mean_image)); 158 | for i=1:num_cluster 159 | gp = bsxfun(@minus,input_data(:,:,:,1),reshape(colorObj.mu(i,:),[1 1 3])) .* gmm_prior{1}(:,:,(i-1)*3+1:i*3); 160 | gp = sum(gp,3); 161 | gp = colorObj.PComponents(i) * exp(-gp); 162 | sum_prob_gradient = sum_prob_gradient + bsxfun(@times,gp,gmm_prior{1}(:,:,(i-1)*3+1:i*3)); 163 | sum_gp = sum_gp + gp; 164 | end; 165 | sum_prob_gradient = bsxfun(@rdivide,sum_prob_gradient,sum_gp); 166 | sum_prob_gradient(isnan(sum_prob_gradient)) = 0; 167 | input_data(:,:,:,1) = input_data(:,:,:,1) - lr * color_prior * sum_prob_gradient; 168 | end; 169 | if need_negative==1 170 | %%%%%%%%%%%%%%%%%%%%%%%%gd linear search 171 | lastgrad = (1 - momentum) * lr * grad + momentum * lastgrad;%/ norm(res(:)) 172 | input_data(:,:,:,1) = input_data(:,:,:,1) + lastgrad; 173 | if cost>last_cost 174 | last_cost = cost; 175 | 
end; 176 | else 177 | %%%%%%%%%%%%%%%%%%%%%%%%adam 178 | lastgrad = (1 - momentum) * grad + momentum * lastgrad;%/ norm(res(:)) 179 | lastgrad2 = (1 - momentum2) * grad.^2 + momentum2 * lastgrad2;%/ norm(res(:)) 180 | lg_correct = lastgrad ./ (1 - momentum^iter); 181 | lg2_correct = lastgrad2 ./ (1 - momentum2^iter); 182 | input_data(:,:,:,1) = input_data(:,:,:,1) + lr * lg_correct ./ (sqrt(lg2_correct) + 1e-8); 183 | % lastgrad = (1 - momentum) * lr * grad + momentum * lastgrad;%/ norm(res(:)) 184 | % input_data(:,:,:,1) = input_data(:,:,:,1) + lastgrad; 185 | if min_costlast_cost + 1e-5&&lrlast_cost 192 | last_cost = min_cost; 193 | end; 194 | end; 195 | % input_data(:,:,:,1) = input_data(:,:,:,1) / norm(input_data(:)); 196 | 197 | k = mod(iter,10); 198 | if k==0 199 | H = fspecial('gaussian',[5 5],0.6); 200 | if use_image_blur 201 | input_data(:,:,:,1) = imfilter(input_data(:,:,:,1),H,'same'); 202 | end 203 | end; 204 | if k==5 205 | H = fspecial('gaussian',[5 5],0.5); 206 | if use_image_deblur 207 | input_data(:,:,:,1) = deconvlucy(input_data(:,:,:,1), H); 208 | end; 209 | end; 210 | 211 | % if costlast_cost&&lr 224 31 | % error('field_size:(%d,%d),field_stride:(%d,%d), please increase the initial size.',field_size(1),field_size(2),field_stride(1),field_stride(2)); 32 | field_size = [224 224]; 33 | field_size = ceil(field_size ./ field_stride) .* field_stride; 34 | map_stride = ceil((field_size + border) ./ field_stride); 35 | else 36 | map_stride = ceil((field_size + border) ./ field_stride); 37 | end; 38 | 39 | caffe.reset_all() 40 | vert_num = 1; 41 | hori_num = 1; 42 | height = vert_num * (field_size(1)+border(1)) + border(1); 43 | width = hori_num * (field_size(2)+border(1)) + border(1); 44 | 45 | vgg_mean = [103.939, 116.779, 123.68]; 46 | mean_image = permute(repmat(vgg_mean',[1,width,height]),[2,3,1]); 47 | input_data = randn(width, height, 3, 1, 'single'); 48 | 49 | original_net_model = fileread(original_prototxt); 50 | original_net_model = 
strrep(original_net_model,['negative_slope:0#' layer_name(11:12)],['negative_slope:1#' layer_name(11:12)]); 51 | 52 | visualize_prototxt = strrep(original_prototxt,'.prototxt','_visualize.prototxt'); 53 | 54 | fid = fopen(visualize_prototxt,'w'); 55 | proto_txt{1} = 'name: "Visualize"'; 56 | proto_txt{2} = 'input: "data"'; 57 | proto_txt{3} = 'input_dim: 1'; 58 | proto_txt{4} = 'input_dim: 3'; 59 | proto_txt{5} = ['input_dim: ' num2str(height)]; 60 | proto_txt{6} = ['input_dim: ' num2str(width)]; 61 | for i=1:6 62 | fprintf(fid,'%s\r\n',proto_txt{i}); 63 | end; 64 | 65 | fprintf(fid,'%s\r\n',original_net_model); 66 | fclose(fid); 67 | 68 | all_grad = zeros(width, height, 3, 1, 'single'); 69 | 70 | visualize_net = caffe.Net(visualize_prototxt,net_weights,'test'); 71 | visualize_net.blobs(visualize_net.inputs{1}).set_data(input_data); 72 | visualize_net.forward_to(layer_name); 73 | target_blob = visualize_net.blob_vec(visualize_net.name2blob_index(layer_name)); 74 | output_data = target_blob.get_data(); 75 | backward_mask = zeros(size(output_data,1), size(output_data,2),'uint16'); 76 | backward_label = zeros(size(output_data,1), size(output_data,2),'uint16'); 77 | backward_data = zeros(size(output_data,1), size(output_data,2),channels, 'uint16'); 78 | if map_stride(1) > 0 79 | backward_mask(ceil(map_stride(1)/2+map_border(1)):map_stride(1):end, ceil(map_stride(2)/2+map_border(2)):map_stride(2):end) = 1; 80 | backward_label(backward_mask==1) = filter_id; 81 | for i=1:channels 82 | backward_data(:,:,i) = (backward_label == i); 83 | end; 84 | else 85 | backward_mask(floor(size(backward_mask,1)/2),floor(size(backward_mask,2)/2)) = 1; 86 | end; 87 | item_num = sum(backward_mask(:)); 88 | target_blob = visualize_net.blob_vec(visualize_net.name2blob_index(layer_name)); 89 | data_blob = visualize_net.blob_vec(visualize_net.name2blob_index('data')); 90 | 91 | if isfield(param,'weight_decay') 92 | weight_decay = param.weight_decay; 93 | else 94 | weight_decay = 0; 95 | end; 96 
| if isfield(param,'tv_norm') 97 | tv_norm = param.tv_norm; 98 | else 99 | tv_norm = 0; 100 | end; 101 | if isfield(param,'use_image_blur') 102 | use_image_blur = param.use_image_blur; 103 | else 104 | use_image_blur = true; 105 | end; 106 | if isfield(param,'use_image_deblur') 107 | use_image_deblur = param.use_image_deblur; 108 | else 109 | use_image_deblur = true; 110 | end; 111 | if isfield(param,'learning_rate') 112 | lr = param.learning_rate; 113 | else 114 | lr=200; 115 | end; 116 | 117 | momentum = 0.8; 118 | momentum2 = 0.99; 119 | lastgrad = zeros(size(mean_image)); 120 | lastgrad2 = zeros(size(mean_image)); 121 | last_cost = -9999999999999; 122 | numLast = 0; 123 | 124 | for iter = 1:1000 125 | visualize_net.blobs(visualize_net.inputs{1}).set_data(input_data); 126 | visualize_net.forward_to(layer_name); 127 | output_data = target_blob.get_data(); 128 | output_data(backward_data==0) = -999; % suppress every response that is not a selected target before the channel-wise max 129 | output_map = max(output_data,[],3); 130 | min_cost = min(output_map(backward_mask==1)); 131 | cost = sum(output_map(backward_mask==1)) / item_num; 132 | fprintf('iter=%d,lr=%f,this_cost=%f,last_cost=%f,image_norm=%f,min_cost=%f\n',iter,lr,cost, last_cost,norm(input_data(:)),min_cost); 133 | 134 | target_blob.set_diff(backward_data); 135 | visualize_net.backward_from(layer_name); 136 | grad = data_blob.get_diff(); 137 | all_grad = all_grad + grad; 138 | if iter==20 139 | input_data(all_grad==0) = 0; % clear pixels whose accumulated gradient is still exactly zero 140 | end; 141 | 142 | if tv_norm > 0 143 | I = input_data(:,:,:,1); 144 | Gx = (I(2:end-1,:,:) - I(1:end-2,:,:)) - (I(3:end,:,:) - I(2:end-1,:,:)); 145 | Gx = [(I(1,:,:) - I(2,:,:)); Gx; (I(end,:,:) - I(end-1,:,:))]; 146 | Gy = (I(:,2:end-1,:) - I(:,1:end-2,:)) - (I(:,3:end,:) - I(:,2:end-1,:)); 147 | Gy = [(I(:,1,:) - I(:,2,:)) Gy (I(:,end,:) - I(:,end-1,:))]; 148 | grad = grad - tv_norm * (Gx+Gy); 149 | end; 150 | 151 | if weight_decay > 0 152 | grad = grad - weight_decay * input_data(:,:,:,1); % decay the current image directly: I is only assigned when tv_norm > 0 (and is later overwritten by the display image) 153 | end; 154 | 155 | if true 156 | %%%%%%%%%%%%%%%%%%%%%%%%gd linear search 157 | 
lastgrad = (1 - momentum) * lr * grad + momentum * lastgrad;%/ norm(res(:)) 158 | input_data(:,:,:,1) = input_data(:,:,:,1) + lastgrad; 159 | else 160 | %%%%%%%%%%%%%%%%%%%%%%%%adam 161 | lastgrad = (1 - momentum) * grad + momentum * lastgrad;%/ norm(res(:)) 162 | lastgrad2 = (1 - momentum2) * grad.^2 + momentum2 * lastgrad2;%/ norm(res(:)) 163 | lg_correct = lastgrad ./ (1 - momentum^iter); 164 | lg2_correct = lastgrad2 ./ (1 - momentum2^iter); 165 | input_data(:,:,:,1) = input_data(:,:,:,1) + lr * lg_correct ./ (sqrt(lg2_correct) + 1e-8); 166 | end; 167 | 168 | k = mod(iter,10); 169 | if k==1 170 | H = fspecial('gaussian',[7 7],0.8); 171 | if use_image_blur 172 | input_data(:,:,:,1) = imfilter(input_data(:,:,:,1),H,'same'); 173 | blurred = true; 174 | end 175 | end; 176 | if k==6 177 | H = fspecial('gaussian',[7 7],0.6); 178 | if use_image_deblur 179 | input_data(:,:,:,1) = deconvlucy(input_data(:,:,:,1), H); 180 | blurred = false; 181 | end; 182 | end; 183 | 184 | if cost>last_cost 185 | last_cost = cost; 186 | numLast = 0; 187 | else 188 | numLast = numLast + 1; 189 | end; 190 | if numLast>100 191 | break; 192 | end; 193 | if lr<1e-5 194 | break; 195 | end; 196 | 197 | if mod(iter,100)==0 198 | output = mean_image + input_data(:,:,:,1); 199 | output = output(:, :, [3, 2, 1]); 200 | output = permute(output, [2 1 3]); 201 | figure(3); 202 | imshow(uint8(output)); 203 | % title('generated image'); 204 | I = output; 205 | end; 206 | end; -------------------------------------------------------------------------------- /FilterVis/demo.m: -------------------------------------------------------------------------------- 1 | original_prototxt = 'googlenet.prototxt'; 2 | net_weights = 'thinned.caffemodel'; 3 | layer_name = 'inception_4c/output'; 4 | channels = 512; 5 | filter_id = 1; 6 | 7 | param.weight_decay = 0; 8 | param.tv_norm = 0; 9 | param.use_image_blur = true; 10 | param.use_image_deblur = false; 11 | param.learning_rate = 400; 12 | 13 | 
ShowMidFilter(original_prototxt, net_weights, layer_name, channels, filter_id, param); -------------------------------------------------------------------------------- /FilterVis/gallery/alexnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FilterVis/gallery/alexnet.png -------------------------------------------------------------------------------- /FilterVis/gallery/googlenet-conv2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FilterVis/gallery/googlenet-conv2.png -------------------------------------------------------------------------------- /FilterVis/gallery/inception.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FilterVis/gallery/inception.png -------------------------------------------------------------------------------- /FilterVis/gallery/vgg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FilterVis/gallery/vgg.png -------------------------------------------------------------------------------- /FilterVis/getReceptiveField.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/FilterVis/getReceptiveField.m -------------------------------------------------------------------------------- /Inceptionism/Inceptionism_big.m: -------------------------------------------------------------------------------- 1 | % Visualization of the classifier neurons. 
Modify the targetclass variable 2 | % to see other classes. 3 | % Please feel free to modify all the switches in lines 40 - 46 and width, 4 | % height, scale, rotate variables to see what happens. 5 | % The best visualization can be produced by setting use_image_blur = true 6 | % and use_image_deblur = true, with all other switches turned to false. 7 | % However, with no constraints, you can still get recognizable images. 8 | % Feel the magic yourself! 9 | % 10 | % This code is messy, I will refactor it in the future. 11 | % Based on the paper: 12 | % Feng Wang, Haijun Liu, Jian Cheng, 13 | % Visualizing Deep Neural Network by Alternately Image Blurring and Deblurring 14 | caffe.reset_all(); 15 | caffe.set_mode_gpu(); 16 | gpu_id = 0; % we will use the first gpu in this demo 17 | caffe.set_device(gpu_id); 18 | 19 | % net_model = 'D:\deeplearning\caffe-windows\examples\GoogLeNet\googlenet_deploy_prob3.prototxt'; 20 | % net_weights = 'D:\deeplearning\caffe-windows\examples\GoogLeNet\imagenet_googlenet.caffemodel'; 21 | net_model ='D:\deepLearning\caffe-windows\models\bvlc_googlenet\deploy.prototxt'; 22 | net_weights = 'D:\deepLearning\caffe-windows\models\bvlc_googlenet\thinned.caffemodel'; 23 | % mean_file = 'D:\deeplearning\caffe-windows\examples\GoogLeNet\imagenet_mean.binaryproto'; 24 | 25 | % net_model = 'D:\deeplearning\caffe-windows\examples\PlaceCNN\hybridCNN_deploy_upgraded.prototxt'; 26 | % net_weights = 'D:\deeplearning\caffe-windows\examples\PlaceCNN\hybridCNN_iter_700000_upgraded.caffemodel'; 27 | % mean_file = 'D:\deeplearning\caffe-windows\examples\PlaceCNN\hybridCNN_mean.binaryproto'; 28 | 29 | % net_model = 'D:\deeplearning\caffe-windows\examples\VGG\VGG_ILSVRC_16_layers_deploy.prototxt'; 30 | % net_weights = 'D:\deeplearning\caffe-windows\examples\VGG\VGG_ILSVRC_16_layers.caffemodel'; 31 | % mean_file = []; 32 | % vgg_mean = [103.939, 116.779, 123.68]; 33 | % net_model = 'D:\deeplearning\caffe-windows\models\bvlc_googlenet\deploy.prototxt'; 34 | 
% net_weights = 'D:\deeplearning\caffe-windows\models\bvlc_googlenet\bvlc_googlenet.caffemodel'; 35 | mean_file = []; 36 | vgg_mean = [103.939, 116.779, 123.68]; 37 | 38 | %%%%%%%%%extract the train features 39 | train_net = caffe.Net(net_model,net_weights,'test'); 40 | use_clip = false; 41 | use_cv_norm = false; 42 | use_weight_decay = false; 43 | use_image_blur = true; 44 | use_image_deblur = true; 45 | use_gradient_blur = false; 46 | use_dropout = false; 47 | 48 | for targetclass = 13:13 49 | % if exist(['gallery\' num2str(targetclass) '.png'],'file') 50 | % continue; 51 | % end; 52 | if ~isempty(mean_file) 53 | mean_image = caffe.read_mean(mean_file); 54 | mean_image = mean_image(16:242,16:242,:); 55 | else 56 | mean_image = permute(repmat(vgg_mean',[1,224,224]),[2,3,1]); 57 | end; 58 | % mean_image = mean_image + randn(size(mean_image)); 59 | input_data = zeros(size(mean_image,1), size(mean_image,2), 3, 1, 'single'); 60 | 61 | width = 300; 62 | height = 300; 63 | meanV = mean(mean_image(:)); 64 | BigImage = permute(repmat(vgg_mean',[1,height,width]),[2,3,1]);%this image will be transposed 65 | H = fspecial('gaussian',[7 7],1.2); 66 | 67 | if length(targetclass) > 1 68 | sample_density = 2; 69 | class_center = zeros(sample_density*length(targetclass),2); 70 | for i = 1:sample_density*length(targetclass) 71 | class_center(i,1) = randi(height - size(mean_image,1),1,1) + size(mean_image,1) / 2; 72 | class_center(i,2) = randi(width - size(mean_image,2),1,1) + size(mean_image,2) / 2; 73 | if i > 1 74 | min_distance = min(pdist2(class_center(i,:),class_center(1:i-1,:))); 75 | while min_distance < size(mean_image,1) / 2 76 | class_center(i,1) = randi(height - size(mean_image,1),1,1) + size(mean_image,1) / 2; 77 | class_center(i,2) = randi(width - size(mean_image,2),1,1) + size(mean_image,2) / 2; 78 | min_distance = min(pdist2(class_center(i,:),class_center(1:i-1,:))); 79 | end; 80 | end; 81 | end; 82 | 83 | class_label = repmat(targetclass,1,sample_density); 84 | end; 
85 | 86 | blur_data = zeros(size(input_data)); 87 | base_lr = 5000; 88 | max_lr = 5000; 89 | lambda1 = 0.01; 90 | lambda2 = 0.1; 91 | lambda3 = 300; 92 | last_prob = -999; 93 | momentum = 0.8; 94 | lastgrad = zeros(size(mean_image)); 95 | mask = ones(size(mean_image,1), size(mean_image,2)); 96 | iter = 1; 97 | dropout = 0.5; 98 | sep_iter = zeros(ceil((height - size(mean_image,1)) / size(mean_image,1) *2),ceil((width - size(mean_image,2)) / size(mean_image,2) * 2)); 99 | erode = 5; 100 | start_sigma = 1.2; 101 | end_sigma = 0.5; 102 | this_sigma = start_sigma; 103 | sigma_decay = 0.99; 104 | deblur_sigma_ratio = 0.8; 105 | scale = [0.8 1.2]; 106 | rotate = [0 0] / 180 * pi; 107 | func_loss = []; 108 | BigGrad = zeros(size(BigImage)); 109 | 110 | while 1 111 | % for i = 0:size(mean_image,1) / 2:height - size(mean_image,1) 112 | % for j = 0:size(mean_image,2) / 2:width - size(mean_image,2) 113 | this_sigma = this_sigma * sigma_decay; 114 | if this_sigma < end_sigma 115 | this_sigma = end_sigma; 116 | end; 117 | s = rand(1) * (scale(2)-scale(1)) + scale(1); 118 | r = rand(1) * (rotate(2)-rotate(1)) + rotate(1); 119 | if length(targetclass)>1 && iter < 500 120 | idx = randi(size(class_center,1),1,1); 121 | x = class_center(idx,1) - size(mean_image,1) / 2; 122 | y = class_center(idx,2) - size(mean_image,2) / 2; 123 | else 124 | if rand() > size(mean_image,1) / height 125 | x = randi(height - floor(size(mean_image,1) * s),1,1);% + size(mean_image,1) / 2; 126 | else 127 | pos = randi(2,1,1)-1; 128 | x = pos*(height - floor(size(mean_image,1) * s)); 129 | end; 130 | if rand() > size(mean_image,2) / width 131 | y = randi(width - floor(size(mean_image,2) * s),1,1);% - size(mean_image,2) / 2; 132 | else 133 | pos = randi(2,1,1)-1; 134 | y = pos*(width - floor(size(mean_image,2) * s)); 135 | end; 136 | end; 137 | 138 | II = zeros(floor(size(mean_image,1) * s),floor(size(mean_image,2) * s),3); 139 | II(:,:,1) = BigImage(x + 1:x + floor(size(mean_image,1) * s),y + 1:y + 
floor(size(mean_image,2) * s),1) - vgg_mean(1); 140 | II(:,:,2) = BigImage(x + 1:x + floor(size(mean_image,1) * s),y + 1:y + floor(size(mean_image,2) * s),2) - vgg_mean(2); 141 | II(:,:,3) = BigImage(x + 1:x + floor(size(mean_image,1) * s),y + 1:y + floor(size(mean_image,2) * s),3) - vgg_mean(3); 142 | input_data(:,:,:,1) = imresize(imrotate(II,r),[size(mean_image,1), size(mean_image,2)]); 143 | GG = BigGrad(x + 1:x + floor(size(mean_image,1) * s),y + 1:y + floor(size(mean_image,2) * s),:); 144 | last_grad = imresize(GG,[size(mean_image,1), size(mean_image,2)]); 145 | for k = 1 : 10 146 | 147 | lr = base_lr;% * sqrt(this_prob / (1 - this_prob)); 148 | 149 | prob = train_net.forward({input_data}); 150 | 151 | this_prob = prob{end}(targetclass); 152 | [max_prob,max_idx] = max(this_prob); 153 | fprintf('iter=%d,lr=%f,max_idx=%d,prob1=%f,last_prob=%f,this_sigma=%f\n',iter,lr,max_idx,this_prob(max_idx),last_prob,this_sigma); 154 | func_loss = [func_loss; this_prob(max_idx)]; 155 | 156 | if k==1 157 | back_data = ones(size(prob{1}),'single') * -1; 158 | % back_data = zeros(size(prob{end}),'single'); 159 | if length(targetclass) >1 160 | center_x = x + size(mean_image,1) / 2; 161 | center_y = y + size(mean_image,2) / 2; 162 | distances = pdist2([center_x center_y],class_center); 163 | distances = exp(-(distances / size(mean_image,1) * 4).^2); 164 | distances = distances / sum(distances); 165 | distances = reshape(distances,length(targetclass),sample_density); 166 | distances = sum(distances,2); 167 | distances = distances * 2 - 1; 168 | back_data(targetclass) = distances'; 169 | else 170 | back_data(targetclass(max_idx)) = 1; 171 | end; 172 | 173 | back_cell = cell(length(prob),1); 174 | % back_cell{1} = zeros(size(back_data)); 175 | % back_cell{2} = zeros(size(back_data)); 176 | % back_cell{1} = back_data; 177 | % back_cell{2} = back_data; 178 | back_cell{1} = back_data; 179 | end; 180 | 181 | iter = iter + 1; 182 | 183 | % sep_iter(floor(i / size(mean_image,1) * 
2)+1,floor(j/size(mean_image,1)*2) + 1) = sep_iter(floor(i / size(mean_image,1) * 2)+1,floor(j/size(mean_image,1)*2) + 1) + 1; 184 | 185 | if max_problast_prob&&base_lrlast_prob 194 | last_prob = max_prob; 195 | % end; 196 | 197 | res = train_net.backward(back_cell); 198 | 199 | bak_data = input_data; 200 | 201 | if use_clip 202 | app_gradient = sum(abs(res{1} .* input_data(:,:,:,1)),3); 203 | app_gradient = app_gradient < mean(app_gradient(:)); 204 | grad = reshape(res{1},[size(mean_image,1)*size(mean_image,2) 3]); 205 | grad(app_gradient==1,:) = 0; 206 | grad = reshape(grad,size(input_data)); 207 | res{1} = grad; 208 | end; 209 | 210 | input_data(:,:,:,1) = input_data(:,:,:,1) + res{1} * 1.5 / mean(abs(res{1}(:))); 211 | 212 | if use_cv_norm 213 | I = input_data(:,:,:,1); 214 | Gx = smoothL1(I(2:end-1,:,:) - I(1:end-2,:,:)) - smoothL1(I(3:end,:,:) - I(2:end-1,:,:)); 215 | Gx = [smoothL1(I(1,:,:) - I(2,:,:)); Gx; smoothL1(I(end,:,:) - I(end-1,:,:))]; 216 | Gy = smoothL1(I(:,2:end-1,:) - I(:,1:end-2,:)) - smoothL1(I(:,3:end,:) - I(:,2:end-1,:)); 217 | Gy = [smoothL1(I(:,1,:) - I(:,2,:)) Gy smoothL1(I(:,end,:) - I(:,end-1,:))]; 218 | input_data(:,:,:,1) = input_data(:,:,:,1) - lr * lambda2 * (Gx + Gy); 219 | end; 220 | if use_weight_decay 221 | input_data(:,:,:,1) = input_data(:,:,:,1) - lambda1 * input_data(:,:,:,1); 222 | end; 223 | 224 | if k==1 225 | H = fspecial('gaussian',[randi(6)+4 7],this_sigma); 226 | if use_image_blur 227 | input_data(:,:,:,1) = imfilter(input_data(:,:,:,1),H,'same'); 228 | end 229 | end; 230 | if k==6 231 | H = fspecial('gaussian',[randi(6)+4 7],this_sigma * deblur_sigma_ratio); 232 | if use_image_deblur 233 | input_data(:,:,:,1) = deconvlucy(input_data(:,:,:,1), H); 234 | end; 235 | end; 236 | 237 | if lr<0.000001 238 | break; 239 | end; 240 | end; 241 | II = imresize(imrotate(input_data(:,:,:,1),-r),floor([size(mean_image,1), size(mean_image,2)] * s)); 242 | BigImage(x + 1+erode:x + floor(size(mean_image,1) * s)-erode,y + 1+erode:y + 
floor(size(mean_image,2) * s)-erode,1) = II(erode+1:end-erode,erode+1:end-erode,1) + vgg_mean(1); 243 | BigImage(x + 1+erode:x + floor(size(mean_image,1) * s)-erode,y + 1+erode:y + floor(size(mean_image,2) * s)-erode,2) = II(erode+1:end-erode,erode+1:end-erode,2) + vgg_mean(2); 244 | BigImage(x + 1+erode:x + floor(size(mean_image,1) * s)-erode,y + 1+erode:y + floor(size(mean_image,2) * s)-erode,3) = II(erode+1:end-erode,erode+1:end-erode,3) + vgg_mean(3); 245 | GG = imresize(last_grad,floor([size(mean_image,1), size(mean_image,2)] * s)); 246 | BigGrad(x + 1+erode:x + floor(size(mean_image,1) * s)-erode,y + 1+erode:y + floor(size(mean_image,2) * s)-erode,:) = GG(erode+1:end-erode,erode+1:end-erode,:); 247 | % sort_data = sort(BigImage(:)); 248 | % BigImage = BigImage .* (255 / sort_data(floor(length(sort_data) * 0.999))); 249 | if mod(iter,100)==1 250 | figure(2); 251 | % imshow(uint8(mean_image + input_data)); 252 | output = BigImage; 253 | output = output(:, :, [3, 2, 1]); 254 | output = permute(output, [2 1 3]); 255 | output = output(erode+1:end-erode,erode+1:end-erode,:); 256 | sort_output = sort(output(:)); 257 | output = output .* (255 / sort_output(floor(length(sort_output) * 0.99))); 258 | imshow(uint8(output)); 259 | figure(3); 260 | % hist(output(:),1000); 261 | plot(1:iter-1, func_loss); 262 | end; 263 | if iter > 1500 264 | break; 265 | end; 266 | % end; 267 | % end; 268 | end; 269 | LastBigImage = BigImage; 270 | imwrite(uint8(output),['gallery\' num2str(targetclass) '.png']); 271 | end; -------------------------------------------------------------------------------- /Inceptionism/README.md: -------------------------------------------------------------------------------- 1 | Visualization of the classifier neurons in any network. 2 | 3 | This is the code for reproducing the results in my paper, 4 | Visualizing Deep Neural Network by Alternately Image Blurring and Deblurring. 
5 | 6 | http://www.sciencedirect.com/science/article/pii/S0893608017302095 7 | 8 | Please feel free to modify the switches and parameters in `Inceptionism_big.m` and see what happens. 9 | 10 | Some results are uploaded to [Baidu Photo](http://xiangce.baidu.com/picture/album/list/41344a5443cc313414de9559899e213417674a76). 11 | 12 | Here is the figure displayed in my paper for comparison. The (c) rows are created by [Inceptionism](https://github.com/google/deepdream), the (d) rows' algorithm is similar to https://github.com/auduno/deepdraw and the (e) rows are my results. 13 | 14 | ![big_compare](big_compare.png) 15 | -------------------------------------------------------------------------------- /Inceptionism/big_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/Inceptionism/big_compare.png -------------------------------------------------------------------------------- /Inceptionism/smoothL1.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/Inceptionism/smoothL1.m -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Feng Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and 
this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /NNComplexity/CNNComplexity.m: -------------------------------------------------------------------------------- 1 | 2 | caffe.reset_all(); 3 | caffe.set_mode_gpu(); 4 | gpu_id = 0; % we will use the first gpu in this demo 5 | caffe.set_device(gpu_id); 6 | 7 | addpath('../PrototxtGen'); 8 | net_model = 'net_define.prototxt'; 9 | inception_file = 'inception.prototxt'; 10 | inception_content = fileread(inception_file); 11 | conv1x1_file = '1x1conv.prototxt'; 12 | conv1x1_content = fileread(conv1x1_file); 13 | pooling_file = 'pooling.prototxt'; 14 | pooling_content = fileread(pooling_file); 15 | output_file = 'output.prototxt'; 16 | output_content = fileread(output_file); 17 | 18 | activation = 'ReLU'; 19 | 20 | layers = { 21 | struct('type', 'convolution', 'outputmaps', 10, 'kernelsize', 3, 'activation', activation) %convolution layer 22 | struct('type', 'convolution', 'outputmaps', 10, 'kernelsize', 3, 'activation', activation) %convolution layer 23 | % struct('type', 'convolution', 'outputmaps', 100, 'kernelsize', 3, 'activation', activation) %convolution layer 24 | % struct('type', 'pooling', 'scale', 2, 'method', 'AVE') 25 | % struct('type', 'inception', 'node1x1', 100, 'reduce3x3', 50, 'node3x3', 100, 'reduce5x5', 50, 'node5x5', 100, 'poolconv', 100) 26 | % struct('type', 
'inception', 'node1x1', 20, 'reduce3x3', 10, 'node3x3', 20, 'reduce5x5', 10, 'node5x5', 20, 'poolconv', 20) 27 | % struct('type', 'inception', 'node1x1', 20, 'reduce3x3', 10, 'node3x3', 20, 'reduce5x5', 10, 'node5x5', 20, 'poolconv', 20) 28 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 1, 'activation', activation) %convolution layer 29 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 1, 'activation', activation) %convolution layer 30 | % struct('type', 'pooling', 'scale', 2, 'method', 'AVE') 31 | }; 32 | width = 600; 33 | height = 400; 34 | border = 5; 35 | 36 | fid = fopen(net_model,'w'); 37 | proto_file{1} = 'name: "mnist_siamese_train_test"'; 38 | proto_file{2} = 'input: "data"'; 39 | proto_file{3} = 'input_dim: 1'; 40 | proto_file{4} = 'input_dim: 2'; 41 | proto_file{5} = ['input_dim: ' num2str(width)]; 42 | proto_file{6} = ['input_dim: ' num2str(height)]; 43 | for i=1:6 44 | fprintf(fid,'%s\r\n',proto_file{i}); 45 | end; 46 | top_layer = 'data'; 47 | top_layer_exp = 'top: "(.+?)"'; 48 | for i=1:length(layers) 49 | if strcmp(layers{i}.type,'convolution') 50 | this_layer = strrep(conv1x1_content,'{num}',num2str(i)); 51 | this_layer = strrep(this_layer,'{node_num}',num2str(layers{i}.outputmaps)); 52 | this_layer = strrep(this_layer,'{bottom_name}',top_layer); 53 | this_layer = strrep(this_layer,'{kernel_size}',num2str(layers{i}.kernelsize)); 54 | this_layer = strrep(this_layer,'{activation}',layers{i}.activation); 55 | elseif strcmp(layers{i}.type,'pooling') 56 | this_layer = strrep(pooling_content,'{num}',num2str(i)); 57 | this_layer = strrep(this_layer,'{bottom_name}',top_layer); 58 | this_layer = strrep(this_layer,'{method}',layers{i}.method); 59 | this_layer = strrep(this_layer,'{scale}',num2str(layers{i}.scale)); 60 | elseif strcmp(layers{i}.type,'inception') 61 | this_layer = strrep(inception_content,'{num}',num2str(i)); 62 | this_layer = strrep(this_layer,'{bottom_name}',top_layer); 63 | this_layer = 
function CNNComplexitySave(layers,savefolder,image_size,save_num)
%CNNCOMPLEXITYSAVE Build a network from a layer list and save its response images.
%
%   CNNComplexitySave(layers, savefolder, image_size, save_num)
%
%   For each of save_num iterations the function resets Caffe, writes a
%   network definition assembled from the prototxt templates, constructs
%   the net, feeds it a two-channel coordinate ramp and saves the
%   per-channel standardised output as <savefolder>/<k>.bmp.
%
%   layers     - cell array of structs. Supported 'type' values and the
%                fields read for each:
%                  'convolution' : outputmaps, kernelsize, activation
%                                  ('maxout', 'LReLU', or any name that is
%                                  substituted for {activation} in
%                                  activation.prototxt)
%                  'pooling'     : method, scale
%                  'inception'   : node1x1, reduce3x3, node3x3, reduce5x5,
%                                  node5x5, poolconv
%                  'maxout'      : no additional fields
%   savefolder - output directory; created when it does not exist.
%   image_size - two-element vector; element 1 is used as width and
%                element 2 as height.
%   save_num   - number of images to generate.
%
%   The template files (Inception.prototxt, 1x1conv.prototxt,
%   pooling.prototxt, maxout.prototxt, activation.prototxt,
%   output.prototxt) are opened by bare file name, exactly as before.
%   NOTE(review): presumably they must be in the current folder or on the
%   MATLAB path (e.g. ../PrototxtGen) - confirm for your setup.

% The generated definition is written next to this file rather than to a
% machine-specific absolute path.
net_model = fullfile(fileparts(mfilename('fullpath')),'net_define.prototxt');

% Read every template once. The inception template is named with the same
% capitalisation as the file in the repository so that the lookup also
% works on case-sensitive file systems.
templates = struct( ...
    'inception',  fileread('Inception.prototxt'), ...
    'conv',       fileread('1x1conv.prototxt'), ...
    'pooling',    fileread('pooling.prototxt'), ...
    'maxout',     fileread('maxout.prototxt'), ...
    'activation', fileread('activation.prototxt'), ...
    'output',     fileread('output.prototxt'));

width  = image_size(1);
height = image_size(2);
border = 5;   % pixels cropped from every side of the network output

if ~exist(savefolder,'dir')
    mkdir(savefolder);
end

for sss=1:save_num
    caffe.reset_all();
    caffe.set_mode_gpu();
    gpu_id = 0;  % use the first GPU
    caffe.set_device(gpu_id);

    % The definition is rewritten on every iteration because the 'LReLU'
    % branch draws a new random negative slope each time.
    write_net_definition(net_model,layers,width,height,templates);
    train_net = caffe.Net(net_model,'train');

    % Two-channel ramp input: channel 1 varies along rows, channel 2 along
    % columns, both shifted to be centred on zero.
    input_data = zeros(height, width, 2, 1, 'single');
    input_data(:,:,1,1) = repmat((1:height)',1,width) / height;
    input_data(:,:,2,1) = repmat((1:width),height,1) / width;
    input_data = input_data - 0.5;

    output_data = train_net.forward({input_data});

    output = output_data{1};
    output = output(border+1:end-border,border+1:end-border,:);

    % Standardise each channel, then map to uint8 around mid-grey.
    output = bsxfun(@minus,output,mean(mean(output,1),2));
    channels = size(output,3);
    output_std = std(reshape(output,[],channels),0,1);
    output = bsxfun(@rdivide,output,reshape(output_std,[1 1 channels]));
    output = uint8(output * 100 + 128);

    imwrite(output,fullfile(savefolder,[num2str(sss) '.bmp']));
end
end

function write_net_definition(net_model,layers,width,height,templates)
% Write the complete prototxt (header, one entry per layer, output layer).
fid = fopen(net_model,'w');
if fid == -1
    error('CNNComplexitySave:cannotOpen', ...
        'Cannot open "%s" for writing.',net_model);
end
% Guarantees the file is closed (and flushed) when this helper returns or
% errors, i.e. before caffe.Net reads it.
closer = onCleanup(@() fclose(fid));

header = {
    'name: "mnist_siamese_train_test"'
    'input: "data"'
    'input_dim: 1'
    'input_dim: 2'
    ['input_dim: ' num2str(width)]
    ['input_dim: ' num2str(height)]
    };
for i=1:numel(header)
    write_layer(fid,header{i});
end

top_layer = 'data';
for i=1:length(layers)
    layer = layers{i};
    idx = num2str(i);
    switch layer.type
        case 'convolution'
            % The convolution itself is written first; the activation
            % that follows it becomes this_layer.
            conv_layer = fill_template(templates.conv, ...
                '{num}',idx, ...
                '{node_num}',num2str(layer.outputmaps), ...
                '{bottom_name}',top_layer, ...
                '{kernel_size}',num2str(layer.kernelsize));
            top_layer = last_top(conv_layer);
            write_layer(fid,conv_layer);
            if strcmp(layer.activation,'maxout')
                this_layer = fill_template(templates.maxout, ...
                    '{num}',idx,'{bottom_name}',top_layer);
            else
                if strcmp(layer.activation,'LReLU')
                    % Leaky ReLU: a ReLU layer with a random slope in [0, 0.5).
                    act_name = 'ReLU';
                    slope = rand() / 2;
                else
                    act_name = layer.activation;
                    slope = 0;
                end
                this_layer = fill_template(templates.activation, ...
                    '{num}',idx, ...
                    '{bottom_name}',top_layer, ...
                    '{activation}',act_name, ...
                    '{negative_slope}',num2str(slope));
            end
        case 'pooling'
            this_layer = fill_template(templates.pooling, ...
                '{num}',idx, ...
                '{bottom_name}',top_layer, ...
                '{method}',layer.method, ...
                '{scale}',num2str(layer.scale));
        case 'inception'
            this_layer = fill_template(templates.inception, ...
                '{num}',idx, ...
                '{bottom_name}',top_layer, ...
                '{1x1node}',num2str(layer.node1x1), ...
                '{3x3reduce}',num2str(layer.reduce3x3), ...
                '{3x3node}',num2str(layer.node3x3), ...
                '{5x5reduce}',num2str(layer.reduce5x5), ...
                '{5x5node}',num2str(layer.node5x5), ...
                '{poolconv}',num2str(layer.poolconv));
        case 'maxout'
            this_layer = fill_template(templates.maxout, ...
                '{num}',idx,'{bottom_name}',top_layer);
        otherwise
            % Previously an unknown type silently re-emitted the previous
            % layer (or failed on an undefined variable for the first one).
            error('CNNComplexitySave:unknownLayerType', ...
                'Unsupported layer type "%s" at index %d.',layer.type,i);
    end
    top_layer = last_top(this_layer);
    write_layer(fid,this_layer);
end

write_layer(fid,fill_template(templates.output,'{bottom_name}',top_layer));
end

function write_layer(fid,layer_text)
% Append one block of text followed by CRLF to the open definition file.
fprintf(fid,'%s\r\n',layer_text);
end

function name = last_top(layer_text)
% Return the name of the last 'top: "..."' entry found in a layer's text.
tokens = regexp(layer_text,'top: "(.+?)"','tokens');
name = tokens{end}{1};
end

function text = fill_template(template,varargin)
% Replace each placeholder/value pair given in varargin inside template.
text = template;
for k=1:2:numel(varargin)
    text = strrep(text,varargin{k},varargin{k+1});
end
end
"inception_{num}a/3x3_reduce" 50 | type: "Convolution" 51 | bottom: "{bottom_name}" 52 | top: "inception_{num}a/3x3_reduce" 53 | param { 54 | lr_mult: 1 55 | decay_mult: 1 56 | } 57 | param { 58 | lr_mult: 2 59 | decay_mult: 0 60 | } 61 | convolution_param { 62 | num_output: {3x3reduce} 63 | kernel_size: 1 64 | weight_filler { 65 | type: "xavier" 66 | } 67 | bias_filler { 68 | type: "constant" 69 | value: 0 70 | } 71 | } 72 | } 73 | layer { 74 | name: "bn{num}/relu_3x3_reduce" 75 | type: "BN" 76 | bottom: "inception_{num}a/3x3_reduce" 77 | top: "bn{num}/relu_3x3_reduce" 78 | bn_param { 79 | scale_filler { 80 | type: "constant" 81 | value: 3 82 | } 83 | shift_filler { 84 | type: "constant" 85 | value: 0 86 | } 87 | } 88 | } 89 | layer { 90 | name: "inception_{num}a/relu_3x3_reduce" 91 | type: "ReLU" 92 | bottom: "bn{num}/relu_3x3_reduce" 93 | top: "bn{num}/relu_3x3_reduce" 94 | } 95 | layer { 96 | name: "inception_{num}a/3x3" 97 | type: "Convolution" 98 | bottom: "bn{num}/relu_3x3_reduce" 99 | top: "inception_{num}a/3x3" 100 | param { 101 | lr_mult: 1 102 | decay_mult: 1 103 | } 104 | param { 105 | lr_mult: 2 106 | decay_mult: 0 107 | } 108 | convolution_param { 109 | num_output: {3x3node} 110 | pad: 1 111 | kernel_size: 3 112 | weight_filler { 113 | type: "xavier" 114 | } 115 | bias_filler { 116 | type: "constant" 117 | value: 0 118 | } 119 | } 120 | } 121 | layer { 122 | name: "bn{num}/relu_3x3" 123 | type: "BN" 124 | bottom: "inception_{num}a/3x3" 125 | top: "bn{num}/relu_3x3" 126 | bn_param { 127 | scale_filler { 128 | type: "constant" 129 | value: 3 130 | } 131 | shift_filler { 132 | type: "constant" 133 | value: 0 134 | } 135 | } 136 | } 137 | layer { 138 | name: "inception_{num}a/relu_3x3" 139 | type: "ReLU" 140 | bottom: "bn{num}/relu_3x3" 141 | top: "bn{num}/relu_3x3" 142 | } 143 | layer { 144 | name: "inception_{num}a/5x5_reduce" 145 | type: "Convolution" 146 | bottom: "{bottom_name}" 147 | top: "inception_{num}a/5x5_reduce" 148 | param { 149 | lr_mult: 1 
150 | decay_mult: 1 151 | } 152 | param { 153 | lr_mult: 2 154 | decay_mult: 0 155 | } 156 | convolution_param { 157 | num_output: {5x5reduce} 158 | kernel_size: 1 159 | weight_filler { 160 | type: "xavier" 161 | } 162 | bias_filler { 163 | type: "constant" 164 | value: 0 165 | } 166 | } 167 | } 168 | layer { 169 | name: "bn{num}/relu_5x5_reduce" 170 | type: "BN" 171 | bottom: "inception_{num}a/5x5_reduce" 172 | top: "bn{num}/relu_5x5_reduce" 173 | bn_param { 174 | scale_filler { 175 | type: "constant" 176 | value: 3 177 | } 178 | shift_filler { 179 | type: "constant" 180 | value: 0 181 | } 182 | } 183 | } 184 | layer { 185 | name: "inception_{num}a/relu_5x5_reduce" 186 | type: "ReLU" 187 | bottom: "bn{num}/relu_5x5_reduce" 188 | top: "bn{num}/relu_5x5_reduce" 189 | } 190 | layer { 191 | name: "inception_{num}a/5x5" 192 | type: "Convolution" 193 | bottom: "bn{num}/relu_5x5_reduce" 194 | top: "inception_{num}a/5x5" 195 | param { 196 | lr_mult: 1 197 | decay_mult: 1 198 | } 199 | param { 200 | lr_mult: 2 201 | decay_mult: 0 202 | } 203 | convolution_param { 204 | num_output: {5x5node} 205 | pad: 2 206 | kernel_size: 5 207 | weight_filler { 208 | type: "xavier" 209 | } 210 | bias_filler { 211 | type: "constant" 212 | value: 0 213 | } 214 | } 215 | } 216 | layer { 217 | name: "bn{num}/relu_5x5" 218 | type: "BN" 219 | bottom: "inception_{num}a/5x5" 220 | top: "bn{num}/relu_5x5" 221 | bn_param { 222 | scale_filler { 223 | type: "constant" 224 | value: 3 225 | } 226 | shift_filler { 227 | type: "constant" 228 | value: 0 229 | } 230 | } 231 | } 232 | layer { 233 | name: "inception_{num}a/relu_5x5" 234 | type: "ReLU" 235 | bottom: "bn{num}/relu_5x5" 236 | top: "bn{num}/relu_5x5" 237 | } 238 | layer { 239 | name: "inception_{num}a/pool" 240 | type: "Pooling" 241 | bottom: "{bottom_name}" 242 | top: "inception_{num}a/pool" 243 | pooling_param { 244 | pool: MAX 245 | kernel_size: 3 246 | stride: 1 247 | pad: 1 248 | } 249 | } 250 | layer { 251 | name: 
"inception_{num}a/pool_proj" 252 | type: "Convolution" 253 | bottom: "inception_{num}a/pool" 254 | top: "inception_{num}a/pool_proj" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: {poolconv} 265 | kernel_size: 1 266 | weight_filler { 267 | type: "xavier" 268 | } 269 | bias_filler { 270 | type: "constant" 271 | value: 0 272 | } 273 | } 274 | } 275 | layer { 276 | name: "bn{num}/relu_pool_proj" 277 | type: "BN" 278 | bottom: "inception_{num}a/pool_proj" 279 | top: "bn{num}/relu_pool_proj" 280 | bn_param { 281 | scale_filler { 282 | type: "constant" 283 | value: 3 284 | } 285 | shift_filler { 286 | type: "constant" 287 | value: 0 288 | } 289 | } 290 | } 291 | layer { 292 | name: "inception_{num}a/relu_pool_proj" 293 | type: "ReLU" 294 | bottom: "bn{num}/relu_pool_proj" 295 | top: "bn{num}/relu_pool_proj" 296 | } 297 | layer { 298 | name: "inception_{num}a/output" 299 | type: "Concat" 300 | bottom: "bn{num}/relu_1x1" 301 | bottom: "bn{num}/relu_3x3" 302 | bottom: "bn{num}/relu_5x5" 303 | bottom: "bn{num}/relu_pool_proj" 304 | top: "inception_{num}a/output" 305 | } -------------------------------------------------------------------------------- /NNComplexity/Lab2RGB.m: -------------------------------------------------------------------------------- 1 | function [R, G, B] = Lab2RGB(L, a, b) 2 | %LAB2RGB Convert an image from CIELAB to RGB 3 | % 4 | % function [R, G, B] = Lab2RGB(L, a, b) 5 | % function [R, G, B] = Lab2RGB(I) 6 | % function I = Lab2RGB(...) 7 | % 8 | % Lab2RGB takes L, a, and b double matrices, or an M x N x 3 double 9 | % image, and returns an image in the RGB color space. Values for L are in 10 | % the range [0,100] while a* and b* are roughly in the range [-110,110]. 11 | % If 3 outputs are specified, the values will be returned as doubles in the 12 | % range [0,1], otherwise the values will be uint8s in the range [0,255]. 
13 | % 14 | % This transform is based on ITU-R Recommendation BT.709 using the D65 15 | % white point reference. The error in transforming RGB -> Lab -> RGB is 16 | % approximately 10^-5. 17 | % 18 | % See also RGB2LAB. 19 | 20 | % By Mark Ruzon from C code by Yossi Rubner, 23 September 1997. 21 | % Updated for MATLAB 5 28 January 1998. 22 | % Fixed a bug in conversion back to uint8 9 September 1999. 23 | % Updated for MATLAB 7 30 March 2009. 24 | 25 | if nargin == 1 26 | b = L(:,:,3); 27 | a = L(:,:,2); 28 | L = L(:,:,1); 29 | end 30 | 31 | if max(max(L)) < 1.1 || max(max(a)) < 1.1 || max(max(b)) < 1.1 32 | L = double(L) * 100; 33 | a = double(a) * 220; 34 | a = a - 110; 35 | b = double(b) * 220; 36 | b = b - 110; 37 | end 38 | 39 | % Thresholds 40 | T1 = 0.008856; 41 | T2 = 0.206893; 42 | 43 | [M, N] = size(L); 44 | s = M * N; 45 | L = reshape(L, 1, s); 46 | a = reshape(a, 1, s); 47 | b = reshape(b, 1, s); 48 | 49 | % Compute Y 50 | fY = ((L + 16) / 116) .^ 3; 51 | YT = fY > T1; 52 | fY = (~YT) .* (L / 903.3) + YT .* fY; 53 | Y = fY; 54 | 55 | % Alter fY slightly for further calculations 56 | fY = YT .* (fY .^ (1/3)) + (~YT) .* (7.787 .* fY + 16/116); 57 | 58 | % Compute X 59 | fX = a / 500 + fY; 60 | XT = fX > T2; 61 | X = (XT .* (fX .^ 3) + (~XT) .* ((fX - 16/116) / 7.787)); 62 | 63 | % Compute Z 64 | fZ = fY - b / 200; 65 | ZT = fZ > T2; 66 | Z = (ZT .* (fZ .^ 3) + (~ZT) .* ((fZ - 16/116) / 7.787)); 67 | 68 | % Normalize for D65 white point 69 | X = X * 0.950456; 70 | Z = Z * 1.088754; 71 | 72 | % XYZ to RGB 73 | MAT = [ 3.240479 -1.537150 -0.498535; 74 | -0.969256 1.875992 0.041556; 75 | 0.055648 -0.204043 1.057311]; 76 | 77 | RGB = max(min(MAT * [X; Y; Z], 1), 0); 78 | 79 | R = reshape(RGB(1,:), M, N); 80 | G = reshape(RGB(2,:), M, N); 81 | B = reshape(RGB(3,:), M, N); 82 | 83 | if nargout < 2 84 | R = uint8(round(cat(3,R,G,B) * 255)); 85 | end -------------------------------------------------------------------------------- /NNComplexity/README.md: 
-------------------------------------------------------------------------------- 1 | Visualize how complex a neural network's fitting function can become, w.r.t. width, depth and structure. 2 | 3 | `CNNComplexity.m` is our main script. You can also use `CNNComplexitySave.m` and `run_save.m` to generate more images at once. 4 | 5 | This code is from a failed project. If you can read Mandarin, please see this blog post for details: http://blog.csdn.net/happynear/article/details/46583811 . 6 | I had underestimated the effect of scale & shift in Batch Normalization. **They are very important!** 7 | 8 | However, I don't want this work to be thrown into the dustbin. I still think that we can get some interesting and direct intuition from the generated images. 9 | 10 | Recently, I noticed that similar work was done long ago. This algorithm is called a [Compositional pattern-producing network](https://en.wikipedia.org/wiki/Compositional_pattern-producing_network), and some other posts also generate beautiful images, such as http://blog.otoro.net/2016/03/25/generating-abstract-patterns-with-tensorflow/ and http://zhouchang.info/blog/2016-02-02/simple-cppn.html . 11 | 12 | Brief Algorithm Description 13 | ---------- 14 | 15 | First, we take a two-channel "slope" image as input. 16 | 17 | | first channel | second channel | 18 | | ----------------------|:---------------------:| 19 | | ![1](img/vert.png) | ![1](img/hori.png) | 20 | 21 | Then we use a randomly initialized (convolutional) neural network to warp the slope input into more complex shapes. Note that since a neural network is a continuous function w.r.t. its input, the output will also be a continuous, but more complex, image. 22 | 23 | In order to control the range of each layer's output, we add batch normalization after every convolutional layer as introduced in the original paper. BTW, since we have only one input image, the name "batch normalization" would be better changed to "spatial normalization". 
Without the spatial normalization, the range of the output will grow or shrink exponentially with depth, which is not what we want. 24 | 25 | Now we can see how complex the neural network could be. Firstly, with a single layer, 100 hidden channels. 26 | 27 | | ReLU activation | Sigmoid activation | 28 | | -------------------------|:---------------------------:| 29 | | ![1](img/1conv_relu.png) | ![1](img/1conv_sigmoid.png) | 30 | 31 | How about 10 layers with 10 hidden channels each? 32 | 33 | | ReLU activation | Sigmoid activation | 34 | | --------------------------|:-----------------------------:| 35 | | ![1](img/10conv_relu.png) | ![1](img/10conv_sigmoid.png) | 36 | 37 | Much more complex, right? Please note that they all have about 100 parameters, but with a deeper structure, we produce images with a huge leap in complexity. 38 | 39 | We can also apply other structures to the input, such as NIN, VGG, Inception etc., and see how they differ. Now, try it yourself! 40 | -------------------------------------------------------------------------------- /NNComplexity/RGB2Lab.m: -------------------------------------------------------------------------------- 1 | function [L,a,b] = RGB2Lab(R,G,B) 2 | %RGB2LAB Convert an image from RGB to CIELAB 3 | % 4 | % function [L, a, b] = RGB2Lab(R, G, B) 5 | % function [L, a, b] = RGB2Lab(I) 6 | % function I = RGB2Lab(...) 7 | % 8 | % RGB2Lab takes red, green, and blue matrices, or a single M x N x 3 image, 9 | % and returns an image in the CIELAB color space. RGB values can be 10 | % either between 0 and 1 or between 0 and 255. Values for L are in the 11 | % range [0,100] while a and b are roughly in the range [-110,110]. The 12 | % output is of type double. 13 | % 14 | % This transform is based on ITU-R Recommendation BT.709 using the D65 15 | % white point reference. The error in transforming RGB -> Lab -> RGB is 16 | % approximately 10^-5. 17 | % 18 | % See also LAB2RGB. 
19 | 20 | % By Mark Ruzon from C code by Yossi Rubner, 23 September 1997. 21 | % Updated for MATLAB 5 28 January 1998. 22 | % Updated for MATLAB 7 30 March 2009. 23 | 24 | if nargin == 1 25 | B = double(R(:,:,3)); 26 | G = double(R(:,:,2)); 27 | R = double(R(:,:,1)); 28 | end 29 | 30 | if max(max(R)) > 1.0 || max(max(G)) > 1.0 || max(max(B)) > 1.0 31 | R = double(R) / 255; 32 | G = double(G) / 255; 33 | B = double(B) / 255; 34 | end 35 | 36 | % Set a threshold 37 | T = 0.008856; 38 | 39 | [M, N] = size(R); 40 | s = M * N; 41 | RGB = [reshape(R,1,s); reshape(G,1,s); reshape(B,1,s)]; 42 | 43 | % RGB to XYZ 44 | MAT = [0.412453 0.357580 0.180423; 45 | 0.212671 0.715160 0.072169; 46 | 0.019334 0.119193 0.950227]; 47 | XYZ = MAT * RGB; 48 | 49 | % Normalize for D65 white point 50 | X = XYZ(1,:) / 0.950456; 51 | Y = XYZ(2,:); 52 | Z = XYZ(3,:) / 1.088754; 53 | 54 | XT = X > T; 55 | YT = Y > T; 56 | ZT = Z > T; 57 | 58 | Y3 = Y.^(1/3); 59 | 60 | fX = XT .* X.^(1/3) + (~XT) .* (7.787 .* X + 16/116); 61 | fY = YT .* Y3 + (~YT) .* (7.787 .* Y + 16/116); 62 | fZ = ZT .* Z.^(1/3) + (~ZT) .* (7.787 .* Z + 16/116); 63 | 64 | L = reshape(YT .* (116 * Y3 - 16.0) + (~YT) .* (903.3 * Y), M, N); 65 | a = reshape(500 * (fX - fY), M, N); 66 | b = reshape(200 * (fY - fZ), M, N); 67 | 68 | if nargout < 2 69 | L = cat(3,L,a,b); 70 | end -------------------------------------------------------------------------------- /NNComplexity/img/10conv_relu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NNComplexity/img/10conv_relu.png -------------------------------------------------------------------------------- /NNComplexity/img/10conv_sigmoid.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NNComplexity/img/10conv_sigmoid.png -------------------------------------------------------------------------------- /NNComplexity/img/1conv_relu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NNComplexity/img/1conv_relu.png -------------------------------------------------------------------------------- /NNComplexity/img/1conv_sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NNComplexity/img/1conv_sigmoid.png -------------------------------------------------------------------------------- /NNComplexity/img/hori.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NNComplexity/img/hori.png -------------------------------------------------------------------------------- /NNComplexity/img/vert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NNComplexity/img/vert.png -------------------------------------------------------------------------------- /NNComplexity/maxout.prototxt: -------------------------------------------------------------------------------- 1 | layer{ 2 | name: "slice{num}" 3 | type: "Slice" 4 | slice_param { 5 | slice_dim: 1 6 | } 7 | bottom: "{bottom_name}" 8 | top: "slice{num}_1" 9 | top: "slice{num}_2" 10 | } 11 | 12 | layer{ 13 | name: "etlwise{num}" 14 | type: "Eltwise" 15 | bottom: "slice{num}_1" 16 | bottom: "slice{num}_2" 17 | top: "eltwise{num}" 18 | eltwise_param { 19 | operation: MAX 
20 | } 21 | } -------------------------------------------------------------------------------- /NNComplexity/net_define.prototxt: -------------------------------------------------------------------------------- 1 | name: "mnist_siamese_train_test" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 2 5 | input_dim: 600 6 | input_dim: 400 7 | layer { 8 | name: "conv1" 9 | type: "Convolution" 10 | bottom: "data" 11 | top: "conv1" 12 | param { 13 | lr_mult: 1 14 | } 15 | param { 16 | lr_mult: 2 17 | } 18 | convolution_param { 19 | num_output: 10 20 | kernel_size: 3 21 | stride: 1 22 | weight_filler { 23 | type: "msra" 24 | } 25 | bias_filler { 26 | type: "gaussian" 27 | std: 0.5 28 | # type: "constant" 29 | # value: 0 30 | } 31 | } 32 | } 33 | layer { 34 | name: "bn1" 35 | type: "BN" 36 | bottom: "conv1" 37 | top: "bn1" 38 | bn_param { 39 | scale_filler { 40 | type: "constant" 41 | value: 3 42 | } 43 | shift_filler { 44 | type: "gaussian" 45 | std: 2 46 | # type: "constant" 47 | # value: 0 48 | } 49 | } 50 | } 51 | layer { 52 | name: "actiavation1" 53 | type: "ReLU" 54 | bottom: "bn1" 55 | top: "bn1" 56 | } 57 | layer { 58 | name: "conv2" 59 | type: "Convolution" 60 | bottom: "bn1" 61 | top: "conv2" 62 | param { 63 | lr_mult: 1 64 | } 65 | param { 66 | lr_mult: 2 67 | } 68 | convolution_param { 69 | num_output: 10 70 | kernel_size: 3 71 | stride: 1 72 | weight_filler { 73 | type: "msra" 74 | } 75 | bias_filler { 76 | type: "gaussian" 77 | std: 0.5 78 | # type: "constant" 79 | # value: 0 80 | } 81 | } 82 | } 83 | layer { 84 | name: "bn2" 85 | type: "BN" 86 | bottom: "conv2" 87 | top: "bn2" 88 | bn_param { 89 | scale_filler { 90 | type: "constant" 91 | value: 3 92 | } 93 | shift_filler { 94 | type: "gaussian" 95 | std: 2 96 | # type: "constant" 97 | # value: 0 98 | } 99 | } 100 | } 101 | layer { 102 | name: "actiavation2" 103 | type: "ReLU" 104 | bottom: "bn2" 105 | top: "bn2" 106 | } 107 | layer { 108 | name: "conv3" 109 | type: "Convolution" 110 | bottom: "bn2" 111 | 
top: "conv3" 112 | param { 113 | lr_mult: 1 114 | } 115 | param { 116 | lr_mult: 2 117 | } 118 | convolution_param { 119 | num_output: 20 120 | kernel_size: 1 121 | stride: 1 122 | weight_filler { 123 | type: "msra" 124 | } 125 | bias_filler { 126 | type: "gaussian" 127 | std: 0.5 128 | # type: "constant" 129 | # value: 0 130 | } 131 | } 132 | } 133 | layer { 134 | name: "bn3" 135 | type: "BN" 136 | bottom: "conv3" 137 | top: "bn3" 138 | bn_param { 139 | scale_filler { 140 | type: "constant" 141 | value: 3 142 | } 143 | shift_filler { 144 | type: "gaussian" 145 | std: 2 146 | # type: "constant" 147 | # value: 0 148 | } 149 | } 150 | } 151 | layer { 152 | name: "actiavation3" 153 | type: "ReLU" 154 | bottom: "bn3" 155 | top: "bn3" 156 | } 157 | layer { 158 | name: "conv4" 159 | type: "Convolution" 160 | bottom: "bn3" 161 | top: "conv4" 162 | param { 163 | lr_mult: 1 164 | } 165 | param { 166 | lr_mult: 2 167 | } 168 | convolution_param { 169 | num_output: 20 170 | kernel_size: 1 171 | stride: 1 172 | weight_filler { 173 | type: "msra" 174 | } 175 | bias_filler { 176 | type: "gaussian" 177 | std: 0.5 178 | # type: "constant" 179 | # value: 0 180 | } 181 | } 182 | } 183 | layer { 184 | name: "bn4" 185 | type: "BN" 186 | bottom: "conv4" 187 | top: "bn4" 188 | bn_param { 189 | scale_filler { 190 | type: "constant" 191 | value: 3 192 | } 193 | shift_filler { 194 | type: "gaussian" 195 | std: 2 196 | # type: "constant" 197 | # value: 0 198 | } 199 | } 200 | } 201 | layer { 202 | name: "actiavation4" 203 | type: "ReLU" 204 | bottom: "bn4" 205 | top: "bn4" 206 | } 207 | layer { 208 | name: "output" 209 | type: "Convolution" 210 | bottom: "bn4" 211 | top: "output" 212 | param { 213 | lr_mult: 1 214 | } 215 | param { 216 | lr_mult: 2 217 | } 218 | convolution_param { 219 | num_output: 3 220 | kernel_size: 1 221 | stride: 1 222 | weight_filler { 223 | type: "msra" 224 | } 225 | bias_filler { 226 | type: "constant" 227 | } 228 | } 229 | } 230 | 
-------------------------------------------------------------------------------- /NNComplexity/output.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "output" 3 | type: "Convolution" 4 | bottom: "{bottom_name}" 5 | top: "output" 6 | param { 7 | lr_mult: 1 8 | } 9 | param { 10 | lr_mult: 2 11 | } 12 | convolution_param { 13 | num_output: 3 14 | kernel_size: 1 15 | stride: 1 16 | weight_filler { 17 | type: "msra" 18 | } 19 | bias_filler { 20 | type: "constant" 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /NNComplexity/pooling.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "pool{num}" 3 | type: "Pooling" 4 | bottom: "{bottom_name}" 5 | top: "pool{num}" 6 | pooling_param { 7 | pool: {method} 8 | kernel_size: {scale} 9 | stride: {scale} 10 | } 11 | } -------------------------------------------------------------------------------- /NNComplexity/run_save.m: -------------------------------------------------------------------------------- 1 | activation = 'Sigmoid'; 2 | layers = { 3 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 4 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 5 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 6 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 7 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 8 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 9 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 10 | struct('type', 
'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 11 | % struct('type', 'convolution', 'outputmaps', 80, 'kernelsize', 3, 'activation', activation) %convolution layer 12 | % struct('type', 'convolution', 'outputmaps', 80, 'kernelsize', 3, 'activation', activation) %convolution layer 13 | % struct('type', 'convolution', 'outputmaps', 10, 'kernelsize', 3, 'activation', activation) %convolution layer 14 | % struct('type', 'pooling', 'scale', 2, 'method', 'AVE') 15 | % struct('type', 'inception', 'node1x1', 100, 'reduce3x3', 50, 'node3x3', 100, 'reduce5x5', 50, 'node5x5', 100, 'poolconv', 100) 16 | % struct('type', 'inception', 'node1x1', 50, 'reduce3x3', 25, 'node3x3', 50, 'reduce5x5', 25, 'node5x5', 50, 'poolconv', 50) 17 | % struct('type', 'inception', 'node1x1', 20, 'reduce3x3', 10, 'node3x3', 20, 'reduce5x5', 10, 'node5x5', 20, 'poolconv', 20) 18 | % struct('type', 'inception', 'node1x1', 20, 'reduce3x3', 10, 'node3x3', 20, 'reduce5x5', 10, 'node5x5', 20, 'poolconv', 20) 19 | % struct('type', 'inception', 'node1x1', 20, 'reduce3x3', 10, 'node3x3', 20, 'reduce5x5', 10, 'node5x5', 20, 'poolconv', 20) 20 | % struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 1, 'activation', activation) %convolution layer 21 | % struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 1, 'activation', activation) %convolution layer 22 | % struct('type', 'pooling', 'scale', 2, 'method', 'AVE') 23 | }; 24 | CNNComplexitySave(layers,'8convSigmoid',[640 480],10); -------------------------------------------------------------------------------- /NeuralArt/MakeStylePrototxt.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NeuralArt/MakeStylePrototxt.m -------------------------------------------------------------------------------- /NeuralArt/NeuralArtCost.m: 
--------------------------------------------------------------------------------
1 | function [cost, grad] = NeuralArtCost(input_data,input_size,stylegen_net,forward_input,style_weights)
2 | %NEURALARTCOST Weighted loss of the style-generation net and its gradient on the 'data' blob.
3 | %   input_data    - current image estimate; reshaped to input_size when input_size is non-empty
4 | %                   (presumably so an optimizer can pass a flat vector - confirm with the caller).
5 | %   input_size    - size used for that reshape, or [] to use input_data as given.
6 | %   stylegen_net  - net object exposing forward, backward_prefilled, blob_vec and name2blob_index.
7 | %   forward_input - cell array of net inputs; entry 1 is overwritten with input_data.
8 | %   style_weights - row vector with one weight per scalar net output.
9 | %   cost          - weighted sum of the scalar net outputs, divided by 1000.
10 | %   grad          - diff of the 'data' blob after the backward pass; returned as a column
11 | %                   vector when input_size is non-empty.
12 | if ~isempty(input_size)
13 |     input_data = reshape(input_data,input_size);
14 | end;
15 | forward_input(1) = {input_data};
16 | prob = stylegen_net.forward(forward_input);
17 | % Keep only the scalar outputs. Walk the cell array backwards: deleting entry i
18 | % shifts the later entries down, and a forward loop that also increments i would
19 | % skip the entry that moved into slot i (leaving a non-scalar output behind).
20 | for i=length(prob):-1:1
21 |     if length(prob{i})>1
22 |         prob(i)=[];
23 |     end;
24 | end;
25 | cost = [prob{:}] * style_weights' / 1000;
26 | stylegen_net.backward_prefilled();
27 | grad = stylegen_net.blob_vec(stylegen_net.name2blob_index('data')).get_diff();
28 | if ~isempty(input_size)
29 |     grad = grad(:);
30 | end;
--------------------------------------------------------------------------------
/NeuralArt/NeuralArtDemo.m:
--------------------------------------------------------------------------------
1 | %% A messy code for Neural Art
2 | %% It's not very good now.
3 | caffe.reset_all();
4 | caffe.set_mode_gpu();
5 | gpu_id = 0; % we will use the first gpu in this demo
6 | caffe.set_device(gpu_id);
7 |
8 | % original_file = 'D:\deepLearning\caffe-windows\matlab\demo\Inceptionism\googlenet_neuralart.prototxt';
9 | % net_weights = 'D:\deeplearning\caffe-windows\examples\GoogLeNet\imagenet_googlenet.caffemodel';
10 | original_file = 'VGG_16_nueralart.prototxt';
11 | net_weights = 'VGG16_thinned_net.caffemodel';
12 | % style_layer = {'icp2_in','icp3_out','icp5_out','icp7_out','icp9_out'};
13 | style_layer = {'conv1_1','conv2_1','conv3_1','conv4_1','conv5_1'};
14 | style_weights = [1 1 1 1 1 0.1];
15 | content_layer = {'conv4_2'};
16 | style_image = imread('material\starry_night.jpg');
17 | content_image = imread( 'material\caobin.jpg');
18 | long_size = 512;
19 | if size(content_image,1) > size(content_image,2)
20 | content_image = imresize(content_image,[long_size, size(content_image,2) / size(content_image,1) * long_size]);
21 | style_image = imresize(style_image,[long_size,
size(style_image,2) / size(style_image,1) * long_size]); 22 | else 23 | content_image = imresize(content_image,[size(content_image,1) / size(content_image,2) * long_size, long_size]); 24 | style_image = imresize(style_image,[size(style_image,1) / size(style_image,2) * long_size, long_size]); 25 | end; 26 | figure(1); 27 | imshow(style_image); 28 | title('style image'); 29 | figure(2); 30 | imshow(content_image); 31 | title('content image'); 32 | % content_image = imresize(content_image,0.25,'bilinear'); 33 | % content_image = content_image(end-199:end,1:200,:); 34 | [style_generate_prototxt, style_pattern, content_pattern] = MakeStylePrototxt(original_file, net_weights, style_layer, style_weights, content_layer, style_image, content_image, 0); 35 | 36 | forward_input = cell(length(style_pattern)+2,1); 37 | forward_input(3:end) = style_pattern; 38 | forward_input(2) = {content_pattern}; 39 | 40 | mean_file = []; 41 | vgg_mean = [103.939, 116.779, 123.68]; 42 | [height, width, channel] = size(content_image); 43 | 44 | %%%%%%%%%extract the train features 45 | stylegen_net = caffe.Net(style_generate_prototxt,net_weights,'test'); 46 | mean_image = permute(repmat(vgg_mean',[1,size(content_image,2),size(content_image,1)]),[2,3,1]); 47 | im_data = content_image(:, :, [3, 2, 1]); % permute channels from RGB to BGR 48 | im_data = permute(im_data, [2, 1, 3]); % flip width and height 49 | im_data = single(im_data); % convert from uint8 to single 50 | im_data = reshape(im_data,[width, height, 3, 1]); 51 | for c = 1:3 52 | im_data(:, :, c, :) = im_data(:, :, c, :) - vgg_mean(c); 53 | end 54 | input_data = randn(size(mean_image,1), size(mean_image,2), 3, 1, 'single')*50; 55 | % input_data = im_data; 56 | 57 | use_clip = false; 58 | use_tv_norm = true; 59 | use_weight_decay = false; 60 | use_gradient_blur = false; 61 | use_dropout = false; 62 | 63 | H = fspecial('gaussian',[17 17],5); 64 | % input_data(:,:,:,1) = imfilter(input_data(:,:,:,1),H,'same'); 65 | forward_input(1) = 
{input_data}; 66 | prob = stylegen_net.forward(forward_input); 67 | % stylegen_content_pattern = stylegen_net.blob_vec(stylegen_net.name2blob_index(content_layer{1})).get_data(); 68 | % 69 | % c1 = content_pattern(:,:,1); 70 | % c2 = stylegen_content_pattern(:,:,1); 71 | blur_data = zeros(size(input_data)); 72 | base_lr = 10; 73 | max_lr = 100; 74 | lambda1 = 0.00001; 75 | lambda2 = 0.001; 76 | forward_input(1) = {input_data}; 77 | i = 1; 78 | while i<=length(prob) 79 | if length(prob{i})>1 80 | prob(i)=[]; 81 | end; 82 | i=i+1; 83 | end; 84 | last_prob = [prob{:}] * style_weights'; 85 | momentum = 0.8; 86 | lastgrad = zeros(size(mean_image)); 87 | mask = ones(size(mean_image,1), size(mean_image,2)); 88 | iter = 1; 89 | dropout = 0.5; 90 | 91 | while 1 92 | lr = base_lr;% * sqrt(this_prob / (1 - this_prob)); 93 | stylegen_net.backward_prefilled(); 94 | res = stylegen_net.blob_vec(stylegen_net.name2blob_index('data')).get_diff(); 95 | 96 | bak_data = input_data; 97 | 98 | if use_gradient_blur 99 | res = imfilter(res,H,'same'); 100 | end; 101 | 102 | if use_clip 103 | app_gradient = sum(abs(res .* input_data(:,:,:,1)),3); 104 | app_gradient = app_gradient < mean(app_gradient(:)) * 0.5; 105 | grad = reshape(res,[size(mean_image,1)*size(mean_image,2) 3]); 106 | grad(app_gradient==1,:) = 0; 107 | grad = reshape(grad,size(input_data)); 108 | res = grad; 109 | end; 110 | 111 | 112 | lastgrad = (1 - momentum) * lr * res + momentum * lastgrad;%/ norm(res(:)) 113 | input_data(:,:,:,1) = input_data(:,:,:,1) - lastgrad; 114 | % input_data(:,:,:,1) = min(max(mean_image + input_data(:,:,:,1),0),255.9) - mean_image; 115 | 116 | if use_tv_norm 117 | I = input_data(:,:,:,1); 118 | % Gx = sign(I(2:end-1,2:end-1,:) - I(1:end-2,2:end-1,:)) - sign(I(3:end,2:end-1,:) - I(2:end-1,2:end-1,:)); 119 | % Gy = sign(I(2:end-1,2:end-1,:) - I(2:end-1,1:end-2,:)) - sign(I(2:end-1,3:end,:) - I(2:end-1,2:end-1,:)); 120 | Gx = smoothL1(I(2:end-1,:,:) - I(1:end-2,:,:)) - smoothL1(I(3:end,:,:) - 
I(2:end-1,:,:)); 121 | Gx = [smoothL1(I(1,:,:) - I(2,:,:)); Gx; smoothL1(I(end,:,:) - I(end-1,:,:))]; 122 | Gy = smoothL1(I(:,2:end-1,:) - I(:,1:end-2,:)) - smoothL1(I(:,3:end,:) - I(:,2:end-1,:)); 123 | Gy = [smoothL1(I(:,1,:) - I(:,2,:)) Gy smoothL1(I(:,end,:) - I(:,end-1,:))]; 124 | % Gx = (I(2:end-1,:,:) - I(1:end-2,:,:)) - (I(3:end,:,:) - I(2:end-1,:,:)); 125 | % Gx = [(I(1,:,:) - I(2,:,:)); Gx; (I(end,:,:) - I(end-1,:,:))]; 126 | % Gy = (I(:,2:end-1,:) - I(:,1:end-2,:)) - (I(:,3:end,:) - I(:,2:end-1,:)); 127 | % Gy = [(I(:,1,:) - I(:,2,:)) Gy (I(:,end,:) - I(:,end-1,:))]; 128 | input_data(:,:,:,1) = input_data(:,:,:,1) - lr * lambda2 * (Gx + Gy); 129 | end; 130 | if use_weight_decay 131 | input_data(:,:,:,1) = input_data(:,:,:,1) - lr * lambda1 * I; 132 | end; 133 | 134 | forward_input(1) = {input_data}; 135 | prob = stylegen_net.forward(forward_input); 136 | i = 1; 137 | while i<=length(prob) 138 | if length(prob{i})>1 139 | prob(i)=[]; 140 | end; 141 | i=i+1; 142 | end; 143 | 144 | 145 | this_prob = [prob{:}] * style_weights'; 146 | fprintf('iter=%d,lr=%f,',iter,lr); 147 | % for p = 2:length(style_pattern) + 2 148 | % fprintf('%f,',prob{p}); 149 | % end; 150 | fprintf('this_cost=%f,last_cost=%f\n',this_prob, last_prob); 151 | iter = iter + 1; 152 | 153 | if mod(iter,10)==0 154 | figure(3); 155 | % imshow(uint8(mean_image + input_data)); 156 | output = mean_image + input_data(:,:,:,1); 157 | output = output(:, :, [3, 2, 1]); 158 | output = permute(output, [2 1 3]); 159 | imshow(uint8(output)); 160 | if iter>100 && max(abs(output(:) - last_output(:))) < 1 161 | break; 162 | end; 163 | last_output = output; 164 | title('generated image'); 165 | end; 166 | if this_prob>last_prob 167 | base_lr = base_lr * 0.9; 168 | input_data = bak_data; 169 | end; 170 | if this_prob size(style_image,2) 25 | style_image = imresize(style_image,[long_size, size(style_image,2) / size(style_image,1) * long_size]); 26 | else 27 | style_image = 
imresize(style_image,[size(style_image,1) / size(style_image,2) * long_size, long_size]); 28 | end; 29 | content_image = imread('d:\hoovertowernight.jpg'); 30 | if size(content_image,1) > size(content_image,2) 31 | content_image = imresize(content_image,[long_size, size(content_image,2) / size(content_image,1) * long_size]); 32 | else 33 | content_image = imresize(content_image,[size(content_image,1) / size(content_image,2) * long_size, long_size]); 34 | end; 35 | figure(1); 36 | imshow(style_image); 37 | title('style image'); 38 | figure(2); 39 | imshow(content_image); 40 | title('content image'); 41 | [style_generate_prototxt, style_pattern, content_pattern,colorObj] = MakeStylePrototxt(original_file, net_weights, style_layer, style_weights, content_layer, style_image, content_image,num_cluster); 42 | if use_color_prior 43 | colorObj 44 | num_cluster = colorObj.NComponents; 45 | end; 46 | if use_color_prior 47 | gaussian_net = caffe.Net('gaussian_net.prototxt','test'); 48 | 49 | W = single(zeros(1,1,3,3*num_cluster)); 50 | b = single(zeros(3*num_cluster,1)); 51 | 52 | for i=1:num_cluster 53 | W(:,:,:,(i-1)*3+1:i*3) = inv(colorObj.Sigma(:,:,i)); 54 | b((i-1)*3+1:i*3) = -1 * colorObj.Sigma(:,:,i) \ colorObj.mu(i,:)'; 55 | end; 56 | 57 | nth_layer = gaussian_net.layer_vec(gaussian_net.name2layer_index('gaussian_prior')); 58 | nth_layer.params(1).set_data(W); 59 | nth_layer.params(2).set_data(b); 60 | end; 61 | 62 | forward_input = cell(length(style_pattern)+2,1); 63 | forward_input(3:end) = style_pattern; 64 | forward_input(2) = {content_pattern}; 65 | 66 | mean_file = []; 67 | vgg_mean = [103.939, 116.779, 123.68]; 68 | [height, width, channel] = size(content_image); 69 | 70 | %%%%%%%%%extract the train features 71 | stylegen_net = caffe.Net(style_generate_prototxt,net_weights,'test'); 72 | mean_image = permute(repmat(vgg_mean',[1,size(content_image,2),size(content_image,1)]),[2,3,1]); 73 | input_data = randn(size(mean_image,1), size(mean_image,2), 3, 1, 
'single')*50; 74 | 75 | lr = 10; 76 | max_lr = 50; 77 | momentum = 0.8; 78 | lastgrad = zeros(size(mean_image)); 79 | last_cost = 9999999999999; 80 | 81 | for iter = 1:2000 82 | bak_data = input_data; 83 | bak_grad = lastgrad; 84 | [cost, grad] = NeuralArtCost(input_data,[],stylegen_net,forward_input,style_weights); 85 | 86 | if tv_norm > 0 87 | I = input_data(:,:,:,1); 88 | Gx = (I(2:end-1,:,:) - I(1:end-2,:,:)) - (I(3:end,:,:) - I(2:end-1,:,:)); 89 | Gx = [(I(1,:,:) - I(2,:,:)); Gx; (I(end,:,:) - I(end-1,:,:))]; 90 | Gy = (I(:,2:end-1,:) - I(:,1:end-2,:)) - (I(:,3:end,:) - I(:,2:end-1,:)); 91 | Gy = [(I(:,1,:) - I(:,2,:)) Gy (I(:,end,:) - I(:,end-1,:))]; 92 | grad = grad + tv_norm * (Gx+Gy); 93 | end; 94 | 95 | if use_color_prior 96 | gmm_prior = gaussian_net.forward({input_data}); 97 | sum_gp = zeros(size(mean_image,1),size(mean_image,2)); 98 | sum_prob_gradient = zeros(size(mean_image)); 99 | for i=1:num_cluster 100 | gp = bsxfun(@minus,input_data(:,:,:,1),reshape(colorObj.mu(i,:),[1 1 3])) .* gmm_prior{1}(:,:,(i-1)*3+1:i*3); 101 | gp = sum(gp,3); 102 | gp = colorObj.PComponents(i) * exp(-gp); 103 | sum_prob_gradient = sum_prob_gradient + bsxfun(@times,gp,gmm_prior{1}(:,:,(i-1)*3+1:i*3)); 104 | sum_gp = sum_gp + gp; 105 | end; 106 | sum_prob_gradient = bsxfun(@rdivide,sum_prob_gradient,sum_gp); 107 | sum_prob_gradient(isnan(sum_prob_gradient)) = 0; 108 | input_data(:,:,:,1) = input_data(:,:,:,1) - lr * color_prior * sum_prob_gradient; 109 | end; 110 | %%%%%%%%%%%%%%%%%%%%%%%%gd linear search 111 | lastgrad = (1 - momentum) * lr * grad + momentum * lastgrad;%/ norm(res(:)) 112 | input_data(:,:,:,1) = input_data(:,:,:,1) - lastgrad; 113 | if cost>last_cost 114 | lr = lr * 0.9; 115 | % input_data = bak_data;%why... 116 | % last_grad = bak_grad; 117 | end; 118 | if cost A Neural Algorthm of Artistic Style' by Leon Gatys, Alexander Ecker, and Matthias Bethge (http://arxiv.org/abs/1508.06576). 6 | 7 | The model used here is VGG-16. 
I have thinned the model so that it retains only the conv-layer parameters. The thinned model can 8 | be downloaded from http://pan.baidu.com/s/1kT8d3Iv . 9 | 10 | Usage 11 | =========== 12 | 13 | Open `NeuralArtDemo.m` in MATLAB, modify the paths to the style image and the content image, then run it. 14 | 15 | I developed this on my laptop, which is too slow for tuning the parameters properly, so the results are not very good right now. However, you can still try it. 16 | 17 | For Caffe Linux users: I have written a new layer called [*gram layer*](https://github.com/happynear/caffe-windows/blob/master/src/caffe/layers/gram_layer.cpp) to calculate the Gram matrix of a feature map. If you want to run this code with your own Caffe build, please add this layer to your Caffe project. 18 | 19 | I have replaced the Euclidean loss with a smoothL1 loss to get better numerical stability and faster convergence. You can change it back to the Euclidean loss in `MakeStylePrototxt.m`, line 31. With the smoothL1 loss, only 200 iterations are needed to get a good result, whereas the Euclidean loss needs 500. 20 | 21 | Gallery 22 | =========== 23 | style: starry night
content: tubingen 24 | 25 | ![star-tubingen](gallery/star-tubingen.png) 26 | 27 | I haven't add total-variance norm prior on this image because it is difficult for my laptop to tune the parameter of it, so this image may be not very smoothy. 28 | -------------------------------------------------------------------------------- /NeuralArt/VGG_16_nueralart.prototxt: -------------------------------------------------------------------------------- 1 | 2 | force_backward: true 3 | layer { 4 | name: "conv1_1" 5 | type: "Convolution" 6 | bottom: "data" 7 | top: "conv1_1" 8 | convolution_param { 9 | num_output: 64 10 | pad: 1 11 | kernel_size: 3 12 | } 13 | } 14 | layer { 15 | name: "relu1_1" 16 | type: "ReLU" 17 | bottom: "conv1_1" 18 | top: "conv1_1" 19 | } 20 | layer { 21 | name: "conv1_2" 22 | type: "Convolution" 23 | bottom: "conv1_1" 24 | top: "conv1_2" 25 | convolution_param { 26 | num_output: 64 27 | pad: 1 28 | kernel_size: 3 29 | } 30 | } 31 | layer { 32 | name: "relu1_2" 33 | type: "ReLU" 34 | bottom: "conv1_2" 35 | top: "conv1_2" 36 | } 37 | layer { 38 | name: "pool1" 39 | type: "Pooling" 40 | bottom: "conv1_2" 41 | top: "pool1" 42 | pooling_param { 43 | pool: AVE 44 | kernel_size: 2 45 | stride: 2 46 | } 47 | } 48 | layer { 49 | name: "conv2_1" 50 | type: "Convolution" 51 | bottom: "pool1" 52 | top: "conv2_1" 53 | convolution_param { 54 | num_output: 128 55 | pad: 1 56 | kernel_size: 3 57 | } 58 | } 59 | layer { 60 | name: "relu2_1" 61 | type: "ReLU" 62 | bottom: "conv2_1" 63 | top: "conv2_1" 64 | } 65 | layer { 66 | name: "conv2_2" 67 | type: "Convolution" 68 | bottom: "conv2_1" 69 | top: "conv2_2" 70 | convolution_param { 71 | num_output: 128 72 | pad: 1 73 | kernel_size: 3 74 | } 75 | } 76 | layer { 77 | name: "relu2_2" 78 | type: "ReLU" 79 | bottom: "conv2_2" 80 | top: "conv2_2" 81 | } 82 | layer { 83 | name: "pool2" 84 | type: "Pooling" 85 | bottom: "conv2_2" 86 | top: "pool2" 87 | pooling_param { 88 | pool: AVE 89 | kernel_size: 2 90 | stride: 2 91 | } 92 
| } 93 | layer { 94 | name: "conv3_1" 95 | type: "Convolution" 96 | bottom: "pool2" 97 | top: "conv3_1" 98 | convolution_param { 99 | num_output: 256 100 | pad: 1 101 | kernel_size: 3 102 | } 103 | } 104 | layer { 105 | name: "relu3_1" 106 | type: "ReLU" 107 | bottom: "conv3_1" 108 | top: "conv3_1" 109 | } 110 | layer { 111 | name: "conv3_2" 112 | type: "Convolution" 113 | bottom: "conv3_1" 114 | top: "conv3_2" 115 | convolution_param { 116 | num_output: 256 117 | pad: 1 118 | kernel_size: 3 119 | } 120 | } 121 | layer { 122 | name: "relu3_2" 123 | type: "ReLU" 124 | bottom: "conv3_2" 125 | top: "conv3_2" 126 | } 127 | layer { 128 | name: "conv3_3" 129 | type: "Convolution" 130 | bottom: "conv3_2" 131 | top: "conv3_3" 132 | convolution_param { 133 | num_output: 256 134 | pad: 1 135 | kernel_size: 3 136 | } 137 | } 138 | layer { 139 | name: "relu3_3" 140 | type: "ReLU" 141 | bottom: "conv3_3" 142 | top: "conv3_3" 143 | } 144 | layer { 145 | name: "pool3" 146 | type: "Pooling" 147 | bottom: "conv3_3" 148 | top: "pool3" 149 | pooling_param { 150 | pool: AVE 151 | kernel_size: 2 152 | stride: 2 153 | } 154 | } 155 | layer { 156 | name: "conv4_1" 157 | type: "Convolution" 158 | bottom: "pool3" 159 | top: "conv4_1" 160 | convolution_param { 161 | num_output: 512 162 | pad: 1 163 | kernel_size: 3 164 | } 165 | } 166 | layer { 167 | name: "relu4_1" 168 | type: "ReLU" 169 | bottom: "conv4_1" 170 | top: "conv4_1" 171 | } 172 | layer { 173 | name: "conv4_2" 174 | type: "Convolution" 175 | bottom: "conv4_1" 176 | top: "conv4_2" 177 | convolution_param { 178 | num_output: 512 179 | pad: 1 180 | kernel_size: 3 181 | } 182 | } 183 | layer { 184 | name: "relu4_2" 185 | type: "ReLU" 186 | bottom: "conv4_2" 187 | top: "conv4_2" 188 | } 189 | layer { 190 | name: "conv4_3" 191 | type: "Convolution" 192 | bottom: "conv4_2" 193 | top: "conv4_3" 194 | convolution_param { 195 | num_output: 512 196 | pad: 1 197 | kernel_size: 3 198 | } 199 | } 200 | layer { 201 | name: "relu4_3" 202 | 
type: "ReLU" 203 | bottom: "conv4_3" 204 | top: "conv4_3" 205 | } 206 | layer { 207 | name: "pool4" 208 | type: "Pooling" 209 | bottom: "conv4_3" 210 | top: "pool4" 211 | pooling_param { 212 | pool: AVE 213 | kernel_size: 2 214 | stride: 2 215 | } 216 | } 217 | layer { 218 | name: "conv5_1" 219 | type: "Convolution" 220 | bottom: "pool4" 221 | top: "conv5_1" 222 | convolution_param { 223 | num_output: 512 224 | pad: 1 225 | kernel_size: 3 226 | } 227 | } 228 | layer { 229 | name: "relu5_1" 230 | type: "ReLU" 231 | bottom: "conv5_1" 232 | top: "conv5_1" 233 | } 234 | layer { 235 | name: "conv5_2" 236 | type: "Convolution" 237 | bottom: "conv5_1" 238 | top: "conv5_2" 239 | convolution_param { 240 | num_output: 512 241 | pad: 1 242 | kernel_size: 3 243 | } 244 | } 245 | layer { 246 | name: "relu5_2" 247 | type: "ReLU" 248 | bottom: "conv5_2" 249 | top: "conv5_2" 250 | } 251 | layer { 252 | name: "conv5_3" 253 | type: "Convolution" 254 | bottom: "conv5_2" 255 | top: "conv5_3" 256 | convolution_param { 257 | num_output: 512 258 | pad: 1 259 | kernel_size: 3 260 | } 261 | } 262 | layer { 263 | name: "relu5_3" 264 | type: "ReLU" 265 | bottom: "conv5_3" 266 | top: "conv5_3" 267 | } 268 | layer { 269 | name: "pool5" 270 | type: "Pooling" 271 | bottom: "conv5_3" 272 | top: "pool5" 273 | pooling_param { 274 | pool: AVE 275 | kernel_size: 2 276 | stride: 2 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /NeuralArt/VGG_16_nueralart_content.prototxt: -------------------------------------------------------------------------------- 1 | name: "ContentNet" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 512 6 | input_dim: 405 7 | 8 | force_backward: true 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | num_output: 64 16 | pad: 1 17 | kernel_size: 3 18 | } 19 | } 20 | layer { 21 | name: "relu1_1" 22 | type: "ReLU" 23 | bottom: 
"conv1_1" 24 | top: "conv1_1" 25 | } 26 | layer { 27 | name: "conv1_2" 28 | type: "Convolution" 29 | bottom: "conv1_1" 30 | top: "conv1_2" 31 | convolution_param { 32 | num_output: 64 33 | pad: 1 34 | kernel_size: 3 35 | } 36 | } 37 | layer { 38 | name: "relu1_2" 39 | type: "ReLU" 40 | bottom: "conv1_2" 41 | top: "conv1_2" 42 | } 43 | layer { 44 | name: "pool1" 45 | type: "Pooling" 46 | bottom: "conv1_2" 47 | top: "pool1" 48 | pooling_param { 49 | pool: MAX 50 | kernel_size: 2 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "conv2_1" 56 | type: "Convolution" 57 | bottom: "pool1" 58 | top: "conv2_1" 59 | convolution_param { 60 | num_output: 128 61 | pad: 1 62 | kernel_size: 3 63 | } 64 | } 65 | layer { 66 | name: "relu2_1" 67 | type: "ReLU" 68 | bottom: "conv2_1" 69 | top: "conv2_1" 70 | } 71 | layer { 72 | name: "conv2_2" 73 | type: "Convolution" 74 | bottom: "conv2_1" 75 | top: "conv2_2" 76 | convolution_param { 77 | num_output: 128 78 | pad: 1 79 | kernel_size: 3 80 | } 81 | } 82 | layer { 83 | name: "relu2_2" 84 | type: "ReLU" 85 | bottom: "conv2_2" 86 | top: "conv2_2" 87 | } 88 | layer { 89 | name: "pool2" 90 | type: "Pooling" 91 | bottom: "conv2_2" 92 | top: "pool2" 93 | pooling_param { 94 | pool: MAX 95 | kernel_size: 2 96 | stride: 2 97 | } 98 | } 99 | layer { 100 | name: "conv3_1" 101 | type: "Convolution" 102 | bottom: "pool2" 103 | top: "conv3_1" 104 | convolution_param { 105 | num_output: 256 106 | pad: 1 107 | kernel_size: 3 108 | } 109 | } 110 | layer { 111 | name: "relu3_1" 112 | type: "ReLU" 113 | bottom: "conv3_1" 114 | top: "conv3_1" 115 | } 116 | layer { 117 | name: "conv3_2" 118 | type: "Convolution" 119 | bottom: "conv3_1" 120 | top: "conv3_2" 121 | convolution_param { 122 | num_output: 256 123 | pad: 1 124 | kernel_size: 3 125 | } 126 | } 127 | layer { 128 | name: "relu3_2" 129 | type: "ReLU" 130 | bottom: "conv3_2" 131 | top: "conv3_2" 132 | } 133 | layer { 134 | name: "conv3_3" 135 | type: "Convolution" 136 | bottom: "conv3_2" 137 | top: 
"conv3_3" 138 | convolution_param { 139 | num_output: 256 140 | pad: 1 141 | kernel_size: 3 142 | } 143 | } 144 | layer { 145 | name: "relu3_3" 146 | type: "ReLU" 147 | bottom: "conv3_3" 148 | top: "conv3_3" 149 | } 150 | layer { 151 | name: "pool3" 152 | type: "Pooling" 153 | bottom: "conv3_3" 154 | top: "pool3" 155 | pooling_param { 156 | pool: MAX 157 | kernel_size: 2 158 | stride: 2 159 | } 160 | } 161 | layer { 162 | name: "conv4_1" 163 | type: "Convolution" 164 | bottom: "pool3" 165 | top: "conv4_1" 166 | convolution_param { 167 | num_output: 512 168 | pad: 1 169 | kernel_size: 3 170 | } 171 | } 172 | layer { 173 | name: "relu4_1" 174 | type: "ReLU" 175 | bottom: "conv4_1" 176 | top: "conv4_1" 177 | } 178 | layer { 179 | name: "conv4_2" 180 | type: "Convolution" 181 | bottom: "conv4_1" 182 | top: "conv4_2" 183 | convolution_param { 184 | num_output: 512 185 | pad: 1 186 | kernel_size: 3 187 | } 188 | } 189 | layer { 190 | name: "relu4_2" 191 | type: "ReLU" 192 | bottom: "conv4_2" 193 | top: "conv4_2" 194 | } 195 | layer { 196 | name: "conv4_3" 197 | type: "Convolution" 198 | bottom: "conv4_2" 199 | top: "conv4_3" 200 | convolution_param { 201 | num_output: 512 202 | pad: 1 203 | kernel_size: 3 204 | } 205 | } 206 | layer { 207 | name: "relu4_3" 208 | type: "ReLU" 209 | bottom: "conv4_3" 210 | top: "conv4_3" 211 | } 212 | layer { 213 | name: "pool4" 214 | type: "Pooling" 215 | bottom: "conv4_3" 216 | top: "pool4" 217 | pooling_param { 218 | pool: MAX 219 | kernel_size: 2 220 | stride: 2 221 | } 222 | } 223 | layer { 224 | name: "conv5_1" 225 | type: "Convolution" 226 | bottom: "pool4" 227 | top: "conv5_1" 228 | convolution_param { 229 | num_output: 512 230 | pad: 1 231 | kernel_size: 3 232 | } 233 | } 234 | layer { 235 | name: "relu5_1" 236 | type: "ReLU" 237 | bottom: "conv5_1" 238 | top: "conv5_1" 239 | } 240 | layer { 241 | name: "conv5_2" 242 | type: "Convolution" 243 | bottom: "conv5_1" 244 | top: "conv5_2" 245 | convolution_param { 246 | num_output: 512 
247 | pad: 1 248 | kernel_size: 3 249 | } 250 | } 251 | layer { 252 | name: "relu5_2" 253 | type: "ReLU" 254 | bottom: "conv5_2" 255 | top: "conv5_2" 256 | } 257 | layer { 258 | name: "conv5_3" 259 | type: "Convolution" 260 | bottom: "conv5_2" 261 | top: "conv5_3" 262 | convolution_param { 263 | num_output: 512 264 | pad: 1 265 | kernel_size: 3 266 | } 267 | } 268 | layer { 269 | name: "relu5_3" 270 | type: "ReLU" 271 | bottom: "conv5_3" 272 | top: "conv5_3" 273 | } 274 | layer { 275 | name: "pool5" 276 | type: "Pooling" 277 | bottom: "conv5_3" 278 | top: "pool5" 279 | pooling_param { 280 | pool: MAX 281 | kernel_size: 2 282 | stride: 2 283 | } 284 | } 285 | 286 | -------------------------------------------------------------------------------- /NeuralArt/VGG_16_nueralart_style.prototxt: -------------------------------------------------------------------------------- 1 | name: "StylePatternGen" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 400 6 | input_dim: 279 7 | 8 | force_backward: true 9 | layer { 10 | name: "conv1_1" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1_1" 14 | convolution_param { 15 | num_output: 64 16 | pad: 1 17 | kernel_size: 3 18 | } 19 | } 20 | layer { 21 | name: "relu1_1" 22 | type: "ReLU" 23 | bottom: "conv1_1" 24 | top: "conv1_1" 25 | } 26 | layer { 27 | name: "conv1_2" 28 | type: "Convolution" 29 | bottom: "conv1_1" 30 | top: "conv1_2" 31 | convolution_param { 32 | num_output: 64 33 | pad: 1 34 | kernel_size: 3 35 | } 36 | } 37 | layer { 38 | name: "relu1_2" 39 | type: "ReLU" 40 | bottom: "conv1_2" 41 | top: "conv1_2" 42 | } 43 | layer { 44 | name: "pool1" 45 | type: "Pooling" 46 | bottom: "conv1_2" 47 | top: "pool1" 48 | pooling_param { 49 | pool: MAX 50 | kernel_size: 2 51 | stride: 2 52 | } 53 | } 54 | layer { 55 | name: "conv2_1" 56 | type: "Convolution" 57 | bottom: "pool1" 58 | top: "conv2_1" 59 | convolution_param { 60 | num_output: 128 61 | pad: 1 62 | kernel_size: 3 63 | } 64 | } 65 | layer { 66 
| name: "relu2_1" 67 | type: "ReLU" 68 | bottom: "conv2_1" 69 | top: "conv2_1" 70 | } 71 | layer { 72 | name: "conv2_2" 73 | type: "Convolution" 74 | bottom: "conv2_1" 75 | top: "conv2_2" 76 | convolution_param { 77 | num_output: 128 78 | pad: 1 79 | kernel_size: 3 80 | } 81 | } 82 | layer { 83 | name: "relu2_2" 84 | type: "ReLU" 85 | bottom: "conv2_2" 86 | top: "conv2_2" 87 | } 88 | layer { 89 | name: "pool2" 90 | type: "Pooling" 91 | bottom: "conv2_2" 92 | top: "pool2" 93 | pooling_param { 94 | pool: MAX 95 | kernel_size: 2 96 | stride: 2 97 | } 98 | } 99 | layer { 100 | name: "conv3_1" 101 | type: "Convolution" 102 | bottom: "pool2" 103 | top: "conv3_1" 104 | convolution_param { 105 | num_output: 256 106 | pad: 1 107 | kernel_size: 3 108 | } 109 | } 110 | layer { 111 | name: "relu3_1" 112 | type: "ReLU" 113 | bottom: "conv3_1" 114 | top: "conv3_1" 115 | } 116 | layer { 117 | name: "conv3_2" 118 | type: "Convolution" 119 | bottom: "conv3_1" 120 | top: "conv3_2" 121 | convolution_param { 122 | num_output: 256 123 | pad: 1 124 | kernel_size: 3 125 | } 126 | } 127 | layer { 128 | name: "relu3_2" 129 | type: "ReLU" 130 | bottom: "conv3_2" 131 | top: "conv3_2" 132 | } 133 | layer { 134 | name: "conv3_3" 135 | type: "Convolution" 136 | bottom: "conv3_2" 137 | top: "conv3_3" 138 | convolution_param { 139 | num_output: 256 140 | pad: 1 141 | kernel_size: 3 142 | } 143 | } 144 | layer { 145 | name: "relu3_3" 146 | type: "ReLU" 147 | bottom: "conv3_3" 148 | top: "conv3_3" 149 | } 150 | layer { 151 | name: "pool3" 152 | type: "Pooling" 153 | bottom: "conv3_3" 154 | top: "pool3" 155 | pooling_param { 156 | pool: MAX 157 | kernel_size: 2 158 | stride: 2 159 | } 160 | } 161 | layer { 162 | name: "conv4_1" 163 | type: "Convolution" 164 | bottom: "pool3" 165 | top: "conv4_1" 166 | convolution_param { 167 | num_output: 512 168 | pad: 1 169 | kernel_size: 3 170 | } 171 | } 172 | layer { 173 | name: "relu4_1" 174 | type: "ReLU" 175 | bottom: "conv4_1" 176 | top: "conv4_1" 177 | } 
178 | layer { 179 | name: "conv4_2" 180 | type: "Convolution" 181 | bottom: "conv4_1" 182 | top: "conv4_2" 183 | convolution_param { 184 | num_output: 512 185 | pad: 1 186 | kernel_size: 3 187 | } 188 | } 189 | layer { 190 | name: "relu4_2" 191 | type: "ReLU" 192 | bottom: "conv4_2" 193 | top: "conv4_2" 194 | } 195 | layer { 196 | name: "conv4_3" 197 | type: "Convolution" 198 | bottom: "conv4_2" 199 | top: "conv4_3" 200 | convolution_param { 201 | num_output: 512 202 | pad: 1 203 | kernel_size: 3 204 | } 205 | } 206 | layer { 207 | name: "relu4_3" 208 | type: "ReLU" 209 | bottom: "conv4_3" 210 | top: "conv4_3" 211 | } 212 | layer { 213 | name: "pool4" 214 | type: "Pooling" 215 | bottom: "conv4_3" 216 | top: "pool4" 217 | pooling_param { 218 | pool: MAX 219 | kernel_size: 2 220 | stride: 2 221 | } 222 | } 223 | layer { 224 | name: "conv5_1" 225 | type: "Convolution" 226 | bottom: "pool4" 227 | top: "conv5_1" 228 | convolution_param { 229 | num_output: 512 230 | pad: 1 231 | kernel_size: 3 232 | } 233 | } 234 | layer { 235 | name: "relu5_1" 236 | type: "ReLU" 237 | bottom: "conv5_1" 238 | top: "conv5_1" 239 | } 240 | layer { 241 | name: "conv5_2" 242 | type: "Convolution" 243 | bottom: "conv5_1" 244 | top: "conv5_2" 245 | convolution_param { 246 | num_output: 512 247 | pad: 1 248 | kernel_size: 3 249 | } 250 | } 251 | layer { 252 | name: "relu5_2" 253 | type: "ReLU" 254 | bottom: "conv5_2" 255 | top: "conv5_2" 256 | } 257 | layer { 258 | name: "conv5_3" 259 | type: "Convolution" 260 | bottom: "conv5_2" 261 | top: "conv5_3" 262 | convolution_param { 263 | num_output: 512 264 | pad: 1 265 | kernel_size: 3 266 | } 267 | } 268 | layer { 269 | name: "relu5_3" 270 | type: "ReLU" 271 | bottom: "conv5_3" 272 | top: "conv5_3" 273 | } 274 | layer { 275 | name: "pool5" 276 | type: "Pooling" 277 | bottom: "conv5_3" 278 | top: "pool5" 279 | pooling_param { 280 | pool: MAX 281 | kernel_size: 2 282 | stride: 2 283 | } 284 | } 285 | 286 | layer { 287 | name: "gram1" 288 | type: 
"Gram" 289 | bottom: "conv1_1" 290 | top: "gram1" 291 | } 292 | layer { 293 | name: "gram2" 294 | type: "Gram" 295 | bottom: "conv2_1" 296 | top: "gram2" 297 | } 298 | layer { 299 | name: "gram3" 300 | type: "Gram" 301 | bottom: "conv3_1" 302 | top: "gram3" 303 | } 304 | layer { 305 | name: "gram4" 306 | type: "Gram" 307 | bottom: "conv4_1" 308 | top: "gram4" 309 | } 310 | layer { 311 | name: "gram5" 312 | type: "Gram" 313 | bottom: "conv5_1" 314 | top: "gram5" 315 | } 316 | -------------------------------------------------------------------------------- /NeuralArt/VGG_16_nueralart_style_gen.prototxt: -------------------------------------------------------------------------------- 1 | name: "StyleGen" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 512 6 | input_dim: 405 7 | 8 | input: "content" 9 | input_dim: 1 10 | input_dim: 512 11 | input_dim: 64 12 | input_dim: 51 13 | 14 | input: "style_pattern1" 15 | input_dim: 1 16 | input_dim: 64 17 | input_dim: 64 18 | input_dim: 1 19 | 20 | input: "style_pattern2" 21 | input_dim: 1 22 | input_dim: 128 23 | input_dim: 128 24 | input_dim: 1 25 | 26 | input: "style_pattern3" 27 | input_dim: 1 28 | input_dim: 256 29 | input_dim: 256 30 | input_dim: 1 31 | 32 | input: "style_pattern4" 33 | input_dim: 1 34 | input_dim: 512 35 | input_dim: 512 36 | input_dim: 1 37 | 38 | input: "style_pattern5" 39 | input_dim: 1 40 | input_dim: 512 41 | input_dim: 512 42 | input_dim: 1 43 | 44 | force_backward: true 45 | layer { 46 | name: "conv1_1" 47 | type: "Convolution" 48 | bottom: "data" 49 | top: "conv1_1" 50 | convolution_param { 51 | num_output: 64 52 | pad: 1 53 | kernel_size: 3 54 | } 55 | } 56 | layer { 57 | name: "relu1_1" 58 | type: "ReLU" 59 | bottom: "conv1_1" 60 | top: "conv1_1" 61 | } 62 | layer { 63 | name: "conv1_2" 64 | type: "Convolution" 65 | bottom: "conv1_1" 66 | top: "conv1_2" 67 | convolution_param { 68 | num_output: 64 69 | pad: 1 70 | kernel_size: 3 71 | } 72 | } 73 | layer { 74 | name: "relu1_2" 
75 | type: "ReLU" 76 | bottom: "conv1_2" 77 | top: "conv1_2" 78 | } 79 | layer { 80 | name: "pool1" 81 | type: "Pooling" 82 | bottom: "conv1_2" 83 | top: "pool1" 84 | pooling_param { 85 | pool: MAX 86 | kernel_size: 2 87 | stride: 2 88 | } 89 | } 90 | layer { 91 | name: "conv2_1" 92 | type: "Convolution" 93 | bottom: "pool1" 94 | top: "conv2_1" 95 | convolution_param { 96 | num_output: 128 97 | pad: 1 98 | kernel_size: 3 99 | } 100 | } 101 | layer { 102 | name: "relu2_1" 103 | type: "ReLU" 104 | bottom: "conv2_1" 105 | top: "conv2_1" 106 | } 107 | layer { 108 | name: "conv2_2" 109 | type: "Convolution" 110 | bottom: "conv2_1" 111 | top: "conv2_2" 112 | convolution_param { 113 | num_output: 128 114 | pad: 1 115 | kernel_size: 3 116 | } 117 | } 118 | layer { 119 | name: "relu2_2" 120 | type: "ReLU" 121 | bottom: "conv2_2" 122 | top: "conv2_2" 123 | } 124 | layer { 125 | name: "pool2" 126 | type: "Pooling" 127 | bottom: "conv2_2" 128 | top: "pool2" 129 | pooling_param { 130 | pool: MAX 131 | kernel_size: 2 132 | stride: 2 133 | } 134 | } 135 | layer { 136 | name: "conv3_1" 137 | type: "Convolution" 138 | bottom: "pool2" 139 | top: "conv3_1" 140 | convolution_param { 141 | num_output: 256 142 | pad: 1 143 | kernel_size: 3 144 | } 145 | } 146 | layer { 147 | name: "relu3_1" 148 | type: "ReLU" 149 | bottom: "conv3_1" 150 | top: "conv3_1" 151 | } 152 | layer { 153 | name: "conv3_2" 154 | type: "Convolution" 155 | bottom: "conv3_1" 156 | top: "conv3_2" 157 | convolution_param { 158 | num_output: 256 159 | pad: 1 160 | kernel_size: 3 161 | } 162 | } 163 | layer { 164 | name: "relu3_2" 165 | type: "ReLU" 166 | bottom: "conv3_2" 167 | top: "conv3_2" 168 | } 169 | layer { 170 | name: "conv3_3" 171 | type: "Convolution" 172 | bottom: "conv3_2" 173 | top: "conv3_3" 174 | convolution_param { 175 | num_output: 256 176 | pad: 1 177 | kernel_size: 3 178 | } 179 | } 180 | layer { 181 | name: "relu3_3" 182 | type: "ReLU" 183 | bottom: "conv3_3" 184 | top: "conv3_3" 185 | } 186 | layer 
{ 187 | name: "pool3" 188 | type: "Pooling" 189 | bottom: "conv3_3" 190 | top: "pool3" 191 | pooling_param { 192 | pool: MAX 193 | kernel_size: 2 194 | stride: 2 195 | } 196 | } 197 | layer { 198 | name: "conv4_1" 199 | type: "Convolution" 200 | bottom: "pool3" 201 | top: "conv4_1" 202 | convolution_param { 203 | num_output: 512 204 | pad: 1 205 | kernel_size: 3 206 | } 207 | } 208 | layer { 209 | name: "relu4_1" 210 | type: "ReLU" 211 | bottom: "conv4_1" 212 | top: "conv4_1" 213 | } 214 | layer { 215 | name: "conv4_2" 216 | type: "Convolution" 217 | bottom: "conv4_1" 218 | top: "conv4_2" 219 | convolution_param { 220 | num_output: 512 221 | pad: 1 222 | kernel_size: 3 223 | } 224 | } 225 | layer { 226 | name: "relu4_2" 227 | type: "ReLU" 228 | bottom: "conv4_2" 229 | top: "conv4_2" 230 | } 231 | layer { 232 | name: "conv4_3" 233 | type: "Convolution" 234 | bottom: "conv4_2" 235 | top: "conv4_3" 236 | convolution_param { 237 | num_output: 512 238 | pad: 1 239 | kernel_size: 3 240 | } 241 | } 242 | layer { 243 | name: "relu4_3" 244 | type: "ReLU" 245 | bottom: "conv4_3" 246 | top: "conv4_3" 247 | } 248 | layer { 249 | name: "pool4" 250 | type: "Pooling" 251 | bottom: "conv4_3" 252 | top: "pool4" 253 | pooling_param { 254 | pool: MAX 255 | kernel_size: 2 256 | stride: 2 257 | } 258 | } 259 | layer { 260 | name: "conv5_1" 261 | type: "Convolution" 262 | bottom: "pool4" 263 | top: "conv5_1" 264 | convolution_param { 265 | num_output: 512 266 | pad: 1 267 | kernel_size: 3 268 | } 269 | } 270 | layer { 271 | name: "relu5_1" 272 | type: "ReLU" 273 | bottom: "conv5_1" 274 | top: "conv5_1" 275 | } 276 | layer { 277 | name: "conv5_2" 278 | type: "Convolution" 279 | bottom: "conv5_1" 280 | top: "conv5_2" 281 | convolution_param { 282 | num_output: 512 283 | pad: 1 284 | kernel_size: 3 285 | } 286 | } 287 | layer { 288 | name: "relu5_2" 289 | type: "ReLU" 290 | bottom: "conv5_2" 291 | top: "conv5_2" 292 | } 293 | layer { 294 | name: "conv5_3" 295 | type: "Convolution" 296 | 
bottom: "conv5_2" 297 | top: "conv5_3" 298 | convolution_param { 299 | num_output: 512 300 | pad: 1 301 | kernel_size: 3 302 | } 303 | } 304 | layer { 305 | name: "relu5_3" 306 | type: "ReLU" 307 | bottom: "conv5_3" 308 | top: "conv5_3" 309 | } 310 | layer { 311 | name: "pool5" 312 | type: "Pooling" 313 | bottom: "conv5_3" 314 | top: "pool5" 315 | pooling_param { 316 | pool: MAX 317 | kernel_size: 2 318 | stride: 2 319 | } 320 | } 321 | 322 | layer { 323 | name: "gram1" 324 | type: "Gram" 325 | bottom: "conv1_1" 326 | top: "gram1" 327 | } 328 | layer { 329 | name: "smoothl11" 330 | type: "SmoothL1Loss" 331 | bottom: "gram1" 332 | bottom: "style_pattern1" 333 | top: "smoothl11" 334 | loss_weight: 1 335 | } 336 | layer { 337 | name: "gram2" 338 | type: "Gram" 339 | bottom: "conv2_1" 340 | top: "gram2" 341 | } 342 | layer { 343 | name: "smoothl12" 344 | type: "SmoothL1Loss" 345 | bottom: "gram2" 346 | bottom: "style_pattern2" 347 | top: "smoothl12" 348 | loss_weight: 1 349 | } 350 | layer { 351 | name: "gram3" 352 | type: "Gram" 353 | bottom: "conv3_1" 354 | top: "gram3" 355 | } 356 | layer { 357 | name: "smoothl13" 358 | type: "SmoothL1Loss" 359 | bottom: "gram3" 360 | bottom: "style_pattern3" 361 | top: "smoothl13" 362 | loss_weight: 1 363 | } 364 | layer { 365 | name: "gram4" 366 | type: "Gram" 367 | bottom: "conv4_1" 368 | top: "gram4" 369 | } 370 | layer { 371 | name: "smoothl14" 372 | type: "SmoothL1Loss" 373 | bottom: "gram4" 374 | bottom: "style_pattern4" 375 | top: "smoothl14" 376 | loss_weight: 1 377 | } 378 | layer { 379 | name: "gram5" 380 | type: "Gram" 381 | bottom: "conv5_1" 382 | top: "gram5" 383 | } 384 | layer { 385 | name: "smoothl15" 386 | type: "SmoothL1Loss" 387 | bottom: "gram5" 388 | bottom: "style_pattern5" 389 | top: "smoothl15" 390 | loss_weight: 1 391 | } 392 | layer { 393 | name: "smoothl1_content" 394 | type: "SmoothL1Loss" 395 | bottom: "conv4_2" 396 | bottom: "content" 397 | top: "smoothl1_content" 398 | loss_weight: 0.1 399 | } 400 | 
-------------------------------------------------------------------------------- /NeuralArt/gallery/star-tubingen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NeuralArt/gallery/star-tubingen.png -------------------------------------------------------------------------------- /NeuralArt/gaussian_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "gaussian_net" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 270 6 | input_dim: 512 7 | layers { 8 | name: "gaussian_prior" 9 | type: CONVOLUTION 10 | bottom: "data" 11 | top: "gaussian_prior" 12 | convolution_param { 13 | num_output: 18 14 | kernel_size: 1 15 | stride: 1 16 | weight_filler { 17 | type: "constant" 18 | value: 0 19 | } 20 | bias_filler { 21 | type: "constant" 22 | value: 0 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /NeuralArt/gaussian_net_template.prototxt: -------------------------------------------------------------------------------- 1 | name: "gaussian_net" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: {height} 6 | input_dim: {width} 7 | layers { 8 | name: "gaussian_prior" 9 | type: CONVOLUTION 10 | bottom: "data" 11 | top: "gaussian_prior" 12 | convolution_param { 13 | num_output: {output_num} 14 | kernel_size: 1 15 | stride: 1 16 | weight_filler { 17 | type: "constant" 18 | value: 0 19 | } 20 | bias_filler { 21 | type: "constant" 22 | value: 0 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /NeuralArt/getColorPrior.m: --------------------------------------------------------------------------------
function obj = getColorPrior(image,k)
%GETCOLORPRIOR Fit a Gaussian mixture model to the pixel colours of an image.
%   obj = getColorPrior(image,k) flattens IMAGE into a 3-by-(H*W) matrix
%   with one column per pixel, reverses the order of the three channels,
%   subtracts the per-channel offsets in vgg_mean, and fits a mixture of K
%   Gaussians with gmdistribution.fit (at most 1000 iterations per fit).
%   Whenever a fit throws, K is decreased by one and the fit is retried.
%
%   Inputs:
%     image - H-by-W-by-3 array; the reshape below requires exactly three
%             planes along the third dimension.
%     k     - number of mixture components to try first.
%
%   Output:
%     obj   - the object returned by the first gmdistribution.fit call that
%             succeeds.
%
%   Errors:
%     getColorPrior:fitFailed - no component count from k down to 1 could
%     be fitted, or k was not positive on entry. Without this check the
%     function would end with obj unassigned and MATLAB would report an
%     uninformative "output argument not assigned" error.

    % One column per pixel, one row per colour channel.
    numPixels = size(image,1) * size(image,2);
    x = reshape(permute(image,[3 1 2]),[3 numPixels]);

    vgg_mean = [103.939, 116.779, 123.68];

    % Reverse the channel order before subtracting the offsets.
    % NOTE(review): presumably RGB -> BGR so the rows line up with the
    % VGG mean values - confirm against the caller.
    x = double(x([3 2 1],:));
    x = bsxfun(@minus,x,vgg_mean');

    options = statset('MaxIter',1000);

    fitted = false;   % set once a fit succeeds
    lastErr = [];     % most recent fitting failure, kept for the final report

    while k>0
        try
            fprintf('color GMM, try clusters=%d...\n',k);
            obj = gmdistribution.fit(x',k,'Options',options);
            fitted = true;
            break;
        catch err
            % Remember why this attempt failed, then retry with one
            % component fewer.
            lastErr = err;
            k = k-1;
        end
    end

    if ~fitted
        if isempty(lastErr)
            % The loop body never ran: k was not positive on entry.
            error('getColorPrior:fitFailed', ...
                  'k must be a positive number of clusters.');
        else
            error('getColorPrior:fitFailed', ...
                  'GMM fitting failed for every cluster count tried: %s', ...
                  lastErr.message);
        end
    end
end
-------------------------------------------------------------------------------- /NeuralArt/googlenet_neuralart.prototxt: -------------------------------------------------------------------------------- 1 | 2 | force_backward: true 3 | layer { 4 | name: "conv1" 5 | type: "Convolution" 6 | bottom: "data" 7 | top: "conv1" 8 | param { 9 | lr_mult: 1 10 | decay_mult: 1 11 | } 12 | param { 13 | lr_mult: 2 14 | decay_mult: 0 15 | } 16 | convolution_param { 17 | num_output: 64 18 | pad: 3 19 | kernel_size: 7 20 | stride: 2 21 | weight_filler { 22 | type: "gaussian" 23 | std: 0.015 24 | } 25 | bias_filler { 26 | type: "constant" 27 | value: 0 28 | } 29 | } 30 | } 31 | layer { 32 | name: "relu1" 33 | type: "ReLU" 34 | bottom: "conv1" 35 | top: "conv1" 36 | } 37 | layer { 38 | name: "pool1" 39 | type: "Pooling" 40 | bottom: "conv1" 41 | top: "pool1" 42 | pooling_param { 43 | pool: MAX 44 | kernel_size: 3 45 | stride: 2 46 | pad: 0 47 | } 48 | } 49 | layer { 50 | name: "norm1" 51 | type: "LRN" 52 | bottom: "pool1" 53 | top: "norm1" 54 | lrn_param { 55 | local_size: 5 56 | alpha: 0.0001 57 | beta: 0.75 58 | } 59 | } 60 | layer { 61 | name: "reduction2" 62 | type: "Convolution" 63 | bottom: "norm1" 64 | top: "reduction2" 65 | param { 66 | lr_mult: 1 67 | decay_mult: 1 68 | } 69 | param { 70 | lr_mult: 2 71 | decay_mult: 0 72 | } 73 | convolution_param { 74 | num_output: 64 75 | pad: 0 76 | kernel_size: 1 77 | group: 1 78 | weight_filler { 79 | type: "xavier" 80 | } 81 | bias_filler { 82 | type: "constant" 83 | value: 0 84 | } 85 | } 86 | } 87 | layer { 88 | name: "relu_reduction2" 89 | type: "ReLU" 90 | bottom: "reduction2" 91 | top: "reduction2" 92 | } 93 | layer { 94 | name:
"conv2" 95 | type: "Convolution" 96 | bottom: "reduction2" 97 | top: "conv2" 98 | param { 99 | lr_mult: 1 100 | decay_mult: 1 101 | } 102 | param { 103 | lr_mult: 2 104 | decay_mult: 0 105 | } 106 | convolution_param { 107 | num_output: 192 108 | pad: 1 109 | kernel_size: 3 110 | group: 1 111 | weight_filler { 112 | type: "gaussian" 113 | std: 0.02 114 | } 115 | bias_filler { 116 | type: "constant" 117 | value: 0 118 | } 119 | } 120 | } 121 | layer { 122 | name: "relu2" 123 | type: "ReLU" 124 | bottom: "conv2" 125 | top: "conv2" 126 | } 127 | layer { 128 | name: "norm2" 129 | type: "LRN" 130 | bottom: "conv2" 131 | top: "norm2" 132 | lrn_param { 133 | local_size: 5 134 | alpha: 0.0001 135 | beta: 0.75 136 | } 137 | } 138 | layer { 139 | name: "pool2" 140 | type: "Pooling" 141 | bottom: "norm2" 142 | top: "pool2" 143 | pooling_param { 144 | pool: MAX 145 | kernel_size: 3 146 | stride: 2 147 | pad: 0 148 | } 149 | } 150 | layer { 151 | name: "icp1_reduction1" 152 | type: "Convolution" 153 | bottom: "pool2" 154 | top: "icp1_reduction1" 155 | param { 156 | lr_mult: 1 157 | decay_mult: 1 158 | } 159 | param { 160 | lr_mult: 2 161 | decay_mult: 0 162 | } 163 | convolution_param { 164 | num_output: 96 165 | pad: 0 166 | kernel_size: 1 167 | weight_filler { 168 | type: "xavier" 169 | } 170 | bias_filler { 171 | type: "constant" 172 | value: 0 173 | } 174 | } 175 | } 176 | layer { 177 | name: "relu_icp1_reduction1" 178 | type: "ReLU" 179 | bottom: "icp1_reduction1" 180 | top: "icp1_reduction1" 181 | } 182 | layer { 183 | name: "icp1_reduction2" 184 | type: "Convolution" 185 | bottom: "pool2" 186 | top: "icp1_reduction2" 187 | param { 188 | lr_mult: 1 189 | decay_mult: 1 190 | } 191 | param { 192 | lr_mult: 2 193 | decay_mult: 0 194 | } 195 | convolution_param { 196 | num_output: 16 197 | pad: 0 198 | kernel_size: 1 199 | weight_filler { 200 | type: "xavier" 201 | } 202 | bias_filler { 203 | type: "constant" 204 | value: 0 205 | } 206 | } 207 | } 208 | layer { 209 | name: 
"relu_icp1_reduction2" 210 | type: "ReLU" 211 | bottom: "icp1_reduction2" 212 | top: "icp1_reduction2" 213 | } 214 | layer { 215 | name: "icp1_pool" 216 | type: "Pooling" 217 | bottom: "pool2" 218 | top: "icp1_pool" 219 | pooling_param { 220 | pool: MAX 221 | kernel_size: 3 222 | stride: 1 223 | pad: 1 224 | } 225 | } 226 | layer { 227 | name: "icp1_out0" 228 | type: "Convolution" 229 | bottom: "pool2" 230 | top: "icp1_out0" 231 | param { 232 | lr_mult: 1 233 | decay_mult: 1 234 | } 235 | param { 236 | lr_mult: 2 237 | decay_mult: 0 238 | } 239 | convolution_param { 240 | num_output: 64 241 | pad: 0 242 | kernel_size: 1 243 | weight_filler { 244 | type: "xavier" 245 | } 246 | bias_filler { 247 | type: "constant" 248 | value: 0 249 | } 250 | } 251 | } 252 | layer { 253 | name: "relu_icp1_out0" 254 | type: "ReLU" 255 | bottom: "icp1_out0" 256 | top: "icp1_out0" 257 | } 258 | layer { 259 | name: "icp1_out1" 260 | type: "Convolution" 261 | bottom: "icp1_reduction1" 262 | top: "icp1_out1" 263 | param { 264 | lr_mult: 1 265 | decay_mult: 1 266 | } 267 | param { 268 | lr_mult: 2 269 | decay_mult: 0 270 | } 271 | convolution_param { 272 | num_output: 128 273 | pad: 1 274 | kernel_size: 3 275 | weight_filler { 276 | type: "gaussian" 277 | std: 0.04 278 | } 279 | bias_filler { 280 | type: "constant" 281 | value: 0 282 | } 283 | } 284 | } 285 | layer { 286 | name: "relu_icp1_out1" 287 | type: "ReLU" 288 | bottom: "icp1_out1" 289 | top: "icp1_out1" 290 | } 291 | layer { 292 | name: "icp1_out2" 293 | type: "Convolution" 294 | bottom: "icp1_reduction2" 295 | top: "icp1_out2" 296 | param { 297 | lr_mult: 1 298 | decay_mult: 1 299 | } 300 | param { 301 | lr_mult: 2 302 | decay_mult: 0 303 | } 304 | convolution_param { 305 | num_output: 32 306 | pad: 2 307 | kernel_size: 5 308 | weight_filler { 309 | type: "gaussian" 310 | std: 0.08 311 | } 312 | bias_filler { 313 | type: "constant" 314 | value: 0 315 | } 316 | } 317 | } 318 | layer { 319 | name: "relu_icp1_out2" 320 | type: "ReLU" 
321 | bottom: "icp1_out2" 322 | top: "icp1_out2" 323 | } 324 | layer { 325 | name: "icp1_out3" 326 | type: "Convolution" 327 | bottom: "icp1_pool" 328 | top: "icp1_out3" 329 | param { 330 | lr_mult: 1 331 | decay_mult: 1 332 | } 333 | param { 334 | lr_mult: 2 335 | decay_mult: 0 336 | } 337 | convolution_param { 338 | num_output: 32 339 | pad: 0 340 | kernel_size: 1 341 | weight_filler { 342 | type: "xavier" 343 | } 344 | bias_filler { 345 | type: "constant" 346 | value: 0 347 | } 348 | } 349 | } 350 | layer { 351 | name: "relu_icp1_out3" 352 | type: "ReLU" 353 | bottom: "icp1_out3" 354 | top: "icp1_out3" 355 | } 356 | layer { 357 | name: "icp2_in" 358 | type: "Concat" 359 | bottom: "icp1_out0" 360 | bottom: "icp1_out1" 361 | bottom: "icp1_out2" 362 | bottom: "icp1_out3" 363 | top: "icp2_in" 364 | } 365 | layer { 366 | name: "icp2_reduction1" 367 | type: "Convolution" 368 | bottom: "icp2_in" 369 | top: "icp2_reduction1" 370 | param { 371 | lr_mult: 1 372 | decay_mult: 1 373 | } 374 | param { 375 | lr_mult: 2 376 | decay_mult: 0 377 | } 378 | convolution_param { 379 | num_output: 128 380 | pad: 0 381 | kernel_size: 1 382 | weight_filler { 383 | type: "xavier" 384 | } 385 | bias_filler { 386 | type: "constant" 387 | value: 0 388 | } 389 | } 390 | } 391 | layer { 392 | name: "relu_icp2_reduction1" 393 | type: "ReLU" 394 | bottom: "icp2_reduction1" 395 | top: "icp2_reduction1" 396 | } 397 | layer { 398 | name: "icp2_reduction2" 399 | type: "Convolution" 400 | bottom: "icp2_in" 401 | top: "icp2_reduction2" 402 | param { 403 | lr_mult: 1 404 | decay_mult: 1 405 | } 406 | param { 407 | lr_mult: 2 408 | decay_mult: 0 409 | } 410 | convolution_param { 411 | num_output: 32 412 | pad: 0 413 | kernel_size: 1 414 | weight_filler { 415 | type: "xavier" 416 | } 417 | bias_filler { 418 | type: "constant" 419 | value: 0 420 | } 421 | } 422 | } 423 | layer { 424 | name: "relu_icp2_reduction2" 425 | type: "ReLU" 426 | bottom: "icp2_reduction2" 427 | top: "icp2_reduction2" 428 | } 429 
| layer { 430 | name: "icp2_pool" 431 | type: "Pooling" 432 | bottom: "icp2_in" 433 | top: "icp2_pool" 434 | pooling_param { 435 | pool: MAX 436 | kernel_size: 3 437 | stride: 1 438 | pad: 1 439 | } 440 | } 441 | layer { 442 | name: "icp2_out0" 443 | type: "Convolution" 444 | bottom: "icp2_in" 445 | top: "icp2_out0" 446 | param { 447 | lr_mult: 1 448 | decay_mult: 1 449 | } 450 | param { 451 | lr_mult: 2 452 | decay_mult: 0 453 | } 454 | convolution_param { 455 | num_output: 128 456 | pad: 0 457 | kernel_size: 1 458 | weight_filler { 459 | type: "xavier" 460 | } 461 | bias_filler { 462 | type: "constant" 463 | value: 0 464 | } 465 | } 466 | } 467 | layer { 468 | name: "relu_icp2_out0" 469 | type: "ReLU" 470 | bottom: "icp2_out0" 471 | top: "icp2_out0" 472 | } 473 | layer { 474 | name: "icp2_out1" 475 | type: "Convolution" 476 | bottom: "icp2_reduction1" 477 | top: "icp2_out1" 478 | param { 479 | lr_mult: 1 480 | decay_mult: 1 481 | } 482 | param { 483 | lr_mult: 2 484 | decay_mult: 0 485 | } 486 | convolution_param { 487 | num_output: 192 488 | pad: 1 489 | kernel_size: 3 490 | weight_filler { 491 | type: "gaussian" 492 | std: 0.04 493 | } 494 | bias_filler { 495 | type: "constant" 496 | value: 0 497 | } 498 | } 499 | } 500 | layer { 501 | name: "relu_icp2_out1" 502 | type: "ReLU" 503 | bottom: "icp2_out1" 504 | top: "icp2_out1" 505 | } 506 | layer { 507 | name: "icp2_out2" 508 | type: "Convolution" 509 | bottom: "icp2_reduction2" 510 | top: "icp2_out2" 511 | param { 512 | lr_mult: 1 513 | decay_mult: 1 514 | } 515 | param { 516 | lr_mult: 2 517 | decay_mult: 0 518 | } 519 | convolution_param { 520 | num_output: 96 521 | pad: 2 522 | kernel_size: 5 523 | weight_filler { 524 | type: "gaussian" 525 | std: 0.08 526 | } 527 | bias_filler { 528 | type: "constant" 529 | value: 0 530 | } 531 | } 532 | } 533 | layer { 534 | name: "relu_icp2_out2" 535 | type: "ReLU" 536 | bottom: "icp2_out2" 537 | top: "icp2_out2" 538 | } 539 | layer { 540 | name: "icp2_out3" 541 | type: 
"Convolution" 542 | bottom: "icp2_pool" 543 | top: "icp2_out3" 544 | param { 545 | lr_mult: 1 546 | decay_mult: 1 547 | } 548 | param { 549 | lr_mult: 2 550 | decay_mult: 0 551 | } 552 | convolution_param { 553 | num_output: 64 554 | pad: 0 555 | kernel_size: 1 556 | weight_filler { 557 | type: "xavier" 558 | } 559 | bias_filler { 560 | type: "constant" 561 | value: 0 562 | } 563 | } 564 | } 565 | layer { 566 | name: "relu_icp2_out3" 567 | type: "ReLU" 568 | bottom: "icp2_out3" 569 | top: "icp2_out3" 570 | } 571 | layer { 572 | name: "icp2_out" 573 | type: "Concat" 574 | bottom: "icp2_out0" 575 | bottom: "icp2_out1" 576 | bottom: "icp2_out2" 577 | bottom: "icp2_out3" 578 | top: "icp2_out" 579 | } 580 | layer { 581 | name: "icp3_in" 582 | type: "Pooling" 583 | bottom: "icp2_out" 584 | top: "icp3_in" 585 | pooling_param { 586 | pool: MAX 587 | kernel_size: 3 588 | stride: 2 589 | pad: 0 590 | } 591 | } 592 | layer { 593 | name: "icp3_reduction1" 594 | type: "Convolution" 595 | bottom: "icp3_in" 596 | top: "icp3_reduction1" 597 | param { 598 | lr_mult: 1 599 | decay_mult: 1 600 | } 601 | param { 602 | lr_mult: 2 603 | decay_mult: 0 604 | } 605 | convolution_param { 606 | num_output: 96 607 | pad: 0 608 | kernel_size: 1 609 | weight_filler { 610 | type: "xavier" 611 | } 612 | bias_filler { 613 | type: "constant" 614 | value: 0 615 | } 616 | } 617 | } 618 | layer { 619 | name: "relu_icp3_reduction1" 620 | type: "ReLU" 621 | bottom: "icp3_reduction1" 622 | top: "icp3_reduction1" 623 | } 624 | layer { 625 | name: "icp3_reduction2" 626 | type: "Convolution" 627 | bottom: "icp3_in" 628 | top: "icp3_reduction2" 629 | param { 630 | lr_mult: 1 631 | decay_mult: 1 632 | } 633 | param { 634 | lr_mult: 2 635 | decay_mult: 0 636 | } 637 | convolution_param { 638 | num_output: 16 639 | pad: 0 640 | kernel_size: 1 641 | weight_filler { 642 | type: "xavier" 643 | } 644 | bias_filler { 645 | type: "constant" 646 | value: 0 647 | } 648 | } 649 | } 650 | layer { 651 | name: 
"relu_icp3_reduction2" 652 | type: "ReLU" 653 | bottom: "icp3_reduction2" 654 | top: "icp3_reduction2" 655 | } 656 | layer { 657 | name: "icp3_pool" 658 | type: "Pooling" 659 | bottom: "icp3_in" 660 | top: "icp3_pool" 661 | pooling_param { 662 | pool: MAX 663 | kernel_size: 3 664 | stride: 1 665 | pad: 1 666 | } 667 | } 668 | layer { 669 | name: "icp3_out0" 670 | type: "Convolution" 671 | bottom: "icp3_in" 672 | top: "icp3_out0" 673 | param { 674 | lr_mult: 1 675 | decay_mult: 1 676 | } 677 | param { 678 | lr_mult: 2 679 | decay_mult: 0 680 | } 681 | convolution_param { 682 | num_output: 192 683 | pad: 0 684 | kernel_size: 1 685 | weight_filler { 686 | type: "xavier" 687 | } 688 | bias_filler { 689 | type: "constant" 690 | value: 0 691 | } 692 | } 693 | } 694 | layer { 695 | name: "relu_icp3_out0" 696 | type: "ReLU" 697 | bottom: "icp3_out0" 698 | top: "icp3_out0" 699 | } 700 | layer { 701 | name: "icp3_out1" 702 | type: "Convolution" 703 | bottom: "icp3_reduction1" 704 | top: "icp3_out1" 705 | param { 706 | lr_mult: 1 707 | decay_mult: 1 708 | } 709 | param { 710 | lr_mult: 2 711 | decay_mult: 0 712 | } 713 | convolution_param { 714 | num_output: 208 715 | pad: 1 716 | kernel_size: 3 717 | weight_filler { 718 | type: "gaussian" 719 | std: 0.04 720 | } 721 | bias_filler { 722 | type: "constant" 723 | value: 0 724 | } 725 | } 726 | } 727 | layer { 728 | name: "relu_icp3_out1" 729 | type: "ReLU" 730 | bottom: "icp3_out1" 731 | top: "icp3_out1" 732 | } 733 | layer { 734 | name: "icp3_out2" 735 | type: "Convolution" 736 | bottom: "icp3_reduction2" 737 | top: "icp3_out2" 738 | param { 739 | lr_mult: 1 740 | decay_mult: 1 741 | } 742 | param { 743 | lr_mult: 2 744 | decay_mult: 0 745 | } 746 | convolution_param { 747 | num_output: 48 748 | pad: 2 749 | kernel_size: 5 750 | weight_filler { 751 | type: "gaussian" 752 | std: 0.08 753 | } 754 | bias_filler { 755 | type: "constant" 756 | value: 0 757 | } 758 | } 759 | } 760 | layer { 761 | name: "relu_icp3_out2" 762 | type: 
"ReLU" 763 | bottom: "icp3_out2" 764 | top: "icp3_out2" 765 | } 766 | layer { 767 | name: "icp3_out3" 768 | type: "Convolution" 769 | bottom: "icp3_pool" 770 | top: "icp3_out3" 771 | param { 772 | lr_mult: 1 773 | decay_mult: 1 774 | } 775 | param { 776 | lr_mult: 2 777 | decay_mult: 0 778 | } 779 | convolution_param { 780 | num_output: 64 781 | pad: 0 782 | kernel_size: 1 783 | weight_filler { 784 | type: "xavier" 785 | } 786 | bias_filler { 787 | type: "constant" 788 | value: 0 789 | } 790 | } 791 | } 792 | layer { 793 | name: "relu_icp3_out3" 794 | type: "ReLU" 795 | bottom: "icp3_out3" 796 | top: "icp3_out3" 797 | } 798 | layer { 799 | name: "icp3_out" 800 | type: "Concat" 801 | bottom: "icp3_out0" 802 | bottom: "icp3_out1" 803 | bottom: "icp3_out2" 804 | bottom: "icp3_out3" 805 | top: "icp3_out" 806 | } 807 | layer { 808 | name: "icp4_reduction1" 809 | type: "Convolution" 810 | bottom: "icp3_out" 811 | top: "icp4_reduction1" 812 | param { 813 | lr_mult: 1 814 | decay_mult: 1 815 | } 816 | param { 817 | lr_mult: 2 818 | decay_mult: 0 819 | } 820 | convolution_param { 821 | num_output: 112 822 | pad: 0 823 | kernel_size: 1 824 | weight_filler { 825 | type: "xavier" 826 | } 827 | bias_filler { 828 | type: "constant" 829 | value: 0 830 | } 831 | } 832 | } 833 | layer { 834 | name: "relu_icp4_reduction1" 835 | type: "ReLU" 836 | bottom: "icp4_reduction1" 837 | top: "icp4_reduction1" 838 | } 839 | layer { 840 | name: "icp4_reduction2" 841 | type: "Convolution" 842 | bottom: "icp3_out" 843 | top: "icp4_reduction2" 844 | param { 845 | lr_mult: 1 846 | decay_mult: 1 847 | } 848 | param { 849 | lr_mult: 2 850 | decay_mult: 0 851 | } 852 | convolution_param { 853 | num_output: 24 854 | pad: 0 855 | kernel_size: 1 856 | weight_filler { 857 | type: "xavier" 858 | } 859 | bias_filler { 860 | type: "constant" 861 | value: 0 862 | } 863 | } 864 | } 865 | layer { 866 | name: "relu_icp4_reduction2" 867 | type: "ReLU" 868 | bottom: "icp4_reduction2" 869 | top: "icp4_reduction2" 
870 | } 871 | layer { 872 | name: "icp4_pool" 873 | type: "Pooling" 874 | bottom: "icp3_out" 875 | top: "icp4_pool" 876 | pooling_param { 877 | pool: MAX 878 | kernel_size: 3 879 | stride: 1 880 | pad: 1 881 | } 882 | } 883 | layer { 884 | name: "icp4_out0" 885 | type: "Convolution" 886 | bottom: "icp3_out" 887 | top: "icp4_out0" 888 | param { 889 | lr_mult: 1 890 | decay_mult: 1 891 | } 892 | param { 893 | lr_mult: 2 894 | decay_mult: 0 895 | } 896 | convolution_param { 897 | num_output: 160 898 | pad: 0 899 | kernel_size: 1 900 | weight_filler { 901 | type: "xavier" 902 | } 903 | bias_filler { 904 | type: "constant" 905 | value: 0 906 | } 907 | } 908 | } 909 | layer { 910 | name: "relu_icp4_out0" 911 | type: "ReLU" 912 | bottom: "icp4_out0" 913 | top: "icp4_out0" 914 | } 915 | layer { 916 | name: "icp4_out1" 917 | type: "Convolution" 918 | bottom: "icp4_reduction1" 919 | top: "icp4_out1" 920 | param { 921 | lr_mult: 1 922 | decay_mult: 1 923 | } 924 | param { 925 | lr_mult: 2 926 | decay_mult: 0 927 | } 928 | convolution_param { 929 | num_output: 224 930 | pad: 1 931 | kernel_size: 3 932 | weight_filler { 933 | type: "gaussian" 934 | std: 0.04 935 | } 936 | bias_filler { 937 | type: "constant" 938 | value: 0 939 | } 940 | } 941 | } 942 | layer { 943 | name: "relu_icp4_out1" 944 | type: "ReLU" 945 | bottom: "icp4_out1" 946 | top: "icp4_out1" 947 | } 948 | layer { 949 | name: "icp4_out2" 950 | type: "Convolution" 951 | bottom: "icp4_reduction2" 952 | top: "icp4_out2" 953 | param { 954 | lr_mult: 1 955 | decay_mult: 1 956 | } 957 | param { 958 | lr_mult: 2 959 | decay_mult: 0 960 | } 961 | convolution_param { 962 | num_output: 64 963 | pad: 2 964 | kernel_size: 5 965 | weight_filler { 966 | type: "gaussian" 967 | std: 0.08 968 | } 969 | bias_filler { 970 | type: "constant" 971 | value: 0 972 | } 973 | } 974 | } 975 | layer { 976 | name: "relu_icp4_out2" 977 | type: "ReLU" 978 | bottom: "icp4_out2" 979 | top: "icp4_out2" 980 | } 981 | layer { 982 | name: "icp4_out3" 
983 | type: "Convolution" 984 | bottom: "icp4_pool" 985 | top: "icp4_out3" 986 | param { 987 | lr_mult: 1 988 | decay_mult: 1 989 | } 990 | param { 991 | lr_mult: 2 992 | decay_mult: 0 993 | } 994 | convolution_param { 995 | num_output: 64 996 | pad: 0 997 | kernel_size: 1 998 | weight_filler { 999 | type: "xavier" 1000 | } 1001 | bias_filler { 1002 | type: "constant" 1003 | value: 0 1004 | } 1005 | } 1006 | } 1007 | layer { 1008 | name: "relu_icp4_out3" 1009 | type: "ReLU" 1010 | bottom: "icp4_out3" 1011 | top: "icp4_out3" 1012 | } 1013 | layer { 1014 | name: "icp4_out" 1015 | type: "Concat" 1016 | bottom: "icp4_out0" 1017 | bottom: "icp4_out1" 1018 | bottom: "icp4_out2" 1019 | bottom: "icp4_out3" 1020 | top: "icp4_out" 1021 | } 1022 | layer { 1023 | name: "icp5_reduction1" 1024 | type: "Convolution" 1025 | bottom: "icp4_out" 1026 | top: "icp5_reduction1" 1027 | param { 1028 | lr_mult: 1 1029 | decay_mult: 1 1030 | } 1031 | param { 1032 | lr_mult: 2 1033 | decay_mult: 0 1034 | } 1035 | convolution_param { 1036 | num_output: 128 1037 | pad: 0 1038 | kernel_size: 1 1039 | weight_filler { 1040 | type: "xavier" 1041 | } 1042 | bias_filler { 1043 | type: "constant" 1044 | value: 0 1045 | } 1046 | } 1047 | } 1048 | layer { 1049 | name: "relu_icp5_reduction1" 1050 | type: "ReLU" 1051 | bottom: "icp5_reduction1" 1052 | top: "icp5_reduction1" 1053 | } 1054 | layer { 1055 | name: "icp5_reduction2" 1056 | type: "Convolution" 1057 | bottom: "icp4_out" 1058 | top: "icp5_reduction2" 1059 | param { 1060 | lr_mult: 1 1061 | decay_mult: 1 1062 | } 1063 | param { 1064 | lr_mult: 2 1065 | decay_mult: 0 1066 | } 1067 | convolution_param { 1068 | num_output: 24 1069 | pad: 0 1070 | kernel_size: 1 1071 | weight_filler { 1072 | type: "xavier" 1073 | } 1074 | bias_filler { 1075 | type: "constant" 1076 | value: 0 1077 | } 1078 | } 1079 | } 1080 | layer { 1081 | name: "relu_icp5_reduction2" 1082 | type: "ReLU" 1083 | bottom: "icp5_reduction2" 1084 | top: "icp5_reduction2" 1085 | } 1086 | 
layer { 1087 | name: "icp5_pool" 1088 | type: "Pooling" 1089 | bottom: "icp4_out" 1090 | top: "icp5_pool" 1091 | pooling_param { 1092 | pool: MAX 1093 | kernel_size: 3 1094 | stride: 1 1095 | pad: 1 1096 | } 1097 | } 1098 | layer { 1099 | name: "icp5_out0" 1100 | type: "Convolution" 1101 | bottom: "icp4_out" 1102 | top: "icp5_out0" 1103 | param { 1104 | lr_mult: 1 1105 | decay_mult: 1 1106 | } 1107 | param { 1108 | lr_mult: 2 1109 | decay_mult: 0 1110 | } 1111 | convolution_param { 1112 | num_output: 128 1113 | pad: 0 1114 | kernel_size: 1 1115 | weight_filler { 1116 | type: "xavier" 1117 | } 1118 | bias_filler { 1119 | type: "constant" 1120 | value: 0 1121 | } 1122 | } 1123 | } 1124 | layer { 1125 | name: "relu_icp5_out0" 1126 | type: "ReLU" 1127 | bottom: "icp5_out0" 1128 | top: "icp5_out0" 1129 | } 1130 | layer { 1131 | name: "icp5_out1" 1132 | type: "Convolution" 1133 | bottom: "icp5_reduction1" 1134 | top: "icp5_out1" 1135 | param { 1136 | lr_mult: 1 1137 | decay_mult: 1 1138 | } 1139 | param { 1140 | lr_mult: 2 1141 | decay_mult: 0 1142 | } 1143 | convolution_param { 1144 | num_output: 256 1145 | pad: 1 1146 | kernel_size: 3 1147 | weight_filler { 1148 | type: "gaussian" 1149 | std: 0.04 1150 | } 1151 | bias_filler { 1152 | type: "constant" 1153 | value: 0 1154 | } 1155 | } 1156 | } 1157 | layer { 1158 | name: "relu_icp5_out1" 1159 | type: "ReLU" 1160 | bottom: "icp5_out1" 1161 | top: "icp5_out1" 1162 | } 1163 | layer { 1164 | name: "icp5_out2" 1165 | type: "Convolution" 1166 | bottom: "icp5_reduction2" 1167 | top: "icp5_out2" 1168 | param { 1169 | lr_mult: 1 1170 | decay_mult: 1 1171 | } 1172 | param { 1173 | lr_mult: 2 1174 | decay_mult: 0 1175 | } 1176 | convolution_param { 1177 | num_output: 64 1178 | pad: 2 1179 | kernel_size: 5 1180 | weight_filler { 1181 | type: "gaussian" 1182 | std: 0.08 1183 | } 1184 | bias_filler { 1185 | type: "constant" 1186 | value: 0 1187 | } 1188 | } 1189 | } 1190 | layer { 1191 | name: "relu_icp5_out2" 1192 | type: "ReLU" 
1193 | bottom: "icp5_out2" 1194 | top: "icp5_out2" 1195 | } 1196 | layer { 1197 | name: "icp5_out3" 1198 | type: "Convolution" 1199 | bottom: "icp5_pool" 1200 | top: "icp5_out3" 1201 | param { 1202 | lr_mult: 1 1203 | decay_mult: 1 1204 | } 1205 | param { 1206 | lr_mult: 2 1207 | decay_mult: 0 1208 | } 1209 | convolution_param { 1210 | num_output: 64 1211 | pad: 0 1212 | kernel_size: 1 1213 | weight_filler { 1214 | type: "xavier" 1215 | } 1216 | bias_filler { 1217 | type: "constant" 1218 | value: 0 1219 | } 1220 | } 1221 | } 1222 | layer { 1223 | name: "relu_icp5_out3" 1224 | type: "ReLU" 1225 | bottom: "icp5_out3" 1226 | top: "icp5_out3" 1227 | } 1228 | layer { 1229 | name: "icp5_out" 1230 | type: "Concat" 1231 | bottom: "icp5_out0" 1232 | bottom: "icp5_out1" 1233 | bottom: "icp5_out2" 1234 | bottom: "icp5_out3" 1235 | top: "icp5_out" 1236 | } 1237 | layer { 1238 | name: "icp6_reduction1" 1239 | type: "Convolution" 1240 | bottom: "icp5_out" 1241 | top: "icp6_reduction1" 1242 | param { 1243 | lr_mult: 1 1244 | decay_mult: 1 1245 | } 1246 | param { 1247 | lr_mult: 2 1248 | decay_mult: 0 1249 | } 1250 | convolution_param { 1251 | num_output: 144 1252 | pad: 0 1253 | kernel_size: 1 1254 | weight_filler { 1255 | type: "xavier" 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0 1260 | } 1261 | } 1262 | } 1263 | layer { 1264 | name: "relu_icp6_reduction1" 1265 | type: "ReLU" 1266 | bottom: "icp6_reduction1" 1267 | top: "icp6_reduction1" 1268 | } 1269 | layer { 1270 | name: "icp6_reduction2" 1271 | type: "Convolution" 1272 | bottom: "icp5_out" 1273 | top: "icp6_reduction2" 1274 | param { 1275 | lr_mult: 1 1276 | decay_mult: 1 1277 | } 1278 | param { 1279 | lr_mult: 2 1280 | decay_mult: 0 1281 | } 1282 | convolution_param { 1283 | num_output: 32 1284 | pad: 0 1285 | kernel_size: 1 1286 | weight_filler { 1287 | type: "xavier" 1288 | } 1289 | bias_filler { 1290 | type: "constant" 1291 | value: 0 1292 | } 1293 | } 1294 | } 1295 | layer { 1296 | name: 
"relu_icp6_reduction2" 1297 | type: "ReLU" 1298 | bottom: "icp6_reduction2" 1299 | top: "icp6_reduction2" 1300 | } 1301 | layer { 1302 | name: "icp6_pool" 1303 | type: "Pooling" 1304 | bottom: "icp5_out" 1305 | top: "icp6_pool" 1306 | pooling_param { 1307 | pool: MAX 1308 | kernel_size: 3 1309 | stride: 1 1310 | pad: 1 1311 | } 1312 | } 1313 | layer { 1314 | name: "icp6_out0" 1315 | type: "Convolution" 1316 | bottom: "icp5_out" 1317 | top: "icp6_out0" 1318 | param { 1319 | lr_mult: 1 1320 | decay_mult: 1 1321 | } 1322 | param { 1323 | lr_mult: 2 1324 | decay_mult: 0 1325 | } 1326 | convolution_param { 1327 | num_output: 112 1328 | pad: 0 1329 | kernel_size: 1 1330 | weight_filler { 1331 | type: "xavier" 1332 | } 1333 | bias_filler { 1334 | type: "constant" 1335 | value: 0 1336 | } 1337 | } 1338 | } 1339 | layer { 1340 | name: "relu_icp6_out0" 1341 | type: "ReLU" 1342 | bottom: "icp6_out0" 1343 | top: "icp6_out0" 1344 | } 1345 | layer { 1346 | name: "icp6_out1" 1347 | type: "Convolution" 1348 | bottom: "icp6_reduction1" 1349 | top: "icp6_out1" 1350 | param { 1351 | lr_mult: 1 1352 | decay_mult: 1 1353 | } 1354 | param { 1355 | lr_mult: 2 1356 | decay_mult: 0 1357 | } 1358 | convolution_param { 1359 | num_output: 288 1360 | pad: 1 1361 | kernel_size: 3 1362 | weight_filler { 1363 | type: "gaussian" 1364 | std: 0.04 1365 | } 1366 | bias_filler { 1367 | type: "constant" 1368 | value: 0 1369 | } 1370 | } 1371 | } 1372 | layer { 1373 | name: "relu_icp6_out1" 1374 | type: "ReLU" 1375 | bottom: "icp6_out1" 1376 | top: "icp6_out1" 1377 | } 1378 | layer { 1379 | name: "icp6_out2" 1380 | type: "Convolution" 1381 | bottom: "icp6_reduction2" 1382 | top: "icp6_out2" 1383 | param { 1384 | lr_mult: 1 1385 | decay_mult: 1 1386 | } 1387 | param { 1388 | lr_mult: 2 1389 | decay_mult: 0 1390 | } 1391 | convolution_param { 1392 | num_output: 64 1393 | pad: 2 1394 | kernel_size: 5 1395 | weight_filler { 1396 | type: "gaussian" 1397 | std: 0.08 1398 | } 1399 | bias_filler { 1400 | type: 
"constant" 1401 | value: 0 1402 | } 1403 | } 1404 | } 1405 | layer { 1406 | name: "relu_icp6_out2" 1407 | type: "ReLU" 1408 | bottom: "icp6_out2" 1409 | top: "icp6_out2" 1410 | } 1411 | layer { 1412 | name: "icp6_out3" 1413 | type: "Convolution" 1414 | bottom: "icp6_pool" 1415 | top: "icp6_out3" 1416 | param { 1417 | lr_mult: 1 1418 | decay_mult: 1 1419 | } 1420 | param { 1421 | lr_mult: 2 1422 | decay_mult: 0 1423 | } 1424 | convolution_param { 1425 | num_output: 64 1426 | pad: 0 1427 | kernel_size: 1 1428 | weight_filler { 1429 | type: "xavier" 1430 | } 1431 | bias_filler { 1432 | type: "constant" 1433 | value: 0 1434 | } 1435 | } 1436 | } 1437 | layer { 1438 | name: "relu_icp6_out3" 1439 | type: "ReLU" 1440 | bottom: "icp6_out3" 1441 | top: "icp6_out3" 1442 | } 1443 | layer { 1444 | name: "icp6_out" 1445 | type: "Concat" 1446 | bottom: "icp6_out0" 1447 | bottom: "icp6_out1" 1448 | bottom: "icp6_out2" 1449 | bottom: "icp6_out3" 1450 | top: "icp6_out" 1451 | } 1452 | layer { 1453 | name: "icp7_reduction1" 1454 | type: "Convolution" 1455 | bottom: "icp6_out" 1456 | top: "icp7_reduction1" 1457 | param { 1458 | lr_mult: 1 1459 | decay_mult: 1 1460 | } 1461 | param { 1462 | lr_mult: 2 1463 | decay_mult: 0 1464 | } 1465 | convolution_param { 1466 | num_output: 160 1467 | pad: 0 1468 | kernel_size: 1 1469 | weight_filler { 1470 | type: "xavier" 1471 | } 1472 | bias_filler { 1473 | type: "constant" 1474 | value: 0 1475 | } 1476 | } 1477 | } 1478 | layer { 1479 | name: "relu_icp7_reduction1" 1480 | type: "ReLU" 1481 | bottom: "icp7_reduction1" 1482 | top: "icp7_reduction1" 1483 | } 1484 | layer { 1485 | name: "icp7_reduction2" 1486 | type: "Convolution" 1487 | bottom: "icp6_out" 1488 | top: "icp7_reduction2" 1489 | param { 1490 | lr_mult: 1 1491 | decay_mult: 1 1492 | } 1493 | param { 1494 | lr_mult: 2 1495 | decay_mult: 0 1496 | } 1497 | convolution_param { 1498 | num_output: 32 1499 | pad: 0 1500 | kernel_size: 1 1501 | weight_filler { 1502 | type: "xavier" 1503 | } 
1504 | bias_filler { 1505 | type: "constant" 1506 | value: 0 1507 | } 1508 | } 1509 | } 1510 | layer { 1511 | name: "relu_icp7_reduction2" 1512 | type: "ReLU" 1513 | bottom: "icp7_reduction2" 1514 | top: "icp7_reduction2" 1515 | } 1516 | layer { 1517 | name: "icp7_pool" 1518 | type: "Pooling" 1519 | bottom: "icp6_out" 1520 | top: "icp7_pool" 1521 | pooling_param { 1522 | pool: MAX 1523 | kernel_size: 3 1524 | stride: 1 1525 | pad: 1 1526 | } 1527 | } 1528 | layer { 1529 | name: "icp7_out0" 1530 | type: "Convolution" 1531 | bottom: "icp6_out" 1532 | top: "icp7_out0" 1533 | param { 1534 | lr_mult: 1 1535 | decay_mult: 1 1536 | } 1537 | param { 1538 | lr_mult: 2 1539 | decay_mult: 0 1540 | } 1541 | convolution_param { 1542 | num_output: 256 1543 | pad: 0 1544 | kernel_size: 1 1545 | weight_filler { 1546 | type: "xavier" 1547 | } 1548 | bias_filler { 1549 | type: "constant" 1550 | value: 0 1551 | } 1552 | } 1553 | } 1554 | layer { 1555 | name: "relu_icp7_out0" 1556 | type: "ReLU" 1557 | bottom: "icp7_out0" 1558 | top: "icp7_out0" 1559 | } 1560 | layer { 1561 | name: "icp7_out1" 1562 | type: "Convolution" 1563 | bottom: "icp7_reduction1" 1564 | top: "icp7_out1" 1565 | param { 1566 | lr_mult: 1 1567 | decay_mult: 1 1568 | } 1569 | param { 1570 | lr_mult: 2 1571 | decay_mult: 0 1572 | } 1573 | convolution_param { 1574 | num_output: 320 1575 | pad: 1 1576 | kernel_size: 3 1577 | weight_filler { 1578 | type: "gaussian" 1579 | std: 0.04 1580 | } 1581 | bias_filler { 1582 | type: "constant" 1583 | value: 0 1584 | } 1585 | } 1586 | } 1587 | layer { 1588 | name: "relu_icp7_out1" 1589 | type: "ReLU" 1590 | bottom: "icp7_out1" 1591 | top: "icp7_out1" 1592 | } 1593 | layer { 1594 | name: "icp7_out2" 1595 | type: "Convolution" 1596 | bottom: "icp7_reduction2" 1597 | top: "icp7_out2" 1598 | param { 1599 | lr_mult: 1 1600 | decay_mult: 1 1601 | } 1602 | param { 1603 | lr_mult: 2 1604 | decay_mult: 0 1605 | } 1606 | convolution_param { 1607 | num_output: 128 1608 | pad: 2 1609 | 
kernel_size: 5 1610 | weight_filler { 1611 | type: "gaussian" 1612 | std: 0.08 1613 | } 1614 | bias_filler { 1615 | type: "constant" 1616 | value: 0 1617 | } 1618 | } 1619 | } 1620 | layer { 1621 | name: "relu_icp7_out2" 1622 | type: "ReLU" 1623 | bottom: "icp7_out2" 1624 | top: "icp7_out2" 1625 | } 1626 | layer { 1627 | name: "icp7_out3" 1628 | type: "Convolution" 1629 | bottom: "icp7_pool" 1630 | top: "icp7_out3" 1631 | param { 1632 | lr_mult: 1 1633 | decay_mult: 1 1634 | } 1635 | param { 1636 | lr_mult: 2 1637 | decay_mult: 0 1638 | } 1639 | convolution_param { 1640 | num_output: 128 1641 | pad: 0 1642 | kernel_size: 1 1643 | weight_filler { 1644 | type: "xavier" 1645 | } 1646 | bias_filler { 1647 | type: "constant" 1648 | value: 0 1649 | } 1650 | } 1651 | } 1652 | layer { 1653 | name: "relu_icp7_out3" 1654 | type: "ReLU" 1655 | bottom: "icp7_out3" 1656 | top: "icp7_out3" 1657 | } 1658 | layer { 1659 | name: "icp7_out" 1660 | type: "Concat" 1661 | bottom: "icp7_out0" 1662 | bottom: "icp7_out1" 1663 | bottom: "icp7_out2" 1664 | bottom: "icp7_out3" 1665 | top: "icp7_out" 1666 | } 1667 | layer { 1668 | name: "icp8_in" 1669 | type: "Pooling" 1670 | bottom: "icp7_out" 1671 | top: "icp8_in" 1672 | pooling_param { 1673 | pool: MAX 1674 | kernel_size: 3 1675 | stride: 2 1676 | pad: 0 1677 | } 1678 | } 1679 | # Inception module 8 *************** 1680 | 1681 | layer { 1682 | name: "icp8_reduction1" 1683 | type: "Convolution" 1684 | bottom: "icp8_in" 1685 | top: "icp8_reduction1" 1686 | param { 1687 | lr_mult: 1 1688 | decay_mult: 1 1689 | } 1690 | param { 1691 | lr_mult: 2 1692 | decay_mult: 0 1693 | } 1694 | convolution_param { 1695 | num_output: 160 1696 | pad: 0 1697 | kernel_size: 1 1698 | weight_filler { 1699 | type: "xavier" 1700 | } 1701 | bias_filler { 1702 | type: "constant" 1703 | value: 0 1704 | } 1705 | } 1706 | } 1707 | layer { 1708 | name: "relu_icp8_reduction1" 1709 | type: "ReLU" 1710 | bottom: "icp8_reduction1" 1711 | top: "icp8_reduction1" 1712 | } 1713 
| layer { 1714 | name: "icp8_reduction2" 1715 | type: "Convolution" 1716 | bottom: "icp8_in" 1717 | top: "icp8_reduction2" 1718 | param { 1719 | lr_mult: 1 1720 | decay_mult: 1 1721 | } 1722 | param { 1723 | lr_mult: 2 1724 | decay_mult: 0 1725 | } 1726 | convolution_param { 1727 | num_output: 32 1728 | pad: 0 1729 | kernel_size: 1 1730 | weight_filler { 1731 | type: "xavier" 1732 | } 1733 | bias_filler { 1734 | type: "constant" 1735 | value: 0 1736 | } 1737 | } 1738 | } 1739 | layer { 1740 | name: "relu_icp8_reduction2" 1741 | type: "ReLU" 1742 | bottom: "icp8_reduction2" 1743 | top: "icp8_reduction2" 1744 | } 1745 | layer { 1746 | name: "icp8_pool" 1747 | type: "Pooling" 1748 | bottom: "icp8_in" 1749 | top: "icp8_pool" 1750 | pooling_param { 1751 | pool: MAX 1752 | kernel_size: 3 1753 | stride: 1 1754 | pad: 1 1755 | } 1756 | } 1757 | layer { 1758 | name: "icp8_out0" 1759 | type: "Convolution" 1760 | bottom: "icp8_in" 1761 | top: "icp8_out0" 1762 | param { 1763 | lr_mult: 1 1764 | decay_mult: 1 1765 | } 1766 | param { 1767 | lr_mult: 2 1768 | decay_mult: 0 1769 | } 1770 | convolution_param { 1771 | num_output: 256 1772 | pad: 0 1773 | kernel_size: 1 1774 | weight_filler { 1775 | type: "xavier" 1776 | } 1777 | bias_filler { 1778 | type: "constant" 1779 | value: 0 1780 | } 1781 | } 1782 | } 1783 | layer { 1784 | name: "relu_icp8_out0" 1785 | type: "ReLU" 1786 | bottom: "icp8_out0" 1787 | top: "icp8_out0" 1788 | } 1789 | layer { 1790 | name: "icp8_out1" 1791 | type: "Convolution" 1792 | bottom: "icp8_reduction1" 1793 | top: "icp8_out1" 1794 | param { 1795 | lr_mult: 1 1796 | decay_mult: 1 1797 | } 1798 | param { 1799 | lr_mult: 2 1800 | decay_mult: 0 1801 | } 1802 | convolution_param { 1803 | num_output: 320 1804 | pad: 1 1805 | kernel_size: 3 1806 | weight_filler { 1807 | type: "gaussian" 1808 | std: 0.04 1809 | } 1810 | bias_filler { 1811 | type: "constant" 1812 | value: 0 1813 | } 1814 | } 1815 | } 1816 | layer { 1817 | name: "relu_icp8_out1" 1818 | type: "ReLU" 
1819 | bottom: "icp8_out1" 1820 | top: "icp8_out1" 1821 | } 1822 | layer { 1823 | name: "icp8_out2" 1824 | type: "Convolution" 1825 | bottom: "icp8_reduction2" 1826 | top: "icp8_out2" 1827 | param { 1828 | lr_mult: 1 1829 | decay_mult: 1 1830 | } 1831 | param { 1832 | lr_mult: 2 1833 | decay_mult: 0 1834 | } 1835 | convolution_param { 1836 | num_output: 128 1837 | pad: 2 1838 | kernel_size: 5 1839 | weight_filler { 1840 | type: "gaussian" 1841 | std: 0.08 1842 | } 1843 | bias_filler { 1844 | type: "constant" 1845 | value: 0 1846 | } 1847 | } 1848 | } 1849 | layer { 1850 | name: "relu_icp8_out2" 1851 | type: "ReLU" 1852 | bottom: "icp8_out2" 1853 | top: "icp8_out2" 1854 | } 1855 | layer { 1856 | name: "icp8_out3" 1857 | type: "Convolution" 1858 | bottom: "icp8_pool" 1859 | top: "icp8_out3" 1860 | param { 1861 | lr_mult: 1 1862 | decay_mult: 1 1863 | } 1864 | param { 1865 | lr_mult: 2 1866 | decay_mult: 0 1867 | } 1868 | convolution_param { 1869 | num_output: 128 1870 | pad: 0 1871 | kernel_size: 1 1872 | weight_filler { 1873 | type: "xavier" 1874 | } 1875 | bias_filler { 1876 | type: "constant" 1877 | value: 0 1878 | } 1879 | } 1880 | } 1881 | layer { 1882 | name: "relu_icp8_out3" 1883 | type: "ReLU" 1884 | bottom: "icp8_out3" 1885 | top: "icp8_out3" 1886 | } 1887 | layer { 1888 | name: "icp8_out" 1889 | type: "Concat" 1890 | bottom: "icp8_out0" 1891 | bottom: "icp8_out1" 1892 | bottom: "icp8_out2" 1893 | bottom: "icp8_out3" 1894 | top: "icp8_out" 1895 | } 1896 | layer { 1897 | name: "icp9_reduction1" 1898 | type: "Convolution" 1899 | bottom: "icp8_out" 1900 | top: "icp9_reduction1" 1901 | param { 1902 | lr_mult: 1 1903 | decay_mult: 1 1904 | } 1905 | param { 1906 | lr_mult: 2 1907 | decay_mult: 0 1908 | } 1909 | convolution_param { 1910 | num_output: 192 1911 | pad: 0 1912 | kernel_size: 1 1913 | weight_filler { 1914 | type: "xavier" 1915 | } 1916 | bias_filler { 1917 | type: "constant" 1918 | value: 0 1919 | } 1920 | } 1921 | } 1922 | layer { 1923 | name: 
"relu_icp9_reduction1" 1924 | type: "ReLU" 1925 | bottom: "icp9_reduction1" 1926 | top: "icp9_reduction1" 1927 | } 1928 | layer { 1929 | name: "icp9_reduction2" 1930 | type: "Convolution" 1931 | bottom: "icp8_out" 1932 | top: "icp9_reduction2" 1933 | param { 1934 | lr_mult: 1 1935 | decay_mult: 1 1936 | } 1937 | param { 1938 | lr_mult: 2 1939 | decay_mult: 0 1940 | } 1941 | convolution_param { 1942 | num_output: 48 1943 | pad: 0 1944 | kernel_size: 1 1945 | weight_filler { 1946 | type: "xavier" 1947 | } 1948 | bias_filler { 1949 | type: "constant" 1950 | value: 0 1951 | } 1952 | } 1953 | } 1954 | layer { 1955 | name: "relu_icp9_reduction2" 1956 | type: "ReLU" 1957 | bottom: "icp9_reduction2" 1958 | top: "icp9_reduction2" 1959 | } 1960 | layer { 1961 | name: "icp9_pool" 1962 | type: "Pooling" 1963 | bottom: "icp8_out" 1964 | top: "icp9_pool" 1965 | pooling_param { 1966 | pool: MAX 1967 | kernel_size: 3 1968 | stride: 1 1969 | pad: 1 1970 | } 1971 | } 1972 | layer { 1973 | name: "icp9_out0" 1974 | type: "Convolution" 1975 | bottom: "icp8_out" 1976 | top: "icp9_out0" 1977 | param { 1978 | lr_mult: 1 1979 | decay_mult: 1 1980 | } 1981 | param { 1982 | lr_mult: 2 1983 | decay_mult: 0 1984 | } 1985 | convolution_param { 1986 | num_output: 384 1987 | pad: 0 1988 | kernel_size: 1 1989 | weight_filler { 1990 | type: "xavier" 1991 | } 1992 | bias_filler { 1993 | type: "constant" 1994 | value: 0 1995 | } 1996 | } 1997 | } 1998 | layer { 1999 | name: "relu_icp9_out0" 2000 | type: "ReLU" 2001 | bottom: "icp9_out0" 2002 | top: "icp9_out0" 2003 | } 2004 | layer { 2005 | name: "icp9_out1" 2006 | type: "Convolution" 2007 | bottom: "icp9_reduction1" 2008 | top: "icp9_out1" 2009 | param { 2010 | lr_mult: 1 2011 | decay_mult: 1 2012 | } 2013 | param { 2014 | lr_mult: 2 2015 | decay_mult: 0 2016 | } 2017 | convolution_param { 2018 | num_output: 384 2019 | pad: 1 2020 | kernel_size: 3 2021 | weight_filler { 2022 | type: "gaussian" 2023 | std: 0.04 2024 | } 2025 | bias_filler { 2026 | 
type: "constant" 2027 | value: 0 2028 | } 2029 | } 2030 | } 2031 | layer { 2032 | name: "relu_icp9_out1" 2033 | type: "ReLU" 2034 | bottom: "icp9_out1" 2035 | top: "icp9_out1" 2036 | } 2037 | layer { 2038 | name: "icp9_out2" 2039 | type: "Convolution" 2040 | bottom: "icp9_reduction2" 2041 | top: "icp9_out2" 2042 | param { 2043 | lr_mult: 1 2044 | decay_mult: 1 2045 | } 2046 | param { 2047 | lr_mult: 2 2048 | decay_mult: 0 2049 | } 2050 | convolution_param { 2051 | num_output: 128 2052 | pad: 2 2053 | kernel_size: 5 2054 | weight_filler { 2055 | type: "gaussian" 2056 | std: 0.08 2057 | } 2058 | bias_filler { 2059 | type: "constant" 2060 | value: 0 2061 | } 2062 | } 2063 | } 2064 | layer { 2065 | name: "relu_icp9_out2" 2066 | type: "ReLU" 2067 | bottom: "icp9_out2" 2068 | top: "icp9_out2" 2069 | } 2070 | layer { 2071 | name: "icp9_out3" 2072 | type: "Convolution" 2073 | bottom: "icp9_pool" 2074 | top: "icp9_out3" 2075 | param { 2076 | lr_mult: 1 2077 | decay_mult: 1 2078 | } 2079 | param { 2080 | lr_mult: 2 2081 | decay_mult: 0 2082 | } 2083 | convolution_param { 2084 | num_output: 128 2085 | pad: 0 2086 | kernel_size: 1 2087 | weight_filler { 2088 | type: "xavier" 2089 | } 2090 | bias_filler { 2091 | type: "constant" 2092 | value: 0 2093 | } 2094 | } 2095 | } 2096 | layer { 2097 | name: "relu_icp9_out3" 2098 | type: "ReLU" 2099 | bottom: "icp9_out3" 2100 | top: "icp9_out3" 2101 | } 2102 | layer { 2103 | name: "icp9_out" 2104 | type: "Concat" 2105 | bottom: "icp9_out0" 2106 | bottom: "icp9_out1" 2107 | bottom: "icp9_out2" 2108 | bottom: "icp9_out3" 2109 | top: "icp9_out" 2110 | } -------------------------------------------------------------------------------- /NeuralArt/smoothL1.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/happynear/DeepVisualization/6e39593b1b4bd3087e0486da97733c1228ca7420/NeuralArt/smoothL1.m -------------------------------------------------------------------------------- 
/PrototxtGen/1x1conv.prototxt:
--------------------------------------------------------------------------------
1 | # Template: convolution, BN, then an in-place activation layer.
2 | # Tokens in curly braces are placeholders filled in by the generator.
3 | layer {
4 |   name: "conv{num}"
5 |   type: "Convolution"
6 |   bottom: "{bottom_name}"
7 |   top: "conv{num}"
8 |   param {
9 |     lr_mult: 1
10 |   }
11 |   param {
12 |     lr_mult: 2
13 |   }
14 |   convolution_param {
15 |     num_output: {node_num}
16 |     kernel_size: {kernel_size}
17 |     stride: 1
18 |     weight_filler {
19 |       type: "msra"
20 |     }
21 |     bias_filler {
22 |       type: "gaussian"
23 |       std: 0.5
24 |       # type: "constant"
25 |       # value: 0
26 |     }
27 |   }
28 | }
29 | layer {
30 |   name: "bn{num}"
31 |   type: "BN"
32 |   bottom: "conv{num}"
33 |   top: "bn{num}"
34 |   bn_param {
35 |     scale_filler {
36 |       type: "constant"
37 |       value: 3
38 |     }
39 |     shift_filler {
40 |       type: "gaussian"
41 |       std: 2
42 |       # type: "constant"
43 |       # value: 0
44 |     }
45 |   }
46 | }
47 | layer {
48 |   name: "activation{num}"
49 |   type: "{activation}"
50 |   bottom: "bn{num}"
51 |   top: "bn{num}"
52 | }
--------------------------------------------------------------------------------
/PrototxtGen/3x3conv.prototxt:
--------------------------------------------------------------------------------
1 | # Fixed 3x3 variant: convolution (20 outputs), BN, then an in-place activation.
2 | layer {
3 |   name: "conv{num}"
4 |   type: "Convolution"
5 |   bottom: "{bottom_name}"
6 |   top: "conv{num}"
7 |   param {
8 |     lr_mult: 1
9 |   }
10 |   param {
11 |     lr_mult: 2
12 |   }
13 |   convolution_param {
14 |     num_output: 20
15 |     kernel_size: 3
16 |     stride: 1
17 |     weight_filler {
18 |       type: "msra"
19 |     }
20 |     bias_filler {
21 |       type: "constant"
22 |     }
23 |   }
24 | }
25 | layer {
26 |   name: "bn{num}"
27 |   type: "BN"
28 |   bottom: "conv{num}"
29 |   top: "bn{num}"
30 |   bn_param {
31 |     scale_filler {
32 |       type: "constant"
33 |       value: 1
34 |     }
35 |     shift_filler {
36 |       type: "constant"
37 |       value: 0
38 |     }
39 |   }
40 | }
41 | # NOTE(review): the layer below is named relu and carries relu_param, but its
42 | # type is Sigmoid; confirm which activation is intended.
43 | layer {
44 |   name: "relu{num}"
45 |   type: "Sigmoid"
46 |   bottom: "bn{num}"
47 |   top: "bn{num}"
48 |   relu_param {
49 |     negative_slope: 0.2
50 |   }
51 | }
-------------------------------------------------------------------------------- /PrototxtGen/Inception.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "inception_{num}a/1x1" 3 | type: "Convolution" 4 | bottom: "{bottom_name}" 5 | top: "inception_{num}a/1x1" 6 | param { 7 | lr_mult: 1 8 | decay_mult: 1 9 | } 10 | param { 11 | lr_mult: 2 12 | decay_mult: 0 13 | } 14 | convolution_param { 15 | num_output: {1x1node} 16 | kernel_size: 1 17 | weight_filler { 18 | type: "xavier" 19 | } 20 | bias_filler { 21 | type: "constant" 22 | value: 0 23 | } 24 | } 25 | } 26 | layer { 27 | name: "bn{num}/relu_1x1" 28 | type: "BN" 29 | bottom: "inception_{num}a/1x1" 30 | top: "bn{num}/relu_1x1" 31 | bn_param { 32 | scale_filler { 33 | type: "constant" 34 | value: 3 35 | } 36 | shift_filler { 37 | type: "constant" 38 | value: 0 39 | } 40 | } 41 | } 42 | layer { 43 | name: "inception_{num}a/relu_1x1" 44 | type: "ReLU" 45 | bottom: "bn{num}/relu_1x1" 46 | top: "bn{num}/relu_1x1" 47 | } 48 | layer { 49 | name: "inception_{num}a/3x3_reduce" 50 | type: "Convolution" 51 | bottom: "{bottom_name}" 52 | top: "inception_{num}a/3x3_reduce" 53 | param { 54 | lr_mult: 1 55 | decay_mult: 1 56 | } 57 | param { 58 | lr_mult: 2 59 | decay_mult: 0 60 | } 61 | convolution_param { 62 | num_output: {3x3reduce} 63 | kernel_size: 1 64 | weight_filler { 65 | type: "xavier" 66 | } 67 | bias_filler { 68 | type: "constant" 69 | value: 0 70 | } 71 | } 72 | } 73 | layer { 74 | name: "bn{num}/relu_3x3_reduce" 75 | type: "BN" 76 | bottom: "inception_{num}a/3x3_reduce" 77 | top: "bn{num}/relu_3x3_reduce" 78 | bn_param { 79 | scale_filler { 80 | type: "constant" 81 | value: 3 82 | } 83 | shift_filler { 84 | type: "constant" 85 | value: 0 86 | } 87 | } 88 | } 89 | layer { 90 | name: "inception_{num}a/relu_3x3_reduce" 91 | type: "ReLU" 92 | bottom: "bn{num}/relu_3x3_reduce" 93 | top: "bn{num}/relu_3x3_reduce" 94 | } 95 | layer { 96 | name: 
"inception_{num}a/3x3" 97 | type: "Convolution" 98 | bottom: "bn{num}/relu_3x3_reduce" 99 | top: "inception_{num}a/3x3" 100 | param { 101 | lr_mult: 1 102 | decay_mult: 1 103 | } 104 | param { 105 | lr_mult: 2 106 | decay_mult: 0 107 | } 108 | convolution_param { 109 | num_output: {3x3node} 110 | pad: 1 111 | kernel_size: 3 112 | weight_filler { 113 | type: "xavier" 114 | } 115 | bias_filler { 116 | type: "constant" 117 | value: 0 118 | } 119 | } 120 | } 121 | layer { 122 | name: "bn{num}/relu_3x3" 123 | type: "BN" 124 | bottom: "inception_{num}a/3x3" 125 | top: "bn{num}/relu_3x3" 126 | bn_param { 127 | scale_filler { 128 | type: "constant" 129 | value: 3 130 | } 131 | shift_filler { 132 | type: "constant" 133 | value: 0 134 | } 135 | } 136 | } 137 | layer { 138 | name: "inception_{num}a/relu_3x3" 139 | type: "ReLU" 140 | bottom: "bn{num}/relu_3x3" 141 | top: "bn{num}/relu_3x3" 142 | } 143 | layer { 144 | name: "inception_{num}a/5x5_reduce" 145 | type: "Convolution" 146 | bottom: "{bottom_name}" 147 | top: "inception_{num}a/5x5_reduce" 148 | param { 149 | lr_mult: 1 150 | decay_mult: 1 151 | } 152 | param { 153 | lr_mult: 2 154 | decay_mult: 0 155 | } 156 | convolution_param { 157 | num_output: {5x5reduce} 158 | kernel_size: 1 159 | weight_filler { 160 | type: "xavier" 161 | } 162 | bias_filler { 163 | type: "constant" 164 | value: 0 165 | } 166 | } 167 | } 168 | layer { 169 | name: "bn{num}/relu_5x5_reduce" 170 | type: "BN" 171 | bottom: "inception_{num}a/5x5_reduce" 172 | top: "bn{num}/relu_5x5_reduce" 173 | bn_param { 174 | scale_filler { 175 | type: "constant" 176 | value: 3 177 | } 178 | shift_filler { 179 | type: "constant" 180 | value: 0 181 | } 182 | } 183 | } 184 | layer { 185 | name: "inception_{num}a/relu_5x5_reduce" 186 | type: "ReLU" 187 | bottom: "bn{num}/relu_5x5_reduce" 188 | top: "bn{num}/relu_5x5_reduce" 189 | } 190 | layer { 191 | name: "inception_{num}a/5x5" 192 | type: "Convolution" 193 | bottom: "bn{num}/relu_5x5_reduce" 194 | top: 
"inception_{num}a/5x5" 195 | param { 196 | lr_mult: 1 197 | decay_mult: 1 198 | } 199 | param { 200 | lr_mult: 2 201 | decay_mult: 0 202 | } 203 | convolution_param { 204 | num_output: {5x5node} 205 | pad: 2 206 | kernel_size: 5 207 | weight_filler { 208 | type: "xavier" 209 | } 210 | bias_filler { 211 | type: "constant" 212 | value: 0 213 | } 214 | } 215 | } 216 | layer { 217 | name: "bn{num}/relu_5x5" 218 | type: "BN" 219 | bottom: "inception_{num}a/5x5" 220 | top: "bn{num}/relu_5x5" 221 | bn_param { 222 | scale_filler { 223 | type: "constant" 224 | value: 3 225 | } 226 | shift_filler { 227 | type: "constant" 228 | value: 0 229 | } 230 | } 231 | } 232 | layer { 233 | name: "inception_{num}a/relu_5x5" 234 | type: "ReLU" 235 | bottom: "bn{num}/relu_5x5" 236 | top: "bn{num}/relu_5x5" 237 | } 238 | layer { 239 | name: "inception_{num}a/pool" 240 | type: "Pooling" 241 | bottom: "{bottom_name}" 242 | top: "inception_{num}a/pool" 243 | pooling_param { 244 | pool: MAX 245 | kernel_size: 3 246 | stride: 1 247 | pad: 1 248 | } 249 | } 250 | layer { 251 | name: "inception_{num}a/pool_proj" 252 | type: "Convolution" 253 | bottom: "inception_{num}a/pool" 254 | top: "inception_{num}a/pool_proj" 255 | param { 256 | lr_mult: 1 257 | decay_mult: 1 258 | } 259 | param { 260 | lr_mult: 2 261 | decay_mult: 0 262 | } 263 | convolution_param { 264 | num_output: {poolconv} 265 | kernel_size: 1 266 | weight_filler { 267 | type: "xavier" 268 | } 269 | bias_filler { 270 | type: "constant" 271 | value: 0 272 | } 273 | } 274 | } 275 | layer { 276 | name: "bn{num}/relu_pool_proj" 277 | type: "BN" 278 | bottom: "inception_{num}a/pool_proj" 279 | top: "bn{num}/relu_pool_proj" 280 | bn_param { 281 | scale_filler { 282 | type: "constant" 283 | value: 3 284 | } 285 | shift_filler { 286 | type: "constant" 287 | value: 0 288 | } 289 | } 290 | } 291 | layer { 292 | name: "inception_{num}a/relu_pool_proj" 293 | type: "ReLU" 294 | bottom: "bn{num}/relu_pool_proj" 295 | top: "bn{num}/relu_pool_proj" 
296 | }
297 | layer {
298 |   name: "inception_{num}a/output"
299 |   type: "Concat"
300 |   bottom: "bn{num}/relu_1x1"
301 |   bottom: "bn{num}/relu_3x3"
302 |   bottom: "bn{num}/relu_5x5"
303 |   bottom: "bn{num}/relu_pool_proj"
304 |   top: "inception_{num}a/output"
305 | }
--------------------------------------------------------------------------------
/PrototxtGen/PrototxtGen.m:
--------------------------------------------------------------------------------
1 | function PrototxtGen(layers,savefolder,image_size,save_num)
2 | %PROTOTXTGEN Assemble a Caffe network definition from prototxt templates.
3 | %
4 | %   PrototxtGen(layers, savefolder, image_size, save_num) writes a fixed
5 | %   input header, one templated section per entry of LAYERS, and finally
6 | %   the output template to the file named by net_model below.
7 | %
8 | %   layers     - cell array of structs; layers{i}.type selects the template:
9 | %                  'convolution' reads .outputmaps, .kernelsize, .activation
10 | %                  'pooling'     reads .method, .scale
11 | %                  'inception'   reads .node1x1, .reduce3x3, .node3x3,
12 | %                                .reduce5x5, .node5x5, .poolconv
13 | %                  'maxout'      reads no further fields
14 | %   image_size - image_size(1) and image_size(2) are written as the third
15 | %                and fourth input_dim entries.
16 | %   savefolder, save_num - kept for interface compatibility; this function
17 | %                does not read them.
18 | %
19 | %   An error is raised when the output file cannot be opened or when a
20 | %   layer has an unsupported type.
21 |
22 | % NOTE(review): the output location is hard-coded and savefolder is not
23 | % consulted; confirm with the callers before redirecting it.
24 | net_model = 'D:\project\NNComplexity\net_define.prototxt';
25 |
26 | % The repository file is named 'Inception.prototxt'; the lowercase spelling
27 | % used previously cannot be opened on case-sensitive file systems.
28 | inception_content = fileread('Inception.prototxt');
29 | conv1x1_content = fileread('1x1conv.prototxt');
30 | pooling_content = fileread('pooling.prototxt');
31 | maxout_content = fileread('maxout.prototxt');
32 | activation_content = fileread('activation.prototxt');
33 | output_content = fileread('output.prototxt');
34 |
35 | width = image_size(1);
36 | height = image_size(2);
37 |
38 | fid = fopen(net_model,'w');
39 | if fid < 0
40 |     error('PrototxtGen:cannotOpen','Cannot open %s for writing.',net_model);
41 | end
42 | % Close the file on every exit path, including errors raised below.
43 | close_file = onCleanup(@() fclose(fid));
44 |
45 | header = {'name: "mnist_siamese_train_test"', ...
46 |     'input: "data"', ...
47 |     'input_dim: 1', ...
48 |     'input_dim: 2', ...
49 |     ['input_dim: ' num2str(width)], ...
50 |     ['input_dim: ' num2str(height)]};
51 | for k=1:numel(header)
52 |     fprintf(fid,'%s\r\n',header{k});
53 | end
54 |
55 | top_layer = 'data';
56 | top_layer_exp = 'top: "(.+?)"';
57 | for i=1:length(layers)
58 |     layer_type = layers{i}.type;
59 |     if strcmp(layer_type,'convolution')
60 |         this_layer = strrep(conv1x1_content,'{num}',num2str(i));
61 |         this_layer = strrep(this_layer,'{node_num}',num2str(layers{i}.outputmaps));
62 |         this_layer = strrep(this_layer,'{bottom_name}',top_layer);
63 |         this_layer = strrep(this_layer,'{kernel_size}',num2str(layers{i}.kernelsize));
64 |         % NOTE(review): an '{activation}' token in the convolution template
65 |         % would not be substituted here; confirm the template in use has none.
66 |         % Write the convolution section now; the activation section built
67 |         % below is chained onto its last top blob.
68 |         top_layer = regexp(this_layer,top_layer_exp,'tokens');
69 |         top_layer = top_layer{end}{1};
70 |         fprintf(fid,'%s\r\n',this_layer);
71 |         if strcmp(layers{i}.activation,'maxout')
72 |             this_layer = strrep(maxout_content,'{num}',num2str(i));
73 |             this_layer = strrep(this_layer,'{bottom_name}',top_layer);
74 |         else
75 |             if strcmp(layers{i}.activation,'LReLU')
76 |                 % Leaky ReLU: the ReLU layer type with a slope drawn as rand()/2.
77 |                 act_type = 'ReLU';
78 |                 slope = rand() / 2;
79 |             else
80 |                 act_type = layers{i}.activation;
81 |                 slope = 0;
82 |             end
83 |             this_layer = strrep(activation_content,'{num}',num2str(i));
84 |             this_layer = strrep(this_layer,'{bottom_name}',top_layer);
85 |             this_layer = strrep(this_layer,'{activation}',act_type);
86 |             this_layer = strrep(this_layer,'{negative_slope}',num2str(slope));
87 |         end
88 |     elseif strcmp(layer_type,'pooling')
89 |         this_layer = strrep(pooling_content,'{num}',num2str(i));
90 |         this_layer = strrep(this_layer,'{bottom_name}',top_layer);
91 |         this_layer = strrep(this_layer,'{method}',layers{i}.method);
92 |         this_layer = strrep(this_layer,'{scale}',num2str(layers{i}.scale));
93 |     elseif strcmp(layer_type,'inception')
94 |         this_layer = strrep(inception_content,'{num}',num2str(i));
95 |         this_layer = strrep(this_layer,'{bottom_name}',top_layer);
96 |         this_layer = strrep(this_layer,'{1x1node}',num2str(layers{i}.node1x1));
97 |         this_layer = strrep(this_layer,'{3x3reduce}',num2str(layers{i}.reduce3x3));
98 |         this_layer = strrep(this_layer,'{3x3node}',num2str(layers{i}.node3x3));
99 |         this_layer = strrep(this_layer,'{5x5reduce}',num2str(layers{i}.reduce5x5));
100 |         this_layer = strrep(this_layer,'{5x5node}',num2str(layers{i}.node5x5));
101 |         this_layer = strrep(this_layer,'{poolconv}',num2str(layers{i}.poolconv));
102 |     elseif strcmp(layer_type,'maxout')
103 |         this_layer = strrep(maxout_content,'{num}',num2str(i));
104 |         this_layer = strrep(this_layer,'{bottom_name}',top_layer);
105 |     else
106 |         % Without this branch an unknown type would reuse this_layer from the
107 |         % previous iteration (or hit an undefined variable on the first layer).
108 |         error('PrototxtGen:unknownLayerType', ...
109 |             'Unsupported layer type at index %d.',i);
110 |     end
111 |     % The last top blob of this section feeds the next one.
112 |     top_layer = regexp(this_layer,top_layer_exp,'tokens');
113 |     top_layer = top_layer{end}{1};
114 |     fprintf(fid,'%s\r\n',this_layer);
115 | end
116 | this_layer = strrep(output_content,'{bottom_name}',top_layer);
117 | fprintf(fid,'%s\r\n',this_layer);
--------------------------------------------------------------------------------
/PrototxtGen/README.md:
--------------------------------------------------------------------------------
1 | Sub-modules of a neural network, stored as prototxt templates. These modules are used for automatic network generation.
2 |
--------------------------------------------------------------------------------
/PrototxtGen/activation.prototxt:
--------------------------------------------------------------------------------
1 | layer {
2 |   name: "relu{num}"
3 |   type: "{activation}"
4 |   bottom: "{bottom_name}"
5 |   top: "{bottom_name}"
6 |   relu_param {
7 |     negative_slope: {negative_slope}
8 |   }
9 | }
--------------------------------------------------------------------------------
/PrototxtGen/euclideanloss.prototxt:
--------------------------------------------------------------------------------
1 | layer {
2 |   name: "euclidean{num}"
3 |   type: "EuclideanLoss"
4 |   bottom: "{bottom1}"
5 |   bottom: "{bottom2}"
6 |   top: "euclidean{num}"
7 |   loss_weight: {loss_weight}
8 | }
--------------------------------------------------------------------------------
/PrototxtGen/gram.prototxt:
--------------------------------------------------------------------------------
1 | layer {
2 |   name: "gram{num}"
3 |   type: "Gram"
4 |   bottom: "{bottom_name}"
5 |   top: "gram{num}"
6 |
}
--------------------------------------------------------------------------------
/PrototxtGen/maxout.prototxt:
--------------------------------------------------------------------------------
1 | # Maxout template: slice the bottom blob into two tops along slice_dim 1,
2 | # then take their element-wise MAX.
3 | layer{
4 |   name: "slice{num}"
5 |   type: "Slice"
6 |   slice_param {
7 |     slice_dim: 1
8 |   }
9 |   bottom: "{bottom_name}"
10 |   top: "slice{num}_1"
11 |   top: "slice{num}_2"
12 | }
13 |
14 | layer{
15 |   name: "eltwise{num}"
16 |   type: "Eltwise"
17 |   bottom: "slice{num}_1"
18 |   bottom: "slice{num}_2"
19 |   top: "eltwise{num}"
20 |   eltwise_param {
21 |     operation: MAX
22 |   }
23 | }
--------------------------------------------------------------------------------
/PrototxtGen/net_define.prototxt:
--------------------------------------------------------------------------------
1 | name: "mnist_siamese_train_test"
2 | input: "data"
3 | input_dim: 1
4 | input_dim: 2
5 | input_dim: 640
6 | input_dim: 480
7 | layer {
8 |   name: "conv1"
9 |   type: "Convolution"
10 |   bottom: "data"
11 |   top: "conv1"
12 |   param {
13 |     lr_mult: 1
14 |   }
15 |   param {
16 |     lr_mult: 2
17 |   }
18 |   convolution_param {
19 |     num_output: 20
20 |     kernel_size: 3
21 |     stride: 1
22 |     weight_filler {
23 |       type: "msra"
24 |     }
25 |     bias_filler {
26 |       type: "gaussian"
27 |       std: 0.5
28 |       # type: "constant"
29 |       # value: 0
30 |     }
31 |   }
32 | }
33 | layer {
34 |   name: "bn1"
35 |   type: "BN"
36 |   bottom: "conv1"
37 |   top: "bn1"
38 |   bn_param {
39 |     scale_filler {
40 |       type: "constant"
41 |       value: 3
42 |     }
43 |     shift_filler {
44 |       type: "gaussian"
45 |       std: 2
46 |       # type: "constant"
47 |       # value: 0
48 |     }
49 |   }
50 | }
51 | layer {
52 |   name: "relu1"
53 |   type: "Sigmoid"
54 |   bottom: "bn1"
55 |   top: "bn1"
56 |   relu_param {
57 |     negative_slope: 0
58 |   }
59 | }
60 | layer {
61 |   name: "conv2"
62 |   type: "Convolution"
63 |   bottom: "bn1"
64 |   top: "conv2"
65 |   param {
66 |     lr_mult: 1
67 |   }
68 |   param {
69 |     lr_mult: 2
70 |   }
71 |   convolution_param {
72 |     num_output: 20
73 |     kernel_size: 3
74 |     stride: 1
75 |     weight_filler {
76 |       type:
"msra" 77 | } 78 | bias_filler { 79 | type: "gaussian" 80 | std: 0.5 81 | # type: "constant" 82 | # value: 0 83 | } 84 | } 85 | } 86 | layer { 87 | name: "bn2" 88 | type: "BN" 89 | bottom: "conv2" 90 | top: "bn2" 91 | bn_param { 92 | scale_filler { 93 | type: "constant" 94 | value: 3 95 | } 96 | shift_filler { 97 | type: "gaussian" 98 | std: 2 99 | # type: "constant" 100 | # value: 0 101 | } 102 | } 103 | } 104 | layer { 105 | name: "relu2" 106 | type: "Sigmoid" 107 | bottom: "bn2" 108 | top: "bn2" 109 | relu_param { 110 | negative_slope: 0 111 | } 112 | } 113 | layer { 114 | name: "conv3" 115 | type: "Convolution" 116 | bottom: "bn2" 117 | top: "conv3" 118 | param { 119 | lr_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | } 124 | convolution_param { 125 | num_output: 20 126 | kernel_size: 3 127 | stride: 1 128 | weight_filler { 129 | type: "msra" 130 | } 131 | bias_filler { 132 | type: "gaussian" 133 | std: 0.5 134 | # type: "constant" 135 | # value: 0 136 | } 137 | } 138 | } 139 | layer { 140 | name: "bn3" 141 | type: "BN" 142 | bottom: "conv3" 143 | top: "bn3" 144 | bn_param { 145 | scale_filler { 146 | type: "constant" 147 | value: 3 148 | } 149 | shift_filler { 150 | type: "gaussian" 151 | std: 2 152 | # type: "constant" 153 | # value: 0 154 | } 155 | } 156 | } 157 | layer { 158 | name: "relu3" 159 | type: "Sigmoid" 160 | bottom: "bn3" 161 | top: "bn3" 162 | relu_param { 163 | negative_slope: 0 164 | } 165 | } 166 | layer { 167 | name: "conv4" 168 | type: "Convolution" 169 | bottom: "bn3" 170 | top: "conv4" 171 | param { 172 | lr_mult: 1 173 | } 174 | param { 175 | lr_mult: 2 176 | } 177 | convolution_param { 178 | num_output: 20 179 | kernel_size: 3 180 | stride: 1 181 | weight_filler { 182 | type: "msra" 183 | } 184 | bias_filler { 185 | type: "gaussian" 186 | std: 0.5 187 | # type: "constant" 188 | # value: 0 189 | } 190 | } 191 | } 192 | layer { 193 | name: "bn4" 194 | type: "BN" 195 | bottom: "conv4" 196 | top: "bn4" 197 | bn_param { 198 | 
scale_filler { 199 | type: "constant" 200 | value: 3 201 | } 202 | shift_filler { 203 | type: "gaussian" 204 | std: 2 205 | # type: "constant" 206 | # value: 0 207 | } 208 | } 209 | } 210 | layer { 211 | name: "relu4" 212 | type: "Sigmoid" 213 | bottom: "bn4" 214 | top: "bn4" 215 | relu_param { 216 | negative_slope: 0 217 | } 218 | } 219 | layer { 220 | name: "conv5" 221 | type: "Convolution" 222 | bottom: "bn4" 223 | top: "conv5" 224 | param { 225 | lr_mult: 1 226 | } 227 | param { 228 | lr_mult: 2 229 | } 230 | convolution_param { 231 | num_output: 20 232 | kernel_size: 3 233 | stride: 1 234 | weight_filler { 235 | type: "msra" 236 | } 237 | bias_filler { 238 | type: "gaussian" 239 | std: 0.5 240 | # type: "constant" 241 | # value: 0 242 | } 243 | } 244 | } 245 | layer { 246 | name: "bn5" 247 | type: "BN" 248 | bottom: "conv5" 249 | top: "bn5" 250 | bn_param { 251 | scale_filler { 252 | type: "constant" 253 | value: 3 254 | } 255 | shift_filler { 256 | type: "gaussian" 257 | std: 2 258 | # type: "constant" 259 | # value: 0 260 | } 261 | } 262 | } 263 | layer { 264 | name: "relu5" 265 | type: "Sigmoid" 266 | bottom: "bn5" 267 | top: "bn5" 268 | relu_param { 269 | negative_slope: 0 270 | } 271 | } 272 | layer { 273 | name: "conv6" 274 | type: "Convolution" 275 | bottom: "bn5" 276 | top: "conv6" 277 | param { 278 | lr_mult: 1 279 | } 280 | param { 281 | lr_mult: 2 282 | } 283 | convolution_param { 284 | num_output: 20 285 | kernel_size: 3 286 | stride: 1 287 | weight_filler { 288 | type: "msra" 289 | } 290 | bias_filler { 291 | type: "gaussian" 292 | std: 0.5 293 | # type: "constant" 294 | # value: 0 295 | } 296 | } 297 | } 298 | layer { 299 | name: "bn6" 300 | type: "BN" 301 | bottom: "conv6" 302 | top: "bn6" 303 | bn_param { 304 | scale_filler { 305 | type: "constant" 306 | value: 3 307 | } 308 | shift_filler { 309 | type: "gaussian" 310 | std: 2 311 | # type: "constant" 312 | # value: 0 313 | } 314 | } 315 | } 316 | layer { 317 | name: "relu6" 318 | type: 
"Sigmoid" 319 | bottom: "bn6" 320 | top: "bn6" 321 | relu_param { 322 | negative_slope: 0 323 | } 324 | } 325 | layer { 326 | name: "conv7" 327 | type: "Convolution" 328 | bottom: "bn6" 329 | top: "conv7" 330 | param { 331 | lr_mult: 1 332 | } 333 | param { 334 | lr_mult: 2 335 | } 336 | convolution_param { 337 | num_output: 20 338 | kernel_size: 3 339 | stride: 1 340 | weight_filler { 341 | type: "msra" 342 | } 343 | bias_filler { 344 | type: "gaussian" 345 | std: 0.5 346 | # type: "constant" 347 | # value: 0 348 | } 349 | } 350 | } 351 | layer { 352 | name: "bn7" 353 | type: "BN" 354 | bottom: "conv7" 355 | top: "bn7" 356 | bn_param { 357 | scale_filler { 358 | type: "constant" 359 | value: 3 360 | } 361 | shift_filler { 362 | type: "gaussian" 363 | std: 2 364 | # type: "constant" 365 | # value: 0 366 | } 367 | } 368 | } 369 | layer { 370 | name: "relu7" 371 | type: "Sigmoid" 372 | bottom: "bn7" 373 | top: "bn7" 374 | relu_param { 375 | negative_slope: 0 376 | } 377 | } 378 | layer { 379 | name: "conv8" 380 | type: "Convolution" 381 | bottom: "bn7" 382 | top: "conv8" 383 | param { 384 | lr_mult: 1 385 | } 386 | param { 387 | lr_mult: 2 388 | } 389 | convolution_param { 390 | num_output: 20 391 | kernel_size: 3 392 | stride: 1 393 | weight_filler { 394 | type: "msra" 395 | } 396 | bias_filler { 397 | type: "gaussian" 398 | std: 0.5 399 | # type: "constant" 400 | # value: 0 401 | } 402 | } 403 | } 404 | layer { 405 | name: "bn8" 406 | type: "BN" 407 | bottom: "conv8" 408 | top: "bn8" 409 | bn_param { 410 | scale_filler { 411 | type: "constant" 412 | value: 3 413 | } 414 | shift_filler { 415 | type: "gaussian" 416 | std: 2 417 | # type: "constant" 418 | # value: 0 419 | } 420 | } 421 | } 422 | layer { 423 | name: "relu8" 424 | type: "Sigmoid" 425 | bottom: "bn8" 426 | top: "bn8" 427 | relu_param { 428 | negative_slope: 0 429 | } 430 | } 431 | layer { 432 | name: "output" 433 | type: "Convolution" 434 | bottom: "bn8" 435 | top: "output" 436 | param { 437 | lr_mult: 1 
438 | } 439 | param { 440 | lr_mult: 2 441 | } 442 | convolution_param { 443 | num_output: 3 444 | kernel_size: 1 445 | stride: 1 446 | weight_filler { 447 | type: "msra" 448 | } 449 | bias_filler { 450 | type: "constant" 451 | } 452 | } 453 | } 454 | -------------------------------------------------------------------------------- /PrototxtGen/output.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "output" 3 | type: "Convolution" 4 | bottom: "{bottom_name}" 5 | top: "output" 6 | param { 7 | lr_mult: 1 8 | } 9 | param { 10 | lr_mult: 2 11 | } 12 | convolution_param { 13 | num_output: 3 14 | kernel_size: 1 15 | stride: 1 16 | weight_filler { 17 | type: "msra" 18 | } 19 | bias_filler { 20 | type: "constant" 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /PrototxtGen/pooling.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "pool{num}" 3 | type: "Pooling" 4 | bottom: "{bottom_name}" 5 | top: "pool{num}" 6 | pooling_param { 7 | pool: {method} 8 | kernel_size: {scale} 9 | stride: {scale} 10 | } 11 | } -------------------------------------------------------------------------------- /PrototxtGen/run_save.m: -------------------------------------------------------------------------------- 1 | activation = 'Sigmoid'; 2 | layers = { 3 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 4 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 5 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 6 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 7 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 8 | 
struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 9 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 10 | struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 3, 'activation', activation) %convolution layer 11 | % struct('type', 'convolution', 'outputmaps', 80, 'kernelsize', 3, 'activation', activation) %convolution layer 12 | % struct('type', 'convolution', 'outputmaps', 80, 'kernelsize', 3, 'activation', activation) %convolution layer 13 | % struct('type', 'convolution', 'outputmaps', 10, 'kernelsize', 3, 'activation', activation) %convolution layer 14 | % struct('type', 'pooling', 'scale', 2, 'method', 'AVE') 15 | % struct('type', 'inception', 'node1x1', 100, 'reduce3x3', 50, 'node3x3', 100, 'reduce5x5', 50, 'node5x5', 100, 'poolconv', 100) 16 | % struct('type', 'inception', 'node1x1', 50, 'reduce3x3', 25, 'node3x3', 50, 'reduce5x5', 25, 'node5x5', 50, 'poolconv', 50) 17 | % struct('type', 'inception', 'node1x1', 20, 'reduce3x3', 10, 'node3x3', 20, 'reduce5x5', 10, 'node5x5', 20, 'poolconv', 20) 18 | % struct('type', 'inception', 'node1x1', 20, 'reduce3x3', 10, 'node3x3', 20, 'reduce5x5', 10, 'node5x5', 20, 'poolconv', 20) 19 | % struct('type', 'inception', 'node1x1', 20, 'reduce3x3', 10, 'node3x3', 20, 'reduce5x5', 10, 'node5x5', 20, 'poolconv', 20) 20 | % struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 1, 'activation', activation) %convolution layer 21 | % struct('type', 'convolution', 'outputmaps', 20, 'kernelsize', 1, 'activation', activation) %convolution layer 22 | % struct('type', 'pooling', 'scale', 2, 'method', 'AVE') 23 | }; 24 | CNNComplexitySave(layers,'8convSigmoid',[640 480],10); -------------------------------------------------------------------------------- /PrototxtGen/smoothL1Loss.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: 
"smoothl1{num}" 3 | type: "SmoothL1Loss" 4 | bottom: "{bottom1}" 5 | bottom: "{bottom2}" 6 | top: "smoothl1{num}" 7 | loss_weight: {loss_weight} 8 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepVisualization 2 | 3 | Some interesting visualization demos using Caffe and Matlab. 4 | 5 | - FaceVis 6 | 7 | Visualize the face memorized by a face recognition model. 8 | 9 | - FilterVis 10 | 11 | Visualize the mid-level features learned by a CNN model. 12 | 13 | - Inceptionism 14 | 15 | Visualize the classifier neurons in a CNN model. 16 | 17 | - NNComplexity 18 | 19 | Show how much complexity a neural network can achieve, w.r.t. width, depth and structure. 20 | 21 | - NeuralArt 22 | 23 | Yet another re-implementation of the paper 24 | > 'A Neural Algorithm of Artistic Style' by Leon Gatys, Alexander Ecker, and Matthias Bethge (http://arxiv.org/abs/1508.06576). 25 | 26 | - PrototxtGen 27 | 28 | Some modules used to generate a prototxt for Caffe. 29 | --------------------------------------------------------------------------------