├── .gitignore ├── License ├── README.md ├── cnnmrf.lua ├── data ├── content │ ├── 0.jpg │ ├── 1.jpg │ ├── 2.jpg │ └── potrait1.jpg ├── examples │ ├── 0_to_0.png │ ├── 1_to_1.png │ ├── Interpolation │ │ ├── 2_morecontent.png │ │ ├── 2_morecontent2.png │ │ ├── 3_balanced.png │ │ ├── 4_morestyle.png │ │ └── 4_morestyle2.png │ ├── MultiRes │ │ ├── syn_res_1.png │ │ ├── syn_res_2.png │ │ ├── syn_res_3.png │ │ └── syn_res_4.png │ ├── content.jpg │ ├── content2.jpg │ ├── style.jpg │ └── style2.jpg ├── models │ ├── _gitignore │ └── download_models.sh └── style │ ├── 0.jpg │ ├── 1.jpg │ ├── 2.jpg │ └── picasso.jpg ├── mylib ├── content.lua ├── helper.lua ├── mrf.lua ├── myoptimizer.lua ├── style.lua └── tv.lua ├── run_syn.lua ├── run_trans.lua ├── syn_CNNMRF_wrapper.lua └── transfer_CNNMRF_wrapper.lua /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | -------------------------------------------------------------------------------- /License: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © 2016 Chuan Li and Michael Wand 5 | 6 | Permission is hereby granted, free of charge, to any person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CNNMRF 2 | This is the torch implementation for paper "[Combining Markov Random Fields and Convolutional Neural Networks for Image Synthesis](http://arxiv.org/abs/1601.04589)" 3 | 4 | This algorithm is for 5 | * un-guided image synthesis (for example, classical texture synthesis) 6 | * guided image synthesis (for example, transfer the style between different images) 7 | 8 | # Hardware 9 | * For CUDA backend: choose 'speed' if your have at least 4GB graphic memory, and 'memory' otherwise. There is also an opencl backend (thanks to Dionýz Lazar). See "run_trans.lua" and "run_syn.lua" for our reference tests with Titan X, GT750M 2G and Sapphire Radeon R9 280 3G. 10 | 11 | 12 | # Examples 13 | * guided image synthesis 14 | 15 |
[example images: content photo (left), Picasso self-portrait 1907 (middle), stylized result (right)]
A photo (left) is transferred into a painting (right) using Picasso's 1907 self-portrait (middle) as the reference style. Notice that important facial features, such as the eyes and nose, are faithfully kept as in Picasso's painting.
[example images: cartoon content, photo style reference, photo-like result]
In this example, we first transfer a cartoon into a photo.
[example images: photo content, cartoon style reference, cartoon-like result]
We then swap the two inputs and transfer the photo into the cartoon.
[example images: interpolation between content-faithful and style-faithful results]
It is possible to balance the amount of content and style in the result: pictures in the second column take more content, and pictures in the third column take more style.

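For reference, this trade-off is mainly controlled by the content weight (default `2e1`, see `-content_weight` in `cnnmrf.lua`): raising it makes the result more content faithful, lowering it more style faithful. The sketch below is illustrative only — it reuses the parameter-tuple layout from the reference tests in `run_trans.lua` and the `run_tests` pattern shown in `run_syn.lua`; the raised/lowered weights (`4e1`, `1e1`) are example values, not necessarily the settings used to produce the pictures above.

```
-- Illustrative sketch: trade content against style by varying the content weight.
-- The tuple layout follows the reference tests in run_trans.lua; only the content
-- weight (2e1 by default) differs between the two runs below.
require 'paths'
paths.dofile('mylib/helper.lua')

local list_params = {
  -- more content-faithful: higher content weight (4e1, example value)
  {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3},
   1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 4e1, 1e-3, 'speed', 256, 16, 'cudnn'},
  -- more style-faithful: lower content weight (1e1, example value)
  {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3},
   1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 1e1, 1e-3, 'speed', 256, 16, 'cudnn'},
}

run_tests(require 'transfer_CNNMRF_wrapper', list_params)
```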
40 | 41 | # Setup 42 | 43 | As building Torch with the latest CUDA is a troublesome work, we recommend following the following steps to people who want to reproduce the results: 44 | It has been tested on Ubuntu with CUDA 10. 45 | 46 | __Step One: Install CUDA 10 and CUDNN 7.6.2__ 47 | 48 | If you have a fresh Ubuntu, we recommend [Lambda Stack](https://lambdalabs.com/lambda-stack-deep-learning-software) which helps you install the latest drivers, libraries, and frameworks for deep learning. Otherwise, you can install the CUDA toolkit and CUDNN from these links: 49 | * [CUDA](https://developer.nvidia.com/cuda-downloads) 50 | * [CUDNN](https://developer.nvidia.com/cudnn) 51 | 52 | __Step Two: Install Torch__ 53 | ``` 54 | git clone https://github.com/nagadomi/distro.git ~/torch --recursive 55 | cd ~/torch 56 | ./install-deps 57 | ./clean.sh 58 | ./update.sh 59 | 60 | . ~/torch/install/bin/torch-activate 61 | sudo apt-get install libprotobuf-dev protobuf-compiler 62 | luarocks install loadcaffe 63 | ``` 64 | 65 | __Step Three: Download Pre-trained VGG Network__ 66 | Pre-trained network: 67 | 68 | ``` 69 | cd data/models 70 | ./download_models.sh 71 | ``` 72 | 73 | # Un-guided Synthesis 74 | 75 | ``` 76 | qlua cnnmrf.lua 77 | ``` 78 | 79 | * Most important parameters are '-style_image' for specifying style input image and '-max_size' for resulting image size. 80 | * The content/style images are located in the folders "data/content" and "data/style" respectively. Notice by default the content image is the same as the style image; and the content image is only used for initalization (optional). 81 | * Results are located in the folder "data/result/freesyn/MRF" 82 | * All parameters are explained in "qlua cnnmrf.lua --help". 83 | 84 | # Guided Synthesis 85 | 86 | ```qlua run_trans.lua``` 87 | 88 | * Most important parameters are '-style_image' for specifying style input image, '-content_image' for specifying content input image and '-max_size' for resulting image size. 89 | * The content/style images are located in the folders "data/content" and "data/style" respectively. 90 | * Results are located in the folder "data/result/trans/MRF" 91 | * Parameters are defined & explained in "run_trans.lua". 92 | 93 | # Acknowledgement 94 | * This work is inspired and closely related to the paper: [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576) by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge. The key difference between their method and our method is the different "style" constraints: While Gatys et al used a global constraint for non-photorealistic synthesis, we use a local constraint which works for both non-photorealistic and photorealistic synthesis. See our paper for more details. 95 | * Our implementation is based on Justin Johnson's implementation of [Neural Style](https://github.com/jcjohnson/neural-style). 96 | 97 | 98 | -------------------------------------------------------------------------------- /cnnmrf.lua: -------------------------------------------------------------------------------- 1 | -- -*- coding: utf-8 -*- 2 | require 'torch' 3 | require 'paths' 4 | 5 | paths.dofile('mylib/helper.lua') 6 | 7 | --adapted from http://lua-users.org/wiki/SplitJoin 8 | function split(str, pat, cast_to_func) 9 | local t = {} -- NOTE: use {n = 0} in Lua-5.0 10 | local fpat = "(.-)" .. 
pat 11 | local last_end = 1 12 | local s, e, cap = str:find(fpat, 1) 13 | while s do 14 | if s ~= 1 or cap ~= "" then 15 | table.insert(t, cast_to_func(cap)) 16 | end 17 | last_end = e+1 18 | s, e, cap = str:find(fpat, last_end) 19 | end 20 | if last_end <= #str then 21 | cap = str:sub(last_end) 22 | table.insert(t, cast_to_func(cap)) 23 | end 24 | return t 25 | end 26 | 27 | ----------------------------------------- 28 | -- Parameters 29 | ----------------------------------------- 30 | 31 | cmd = torch.CmdLine() 32 | 33 | cmd:text('Below are all options with their default values in [].') 34 | cmd:text() 35 | cmd:text('Basic options: ') 36 | cmd:option('-content_name', 'potrait1', "The content image located in folder 'data/content'") 37 | cmd:option('-style_name', 'picasso', "The style image located in folder 'data/style'") 38 | cmd:option('-ini_method', 'image', "Initial method, set to 'image' to use the content image as the initialization; set to 'random' to use random noise.") 39 | cmd:option('-type', 'transfer', 'Use Guided Synthesis (transfer) or Un-guided Synthesis (syn)') 40 | cmd:option('-max_size',384, "Maximum size of the image. Larger image needs more time and memory.") 41 | cmd:option('-backend','cudnn', "Use cudnn' for CUDA-enabled GPUs or 'clnn' for OpenCL.") 42 | cmd:option('-mode','speed', "Try 'speed' if you have a GPU with more than 4GB memory, and try 'memory' otherwise. The 'speed' mode is significantly faster (especially for synthesizing high resolutions) at the cost of higher GPU memory. ") 43 | cmd:option('-num_res',3, "Number of resolutions. Notice the lowest resolution image should be larger than the patch size otherwise it won't synthesize.") 44 | cmd:option('-num_iter','100,100,100', "Number of iterations for each resolution. You can use comma-separated values.") 45 | 46 | cmd:text() 47 | cmd:text('Advanced options: ') 48 | cmd:option('-mrf_layers','12,21', "The layers for MRF constraint. Usually layer 21 alone already gives decent results. Including layer 12 may improve the results but at significantly more computational cost. You can use comma-separated values.") 49 | cmd:option('-mrf_weight','1e-4,1e-4', "Weight for each MRF layer. Higher weights leads to more style faithful results. You can use comma-separated values.") 50 | cmd:option('-mrf_patch_size', '3,3', "The patch size for MRF constraint. This value is defined seperately for each MRF layer. You can use comma-separated values.") 51 | cmd:option('-target_num_rotation',0, 'To matching objects of different poses. This value is shared by all MRF layers. The total number of rotational copies is "2 * mrf_num_rotation + 1"') 52 | cmd:option('-target_num_scale',0, 'To matching objects of different scales. This value is shared by all MRF layers. The total number of scaled copies is "2 * mrf_num_scale + 1"') 53 | cmd:option('-target_sample_stride','2,2', "Stride to sample mrf on style image. This value is defined seperately for each MRF layer. You can use comma-separated values.") 54 | cmd:option('-mrf_confidence_threshold','0,0', "Threshold for filtering out bad matching. Default value 0 means we keep all matchings. This value is defined seperately for all layers. You can use comma-separated values.") 55 | cmd:option('-source_sample_stride','2,2', "Stride to sample mrf on synthesis image. This value is defined seperately for each MRF layer. This settings is relevant only for syn setting. You can use comma-separated values.") 56 | 57 | cmd:option('-content_layers','21', "The layers for content constraint. 
You can use comma-separated values.") 58 | cmd:option('-content_weight',2e1, "The weight for content constraint. Increasing this value will make the result more content faithful. Decreasing the value will make the method more style faithful. Notice this value should be increase (for example, doubled) if layer 12 is included for MRF constraint.") 59 | cmd:option('-tv_weight',1e-3, "TV smoothness weight") 60 | cmd:option('-scaler', 2, "Relative expansion from example to result. This settings is relevant only for syn setting.") 61 | 62 | cmd:option('-gpu_chunck_size_1',256, "Size of chunks to split feature maps along the channel dimension. This is to save memory when normalizing the matching score in mrf layers. Use large value if you have large gpu memory. As reference we use 256 for Titan X, and 32 for Geforce GT750M 2G.") 63 | cmd:option('-gpu_chunck_size_2',16, "Size of chuncks to split feature maps along the y dimension. This is to save memory when normalizing the matching score in mrf layers. Use large value if you have large gpu memory. As reference we use 16 for Titan X, and 2 for Geforce GT750M 2G.") 64 | 65 | -- fixed parameters 66 | cmd:option('-target_step_rotation', math.pi/24) 67 | cmd:option('-target_step_scale', 1.05) 68 | cmd:option('-output_folder', 'data/result/trans/MRF/') 69 | 70 | cmd:option('-proto_file', 'data/models/VGG_ILSVRC_19_layers_deploy.prototxt') 71 | cmd:option('-model_file', 'data/models/VGG_ILSVRC_19_layers.caffemodel') 72 | cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use') 73 | cmd:option('-nCorrection', 25) 74 | cmd:option('-print_iter', 10) 75 | cmd:option('-save_iter', 10) 76 | 77 | params = cmd:parse(arg) 78 | 79 | 80 | for _,par in pairs({'mrf_layers', 'mrf_weight', 'num_iter', 'mrf_patch_size', 'target_sample_stride', 'mrf_confidence_threshold', 'source_sample_stride', 'content_layers'}) do 81 | params[par] = split(params[par], ',', tonumber) 82 | end 83 | 84 | 85 | local wrapper = nil 86 | if params.type == 'transfer' then 87 | wrapper = require 'transfer_CNNMRF_wrapper' 88 | else 89 | wrapper = require 'syn_CNNMRF_wrapper' 90 | end 91 | 92 | wrapper.main(params) -------------------------------------------------------------------------------- /data/content/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/content/0.jpg -------------------------------------------------------------------------------- /data/content/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/content/1.jpg -------------------------------------------------------------------------------- /data/content/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/content/2.jpg -------------------------------------------------------------------------------- /data/content/potrait1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/content/potrait1.jpg -------------------------------------------------------------------------------- /data/examples/0_to_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/0_to_0.png -------------------------------------------------------------------------------- /data/examples/1_to_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/1_to_1.png -------------------------------------------------------------------------------- /data/examples/Interpolation/2_morecontent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/2_morecontent.png -------------------------------------------------------------------------------- /data/examples/Interpolation/2_morecontent2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/2_morecontent2.png -------------------------------------------------------------------------------- /data/examples/Interpolation/3_balanced.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/3_balanced.png -------------------------------------------------------------------------------- /data/examples/Interpolation/4_morestyle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/4_morestyle.png -------------------------------------------------------------------------------- /data/examples/Interpolation/4_morestyle2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/4_morestyle2.png -------------------------------------------------------------------------------- /data/examples/MultiRes/syn_res_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/MultiRes/syn_res_1.png -------------------------------------------------------------------------------- /data/examples/MultiRes/syn_res_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/MultiRes/syn_res_2.png -------------------------------------------------------------------------------- /data/examples/MultiRes/syn_res_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/MultiRes/syn_res_3.png -------------------------------------------------------------------------------- /data/examples/MultiRes/syn_res_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/MultiRes/syn_res_4.png -------------------------------------------------------------------------------- 
/data/examples/content.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/content.jpg -------------------------------------------------------------------------------- /data/examples/content2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/content2.jpg -------------------------------------------------------------------------------- /data/examples/style.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/style.jpg -------------------------------------------------------------------------------- /data/examples/style2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/style2.jpg -------------------------------------------------------------------------------- /data/models/_gitignore: -------------------------------------------------------------------------------- 1 | VGG_ILSVRC_19_layers.caffemodel 2 | VGG_ILSVRC_19_layers_deploy.prototxt 3 | VGG_ILSVRC_19_layers_deploy.prototxt.lua 4 | -------------------------------------------------------------------------------- /data/models/download_models.sh: -------------------------------------------------------------------------------- 1 | cd models 2 | wget https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt 3 | wget --no-check-certificate https://bethgelab.org/media/uploads/deeptextures/vgg_normalised.caffemodel 4 | wget http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel 5 | cd .. 
6 | -------------------------------------------------------------------------------- /data/style/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/style/0.jpg -------------------------------------------------------------------------------- /data/style/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/style/1.jpg -------------------------------------------------------------------------------- /data/style/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/style/2.jpg -------------------------------------------------------------------------------- /data/style/picasso.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/style/picasso.jpg -------------------------------------------------------------------------------- /mylib/content.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | -- ContentLoss 3 | ------------------------------------------------------------------------ 4 | local ContentLoss, parent = torch.class('nn.ContentLoss', 'nn.Module') 5 | function ContentLoss:__init(strength, target, normalize) 6 | parent.__init(self) 7 | self.strength = strength 8 | self.target = target 9 | self.normalize = normalize or false 10 | self.loss = 0 11 | self.crit = nn.MSECriterion() 12 | end 13 | function ContentLoss:updateOutput(input) 14 | if input:nElement() == self.target:nElement() then 15 | self.loss = self.crit:forward(input, self.target) * self.strength 16 | else 17 | -- print(input:size()) 18 | -- print(self.target:size()) 19 | -- print('WARNING: Skipping content loss') 20 | end 21 | self.output = input 22 | return self.output 23 | end 24 | function ContentLoss:updateGradInput(input, gradOutput) 25 | if input:nElement() == self.target:nElement() then 26 | self.gradInput = self.crit:backward(input, self.target) 27 | end 28 | if self.normalize then 29 | self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8) 30 | end 31 | self.gradInput:mul(self.strength) 32 | self.gradInput:add(gradOutput) 33 | return self.gradInput 34 | end 35 | 36 | function ContentLoss:update(other) 37 | self.strength = other.strength 38 | self.target = other.target 39 | self.normalize = other.normalize 40 | self.loss = other.loss 41 | self.crit = other.crit 42 | end -------------------------------------------------------------------------------- /mylib/helper.lua: -------------------------------------------------------------------------------- 1 | function computeMRF(input, size, stride, gpu, backend) 2 | local coord_x, coord_y = computegrid(input:size()[3], input:size()[2], size, stride) 3 | local dim_1 = input:size()[1] * size * size 4 | local dim_2 = coord_y:nElement() 5 | local dim_3 = coord_x:nElement() 6 | local t_feature_mrf = torch.Tensor(dim_2 * dim_3, input:size()[1], size, size) 7 | 8 | if gpu >= 0 then 9 | if backend == 'cudnn' then 10 | t_feature_mrf = t_feature_mrf:cuda() 11 | else 12 | t_feature_mrf = t_feature_mrf:cl() 13 | end 14 | end 15 | local count = 1 16 | for 
i_row = 1, dim_2 do 17 | for i_col = 1, dim_3 do 18 | t_feature_mrf[count] = input[{{1, input:size()[1]}, {coord_y[i_row], coord_y[i_row] + size - 1}, {coord_x[i_col], coord_x[i_col] + size - 1}}] 19 | count = count + 1 20 | end 21 | end 22 | local feature_mrf = t_feature_mrf:resize(dim_2 * dim_3, dim_1) 23 | 24 | return t_feature_mrf, feature_mrf, coord_x, coord_y 25 | end 26 | 27 | 28 | function computeMRFnoTensor(input, size, stride, gpu, backend) 29 | local coord_x, coord_y = computegrid(input:size()[3], input:size()[2], size, stride) 30 | local dim_1 = input:size()[1] * size * size 31 | local dim_2 = coord_y:nElement() 32 | local dim_3 = coord_x:nElement() 33 | local t_feature_mrf = torch.Tensor(dim_2 * dim_3, input:size()[1], size, size) 34 | 35 | if gpu >= 0 then 36 | if backend == 'cudnn' then 37 | t_feature_mrf = t_feature_mrf:cuda() 38 | else 39 | t_feature_mrf = t_feature_mrf:cl() 40 | end 41 | end 42 | local count = 1 43 | for i_row = 1, dim_2 do 44 | for i_col = 1, dim_3 do 45 | t_feature_mrf[count] = input[{{1, input:size()[1]}, {coord_y[i_row], coord_y[i_row] + size - 1}, {coord_x[i_col], coord_x[i_col] + size - 1}}] 46 | count = count + 1 47 | end 48 | end 49 | local feature_mrf = t_feature_mrf:resize(dim_2 * dim_3, dim_1) 50 | 51 | t_feature_mrf = nil 52 | collectgarbage() 53 | return feature_mrf, coord_x, coord_y 54 | end 55 | 56 | 57 | function drill_computeMRFfull(input, size, stride, gpu) 58 | local coord_x, coord_y = computegrid(input:size()[3], input:size()[2], size, stride, 1) 59 | local dim = torch.Tensor(2) 60 | return coord_x, coord_y 61 | end 62 | 63 | 64 | function sampleMRFAndTensorfromLocation2(coord_x, coord_y, input, size, gpu) 65 | local t_feature_mrf = torch.Tensor(coord_x:nElement(), input:size()[1], size, size) 66 | for i_patch = 1, coord_x:nElement() do 67 | t_feature_mrf[i_patch] = input[{{1, input:size()[1]}, {coord_y[i_patch], coord_y[i_patch] + size - 1}, {coord_x[i_patch], coord_x[i_patch] + size - 1}}] 68 | end 69 | local feature_mrf = t_feature_mrf:reshape(coord_x:nElement(), input:size()[1] * size * size) 70 | return t_feature_mrf, feature_mrf 71 | end 72 | 73 | 74 | function computeBB(width, height, alpha) 75 | local min_x, min_y, max_x, max_y 76 | local x1 = 1 77 | local y1 = 1 78 | local x2 = width 79 | local y2 = 1 80 | local x3 = width 81 | local y3 = height 82 | local x4 = 1 83 | local y4 = height 84 | local x0 = width / 2 85 | local y0 = height / 2 86 | 87 | local x1r = x0+(x1-x0)*math.cos(alpha)+(y1-y0)*math.sin(alpha) 88 | local y1r = y0-(x1-x0)*math.sin(alpha)+(y1-y0)*math.cos(alpha) 89 | 90 | local x2r = x0+(x2-x0)*math.cos(alpha)+(y2-y0)*math.sin(alpha) 91 | local y2r = y0-(x2-x0)*math.sin(alpha)+(y2-y0)*math.cos(alpha) 92 | 93 | local x3r = x0+(x3-x0)*math.cos(alpha)+(y3-y0)*math.sin(alpha) 94 | local y3r = y0-(x3-x0)*math.sin(alpha)+(y3-y0)*math.cos(alpha) 95 | 96 | local x4r = x0+(x4-x0)*math.cos(alpha)+(y4-y0)*math.sin(alpha) 97 | local y4r = y0-(x4-x0)*math.sin(alpha)+(y4-y0)*math.cos(alpha) 98 | 99 | -- print(x1r .. ' ' .. y1r .. ' ' .. x2r .. ' ' .. y2r .. ' ' .. x3r .. ' ' .. y3r .. ' ' .. x4r .. ' ' .. 
y4r) 100 | if alpha > 0 then 101 | -- find intersection P of line [x1, y1]-[x4, y4] and [x1r, y1r]-[x2r, y2r] 102 | local px1 = ((x1 * y4 - y1 * x4) * (x1r - x2r) - (x1 - x4) * (x1r * y2r - y1r * x2r)) / ((x1 - x4) * (y1r - y2r) - (y1 - y4) * (x1r - x2r)) 103 | local py1 = ((x1 * y4 - y1 * x4) * (y1r - y2r) - (y1 - y4) * (x1r * y2r - y1r * x2r)) / ((x1 - x4) * (y1r - y2r) - (y1 - y4) * (x1r - x2r)) 104 | local px2 = px1 + 1 105 | local py2 = py1 106 | -- print(px1 .. ' ' .. py1) 107 | -- find the intersection Q of line [px1, py1]-[px2, py2] and [x2r, y2r]-[x3r][y3r] 108 | 109 | local qx = ((px1 * py2 - py1 * px2) * (x2r - x3r) - (px1 - px2) * (x2r * y3r - y2r * x3r)) / ((px1 - px2) * (y2r - y3r) - (py1 - py2) * (x2r - x3r)) 110 | local qy = ((px1 * py2 - py1 * px2) * (y2r - y3r) - (py1 - py2) * (x2r * y3r - y2r * x3r)) / ((px1 - px2) * (y2r - y3r) - (py1 - py2) * (x2r - x3r)) 111 | -- print(qx .. ' ' .. qy) 112 | 113 | min_x = width - qx 114 | min_y = qy 115 | max_x = qx 116 | max_y = height - qy 117 | else if alpha < 0 then 118 | -- find intersection P of line [x2, y2]-[x3, y3] and [x1r, y1r]-[x2r, y2r] 119 | local px1 = ((x2 * y3 - y2 * x3) * (x1r - x2r) - (x2 - x3) * (x1r * y2r - y1r * x2r)) / ((x2 - x3) * (y1r - y2r) - (y2 - y3) * (x1r - x2r)) 120 | local py1 = ((x2 * y3 - y1 * x3) * (y1r - y2r) - (y2 - y3) * (x1r * y2r - y1r * x2r)) / ((x2 - x3) * (y1r - y2r) - (y2 - y3) * (x1r - x2r)) 121 | local px2 = px1 - 1 122 | local py2 = py1 123 | -- find the intersection Q of line [px1, py1]-[px2, py2] and [x1r, y1r]-[x4r][y4r] 124 | local qx = ((px1 * py2 - py1 * px2) * (x1r - x4r) - (px1 - px2) * (x1r * y4r - y1r * x4r)) / ((px1 - px2) * (y1r - y4r) - (py1 - py2) * (x1r - x4r)) 125 | local qy = ((px1 * py2 - py1 * px2) * (y1r - y4r) - (py1 - py2) * (x1r * y4r - y1r * x4r)) / ((px1 - px2) * (y1r - y4r) - (py1 - py2) * (x1r - x4r)) 126 | min_x = qx 127 | min_y = qy 128 | max_x = width - min_x 129 | max_y = height - min_y 130 | else 131 | min_x = x1 132 | min_y = y1 133 | max_x = x2 134 | max_y = y3 135 | end 136 | end 137 | 138 | return math.max(math.floor(min_x), 1), math.max(math.floor(min_y), 1), math.floor(max_x), math.floor(max_y) 139 | end 140 | 141 | function computegrid(width, height, block_size, block_stride, flag_all) 142 | local coord_block_y = torch.range(1, height - block_size + 1, block_stride) 143 | if flag_all == 1 then 144 | if coord_block_y[#coord_block_y] < height - block_size + 1 then 145 | local tail = torch.Tensor(1) 146 | tail[1] = height - block_size + 1 147 | coord_block_y = torch.cat(coord_block_y, tail) 148 | end 149 | end 150 | local coord_block_x = torch.range(1, width - block_size + 1, block_stride) 151 | if flag_all == 1 then 152 | if coord_block_x[#coord_block_x] < width - block_size + 1 then 153 | local tail = torch.Tensor(1) 154 | tail[1] = width - block_size + 1 155 | coord_block_x = torch.cat(coord_block_x, tail) 156 | end 157 | end 158 | return coord_block_x, coord_block_y 159 | end 160 | 161 | function preprocess(img) 162 | local mean_pixel = torch.Tensor({103.939, 116.779, 123.68}) 163 | local perm = torch.LongTensor{3, 2, 1} 164 | img = img:index(1, perm):mul(256.0) 165 | mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img) 166 | img:add(-1, mean_pixel) 167 | return img 168 | end 169 | 170 | -- Undo the above preprocessing. 
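-- Note: preprocess() above maps an RGB image with values in [0, 1] (e.g. from
-- image.load) to the input layout the Caffe VGG-19 model expects: channels
-- permuted to BGR, values scaled to [0, 255], and the ImageNet mean pixel
-- (103.939, 116.779, 123.68 in BGR order) subtracted. deprocess() below simply
-- inverts those three steps so results can be displayed or saved.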
171 | function deprocess(img) 172 | local mean_pixel = torch.Tensor({103.939, 116.779, 123.68}) 173 | mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img) 174 | img = img + mean_pixel:float() 175 | local perm = torch.LongTensor{3, 2, 1} 176 | img = img:index(1, perm):div(256.0) 177 | return img 178 | end 179 | 180 | function run_tests(run_type, list_params) 181 | local wrapper = run_type 182 | for i_test = 1, #list_params do 183 | wrapper.run_test(table.unpack(list_params[i_test])) 184 | end 185 | end -------------------------------------------------------------------------------- /mylib/mrf.lua: -------------------------------------------------------------------------------- 1 | local MRFMM, parent = torch.class('nn.MRFMM', 'nn.Module') 2 | 3 | function MRFMM:__init() 4 | parent.__init(self) 5 | end 6 | 7 | function MRFMM:implement(mode, target_mrf, tensor_target_mrf, target_mrfnorm, source_x, source_y, input_size, response_size, nInputPlane, nOutputPlane, kW, kH, dW, dH, threshold_conf, strength, gpu_chunck_size_1, gpu_chunck_size_2, backend, gpu) 8 | self.target_mrf = target_mrf:clone() 9 | self.target_mrfnorm = target_mrfnorm:clone() 10 | self.source_x = source_x 11 | self.source_y = source_y 12 | self.input_size = input_size 13 | self.nInputPlane = nInputPlane 14 | self.nOutputPlane = nOutputPlane 15 | self.kW = kW 16 | self.kH = kH 17 | self.dW = dW 18 | self.dH = dH 19 | self.threshold_conf = threshold_conf 20 | self.strength = strength 21 | self.padW = padW or 0 22 | self.padH = padH or self.padW 23 | self.bias = torch.Tensor(nOutputPlane):fill(0) 24 | self.backend = backend 25 | self.gpu = gpu 26 | if self.gpu >= 0 then 27 | if self.backend == 'cudnn' then 28 | self.bias = self.bias:cuda() 29 | else 30 | self.bias = self.bias:cl() 31 | end 32 | end 33 | self.gradTO = torch.Tensor(input_size[1], input_size[2], input_size[3]) 34 | self.gradTO_confident = torch.Tensor(input_size[2], input_size[3]) 35 | self.response = torch.Tensor(response_size[1], response_size[2], response_size[3]) 36 | self.mode = mode -- memory or speed 37 | self.gpu_chunck_size_1 = gpu_chunck_size_1 38 | self.gpu_chunck_size_2 = gpu_chunck_size_2 39 | self.tensor_target_mrfnorm = torch.repeatTensor(target_mrfnorm, 1, self.gpu_chunck_size_2, input_size[3] - (kW - 1)) 40 | 41 | if self.mode == 'speed' then 42 | if self.backend == 'cudnn' then 43 | self.target_mrf = self.target_mrf:cuda() 44 | self.target_mrfnorm = self.target_mrfnorm:cuda() 45 | self.tensor_target_mrfnorm = self.tensor_target_mrfnorm:cuda() 46 | self.gradTO = self.gradTO:cuda() 47 | self.gradTO_confident = self.gradTO_confident:cuda() 48 | self.response = self.response:cuda() 49 | else 50 | self.target_mrf = self.target_mrf:cl() 51 | self.target_mrfnorm = self.target_mrfnorm:cl() 52 | self.tensor_target_mrfnorm = self.tensor_target_mrfnorm:cl() 53 | self.gradTO = self.gradTO:cl() 54 | self.gradTO_confident = self.gradTO_confident:cl() 55 | self.response = self.response:cl() 56 | end 57 | end 58 | 59 | --[[print('***********************************') 60 | print('mrf layer: ') 61 | print('***********************************') 62 | print(self.target_mrf:size()) 63 | print(self.tensor_target_mrf:size()) 64 | print(self.tensor_target_mrfnorm:size()) 65 | print(self.source_x) 66 | print(self.source_y) 67 | print(self.nInputPlane) 68 | print(self.nOutputPlane) 69 | print(self.kW) 70 | print(self.kH) 71 | print(self.strength) 72 | print(self.mode)--]] 73 | end 74 | 75 | 76 | local function makeContiguous(self, input, gradOutput) 77 | if not 
input:isContiguous() then 78 | print('not contiguous, make it so') 79 | self._input = self._input or input.new() 80 | self._input:resizeAs(input):copy(input) 81 | input = self._input 82 | end 83 | if gradOutput then 84 | if not gradOutput:isContiguous() then 85 | self._gradOutput = self._gradOutput or gradOutput.new() 86 | self._gradOutput:resizeAs(gradOutput):copy(gradOutput) 87 | gradOutput = self._gradOutput 88 | end 89 | end 90 | return input, gradOutput 91 | end 92 | 93 | function MRFMM:updateOutput(input) 94 | input = makeContiguous(self, input) 95 | self.output = input:clone() 96 | return self.output 97 | end 98 | 99 | function MRFMM:updateGradInput(input, gradOutput) 100 | 101 | -- local timer_ALL = torch.Timer() 102 | 103 | -- local timer_PREP = torch.Timer() 104 | input = makeContiguous(self, input) 105 | self.gradTO = self.gradTO:fill(0) 106 | self.gradTO_confident = self.gradTO_confident:fill(0) + 1e-10 107 | local source_mrf, x, y = computeMRFnoTensor(input:float(), self.kW, 1, self.mode == 'memory' and -1 or 1, self.backend) 108 | local source_mrfnorm = torch.Tensor(source_mrf:size()[1]) 109 | if self.mode == 'speed' then 110 | if self.backend == 'cudnn' then 111 | source_mrfnorm = torch.sqrt(torch.sum(torch.cmul(source_mrf, source_mrf), 2)):resize(1, y:nElement(), x:nElement()) 112 | else 113 | for i_source = 1, source_mrf:size()[1] do 114 | source_mrfnorm[i_source] = torch.sqrt(torch.sum(torch.cmul(source_mrf[i_source], source_mrf[i_source]))) 115 | end 116 | source_mrfnorm = source_mrfnorm:resize(1, y:nElement(), x:nElement()) 117 | end 118 | else 119 | source_mrfnorm = torch.sqrt(torch.sum(torch.cmul(source_mrf, source_mrf), 2)):resize(1, y:nElement(), x:nElement()) 120 | end 121 | local tensor_source_mrfnorm = torch.repeatTensor(source_mrfnorm, self.gpu_chunck_size_1, 1, 1) 122 | if self.gpu >= 0 then 123 | if self.backend == 'cudnn' then 124 | tensor_source_mrfnorm = tensor_source_mrfnorm:cuda() 125 | else 126 | tensor_source_mrfnorm = tensor_source_mrfnorm:cl() 127 | end 128 | end 129 | local nOutputPlane_all = self.nOutputPlane -- hacked for memory safety 130 | local num_chunk = math.ceil(nOutputPlane_all / self.gpu_chunck_size_1) 131 | -- local t_prep = timer_PREP:time().real 132 | 133 | -- local timer_MATCH = torch.Timer() 134 | -- local t_io = 0 135 | -- local t_conv = 0 136 | -- local t_clone = 0 137 | for i_chunk = 1, num_chunk do 138 | local i_start = (i_chunk - 1) * self.gpu_chunck_size_1 + 1 139 | local i_end = math.min(i_start + self.gpu_chunck_size_1 - 1, nOutputPlane_all) 140 | 141 | -- local timer_CLONE = torch.Timer() 142 | self.weight = self.target_mrf[{{i_start, i_end}, {1, self.target_mrf:size()[2]}}] 143 | -- t_clone = t_clone + timer_CLONE:time().real 144 | 145 | if self.mode == 'memory' then 146 | -- local timer_IO = torch.Timer() 147 | if self.gpu >= 0 then 148 | if self.backend == 'cudnn' then 149 | self.weight = self.weight:cuda() 150 | else 151 | self.weight = self.weight:cl() 152 | end 153 | end 154 | -- t_io = t_io + timer_IO:time().real 155 | end 156 | self.nOutputPlane = i_end - i_start + 1 157 | 158 | -- local timer_CONV = torch.Timer() 159 | --local temp = input.nn.SpatialConvolutionMM_updateOutput(self, input) 160 | -- t_conv = t_conv + timer_CONV:time().real 161 | local subBias = self.bias:sub(i_start, i_end) 162 | if self.gpu < 0 then 163 | self.finput = torch.Tensor() 164 | self.fgradInput = torch.Tensor() 165 | end 166 | 167 | input.THNN.SpatialConvolutionMM_updateOutput( 168 | input:cdata(), 169 | self.output:cdata(), 170 | 
self.weight:cdata(), 171 | subBias:cdata(), 172 | self.finput:cdata(), 173 | self.fgradInput:cdata(), 174 | self.kW, self.kH, 175 | self.dW, self.dH, 176 | self.padW, self.padH 177 | ) 178 | local temp = self.output 179 | 180 | -- normalize w.r.t source_mrfnorm 181 | if i_chunk < num_chunk then 182 | temp = temp:cdiv(tensor_source_mrfnorm) 183 | else 184 | temp = temp:cdiv(tensor_source_mrfnorm[{{1, i_end - i_start + 1}, {1, temp:size()[2]}, {1, temp:size()[3]}}]) 185 | end 186 | 187 | if self.mode == 'memory' then 188 | -- local timer_IO = torch.Timer() 189 | temp = temp:float() 190 | -- t_io = t_io + timer_IO:time().real 191 | end 192 | self.response[{{i_start, i_end}, {1, self.response:size()[2]}, {1, self.response:size()[3]}}] = temp 193 | end 194 | 195 | local num_chunk_2 = math.ceil(self.response:size()[2] / self.gpu_chunck_size_2) 196 | for i_chunk_2 = 1, num_chunk_2 do 197 | local i_start = (i_chunk_2 - 1) * self.gpu_chunck_size_2 + 1 198 | local i_end = math.min(i_start + self.gpu_chunck_size_2 - 1, self.response:size()[2]) 199 | if i_chunk_2 < num_chunk_2 then 200 | self.response[{{1, self.response:size()[1]}, {i_start, i_end}, {1, self.response:size()[3]}}] = self.response[{{1, self.response:size()[1]}, {i_start, i_end}, {1, self.response:size()[3]}}]:cdiv(self.tensor_target_mrfnorm) 201 | else 202 | self.response[{{1, self.response:size()[1]}, {i_start, i_end}, {1, self.response:size()[3]}}] = self.response[{{1, self.response:size()[1]}, {i_start, i_end}, {1, self.response:size()[3]}}]:cdiv(self.tensor_target_mrfnorm[{{1, self.response:size()[1]}, {1, i_end - i_start + 1}, {1, self.response:size()[3]}}]) 203 | end 204 | end 205 | 206 | -- local timer_AFT = torch.Timer() 207 | local max_response, max_id = torch.max(self.response, 1) 208 | -- local t_aft = timer_AFT:time().real 209 | 210 | -- local t_match = timer_MATCH:time().real 211 | 212 | -- local timer_SYN = torch.Timer() 213 | source_mrf = source_mrf:resize(source_mrf:size()[1], self.nInputPlane, self.kW, self.kH) 214 | self.target_mrf = self.target_mrf:resize(self.target_mrf:size()[1], self.nInputPlane, self.kW, self.kH) 215 | for i_patch = 1, self.source_x:nElement() do 216 | local sel_response = max_response[1][self.source_y[i_patch]][self.source_x[i_patch]] 217 | if sel_response >= self.threshold_conf then 218 | local sel_idx = max_id[1][self.source_y[i_patch]][self.source_x[i_patch]] 219 | local source_idx = (self.source_y[i_patch] - 1) * x:nElement() + self.source_x[i_patch] 220 | self.gradTO[{{1, self.nInputPlane}, {self.source_y[i_patch], self.source_y[i_patch] + self.kH - 1}, {self.source_x[i_patch], self.source_x[i_patch] + self.kW - 1}}]:add(self.target_mrf[sel_idx] - source_mrf[source_idx]) 221 | self.gradTO_confident[{{self.source_y[i_patch], self.source_y[i_patch] + self.kH - 1}, {self.source_x[i_patch], self.source_x[i_patch] + self.kW - 1}}]:add(1) 222 | end 223 | end 224 | self.gradTO:cdiv(torch.repeatTensor(self.gradTO_confident, self.nInputPlane, 1, 1)) 225 | self.nOutputPlane = nOutputPlane_all 226 | self.target_mrf = self.target_mrf:resize(self.target_mrf:size()[1], self.nInputPlane * self.kW * self.kH) 227 | -- local t_syn = timer_SYN:time().real 228 | 229 | if gradOutput:size()[1] == input:size()[1] then 230 | if self.gpu >= 0 then 231 | if self.backend == 'cudnn' then 232 | self.gradInput = gradOutput:clone() + self.gradTO:cuda() * self.strength * (-1) 233 | else 234 | self.gradInput = gradOutput:clone() + self.gradTO:cl() * self.strength * (-1) 235 | end 236 | else 237 | self.gradInput = 
gradOutput:clone() + self.gradTO * self.strength * (-1) 238 | end 239 | else 240 | self.gradInput = self.gradTO * self.strength * (-1) 241 | end 242 | 243 | -- local t_all = timer_ALL:time().real 244 | -- print('t_all: ' .. t_all .. ', t_prep: ' .. t_prep .. ', t_match: ' .. t_match .. ', t_io: ' .. t_io .. ', t_conv: ' .. t_conv .. ', t_aft: ' .. t_aft .. ', t_syn: ' .. t_syn) 245 | -- print('t_all: ' .. t_all .. ', t_prep: ' .. t_prep/t_all .. ', t_match: ' .. t_match/t_all .. ', t_io: ' .. t_io/t_all .. ', t_conv: ' .. t_conv/t_all .. ', t_aft: ' .. t_aft/t_all .. ', t_syn: ' .. t_syn/t_all) 246 | -- print('**************************************************************************************************') 247 | -- print('t_all: ' .. t_all .. ', t_clone: ' .. t_clone/t_match .. ', t_io: ' .. t_io/t_match .. ', t_conv: ' .. t_conv/t_match .. ', t_aft: ' .. t_aft/t_match) 248 | -- print('t_all: ' .. t_all .. ', t_clone: ' .. t_clone .. ', t_io: ' .. t_io .. ', t_conv: ' .. t_conv .. ', t_aft: ' .. t_aft) 249 | -- tensor_source_mrf = nil 250 | source_mrf = nil 251 | source_mrfnorm = nil 252 | tensor_source_mrfnorm = nil 253 | collectgarbage() 254 | return self.gradInput 255 | end 256 | 257 | function MRFMM:type(type) 258 | self.finput = torch.Tensor() 259 | self.fgradInput = torch.Tensor() 260 | return parent.type(self,type) 261 | end 262 | -------------------------------------------------------------------------------- /mylib/myoptimizer.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | -- mylbfgs 3 | ------------------------------------------------------------------------ 4 | function mylbfgs(opfunc, x, config, state, mask) 5 | -- get/update state 6 | local config = config or {} 7 | local state = state or config 8 | local maxIter = tonumber(config.maxIter) or 20 9 | local maxEval = tonumber(config.maxEval) or maxIter*1.25 10 | local tolFun = config.tolFun or 1e-5 11 | local tolX = config.tolX or 1e-9 12 | local nCorrection = config.nCorrection or 100 13 | local lineSearch = config.lineSearch 14 | local lineSearchOpts = config.lineSearchOptions 15 | local learningRate = config.learningRate or 1 16 | local isverbose = config.verbose or false 17 | 18 | 19 | state.funcEval = state.funcEval or 0 20 | state.nIter = state.nIter or 0 21 | -- verbose function 22 | local function verbose(...) 23 | if isverbose then print(' ', ...) 
end 24 | end 25 | 26 | -- import some functions 27 | local zeros = torch.zeros 28 | local randn = torch.randn 29 | local append = table.insert 30 | local abs = math.abs 31 | local min = math.min 32 | 33 | -- evaluate initial f(x) and df/dx 34 | local f,g = opfunc(x) 35 | g:cmul(mask) -- add by chris 36 | local f_hist = {f} 37 | local currentFuncEval = 1 38 | state.funcEval = state.funcEval + 1 39 | 40 | -- check optimality of initial point 41 | state.tmp1 = state.abs_g or zeros(g:size()); local tmp1 = state.tmp1 42 | tmp1:copy(g):abs() 43 | if tmp1:sum() <= tolFun then 44 | -- optimality condition below tolFun 45 | verbose('optimality condition below tolFun') 46 | return x,f_hist 47 | end 48 | 49 | -- variables cached in state (for tracing) 50 | local d = state.d 51 | local t = state.t 52 | local old_dirs = state.old_dirs 53 | local old_stps = state.old_stps 54 | local Hdiag = state.Hdiag 55 | local g_old = state.g_old 56 | local f_old = state.f_old 57 | 58 | -- optimize for a max of maxIter iterations 59 | local nIter = 0 60 | while nIter < maxIter do 61 | -- keep track of nb of iterations 62 | nIter = nIter + 1 63 | state.nIter = state.nIter + 1 64 | -- print(state.nIter) 65 | ------------------------------------------------------------ 66 | -- compute gradient descent direction 67 | ------------------------------------------------------------ 68 | if state.nIter == 1 then 69 | d = g:clone():mul(-1) -- -g 70 | old_dirs = {} 71 | old_stps = {} 72 | Hdiag = 1 73 | else 74 | -- do lbfgs update (update memory) 75 | local y = g:clone():add(-1, g_old) -- g - g_old 76 | local s = d:clone():mul(t) -- d*t 77 | local ys = y:dot(s) -- y*s 78 | 79 | if ys > 1e-10 then 80 | -- updating memory 81 | if #old_dirs == nCorrection then 82 | -- shift history by one (limited-memory) 83 | local prev_old_dirs = old_dirs 84 | local prev_old_stps = old_stps 85 | old_dirs = {} 86 | old_stps = {} 87 | for i = 2,#prev_old_dirs do 88 | append(old_dirs, prev_old_dirs[i]) 89 | append(old_stps, prev_old_stps[i]) 90 | end 91 | end 92 | 93 | -- store new direction/step 94 | append(old_dirs, s) 95 | append(old_stps, y) 96 | 97 | -- update scale of initial Hessian approximation 98 | Hdiag = ys / y:dot(y) -- (y*y) 99 | 100 | -- cleanup 101 | collectgarbage() 102 | end 103 | 104 | -- compute the approximate (L-BFGS) inverse Hessian 105 | -- multiplied by the gradient 106 | local p = g:size(1) 107 | local k = #old_dirs 108 | 109 | state.ro = state.ro or zeros(nCorrection); local ro = state.ro 110 | for i = 1,k do 111 | ro[i] = 1 / old_stps[i]:dot(old_dirs[i]) 112 | end 113 | 114 | state.q = state.q or zeros(nCorrection+1,p):typeAs(g) 115 | local q = state.q 116 | state.r = state.r or zeros(nCorrection+1,p):typeAs(g) 117 | local r = state.r 118 | state.al = state.al or zeros(nCorrection):typeAs(g) 119 | local al = state.al 120 | state.be = state.be or zeros(nCorrection):typeAs(g) 121 | local be = state.be 122 | 123 | q[k+1] = g:clone():mul(-1) -- -g 124 | 125 | for i = k,1,-1 do 126 | al[i] = old_dirs[i]:dot(q[i+1]) * ro[i] 127 | q[i] = q[i+1] 128 | q[i]:add(-al[i], old_stps[i]) 129 | end 130 | 131 | -- multiply by initial Hessian 132 | r[1] = q[1]:clone():mul(Hdiag) -- q[1] * Hdiag 133 | 134 | for i = 1,k do 135 | be[i] = old_stps[i]:dot(r[i]) * ro[i] 136 | r[i+1] = r[i] 137 | r[i+1]:add((al[i] - be[i]), old_dirs[i]) 138 | end 139 | 140 | -- final direction: 141 | d:copy(r[k+1]) 142 | end -- end if state.nIter == 1 then 143 | 144 | g_old = g:clone() 145 | f_old = f 146 | 147 | 
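-- The search direction d computed above comes from the standard L-BFGS
-- two-loop recursion: with the steps s_i kept in old_dirs, the gradient
-- differences y_i in old_stps, and ro[i] = 1/(y_i . s_i), the first loop
-- folds the stored pairs into q = -g, the result is scaled by the initial
-- inverse-Hessian estimate Hdiag = (y . s)/(y . y), and the second loop
-- unrolls the pairs again, leaving d approximately equal to
-- -(inverse Hessian) * g.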
------------------------------------------------------------ 148 | -- compute step length 149 | ------------------------------------------------------------ 150 | -- directional derivative 151 | local gtd = g:dot(d) -- g * d 152 | 153 | -- check that progress can be made along that direction 154 | if gtd > -tolX then 155 | break 156 | end 157 | 158 | -- reset initial guess for step size 159 | if state.nIter == 1 then 160 | tmp1:copy(g):abs() 161 | t = min(1,1/tmp1:sum()) * learningRate 162 | else 163 | t = learningRate 164 | end 165 | 166 | -- optional line search: user function 167 | local lsFuncEval = 0 168 | if lineSearch and type(lineSearch) == 'function' then 169 | -- perform line search, using user function 170 | f,g,x,t,lsFuncEval = lineSearch(opfunc,x,t,d,f,g,gtd,lineSearchOpts) 171 | append(f_hist, f) 172 | else 173 | -- no line search, simply move with fixed-step 174 | x:add(t,d) 175 | if nIter ~= maxIter then 176 | -- re-evaluate function only if not in last iteration 177 | -- the reason we do this: in a stochastic setting, 178 | -- no use to re-evaluate that function here 179 | f,g = opfunc(x) 180 | g:cmul(mask) -- add by chris 181 | lsFuncEval = 1 182 | append(f_hist, f) 183 | end 184 | end 185 | 186 | -- update func eval 187 | currentFuncEval = currentFuncEval + lsFuncEval 188 | state.funcEval = state.funcEval + lsFuncEval 189 | 190 | ------------------------------------------------------------ 191 | -- check conditions 192 | ------------------------------------------------------------ 193 | if nIter == maxIter then 194 | -- no use to run tests 195 | verbose('reached max number of iterations') 196 | break 197 | end 198 | 199 | if currentFuncEval >= maxEval then 200 | -- max nb of function evals 201 | verbose('max nb of function evals') 202 | break 203 | end 204 | 205 | tmp1:copy(g):abs() 206 | if tmp1:sum() <= tolFun then 207 | -- check optimality 208 | verbose('optimality condition below tolFun') 209 | break 210 | end 211 | 212 | tmp1:copy(d):mul(t):abs() 213 | if tmp1:sum() <= tolX then 214 | -- step size below tolX 215 | verbose('step size below tolX') 216 | break 217 | end 218 | 219 | if abs(f-f_old) < tolX then 220 | -- function value changing less than tolX 221 | verbose('function value changing less than tolX') 222 | break 223 | end 224 | end -- end while nIter < maxIter do 225 | 226 | -- save state 227 | state.old_dirs = old_dirs 228 | state.old_stps = old_stps 229 | state.Hdiag = Hdiag 230 | state.g_old = g_old 231 | state.f_old = f_old 232 | state.t = t 233 | state.d = d 234 | 235 | -- return optimal x, and history of f(x) 236 | return x,f_hist,currentFuncEval 237 | end -------------------------------------------------------------------------------- /mylib/style.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | -- StyleLoss 3 | ------------------------------------------------------------------------ 4 | -- Returns a network that computes the CxC Gram matrix from inputs 5 | -- of size C x H x W 6 | function GramMatrix() 7 | local net = nn.Sequential() 8 | net:add(nn.View(-1):setNumInputDims(2)) 9 | local concat = nn.ConcatTable() 10 | concat:add(nn.Identity()) 11 | concat:add(nn.Identity()) 12 | net:add(concat) 13 | net:add(nn.MM(false, true)) 14 | return net 15 | end 16 | 17 | local StyleLoss, parent = torch.class('nn.StyleLoss', 'nn.Module') 18 | 19 | function StyleLoss:__init(strength, target, normalize) 20 | parent.__init(self) 21 | self.normalize 
= normalize or false 22 | self.strength = strength 23 | self.target = target 24 | self.loss = 0 25 | 26 | self.gram = GramMatrix() 27 | self.G = nil 28 | self.crit = nn.MSECriterion() 29 | end 30 | 31 | function StyleLoss:updateOutput(input) 32 | self.G = self.gram:forward(input) 33 | self.G:div(input:nElement()) 34 | self.loss = self.crit:forward(self.G, self.target) 35 | self.loss = self.loss * self.strength 36 | self.output = input 37 | return self.output 38 | end 39 | 40 | function StyleLoss:updateGradInput(input, gradOutput) 41 | local dG = self.crit:backward(self.G, self.target) 42 | dG:div(input:nElement()) 43 | self.gradInput = self.gram:backward(input, dG) 44 | if self.normalize then 45 | self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8) 46 | end 47 | self.gradInput:mul(self.strength) 48 | self.gradInput:add(gradOutput) 49 | return self.gradInput 50 | end -------------------------------------------------------------------------------- /mylib/tv.lua: -------------------------------------------------------------------------------- 1 | local TVLoss, parent = torch.class('nn.TVLoss', 'nn.Module') 2 | 3 | function TVLoss:__init(strength) 4 | parent.__init(self) 5 | self.strength = strength 6 | self.x_diff = torch.Tensor() 7 | self.y_diff = torch.Tensor() 8 | end 9 | 10 | ------------------------------------------------------------------------ 11 | -- TVLoss 12 | ------------------------------------------------------------------------ 13 | function TVLoss:updateOutput(input) 14 | self.output = input 15 | return self.output 16 | end 17 | 18 | -- TV loss backward pass inspired by kaishengtai/neuralart 19 | function TVLoss:updateGradInput(input, gradOutput) 20 | self.gradInput:resizeAs(input):zero() 21 | local C, H, W = input:size(1), input:size(2), input:size(3) 22 | self.x_diff:resize(3, H - 1, W - 1) 23 | self.y_diff:resize(3, H - 1, W - 1) 24 | self.x_diff:copy(input[{{}, {1, -2}, {1, -2}}]) 25 | self.x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}]) 26 | self.y_diff:copy(input[{{}, {1, -2}, {1, -2}}]) 27 | self.y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}]) 28 | self.gradInput[{{}, {1, -2}, {1, -2}}]:add(self.x_diff):add(self.y_diff) 29 | self.gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, self.x_diff) 30 | self.gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, self.y_diff) 31 | self.gradInput:mul(self.strength) 32 | self.gradInput:add(gradOutput) 33 | return self.gradInput 34 | end -------------------------------------------------------------------------------- /run_syn.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('mylib/helper.lua') 3 | 4 | ----------------------------------------- 5 | -- Parameters: 6 | ----------------------------------------- 7 | -- content_name: the content image located in folder "data/content". Notice for free synthesis this image is only used for initialization (and only when "ini_method" is set to "image") 8 | -- style_name: the style image located in folder "data/style" 9 | -- ini_method: initial method, set to "image" to use the content image as the initialization; set to "random" to use random noise (of the same size as the content image). 10 | -- max_size: maximum size of the synthesis image. Default value 384. Larger image needs more time and memory. 11 | -- scaler: relative expansion from example to result. Default value 2. 12 | -- num_res: number of resolutions. Default value 3. Notice the lowest resolution image should be larger than the patch size otherwise it won't synthesize. 
13 | -- num_iter: number of iterations for each resolution. Default value 100 for all resolutions. 14 | 15 | -- mrf_layers: the layers for MRF constraint. Usualy layer 21 alone already gives decent results. Including layer 12 may improve the results but at significantly more computational cost. 16 | -- mrf_weight: weight for each MRF layer. Default value 1e-4. For free texture synthesis it can be seen as the "learning rate" in gradient decent. 17 | -- mrf_patch_size: the patch size for MRF constraint. Default value 3. This value is defined seperately for each MRF layer. 18 | -- mrf_num_rotation: To matching objects of different poses. Default value 0. This value is shared by all MRF layers. The total number of rotatoinal copies is "2 * mrf_num_rotation + 1" 19 | -- mrf_num_scale: To matching objects of different scales. Default value 0. This value is shared by all MRF layers. The total number of scaled copies is "2 * mrf_num_scale + 1" 20 | -- mrf_sample_stride: stride to sample mrf on style image. Default value 2. This value is defined seperately for each MRF layer. 21 | -- mrf_synthesis_stride: stride to sample mrf on synthesis image. Default value 2. This value is defined seperately for each MRF layer. 22 | -- mrf_confidence_threshold: threshold for filtering out bad matching. Default value 0 -- means we keep all matchings. This value is defined seperately for all layers. 23 | 24 | -- tv_weight: TV smoothness weight. Default value 1e-3. 25 | 26 | -- mode: speed or memory. Try 'speed' if you have a GPU with more than 4GB memory, and try 'memory' otherwise. The 'speed' mode is significantly faster (especially for synthesizing high resolutions) at the cost of higher GPU memory. 27 | -- gpu_chunck_size_1: Size of chunks to split feature maps along the channel dimension. This is to save memory when normalizing the matching score in mrf layers. Use large value if you have large gpu memory. As reference we use 256 for Titan X, and 32 for Geforce GT750M 2G. 28 | -- gpu_chunck_size_2: Size of chuncks to split feature maps along the y dimension. This is to save memory when normalizing the matching score in mrf layers. Use large value if you have large gpu memory. As reference we use 16 for Titan X, and 2 for Geforce GT750M 2G. 29 | -- backend: Use 'cudnn' for CUDA-enabled GPUs or 'clnn' for OpenCL. 30 | 31 | ----------------------------------------- 32 | -- Reference tests 33 | ----------------------------------------- 34 | -- speed mode V.S. memory mode (Titan X 12G) 35 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 131 seconds 36 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn'} -- 172 seconds 37 | 38 | -- speed mode V.S. memory mode (Geforce GT750M 2G) 39 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 552 seconds (gpu streching, not recommended) 40 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn'}, -- 1506 seconds 41 | 42 | -- speed mode V.S. 
31 | ----------------------------------------- 32 | -- Reference tests 33 | ----------------------------------------- 34 | -- speed mode V.S. memory mode (Titan X 12G) 35 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 131 seconds 36 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn'} -- 172 seconds 37 | 38 | -- speed mode V.S. memory mode (Geforce GT750M 2G) 39 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 552 seconds (gpu stretching, not recommended) 40 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn'}, -- 1506 seconds 41 | 42 | -- speed mode V.S. memory mode (Sapphire Radeon R9 280 3G) 43 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'speed', 256, 16, 'clnn'}, -- 193 seconds (240 seconds total) 44 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'clnn'}, -- 175 seconds (216 seconds total) 45 | local list_params = { 46 | {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn'}, 47 | } 48 | 49 | run_tests(require 'syn_CNNMRF_wrapper', list_params) -------------------------------------------------------------------------------- /run_trans.lua: -------------------------------------------------------------------------------- 1 | require 'paths' 2 | paths.dofile('mylib/helper.lua') 3 | 4 | ----------------------------------------- 5 | -- Parameters: 6 | ----------------------------------------- 7 | -- content_name: the content image located in folder "data/content" 8 | -- style_name: the style image located in folder "data/style" 9 | -- ini_method: initialization method, set to "image" to use the content image as the initialization; set to "random" to use random noise. 10 | -- max_size: maximum size of the synthesis image. Default value 384. Larger images need more time and memory. 11 | -- num_res: number of resolutions. Default value 3. Notice the lowest-resolution image should be larger than the patch size; otherwise it won't synthesize. 12 | -- num_iter: number of iterations for each resolution. Default value 100 for all resolutions. 13 | 14 | -- mrf_layers: the layers for MRF constraint. Usually layer 21 alone already gives decent results. Including layer 12 may improve the results, but at a significantly higher computational cost. 15 | -- mrf_weight: weight for each MRF layer. Default value 1e-4. Higher weights lead to more style-faithful results. 16 | -- mrf_patch_size: the patch size for MRF constraint. Default value 3. This value is defined separately for each MRF layer. 17 | -- mrf_num_rotation: To match objects of different poses. Default value 0. This value is shared by all MRF layers. The total number of rotational copies is "2 * mrf_num_rotation + 1" 18 | -- mrf_num_scale: To match objects of different scales. Default value 0. This value is shared by all MRF layers. The total number of scaled copies is "2 * mrf_num_scale + 1" 19 | -- mrf_sample_stride: stride to sample MRF patches on the style image. Default value 2. This value is defined separately for each MRF layer. 20 | -- mrf_synthesis_stride: stride to sample MRF patches on the synthesis image. Default value 2. This value is defined separately for each MRF layer. 21 | -- mrf_confidence_threshold: threshold for filtering out bad matches. Default value 0 means all matches are kept. This value is defined separately for each MRF layer. 22 | 23 | -- content_layers: the layers for content constraint. Default value 23. 24 | -- content_weight: The weight for content constraint. Default value 2e1. Increasing this value will make the result more content faithful; decreasing it will make the result more style faithful. Notice this value should be increased (for example, doubled) if layer 12 is included for the MRF constraint. 25 | 26 | -- tv_weight: TV smoothness weight. Default value 1e-3. 27 | 28 | -- mode: speed or memory. Try 'speed' if you have a GPU with more than 4GB memory, and try 'memory' otherwise. The 'speed' mode is significantly faster (especially when synthesizing at high resolutions) at the cost of higher GPU memory usage. 29 | -- gpu_chunck_size_1: Size of the chunks used to split feature maps along the channel dimension. This is to save memory when normalizing the matching score in MRF layers. Use a larger value if you have more GPU memory. As a reference we use 256 for Titan X, and 32 for Geforce GT750M 2G. 30 | -- gpu_chunck_size_2: Size of the chunks used to split feature maps along the y dimension. This is to save memory when normalizing the matching score in MRF layers. Use a larger value if you have more GPU memory. As a reference we use 16 for Titan X, and 2 for Geforce GT750M 2G. 31 | -- backend: Use 'cudnn' for CUDA-enabled GPUs or 'clnn' for OpenCL. 32 | 
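-- (Added annotation, not part of the original script.) Each parameter table below is a positional
-- argument list for run_test() in transfer_CNNMRF_wrapper.lua. Reading the first reference test
-- as a sketch:
--   {'potrait1',       -- content_name
--    'picasso',        -- style_name
--    'image',          -- ini_method
--    384,              -- max_size
--    3,                -- num_res
--    {100, 100, 100},  -- num_iter (per resolution)
--    {12, 21},         -- mrf_layers
--    {1e-4, 1e-4},     -- mrf_weight (per MRF layer)
--    {3, 3},           -- mrf_patch_size
--    1, 1,             -- mrf_num_rotation, mrf_num_scale
--    {2, 2},           -- mrf_sample_stride
--    {2, 2},           -- mrf_synthesis_stride
--    {0, 0},           -- mrf_confidence_threshold
--    {23},             -- content_layers
--    2e1,              -- content_weight
--    1e-3,             -- tv_weight
--    'speed',          -- mode
--    256, 16,          -- gpu_chunck_size_1, gpu_chunck_size_2
--    'cudnn'}          -- backend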
33 | ----------------------------------------- 34 | -- Reference tests 35 | ----------------------------------------- 36 | -- speed mode V.S. memory mode (Titan X 12G) 37 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 101 seconds 38 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'memory', 256, 16, 'cudnn'}, -- 283 seconds 39 | 40 | -- speed mode V.S. memory mode (Geforce GT750M 2G) 41 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 570 seconds (gpu stretching, not recommended) 42 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'memory', 256, 16, 'cudnn'}, -- 973 seconds 43 | 44 | -- speed mode V.S. 
memory mode (Sapphire Radeon R9 280 3G) 45 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'memory', 256, 16, 'clnn'}, -- 301 seconds (346 seconds total) 46 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'clnn'}, -- 6500 seconds (7032 seconds total) 47 | 48 | -- style interpolation (high resolution with Titan X 12G): 49 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- balanced 50 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 4e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- more content 51 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 1e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- more style 52 | 53 | -- style interpolation (low resolution with Geforce GT750M 2G): 54 | -- {'potrait1', 'picasso', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- balanced 55 | -- {'potrait1', 'picasso', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 4e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- more content 56 | -- {'potrait1', 'picasso', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 1e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- more style 57 | 58 | -- other 59 | -- {'0', '0', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- Titan X 12G: 145 seconds 60 | -- {'1', '1', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {2, 2}, {2, 2}, {0, 0}, {23}, 0.5e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- Titan X 12G: 146 seconds 61 | -- {'0', '0', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {3, 3}, {2, 2}, {0, 0}, {23}, 1e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- Geforce GT750M 2G: 593 seconds 62 | -- {'1', '1', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {3, 3}, {2, 2}, {0, 0}, {23}, 0.5e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- Geforce GT750M 2G: 623 seconds 63 | 64 | 65 | local list_params = { 66 | {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, 67 | {'0', '0', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, 68 | {'1', '1', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {2, 2}, {2, 2}, {0, 0}, {23}, 0.5e1, 1e-3, 'speed', 256, 16, 'cudnn'}, 69 | {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'memory', 256, 16, 'clnn'}, 70 | } 71 | 72 | run_tests(require 'transfer_CNNMRF_wrapper', list_params) -------------------------------------------------------------------------------- /syn_CNNMRF_wrapper.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | require 'image' 4 | require 'paths' 5 | require 'loadcaffe' 
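-- (Added annotation, not part of the original file.) This wrapper returns { run_test = run_test, main = main };
-- run_syn.lua drives it through run_tests() from mylib/helper.lua, but a single synthesis can also be
-- launched directly with the same positional arguments used in run_syn.lua, e.g. (sketch):
--   local syn = require 'syn_CNNMRF_wrapper'
--   syn.run_test('2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3},
--                1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn')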
6 | 7 | paths.dofile('mylib/myoptimizer.lua') 8 | paths.dofile('mylib/tv.lua') 9 | paths.dofile('mylib/mrf.lua') 10 | paths.dofile('mylib/helper.lua') 11 | 12 | torch.setdefaulttensortype('torch.FloatTensor') -- float as default tensor type 13 | 14 | local function main(params) 15 | os.execute('mkdir data/result/') 16 | os.execute('mkdir data/result/freesyn/') 17 | os.execute('mkdir data/result/freesyn/MRF/') 18 | os.execute(string.format('mkdir %s', params.output_folder)) 19 | 20 | local net = nn.Sequential() 21 | local i_net_layer = 0 22 | local num_calls = 0 23 | local next_mrf_idx = 1 24 | local mrf_losses = {} 25 | local mrf_layers = {} 26 | local i_mrf_layer = 0 27 | local input_image 28 | local output_image 29 | local cur_res 30 | local mrf_layers_pretrained = params.mrf_layers 31 | 32 | ----------------------------------------------------------------------------------- 33 | -- read images 34 | ----------------------------------------------------------------------------------- 35 | local source_image = image.load(string.format('data/content/%s.jpg', params.content_name), 3) 36 | local target_image = image.load(string.format('data/style/%s.jpg', params.style_name), 3) 37 | 38 | source_image = image.scale(source_image, params.max_size, 'bilinear') 39 | target_image = image.scale(target_image, math.floor(params.max_size / params.scaler), 'bilinear') 40 | 41 | local render_height = source_image:size()[2] 42 | local render_width = source_image:size()[3] 43 | local source_image_caffe = preprocess(source_image):float() 44 | local target_image_caffe = preprocess(target_image):float() 45 | 46 | local pyramid_source_image_caffe = {} 47 | for i_res = 1, params.num_res do 48 | pyramid_source_image_caffe[i_res] = image.scale(source_image_caffe, math.ceil(source_image:size()[3] * math.pow(0.5, params.num_res - i_res)), math.ceil(source_image:size()[2] * math.pow(0.5, params.num_res - i_res)), 'bilinear') 49 | end 50 | 51 | local pyramid_target_image_caffe = {} 52 | for i_res = 1, params.num_res do 53 | pyramid_target_image_caffe[i_res] = image.scale(target_image_caffe, math.ceil(target_image:size()[3] * math.pow(0.5, params.num_res - i_res)), math.ceil(target_image:size()[2] * math.pow(0.5, params.num_res - i_res)), 'bilinear') 54 | end 55 | 56 | -- -------------------------------------------------------------------------------------------------------- 57 | -- -- local function for adding a mrf layer, with image rotation andn scaling 58 | -- -------------------------------------------------------------------------------------------------------- 59 | local function add_mrf() 60 | local mrf_module = nn.MRFMM() 61 | i_mrf_layer = i_mrf_layer + 1 62 | i_net_layer = i_net_layer + 1 63 | next_mrf_idx = next_mrf_idx + 1 64 | if params.gpu >= 0 then 65 | if params.backend == 'cudnn' then 66 | mrf_module:cuda() 67 | else 68 | mrf_module:cl() 69 | end 70 | end 71 | net:add(mrf_module) 72 | table.insert(mrf_losses, mrf_module) 73 | table.insert(mrf_layers, i_mrf_layer, i_net_layer) 74 | return true 75 | end 76 | 77 | local function build_mrf(id_mrf) 78 | -------------------------------------------------------- 79 | -- deal with target 80 | -------------------------------------------------------- 81 | local target_images_caffe = {} 82 | for i_r = -params.target_num_rotation, params.target_num_rotation do 83 | local alpha = params.target_step_rotation * i_r 84 | local min_x, min_y, max_x, max_y = computeBB(pyramid_target_image_caffe[cur_res]:size()[3], pyramid_target_image_caffe[cur_res]:size()[2], alpha) 85 
| local target_image_rt_caffe = image.rotate(pyramid_target_image_caffe[cur_res], alpha, 'bilinear') 86 | target_image_rt_caffe = target_image_rt_caffe[{{1, target_image_rt_caffe:size()[1]}, {min_y, max_y}, {min_x, max_x}}] 87 | 88 | for i_s = -params.target_num_scale, params.target_num_scale do 89 | local max_sz = math.floor(math.max(target_image_rt_caffe:size()[2], target_image_rt_caffe:size()[3]) * torch.pow(params.target_step_scale, i_s)) 90 | local target_image_rt_s_caffe = image.scale(target_image_rt_caffe, max_sz, 'bilinear') 91 | if params.gpu >= 0 then 92 | if params.backend == 'cudnn' then 93 | target_image_rt_s_caffe = target_image_rt_s_caffe:cuda() 94 | else 95 | target_image_rt_s_caffe = target_image_rt_s_caffe:cl() 96 | end 97 | end 98 | table.insert(target_images_caffe, target_image_rt_s_caffe) 99 | end 100 | end 101 | 102 | -- compute the coordinates on the pixel layer 103 | local target_x 104 | local target_y 105 | local target_x_per_image = {} 106 | local target_y_per_image = {} 107 | local target_imageid 108 | -- print('*****************************************************') 109 | -- print(string.format('build target mrf')); 110 | -- print('*****************************************************') 111 | for i_image = 1, #target_images_caffe do 112 | -- print(string.format('image %d, ', i_image)) 113 | net:forward(target_images_caffe[i_image]) 114 | local target_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float() 115 | 116 | if params.mrf_patch_size[id_mrf] > target_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > target_feature_map:size()[3] then 117 | print('target_images is not big enough for patch') 118 | print('target_images size: ') 119 | print(target_feature_map:size()) 120 | print('patch size: ') 121 | print(params.mrf_patch_size[id_mrf]) 122 | do return end 123 | end 124 | local target_x_, target_y_ = drill_computeMRFfull(target_feature_map, params.mrf_patch_size[id_mrf], params.target_sample_stride[id_mrf], -1) 125 | 126 | 127 | local x = torch.Tensor(target_x_:nElement() * target_y_:nElement()) 128 | local y = torch.Tensor(target_x_:nElement() * target_y_:nElement()) 129 | local target_imageid_ = torch.Tensor(target_x_:nElement() * target_y_:nElement()):fill(i_image) 130 | local count = 1 131 | for i_row = 1, target_y_:nElement() do 132 | for i_col = 1, target_x_:nElement() do 133 | x[count] = target_x_[i_col] 134 | y[count] = target_y_[i_row] 135 | count = count + 1 136 | end 137 | end 138 | if i_image == 1 then 139 | target_x = x:clone() 140 | target_y = y:clone() 141 | target_imageid = target_imageid_:clone() 142 | else 143 | target_x = torch.cat(target_x, x, 1) 144 | target_y = torch.cat(target_y, y, 1) 145 | target_imageid = torch.cat(target_imageid, target_imageid_, 1) 146 | end 147 | table.insert(target_x_per_image, x) 148 | table.insert(target_y_per_image, y) 149 | end -- end for i_image = 1, #target_images do 150 | 151 | -- print('*****************************************************') 152 | -- print(string.format('collect mrf')); 153 | -- print('*****************************************************') 154 | 155 | local num_channel_mrf = net:get(mrf_layers[id_mrf] - 1).output:size()[1] 156 | local target_mrf = torch.Tensor(target_x:nElement(), num_channel_mrf * params.mrf_patch_size[id_mrf] * params.mrf_patch_size[id_mrf]) 157 | local tensor_target_mrf = torch.Tensor(target_x:nElement(), num_channel_mrf, params.mrf_patch_size[id_mrf], params.mrf_patch_size[id_mrf]) 158 | local count_mrf = 1 159 | for i_image = 1, #target_images_caffe do 
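-- second pass over the rotated/scaled style copies: forward each copy again and copy the
-- feature patches at the coordinates sampled above into target_mrf / tensor_target_mrf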
160 | -- print(string.format('image %d, ', i_image)); 161 | net:forward(target_images_caffe[i_image]) 162 | -- sample mrf on mrf_layers 163 | local tensor_target_mrf_, target_mrf_ = sampleMRFAndTensorfromLocation2(target_x_per_image[i_image], target_y_per_image[i_image], net:get(mrf_layers[id_mrf] - 1).output:float(), params.mrf_patch_size[id_mrf]) 164 | target_mrf[{{count_mrf, count_mrf + target_mrf_:size()[1] - 1}, {1, target_mrf:size()[2]}}] = target_mrf_:clone() 165 | tensor_target_mrf[{{count_mrf, count_mrf + target_mrf_:size()[1] - 1}, {1, tensor_target_mrf:size()[2]}, {1, tensor_target_mrf:size()[3]}, {1, tensor_target_mrf:size()[4]}}] = tensor_target_mrf_:clone() 166 | count_mrf = count_mrf + target_mrf_:size()[1] 167 | tensor_target_mrf_ = nil 168 | target_mrf_ = nil 169 | collectgarbage() 170 | end --for i_image = 1, #target_images do 171 | local target_mrfnorm = torch.sqrt(torch.sum(torch.cmul(target_mrf, target_mrf), 2)):resize(target_mrf:size()[1], 1, 1) 172 | 173 | -------------------------------------------------------- 174 | -- process source 175 | -------------------------------------------------------- 176 | -- print('*****************************************************') 177 | -- print(string.format('process source image')); 178 | -- print('*****************************************************') 179 | if params.gpu >= 0 then 180 | if params.backend == 'cudnn' then 181 | net:forward(pyramid_source_image_caffe[cur_res]:cuda()) 182 | else 183 | net:forward(pyramid_source_image_caffe[cur_res]:cl()) 184 | end 185 | else 186 | net:forward(pyramid_source_image_caffe[cur_res]) 187 | end 188 | local source_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float() 189 | if params.mrf_patch_size[id_mrf] > source_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > source_feature_map:size()[3] then 190 | print('source_image_caffe is not big enough for patch') 191 | print('source_image_caffe size: ') 192 | print(source_feature_map:size()) 193 | print('patch size: ') 194 | print(params.mrf_patch_size[id_mrf]) 195 | do return end 196 | end 197 | local source_xgrid, source_ygrid = drill_computeMRFfull(source_feature_map:float(), params.mrf_patch_size[id_mrf], params.source_sample_stride[id_mrf], -1) 198 | local source_x = torch.Tensor(source_xgrid:nElement() * source_ygrid:nElement()) 199 | local source_y = torch.Tensor(source_xgrid:nElement() * source_ygrid:nElement()) 200 | local count = 1 201 | for i_row = 1, source_ygrid:nElement() do 202 | for i_col = 1, source_xgrid:nElement() do 203 | source_x[count] = source_xgrid[i_col] 204 | source_y[count] = source_ygrid[i_row] 205 | count = count + 1 206 | end 207 | end 208 | -- local tensor_target_mrfnorm = torch.repeatTensor(target_mrfnorm:float(), 1, net:get(mrf_layers[id_mrf] - 1).output:size()[2] - (params.mrf_patch_size[id_mrf] - 1), net:get(mrf_layers[id_mrf] - 1).output:size()[3] - (params.mrf_patch_size[id_mrf] - 1)) 209 | 210 | -- print('*****************************************************') 211 | -- print(string.format('call layer implemetation')); 212 | -- print('*****************************************************') 213 | local nInputPlane = target_mrf:size()[2] / (params.mrf_patch_size[id_mrf] * params.mrf_patch_size[id_mrf]) 214 | local nOutputPlane = target_mrf:size()[1] 215 | local kW = params.mrf_patch_size[id_mrf] 216 | local kH = params.mrf_patch_size[id_mrf] 217 | local dW = 1 218 | local dH = 1 219 | local input_size = source_feature_map:size() 220 | 221 | local source_xgrid_, source_ygrid_ = 
drill_computeMRFfull(source_feature_map:float(), params.mrf_patch_size[id_mrf], 1, -1) 222 | local response_size = torch.LongStorage(3) 223 | response_size[1] = nOutputPlane 224 | response_size[2] = source_ygrid_:nElement() 225 | response_size[3] = source_xgrid_:nElement() 226 | net:get(mrf_layers[id_mrf]):implement(params.mode, target_mrf, tensor_target_mrf, target_mrfnorm, source_x, source_y, input_size, response_size, nInputPlane, nOutputPlane, kW, kH, 1, 1, params.mrf_confidence_threshold[id_mrf], params.mrf_weight[id_mrf], params.gpu_chunck_size_1, params.gpu_chunck_size_2, params.backend, params.gpu) 227 | target_mrf = nil 228 | tensor_target_mrf = nil 229 | source_feature_map = nil 230 | collectgarbage() 231 | end 232 | 233 | -------------------------------------------------------------------------------------------------------- 234 | -- local function for printing inter-mediate result 235 | -------------------------------------------------------------------------------------------------------- 236 | local function maybe_print(t, loss) 237 | local verbose = (params.print_iter > 0 and t % params.print_iter == 0) 238 | if verbose then 239 | print(string.format('Iteration %d, %d', t, params.num_iter[cur_res])) 240 | end 241 | end 242 | 243 | -------------------------------------------------------------------------------------------------------- 244 | -- local function for saving inter-mediate result 245 | -------------------------------------------------------------------------------------------------------- 246 | local function maybe_save(t) 247 | local should_save = params.save_iter > 0 and t % params.save_iter == 0 248 | should_save = should_save or t == params.num_iter 249 | if should_save then 250 | local disp = deprocess(input_image:float()) 251 | disp = image.minmax{tensor=disp, min=0, max=1} 252 | disp = image.scale(disp, render_width, render_height, 'bilinear') 253 | local filename = string.format('%s/res_%d_%d.jpg', params.output_folder, cur_res, t) 254 | image.save(filename, disp) 255 | end 256 | end 257 | 258 | -------------------------------------------------------------------------------------------------------- 259 | -- local function for computing energy 260 | -------------------------------------------------------------------------------------------------------- 261 | local function feval(x) 262 | num_calls = num_calls + 1 263 | net:forward(x) 264 | local grad = net:backward(x, dy) 265 | local loss = 0 266 | collectgarbage() 267 | 268 | maybe_print(num_calls, loss) 269 | maybe_save(num_calls) 270 | 271 | -- optim.lbfgs expects a vector for gradients 272 | return loss, grad:view(grad:nElement()) 273 | end 274 | 275 | ------------------------------------------------------------------------------- 276 | -- initialize network 277 | ------------------------------------------------------------------------------- 278 | if params.gpu >= 0 then 279 | if params.backend == 'cudnn' then 280 | require 'cutorch' 281 | require 'cunn' 282 | cutorch.setDevice(params.gpu + 1) 283 | else 284 | require 'cltorch' 285 | require 'clnn' 286 | cltorch.setDevice(params.gpu + 1) 287 | end 288 | else 289 | params.backend = 'nn' 290 | end 291 | 292 | if params.backend == 'cudnn' then 293 | require 'cudnn' 294 | end 295 | 296 | local loadcaffe_backend = params.backend 297 | if params.backend == 'clnn' then 298 | loadcaffe_backend = 'nn' 299 | end 300 | local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float() 301 | if params.gpu >= 0 then 302 | if params.backend == 
'cudnn' then 303 | cnn:cuda() 304 | else 305 | cnn:cl() 306 | end 307 | end 308 | print('cnn succesfully loaded') 309 | 310 | for i_res = 1, params.num_res do 311 | local timer = torch.Timer() 312 | 313 | cur_res = i_res 314 | num_calls = 0 315 | local optim_state = { 316 | maxIter = params.num_iter[i_res], 317 | nCorrection = params.nCorrection, 318 | verbose=true, 319 | tolX = 0, 320 | tolFun = 0, 321 | } 322 | 323 | -- initialize image and target 324 | if i_res == 1 then 325 | 326 | if params.ini_method == 'random' then 327 | input_image = torch.randn(pyramid_source_image_caffe[i_res]:size()):float():mul(0.001) 328 | elseif params.ini_method == 'image' then 329 | input_image = pyramid_source_image_caffe[i_res]:clone():float() 330 | else 331 | error('Invalid init type') 332 | end 333 | if params.gpu >= 0 then 334 | if params.backend == 'cudnn' then 335 | input_image = input_image:cuda() 336 | else 337 | input_image = input_image:cl() 338 | end 339 | end 340 | ----------------------------------------------------- 341 | -- add a tv layer 342 | ----------------------------------------------------- 343 | if params.tv_weight > 0 then 344 | local tv_mod = nn.TVLoss(params.tv_weight):float() 345 | if params.gpu >= 0 then 346 | if params.backend == 'cudnn' then 347 | tv_mod:cuda() 348 | else 349 | tv_mod:cl() 350 | end 351 | end 352 | i_net_layer = i_net_layer + 1 353 | net:add(tv_mod) 354 | end 355 | 356 | for i = 1, #cnn do 357 | if next_mrf_idx <= #mrf_layers_pretrained then 358 | local layer = cnn:get(i) 359 | 360 | i_net_layer = i_net_layer + 1 361 | net:add(layer) 362 | 363 | -- -- add mrfstatsyn layer 364 | if i == mrf_layers_pretrained[next_mrf_idx] then 365 | if add_mrf() == false then 366 | print('build network failed: adding mrf layer failed') 367 | do return end 368 | end 369 | end 370 | 371 | end 372 | end -- for i = 1, #cnn do 373 | 374 | cnn = nil 375 | collectgarbage() 376 | 377 | print(net) 378 | 379 | 380 | print('mrf_layers: ') 381 | for i = 1, #mrf_layers do 382 | print(mrf_layers[i]) 383 | end 384 | 385 | print('network has been built.') 386 | else 387 | input_image = image.scale(input_image:float(), pyramid_source_image_caffe[i_res]:size()[3], pyramid_source_image_caffe[i_res]:size()[2], 'bilinear'):clone() 388 | if params.gpu >= 0 then 389 | if params.backend == 'cudnn' then 390 | input_image = input_image:cuda() 391 | else 392 | input_image = input_image:cl() 393 | end 394 | end 395 | end 396 | 397 | print('*****************************************************') 398 | print(string.format('Synthesis started at resolution ', cur_res)) 399 | print('*****************************************************') 400 | 401 | print('Implementing mrf layers ...') 402 | for i = 1, #mrf_layers do 403 | if build_mrf(i) == false then 404 | print('build_mrf failed') 405 | do return end 406 | end 407 | end 408 | 409 | local mask = torch.Tensor(input_image:size()):fill(1) 410 | if params.gpu >= 0 then 411 | if params.backend == 'cudnn' then 412 | mask = mask:cuda() 413 | else 414 | mask = mask:cl() 415 | end 416 | end 417 | 418 | y = net:forward(input_image) 419 | dy = input_image.new(#y):zero() 420 | 421 | -- do optimizatoin 422 | local x, losses = mylbfgs(feval, input_image, optim_state, nil, mask) 423 | 424 | local t = timer:time().real 425 | print(string.format('Synthesis finished at resolution %d, %f seconds', cur_res, t)) 426 | end 427 | 428 | net = nil 429 | source_image = nil 430 | target_image = nil 431 | pyramid_source_image_caffe = nil 432 | pyramid_target_image_caffe = nil 433 | 
input_image = nil 434 | output_image = nil 435 | mrf_losses = nil 436 | mrf_layers = nil 437 | optim_state = nil 438 | collectgarbage() 439 | collectgarbage() 440 | 441 | end -- end of main 442 | 443 | 444 | local function run_test(content_name, style_name, ini_method, max_size, scaler, num_res, num_iter, mrf_layers, mrf_weight, mrf_patch_size, mrf_num_rotation, mrf_num_scale, mrf_sample_stride, mrf_synthesis_stride, mrf_confidence_threshold, tv_weight, mode, gpu_chunck_size_1, gpu_chunck_size_2, backend) 445 | 446 | -- local clock = os.clock 447 | -- function sleep(n) -- seconds 448 | -- local t0 = clock() 449 | -- while clock() - t0 <= n do end 450 | -- end 451 | 452 | local timer_TEST = torch.Timer() 453 | 454 | local flag_state = 1 455 | 456 | local params = {} 457 | 458 | -- externally set paramters 459 | params.content_name = content_name 460 | params.style_name = style_name 461 | --print(backend) 462 | params.ini_method = ini_method 463 | params.max_size = max_size or 384 464 | params.scaler = scaler or 2 465 | params.num_res = num_res or 3 466 | params.num_iter = num_iter or {100, 100, 100} 467 | params.mrf_layers = mrf_layers or {12, 21} 468 | params.mrf_weight = mrf_weight or {1e-4, 1e-4} 469 | params.mrf_patch_size = mrf_patch_size or {3, 3} 470 | params.target_num_rotation = mrf_num_rotation or 0 471 | params.target_num_scale = mrf_num_scale or 0 472 | params.target_sample_stride = mrf_sample_stride or {2, 2} 473 | params.source_sample_stride = mrf_synthesis_stride or {2, 2} 474 | params.mrf_confidence_threshold = mrf_confidence_threshold or {0, 0} 475 | params.tv_weight = tv_weight or 1e-3 476 | 477 | params.mode = mode or 'speed' 478 | params.gpu_chunck_size_1 = gpu_chunck_size_1 or 256 479 | params.gpu_chunck_size_2 = gpu_chunck_size_2 or 16 480 | params.backend = backend or 'cudnn' 481 | 482 | -- fixed parameters 483 | params.target_step_rotation = math.pi/24 484 | params.target_step_scale = 1.05 485 | 486 | params.proto_file = 'data/models/VGG_ILSVRC_19_layers_deploy.prototxt' 487 | params.model_file = 'data/models/VGG_ILSVRC_19_layers.caffemodel' 488 | params.gpu = 0 489 | params.nCorrection = 25 490 | params.print_iter = 10 491 | params.save_iter = 10 492 | params.gpu_chunck_size_1 = 32 493 | params.gpu_chunck_size_2 = 2 494 | 495 | params.output_folder = string.format('data/result/freesyn/MRF/%s_TO_%s', params.content_name, params.style_name) 496 | 497 | main(params) 498 | 499 | local t_test = timer_TEST:time().real 500 | print(string.format('Total time: %f seconds', t_test)) 501 | -- sleep(1) 502 | return flag_state 503 | end 504 | 505 | return { 506 | run_test = run_test, 507 | main = main 508 | } 509 | -------------------------------------------------------------------------------- /transfer_CNNMRF_wrapper.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | require 'image' 4 | require 'paths' 5 | require 'loadcaffe' 6 | 7 | paths.dofile('mylib/myoptimizer.lua') 8 | paths.dofile('mylib/tv.lua') 9 | paths.dofile('mylib/mrf.lua') 10 | paths.dofile('mylib/helper.lua') 11 | paths.dofile('mylib/content.lua') 12 | 13 | torch.setdefaulttensortype('torch.FloatTensor') -- float as default tensor type 14 | 15 | local function main(params) 16 | os.execute('mkdir data/result/') 17 | os.execute('mkdir data/result/trans/') 18 | os.execute('mkdir data/result/trans/MRF/') 19 | os.execute(string.format('mkdir %s', params.output_folder)) 20 | 21 | local net = nn.Sequential() 22 | local next_content_idx = 1 23 | 
local i_net_layer = 0 24 | local num_calls = 0 25 | local content_losses = {} 26 | local content_layers = {} 27 | local i_content_layer = 0 28 | local next_mrf_idx = 1 29 | local mrf_losses = {} 30 | local mrf_layers = {} 31 | local i_mrf_layer = 0 32 | local input_image 33 | local output_image 34 | local cur_res 35 | local content_layers_pretrained = params.content_layers 36 | local mrf_layers_pretrained = params.mrf_layers 37 | 38 | ----------------------------------------------------------------------------------- 39 | -- read images 40 | ----------------------------------------------------------------------------------- 41 | local source_image = image.load(string.format('data/content/%s.jpg', params.content_name), 3) 42 | local target_image = image.load(string.format('data/style/%s.jpg', params.style_name), 3) 43 | 44 | source_image = image.scale(source_image, params.max_size, 'bilinear') 45 | target_image = image.scale(target_image, params.max_size, 'bilinear') 46 | 47 | local render_height = source_image:size()[2] 48 | local render_width = source_image:size()[3] 49 | local source_image_caffe = preprocess(source_image):float() 50 | local target_image_caffe = preprocess(target_image):float() 51 | 52 | local pyramid_source_image_caffe = {} 53 | for i_res = 1, params.num_res do 54 | pyramid_source_image_caffe[i_res] = image.scale(source_image_caffe, math.ceil(source_image:size()[3] * math.pow(0.5, params.num_res - i_res)), math.ceil(source_image:size()[2] * math.pow(0.5, params.num_res - i_res)), 'bilinear') 55 | end 56 | 57 | local pyramid_target_image_caffe = {} 58 | for i_res = 1, params.num_res do 59 | pyramid_target_image_caffe[i_res] = image.scale(target_image_caffe, math.ceil(target_image:size()[3] * math.pow(0.5, params.num_res - i_res)), math.ceil(target_image:size()[2] * math.pow(0.5, params.num_res - i_res)), 'bilinear') 60 | end 61 | 62 | ------------------------------------------------------------------------------------------------------ 63 | -- local function for adding a content layer 64 | ------------------------------------------------------------------------------------------------------ 65 | local function add_content() 66 | local source = pyramid_source_image_caffe[cur_res]:clone() 67 | if params.gpu >= 0 then 68 | if params.backend == 'cudnn' then 69 | source = source:cuda() 70 | else 71 | source = source:cl() 72 | end 73 | end 74 | local feature = net:forward(source):clone() -- generate the content target using content image 75 | if params.gpu >= 0 then 76 | if params.backend == 'cudnn' then 77 | feature = feature:cuda() 78 | else 79 | feature = feature:cl() 80 | end 81 | end 82 | 83 | local norm = params.normalize_gradients 84 | print(params.normalize_gradients) 85 | local loss_module = nn.ContentLoss(params.content_weight, feature, norm):float() 86 | if params.gpu >= 0 then 87 | if params.backend == 'cudnn' then 88 | loss_module:cuda() 89 | else 90 | loss_module:cl() 91 | end 92 | end 93 | 94 | i_content_layer = i_content_layer + 1 95 | i_net_layer = i_net_layer + 1 96 | next_content_idx = next_content_idx + 1 97 | net:add(loss_module) 98 | table.insert(content_losses, loss_module) 99 | table.insert(content_layers, i_content_layer, i_net_layer) 100 | end 101 | 102 | local function update_content(idx_layer, idx_content) 103 | local source = pyramid_source_image_caffe[cur_res]:clone() 104 | if params.gpu >= 0 then 105 | if params.backend == 'cudnn' then 106 | source = source:cuda() 107 | else 108 | source = source:cl() 109 | end 110 | end 111 | net:forward(source) 
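-- the forward pass above refreshes net:get(idx_layer).output with the feature response of the
-- re-scaled content image at the current resolution; it is used below as the new content target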
112 | local feature = net:get(idx_layer).output:clone() 113 | if params.gpu >= 0 then 114 | if params.backend == 'cudnn' then 115 | feature = feature:cuda() 116 | else 117 | feature = feature:cl() 118 | end 119 | end 120 | 121 | local norm = params.normalize_gradients 122 | local loss_module = nn.ContentLoss(params.content_weight, feature, norm):float() 123 | if params.gpu >= 0 then 124 | if params.backend == 'cudnn' then 125 | loss_module:cuda() 126 | else 127 | loss_module:cl() 128 | end 129 | end 130 | net:get(idx_layer):update(loss_module) 131 | end 132 | 133 | 134 | -- -------------------------------------------------------------------------------------------------------- 135 | -- -- local function for adding a mrf layer, with image rotation andn scaling 136 | -- -------------------------------------------------------------------------------------------------------- 137 | local function add_mrf() 138 | local mrf_module = nn.MRFMM() 139 | i_mrf_layer = i_mrf_layer + 1 140 | i_net_layer = i_net_layer + 1 141 | next_mrf_idx = next_mrf_idx + 1 142 | if params.gpu >= 0 then 143 | if params.backend == 'cudnn' then 144 | mrf_module:cuda() 145 | else 146 | mrf_module:cl() 147 | end 148 | end 149 | net:add(mrf_module) 150 | table.insert(mrf_losses, mrf_module) 151 | table.insert(mrf_layers, i_mrf_layer, i_net_layer) 152 | return true 153 | end 154 | 155 | local function build_mrf(id_mrf) 156 | -------------------------------------------------------- 157 | -- deal with target 158 | -------------------------------------------------------- 159 | local target_images_caffe = {} 160 | for i_r = -params.target_num_rotation, params.target_num_rotation do 161 | local alpha = params.target_step_rotation * i_r 162 | local min_x, min_y, max_x, max_y = computeBB(pyramid_target_image_caffe[cur_res]:size()[3], pyramid_target_image_caffe[cur_res]:size()[2], alpha) 163 | local target_image_rt_caffe = image.rotate(pyramid_target_image_caffe[cur_res], alpha, 'bilinear') 164 | target_image_rt_caffe = target_image_rt_caffe[{{1, target_image_rt_caffe:size()[1]}, {min_y, max_y}, {min_x, max_x}}] 165 | 166 | for i_s = -params.target_num_scale, params.target_num_scale do 167 | local max_sz = math.floor(math.max(target_image_rt_caffe:size()[2], target_image_rt_caffe:size()[3]) * torch.pow(params.target_step_scale, i_s)) 168 | local target_image_rt_s_caffe = image.scale(target_image_rt_caffe, max_sz, 'bilinear') 169 | if params.gpu >= 0 then 170 | if params.backend == 'cudnn' then 171 | target_image_rt_s_caffe = target_image_rt_s_caffe:cuda() 172 | else 173 | target_image_rt_s_caffe = target_image_rt_s_caffe:cl() 174 | end 175 | end 176 | table.insert(target_images_caffe, target_image_rt_s_caffe) 177 | end 178 | end 179 | 180 | -- compute the coordinates on the pixel layer 181 | local target_x 182 | local target_y 183 | local target_x_per_image = {} 184 | local target_y_per_image = {} 185 | local target_imageid 186 | -- print('*****************************************************') 187 | -- print(string.format('build target mrf')); 188 | -- print('*****************************************************') 189 | for i_image = 1, #target_images_caffe do 190 | -- print(string.format('image %d, ', i_image)) 191 | net:forward(target_images_caffe[i_image]) 192 | local target_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float() 193 | 194 | if params.mrf_patch_size[id_mrf] > target_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > target_feature_map:size()[3] then 195 | print('target_images is not big enough for 
patch') 196 | print('target_images size: ') 197 | print(target_feature_map:size()) 198 | print('patch size: ') 199 | print(params.mrf_patch_size[id_mrf]) 200 | do return end 201 | end 202 | local target_x_, target_y_ = drill_computeMRFfull(target_feature_map, params.mrf_patch_size[id_mrf], params.target_sample_stride[id_mrf], -1) 203 | 204 | 205 | local x = torch.Tensor(target_x_:nElement() * target_y_:nElement()) 206 | local y = torch.Tensor(target_x_:nElement() * target_y_:nElement()) 207 | local target_imageid_ = torch.Tensor(target_x_:nElement() * target_y_:nElement()):fill(i_image) 208 | local count = 1 209 | for i_row = 1, target_y_:nElement() do 210 | for i_col = 1, target_x_:nElement() do 211 | x[count] = target_x_[i_col] 212 | y[count] = target_y_[i_row] 213 | count = count + 1 214 | end 215 | end 216 | if i_image == 1 then 217 | target_x = x:clone() 218 | target_y = y:clone() 219 | target_imageid = target_imageid_:clone() 220 | else 221 | target_x = torch.cat(target_x, x, 1) 222 | target_y = torch.cat(target_y, y, 1) 223 | target_imageid = torch.cat(target_imageid, target_imageid_, 1) 224 | end 225 | table.insert(target_x_per_image, x) 226 | table.insert(target_y_per_image, y) 227 | end -- end for i_image = 1, #target_images do 228 | 229 | -- print('*****************************************************') 230 | -- print(string.format('collect mrf')); 231 | -- print('*****************************************************') 232 | 233 | local num_channel_mrf = net:get(mrf_layers[id_mrf] - 1).output:size()[1] 234 | local target_mrf = torch.Tensor(target_x:nElement(), num_channel_mrf * params.mrf_patch_size[id_mrf] * params.mrf_patch_size[id_mrf]) 235 | local tensor_target_mrf = torch.Tensor(target_x:nElement(), num_channel_mrf, params.mrf_patch_size[id_mrf], params.mrf_patch_size[id_mrf]) 236 | local count_mrf = 1 237 | for i_image = 1, #target_images_caffe do 238 | -- print(string.format('image %d, ', i_image)); 239 | net:forward(target_images_caffe[i_image]) 240 | -- sample mrf on mrf_layers 241 | local tensor_target_mrf_, target_mrf_ = sampleMRFAndTensorfromLocation2(target_x_per_image[i_image], target_y_per_image[i_image], net:get(mrf_layers[id_mrf] - 1).output:float(), params.mrf_patch_size[id_mrf]) 242 | target_mrf[{{count_mrf, count_mrf + target_mrf_:size()[1] - 1}, {1, target_mrf:size()[2]}}] = target_mrf_:clone() 243 | tensor_target_mrf[{{count_mrf, count_mrf + target_mrf_:size()[1] - 1}, {1, tensor_target_mrf:size()[2]}, {1, tensor_target_mrf:size()[3]}, {1, tensor_target_mrf:size()[4]}}] = tensor_target_mrf_:clone() 244 | count_mrf = count_mrf + target_mrf_:size()[1] 245 | tensor_target_mrf_ = nil 246 | target_mrf_ = nil 247 | collectgarbage() 248 | end --for i_image = 1, #target_images do 249 | local target_mrfnorm = torch.sqrt(torch.sum(torch.cmul(target_mrf, target_mrf), 2)):resize(target_mrf:size()[1], 1, 1) 250 | 251 | -------------------------------------------------------- 252 | -- process source 253 | -------------------------------------------------------- 254 | -- print('*****************************************************') 255 | -- print(string.format('process source image')); 256 | -- print('*****************************************************') 257 | if params.gpu >= 0 then 258 | if params.backend == 'cudnn' then 259 | net:forward(pyramid_source_image_caffe[cur_res]:cuda()) 260 | else 261 | net:forward(pyramid_source_image_caffe[cur_res]:cl()) 262 | end 263 | else 264 | net:forward(pyramid_source_image_caffe[cur_res]) 265 | end 266 | local source_feature_map 
= net:get(mrf_layers[id_mrf] - 1).output:float() 267 | if params.mrf_patch_size[id_mrf] > source_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > source_feature_map:size()[3] then 268 | print('source_image_caffe is not big enough for patch') 269 | print('source_image_caffe size: ') 270 | print(source_feature_map:size()) 271 | print('patch size: ') 272 | print(params.mrf_patch_size[id_mrf]) 273 | do return end 274 | end 275 | local source_xgrid, source_ygrid = drill_computeMRFfull(source_feature_map:float(), params.mrf_patch_size[id_mrf], params.source_sample_stride[id_mrf], -1) 276 | local source_x = torch.Tensor(source_xgrid:nElement() * source_ygrid:nElement()) 277 | local source_y = torch.Tensor(source_xgrid:nElement() * source_ygrid:nElement()) 278 | local count = 1 279 | for i_row = 1, source_ygrid:nElement() do 280 | for i_col = 1, source_xgrid:nElement() do 281 | source_x[count] = source_xgrid[i_col] 282 | source_y[count] = source_ygrid[i_row] 283 | count = count + 1 284 | end 285 | end 286 | -- local tensor_target_mrfnorm = torch.repeatTensor(target_mrfnorm:float(), 1, net:get(mrf_layers[id_mrf] - 1).output:size()[2] - (params.mrf_patch_size[id_mrf] - 1), net:get(mrf_layers[id_mrf] - 1).output:size()[3] - (params.mrf_patch_size[id_mrf] - 1)) 287 | 288 | -- print('*****************************************************') 289 | -- print(string.format('call layer implemetation')); 290 | -- print('*****************************************************') 291 | local nInputPlane = target_mrf:size()[2] / (params.mrf_patch_size[id_mrf] * params.mrf_patch_size[id_mrf]) 292 | local nOutputPlane = target_mrf:size()[1] 293 | local kW = params.mrf_patch_size[id_mrf] 294 | local kH = params.mrf_patch_size[id_mrf] 295 | local dW = 1 296 | local dH = 1 297 | local input_size = source_feature_map:size() 298 | 299 | local source_xgrid_, source_ygrid_ = drill_computeMRFfull(source_feature_map:float(), params.mrf_patch_size[id_mrf], 1, -1) 300 | local response_size = torch.LongStorage(3) 301 | response_size[1] = nOutputPlane 302 | response_size[2] = source_ygrid_:nElement() 303 | response_size[3] = source_xgrid_:nElement() 304 | net:get(mrf_layers[id_mrf]):implement(params.mode, target_mrf, tensor_target_mrf, target_mrfnorm, source_x, source_y, input_size, response_size, nInputPlane, nOutputPlane, kW, kH, 1, 1, params.mrf_confidence_threshold[id_mrf], params.mrf_weight[id_mrf], params.gpu_chunck_size_1, params.gpu_chunck_size_2, params.backend, params.gpu) 305 | target_mrf = nil 306 | tensor_target_mrf = nil 307 | source_feature_map = nil 308 | collectgarbage() 309 | end 310 | 311 | -------------------------------------------------------------------------------------------------------- 312 | -- local function for printing inter-mediate result 313 | -------------------------------------------------------------------------------------------------------- 314 | local function maybe_print(t, loss) 315 | local verbose = (params.print_iter > 0 and t % params.print_iter == 0) 316 | if verbose then 317 | print(string.format('Iteration %d, %d', t, params.num_iter[cur_res])) 318 | end 319 | end 320 | 321 | -------------------------------------------------------------------------------------------------------- 322 | -- local function for saving inter-mediate result 323 | -------------------------------------------------------------------------------------------------------- 324 | local function maybe_save(t) 325 | local should_save = params.save_iter > 0 and t % params.save_iter == 0 326 | should_save = 
should_save or t == params.num_iter 327 | if should_save then 328 | local disp = deprocess(input_image:float()) 329 | disp = image.minmax{tensor=disp, min=0, max=1} 330 | disp = image.scale(disp, render_width, render_height, 'bilinear') 331 | local filename = string.format('%s/res_%d_%d.jpg', params.output_folder, cur_res, t) 332 | image.save(filename, disp) 333 | end 334 | end 335 | 336 | -------------------------------------------------------------------------------------------------------- 337 | -- local function for computing energy 338 | -------------------------------------------------------------------------------------------------------- 339 | local function feval(x) 340 | num_calls = num_calls + 1 341 | net:forward(x) 342 | local grad = net:backward(x, dy) 343 | local loss = 0 344 | collectgarbage() 345 | 346 | maybe_print(num_calls, loss) 347 | maybe_save(num_calls) 348 | 349 | -- optim.lbfgs expects a vector for gradients 350 | return loss, grad:view(grad:nElement()) 351 | end 352 | 353 | ------------------------------------------------------------------------------- 354 | -- initialize network 355 | ------------------------------------------------------------------------------- 356 | if params.gpu >= 0 then 357 | if params.backend == 'cudnn' then 358 | require 'cutorch' 359 | require 'cunn' 360 | cutorch.setDevice(params.gpu + 1) 361 | else 362 | require 'cltorch' 363 | require 'clnn' 364 | cltorch.setDevice(params.gpu + 1) 365 | end 366 | else 367 | params.backend = 'nn' 368 | end 369 | 370 | if params.backend == 'cudnn' then 371 | require 'cudnn' 372 | end 373 | 374 | local loadcaffe_backend = params.backend 375 | if params.backend == 'clnn' then 376 | loadcaffe_backend = 'nn' 377 | end 378 | local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float() 379 | if params.gpu >= 0 then 380 | if params.backend == 'cudnn' then 381 | cnn:cuda() 382 | else 383 | cnn:cl() 384 | end 385 | end 386 | print('cnn succesfully loaded') 387 | 388 | for i_res = 1, params.num_res do 389 | local timer = torch.Timer() 390 | 391 | cur_res = i_res 392 | num_calls = 0 393 | local optim_state = { 394 | maxIter = params.num_iter[i_res], 395 | nCorrection = params.nCorrection, 396 | verbose=true, 397 | tolX = 0, 398 | tolFun = 0, 399 | } 400 | 401 | -- initialize image and target 402 | if i_res == 1 then 403 | 404 | if params.ini_method == 'random' then 405 | input_image = torch.randn(pyramid_source_image_caffe[i_res]:size()):float():mul(0.001) 406 | elseif params.ini_method == 'image' then 407 | input_image = pyramid_source_image_caffe[i_res]:clone():float() 408 | else 409 | error('Invalid init type') 410 | end 411 | if params.gpu >= 0 then 412 | if params.backend == 'cudnn' then 413 | input_image = input_image:cuda() 414 | else 415 | input_image = input_image:cl() 416 | end 417 | end 418 | 419 | ----------------------------------------------------- 420 | -- add a tv layer 421 | ----------------------------------------------------- 422 | if params.tv_weight > 0 then 423 | local tv_mod = nn.TVLoss(params.tv_weight):float() 424 | if params.gpu >= 0 then 425 | if params.backend == 'cudnn' then 426 | tv_mod:cuda() 427 | else 428 | tv_mod:cl() 429 | end 430 | end 431 | i_net_layer = i_net_layer + 1 432 | net:add(tv_mod) 433 | end 434 | 435 | for i = 1, #cnn do 436 | if next_content_idx <= #content_layers_pretrained or next_mrf_idx <= #mrf_layers_pretrained then 437 | local layer = cnn:get(i) 438 | 439 | i_net_layer = i_net_layer + 1 440 | net:add(layer) 441 | 442 | -- add a 
content_losses layer 443 | if i == content_layers_pretrained[next_content_idx] then 444 | add_content() 445 | end 446 | 447 | -- -- add mrfstatsyn layer 448 | if i == mrf_layers_pretrained[next_mrf_idx] then 449 | if add_mrf() == false then 450 | print('build network failed: adding mrf layer failed') 451 | do return end 452 | end 453 | end 454 | 455 | end 456 | end -- for i = 1, #cnn do 457 | 458 | cnn = nil 459 | collectgarbage() 460 | 461 | print(net) 462 | 463 | print('content_layers: ') 464 | for i = 1, #content_layers do 465 | print(content_layers[i]) 466 | end 467 | 468 | print('mrf_layers: ') 469 | for i = 1, #mrf_layers do 470 | print(mrf_layers[i]) 471 | end 472 | 473 | print('network has been built.') 474 | else 475 | input_image = image.scale(input_image:float(), pyramid_source_image_caffe[i_res]:size()[3], pyramid_source_image_caffe[i_res]:size()[2], 'bilinear'):clone() 476 | if params.gpu >= 0 then 477 | if params.backend == 'cudnn' then 478 | input_image = input_image:cuda() 479 | else 480 | input_image = input_image:cl() 481 | end 482 | end 483 | 484 | -- -- update content layers 485 | for i_layer = 1, #content_layers do 486 | update_content(content_layers[i_layer], i_layer) 487 | -- print(string.format('content_layers %d has been updated', content_layers[i_layer])) 488 | end 489 | 490 | end 491 | 492 | print('*****************************************************') 493 | print(string.format('Synthesis started at resolution ', cur_res)) 494 | print('*****************************************************') 495 | 496 | print('Implementing mrf layers ...') 497 | for i = 1, #mrf_layers do 498 | if build_mrf(i) == false then 499 | print('build_mrf failed') 500 | do return end 501 | end 502 | end 503 | 504 | local mask = torch.Tensor(input_image:size()):fill(1) 505 | if params.gpu >= 0 then 506 | if params.backend == 'cudnn' then 507 | mask = mask:cuda() 508 | else 509 | mask = mask:cl() 510 | end 511 | end 512 | 513 | y = net:forward(input_image) 514 | dy = input_image.new(#y):zero() 515 | 516 | -- do optimizatoin 517 | local x, losses = mylbfgs(feval, input_image, optim_state, nil, mask) 518 | 519 | local t = timer:time().real 520 | print(string.format('Synthesis finished at resolution %d, %f seconds', cur_res, t)) 521 | end 522 | 523 | net = nil 524 | source_image = nil 525 | target_image = nil 526 | pyramid_source_image_caffe = nil 527 | pyramid_target_image_caffe = nil 528 | input_image = nil 529 | output_image = nil 530 | content_losses = nil 531 | content_layers = nil 532 | mrf_losses = nil 533 | mrf_layers = nil 534 | optim_state = nil 535 | collectgarbage() 536 | collectgarbage() 537 | 538 | end -- end of main 539 | 540 | 541 | local function run_test(content_name, style_name, ini_method, max_size, num_res, num_iter, mrf_layers, mrf_weight, mrf_patch_size, mrf_num_rotation, mrf_num_scale, mrf_sample_stride, mrf_synthesis_stride, mrf_confidence_threshold, content_layers, content_weight, tv_weight, mode, gpu_chunck_size_1, gpu_chunck_size_2, backend) 542 | -- local clock = os.clock 543 | -- function sleep(n) -- seconds 544 | -- local t0 = clock() 545 | -- while clock() - t0 <= n do end 546 | -- end 547 | 548 | local timer_TEST = torch.Timer() 549 | 550 | local flag_state = 1 551 | 552 | local params = {} 553 | 554 | -- externally set paramters 555 | params.content_name = content_name 556 | params.style_name = style_name 557 | params.ini_method = ini_method 558 | params.max_size = max_size or 384 559 | params.num_res = num_res or 3 560 | params.num_iter = num_iter or {100, 
100, 100} 561 | params.mrf_layers = mrf_layers or {12, 21} 562 | params.mrf_weight = mrf_weight or {1e-4, 1e-4} 563 | params.mrf_patch_size = mrf_patch_size or {3, 3} 564 | params.target_num_rotation = mrf_num_rotation or 0 565 | params.target_num_scale = mrf_num_scale or 0 566 | params.target_sample_stride = mrf_sample_stride or {2, 2} 567 | params.source_sample_stride = mrf_synthesis_stride or {2, 2} 568 | params.mrf_confidence_threshold = mrf_confidence_threshold or {0, 0} 569 | params.content_layers = content_layers or {21} 570 | params.content_weight = content_weight or 2e1 571 | params.tv_weight = tv_weight or 1e-3 572 | 573 | params.mode = mode or 'speed' 574 | params.gpu_chunck_size_1 = gpu_chunck_size_1 or 256 575 | params.gpu_chunck_size_2 = gpu_chunck_size_2 or 16 576 | params.backend = backend or 'cudnn' 577 | -- fixed parameters 578 | params.target_step_rotation = math.pi/24 579 | params.target_step_scale = 1.05 580 | 581 | params.output_folder = string.format('data/result/trans/MRF/%s_TO_%s',params.content_name, params.style_name) 582 | params.proto_file = 'data/models/VGG_ILSVRC_19_layers_deploy.prototxt' 583 | params.model_file = 'data/models/VGG_ILSVRC_19_layers.caffemodel' 584 | params.gpu = 0 585 | params.nCorrection = 25 586 | params.print_iter = 10 587 | params.save_iter = 10 588 | 589 | params.output_folder = string.format('data/result/trans/MRF/%s_TO_%s',params.content_name, params.style_name) 590 | 591 | main(params) 592 | 593 | local t_test = timer_TEST:time().real 594 | print(string.format('Total time: %f seconds', t_test)) 595 | -- sleep(1) 596 | return flag_state 597 | end 598 | 599 | return { 600 | run_test = run_test, 601 | main = main 602 | } 603 | --------------------------------------------------------------------------------