├── .gitignore
├── License
├── README.md
├── cnnmrf.lua
├── data
│ ├── content
│ │ ├── 0.jpg
│ │ ├── 1.jpg
│ │ ├── 2.jpg
│ │ └── potrait1.jpg
│ ├── examples
│ │ ├── 0_to_0.png
│ │ ├── 1_to_1.png
│ │ ├── Interpolation
│ │ │ ├── 2_morecontent.png
│ │ │ ├── 2_morecontent2.png
│ │ │ ├── 3_balanced.png
│ │ │ ├── 4_morestyle.png
│ │ │ └── 4_morestyle2.png
│ │ ├── MultiRes
│ │ │ ├── syn_res_1.png
│ │ │ ├── syn_res_2.png
│ │ │ ├── syn_res_3.png
│ │ │ └── syn_res_4.png
│ │ ├── content.jpg
│ │ ├── content2.jpg
│ │ ├── style.jpg
│ │ └── style2.jpg
│ ├── models
│ │ ├── _gitignore
│ │ └── download_models.sh
│ └── style
│   ├── 0.jpg
│   ├── 1.jpg
│   ├── 2.jpg
│   └── picasso.jpg
├── mylib
│ ├── content.lua
│ ├── helper.lua
│ ├── mrf.lua
│ ├── myoptimizer.lua
│ ├── style.lua
│ └── tv.lua
├── run_syn.lua
├── run_trans.lua
├── syn_CNNMRF_wrapper.lua
└── transfer_CNNMRF_wrapper.lua
/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 |
--------------------------------------------------------------------------------
/License:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | =====================
3 |
4 | Copyright © 2016 Chuan Li and Michael Wand
5 |
6 | Permission is hereby granted, free of charge, to any person
7 | obtaining a copy of this software and associated documentation
8 | files (the “Software”), to deal in the Software without
9 | restriction, including without limitation the rights to use,
10 | copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the
12 | Software is furnished to do so, subject to the following
13 | conditions:
14 |
15 | The above copyright notice and this permission notice shall be
16 | included in all copies or substantial portions of the Software.
17 |
18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 | OTHER DEALINGS IN THE SOFTWARE.
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CNNMRF
2 | This is the Torch implementation of the paper "[Combining Markov Random Fields and Convolutional Neural Networks for Image Synthesis](http://arxiv.org/abs/1601.04589)".
3 |
4 | This algorithm is for
5 | * un-guided image synthesis (for example, classical texture synthesis)
6 | * guided image synthesis (for example, transferring the style between different images)
7 |
8 | # Hardware
9 | * For the CUDA backend: choose 'speed' if you have at least 4GB of graphics memory, and 'memory' otherwise. There is also an OpenCL backend (thanks to Dionýz Lazar). See "run_trans.lua" and "run_syn.lua" for our reference tests with a Titan X, GT750M 2G and Sapphire Radeon R9 280 3G.
10 |
11 |
12 | # Examples
13 | * guided image synthesis
14 |
15 |
16 |
17 |
18 |
19 |
20 | A photo (left) is transferred into a painting (right) using Picasso's self-portrait from 1907 (middle) as the reference style. Notice that important facial features, such as the eyes and nose, faithfully follow those in Picasso's painting.
21 |
22 |
23 |
24 | 
25 | In this example, we first transfer a cartoon into a photo.
26 |
27 |
28 | 
29 | We then swap the two inputs and transfer the photo into the cartoon.
30 |
31 |
32 |
33 |
34 | 
35 |
36 |
37 |
38 | 
39 | It is possible to balance the amount of content and style in the result: pictures in the second column take more content, and pictures in the third column take more style.
40 |
41 | # Setup
42 |
43 | As building Torch with the latest CUDA can be troublesome, we recommend the following steps to anyone who wants to reproduce the results.
44 | They have been tested on Ubuntu with CUDA 10.
45 |
46 | __Step One: Install CUDA 10 and CUDNN 7.6.2__
47 |
48 | If you have a fresh Ubuntu, we recommend [Lambda Stack](https://lambdalabs.com/lambda-stack-deep-learning-software) which helps you install the latest drivers, libraries, and frameworks for deep learning. Otherwise, you can install the CUDA toolkit and CUDNN from these links:
49 | * [CUDA](https://developer.nvidia.com/cuda-downloads)
50 | * [CUDNN](https://developer.nvidia.com/cudnn)
51 |
52 | __Step Two: Install Torch__
53 | ```
54 | git clone https://github.com/nagadomi/distro.git ~/torch --recursive
55 | cd ~/torch
56 | ./install-deps
57 | ./clean.sh
58 | ./update.sh
59 |
60 | . ~/torch/install/bin/torch-activate
61 | sudo apt-get install libprotobuf-dev protobuf-compiler
62 | luarocks install loadcaffe
63 | ```
64 |
65 | __Step Three: Download Pre-trained VGG Network__
66 | Pre-trained network:
67 |
68 | ```
69 | cd data/models
70 | ./download_models.sh
71 | ```
72 |
73 | # Un-guided Synthesis
74 |
75 | ```
76 | qlua cnnmrf.lua -type syn
77 | ```
78 |
79 | * The most important parameters are '-style_name' for specifying the style input image and '-max_size' for the output image size (see the example below).
80 | * The content/style images are located in the folders "data/content" and "data/style" respectively. Notice that by default the content image is the same as the style image; the content image is only used for initialization (optional).
81 | * Results are located in the folder "data/result/freesyn/MRF"
82 | * All parameters are explained in "qlua cnnmrf.lua --help".
83 |
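For example, the following command (a minimal sketch using the option names defined in cnnmrf.lua; '2' refers to "data/style/2.jpg") synthesizes a texture from style image 2 at the default resolution:

```
qlua cnnmrf.lua -type syn -style_name 2 -max_size 384 -ini_method random
```
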
84 | # Guided Synthesis
85 |
86 | ```qlua run_trans.lua```
87 |
88 | * The most important parameters are '-style_name' for specifying the style input image, '-content_name' for specifying the content input image, and '-max_size' for the output image size (these are the option names used by cnnmrf.lua; see the example below).
89 | * The content/style images are located in the folders "data/content" and "data/style" respectively.
90 | * Results are located in the folder "data/result/trans/MRF"
91 | * Parameters are defined & explained in "run_trans.lua".
92 |
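Guided synthesis can also be run through cnnmrf.lua, which defaults to '-type transfer'. A minimal sketch using its documented options and the bundled example images (potrait1.jpg and picasso.jpg):

```
qlua cnnmrf.lua -type transfer -content_name potrait1 -style_name picasso -max_size 384
```
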
93 | # Acknowledgement
94 | * This work is inspired and closely related to the paper: [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576) by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge. The key difference between their method and our method is the different "style" constraints: While Gatys et al used a global constraint for non-photorealistic synthesis, we use a local constraint which works for both non-photorealistic and photorealistic synthesis. See our paper for more details.
95 | * Our implementation is based on Justin Johnson's implementation of [Neural Style](https://github.com/jcjohnson/neural-style).
96 |
97 |
98 |
--------------------------------------------------------------------------------
/cnnmrf.lua:
--------------------------------------------------------------------------------
1 | -- -*- coding: utf-8 -*-
2 | require 'torch'
3 | require 'paths'
4 |
5 | paths.dofile('mylib/helper.lua')
6 |
7 | --adapted from http://lua-users.org/wiki/SplitJoin
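-- e.g. split('100,100,100', ',', tonumber) --> {100, 100, 100}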
8 | function split(str, pat, cast_to_func)
9 | local t = {} -- NOTE: use {n = 0} in Lua-5.0
10 | local fpat = "(.-)" .. pat
11 | local last_end = 1
12 | local s, e, cap = str:find(fpat, 1)
13 | while s do
14 | if s ~= 1 or cap ~= "" then
15 | table.insert(t, cast_to_func(cap))
16 | end
17 | last_end = e+1
18 | s, e, cap = str:find(fpat, last_end)
19 | end
20 | if last_end <= #str then
21 | cap = str:sub(last_end)
22 | table.insert(t, cast_to_func(cap))
23 | end
24 | return t
25 | end
26 |
27 | -----------------------------------------
28 | -- Parameters
29 | -----------------------------------------
30 |
31 | cmd = torch.CmdLine()
32 |
33 | cmd:text('Below are all options with their default values in [].')
34 | cmd:text()
35 | cmd:text('Basic options: ')
36 | cmd:option('-content_name', 'potrait1', "The content image located in folder 'data/content'")
37 | cmd:option('-style_name', 'picasso', "The style image located in folder 'data/style'")
38 | cmd:option('-ini_method', 'image', "Initial method, set to 'image' to use the content image as the initialization; set to 'random' to use random noise.")
39 | cmd:option('-type', 'transfer', 'Use Guided Synthesis (transfer) or Un-guided Synthesis (syn)')
40 | cmd:option('-max_size',384, "Maximum size of the image. Larger image needs more time and memory.")
41 | cmd:option('-backend','cudnn', "Use 'cudnn' for CUDA-enabled GPUs or 'clnn' for OpenCL.")
42 | cmd:option('-mode','speed', "Try 'speed' if you have a GPU with more than 4GB memory, and try 'memory' otherwise. The 'speed' mode is significantly faster (especially for synthesizing high resolutions) at the cost of higher GPU memory. ")
43 | cmd:option('-num_res',3, "Number of resolutions. Notice the lowest resolution image should be larger than the patch size otherwise it won't synthesize.")
44 | cmd:option('-num_iter','100,100,100', "Number of iterations for each resolution. You can use comma-separated values.")
45 |
46 | cmd:text()
47 | cmd:text('Advanced options: ')
48 | cmd:option('-mrf_layers','12,21', "The layers for MRF constraint. Usually layer 21 alone already gives decent results. Including layer 12 may improve the results but at significantly more computational cost. You can use comma-separated values.")
49 | cmd:option('-mrf_weight','1e-4,1e-4', "Weight for each MRF layer. Higher weights lead to more style-faithful results. You can use comma-separated values.")
50 | cmd:option('-mrf_patch_size', '3,3', "The patch size for MRF constraint. This value is defined separately for each MRF layer. You can use comma-separated values.")
51 | cmd:option('-target_num_rotation',0, 'To match objects of different poses. This value is shared by all MRF layers. The total number of rotational copies is "2 * mrf_num_rotation + 1"')
52 | cmd:option('-target_num_scale',0, 'To match objects of different scales. This value is shared by all MRF layers. The total number of scaled copies is "2 * mrf_num_scale + 1"')
53 | cmd:option('-target_sample_stride','2,2', "Stride for sampling MRF patches on the style image. This value is defined separately for each MRF layer. You can use comma-separated values.")
54 | cmd:option('-mrf_confidence_threshold','0,0', "Threshold for filtering out bad matches. Default value 0 means we keep all matches. This value is defined separately for all layers. You can use comma-separated values.")
55 | cmd:option('-source_sample_stride','2,2', "Stride for sampling MRF patches on the synthesis image. This value is defined separately for each MRF layer. This setting is only relevant for the syn mode. You can use comma-separated values.")
56 |
57 | cmd:option('-content_layers','21', "The layers for content constraint. You can use comma-separated values.")
58 | cmd:option('-content_weight',2e1, "The weight for content constraint. Increasing this value will make the result more content faithful. Decreasing the value will make the method more style faithful. Notice this value should be increased (for example, doubled) if layer 12 is included in the MRF constraint.")
59 | cmd:option('-tv_weight',1e-3, "TV smoothness weight")
60 | cmd:option('-scaler', 2, "Relative expansion from example to result. This setting is only relevant for the syn mode.")
61 |
62 | cmd:option('-gpu_chunck_size_1',256, "Size of chunks used to split feature maps along the channel dimension. This is to save memory when normalizing the matching score in MRF layers. Use a larger value if you have more GPU memory. As a reference we use 256 for a Titan X, and 32 for a Geforce GT750M 2G.")
63 | cmd:option('-gpu_chunck_size_2',16, "Size of chunks used to split feature maps along the y dimension. This is to save memory when normalizing the matching score in MRF layers. Use a larger value if you have more GPU memory. As a reference we use 16 for a Titan X, and 2 for a Geforce GT750M 2G.")
64 |
65 | -- fixed parameters
66 | cmd:option('-target_step_rotation', math.pi/24)
67 | cmd:option('-target_step_scale', 1.05)
68 | cmd:option('-output_folder', 'data/result/trans/MRF/')
69 |
70 | cmd:option('-proto_file', 'data/models/VGG_ILSVRC_19_layers_deploy.prototxt')
71 | cmd:option('-model_file', 'data/models/VGG_ILSVRC_19_layers.caffemodel')
72 | cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use')
73 | cmd:option('-nCorrection', 25)
74 | cmd:option('-print_iter', 10)
75 | cmd:option('-save_iter', 10)
76 |
77 | params = cmd:parse(arg)
78 |
79 |
80 | for _,par in pairs({'mrf_layers', 'mrf_weight', 'num_iter', 'mrf_patch_size', 'target_sample_stride', 'mrf_confidence_threshold', 'source_sample_stride', 'content_layers'}) do
81 | params[par] = split(params[par], ',', tonumber)
82 | end
83 |
84 |
85 | local wrapper = nil
86 | if params.type == 'transfer' then
87 | wrapper = require 'transfer_CNNMRF_wrapper'
88 | else
89 | wrapper = require 'syn_CNNMRF_wrapper'
90 | end
91 |
92 | wrapper.main(params)
--------------------------------------------------------------------------------
/data/content/0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/content/0.jpg
--------------------------------------------------------------------------------
/data/content/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/content/1.jpg
--------------------------------------------------------------------------------
/data/content/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/content/2.jpg
--------------------------------------------------------------------------------
/data/content/potrait1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/content/potrait1.jpg
--------------------------------------------------------------------------------
/data/examples/0_to_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/0_to_0.png
--------------------------------------------------------------------------------
/data/examples/1_to_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/1_to_1.png
--------------------------------------------------------------------------------
/data/examples/Interpolation/2_morecontent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/2_morecontent.png
--------------------------------------------------------------------------------
/data/examples/Interpolation/2_morecontent2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/2_morecontent2.png
--------------------------------------------------------------------------------
/data/examples/Interpolation/3_balanced.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/3_balanced.png
--------------------------------------------------------------------------------
/data/examples/Interpolation/4_morestyle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/4_morestyle.png
--------------------------------------------------------------------------------
/data/examples/Interpolation/4_morestyle2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/Interpolation/4_morestyle2.png
--------------------------------------------------------------------------------
/data/examples/MultiRes/syn_res_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/MultiRes/syn_res_1.png
--------------------------------------------------------------------------------
/data/examples/MultiRes/syn_res_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/MultiRes/syn_res_2.png
--------------------------------------------------------------------------------
/data/examples/MultiRes/syn_res_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/MultiRes/syn_res_3.png
--------------------------------------------------------------------------------
/data/examples/MultiRes/syn_res_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/MultiRes/syn_res_4.png
--------------------------------------------------------------------------------
/data/examples/content.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/content.jpg
--------------------------------------------------------------------------------
/data/examples/content2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/content2.jpg
--------------------------------------------------------------------------------
/data/examples/style.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/style.jpg
--------------------------------------------------------------------------------
/data/examples/style2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/examples/style2.jpg
--------------------------------------------------------------------------------
/data/models/_gitignore:
--------------------------------------------------------------------------------
1 | VGG_ILSVRC_19_layers.caffemodel
2 | VGG_ILSVRC_19_layers_deploy.prototxt
3 | VGG_ILSVRC_19_layers_deploy.prototxt.lua
4 |
--------------------------------------------------------------------------------
/data/models/download_models.sh:
--------------------------------------------------------------------------------
1 | cd models
2 | wget https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt
3 | wget --no-check-certificate https://bethgelab.org/media/uploads/deeptextures/vgg_normalised.caffemodel
4 | wget http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel
5 | cd ..
6 |
--------------------------------------------------------------------------------
/data/style/0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/style/0.jpg
--------------------------------------------------------------------------------
/data/style/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/style/1.jpg
--------------------------------------------------------------------------------
/data/style/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/style/2.jpg
--------------------------------------------------------------------------------
/data/style/picasso.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chuanli11/CNNMRF/fddcf4d01e2a6ce201059d8bc38597f74a09ba3f/data/style/picasso.jpg
--------------------------------------------------------------------------------
/mylib/content.lua:
--------------------------------------------------------------------------------
1 | ------------------------------------------------------------------------
2 | -- ContentLoss
3 | ------------------------------------------------------------------------
4 | local ContentLoss, parent = torch.class('nn.ContentLoss', 'nn.Module')
5 | function ContentLoss:__init(strength, target, normalize)
6 | parent.__init(self)
7 | self.strength = strength
8 | self.target = target
9 | self.normalize = normalize or false
10 | self.loss = 0
11 | self.crit = nn.MSECriterion()
12 | end
13 | function ContentLoss:updateOutput(input)
14 | if input:nElement() == self.target:nElement() then
15 | self.loss = self.crit:forward(input, self.target) * self.strength
16 | else
17 | -- print(input:size())
18 | -- print(self.target:size())
19 | -- print('WARNING: Skipping content loss')
20 | end
21 | self.output = input
22 | return self.output
23 | end
24 | function ContentLoss:updateGradInput(input, gradOutput)
25 | if input:nElement() == self.target:nElement() then
26 | self.gradInput = self.crit:backward(input, self.target)
27 | end
28 | if self.normalize then
29 | self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
30 | end
31 | self.gradInput:mul(self.strength)
32 | self.gradInput:add(gradOutput)
33 | return self.gradInput
34 | end
35 |
36 | function ContentLoss:update(other)
37 | self.strength = other.strength
38 | self.target = other.target
39 | self.normalize = other.normalize
40 | self.loss = other.loss
41 | self.crit = other.crit
42 | end
--------------------------------------------------------------------------------
/mylib/helper.lua:
--------------------------------------------------------------------------------
1 | function computeMRF(input, size, stride, gpu, backend)
2 | local coord_x, coord_y = computegrid(input:size()[3], input:size()[2], size, stride)
3 | local dim_1 = input:size()[1] * size * size
4 | local dim_2 = coord_y:nElement()
5 | local dim_3 = coord_x:nElement()
6 | local t_feature_mrf = torch.Tensor(dim_2 * dim_3, input:size()[1], size, size)
7 |
8 | if gpu >= 0 then
9 | if backend == 'cudnn' then
10 | t_feature_mrf = t_feature_mrf:cuda()
11 | else
12 | t_feature_mrf = t_feature_mrf:cl()
13 | end
14 | end
15 | local count = 1
16 | for i_row = 1, dim_2 do
17 | for i_col = 1, dim_3 do
18 | t_feature_mrf[count] = input[{{1, input:size()[1]}, {coord_y[i_row], coord_y[i_row] + size - 1}, {coord_x[i_col], coord_x[i_col] + size - 1}}]
19 | count = count + 1
20 | end
21 | end
22 | local feature_mrf = t_feature_mrf:resize(dim_2 * dim_3, dim_1)
23 |
24 | return t_feature_mrf, feature_mrf, coord_x, coord_y
25 | end
26 |
27 |
28 | function computeMRFnoTensor(input, size, stride, gpu, backend)
29 | local coord_x, coord_y = computegrid(input:size()[3], input:size()[2], size, stride)
30 | local dim_1 = input:size()[1] * size * size
31 | local dim_2 = coord_y:nElement()
32 | local dim_3 = coord_x:nElement()
33 | local t_feature_mrf = torch.Tensor(dim_2 * dim_3, input:size()[1], size, size)
34 |
35 | if gpu >= 0 then
36 | if backend == 'cudnn' then
37 | t_feature_mrf = t_feature_mrf:cuda()
38 | else
39 | t_feature_mrf = t_feature_mrf:cl()
40 | end
41 | end
42 | local count = 1
43 | for i_row = 1, dim_2 do
44 | for i_col = 1, dim_3 do
45 | t_feature_mrf[count] = input[{{1, input:size()[1]}, {coord_y[i_row], coord_y[i_row] + size - 1}, {coord_x[i_col], coord_x[i_col] + size - 1}}]
46 | count = count + 1
47 | end
48 | end
49 | local feature_mrf = t_feature_mrf:resize(dim_2 * dim_3, dim_1)
50 |
51 | t_feature_mrf = nil
52 | collectgarbage()
53 | return feature_mrf, coord_x, coord_y
54 | end
55 |
56 |
57 | function drill_computeMRFfull(input, size, stride, gpu)
58 | local coord_x, coord_y = computegrid(input:size()[3], input:size()[2], size, stride, 1)
59 | local dim = torch.Tensor(2)
60 | return coord_x, coord_y
61 | end
62 |
63 |
64 | function sampleMRFAndTensorfromLocation2(coord_x, coord_y, input, size, gpu)
65 | local t_feature_mrf = torch.Tensor(coord_x:nElement(), input:size()[1], size, size)
66 | for i_patch = 1, coord_x:nElement() do
67 | t_feature_mrf[i_patch] = input[{{1, input:size()[1]}, {coord_y[i_patch], coord_y[i_patch] + size - 1}, {coord_x[i_patch], coord_x[i_patch] + size - 1}}]
68 | end
69 | local feature_mrf = t_feature_mrf:reshape(coord_x:nElement(), input:size()[1] * size * size)
70 | return t_feature_mrf, feature_mrf
71 | end
72 |
73 |
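-- computeBB: given an image of size width x height rotated by angle alpha, return an
-- axis-aligned box that lies inside the rotated image (presumably used to crop the
-- rotated copies of the style image when rotational copies are enabled).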
74 | function computeBB(width, height, alpha)
75 | local min_x, min_y, max_x, max_y
76 | local x1 = 1
77 | local y1 = 1
78 | local x2 = width
79 | local y2 = 1
80 | local x3 = width
81 | local y3 = height
82 | local x4 = 1
83 | local y4 = height
84 | local x0 = width / 2
85 | local y0 = height / 2
86 |
87 | local x1r = x0+(x1-x0)*math.cos(alpha)+(y1-y0)*math.sin(alpha)
88 | local y1r = y0-(x1-x0)*math.sin(alpha)+(y1-y0)*math.cos(alpha)
89 |
90 | local x2r = x0+(x2-x0)*math.cos(alpha)+(y2-y0)*math.sin(alpha)
91 | local y2r = y0-(x2-x0)*math.sin(alpha)+(y2-y0)*math.cos(alpha)
92 |
93 | local x3r = x0+(x3-x0)*math.cos(alpha)+(y3-y0)*math.sin(alpha)
94 | local y3r = y0-(x3-x0)*math.sin(alpha)+(y3-y0)*math.cos(alpha)
95 |
96 | local x4r = x0+(x4-x0)*math.cos(alpha)+(y4-y0)*math.sin(alpha)
97 | local y4r = y0-(x4-x0)*math.sin(alpha)+(y4-y0)*math.cos(alpha)
98 |
99 | -- print(x1r .. ' ' .. y1r .. ' ' .. x2r .. ' ' .. y2r .. ' ' .. x3r .. ' ' .. y3r .. ' ' .. x4r .. ' ' .. y4r)
100 | if alpha > 0 then
101 | -- find intersection P of line [x1, y1]-[x4, y4] and [x1r, y1r]-[x2r, y2r]
102 | local px1 = ((x1 * y4 - y1 * x4) * (x1r - x2r) - (x1 - x4) * (x1r * y2r - y1r * x2r)) / ((x1 - x4) * (y1r - y2r) - (y1 - y4) * (x1r - x2r))
103 | local py1 = ((x1 * y4 - y1 * x4) * (y1r - y2r) - (y1 - y4) * (x1r * y2r - y1r * x2r)) / ((x1 - x4) * (y1r - y2r) - (y1 - y4) * (x1r - x2r))
104 | local px2 = px1 + 1
105 | local py2 = py1
106 | -- print(px1 .. ' ' .. py1)
107 | -- find the intersection Q of line [px1, py1]-[px2, py2] and [x2r, y2r]-[x3r][y3r]
108 |
109 | local qx = ((px1 * py2 - py1 * px2) * (x2r - x3r) - (px1 - px2) * (x2r * y3r - y2r * x3r)) / ((px1 - px2) * (y2r - y3r) - (py1 - py2) * (x2r - x3r))
110 | local qy = ((px1 * py2 - py1 * px2) * (y2r - y3r) - (py1 - py2) * (x2r * y3r - y2r * x3r)) / ((px1 - px2) * (y2r - y3r) - (py1 - py2) * (x2r - x3r))
111 | -- print(qx .. ' ' .. qy)
112 |
113 | min_x = width - qx
114 | min_y = qy
115 | max_x = qx
116 | max_y = height - qy
117 | else if alpha < 0 then
118 | -- find intersection P of line [x2, y2]-[x3, y3] and [x1r, y1r]-[x2r, y2r]
119 | local px1 = ((x2 * y3 - y2 * x3) * (x1r - x2r) - (x2 - x3) * (x1r * y2r - y1r * x2r)) / ((x2 - x3) * (y1r - y2r) - (y2 - y3) * (x1r - x2r))
120 | local py1 = ((x2 * y3 - y2 * x3) * (y1r - y2r) - (y2 - y3) * (x1r * y2r - y1r * x2r)) / ((x2 - x3) * (y1r - y2r) - (y2 - y3) * (x1r - x2r))
121 | local px2 = px1 - 1
122 | local py2 = py1
123 | -- find the intersection Q of line [px1, py1]-[px2, py2] and [x1r, y1r]-[x4r][y4r]
124 | local qx = ((px1 * py2 - py1 * px2) * (x1r - x4r) - (px1 - px2) * (x1r * y4r - y1r * x4r)) / ((px1 - px2) * (y1r - y4r) - (py1 - py2) * (x1r - x4r))
125 | local qy = ((px1 * py2 - py1 * px2) * (y1r - y4r) - (py1 - py2) * (x1r * y4r - y1r * x4r)) / ((px1 - px2) * (y1r - y4r) - (py1 - py2) * (x1r - x4r))
126 | min_x = qx
127 | min_y = qy
128 | max_x = width - min_x
129 | max_y = height - min_y
130 | else
131 | min_x = x1
132 | min_y = y1
133 | max_x = x2
134 | max_y = y3
135 | end
136 | end
137 |
138 | return math.max(math.floor(min_x), 1), math.max(math.floor(min_y), 1), math.floor(max_x), math.floor(max_y)
139 | end
140 |
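-- computegrid: return the x/y coordinates of the top-left corners of blocks of size
-- block_size sampled with stride block_stride; when flag_all == 1 an extra block is
-- appended at the far border so that the whole image is covered.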
141 | function computegrid(width, height, block_size, block_stride, flag_all)
142 | local coord_block_y = torch.range(1, height - block_size + 1, block_stride)
143 | if flag_all == 1 then
144 | if coord_block_y[#coord_block_y] < height - block_size + 1 then
145 | local tail = torch.Tensor(1)
146 | tail[1] = height - block_size + 1
147 | coord_block_y = torch.cat(coord_block_y, tail)
148 | end
149 | end
150 | local coord_block_x = torch.range(1, width - block_size + 1, block_stride)
151 | if flag_all == 1 then
152 | if coord_block_x[#coord_block_x] < width - block_size + 1 then
153 | local tail = torch.Tensor(1)
154 | tail[1] = width - block_size + 1
155 | coord_block_x = torch.cat(coord_block_x, tail)
156 | end
157 | end
158 | return coord_block_x, coord_block_y
159 | end
160 |
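-- preprocess: convert an RGB image in [0, 1] to the Caffe/VGG convention: BGR channel
-- order, scaled by 256, with the mean pixel subtracted (deprocess below undoes this).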
161 | function preprocess(img)
162 | local mean_pixel = torch.Tensor({103.939, 116.779, 123.68})
163 | local perm = torch.LongTensor{3, 2, 1}
164 | img = img:index(1, perm):mul(256.0)
165 | mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img)
166 | img:add(-1, mean_pixel)
167 | return img
168 | end
169 |
170 | -- Undo the above preprocessing.
171 | function deprocess(img)
172 | local mean_pixel = torch.Tensor({103.939, 116.779, 123.68})
173 | mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img)
174 | img = img + mean_pixel:float()
175 | local perm = torch.LongTensor{3, 2, 1}
176 | img = img:index(1, perm):div(256.0)
177 | return img
178 | end
179 |
180 | function run_tests(run_type, list_params)
181 | local wrapper = run_type
182 | for i_test = 1, #list_params do
183 | wrapper.run_test(table.unpack(list_params[i_test]))
184 | end
185 | end
--------------------------------------------------------------------------------
/mylib/mrf.lua:
--------------------------------------------------------------------------------
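-- MRFMM: the MRF loss layer. Patches sampled from the style feature map (target_mrf)
-- are used as convolution filters (via SpatialConvolutionMM) to score every patch of
-- the input feature map; the responses are normalized by the patch norms (normalized
-- cross-correlation), the best match is selected per input patch, and the gradient
-- pulls each input patch towards its best-matching style patch.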
1 | local MRFMM, parent = torch.class('nn.MRFMM', 'nn.Module')
2 |
3 | function MRFMM:__init()
4 | parent.__init(self)
5 | end
6 |
7 | function MRFMM:implement(mode, target_mrf, tensor_target_mrf, target_mrfnorm, source_x, source_y, input_size, response_size, nInputPlane, nOutputPlane, kW, kH, dW, dH, threshold_conf, strength, gpu_chunck_size_1, gpu_chunck_size_2, backend, gpu)
8 | self.target_mrf = target_mrf:clone()
9 | self.target_mrfnorm = target_mrfnorm:clone()
10 | self.source_x = source_x
11 | self.source_y = source_y
12 | self.input_size = input_size
13 | self.nInputPlane = nInputPlane
14 | self.nOutputPlane = nOutputPlane
15 | self.kW = kW
16 | self.kH = kH
17 | self.dW = dW
18 | self.dH = dH
19 | self.threshold_conf = threshold_conf
20 | self.strength = strength
21 | self.padW = padW or 0
22 | self.padH = padH or self.padW
23 | self.bias = torch.Tensor(nOutputPlane):fill(0)
24 | self.backend = backend
25 | self.gpu = gpu
26 | if self.gpu >= 0 then
27 | if self.backend == 'cudnn' then
28 | self.bias = self.bias:cuda()
29 | else
30 | self.bias = self.bias:cl()
31 | end
32 | end
33 | self.gradTO = torch.Tensor(input_size[1], input_size[2], input_size[3])
34 | self.gradTO_confident = torch.Tensor(input_size[2], input_size[3])
35 | self.response = torch.Tensor(response_size[1], response_size[2], response_size[3])
36 | self.mode = mode -- memory or speed
37 | self.gpu_chunck_size_1 = gpu_chunck_size_1
38 | self.gpu_chunck_size_2 = gpu_chunck_size_2
39 | self.tensor_target_mrfnorm = torch.repeatTensor(target_mrfnorm, 1, self.gpu_chunck_size_2, input_size[3] - (kW - 1))
40 |
41 | if self.mode == 'speed' then
42 | if self.backend == 'cudnn' then
43 | self.target_mrf = self.target_mrf:cuda()
44 | self.target_mrfnorm = self.target_mrfnorm:cuda()
45 | self.tensor_target_mrfnorm = self.tensor_target_mrfnorm:cuda()
46 | self.gradTO = self.gradTO:cuda()
47 | self.gradTO_confident = self.gradTO_confident:cuda()
48 | self.response = self.response:cuda()
49 | else
50 | self.target_mrf = self.target_mrf:cl()
51 | self.target_mrfnorm = self.target_mrfnorm:cl()
52 | self.tensor_target_mrfnorm = self.tensor_target_mrfnorm:cl()
53 | self.gradTO = self.gradTO:cl()
54 | self.gradTO_confident = self.gradTO_confident:cl()
55 | self.response = self.response:cl()
56 | end
57 | end
58 |
59 | --[[print('***********************************')
60 | print('mrf layer: ')
61 | print('***********************************')
62 | print(self.target_mrf:size())
63 | print(self.tensor_target_mrf:size())
64 | print(self.tensor_target_mrfnorm:size())
65 | print(self.source_x)
66 | print(self.source_y)
67 | print(self.nInputPlane)
68 | print(self.nOutputPlane)
69 | print(self.kW)
70 | print(self.kH)
71 | print(self.strength)
72 | print(self.mode)--]]
73 | end
74 |
75 |
76 | local function makeContiguous(self, input, gradOutput)
77 | if not input:isContiguous() then
78 | print('not contiguous, make it so')
79 | self._input = self._input or input.new()
80 | self._input:resizeAs(input):copy(input)
81 | input = self._input
82 | end
83 | if gradOutput then
84 | if not gradOutput:isContiguous() then
85 | self._gradOutput = self._gradOutput or gradOutput.new()
86 | self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
87 | gradOutput = self._gradOutput
88 | end
89 | end
90 | return input, gradOutput
91 | end
92 |
93 | function MRFMM:updateOutput(input)
94 | input = makeContiguous(self, input)
95 | self.output = input:clone()
96 | return self.output
97 | end
98 |
99 | function MRFMM:updateGradInput(input, gradOutput)
100 |
101 | -- local timer_ALL = torch.Timer()
102 |
103 | -- local timer_PREP = torch.Timer()
104 | input = makeContiguous(self, input)
105 | self.gradTO = self.gradTO:fill(0)
106 | self.gradTO_confident = self.gradTO_confident:fill(0) + 1e-10
107 | local source_mrf, x, y = computeMRFnoTensor(input:float(), self.kW, 1, self.mode == 'memory' and -1 or 1, self.backend)
108 | local source_mrfnorm = torch.Tensor(source_mrf:size()[1])
109 | if self.mode == 'speed' then
110 | if self.backend == 'cudnn' then
111 | source_mrfnorm = torch.sqrt(torch.sum(torch.cmul(source_mrf, source_mrf), 2)):resize(1, y:nElement(), x:nElement())
112 | else
113 | for i_source = 1, source_mrf:size()[1] do
114 | source_mrfnorm[i_source] = torch.sqrt(torch.sum(torch.cmul(source_mrf[i_source], source_mrf[i_source])))
115 | end
116 | source_mrfnorm = source_mrfnorm:resize(1, y:nElement(), x:nElement())
117 | end
118 | else
119 | source_mrfnorm = torch.sqrt(torch.sum(torch.cmul(source_mrf, source_mrf), 2)):resize(1, y:nElement(), x:nElement())
120 | end
121 | local tensor_source_mrfnorm = torch.repeatTensor(source_mrfnorm, self.gpu_chunck_size_1, 1, 1)
122 | if self.gpu >= 0 then
123 | if self.backend == 'cudnn' then
124 | tensor_source_mrfnorm = tensor_source_mrfnorm:cuda()
125 | else
126 | tensor_source_mrfnorm = tensor_source_mrfnorm:cl()
127 | end
128 | end
129 | local nOutputPlane_all = self.nOutputPlane -- hacked for memory safety
130 | local num_chunk = math.ceil(nOutputPlane_all / self.gpu_chunck_size_1)
131 | -- local t_prep = timer_PREP:time().real
132 |
133 | -- local timer_MATCH = torch.Timer()
134 | -- local t_io = 0
135 | -- local t_conv = 0
136 | -- local t_clone = 0
137 | for i_chunk = 1, num_chunk do
138 | local i_start = (i_chunk - 1) * self.gpu_chunck_size_1 + 1
139 | local i_end = math.min(i_start + self.gpu_chunck_size_1 - 1, nOutputPlane_all)
140 |
141 | -- local timer_CLONE = torch.Timer()
142 | self.weight = self.target_mrf[{{i_start, i_end}, {1, self.target_mrf:size()[2]}}]
143 | -- t_clone = t_clone + timer_CLONE:time().real
144 |
145 | if self.mode == 'memory' then
146 | -- local timer_IO = torch.Timer()
147 | if self.gpu >= 0 then
148 | if self.backend == 'cudnn' then
149 | self.weight = self.weight:cuda()
150 | else
151 | self.weight = self.weight:cl()
152 | end
153 | end
154 | -- t_io = t_io + timer_IO:time().real
155 | end
156 | self.nOutputPlane = i_end - i_start + 1
157 |
158 | -- local timer_CONV = torch.Timer()
159 | --local temp = input.nn.SpatialConvolutionMM_updateOutput(self, input)
160 | -- t_conv = t_conv + timer_CONV:time().real
161 | local subBias = self.bias:sub(i_start, i_end)
162 | if self.gpu < 0 then
163 | self.finput = torch.Tensor()
164 | self.fgradInput = torch.Tensor()
165 | end
166 |
167 | input.THNN.SpatialConvolutionMM_updateOutput(
168 | input:cdata(),
169 | self.output:cdata(),
170 | self.weight:cdata(),
171 | subBias:cdata(),
172 | self.finput:cdata(),
173 | self.fgradInput:cdata(),
174 | self.kW, self.kH,
175 | self.dW, self.dH,
176 | self.padW, self.padH
177 | )
178 | local temp = self.output
179 |
180 | -- normalize w.r.t source_mrfnorm
181 | if i_chunk < num_chunk then
182 | temp = temp:cdiv(tensor_source_mrfnorm)
183 | else
184 | temp = temp:cdiv(tensor_source_mrfnorm[{{1, i_end - i_start + 1}, {1, temp:size()[2]}, {1, temp:size()[3]}}])
185 | end
186 |
187 | if self.mode == 'memory' then
188 | -- local timer_IO = torch.Timer()
189 | temp = temp:float()
190 | -- t_io = t_io + timer_IO:time().real
191 | end
192 | self.response[{{i_start, i_end}, {1, self.response:size()[2]}, {1, self.response:size()[3]}}] = temp
193 | end
194 |
195 | local num_chunk_2 = math.ceil(self.response:size()[2] / self.gpu_chunck_size_2)
196 | for i_chunk_2 = 1, num_chunk_2 do
197 | local i_start = (i_chunk_2 - 1) * self.gpu_chunck_size_2 + 1
198 | local i_end = math.min(i_start + self.gpu_chunck_size_2 - 1, self.response:size()[2])
199 | if i_chunk_2 < num_chunk_2 then
200 | self.response[{{1, self.response:size()[1]}, {i_start, i_end}, {1, self.response:size()[3]}}] = self.response[{{1, self.response:size()[1]}, {i_start, i_end}, {1, self.response:size()[3]}}]:cdiv(self.tensor_target_mrfnorm)
201 | else
202 | self.response[{{1, self.response:size()[1]}, {i_start, i_end}, {1, self.response:size()[3]}}] = self.response[{{1, self.response:size()[1]}, {i_start, i_end}, {1, self.response:size()[3]}}]:cdiv(self.tensor_target_mrfnorm[{{1, self.response:size()[1]}, {1, i_end - i_start + 1}, {1, self.response:size()[3]}}])
203 | end
204 | end
205 |
206 | -- local timer_AFT = torch.Timer()
207 | local max_response, max_id = torch.max(self.response, 1)
208 | -- local t_aft = timer_AFT:time().real
209 |
210 | -- local t_match = timer_MATCH:time().real
211 |
212 | -- local timer_SYN = torch.Timer()
213 | source_mrf = source_mrf:resize(source_mrf:size()[1], self.nInputPlane, self.kW, self.kH)
214 | self.target_mrf = self.target_mrf:resize(self.target_mrf:size()[1], self.nInputPlane, self.kW, self.kH)
215 | for i_patch = 1, self.source_x:nElement() do
216 | local sel_response = max_response[1][self.source_y[i_patch]][self.source_x[i_patch]]
217 | if sel_response >= self.threshold_conf then
218 | local sel_idx = max_id[1][self.source_y[i_patch]][self.source_x[i_patch]]
219 | local source_idx = (self.source_y[i_patch] - 1) * x:nElement() + self.source_x[i_patch]
220 | self.gradTO[{{1, self.nInputPlane}, {self.source_y[i_patch], self.source_y[i_patch] + self.kH - 1}, {self.source_x[i_patch], self.source_x[i_patch] + self.kW - 1}}]:add(self.target_mrf[sel_idx] - source_mrf[source_idx])
221 | self.gradTO_confident[{{self.source_y[i_patch], self.source_y[i_patch] + self.kH - 1}, {self.source_x[i_patch], self.source_x[i_patch] + self.kW - 1}}]:add(1)
222 | end
223 | end
224 | self.gradTO:cdiv(torch.repeatTensor(self.gradTO_confident, self.nInputPlane, 1, 1))
225 | self.nOutputPlane = nOutputPlane_all
226 | self.target_mrf = self.target_mrf:resize(self.target_mrf:size()[1], self.nInputPlane * self.kW * self.kH)
227 | -- local t_syn = timer_SYN:time().real
228 |
229 | if gradOutput:size()[1] == input:size()[1] then
230 | if self.gpu >= 0 then
231 | if self.backend == 'cudnn' then
232 | self.gradInput = gradOutput:clone() + self.gradTO:cuda() * self.strength * (-1)
233 | else
234 | self.gradInput = gradOutput:clone() + self.gradTO:cl() * self.strength * (-1)
235 | end
236 | else
237 | self.gradInput = gradOutput:clone() + self.gradTO * self.strength * (-1)
238 | end
239 | else
240 | self.gradInput = self.gradTO * self.strength * (-1)
241 | end
242 |
243 | -- local t_all = timer_ALL:time().real
244 | -- print('t_all: ' .. t_all .. ', t_prep: ' .. t_prep .. ', t_match: ' .. t_match .. ', t_io: ' .. t_io .. ', t_conv: ' .. t_conv .. ', t_aft: ' .. t_aft .. ', t_syn: ' .. t_syn)
245 | -- print('t_all: ' .. t_all .. ', t_prep: ' .. t_prep/t_all .. ', t_match: ' .. t_match/t_all .. ', t_io: ' .. t_io/t_all .. ', t_conv: ' .. t_conv/t_all .. ', t_aft: ' .. t_aft/t_all .. ', t_syn: ' .. t_syn/t_all)
246 | -- print('**************************************************************************************************')
247 | -- print('t_all: ' .. t_all .. ', t_clone: ' .. t_clone/t_match .. ', t_io: ' .. t_io/t_match .. ', t_conv: ' .. t_conv/t_match .. ', t_aft: ' .. t_aft/t_match)
248 | -- print('t_all: ' .. t_all .. ', t_clone: ' .. t_clone .. ', t_io: ' .. t_io .. ', t_conv: ' .. t_conv .. ', t_aft: ' .. t_aft)
249 | -- tensor_source_mrf = nil
250 | source_mrf = nil
251 | source_mrfnorm = nil
252 | tensor_source_mrfnorm = nil
253 | collectgarbage()
254 | return self.gradInput
255 | end
256 |
257 | function MRFMM:type(type)
258 | self.finput = torch.Tensor()
259 | self.fgradInput = torch.Tensor()
260 | return parent.type(self,type)
261 | end
262 |
--------------------------------------------------------------------------------
/mylib/myoptimizer.lua:
--------------------------------------------------------------------------------
1 | ------------------------------------------------------------------------
2 | -- mylbfgs
3 | ------------------------------------------------------------------------
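-- mylbfgs is a variant of optim.lbfgs; the main functional change is that the gradient
-- is multiplied element-wise by a user-supplied mask after every call to opfunc (see the
-- lines marked "add by chris"), which effectively keeps the masked-out entries of x fixed.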
4 | function mylbfgs(opfunc, x, config, state, mask)
5 | -- get/update state
6 | local config = config or {}
7 | local state = state or config
8 | local maxIter = tonumber(config.maxIter) or 20
9 | local maxEval = tonumber(config.maxEval) or maxIter*1.25
10 | local tolFun = config.tolFun or 1e-5
11 | local tolX = config.tolX or 1e-9
12 | local nCorrection = config.nCorrection or 100
13 | local lineSearch = config.lineSearch
14 | local lineSearchOpts = config.lineSearchOptions
15 | local learningRate = config.learningRate or 1
16 | local isverbose = config.verbose or false
17 |
18 |
19 | state.funcEval = state.funcEval or 0
20 | state.nIter = state.nIter or 0
21 | -- verbose function
22 | local function verbose(...)
23 | if isverbose then print(' ', ...) end
24 | end
25 |
26 | -- import some functions
27 | local zeros = torch.zeros
28 | local randn = torch.randn
29 | local append = table.insert
30 | local abs = math.abs
31 | local min = math.min
32 |
33 | -- evaluate initial f(x) and df/dx
34 | local f,g = opfunc(x)
35 | g:cmul(mask) -- add by chris
36 | local f_hist = {f}
37 | local currentFuncEval = 1
38 | state.funcEval = state.funcEval + 1
39 |
40 | -- check optimality of initial point
41 | state.tmp1 = state.abs_g or zeros(g:size()); local tmp1 = state.tmp1
42 | tmp1:copy(g):abs()
43 | if tmp1:sum() <= tolFun then
44 | -- optimality condition below tolFun
45 | verbose('optimality condition below tolFun')
46 | return x,f_hist
47 | end
48 |
49 | -- variables cached in state (for tracing)
50 | local d = state.d
51 | local t = state.t
52 | local old_dirs = state.old_dirs
53 | local old_stps = state.old_stps
54 | local Hdiag = state.Hdiag
55 | local g_old = state.g_old
56 | local f_old = state.f_old
57 |
58 | -- optimize for a max of maxIter iterations
59 | local nIter = 0
60 | while nIter < maxIter do
61 | -- keep track of nb of iterations
62 | nIter = nIter + 1
63 | state.nIter = state.nIter + 1
64 | -- print(state.nIter)
65 | ------------------------------------------------------------
66 | -- compute gradient descent direction
67 | ------------------------------------------------------------
68 | if state.nIter == 1 then
69 | d = g:clone():mul(-1) -- -g
70 | old_dirs = {}
71 | old_stps = {}
72 | Hdiag = 1
73 | else
74 | -- do lbfgs update (update memory)
75 | local y = g:clone():add(-1, g_old) -- g - g_old
76 | local s = d:clone():mul(t) -- d*t
77 | local ys = y:dot(s) -- y*s
78 |
79 | if ys > 1e-10 then
80 | -- updating memory
81 | if #old_dirs == nCorrection then
82 | -- shift history by one (limited-memory)
83 | local prev_old_dirs = old_dirs
84 | local prev_old_stps = old_stps
85 | old_dirs = {}
86 | old_stps = {}
87 | for i = 2,#prev_old_dirs do
88 | append(old_dirs, prev_old_dirs[i])
89 | append(old_stps, prev_old_stps[i])
90 | end
91 | end
92 |
93 | -- store new direction/step
94 | append(old_dirs, s)
95 | append(old_stps, y)
96 |
97 | -- update scale of initial Hessian approximation
98 | Hdiag = ys / y:dot(y) -- (y*y)
99 |
100 | -- cleanup
101 | collectgarbage()
102 | end
103 |
104 | -- compute the approximate (L-BFGS) inverse Hessian
105 | -- multiplied by the gradient
106 | local p = g:size(1)
107 | local k = #old_dirs
108 |
109 | state.ro = state.ro or zeros(nCorrection); local ro = state.ro
110 | for i = 1,k do
111 | ro[i] = 1 / old_stps[i]:dot(old_dirs[i])
112 | end
113 |
114 | state.q = state.q or zeros(nCorrection+1,p):typeAs(g)
115 | local q = state.q
116 | state.r = state.r or zeros(nCorrection+1,p):typeAs(g)
117 | local r = state.r
118 | state.al = state.al or zeros(nCorrection):typeAs(g)
119 | local al = state.al
120 | state.be = state.be or zeros(nCorrection):typeAs(g)
121 | local be = state.be
122 |
123 | q[k+1] = g:clone():mul(-1) -- -g
124 |
125 | for i = k,1,-1 do
126 | al[i] = old_dirs[i]:dot(q[i+1]) * ro[i]
127 | q[i] = q[i+1]
128 | q[i]:add(-al[i], old_stps[i])
129 | end
130 |
131 | -- multiply by initial Hessian
132 | r[1] = q[1]:clone():mul(Hdiag) -- q[1] * Hdiag
133 |
134 | for i = 1,k do
135 | be[i] = old_stps[i]:dot(r[i]) * ro[i]
136 | r[i+1] = r[i]
137 | r[i+1]:add((al[i] - be[i]), old_dirs[i])
138 | end
139 |
140 | -- final direction:
141 | d:copy(r[k+1])
142 | end -- end if state.nIter == 1 then
143 |
144 | g_old = g:clone()
145 | f_old = f
146 |
147 | ------------------------------------------------------------
148 | -- compute step length
149 | ------------------------------------------------------------
150 | -- directional derivative
151 | local gtd = g:dot(d) -- g * d
152 |
153 | -- check that progress can be made along that direction
154 | if gtd > -tolX then
155 | break
156 | end
157 |
158 | -- reset initial guess for step size
159 | if state.nIter == 1 then
160 | tmp1:copy(g):abs()
161 | t = min(1,1/tmp1:sum()) * learningRate
162 | else
163 | t = learningRate
164 | end
165 |
166 | -- optional line search: user function
167 | local lsFuncEval = 0
168 | if lineSearch and type(lineSearch) == 'function' then
169 | -- perform line search, using user function
170 | f,g,x,t,lsFuncEval = lineSearch(opfunc,x,t,d,f,g,gtd,lineSearchOpts)
171 | append(f_hist, f)
172 | else
173 | -- no line search, simply move with fixed-step
174 | x:add(t,d)
175 | if nIter ~= maxIter then
176 | -- re-evaluate function only if not in last iteration
177 | -- the reason we do this: in a stochastic setting,
178 | -- no use to re-evaluate that function here
179 | f,g = opfunc(x)
180 | g:cmul(mask) -- add by chris
181 | lsFuncEval = 1
182 | append(f_hist, f)
183 | end
184 | end
185 |
186 | -- update func eval
187 | currentFuncEval = currentFuncEval + lsFuncEval
188 | state.funcEval = state.funcEval + lsFuncEval
189 |
190 | ------------------------------------------------------------
191 | -- check conditions
192 | ------------------------------------------------------------
193 | if nIter == maxIter then
194 | -- no use to run tests
195 | verbose('reached max number of iterations')
196 | break
197 | end
198 |
199 | if currentFuncEval >= maxEval then
200 | -- max nb of function evals
201 | verbose('max nb of function evals')
202 | break
203 | end
204 |
205 | tmp1:copy(g):abs()
206 | if tmp1:sum() <= tolFun then
207 | -- check optimality
208 | verbose('optimality condition below tolFun')
209 | break
210 | end
211 |
212 | tmp1:copy(d):mul(t):abs()
213 | if tmp1:sum() <= tolX then
214 | -- step size below tolX
215 | verbose('step size below tolX')
216 | break
217 | end
218 |
219 | if abs(f-f_old) < tolX then
220 | -- function value changing less than tolX
221 | verbose('function value changing less than tolX')
222 | break
223 | end
224 | end -- end while nIter < maxIter do
225 |
226 | -- save state
227 | state.old_dirs = old_dirs
228 | state.old_stps = old_stps
229 | state.Hdiag = Hdiag
230 | state.g_old = g_old
231 | state.f_old = f_old
232 | state.t = t
233 | state.d = d
234 |
235 | -- return optimal x, and history of f(x)
236 | return x,f_hist,currentFuncEval
237 | end
--------------------------------------------------------------------------------
/mylib/style.lua:
--------------------------------------------------------------------------------
1 | ------------------------------------------------------------------------
2 | -- StyleLoss
3 | ------------------------------------------------------------------------
4 | -- Returns a network that computes the CxC Gram matrix from inputs
5 | -- of size C x H x W
6 | function GramMatrix()
7 | local net = nn.Sequential()
8 | net:add(nn.View(-1):setNumInputDims(2))
9 | local concat = nn.ConcatTable()
10 | concat:add(nn.Identity())
11 | concat:add(nn.Identity())
12 | net:add(concat)
13 | net:add(nn.MM(false, true))
14 | return net
15 | end
16 |
17 | local StyleLoss, parent = torch.class('nn.StyleLoss', 'nn.Module')
18 |
19 | function StyleLoss:__init(strength, target, normalize)
20 | parent.__init(self)
21 | self.normalize = normalize or false
22 | self.strength = strength
23 | self.target = target
24 | self.loss = 0
25 |
26 | self.gram = GramMatrix()
27 | self.G = nil
28 | self.crit = nn.MSECriterion()
29 | end
30 |
31 | function StyleLoss:updateOutput(input)
32 | self.G = self.gram:forward(input)
33 | self.G:div(input:nElement())
34 | self.loss = self.crit:forward(self.G, self.target)
35 | self.loss = self.loss * self.strength
36 | self.output = input
37 | return self.output
38 | end
39 |
40 | function StyleLoss:updateGradInput(input, gradOutput)
41 | local dG = self.crit:backward(self.G, self.target)
42 | dG:div(input:nElement())
43 | self.gradInput = self.gram:backward(input, dG)
44 | if self.normalize then
45 | self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
46 | end
47 | self.gradInput:mul(self.strength)
48 | self.gradInput:add(gradOutput)
49 | return self.gradInput
50 | end
--------------------------------------------------------------------------------
/mylib/tv.lua:
--------------------------------------------------------------------------------
1 | local TVLoss, parent = torch.class('nn.TVLoss', 'nn.Module')
2 |
3 | function TVLoss:__init(strength)
4 | parent.__init(self)
5 | self.strength = strength
6 | self.x_diff = torch.Tensor()
7 | self.y_diff = torch.Tensor()
8 | end
9 |
10 | ------------------------------------------------------------------------
11 | -- TVLoss
12 | ------------------------------------------------------------------------
13 | function TVLoss:updateOutput(input)
14 | self.output = input
15 | return self.output
16 | end
17 |
18 | -- TV loss backward pass inspired by kaishengtai/neuralart
19 | function TVLoss:updateGradInput(input, gradOutput)
20 | self.gradInput:resizeAs(input):zero()
21 | local C, H, W = input:size(1), input:size(2), input:size(3)
22 | self.x_diff:resize(3, H - 1, W - 1)
23 | self.y_diff:resize(3, H - 1, W - 1)
24 | self.x_diff:copy(input[{{}, {1, -2}, {1, -2}}])
25 | self.x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}])
26 | self.y_diff:copy(input[{{}, {1, -2}, {1, -2}}])
27 | self.y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}])
28 | self.gradInput[{{}, {1, -2}, {1, -2}}]:add(self.x_diff):add(self.y_diff)
29 | self.gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, self.x_diff)
30 | self.gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, self.y_diff)
31 | self.gradInput:mul(self.strength)
32 | self.gradInput:add(gradOutput)
33 | return self.gradInput
34 | end
--------------------------------------------------------------------------------
/run_syn.lua:
--------------------------------------------------------------------------------
1 | require 'paths'
2 | paths.dofile('mylib/helper.lua')
3 |
4 | -----------------------------------------
5 | -- Parameters:
6 | -----------------------------------------
7 | -- content_name: the content image located in folder "data/content". Notice for free synthesis this image is only used for initialization (and only when "ini_method" is set to "image")
8 | -- style_name: the style image located in folder "data/style"
9 | -- ini_method: initial method, set to "image" to use the content image as the initialization; set to "random" to use random noise (of the same size as the content image).
10 | -- max_size: maximum size of the synthesis image. Default value 384. Larger image needs more time and memory.
11 | -- scaler: relative expansion from example to result. Default value 2.
12 | -- num_res: number of resolutions. Default value 3. Notice the lowest resolution image should be larger than the patch size otherwise it won't synthesize.
13 | -- num_iter: number of iterations for each resolution. Default value 100 for all resolutions.
14 |
15 | -- mrf_layers: the layers for MRF constraint. Usually layer 21 alone already gives decent results. Including layer 12 may improve the results but at significantly more computational cost.
16 | -- mrf_weight: weight for each MRF layer. Default value 1e-4. For free texture synthesis it can be seen as the "learning rate" in gradient descent.
17 | -- mrf_patch_size: the patch size for MRF constraint. Default value 3. This value is defined separately for each MRF layer.
18 | -- mrf_num_rotation: to match objects of different poses. Default value 0. This value is shared by all MRF layers. The total number of rotational copies is "2 * mrf_num_rotation + 1"
19 | -- mrf_num_scale: to match objects of different scales. Default value 0. This value is shared by all MRF layers. The total number of scaled copies is "2 * mrf_num_scale + 1"
20 | -- mrf_sample_stride: stride for sampling MRF patches on the style image. Default value 2. This value is defined separately for each MRF layer.
21 | -- mrf_synthesis_stride: stride for sampling MRF patches on the synthesis image. Default value 2. This value is defined separately for each MRF layer.
22 | -- mrf_confidence_threshold: threshold for filtering out bad matches. Default value 0 means we keep all matches. This value is defined separately for all layers.
23 |
24 | -- tv_weight: TV smoothness weight. Default value 1e-3.
25 |
26 | -- mode: speed or memory. Try 'speed' if you have a GPU with more than 4GB memory, and try 'memory' otherwise. The 'speed' mode is significantly faster (especially for synthesizing high resolutions) at the cost of higher GPU memory.
27 | -- gpu_chunck_size_1: size of chunks used to split feature maps along the channel dimension. This is to save memory when normalizing the matching score in MRF layers. Use a larger value if you have more GPU memory. As a reference we use 256 for a Titan X, and 32 for a Geforce GT750M 2G.
28 | -- gpu_chunck_size_2: size of chunks used to split feature maps along the y dimension. This is to save memory when normalizing the matching score in MRF layers. Use a larger value if you have more GPU memory. As a reference we use 16 for a Titan X, and 2 for a Geforce GT750M 2G.
29 | -- backend: Use 'cudnn' for CUDA-enabled GPUs or 'clnn' for OpenCL.
30 |
31 | -----------------------------------------
32 | -- Reference tests
33 | -----------------------------------------
34 | -- speed mode V.S. memory mode (Titan X 12G)
35 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 131 seconds
36 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn'} -- 172 seconds
37 |
38 | -- speed mode V.S. memory mode (Geforce GT750M 2G)
39 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 552 seconds (GPU stretching, not recommended)
40 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn'}, -- 1506 seconds
41 |
42 | -- speed mode vs. memory mode (Sapphire Radeon R9 280 3G)
43 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'speed', 256, 16, 'clnn'}, -- 193 seconds (240 seconds total)
44 | -- {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'clnn'}, -- 175 seconds (216 seconds total)
45 | local list_params = {
46 | {'2', '2', 'random', 384, 2, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, 1e-3, 'memory', 256, 16, 'cudnn'},
47 | }
48 |
49 | run_tests(require 'syn_CNNMRF_wrapper', list_params)
--------------------------------------------------------------------------------
/run_trans.lua:
--------------------------------------------------------------------------------
1 | require 'paths'
2 | paths.dofile('mylib/helper.lua')
3 |
4 | -----------------------------------------
5 | -- Parameters:
6 | -----------------------------------------
7 | -- content_name: the content image located in folder "data/content"
8 | -- style_name: the style image located in folder "data/style"
9 | -- ini_method: initial method, set to "image" to use the content image as the initialization; set to "random" to use random noise.
10 | -- max_size: maximum size of the synthesis image. Default value 384. Larger images need more time and memory.
11 | -- num_res: number of resolutions. Default value 3. Notice the lowest-resolution image must be larger than the patch size, otherwise synthesis will fail.
12 | -- num_iter: number of iterations for each resolution. Default value 100 for all resolutions.
13 |
14 | -- mrf_layers: the layers for MRF constraint. Usually layer 21 alone already gives decent results. Including layer 12 may improve the results, but at a significantly higher computational cost.
15 | -- mrf_weight: weight for each MRF layer. Default value 1e-4. Higher weights lead to more style-faithful results.
16 | -- mrf_patch_size: the patch size for MRF constraint. Default value 3. This value is defined separately for each MRF layer.
17 | -- mrf_num_rotation: to match objects of different poses. Default value 0. This value is shared by all MRF layers. The total number of rotational copies is "2 * mrf_num_rotation + 1".
18 | -- mrf_num_scale: to match objects of different scales. Default value 0. This value is shared by all MRF layers. The total number of scaled copies is "2 * mrf_num_scale + 1".
19 | -- mrf_sample_stride: stride for sampling MRF patches on the style image. Default value 2. This value is defined separately for each MRF layer.
20 | -- mrf_synthesis_stride: stride for sampling MRF patches on the synthesis image. Default value 2. This value is defined separately for each MRF layer.
21 | -- mrf_confidence_threshold: threshold for filtering out bad matches. Default value 0 (keep all matches). This value is defined separately for each MRF layer.
22 |
23 | -- content_layers: the layers for content constraint. Default value 23.
24 | -- content_weight: the weight for content constraint. Default value 2e1. Increasing this value makes the result more content-faithful; decreasing it makes the result more style-faithful. Notice this value should be increased (for example, doubled) if layer 12 is included in the MRF constraint.
25 |
26 | -- tv_weight: TV smoothness weight. Default value 1e-3.
27 |
28 | -- mode: speed or memory. Try 'speed' if you have a GPU with more than 4GB memory, and try 'memory' otherwise. The 'speed' mode is significantly faster (especially when synthesizing at high resolutions) at the cost of higher GPU memory usage.
29 | -- gpu_chunck_size_1: size of the chunks used to split feature maps along the channel dimension. This saves memory when normalizing the matching score in MRF layers. Use a larger value if you have more GPU memory. As a reference, we use 256 for Titan X and 32 for Geforce GT750M 2G.
30 | -- gpu_chunck_size_2: size of the chunks used to split feature maps along the y dimension. This saves memory when normalizing the matching score in MRF layers. Use a larger value if you have more GPU memory. As a reference, we use 16 for Titan X and 2 for Geforce GT750M 2G.
31 | -- backend: Use 'cudnn' for CUDA-enabled GPUs or 'clnn' for OpenCL.
32 |
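-- For readability: each row of "list_params" below corresponds, in order, to the arguments of
-- run_test in transfer_CNNMRF_wrapper.lua, i.e.
-- (content_name, style_name, ini_method, max_size, num_res, num_iter, mrf_layers, mrf_weight,
--  mrf_patch_size, mrf_num_rotation, mrf_num_scale, mrf_sample_stride, mrf_synthesis_stride,
--  mrf_confidence_threshold, content_layers, content_weight, tv_weight, mode,
--  gpu_chunck_size_1, gpu_chunck_size_2, backend).
--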
33 | -----------------------------------------
34 | -- Reference tests
35 | -----------------------------------------
36 | -- speed mode vs. memory mode (Titan X 12G)
37 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 101 seconds
38 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'memory', 256, 16, 'cudnn'}, -- 283 seconds
39 |
40 | -- speed mode vs. memory mode (Geforce GT750M 2G)
41 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- 570 seconds (GPU stretching, not recommended)
42 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'memory', 256, 16, 'cudnn'}, -- 973 seconds
43 |
44 | -- speed mode vs. memory mode (Sapphire Radeon R9 280 3G)
45 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'memory', 256, 16, 'clnn'}, -- 301 seconds (346 seconds total)
46 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'clnn'}, -- 6500 seconds (7032 seconds total)
47 |
48 | -- style interpolation (high resolution with Titan X 12G):
49 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- balanced
50 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 4e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- more content
51 | -- {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 1e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- more style
52 |
53 | -- style interpolation (low resolution with Geforce GT750M 2G):
54 | -- {'potrait1', 'picasso', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- balanced
55 | -- {'potrait1', 'picasso', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 4e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- more content
56 | -- {'potrait1', 'picasso', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 1e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- more style
57 |
58 | -- other
59 | -- {'0', '0', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- Titan X 12G: 145 seconds
60 | -- {'1', '1', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {2, 2}, {2, 2}, {0, 0}, {23}, 0.5e1, 1e-3, 'speed', 256, 16, 'cudnn'}, -- Titan X 12G: 146 seconds
61 | -- {'0', '0', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {3, 3}, {2, 2}, {0, 0}, {23}, 1e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- Geforce GT750M 2G: 593 seconds
62 | -- {'1', '1', 'image', 256, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {3, 3}, {2, 2}, {0, 0}, {23}, 0.5e1, 1e-3, 'speed', 32, 2, 'cudnn'}, -- Geforce GT750M 2G: 623 seconds
63 |
64 |
65 | local list_params = {
66 | {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'},
67 | {'0', '0', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'speed', 256, 16, 'cudnn'},
68 | {'1', '1', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 3, 3, {2, 2}, {2, 2}, {0, 0}, {23}, 0.5e1, 1e-3, 'speed', 256, 16, 'cudnn'},
69 | {'potrait1', 'picasso', 'image', 384, 3, {100, 100, 100}, {12, 21}, {1e-4, 1e-4}, {3, 3}, 1, 1, {2, 2}, {2, 2}, {0, 0}, {23}, 2e1, 1e-3, 'memory', 256, 16, 'clnn'},
70 | }
71 |
72 | run_tests(require 'transfer_CNNMRF_wrapper', list_params)
--------------------------------------------------------------------------------
/syn_CNNMRF_wrapper.lua:
--------------------------------------------------------------------------------
1 | require 'torch'
2 | require 'nn'
3 | require 'image'
4 | require 'paths'
5 | require 'loadcaffe'
6 |
7 | paths.dofile('mylib/myoptimizer.lua')
8 | paths.dofile('mylib/tv.lua')
9 | paths.dofile('mylib/mrf.lua')
10 | paths.dofile('mylib/helper.lua')
11 |
12 | torch.setdefaulttensortype('torch.FloatTensor') -- float as default tensor type
13 |
14 | local function main(params)
15 | os.execute('mkdir data/result/')
16 | os.execute('mkdir data/result/freesyn/')
17 | os.execute('mkdir data/result/freesyn/MRF/')
18 | os.execute(string.format('mkdir %s', params.output_folder))
19 |
20 | local net = nn.Sequential()
21 | local i_net_layer = 0
22 | local num_calls = 0
23 | local next_mrf_idx = 1
24 | local mrf_losses = {}
25 | local mrf_layers = {}
26 | local i_mrf_layer = 0
27 | local input_image
28 | local output_image
29 | local cur_res
30 | local mrf_layers_pretrained = params.mrf_layers
31 |
32 | -----------------------------------------------------------------------------------
33 | -- read images
34 | -----------------------------------------------------------------------------------
35 | local source_image = image.load(string.format('data/content/%s.jpg', params.content_name), 3)
36 | local target_image = image.load(string.format('data/style/%s.jpg', params.style_name), 3)
37 |
38 | source_image = image.scale(source_image, params.max_size, 'bilinear')
39 | target_image = image.scale(target_image, math.floor(params.max_size / params.scaler), 'bilinear')
40 |
41 | local render_height = source_image:size()[2]
42 | local render_width = source_image:size()[3]
43 | local source_image_caffe = preprocess(source_image):float()
44 | local target_image_caffe = preprocess(target_image):float()
45 |
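-- Build multi-resolution pyramids: level i_res holds the image scaled by 0.5^(num_res - i_res),
-- so the last level is the full-resolution image and each earlier level halves the size.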
46 | local pyramid_source_image_caffe = {}
47 | for i_res = 1, params.num_res do
48 | pyramid_source_image_caffe[i_res] = image.scale(source_image_caffe, math.ceil(source_image:size()[3] * math.pow(0.5, params.num_res - i_res)), math.ceil(source_image:size()[2] * math.pow(0.5, params.num_res - i_res)), 'bilinear')
49 | end
50 |
51 | local pyramid_target_image_caffe = {}
52 | for i_res = 1, params.num_res do
53 | pyramid_target_image_caffe[i_res] = image.scale(target_image_caffe, math.ceil(target_image:size()[3] * math.pow(0.5, params.num_res - i_res)), math.ceil(target_image:size()[2] * math.pow(0.5, params.num_res - i_res)), 'bilinear')
54 | end
55 |
56 | -- --------------------------------------------------------------------------------------------------------
57 | -- -- local function for adding a mrf layer, with image rotation and scaling
58 | -- --------------------------------------------------------------------------------------------------------
59 | local function add_mrf()
60 | local mrf_module = nn.MRFMM()
61 | i_mrf_layer = i_mrf_layer + 1
62 | i_net_layer = i_net_layer + 1
63 | next_mrf_idx = next_mrf_idx + 1
64 | if params.gpu >= 0 then
65 | if params.backend == 'cudnn' then
66 | mrf_module:cuda()
67 | else
68 | mrf_module:cl()
69 | end
70 | end
71 | net:add(mrf_module)
72 | table.insert(mrf_losses, mrf_module)
73 | table.insert(mrf_layers, i_mrf_layer, i_net_layer)
74 | return true
75 | end
76 |
77 | local function build_mrf(id_mrf)
78 | --------------------------------------------------------
79 | -- deal with target
80 | --------------------------------------------------------
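-- Enumerate rotated and scaled copies of the style image at the current resolution; with
-- target_num_rotation = R and target_num_scale = S this yields (2*R + 1) * (2*S + 1) copies,
-- rotating in steps of target_step_rotation and scaling by powers of target_step_scale.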
81 | local target_images_caffe = {}
82 | for i_r = -params.target_num_rotation, params.target_num_rotation do
83 | local alpha = params.target_step_rotation * i_r
84 | local min_x, min_y, max_x, max_y = computeBB(pyramid_target_image_caffe[cur_res]:size()[3], pyramid_target_image_caffe[cur_res]:size()[2], alpha)
85 | local target_image_rt_caffe = image.rotate(pyramid_target_image_caffe[cur_res], alpha, 'bilinear')
86 | target_image_rt_caffe = target_image_rt_caffe[{{1, target_image_rt_caffe:size()[1]}, {min_y, max_y}, {min_x, max_x}}]
87 |
88 | for i_s = -params.target_num_scale, params.target_num_scale do
89 | local max_sz = math.floor(math.max(target_image_rt_caffe:size()[2], target_image_rt_caffe:size()[3]) * torch.pow(params.target_step_scale, i_s))
90 | local target_image_rt_s_caffe = image.scale(target_image_rt_caffe, max_sz, 'bilinear')
91 | if params.gpu >= 0 then
92 | if params.backend == 'cudnn' then
93 | target_image_rt_s_caffe = target_image_rt_s_caffe:cuda()
94 | else
95 | target_image_rt_s_caffe = target_image_rt_s_caffe:cl()
96 | end
97 | end
98 | table.insert(target_images_caffe, target_image_rt_s_caffe)
99 | end
100 | end
101 |
102 | -- compute the coordinates on the pixel layer
103 | local target_x
104 | local target_y
105 | local target_x_per_image = {}
106 | local target_y_per_image = {}
107 | local target_imageid
108 | -- print('*****************************************************')
109 | -- print(string.format('build target mrf'));
110 | -- print('*****************************************************')
111 | for i_image = 1, #target_images_caffe do
112 | -- print(string.format('image %d, ', i_image))
113 | net:forward(target_images_caffe[i_image])
114 | local target_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float()
115 |
116 | if params.mrf_patch_size[id_mrf] > target_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > target_feature_map:size()[3] then
117 | print('target_images is not big enough for patch')
118 | print('target_images size: ')
119 | print(target_feature_map:size())
120 | print('patch size: ')
121 | print(params.mrf_patch_size[id_mrf])
122 | do return end
123 | end
124 | local target_x_, target_y_ = drill_computeMRFfull(target_feature_map, params.mrf_patch_size[id_mrf], params.target_sample_stride[id_mrf], -1)
125 |
126 |
127 | local x = torch.Tensor(target_x_:nElement() * target_y_:nElement())
128 | local y = torch.Tensor(target_x_:nElement() * target_y_:nElement())
129 | local target_imageid_ = torch.Tensor(target_x_:nElement() * target_y_:nElement()):fill(i_image)
130 | local count = 1
131 | for i_row = 1, target_y_:nElement() do
132 | for i_col = 1, target_x_:nElement() do
133 | x[count] = target_x_[i_col]
134 | y[count] = target_y_[i_row]
135 | count = count + 1
136 | end
137 | end
138 | if i_image == 1 then
139 | target_x = x:clone()
140 | target_y = y:clone()
141 | target_imageid = target_imageid_:clone()
142 | else
143 | target_x = torch.cat(target_x, x, 1)
144 | target_y = torch.cat(target_y, y, 1)
145 | target_imageid = torch.cat(target_imageid, target_imageid_, 1)
146 | end
147 | table.insert(target_x_per_image, x)
148 | table.insert(target_y_per_image, y)
149 | end -- end for i_image = 1, #target_images do
150 |
151 | -- print('*****************************************************')
152 | -- print(string.format('collect mrf'));
153 | -- print('*****************************************************')
154 |
155 | local num_channel_mrf = net:get(mrf_layers[id_mrf] - 1).output:size()[1]
156 | local target_mrf = torch.Tensor(target_x:nElement(), num_channel_mrf * params.mrf_patch_size[id_mrf] * params.mrf_patch_size[id_mrf])
157 | local tensor_target_mrf = torch.Tensor(target_x:nElement(), num_channel_mrf, params.mrf_patch_size[id_mrf], params.mrf_patch_size[id_mrf])
158 | local count_mrf = 1
159 | for i_image = 1, #target_images_caffe do
160 | -- print(string.format('image %d, ', i_image));
161 | net:forward(target_images_caffe[i_image])
162 | -- sample mrf on mrf_layers
163 | local tensor_target_mrf_, target_mrf_ = sampleMRFAndTensorfromLocation2(target_x_per_image[i_image], target_y_per_image[i_image], net:get(mrf_layers[id_mrf] - 1).output:float(), params.mrf_patch_size[id_mrf])
164 | target_mrf[{{count_mrf, count_mrf + target_mrf_:size()[1] - 1}, {1, target_mrf:size()[2]}}] = target_mrf_:clone()
165 | tensor_target_mrf[{{count_mrf, count_mrf + target_mrf_:size()[1] - 1}, {1, tensor_target_mrf:size()[2]}, {1, tensor_target_mrf:size()[3]}, {1, tensor_target_mrf:size()[4]}}] = tensor_target_mrf_:clone()
166 | count_mrf = count_mrf + target_mrf_:size()[1]
167 | tensor_target_mrf_ = nil
168 | target_mrf_ = nil
169 | collectgarbage()
170 | end --for i_image = 1, #target_images do
171 | local target_mrfnorm = torch.sqrt(torch.sum(torch.cmul(target_mrf, target_mrf), 2)):resize(target_mrf:size()[1], 1, 1)
172 |
173 | --------------------------------------------------------
174 | -- process source
175 | --------------------------------------------------------
176 | -- print('*****************************************************')
177 | -- print(string.format('process source image'));
178 | -- print('*****************************************************')
179 | if params.gpu >= 0 then
180 | if params.backend == 'cudnn' then
181 | net:forward(pyramid_source_image_caffe[cur_res]:cuda())
182 | else
183 | net:forward(pyramid_source_image_caffe[cur_res]:cl())
184 | end
185 | else
186 | net:forward(pyramid_source_image_caffe[cur_res])
187 | end
188 | local source_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float()
189 | if params.mrf_patch_size[id_mrf] > source_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > source_feature_map:size()[3] then
190 | print('source_image_caffe is not big enough for patch')
191 | print('source_image_caffe size: ')
192 | print(source_feature_map:size())
193 | print('patch size: ')
194 | print(params.mrf_patch_size[id_mrf])
195 | do return end
196 | end
197 | local source_xgrid, source_ygrid = drill_computeMRFfull(source_feature_map:float(), params.mrf_patch_size[id_mrf], params.source_sample_stride[id_mrf], -1)
198 | local source_x = torch.Tensor(source_xgrid:nElement() * source_ygrid:nElement())
199 | local source_y = torch.Tensor(source_xgrid:nElement() * source_ygrid:nElement())
200 | local count = 1
201 | for i_row = 1, source_ygrid:nElement() do
202 | for i_col = 1, source_xgrid:nElement() do
203 | source_x[count] = source_xgrid[i_col]
204 | source_y[count] = source_ygrid[i_row]
205 | count = count + 1
206 | end
207 | end
208 | -- local tensor_target_mrfnorm = torch.repeatTensor(target_mrfnorm:float(), 1, net:get(mrf_layers[id_mrf] - 1).output:size()[2] - (params.mrf_patch_size[id_mrf] - 1), net:get(mrf_layers[id_mrf] - 1).output:size()[3] - (params.mrf_patch_size[id_mrf] - 1))
209 |
210 | -- print('*****************************************************')
211 | -- print(string.format('call layer implemetation'));
212 | -- print('*****************************************************')
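-- Configure the MRF layer as a patch-matching convolution over the source feature map: each
-- sampled target patch becomes one filter (nOutputPlane filters of size nInputPlane x kH x kW,
-- applied with stride 1), and response_size is the size of the resulting matching-score map.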
213 | local nInputPlane = target_mrf:size()[2] / (params.mrf_patch_size[id_mrf] * params.mrf_patch_size[id_mrf])
214 | local nOutputPlane = target_mrf:size()[1]
215 | local kW = params.mrf_patch_size[id_mrf]
216 | local kH = params.mrf_patch_size[id_mrf]
217 | local dW = 1
218 | local dH = 1
219 | local input_size = source_feature_map:size()
220 |
221 | local source_xgrid_, source_ygrid_ = drill_computeMRFfull(source_feature_map:float(), params.mrf_patch_size[id_mrf], 1, -1)
222 | local response_size = torch.LongStorage(3)
223 | response_size[1] = nOutputPlane
224 | response_size[2] = source_ygrid_:nElement()
225 | response_size[3] = source_xgrid_:nElement()
226 | net:get(mrf_layers[id_mrf]):implement(params.mode, target_mrf, tensor_target_mrf, target_mrfnorm, source_x, source_y, input_size, response_size, nInputPlane, nOutputPlane, kW, kH, 1, 1, params.mrf_confidence_threshold[id_mrf], params.mrf_weight[id_mrf], params.gpu_chunck_size_1, params.gpu_chunck_size_2, params.backend, params.gpu)
227 | target_mrf = nil
228 | tensor_target_mrf = nil
229 | source_feature_map = nil
230 | collectgarbage()
231 | end
232 |
233 | --------------------------------------------------------------------------------------------------------
234 | -- local function for printing intermediate results
235 | --------------------------------------------------------------------------------------------------------
236 | local function maybe_print(t, loss)
237 | local verbose = (params.print_iter > 0 and t % params.print_iter == 0)
238 | if verbose then
239 | print(string.format('Iteration %d / %d', t, params.num_iter[cur_res]))
240 | end
241 | end
242 |
243 | --------------------------------------------------------------------------------------------------------
244 | -- local function for saving intermediate results
245 | --------------------------------------------------------------------------------------------------------
246 | local function maybe_save(t)
247 | local should_save = params.save_iter > 0 and t % params.save_iter == 0
248 | should_save = should_save or t == params.num_iter[cur_res]
249 | if should_save then
250 | local disp = deprocess(input_image:float())
251 | disp = image.minmax{tensor=disp, min=0, max=1}
252 | disp = image.scale(disp, render_width, render_height, 'bilinear')
253 | local filename = string.format('%s/res_%d_%d.jpg', params.output_folder, cur_res, t)
254 | image.save(filename, disp)
255 | end
256 | end
257 |
258 | --------------------------------------------------------------------------------------------------------
259 | -- local function for computing energy
260 | --------------------------------------------------------------------------------------------------------
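-- feval runs one forward/backward pass; the loss modules (MRF and TV) inject their gradients
-- during the backward pass, and the returned scalar loss is kept at 0 (only the gradient is
-- used by the L-BFGS optimizer).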
261 | local function feval(x)
262 | num_calls = num_calls + 1
263 | net:forward(x)
264 | local grad = net:backward(x, dy)
265 | local loss = 0
266 | collectgarbage()
267 |
268 | maybe_print(num_calls, loss)
269 | maybe_save(num_calls)
270 |
271 | -- optim.lbfgs expects a vector for gradients
272 | return loss, grad:view(grad:nElement())
273 | end
274 |
275 | -------------------------------------------------------------------------------
276 | -- initialize network
277 | -------------------------------------------------------------------------------
278 | if params.gpu >= 0 then
279 | if params.backend == 'cudnn' then
280 | require 'cutorch'
281 | require 'cunn'
282 | cutorch.setDevice(params.gpu + 1)
283 | else
284 | require 'cltorch'
285 | require 'clnn'
286 | cltorch.setDevice(params.gpu + 1)
287 | end
288 | else
289 | params.backend = 'nn'
290 | end
291 |
292 | if params.backend == 'cudnn' then
293 | require 'cudnn'
294 | end
295 |
296 | local loadcaffe_backend = params.backend
297 | if params.backend == 'clnn' then
298 | loadcaffe_backend = 'nn'
299 | end
300 | local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float()
301 | if params.gpu >= 0 then
302 | if params.backend == 'cudnn' then
303 | cnn:cuda()
304 | else
305 | cnn:cl()
306 | end
307 | end
308 | print('cnn successfully loaded')
309 |
310 | for i_res = 1, params.num_res do
311 | local timer = torch.Timer()
312 |
313 | cur_res = i_res
314 | num_calls = 0
315 | local optim_state = {
316 | maxIter = params.num_iter[i_res],
317 | nCorrection = params.nCorrection,
318 | verbose=true,
319 | tolX = 0,
320 | tolFun = 0,
321 | }
322 |
323 | -- initialize image and target
324 | if i_res == 1 then
325 |
326 | if params.ini_method == 'random' then
327 | input_image = torch.randn(pyramid_source_image_caffe[i_res]:size()):float():mul(0.001)
328 | elseif params.ini_method == 'image' then
329 | input_image = pyramid_source_image_caffe[i_res]:clone():float()
330 | else
331 | error('Invalid init type')
332 | end
333 | if params.gpu >= 0 then
334 | if params.backend == 'cudnn' then
335 | input_image = input_image:cuda()
336 | else
337 | input_image = input_image:cl()
338 | end
339 | end
340 | -----------------------------------------------------
341 | -- add a tv layer
342 | -----------------------------------------------------
343 | if params.tv_weight > 0 then
344 | local tv_mod = nn.TVLoss(params.tv_weight):float()
345 | if params.gpu >= 0 then
346 | if params.backend == 'cudnn' then
347 | tv_mod:cuda()
348 | else
349 | tv_mod:cl()
350 | end
351 | end
352 | i_net_layer = i_net_layer + 1
353 | net:add(tv_mod)
354 | end
355 |
356 | for i = 1, #cnn do
357 | if next_mrf_idx <= #mrf_layers_pretrained then
358 | local layer = cnn:get(i)
359 |
360 | i_net_layer = i_net_layer + 1
361 | net:add(layer)
362 |
363 | -- -- add mrfstatsyn layer
364 | if i == mrf_layers_pretrained[next_mrf_idx] then
365 | if add_mrf() == false then
366 | print('build network failed: adding mrf layer failed')
367 | do return end
368 | end
369 | end
370 |
371 | end
372 | end -- for i = 1, #cnn do
373 |
374 | cnn = nil
375 | collectgarbage()
376 |
377 | print(net)
378 |
379 |
380 | print('mrf_layers: ')
381 | for i = 1, #mrf_layers do
382 | print(mrf_layers[i])
383 | end
384 |
385 | print('network has been built.')
386 | else
387 | input_image = image.scale(input_image:float(), pyramid_source_image_caffe[i_res]:size()[3], pyramid_source_image_caffe[i_res]:size()[2], 'bilinear'):clone()
388 | if params.gpu >= 0 then
389 | if params.backend == 'cudnn' then
390 | input_image = input_image:cuda()
391 | else
392 | input_image = input_image:cl()
393 | end
394 | end
395 | end
396 |
397 | print('*****************************************************')
398 | print(string.format('Synthesis started at resolution %d', cur_res))
399 | print('*****************************************************')
400 |
401 | print('Implementing mrf layers ...')
402 | for i = 1, #mrf_layers do
403 | if build_mrf(i) == false then
404 | print('build_mrf failed')
405 | do return end
406 | end
407 | end
408 |
409 | local mask = torch.Tensor(input_image:size()):fill(1)
410 | if params.gpu >= 0 then
411 | if params.backend == 'cudnn' then
412 | mask = mask:cuda()
413 | else
414 | mask = mask:cl()
415 | end
416 | end
417 |
418 | y = net:forward(input_image)
419 | dy = input_image.new(#y):zero()
420 |
421 | -- do optimization
422 | local x, losses = mylbfgs(feval, input_image, optim_state, nil, mask)
423 |
424 | local t = timer:time().real
425 | print(string.format('Synthesis finished at resolution %d, %f seconds', cur_res, t))
426 | end
427 |
428 | net = nil
429 | source_image = nil
430 | target_image = nil
431 | pyramid_source_image_caffe = nil
432 | pyramid_target_image_caffe = nil
433 | input_image = nil
434 | output_image = nil
435 | mrf_losses = nil
436 | mrf_layers = nil
437 | optim_state = nil
438 | collectgarbage()
439 | collectgarbage()
440 |
441 | end -- end of main
442 |
443 |
444 | local function run_test(content_name, style_name, ini_method, max_size, scaler, num_res, num_iter, mrf_layers, mrf_weight, mrf_patch_size, mrf_num_rotation, mrf_num_scale, mrf_sample_stride, mrf_synthesis_stride, mrf_confidence_threshold, tv_weight, mode, gpu_chunck_size_1, gpu_chunck_size_2, backend)
445 |
446 | -- local clock = os.clock
447 | -- function sleep(n) -- seconds
448 | -- local t0 = clock()
449 | -- while clock() - t0 <= n do end
450 | -- end
451 |
452 | local timer_TEST = torch.Timer()
453 |
454 | local flag_state = 1
455 |
456 | local params = {}
457 |
458 | -- externally set parameters
459 | params.content_name = content_name
460 | params.style_name = style_name
461 | --print(backend)
462 | params.ini_method = ini_method
463 | params.max_size = max_size or 384
464 | params.scaler = scaler or 2
465 | params.num_res = num_res or 3
466 | params.num_iter = num_iter or {100, 100, 100}
467 | params.mrf_layers = mrf_layers or {12, 21}
468 | params.mrf_weight = mrf_weight or {1e-4, 1e-4}
469 | params.mrf_patch_size = mrf_patch_size or {3, 3}
470 | params.target_num_rotation = mrf_num_rotation or 0
471 | params.target_num_scale = mrf_num_scale or 0
472 | params.target_sample_stride = mrf_sample_stride or {2, 2}
473 | params.source_sample_stride = mrf_synthesis_stride or {2, 2}
474 | params.mrf_confidence_threshold = mrf_confidence_threshold or {0, 0}
475 | params.tv_weight = tv_weight or 1e-3
476 |
477 | params.mode = mode or 'speed'
478 | params.gpu_chunck_size_1 = gpu_chunck_size_1 or 256
479 | params.gpu_chunck_size_2 = gpu_chunck_size_2 or 16
480 | params.backend = backend or 'cudnn'
481 |
482 | -- fixed parameters
483 | params.target_step_rotation = math.pi/24
484 | params.target_step_scale = 1.05
485 |
486 | params.proto_file = 'data/models/VGG_ILSVRC_19_layers_deploy.prototxt'
487 | params.model_file = 'data/models/VGG_ILSVRC_19_layers.caffemodel'
488 | params.gpu = 0
489 | params.nCorrection = 25
490 | params.print_iter = 10
491 | params.save_iter = 10
492 | params.gpu_chunck_size_1 = 32
493 | params.gpu_chunck_size_2 = 2
494 |
495 | params.output_folder = string.format('data/result/freesyn/MRF/%s_TO_%s', params.content_name, params.style_name)
496 |
497 | main(params)
498 |
499 | local t_test = timer_TEST:time().real
500 | print(string.format('Total time: %f seconds', t_test))
501 | -- sleep(1)
502 | return flag_state
503 | end
504 |
505 | return {
506 | run_test = run_test,
507 | main = main
508 | }
509 |
--------------------------------------------------------------------------------
/transfer_CNNMRF_wrapper.lua:
--------------------------------------------------------------------------------
1 | require 'torch'
2 | require 'nn'
3 | require 'image'
4 | require 'paths'
5 | require 'loadcaffe'
6 |
7 | paths.dofile('mylib/myoptimizer.lua')
8 | paths.dofile('mylib/tv.lua')
9 | paths.dofile('mylib/mrf.lua')
10 | paths.dofile('mylib/helper.lua')
11 | paths.dofile('mylib/content.lua')
12 |
13 | torch.setdefaulttensortype('torch.FloatTensor') -- float as default tensor type
14 |
15 | local function main(params)
16 | os.execute('mkdir data/result/')
17 | os.execute('mkdir data/result/trans/')
18 | os.execute('mkdir data/result/trans/MRF/')
19 | os.execute(string.format('mkdir %s', params.output_folder))
20 |
21 | local net = nn.Sequential()
22 | local next_content_idx = 1
23 | local i_net_layer = 0
24 | local num_calls = 0
25 | local content_losses = {}
26 | local content_layers = {}
27 | local i_content_layer = 0
28 | local next_mrf_idx = 1
29 | local mrf_losses = {}
30 | local mrf_layers = {}
31 | local i_mrf_layer = 0
32 | local input_image
33 | local output_image
34 | local cur_res
35 | local content_layers_pretrained = params.content_layers
36 | local mrf_layers_pretrained = params.mrf_layers
37 |
38 | -----------------------------------------------------------------------------------
39 | -- read images
40 | -----------------------------------------------------------------------------------
41 | local source_image = image.load(string.format('data/content/%s.jpg', params.content_name), 3)
42 | local target_image = image.load(string.format('data/style/%s.jpg', params.style_name), 3)
43 |
44 | source_image = image.scale(source_image, params.max_size, 'bilinear')
45 | target_image = image.scale(target_image, params.max_size, 'bilinear')
46 |
47 | local render_height = source_image:size()[2]
48 | local render_width = source_image:size()[3]
49 | local source_image_caffe = preprocess(source_image):float()
50 | local target_image_caffe = preprocess(target_image):float()
51 |
52 | local pyramid_source_image_caffe = {}
53 | for i_res = 1, params.num_res do
54 | pyramid_source_image_caffe[i_res] = image.scale(source_image_caffe, math.ceil(source_image:size()[3] * math.pow(0.5, params.num_res - i_res)), math.ceil(source_image:size()[2] * math.pow(0.5, params.num_res - i_res)), 'bilinear')
55 | end
56 |
57 | local pyramid_target_image_caffe = {}
58 | for i_res = 1, params.num_res do
59 | pyramid_target_image_caffe[i_res] = image.scale(target_image_caffe, math.ceil(target_image:size()[3] * math.pow(0.5, params.num_res - i_res)), math.ceil(target_image:size()[2] * math.pow(0.5, params.num_res - i_res)), 'bilinear')
60 | end
61 |
62 | ------------------------------------------------------------------------------------------------------
63 | -- local function for adding a content layer
64 | ------------------------------------------------------------------------------------------------------
65 | local function add_content()
66 | local source = pyramid_source_image_caffe[cur_res]:clone()
67 | if params.gpu >= 0 then
68 | if params.backend == 'cudnn' then
69 | source = source:cuda()
70 | else
71 | source = source:cl()
72 | end
73 | end
74 | local feature = net:forward(source):clone() -- generate the content target using content image
75 | if params.gpu >= 0 then
76 | if params.backend == 'cudnn' then
77 | feature = feature:cuda()
78 | else
79 | feature = feature:cl()
80 | end
81 | end
82 |
83 | local norm = params.normalize_gradients
84 | print(params.normalize_gradients)
85 | local loss_module = nn.ContentLoss(params.content_weight, feature, norm):float()
86 | if params.gpu >= 0 then
87 | if params.backend == 'cudnn' then
88 | loss_module:cuda()
89 | else
90 | loss_module:cl()
91 | end
92 | end
93 |
94 | i_content_layer = i_content_layer + 1
95 | i_net_layer = i_net_layer + 1
96 | next_content_idx = next_content_idx + 1
97 | net:add(loss_module)
98 | table.insert(content_losses, loss_module)
99 | table.insert(content_layers, i_content_layer, i_net_layer)
100 | end
101 |
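-- Recompute the content feature target at the current resolution and pass it to the existing
-- ContentLoss module at idx_layer via its update method (called when moving to a higher resolution).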
102 | local function update_content(idx_layer, idx_content)
103 | local source = pyramid_source_image_caffe[cur_res]:clone()
104 | if params.gpu >= 0 then
105 | if params.backend == 'cudnn' then
106 | source = source:cuda()
107 | else
108 | source = source:cl()
109 | end
110 | end
111 | net:forward(source)
112 | local feature = net:get(idx_layer).output:clone()
113 | if params.gpu >= 0 then
114 | if params.backend == 'cudnn' then
115 | feature = feature:cuda()
116 | else
117 | feature = feature:cl()
118 | end
119 | end
120 |
121 | local norm = params.normalize_gradients
122 | local loss_module = nn.ContentLoss(params.content_weight, feature, norm):float()
123 | if params.gpu >= 0 then
124 | if params.backend == 'cudnn' then
125 | loss_module:cuda()
126 | else
127 | loss_module:cl()
128 | end
129 | end
130 | net:get(idx_layer):update(loss_module)
131 | end
132 |
133 |
134 | -- --------------------------------------------------------------------------------------------------------
135 | -- -- local function for adding a mrf layer, with image rotation and scaling
136 | -- --------------------------------------------------------------------------------------------------------
137 | local function add_mrf()
138 | local mrf_module = nn.MRFMM()
139 | i_mrf_layer = i_mrf_layer + 1
140 | i_net_layer = i_net_layer + 1
141 | next_mrf_idx = next_mrf_idx + 1
142 | if params.gpu >= 0 then
143 | if params.backend == 'cudnn' then
144 | mrf_module:cuda()
145 | else
146 | mrf_module:cl()
147 | end
148 | end
149 | net:add(mrf_module)
150 | table.insert(mrf_losses, mrf_module)
151 | table.insert(mrf_layers, i_mrf_layer, i_net_layer)
152 | return true
153 | end
154 |
155 | local function build_mrf(id_mrf)
156 | --------------------------------------------------------
157 | -- deal with target
158 | --------------------------------------------------------
159 | local target_images_caffe = {}
160 | for i_r = -params.target_num_rotation, params.target_num_rotation do
161 | local alpha = params.target_step_rotation * i_r
162 | local min_x, min_y, max_x, max_y = computeBB(pyramid_target_image_caffe[cur_res]:size()[3], pyramid_target_image_caffe[cur_res]:size()[2], alpha)
163 | local target_image_rt_caffe = image.rotate(pyramid_target_image_caffe[cur_res], alpha, 'bilinear')
164 | target_image_rt_caffe = target_image_rt_caffe[{{1, target_image_rt_caffe:size()[1]}, {min_y, max_y}, {min_x, max_x}}]
165 |
166 | for i_s = -params.target_num_scale, params.target_num_scale do
167 | local max_sz = math.floor(math.max(target_image_rt_caffe:size()[2], target_image_rt_caffe:size()[3]) * torch.pow(params.target_step_scale, i_s))
168 | local target_image_rt_s_caffe = image.scale(target_image_rt_caffe, max_sz, 'bilinear')
169 | if params.gpu >= 0 then
170 | if params.backend == 'cudnn' then
171 | target_image_rt_s_caffe = target_image_rt_s_caffe:cuda()
172 | else
173 | target_image_rt_s_caffe = target_image_rt_s_caffe:cl()
174 | end
175 | end
176 | table.insert(target_images_caffe, target_image_rt_s_caffe)
177 | end
178 | end
179 |
180 | -- compute the coordinates on the pixel layer
181 | local target_x
182 | local target_y
183 | local target_x_per_image = {}
184 | local target_y_per_image = {}
185 | local target_imageid
186 | -- print('*****************************************************')
187 | -- print(string.format('build target mrf'));
188 | -- print('*****************************************************')
189 | for i_image = 1, #target_images_caffe do
190 | -- print(string.format('image %d, ', i_image))
191 | net:forward(target_images_caffe[i_image])
192 | local target_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float()
193 |
194 | if params.mrf_patch_size[id_mrf] > target_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > target_feature_map:size()[3] then
195 | print('target_images is not big enough for patch')
196 | print('target_images size: ')
197 | print(target_feature_map:size())
198 | print('patch size: ')
199 | print(params.mrf_patch_size[id_mrf])
200 | do return end
201 | end
202 | local target_x_, target_y_ = drill_computeMRFfull(target_feature_map, params.mrf_patch_size[id_mrf], params.target_sample_stride[id_mrf], -1)
203 |
204 |
205 | local x = torch.Tensor(target_x_:nElement() * target_y_:nElement())
206 | local y = torch.Tensor(target_x_:nElement() * target_y_:nElement())
207 | local target_imageid_ = torch.Tensor(target_x_:nElement() * target_y_:nElement()):fill(i_image)
208 | local count = 1
209 | for i_row = 1, target_y_:nElement() do
210 | for i_col = 1, target_x_:nElement() do
211 | x[count] = target_x_[i_col]
212 | y[count] = target_y_[i_row]
213 | count = count + 1
214 | end
215 | end
216 | if i_image == 1 then
217 | target_x = x:clone()
218 | target_y = y:clone()
219 | target_imageid = target_imageid_:clone()
220 | else
221 | target_x = torch.cat(target_x, x, 1)
222 | target_y = torch.cat(target_y, y, 1)
223 | target_imageid = torch.cat(target_imageid, target_imageid_, 1)
224 | end
225 | table.insert(target_x_per_image, x)
226 | table.insert(target_y_per_image, y)
227 | end -- end for i_image = 1, #target_images do
228 |
229 | -- print('*****************************************************')
230 | -- print(string.format('collect mrf'));
231 | -- print('*****************************************************')
232 |
233 | local num_channel_mrf = net:get(mrf_layers[id_mrf] - 1).output:size()[1]
234 | local target_mrf = torch.Tensor(target_x:nElement(), num_channel_mrf * params.mrf_patch_size[id_mrf] * params.mrf_patch_size[id_mrf])
235 | local tensor_target_mrf = torch.Tensor(target_x:nElement(), num_channel_mrf, params.mrf_patch_size[id_mrf], params.mrf_patch_size[id_mrf])
236 | local count_mrf = 1
237 | for i_image = 1, #target_images_caffe do
238 | -- print(string.format('image %d, ', i_image));
239 | net:forward(target_images_caffe[i_image])
240 | -- sample mrf on mrf_layers
241 | local tensor_target_mrf_, target_mrf_ = sampleMRFAndTensorfromLocation2(target_x_per_image[i_image], target_y_per_image[i_image], net:get(mrf_layers[id_mrf] - 1).output:float(), params.mrf_patch_size[id_mrf])
242 | target_mrf[{{count_mrf, count_mrf + target_mrf_:size()[1] - 1}, {1, target_mrf:size()[2]}}] = target_mrf_:clone()
243 | tensor_target_mrf[{{count_mrf, count_mrf + target_mrf_:size()[1] - 1}, {1, tensor_target_mrf:size()[2]}, {1, tensor_target_mrf:size()[3]}, {1, tensor_target_mrf:size()[4]}}] = tensor_target_mrf_:clone()
244 | count_mrf = count_mrf + target_mrf_:size()[1]
245 | tensor_target_mrf_ = nil
246 | target_mrf_ = nil
247 | collectgarbage()
248 | end --for i_image = 1, #target_images do
249 | local target_mrfnorm = torch.sqrt(torch.sum(torch.cmul(target_mrf, target_mrf), 2)):resize(target_mrf:size()[1], 1, 1)
250 |
251 | --------------------------------------------------------
252 | -- process source
253 | --------------------------------------------------------
254 | -- print('*****************************************************')
255 | -- print(string.format('process source image'));
256 | -- print('*****************************************************')
257 | if params.gpu >= 0 then
258 | if params.backend == 'cudnn' then
259 | net:forward(pyramid_source_image_caffe[cur_res]:cuda())
260 | else
261 | net:forward(pyramid_source_image_caffe[cur_res]:cl())
262 | end
263 | else
264 | net:forward(pyramid_source_image_caffe[cur_res])
265 | end
266 | local source_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float()
267 | if params.mrf_patch_size[id_mrf] > source_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > source_feature_map:size()[3] then
268 | print('source_image_caffe is not big enough for patch')
269 | print('source_image_caffe size: ')
270 | print(source_feature_map:size())
271 | print('patch size: ')
272 | print(params.mrf_patch_size[id_mrf])
273 | do return end
274 | end
275 | local source_xgrid, source_ygrid = drill_computeMRFfull(source_feature_map:float(), params.mrf_patch_size[id_mrf], params.source_sample_stride[id_mrf], -1)
276 | local source_x = torch.Tensor(source_xgrid:nElement() * source_ygrid:nElement())
277 | local source_y = torch.Tensor(source_xgrid:nElement() * source_ygrid:nElement())
278 | local count = 1
279 | for i_row = 1, source_ygrid:nElement() do
280 | for i_col = 1, source_xgrid:nElement() do
281 | source_x[count] = source_xgrid[i_col]
282 | source_y[count] = source_ygrid[i_row]
283 | count = count + 1
284 | end
285 | end
286 | -- local tensor_target_mrfnorm = torch.repeatTensor(target_mrfnorm:float(), 1, net:get(mrf_layers[id_mrf] - 1).output:size()[2] - (params.mrf_patch_size[id_mrf] - 1), net:get(mrf_layers[id_mrf] - 1).output:size()[3] - (params.mrf_patch_size[id_mrf] - 1))
287 |
288 | -- print('*****************************************************')
289 | -- print(string.format('call layer implemetation'));
290 | -- print('*****************************************************')
291 | local nInputPlane = target_mrf:size()[2] / (params.mrf_patch_size[id_mrf] * params.mrf_patch_size[id_mrf])
292 | local nOutputPlane = target_mrf:size()[1]
293 | local kW = params.mrf_patch_size[id_mrf]
294 | local kH = params.mrf_patch_size[id_mrf]
295 | local dW = 1
296 | local dH = 1
297 | local input_size = source_feature_map:size()
298 |
299 | local source_xgrid_, source_ygrid_ = drill_computeMRFfull(source_feature_map:float(), params.mrf_patch_size[id_mrf], 1, -1)
300 | local response_size = torch.LongStorage(3)
301 | response_size[1] = nOutputPlane
302 | response_size[2] = source_ygrid_:nElement()
303 | response_size[3] = source_xgrid_:nElement()
304 | net:get(mrf_layers[id_mrf]):implement(params.mode, target_mrf, tensor_target_mrf, target_mrfnorm, source_x, source_y, input_size, response_size, nInputPlane, nOutputPlane, kW, kH, 1, 1, params.mrf_confidence_threshold[id_mrf], params.mrf_weight[id_mrf], params.gpu_chunck_size_1, params.gpu_chunck_size_2, params.backend, params.gpu)
305 | target_mrf = nil
306 | tensor_target_mrf = nil
307 | source_feature_map = nil
308 | collectgarbage()
309 | end
310 |
311 | --------------------------------------------------------------------------------------------------------
312 | -- local function for printing intermediate results
313 | --------------------------------------------------------------------------------------------------------
314 | local function maybe_print(t, loss)
315 | local verbose = (params.print_iter > 0 and t % params.print_iter == 0)
316 | if verbose then
317 | print(string.format('Iteration %d / %d', t, params.num_iter[cur_res]))
318 | end
319 | end
320 |
321 | --------------------------------------------------------------------------------------------------------
322 | -- local function for saving intermediate results
323 | --------------------------------------------------------------------------------------------------------
324 | local function maybe_save(t)
325 | local should_save = params.save_iter > 0 and t % params.save_iter == 0
326 | should_save = should_save or t == params.num_iter[cur_res]
327 | if should_save then
328 | local disp = deprocess(input_image:float())
329 | disp = image.minmax{tensor=disp, min=0, max=1}
330 | disp = image.scale(disp, render_width, render_height, 'bilinear')
331 | local filename = string.format('%s/res_%d_%d.jpg', params.output_folder, cur_res, t)
332 | image.save(filename, disp)
333 | end
334 | end
335 |
336 | --------------------------------------------------------------------------------------------------------
337 | -- local function for computing energy
338 | --------------------------------------------------------------------------------------------------------
339 | local function feval(x)
340 | num_calls = num_calls + 1
341 | net:forward(x)
342 | local grad = net:backward(x, dy)
343 | local loss = 0
344 | collectgarbage()
345 |
346 | maybe_print(num_calls, loss)
347 | maybe_save(num_calls)
348 |
349 | -- optim.lbfgs expects a vector for gradients
350 | return loss, grad:view(grad:nElement())
351 | end
352 |
353 | -------------------------------------------------------------------------------
354 | -- initialize network
355 | -------------------------------------------------------------------------------
356 | if params.gpu >= 0 then
357 | if params.backend == 'cudnn' then
358 | require 'cutorch'
359 | require 'cunn'
360 | cutorch.setDevice(params.gpu + 1)
361 | else
362 | require 'cltorch'
363 | require 'clnn'
364 | cltorch.setDevice(params.gpu + 1)
365 | end
366 | else
367 | params.backend = 'nn'
368 | end
369 |
370 | if params.backend == 'cudnn' then
371 | require 'cudnn'
372 | end
373 |
374 | local loadcaffe_backend = params.backend
375 | if params.backend == 'clnn' then
376 | loadcaffe_backend = 'nn'
377 | end
378 | local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float()
379 | if params.gpu >= 0 then
380 | if params.backend == 'cudnn' then
381 | cnn:cuda()
382 | else
383 | cnn:cl()
384 | end
385 | end
386 | print('cnn successfully loaded')
387 |
388 | for i_res = 1, params.num_res do
389 | local timer = torch.Timer()
390 |
391 | cur_res = i_res
392 | num_calls = 0
393 | local optim_state = {
394 | maxIter = params.num_iter[i_res],
395 | nCorrection = params.nCorrection,
396 | verbose=true,
397 | tolX = 0,
398 | tolFun = 0,
399 | }
400 |
401 | -- initialize image and target
402 | if i_res == 1 then
403 |
404 | if params.ini_method == 'random' then
405 | input_image = torch.randn(pyramid_source_image_caffe[i_res]:size()):float():mul(0.001)
406 | elseif params.ini_method == 'image' then
407 | input_image = pyramid_source_image_caffe[i_res]:clone():float()
408 | else
409 | error('Invalid init type')
410 | end
411 | if params.gpu >= 0 then
412 | if params.backend == 'cudnn' then
413 | input_image = input_image:cuda()
414 | else
415 | input_image = input_image:cl()
416 | end
417 | end
418 |
419 | -----------------------------------------------------
420 | -- add a tv layer
421 | -----------------------------------------------------
422 | if params.tv_weight > 0 then
423 | local tv_mod = nn.TVLoss(params.tv_weight):float()
424 | if params.gpu >= 0 then
425 | if params.backend == 'cudnn' then
426 | tv_mod:cuda()
427 | else
428 | tv_mod:cl()
429 | end
430 | end
431 | i_net_layer = i_net_layer + 1
432 | net:add(tv_mod)
433 | end
434 |
435 | for i = 1, #cnn do
436 | if next_content_idx <= #content_layers_pretrained or next_mrf_idx <= #mrf_layers_pretrained then
437 | local layer = cnn:get(i)
438 |
439 | i_net_layer = i_net_layer + 1
440 | net:add(layer)
441 |
442 | -- add a content_losses layer
443 | if i == content_layers_pretrained[next_content_idx] then
444 | add_content()
445 | end
446 |
447 | -- -- add mrfstatsyn layer
448 | if i == mrf_layers_pretrained[next_mrf_idx] then
449 | if add_mrf() == false then
450 | print('build network failed: adding mrf layer failed')
451 | do return end
452 | end
453 | end
454 |
455 | end
456 | end -- for i = 1, #cnn do
457 |
458 | cnn = nil
459 | collectgarbage()
460 |
461 | print(net)
462 |
463 | print('content_layers: ')
464 | for i = 1, #content_layers do
465 | print(content_layers[i])
466 | end
467 |
468 | print('mrf_layers: ')
469 | for i = 1, #mrf_layers do
470 | print(mrf_layers[i])
471 | end
472 |
473 | print('network has been built.')
474 | else
475 | input_image = image.scale(input_image:float(), pyramid_source_image_caffe[i_res]:size()[3], pyramid_source_image_caffe[i_res]:size()[2], 'bilinear'):clone()
476 | if params.gpu >= 0 then
477 | if params.backend == 'cudnn' then
478 | input_image = input_image:cuda()
479 | else
480 | input_image = input_image:cl()
481 | end
482 | end
483 |
484 | -- -- update content layers
485 | for i_layer = 1, #content_layers do
486 | update_content(content_layers[i_layer], i_layer)
487 | -- print(string.format('content_layers %d has been updated', content_layers[i_layer]))
488 | end
489 |
490 | end
491 |
492 | print('*****************************************************')
493 | print(string.format('Synthesis started at resolution %d', cur_res))
494 | print('*****************************************************')
495 |
496 | print('Implementing mrf layers ...')
497 | for i = 1, #mrf_layers do
498 | if build_mrf(i) == false then
499 | print('build_mrf failed')
500 | do return end
501 | end
502 | end
503 |
504 | local mask = torch.Tensor(input_image:size()):fill(1)
505 | if params.gpu >= 0 then
506 | if params.backend == 'cudnn' then
507 | mask = mask:cuda()
508 | else
509 | mask = mask:cl()
510 | end
511 | end
512 |
513 | y = net:forward(input_image)
514 | dy = input_image.new(#y):zero()
515 |
516 | -- do optimization
517 | local x, losses = mylbfgs(feval, input_image, optim_state, nil, mask)
518 |
519 | local t = timer:time().real
520 | print(string.format('Synthesis finished at resolution %d, %f seconds', cur_res, t))
521 | end
522 |
523 | net = nil
524 | source_image = nil
525 | target_image = nil
526 | pyramid_source_image_caffe = nil
527 | pyramid_target_image_caffe = nil
528 | input_image = nil
529 | output_image = nil
530 | content_losses = nil
531 | content_layers = nil
532 | mrf_losses = nil
533 | mrf_layers = nil
534 | optim_state = nil
535 | collectgarbage()
536 | collectgarbage()
537 |
538 | end -- end of main
539 |
540 |
541 | local function run_test(content_name, style_name, ini_method, max_size, num_res, num_iter, mrf_layers, mrf_weight, mrf_patch_size, mrf_num_rotation, mrf_num_scale, mrf_sample_stride, mrf_synthesis_stride, mrf_confidence_threshold, content_layers, content_weight, tv_weight, mode, gpu_chunck_size_1, gpu_chunck_size_2, backend)
542 | -- local clock = os.clock
543 | -- function sleep(n) -- seconds
544 | -- local t0 = clock()
545 | -- while clock() - t0 <= n do end
546 | -- end
547 |
548 | local timer_TEST = torch.Timer()
549 |
550 | local flag_state = 1
551 |
552 | local params = {}
553 |
554 | -- externally set parameters
555 | params.content_name = content_name
556 | params.style_name = style_name
557 | params.ini_method = ini_method
558 | params.max_size = max_size or 384
559 | params.num_res = num_res or 3
560 | params.num_iter = num_iter or {100, 100, 100}
561 | params.mrf_layers = mrf_layers or {12, 21}
562 | params.mrf_weight = mrf_weight or {1e-4, 1e-4}
563 | params.mrf_patch_size = mrf_patch_size or {3, 3}
564 | params.target_num_rotation = mrf_num_rotation or 0
565 | params.target_num_scale = mrf_num_scale or 0
566 | params.target_sample_stride = mrf_sample_stride or {2, 2}
567 | params.source_sample_stride = mrf_synthesis_stride or {2, 2}
568 | params.mrf_confidence_threshold = mrf_confidence_threshold or {0, 0}
569 | params.content_layers = content_layers or {21}
570 | params.content_weight = content_weight or 2e1
571 | params.tv_weight = tv_weight or 1e-3
572 |
573 | params.mode = mode or 'speed'
574 | params.gpu_chunck_size_1 = gpu_chunck_size_1 or 256
575 | params.gpu_chunck_size_2 = gpu_chunck_size_2 or 16
576 | params.backend = backend or 'cudnn'
577 | -- fixed parameters
578 | params.target_step_rotation = math.pi/24
579 | params.target_step_scale = 1.05
580 |
581 |
582 | params.proto_file = 'data/models/VGG_ILSVRC_19_layers_deploy.prototxt'
583 | params.model_file = 'data/models/VGG_ILSVRC_19_layers.caffemodel'
584 | params.gpu = 0
585 | params.nCorrection = 25
586 | params.print_iter = 10
587 | params.save_iter = 10
588 |
589 | params.output_folder = string.format('data/result/trans/MRF/%s_TO_%s',params.content_name, params.style_name)
590 |
591 | main(params)
592 |
593 | local t_test = timer_TEST:time().real
594 | print(string.format('Total time: %f seconds', t_test))
595 | -- sleep(1)
596 | return flag_state
597 | end
598 |
599 | return {
600 | run_test = run_test,
601 | main = main
602 | }
603 |
--------------------------------------------------------------------------------