├── example ├── marple8_01.ppm ├── marple8_02.ppm ├── marple8_03.ppm ├── marple8_04.ppm ├── marple8_05.ppm ├── seated-nude.jpg └── deepflow │ ├── forward_1_2.flo │ ├── forward_2_3.flo │ ├── forward_3_4.flo │ ├── forward_4_5.flo │ ├── backward_2_1.flo │ ├── backward_3_2.flo │ ├── backward_4_3.flo │ ├── backward_5_4.flo │ ├── reliable_1_2.pgm │ ├── reliable_2_1.pgm │ ├── reliable_2_3.pgm │ ├── reliable_3_2.pgm │ ├── reliable_3_4.pgm │ ├── reliable_4_3.pgm │ ├── reliable_4_5.pgm │ └── reliable_5_4.pgm ├── consistencyChecker ├── NMath.cpp ├── Makefile ├── consistencyChecker.cpp ├── NMath.h ├── CVector.h ├── CTensor4D.h └── CTensor.h ├── .gitignore ├── run-deepflow.sh ├── models └── download_models.sh ├── flowFileLoader.lua ├── makeOptFlow.sh ├── stylizeVideo.sh ├── LICENSE ├── lbfgs.lua ├── README.md ├── artistic_video_multiPass.lua ├── artistic_video.lua └── artistic_video_core.lua /example/marple8_01.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_01.ppm -------------------------------------------------------------------------------- /example/marple8_02.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_02.ppm -------------------------------------------------------------------------------- /example/marple8_03.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_03.ppm -------------------------------------------------------------------------------- /example/marple8_04.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_04.ppm -------------------------------------------------------------------------------- /example/marple8_05.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_05.ppm -------------------------------------------------------------------------------- /example/seated-nude.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/seated-nude.jpg -------------------------------------------------------------------------------- /consistencyChecker/NMath.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/consistencyChecker/NMath.cpp -------------------------------------------------------------------------------- /consistencyChecker/Makefile: -------------------------------------------------------------------------------- 1 | default: 2 | g++ -O3 -fPIC consistencyChecker.cpp NMath.cpp -I. -o consistencyChecker -L. 3 | 4 | -------------------------------------------------------------------------------- /example/deepflow/forward_1_2.flo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/forward_1_2.flo -------------------------------------------------------------------------------- /example/deepflow/forward_2_3.flo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/forward_2_3.flo -------------------------------------------------------------------------------- /example/deepflow/forward_3_4.flo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/forward_3_4.flo -------------------------------------------------------------------------------- /example/deepflow/forward_4_5.flo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/forward_4_5.flo -------------------------------------------------------------------------------- /example/deepflow/backward_2_1.flo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/backward_2_1.flo -------------------------------------------------------------------------------- /example/deepflow/backward_3_2.flo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/backward_3_2.flo -------------------------------------------------------------------------------- /example/deepflow/backward_4_3.flo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/backward_4_3.flo -------------------------------------------------------------------------------- /example/deepflow/backward_5_4.flo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/backward_5_4.flo -------------------------------------------------------------------------------- /example/deepflow/reliable_1_2.pgm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_1_2.pgm -------------------------------------------------------------------------------- /example/deepflow/reliable_2_1.pgm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_2_1.pgm -------------------------------------------------------------------------------- /example/deepflow/reliable_2_3.pgm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_2_3.pgm -------------------------------------------------------------------------------- /example/deepflow/reliable_3_2.pgm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_3_2.pgm -------------------------------------------------------------------------------- /example/deepflow/reliable_3_4.pgm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_3_4.pgm -------------------------------------------------------------------------------- /example/deepflow/reliable_4_3.pgm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_4_3.pgm -------------------------------------------------------------------------------- /example/deepflow/reliable_4_5.pgm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_4_5.pgm -------------------------------------------------------------------------------- /example/deepflow/reliable_5_4.pgm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_5_4.pgm -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | out*.png 3 | *.png 4 | *.jpg 5 | *.prototxt* 6 | *.caffemodel 7 | models/ 8 | !models/download_models.sh 9 | deepflow2-static 10 | deepmatching-static 11 | consistencyChecker/consistencyChecker -------------------------------------------------------------------------------- /run-deepflow.sh: -------------------------------------------------------------------------------- 1 | if [ "$#" -ne 3 ]; then 2 | echo "This is an auxiliary script for makeOptFlow.sh. No need to call this script directly." 3 | exit 1 4 | fi 5 | if [ ! -f deepmatching-static ] && [ ! -f deepflow2-static ]; then 6 | echo "Place deepflow2-static and deepmatching-static in this directory." 7 | exit 1 8 | fi 9 | 10 | ./deepmatching-static $1 $2 -nt 0 | ./deepflow2-static $1 $2 $3 -match -------------------------------------------------------------------------------- /models/download_models.sh: -------------------------------------------------------------------------------- 1 | cd models 2 | wget -c https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt 3 | wget -c --no-check-certificate https://bethgelab.org/media/uploads/deeptextures/vgg_normalised.caffemodel 4 | wget -c http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel 5 | cd .. 6 | -------------------------------------------------------------------------------- /flowFileLoader.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'image' 3 | 4 | --[[ 5 | Reads a flow field from a binary flow file. 6 | 7 | bytes contents 8 | 0-3 tag: "PIEH" in ASCII, which in little endian happens to be the float 202021.25 9 | (just a sanity check that floats are represented correctly) 10 | 4-7 width as an integer 11 | 8-11 height as an integer 12 | 12-end data (width*height*2*4 bytes total) 13 | --]] 14 | local function flowFileLoader_load(fileName) 15 | local flowFile = torch.DiskFile(fileName, 'r') 16 | flowFile:binary() 17 | flowFile:readFloat() 18 | local W = flowFile:readInt() 19 | local H = flowFile:readInt() 20 | -- image.warp needs 2xHxW, and also expects (y, x) for some reason... 21 | local flow = torch.Tensor(2, H, W) 22 | local raw_flow = torch.data(flow) 23 | local elems_in_dim = H * W 24 | local storage = flowFile:readFloat(2 * elems_in_dim) 25 | for y=0, H - 1 do 26 | for x=0, W - 1 do 27 | local shift = y * W + x 28 | raw_flow[elems_in_dim + shift] = storage[2 * shift + 1] 29 | raw_flow[shift] = storage[2 * shift + 2] 30 | end 31 | end 32 | flowFile:close() 33 | return flow 34 | end 35 | 36 | return { 37 | load = flowFileLoader_load 38 | } 39 | -------------------------------------------------------------------------------- /makeOptFlow.sh: -------------------------------------------------------------------------------- 1 | # Specify the path to the optical flow utility here. 2 | # Also check line 44 and 47 whether the arguments are in the correct order. 3 | flowCommandLine="bash run-deepflow.sh" 4 | 5 | if [ -z "$flowCommandLine" ]; then 6 | echo "Please open makeOptFlow.sh and specify the command line for computing the optical flow." 7 | exit 1 8 | fi 9 | 10 | if [ ! -f ./consistencyChecker/consistencyChecker ]; then 11 | if [ ! -f ./consistencyChecker/Makefile ]; then 12 | echo "Consistency checker makefile not found." 13 | exit 1 14 | fi 15 | cd consistencyChecker/ 16 | make 17 | cd .. 18 | fi 19 | 20 | filePattern=$1 21 | folderName=$2 22 | startFrame=${3:-1} 23 | stepSize=${4:-1} 24 | 25 | if [ "$#" -le 1 ]; then 26 | echo "Usage: ./makeOptFlow [ []]" 27 | echo -e "\tfilePattern:\tFilename pattern of the frames of the videos." 28 | echo -e "\toutputFolder:\tOutput folder." 29 | echo -e "\tstartNumber:\tThe index of the first frame. Default: 1" 30 | echo -e "\tstepSize:\tThe step size to create long-term flow. Default: 1" 31 | exit 1 32 | fi 33 | 34 | i=$[$startFrame] 35 | j=$[$startFrame + $stepSize] 36 | 37 | mkdir -p "${folderName}" 38 | 39 | while true; do 40 | file1=$(printf "$filePattern" "$i") 41 | file2=$(printf "$filePattern" "$j") 42 | if [ -a $file2 ]; then 43 | if [ ! -f ${folderName}/forward_${i}_${j}.flo ]; then 44 | eval $flowCommandLine "$file1" "$file2" "${folderName}/forward_${i}_${j}.flo" 45 | fi 46 | if [ ! -f ${folderName}/backward_${j}_${i}.flo ]; then 47 | eval $flowCommandLine "$file2" "$file1" "${folderName}/backward_${j}_${i}.flo" 48 | fi 49 | ./consistencyChecker/consistencyChecker "${folderName}/backward_${j}_${i}.flo" "${folderName}/forward_${i}_${j}.flo" "${folderName}/reliable_${j}_${i}.pgm" 50 | ./consistencyChecker/consistencyChecker "${folderName}/forward_${i}_${j}.flo" "${folderName}/backward_${j}_${i}.flo" "${folderName}/reliable_${i}_${j}.pgm" 51 | else 52 | break 53 | fi 54 | i=$[$i +1] 55 | j=$[$j +1] 56 | done 57 | -------------------------------------------------------------------------------- /stylizeVideo.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | # Get a carriage return into `cr` 3 | cr=`echo $'\n.'` 4 | cr=${cr%.} 5 | 6 | 7 | # Find out whether ffmpeg or avconv is installed on the system 8 | FFMPEG=ffmpeg 9 | command -v $FFMPEG >/dev/null 2>&1 || { 10 | FFMPEG=avconv 11 | command -v $FFMPEG >/dev/null 2>&1 || { 12 | echo >&2 "This script requires either ffmpeg or avconv installed. Aborting."; exit 1; 13 | } 14 | } 15 | 16 | if [ "$#" -le 1 ]; then 17 | echo "Usage: ./stylizeVideo " 18 | exit 1 19 | fi 20 | 21 | # Parse arguments 22 | filename=$(basename "$1") 23 | extension="${filename##*.}" 24 | filename="${filename%.*}" 25 | filename=${filename//[%]/x} 26 | style_image=$2 27 | 28 | # Create output folder 29 | mkdir -p $filename 30 | 31 | 32 | echo "" 33 | read -p "Which backend do you want to use? \ 34 | For Nvidia GPU, use cudnn if available, otherwise nn. \ 35 | For non-Nvidia GPU, use clnn. Note: You have to have the given backend installed in order to use it. [nn] $cr > " backend 36 | backend=${backend:-nn} 37 | 38 | if [ "$backend" == "cudnn" ]; then 39 | echo "" 40 | read -p "This algorithm needs a lot of memory. \ 41 | For a resolution of 450x350 you'll need roughly 2GB VRAM. \ 42 | VRAM usage increases linear with resolution. \ 43 | Please enter a resolution at which the video should be processed, \ 44 | in the format w:h, or leave blank to use the original resolution $cr > " resolution 45 | elif [ "$backend" = "nn" ] || [ "$backend" = "clnn" ]; then 46 | echo "" 47 | read -p "This algorithm needs a lot of memory. \ 48 | For a resolution of 450x350 you'll need roughly 4GB VRAM. \ 49 | VRAM usage increases linear with resolution. \ 50 | Maximum recommended resolution with a Titan X 12GB: 960:540. \ 51 | Please enter a resolution at which the video should be processed, \ 52 | in the format w:h, or leave blank to use the original resolution $cr > " resolution 53 | else 54 | echo "Unknown backend." 55 | exit 1 56 | fi 57 | 58 | # Save frames of the video as individual image files 59 | if [ -z $resolution ]; then 60 | $FFMPEG -i $1 ${filename}/frame_%04d.ppm 61 | resolution=default 62 | else 63 | $FFMPEG -i $1 -vf scale=$resolution ${filename}/frame_%04d.ppm 64 | fi 65 | 66 | echo "" 67 | read -p "How much do you want to weight the style reconstruction term? \ 68 | Default value: 1e2 for a resolution of 450x350. Increase for a higher resolution. \ 69 | [1e2] $cr > " style_weight 70 | style_weight=${style_weight:-1e2} 71 | 72 | temporal_weight=1e3 73 | 74 | echo "" 75 | read -p "Enter the zero-indexed ID of the GPU to use, or -1 for CPU mode (very slow!).\ 76 | [0] $cr > " gpu 77 | gpu=${gpu:-0} 78 | 79 | echo "" 80 | echo "Computing optical flow. This may take a while..." 81 | bash makeOptFlow.sh ./${filename}/frame_%04d.ppm ./${filename}/flow_$resolution 82 | 83 | # Perform style transfer 84 | th artistic_video.lua \ 85 | -content_pattern ${filename}/frame_%04d.ppm \ 86 | -flow_pattern ${filename}/flow_${resolution}/backward_[%d]_{%d}.flo \ 87 | -flowWeight_pattern ${filename}/flow_${resolution}/reliable_[%d]_{%d}.pgm \ 88 | -style_weight $style_weight \ 89 | -temporal_weight $temporal_weight \ 90 | -output_folder ${filename}/ \ 91 | -style_image $style_image \ 92 | -backend $backend \ 93 | -gpu $gpu \ 94 | -cudnn_autotune \ 95 | -number_format %04d 96 | 97 | # Create video from output images. 98 | $FFMPEG -i ${filename}/out-%04d.png ${filename}-stylized.$extension -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This code is for non-profit use only. Any commercial use is 2 | prohibited. 3 | 4 | (c) Manuel Ruder, Alexey Dosovitskiy, Thomas Brox 2016 5 | 6 | If you use this program, you should cite the following paper: 7 | 8 | M. Ruder, A. Dosovitskiy, T. Brox (2016). "Artistic style transfer for videos". arXiv:1604.08610 9 | 10 | 11 | 12 | This code is partially based on the neural-style code by Justin Johnson, 13 | which is covered by the following copyright and permission notice: 14 | 15 | ****************************************************************************** 16 | The MIT License (MIT) 17 | 18 | Copyright (c) 2015 Justin Johnson 19 | 20 | Permission is hereby granted, free of charge, to any person obtaining a copy 21 | of this software and associated documentation files (the "Software"), to deal 22 | in the Software without restriction, including without limitation the rights 23 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 24 | copies of the Software, and to permit persons to whom the Software is 25 | furnished to do so, subject to the following conditions: 26 | 27 | The above copyright notice and this permission notice shall be included in all 28 | copies or substantial portions of the Software. 29 | 30 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 31 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 32 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 33 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 34 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 35 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | SOFTWARE. 37 | ****************************************************************************** 38 | 39 | 40 | 41 | The present "lbfgs.lua" is a modified version of "lbfgs.lua" included in the 42 | Torch "Optimization package", which is covered by the following copyright and 43 | permission notice: 44 | 45 | ****************************************************************************** 46 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) 47 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) 48 | Copyright (c) 2011-2013 NYU (Clement Farabet) 49 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) 50 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio) 51 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) 52 | 53 | All rights reserved. 54 | 55 | Redistribution and use in source and binary forms, with or without 56 | modification, are permitted provided that the following conditions are met: 57 | 58 | 1. Redistributions of source code must retain the above copyright 59 | notice, this list of conditions and the following disclaimer. 60 | 61 | 2. Redistributions in binary form must reproduce the above copyright 62 | notice, this list of conditions and the following disclaimer in the 63 | documentation and/or other materials provided with the distribution. 64 | 65 | 3. Neither the names of NEC Laboratories American and IDIAP Research 66 | Institute nor the names of its contributors may be used to endorse or 67 | promote products derived from this software without specific prior 68 | written permission. 69 | 70 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 71 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 72 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 73 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 74 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 75 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 76 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 77 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 78 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 79 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 80 | POSSIBILITY OF SUCH DAMAGE. 81 | ****************************************************************************** -------------------------------------------------------------------------------- /consistencyChecker/consistencyChecker.cpp: -------------------------------------------------------------------------------- 1 | // consistencyChecker 2 | // Check consistency of forward flow via backward flow. 3 | // 4 | // (c) Manuel Ruder, Alexey Dosovitskiy, Thomas Brox 2016 5 | 6 | #include 7 | #include 8 | #include "CTensor.h" 9 | #include "CFilter.h" 10 | 11 | // Which certainty value motion boundaries should get. Value between 0 (uncertain) and 255 (certain). 12 | #define MOTION_BOUNDARIE_VALUE 0 13 | 14 | // The amount of gaussian smoothing that sould be applied. Set 0 to disable smoothing. 15 | #define SMOOTH_STRENGH 0.8 16 | 17 | // readMiddlebury 18 | bool readMiddlebury(const char* filename, CTensor& flow) { 19 | FILE *stream = fopen(filename, "rb"); 20 | if (stream == 0) { 21 | std::cout << "Could not open " << filename << std::endl; 22 | return false; 23 | } 24 | float help; 25 | int dummy; 26 | dummy = fread(&help,sizeof(float),1,stream); 27 | int aXSize,aYSize; 28 | dummy = fread(&aXSize,sizeof(int),1,stream); 29 | dummy = fread(&aYSize,sizeof(int),1,stream); 30 | flow.setSize(aXSize,aYSize,2); 31 | for (int y = 0; y < flow.ySize(); y++) 32 | for (int x = 0; x < flow.xSize(); x++) { 33 | dummy = fread(&flow(x,y,0),sizeof(float),1,stream); 34 | dummy = fread(&flow(x,y,1),sizeof(float),1,stream); 35 | } 36 | fclose(stream); 37 | return true; 38 | } 39 | 40 | void checkConsistency(const CTensor& flow1, const CTensor& flow2, CMatrix& reliable, int argc, char** args) { 41 | int xSize = flow1.xSize(), ySize = flow1.ySize(); 42 | int size = xSize * ySize; 43 | CTensor dx(xSize,ySize,2); 44 | CTensor dy(xSize,ySize,2); 45 | CDerivative derivative(3); 46 | NFilter::filter(flow1,dx,derivative,1,1); 47 | NFilter::filter(flow1,dy,1,derivative,1); 48 | CMatrix motionEdge(xSize,ySize,0); 49 | for (int i = 0; i < size; i++) { 50 | motionEdge.data()[i] += dx.data()[i]*dx.data()[i]; 51 | motionEdge.data()[i] += dx.data()[size+i]*dx.data()[size+i]; 52 | motionEdge.data()[i] += dy.data()[i]*dy.data()[i]; 53 | motionEdge.data()[i] += dy.data()[size+i]*dy.data()[size+i]; 54 | } 55 | 56 | for (int ay = 0; ay < flow1.ySize(); ay++) 57 | for (int ax = 0; ax < flow1.xSize(); ax++) { 58 | float bx = ax+flow1(ax, ay, 0); 59 | float by = ay+flow1(ax, ay, 1); 60 | int x1 = floor(bx); 61 | int y1 = floor(by); 62 | int x2 = x1 + 1; 63 | int y2 = y1 + 1; 64 | if (x1 < 0 || x2 >= xSize || y1 < 0 || y2 >= ySize) 65 | { reliable(ax, ay) = 0.0f; continue; } 66 | float alphaX = bx-x1; float alphaY = by-y1; 67 | float a = (1.0-alphaX) * flow2(x1, y1, 0) + alphaX * flow2(x2, y1, 0); 68 | float b = (1.0-alphaX) * flow2(x1, y2, 0) + alphaX * flow2(x2, y2, 0); 69 | float u = (1.0-alphaY)*a+alphaY*b; 70 | a = (1.0-alphaX) * flow2(x1, y1, 1) + alphaX * flow2(x2, y1, 1); 71 | b = (1.0-alphaX) * flow2(x1, y2, 1) + alphaX * flow2(x2, y2, 1); 72 | float v = (1.0-alphaY)*a+alphaY*b; 73 | float cx = bx+u; 74 | float cy = by+v; 75 | float u2 = flow1(ax,ay,0); 76 | float v2 = flow1(ax,ay,1); 77 | if (((cx-ax) * (cx-ax) + (cy-ay) * (cy-ay)) >= 0.01*(u2*u2 + v2*v2 + u*u + v*v) + 0.5f) { 78 | // Set to a negative value so that when smoothing is applied the smoothing goes "to the outside". 79 | // Afterwards, we clip values below 0. 80 | reliable(ax, ay) = -255.0f; 81 | continue; 82 | } 83 | if (motionEdge(ax, ay) > 0.01 * (u2*u2+v2*v2) + 0.002f) { 84 | reliable(ax, ay) = MOTION_BOUNDARIE_VALUE; 85 | continue; 86 | } 87 | } 88 | } 89 | 90 | int main(int argc, char** args) { 91 | assert(argc >= 4); 92 | 93 | CTensor flow1,flow2; 94 | readMiddlebury(args[1], flow1); 95 | readMiddlebury(args[2], flow2); 96 | 97 | assert(flow1.xSize() == flow2.xSize()); 98 | assert(flow1.ySize() == flow2.ySize()); 99 | 100 | int xSize = flow1.xSize(), ySize = flow1.ySize(); 101 | 102 | // Check consistency of forward flow via backward flow and exlucde motion boundaries 103 | CMatrix reliable(xSize, ySize, 255.0f); 104 | checkConsistency(flow1, flow2, reliable, argc, args); 105 | 106 | if (SMOOTH_STRENGH > 0) { 107 | CSmooth smooth(SMOOTH_STRENGH, 2.0f); 108 | NFilter::filter(reliable, smooth, smooth); 109 | } 110 | reliable.clip(0.0f, 255.0f); 111 | 112 | reliable.writeToPGM(args[3]); 113 | } -------------------------------------------------------------------------------- /consistencyChecker/NMath.h: -------------------------------------------------------------------------------- 1 | // NMath 2 | // A collection of mathematical functions and numerical algorithms 3 | // 4 | // Author: Thomas Brox 5 | 6 | #ifndef NMathH 7 | #define NMathH 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace NMath { 15 | // Returns the faculty of a number 16 | int faculty(int n); 17 | // Computes the binomial coefficient of two numbers 18 | int binCoeff(const int n, const int k); 19 | // Returns the angle of the line connecting (x1,y1) with (y1,y2) 20 | float tangent(const float x1, const float y1, const float x2, const float y2); 21 | // Absolute for floating points 22 | inline float abs(const float aValue); 23 | // Computes min or max value of two numbers 24 | inline float min(float aVal1, float aVal2); 25 | inline float max(float aVal1, float aVal2); 26 | inline int min(int aVal1, int aVal2); 27 | inline int max(int aVal1, int aVal2); 28 | // Computes the sign of a value 29 | inline float sign(float aVal); 30 | // minmod function (see description in implementation) 31 | inline float minmod(float a, float b, float c); 32 | // Computes the difference between two angles respecting the cyclic property of an angle 33 | // The result is always between 0 and Pi 34 | float absAngleDifference(const float aFirstAngle, const float aSecondAngle); 35 | // Computes the difference between two angles aFirstAngle - aSecondAngle 36 | // respecting the cyclic property of an angle 37 | // The result ist between -Pi and Pi 38 | float angleDifference(const float aFirstAngle, const float aSecondAngle); 39 | // Computes the sum of two angles respecting the cyclic property of an angle 40 | // The result is between -Pi and Pi 41 | float angleSum(const float aFirstAngle, const float aSecondAngle); 42 | // Rounds to the nearest integer 43 | int round(const float aValue); 44 | // Computes the arctan with results between 0 and 2*Pi 45 | inline float arctan(float x, float y); 46 | 47 | // Computes [0,1] uniformly distributed random number 48 | inline float random(); 49 | // Computes N(0,1) distributed random number 50 | inline float randomGauss(); 51 | 52 | extern const float Pi; 53 | 54 | // Computes a principal axis transformation 55 | // Eigenvectors are in the rows of aEigenvectors 56 | void PATransformation(const CMatrix& aMatrix, CVector& aEigenvalues, CMatrix& aEigenvectors, bool aOrdering = true); 57 | // Computes the principal axis backtransformation 58 | void PABacktransformation(const CMatrix& aEigenVectors, const CVector& aEigenValues, CMatrix& aMatrix); 59 | // Computes a singular value decomposition A=USV^T 60 | // Input: U MxN matrix 61 | // Output: U MxN matrix, S NxN diagonal matrix, V NxN diagonal matrix 62 | void svd(CMatrix& U, CMatrix& S, CMatrix& V, bool aOrdering = true, int aIterations = 20); 63 | // Reassembles A = USV^T, Result in U 64 | void svdBack(CMatrix& U, const CMatrix& S, const CMatrix& V); 65 | // Applies the Householder method to A and b, i.e., A is transformed into an upper triangular matrix 66 | void householder(CMatrix& A, CVector& b); 67 | // Computes least squares solution of an overdetermined linear system Ax=b using the Householder method 68 | CVector leastSquares(CMatrix& A, CVector& b); 69 | // Inverts a square matrix by eigenvalue decomposition, 70 | // eigenvalues smaller than aReg are replaced by aReg 71 | void invRegularized(CMatrix& A, int aReg); 72 | // Given a positive-definite symmetric matrix A, this routine constructs A = LL^T. 73 | // Only the upper triangle of A need be given. L is returned in the lower triangle. 74 | void cholesky(CMatrix& A); 75 | // Solves L*aOut = aIn when L is a lower triangular matrix (e.g. result from cholesky) 76 | void triangularSolve(CMatrix& L, CVector& aIn, CVector& aOut); 77 | void triangularSolve(CMatrix& L, CMatrix& aIn, CMatrix& aOut); 78 | // Solves L^T*aOut = aIn when L is a lower triangular matrix (e.g. result from cholesky) 79 | void triangularSolveTransposed(CMatrix& L, CVector& aIn, CVector& aOut); 80 | void triangularSolveTransposed(CMatrix& L, CMatrix& aIn, CMatrix& aOut); 81 | // Computes the inverse of a matrix, given its cholesky decomposition L (lower triangle) 82 | void choleskyInv(const CMatrix& L, CMatrix& aInv); 83 | // Creates the rotation matrix RzRyRx and extends it to a 4x4 RBM matrix with translation 0 84 | void eulerAngles(float rx, float ry, float rz, CMatrix& A); 85 | // Transforms a rigid body motion in matrix representation to a twist representation 86 | void RBM2Twist(CVector &T, CMatrix& RBM); 87 | } 88 | 89 | // I M P L E M E N T A T I O N ------------------------------------------------- 90 | // Inline functions have to be implemented directly in the header file 91 | 92 | namespace NMath { 93 | 94 | // abs 95 | inline float abs(const float aValue) { 96 | if (aValue >= 0) return aValue; 97 | else return -aValue; 98 | } 99 | 100 | // min 101 | inline float min(float aVal1, float aVal2) { 102 | if (aVal1 < aVal2) return aVal1; 103 | else return aVal2; 104 | } 105 | 106 | // max 107 | inline float max(float aVal1, float aVal2) { 108 | if (aVal1 > aVal2) return aVal1; 109 | else return aVal2; 110 | } 111 | 112 | // min 113 | inline int min(int aVal1, int aVal2) { 114 | if (aVal1 < aVal2) return aVal1; 115 | else return aVal2; 116 | } 117 | 118 | // max 119 | inline int max(int aVal1, int aVal2) { 120 | if (aVal1 > aVal2) return aVal1; 121 | else return aVal2; 122 | } 123 | 124 | // sign 125 | inline float sign(float aVal) { 126 | if (aVal > 0) return 1.0; 127 | else return -1.0; 128 | } 129 | 130 | // minmod function: 131 | // 0, if any of the a, b, c are 0 or of opposite sign 132 | // sign(a) min(|a|,|b|,|c|) else 133 | inline float minmod(float a, float b, float c) { 134 | if ((sign(a) == sign(b)) && (sign(b) == sign(c)) && (a != 0.0)) { 135 | float aMin = fabs(a); 136 | if (fabs(b) < aMin) aMin = fabs(b); 137 | if (fabs(c) < aMin) aMin = fabs(c); 138 | return sign(a)*aMin; 139 | } 140 | else return 0.0; 141 | } 142 | 143 | // arctan 144 | inline float arctan(float x, float y) { 145 | if (x == 0.0) 146 | if (y >= 0.0) return 0.5 * 3.1415926536; 147 | else return 1.5 * 3.1415926536; 148 | else if (x > 0.0) 149 | if (y >= 0.0) return atan (y/x); 150 | else return 2.0 * 3.1415926536 + atan (y/x); 151 | else return 3.1415926536 + atan (y/x); 152 | } 153 | 154 | // random 155 | inline float random() { 156 | return (float)rand()/RAND_MAX; 157 | } 158 | 159 | // randomGauss 160 | inline float randomGauss() { 161 | // Draw two [0,1]-uniformly distributed numbers a and b 162 | float a = random(); 163 | float b = random(); 164 | // assemble a N(0,1) number c according to Box-Muller */ 165 | if (a > 0.0) return sqrt(-2.0*log(a)) * cos(2.0*3.1415926536*b); 166 | else return 0; 167 | } 168 | 169 | } 170 | #endif 171 | -------------------------------------------------------------------------------- /lbfgs.lua: -------------------------------------------------------------------------------- 1 | --[[ An implementation of L-BFGS, heavily inspired by minFunc (Mark Schmidt) 2 | 3 | This implementation of L-BFGS relies on a user-provided line 4 | search function (state.lineSearch). If this function is not 5 | provided, then a simple learningRate is used to produce fixed 6 | size steps. Fixed size steps are much less costly than line 7 | searches, and can be useful for stochastic problems. 8 | 9 | The learning rate is used even when a line search is provided. 10 | This is also useful for large-scale stochastic problems, where 11 | opfunc is a noisy approximation of f(x). In that case, the learning 12 | rate allows a reduction of confidence in the step size. 13 | 14 | ARGS: 15 | 16 | - `opfunc` : a function that takes a single input (X), the point of 17 | evaluation, and returns f(X) and df/dX 18 | - `x` : the initial point 19 | - `state` : a table describing the state of the optimizer; after each 20 | call the state is modified 21 | - `state.maxIter` : Maximum number of iterations allowed 22 | - `state.maxEval` : Maximum number of function evaluations 23 | - `state.tolFun` : Termination tolerance on the first-order optimality 24 | - `state.tolX` : Termination tol on progress in terms of func/param changes 25 | - `state.lineSearch` : A line search function 26 | - `state.learningRate` : If no line search provided, then a fixed step size is used 27 | 28 | RETURN: 29 | - `x*` : the new `x` vector, at the optimal point 30 | - `f` : a table of all function values: 31 | `f[1]` is the value of the function before any optimization and 32 | `f[#f]` is the final fully optimized value, at `x*` 33 | 34 | (Clement Farabet, 2012) 35 | ]] 36 | function optim.lbfgs(opfunc, x, config, state) 37 | -- get/update state 38 | local config = config or {} 39 | local state = state or config 40 | local maxIter = tonumber(config.maxIter) or 20 41 | local maxEval = tonumber(config.maxEval) or maxIter*1.25 42 | local tolFun = config.tolFun or 1e-5 43 | local tolFunRelative = tonumber(config.tolFunRelative) or 0 44 | local tolFunRelativeInterval = tonumber(config.tolFunRelativeInterval) or 100 45 | local tolX = config.tolX or 1e-9 46 | local nCorrection = config.nCorrection or 100 47 | local lineSearch = config.lineSearch 48 | local lineSearchOpts = config.lineSearchOptions 49 | local learningRate = config.learningRate or 1 50 | local isverbose = config.verbose or false 51 | 52 | state.funcEval = state.funcEval or 0 53 | state.nIter = state.nIter or 0 54 | 55 | -- verbose function 56 | local verbose 57 | if isverbose then 58 | verbose = function(...) print(' ', ...) end 59 | else 60 | verbose = function() end 61 | end 62 | 63 | -- import some functions 64 | local abs = math.abs 65 | local min = math.min 66 | 67 | -- evaluate initial f(x) and df/dx 68 | local f,g = opfunc(x) 69 | local f_hist = {f} 70 | local currentFuncEval = 1 71 | state.funcEval = state.funcEval + 1 72 | local p = g:size(1) 73 | 74 | -- check optimality of initial point 75 | state.tmp1 = state.tmp1 or g.new(g:size()):zero(); local tmp1 = state.tmp1 76 | tmp1:copy(g):abs() 77 | if tmp1:sum() <= tolFun then 78 | -- optimality condition below tolFun 79 | verbose('optimality condition below tolFun') 80 | return x,f_hist 81 | end 82 | 83 | if not state.dir_bufs then 84 | -- reusable buffers for y's and s's, and their histories 85 | verbose('creating recyclable direction/step/history buffers') 86 | state.dir_bufs = state.dir_bufs or g.new(nCorrection+1, p):split(1) 87 | state.stp_bufs = state.stp_bufs or g.new(nCorrection+1, p):split(1) 88 | for i=1,#state.dir_bufs do 89 | state.dir_bufs[i] = state.dir_bufs[i]:squeeze(1) 90 | state.stp_bufs[i] = state.stp_bufs[i]:squeeze(1) 91 | end 92 | end 93 | 94 | -- variables cached in state (for tracing) 95 | local d = state.d 96 | local t = state.t 97 | local old_dirs = state.old_dirs 98 | local old_stps = state.old_stps 99 | local Hdiag = state.Hdiag 100 | local g_old = state.g_old 101 | local f_old = state.f_old 102 | local f_past = nil 103 | 104 | -- optimize for a max of maxIter iterations 105 | local nIter = 0 106 | while nIter < maxIter do 107 | -- keep track of nb of iterations 108 | nIter = nIter + 1 109 | state.nIter = state.nIter + 1 110 | 111 | ------------------------------------------------------------ 112 | -- compute gradient descent direction 113 | ------------------------------------------------------------ 114 | if state.nIter == 1 then 115 | d = g:clone():mul(-1) -- -g 116 | old_dirs = {} 117 | old_stps = {} 118 | Hdiag = 1 119 | else 120 | -- do lbfgs update (update memory) 121 | local y = table.remove(state.dir_bufs) -- pop 122 | local s = table.remove(state.stp_bufs) 123 | y:add(g, -1, g_old) -- g - g_old 124 | s:mul(d, t) -- d*t 125 | local ys = y:dot(s) -- y*s 126 | if ys > 1e-10 then 127 | -- updating memory 128 | if #old_dirs == nCorrection then 129 | -- shift history by one (limited-memory) 130 | local removed1 = table.remove(old_dirs, 1) 131 | local removed2 = table.remove(old_stps, 1) 132 | table.insert(state.dir_bufs, removed1) 133 | table.insert(state.stp_bufs, removed2) 134 | end 135 | 136 | -- store new direction/step 137 | table.insert(old_dirs, s) 138 | table.insert(old_stps, y) 139 | 140 | -- update scale of initial Hessian approximation 141 | Hdiag = ys / y:dot(y) -- (y*y) 142 | else 143 | -- put y and s back into the buffer pool 144 | table.insert(state.dir_bufs, y) 145 | table.insert(state.stp_bufs, s) 146 | end 147 | 148 | -- compute the approximate (L-BFGS) inverse Hessian 149 | -- multiplied by the gradient 150 | local k = #old_dirs 151 | 152 | -- need to be accessed element-by-element, so don't re-type tensor: 153 | state.ro = state.ro or torch.Tensor(nCorrection); local ro = state.ro 154 | for i = 1,k do 155 | ro[i] = 1 / old_stps[i]:dot(old_dirs[i]) 156 | end 157 | 158 | -- iteration in L-BFGS loop collapsed to use just one buffer 159 | local q = tmp1 -- reuse tmp1 for the q buffer 160 | -- need to be accessed element-by-element, so don't re-type tensor: 161 | state.al = state.al or torch.zeros(nCorrection) local al = state.al 162 | 163 | q:mul(g, -1) -- -g 164 | for i = k,1,-1 do 165 | al[i] = old_dirs[i]:dot(q) * ro[i] 166 | q:add(-al[i], old_stps[i]) 167 | end 168 | 169 | -- multiply by initial Hessian 170 | r = d -- share the same buffer, since we don't need the old d 171 | r:mul(q, Hdiag) -- q[1] * Hdiag 172 | for i = 1,k do 173 | local be_i = old_stps[i]:dot(r) * ro[i] 174 | r:add(al[i]-be_i, old_dirs[i]) 175 | end 176 | -- final direction is in r/d (same object) 177 | end 178 | g_old = g_old or g:clone() 179 | g_old:copy(g) 180 | f_old = f 181 | 182 | ------------------------------------------------------------ 183 | -- compute step length 184 | ------------------------------------------------------------ 185 | -- directional derivative 186 | local gtd = g:dot(d) -- g * d 187 | 188 | -- check that progress can be made along that direction 189 | if gtd > -tolX then 190 | break 191 | end 192 | 193 | -- reset initial guess for step size 194 | if state.nIter == 1 then 195 | tmp1:copy(g):abs() 196 | t = min(1,1/tmp1:sum()) * learningRate 197 | else 198 | t = learningRate 199 | end 200 | 201 | -- optional line search: user function 202 | local lsFuncEval = 0 203 | if lineSearch and type(lineSearch) == 'function' then 204 | -- perform line search, using user function 205 | f,g,x,t,lsFuncEval = lineSearch(opfunc,x,t,d,f,g,gtd,lineSearchOpts) 206 | table.insert(f_hist, f) 207 | else 208 | -- no line search, simply move with fixed-step 209 | x:add(t,d) 210 | if nIter ~= maxIter then 211 | -- re-evaluate function only if not in last iteration 212 | -- the reason we do this: in a stochastic setting, 213 | -- no use to re-evaluate that function here 214 | f,g = opfunc(x) 215 | lsFuncEval = 1 216 | table.insert(f_hist, f) 217 | end 218 | end 219 | 220 | -- update func eval 221 | currentFuncEval = currentFuncEval + lsFuncEval 222 | state.funcEval = state.funcEval + lsFuncEval 223 | 224 | ------------------------------------------------------------ 225 | -- check conditions 226 | ------------------------------------------------------------ 227 | if nIter == maxIter then 228 | -- no use to run tests 229 | verbose('reached max number of iterations') 230 | break 231 | end 232 | 233 | if currentFuncEval >= maxEval then 234 | -- max nb of function evals 235 | verbose('max nb of function evals') 236 | break 237 | end 238 | 239 | tmp1:copy(g):abs() 240 | if tmp1:sum() <= tolFun then 241 | -- check optimality 242 | verbose('optimality condition below tolFun') 243 | break 244 | end 245 | 246 | tmp1:copy(d):mul(t):abs() 247 | if tmp1:sum() <= tolX then 248 | -- step size below tolX 249 | verbose('step size below tolX') 250 | break 251 | end 252 | 253 | if abs(f-f_old) < tolX then 254 | -- function value changing less than tolX 255 | verbose('function value changing less than tolX') 256 | break 257 | end 258 | 259 | if nIter % tolFunRelativeInterval == 0 then 260 | if f_past ~= nil and (abs(f-f_past) / f_past) < tolFunRelative then 261 | verbose('relative change in function value is less than tolFunRelative') 262 | break 263 | end 264 | f_past = f 265 | end 266 | 267 | end 268 | 269 | -- save state 270 | state.old_dirs = old_dirs 271 | state.old_stps = old_stps 272 | state.Hdiag = Hdiag 273 | state.g_old = g_old 274 | state.f_old = f_old 275 | state.t = t 276 | state.d = d 277 | 278 | -- return optimal x, and history of f(x) 279 | return x,f_hist,currentFuncEval 280 | end 281 | 282 | return { 283 | optimize = optim.lbfgs 284 | } 285 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # artistic-videos 2 | 3 | This is the torch implementation for the paper "[Artistic style transfer for videos](http://arxiv.org/abs/1604.08610)", based on neural-style code by Justin Johnson https://github.com/jcjohnson/neural-style . 4 | 5 | Our algorithm allows to transfer the style from one image (for example, a painting) to a whole video sequence and generates consistent and stable stylized video sequences. 6 | 7 | **UPDATE:** A much faster version which runs in under one second per frame is avaliable at [fast-artistic-videos](https://github.com/manuelruder/fast-artistic-videos), but it only works for precomputed style templates. This repository allows arbitrary styles, but needs several minutes per frame. 8 | 9 | **Example video:** 10 | 11 | [![Artistic style transfer for videos](http://img.youtube.com/vi/Khuj4ASldmU/0.jpg)](https://www.youtube.com/watch?v=Khuj4ASldmU "Artistic style transfer for videos") 12 | 13 | ## Contact 14 | 15 | For issues or questions related to this implementation, please use the [issue tracker](https://github.com/manuelruder/artistic-videos/issues). 16 | For everything else, including licensing issues, please email us. Our contact details can be found in [our paper](http://arxiv.org/pdf/1604.08610.pdf). 17 | 18 | ## Setup 19 | 20 | Tested with Ubuntu 14.04. 21 | 22 | * Install torch7, loadcaffe and the CUDA backend (otherwise you have to use CPU mode which is horribly slow) and download the VGG model, as described by jcjohnson: [neural-style#setup](https://github.com/jcjohnson/neural-style#setup). Optional: Install cuDNN. This requires registration as a developer with NVIDIA, but significantly reduces memory usage. For non-Nvidia GPUs you can also use the OpenCL backend. 23 | * To use the temporal consistency constraints, you need an utility which estimates the [optical flow](https://en.wikipedia.org/wiki/Optical_flow) between two images. You can use [DeepFlow](http://lear.inrialpes.fr/src/deepflow/) which we also used in our paper. In this case, just download both DeepFlow and DeepMatching (CPU version) from their website and place the static binaries (`deepmatching-static` and `deepflow2-static`) in the main directory of this repository. Then, the scripts included in this repository can be used to generate the optical flow for all frames as well as the certainty of the flow field. If you want to use a different optical flow algorithm, specify the path to your optical flow utility in the first line of `makeOptFlow.sh`; the flow files have to be created in the [middlebury file format](http://vision.middlebury.edu/flow/code/flow-code/README.txt). 24 | 25 | ## Requirements 26 | 27 | A fast GPU with a large amount of video memory is recommended to execute this script. The ability to run in CPU mode is impractical due to the enormous running time. 28 | 29 | For a resolution of 450x350, you will need at least a 4GB GPU (around 3,5 GB memory usage). If you use cuDNN, a 2GB GPU is sufficient (around 1,7GB memory usage). Memory usage scales linearly with resolution, so if you experience an out of memory error, downscale the video. 30 | 31 | Other ways to reduce memory footprint are to use the ADAM optimizer instead of L-BFGS and/or to use the NIN Imagenet model instead of VGG-19. However, we didn't test our method with either of these and you will likely get inferior results. 32 | 33 | ## Simple style transfer 34 | 35 | To perform style transfer with mostly the default parameters, execute `stylizeVideo.sh `. This script will perform all the steps necessary to create a stylized version of the video. Note: You have to have ffmpeg (or libav-tools for Ubuntu 14.10 and earlier) installed. 36 | 37 | A more advanced version of this script can be found in NameRX's fork which computes optical flow in parallel to the video stylization for improved performance: [NameRX/artistic-videos](https://github.com/NameRX/artistic-videos) 38 | 39 | ## FAQ 40 | 41 | See [here](https://github.com/manuelruder/artistic-videos/issues?q=label%3Aquestion) for a list of frequently asked question. 42 | 43 | ## Advanced Usage 44 | 45 | Please read the script `stylizeVideo.sh` to see which steps you have to perform in advance exactly. Basically you have to save the frames of the video as individual image files and you have to compute the optical flow between all adjacent frames as well as the certainty of the flow field (both can be accomplished with `makeOptFlow.sh`). 46 | 47 | There are two versions of this algorithm, a single-pass and a multi-pass version. The multi-pass version yields better results in case of strong camera motion, but needs more iterations per frame. 48 | 49 | Basic usage: 50 | 51 | ``` 52 | th artistic_video.lua [-args ] 53 | ``` 54 | 55 | ``` 56 | th artistic_video_multiPass.lua [-args ] 57 | ``` 58 | 59 | Arguments can be given by command line and/or written in a file with one argument per line. Specify the path to this file through the option `-args`. Arguments given by command line will override arguments written in this file. 60 | 61 | **Basic arguments**: 62 | * `-style_image`: The style image. 63 | * `-content_pattern`: A file path pattern for the individual frames of the videos, for example `frame_%04d.png`. 64 | * `-num_images`: The number of frames. Set to `0` to process all available frames. 65 | * `-start_number`: The index of the first frame. Default: 1 66 | * `-gpu`: Zero-indexed ID of the GPU to use; for CPU mode set `-gpu` to -1. 67 | 68 | **Arguments for the single-pass algorithm** (only present in `artistic_video.lua`) 69 | * `-flow_pattern`: A file path pattern for files that store the backward flow between the frames. The placeholder in square brackets refers to the frame position where the optical flow starts and the placeholder in braces refers to the frame index where the optical flow points to. For example `flow_[%02d]_{%02d}.flo` means the flow files are named *flow_02_01.flo*, *flow_03_02.flo*, etc. If you use the script included in this repository (makeOptFlow.sh), the filename pattern will be `backward_[%d]_{%d}.flo`. 70 | * `-flowWeight_pattern`: A file path pattern for the weights / certainty of the flow field. These files should be a grey scale image where a white pixel indicates a high flow weight and a black pixel a low weight, respective. Same format as above. If you use the script, the filename pattern will be `reliable_[%d]_{%d}.pgm`. 71 | * `-flow_relative_indices`: The indices for the long-term consistency constraint as comma-separated list. Indices should be relative to the current frame. For example `1,2,4` means it uses frames *i-1*,*i-2* and *i-4* warped for current frame at position *i* as consistency constraint. Default value is 1 which means it uses short-term consistency only. If you use non-default values, you have to compute the corresponding long-term flow as well. 72 | 73 | **Arguments for the multi-pass algorithm** (only present in `artistic_video_multiPass.lua`) 74 | * `-forwardFlow_pattern`: A file path pattern for the forward flow. Same format as in `-flow_pattern`. 75 | * `-backwardFlow_pattern`: A file path pattern for the backward flow. Same format as above. 76 | * `-forwardFlow_weight_pattern`: A file path pattern for the forward-flow. Same format as above. 77 | * `-backwardFlow_weight_pattern`: A file path pattern for the backward flow. Same format as above. 78 | * `-num_passes`: Number of passes. Default: 15. 79 | * `-use_temporalLoss_after`: Uses temporal consistency loss in given pass and afterwards. Default: `8`. 80 | * `-blendWeight`: The blending factor of the previous stylized frame. The higher this value, the stronger the temporal consistency. Default value is `1` which means that the previous stylized frame is blended equally with the current frame. 81 | 82 | **Optimization options**: 83 | * `-content_weight`: How much to weight the content reconstruction term. Default is 5e0. 84 | * `-style_weight`: How much to weight the style reconstruction term. Default is 1e2. 85 | * `-temporal_weight`: How much to weight the temporal consistency loss. Default is 1e3. Set to 0 to disable the temporal consistency loss. 86 | * `-temporal_loss_criterion`: Which error function is used for the temporal consistency loss. Can be either `mse` for the mead squared error or `smoothl1` for the [smooth L1 criterion](https://github.com/torch/nn/blob/master/doc/criterion.md#nn.SmoothL1Criterion). 87 | * `-tv_weight`: Weight of total-variation (TV) regularization; this helps to smooth the image. 88 | Default is 1e-3. Set to 0 to disable TV regularization. 89 | * `-num_iterations`: 90 | * Single-pass: Two comma-separated values for the maximum number of iterations for the first frame and for subsequent frames. Default is 2000,1000. 91 | * Multi-pass: A single value for the number of iterations *per pass*. 92 | * `-tol_loss_relative`: Stop if the relative change of the loss function in an interval of `tol_loss_relative_interval` iterations falls below this threshold. Default is `0.0001` which means that the optimizer stops if the loss function changes less than 0.01% in the given interval. Meaningful values are between `0.001` and `0.0001` in the default interval. 93 | * `-tol_loss_relative_interval`: Se above. Default value: `50`. 94 | * `-init`: 95 | * Single-pass: Two comma-separated values for the initialization method for the first frame and for subsequent frames; one of `random`, `image`, `prev` or `prevWarped`. 96 | Default is `random,prevWarped` which uses a noise initialization for the first frame and the previous stylized frame warped for subsequent frames. `image` initializes with the content frames. `prev` initializes with the previous stylized frames without warping. 97 | * Multi-pass: One value for the initialization method. Either `random` or `image`. 98 | * `-optimizer`: The optimization algorithm to use; either `lbfgs` or `adam`; default is `lbfgs`. 99 | L-BFGS tends to give better results, but uses more memory. Switching to ADAM will reduce memory usage; 100 | when using ADAM you will probably need to play with other parameters to get good results, especially 101 | the style weight, content weight, and learning rate; you may also want to normalize gradients when 102 | using ADAM. 103 | * `-learning_rate`: Learning rate to use with the ADAM optimizer. Default is 1e1. 104 | * `-normalize_gradients`: If this flag is present, style and content gradients from each layer will be 105 | L1 normalized. Idea from [andersbll/neural_artistic_style](https://github.com/andersbll/neural_artistic_style). 106 | 107 | **Output options**: 108 | * `-output_image`: Name of the output image. Default is `out.png` which will produce output images of the form *out-\.png* for the single-pass and *out-\_\.png* for the multi-pass algorithm. 109 | * `-number_format`: Which number format to use for the output image. For example `%04d` adds up to three leading zeros. Some users reported that ffmpeg may use lexicographical sorting in some cases; therefore the output frames would be combined in the wrong order without leading zeros. Default: `%d`. 110 | * `-output_folder`: Directory where the output images should be saved. Must end with a slash. 111 | * `-print_iter`: Print progress every `print_iter` iterations. Set to 0 to disable printing. 112 | * `-save_iter`: Save the image every `save_iter` iterations. Set to 0 to disable saving intermediate results. 113 | * `-save_init`: If this option is present, the initialization image will be saved. 114 | 115 | **Other arguments**: 116 | * `-content_layers`: Comma-separated list of layer names to use for content reconstruction. 117 | Default is `relu4_2`. 118 | * `-style_layers`: Comman-separated list of layer names to use for style reconstruction. 119 | Default is `relu1_1,relu2_1,relu3_1,relu4_1,relu5_1`. 120 | * `-style_blend_weights`: The weight for blending the style of multiple style images, as a 121 | comma-separated list, such as `-style_blend_weights 3,7`. By default, all style images 122 | are equally weighted. 123 | * `-style_scale`: Scale at which to extract features from the style image, relative to the size of the content video. Default is `1.0`. 124 | * `-proto_file`: Path to the `deploy.txt` file for the VGG Caffe model. 125 | * `-model_file`: Path to the `.caffemodel` file for the VGG Caffe model. 126 | Default is the original VGG-19 model; you can also try the normalized VGG-19 model used in the paper. 127 | * `-pooling`: The type of pooling layers to use; one of `max` or `avg`. Default is `max`. 128 | The VGG-19 models uses max pooling layers, but Gatys et al. mentioned that replacing these layers with average 129 | pooling layers can improve the results. We haven't been able to get good results using average pooling, but 130 | the option is here. 131 | * `-backend`: `nn`, `cudnn` or `clnn`. Default is `nn`. `cudnn` requires 132 | [cudnn.torch](https://github.com/soumith/cudnn.torch) and may reduce memory usage. 133 | `clnn` requires [cltorch](https://github.com/hughperkins/cltorch) and [clnn](https://github.com/hughperkins/clnn). 134 | * `-cudnn_autotune`: When using the cuDNN backend, pass this flag to use the built-in cuDNN autotuner to select 135 | the best convolution algorithms for your architecture. This will make the first iteration a bit slower and can 136 | take a bit more memory, but may significantly speed up the cuDNN backend. 137 | 138 | ## Acknowledgement 139 | * This work was inspired by the paper [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576) by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge, which introduced an approach for style transfer in still images. 140 | * Our implementation is based on Justin Johnson's implementation [neural-style](https://github.com/jcjohnson/neural-style). 141 | 142 | ## Citation 143 | 144 | If you use this code or its parts in your research, please cite the following paper: 145 | 146 | ``` 147 | @inproceedings{RuderDB2016, 148 | author = {Manuel Ruder and Alexey Dosovitskiy and Thomas Brox}, 149 | title = {Artistic Style Transfer for Videos}, 150 | booktitle = {German Conference on Pattern Recognition}, 151 | pages = {26--36}, 152 | year = {2016}, 153 | } 154 | ``` 155 | -------------------------------------------------------------------------------- /artistic_video_multiPass.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | require 'image' 4 | require 'optim' 5 | require 'loadcaffe' 6 | require 'artistic_video_core' 7 | 8 | local flowFile = require 'flowFileLoader' 9 | 10 | -------------------------------------------------------------------------------- 11 | 12 | local cmd = torch.CmdLine() 13 | 14 | -- Basic options 15 | cmd:option('-style_image', 'example/seated-nude.jpg', 16 | 'Style target image') 17 | cmd:option('-style_blend_weights', 'nil') 18 | cmd:option('-content_pattern', 'example/marple8_%02d.ppm', 19 | 'Content target pattern') 20 | cmd:option('-num_images', 0, 'Number of content images. Set 0 for autodetect.') 21 | cmd:option('-start_number', 1, 'Frame index to start with') 22 | cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1') 23 | cmd:option('-number_format', '%d', 'Number format of the output images.') 24 | 25 | -- Flow options 26 | cmd:option('-forwardFlow_pattern', 'example/deepflow/forward_[%d]_{%d}.flo', 27 | 'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.') 28 | cmd:option('-backwardFlow_pattern', 'example/deepflow/backward_[%d]_{%d}.flo', 29 | 'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.') 30 | cmd:option('-forwardFlow_weight_pattern', 'example/deepflow/reliable_[%d]_{%d}.pgm', 31 | 'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.') 32 | cmd:option('-backwardFlow_weight_pattern', 'example/deepflow/reliable_[%d]_{%d}.pgm', 33 | 'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.') 34 | 35 | -- Multi-pass options 36 | cmd:option('-blendWeight', 1.0, '') 37 | cmd:option('-blendWeight_lastPass', 0.0, '') 38 | cmd:option('-use_temporalLoss_after', 8, '') 39 | cmd:option('-num_passes', 15, 'Number of passes') 40 | cmd:option('-continue_with_pass', 1, '') 41 | 42 | -- Optimization options 43 | cmd:option('-content_weight', 5e0) 44 | cmd:option('-style_weight', 1e2) 45 | cmd:option('-temporal_weight', 5e2) 46 | cmd:option('-tv_weight', 1e-3) 47 | cmd:option('-temporal_loss_criterion', 'mse', 'mse|smoothl1') 48 | cmd:option('-num_iterations', 100, 'Number of iterations per pass') 49 | cmd:option('-tol_loss_relative', 0, 'stop if relative change of the loss function is below this value') 50 | cmd:option('-tol_loss_relative_interval', 100, 'interval between two function comparisons') 51 | cmd:option('-normalize_gradients', false) 52 | cmd:option('-init', 'random', 'random|image|prevWarped') 53 | cmd:option('-optimizer', 'lbfgs', 'lbfgs|adam') 54 | cmd:option('-learning_rate', 1e1) 55 | 56 | -- Output options 57 | cmd:option('-print_iter', 50) 58 | cmd:option('-save_iter', 0) 59 | cmd:option('-output_image', 'out.png') 60 | cmd:option('-output_folder', '') 61 | cmd:option('-save_init', false, 'Whether the initialization image should be saved (for debugging purposes).') 62 | 63 | -- Other options 64 | cmd:option('-style_scale', 1.0) 65 | cmd:option('-pooling', 'max', 'max|avg') 66 | cmd:option('-proto_file', 'models/VGG_ILSVRC_19_layers_deploy.prototxt') 67 | cmd:option('-model_file', 'models/VGG_ILSVRC_19_layers.caffemodel') 68 | cmd:option('-backend', 'nn', 'nn|cudnn|clnn') 69 | cmd:option('-cudnn_autotune', false) 70 | cmd:option('-seed', -1) 71 | 72 | cmd:option('-content_layers', 'relu4_2', 'layers for content') 73 | cmd:option('-style_layers', 'relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', 'layers for style') 74 | 75 | cmd:option('-args', '', 'Arguments in a file, one argument per line') 76 | 77 | function nn.SpatialConvolutionMM:accGradParameters() 78 | -- nop. not needed by our net 79 | end 80 | 81 | local function main(params) 82 | if params.gpu >= 0 then 83 | if params.backend ~= 'clnn' then 84 | require 'cutorch' 85 | require 'cunn' 86 | cutorch.setDevice(params.gpu + 1) 87 | else 88 | require 'clnn' 89 | require 'cltorch' 90 | cltorch.setDevice(params.gpu + 1) 91 | end 92 | else 93 | params.backend = 'nn' 94 | end 95 | 96 | if params.backend == 'cudnn' then 97 | require 'cudnn' 98 | if params.cudnn_autotune then 99 | cudnn.benchmark = true 100 | end 101 | cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop 102 | end 103 | 104 | local loadcaffe_backend = params.backend 105 | if params.backend == 'clnn' then loadcaffe_backend = 'nn' end 106 | local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float() 107 | cnn = MaybePutOnGPU(cnn, params) 108 | 109 | local num_images = params.num_images 110 | if num_images == 0 then 111 | num_images = calcNumberOfContentImages(params) 112 | print("Detected " .. num_images .. " content images.") 113 | end 114 | local end_image_idx = num_images + params.start_number - 1 115 | 116 | local style_images_caffe = getStyleImages(params) 117 | 118 | -- Set up the network, inserting style and content loss modules 119 | local net, style_losses, losses_indices, losses_type = buildNet(cnn, params, style_images_caffe) 120 | 121 | -- We don't need the base CNN anymore, so clean it up to save memory. 122 | cnn = nil 123 | for i=1,#net.modules do 124 | local module = net.modules[i] 125 | if torch.type(module) == 'nn.SpatialConvolutionMM' then 126 | -- remote these, not used, but uses gpu memory 127 | module.gradWeight = nil 128 | module.gradBias = nil 129 | end 130 | end 131 | collectgarbage() 132 | 133 | local img = nil 134 | 135 | -- Initialize the image 136 | if params.seed >= 0 then 137 | torch.manualSeed(params.seed) 138 | end 139 | local content_size = image.load(string.format(params.content_pattern, params.start_number), 3):size() 140 | local randImg = torch.randn(content_size):mul(0.001) 141 | 142 | local usePrev = params.init == 'prev' or params.init == 'prevWarped' 143 | local needFlow = params.init == 'prevWarped' or params.prevPlusFlow_layers ~= '' 144 | 145 | for run=params.continue_with_pass, params.num_passes do 146 | 147 | local flag = run % 2 148 | local start = (flag == 0) and end_image_idx or params.start_number 149 | local endp = (flag == 0) and params.start_number or end_image_idx 150 | local incr = (flag == 0) and -1 or 1 151 | 152 | for frameIdx=start,endp, incr do 153 | 154 | local content_image_caffe = getContentImage(frameIdx, params) 155 | local content_losses, prevPlusFlow_losses = {}, {} 156 | local additional_layers = 0 157 | local num_iterations = params.num_iterations 158 | 159 | -- Previous and following frame warped 160 | local prevImageWarped, nextImageWarped = nil, nil 161 | -- The warped frame which will be used for temporal consistency. 162 | local imageWarped = nil 163 | 164 | -- Find out if we are forward or backward pass, and set "imageWarped" accordingly. 165 | if frameIdx > params.start_number then 166 | prevImageWarped = readPrevImageWarped(frameIdx, params, run - (1 - flag), false) 167 | end 168 | if run > 1 and frameIdx < end_image_idx then 169 | nextImageWarped = readNextImageWarped(frameIdx, params, run - flag, false) 170 | end 171 | if flag == 1 then imageWarped = prevImageWarped end 172 | if flag == 0 then imageWarped = nextImageWarped end 173 | 174 | local temporalLossEnabled = run >= params.use_temporalLoss_after and imageWarped ~= nil 175 | 176 | -- add layers for this iteration 177 | for i=1, #losses_indices do 178 | if losses_type[i] == 'content' then 179 | local content_loss = getContentLossModuleForLayer(net, 180 | losses_indices[i] + additional_layers, 181 | content_image_caffe, params) 182 | net:insert(content_loss, losses_indices[i] + additional_layers) 183 | additional_layers = additional_layers + 1 184 | table.insert(content_losses, content_loss) 185 | elseif temporalLossEnabled then 186 | imageWarped = preprocess(imageWarped):float() 187 | imageWarped = MaybePutOnGPU(imageWarped, params) 188 | local flowWeights = nil 189 | if losses_type[i] == 'prevPlusFlowWeighted' then 190 | local weightsFileName = nil 191 | if flag == 1 then 192 | weightsFileName = getFormatedFlowFileName(params.backwardFlow_weight_pattern, frameIdx-1, frameIdx) 193 | else 194 | weightsFileName = getFormatedFlowFileName(params.forwardFlow_weight_pattern, frameIdx+1, frameIdx) 195 | end 196 | print(string.format('Reading flowWeights file "%s".', weightsFileName)) 197 | flowWeights = image.load(weightsFileName):float() 198 | flowWeights = flowWeights:expand(3, flowWeights:size(2), flowWeights:size(3)) 199 | flowWeights = MaybePutOnGPU(flowWeights, params) 200 | end 201 | local loss_module = getWeightedContentLossModuleForLayer(net, 202 | losses_indices[i] + additional_layers, imageWarped, 203 | params, flowWeights) 204 | net:insert(loss_module, losses_indices[i] + additional_layers) 205 | table.insert(prevPlusFlow_losses, loss_module) 206 | additional_layers = additional_layers + 1 207 | end 208 | end 209 | 210 | if run == 1 then 211 | -- For the first run, process the frames independently 212 | if frameIdx == params.start_number or params.init == 'random' then 213 | img = randImg:clone():float() 214 | elseif init == 'image' then 215 | img = content_image:clone():float() 216 | elseif params.init == 'prevWarped' then 217 | local prevImageWarpedWithPad = readPrevImageWarped(frameIdx, params, run - (1 - flag), true) 218 | img = preprocess(prevImageWarpedWithPad):float() 219 | else 220 | print('Unknown initialization method.') 221 | os.exit() 222 | end 223 | else 224 | -- For subsequent runs, blend neighboring frames into the current frame 225 | img = image.load(build_OutFilename(params, frameIdx, run - 1), 3) 226 | -- Make sure to correctly normalize the result 227 | local divisor = torch.zeros(content_image_caffe:size()) 228 | divisor:add(1) 229 | if frameIdx > params.start_number then 230 | local weightsFileName = getFormatedFlowFileName(params.backwardFlow_weight_pattern, frameIdx-1, frameIdx) 231 | print(string.format('Reading flowWeights file "%s".', weightsFileName)) 232 | local prevImageWeights = image.load(weightsFileName) 233 | prevImageWeights = prevImageWeights:expand(3, prevImageWeights:size(2), prevImageWeights:size(3)) 234 | prevImageWeights:mul(flag == 1 and params.blendWeight or params.blendWeight_lastPass) 235 | img:add(torch.cmul(prevImageWarped, prevImageWeights)) 236 | divisor:add(prevImageWeights) 237 | end 238 | if frameIdx < end_image_idx then 239 | local weightsFileName = getFormatedFlowFileName(params.forwardFlow_weight_pattern, frameIdx+1, frameIdx) 240 | print(string.format('Reading flowWeights file "%s".', weightsFileName)) 241 | local nextImageWeights = image.load(weightsFileName) 242 | nextImageWeights = nextImageWeights:expand(3, nextImageWeights:size(2), nextImageWeights:size(3)) 243 | nextImageWeights:mul(flag == 0 and params.blendWeight or params.blendWeight_lastPass) 244 | img:add(torch.cmul(nextImageWarped, nextImageWeights)) 245 | divisor:add(nextImageWeights) 246 | end 247 | img:cdiv(divisor) 248 | img = preprocess(img):float() 249 | end 250 | 251 | img = MaybePutOnGPU(img, params) 252 | 253 | if params.save_init then 254 | save_image(img, params.output_folder .. string.format( 255 | 'init-' .. params.number_format .. '_%d.png', frameIdx, run)) 256 | end 257 | 258 | -- Run the optimization for some iterations, save the result to disk 259 | runOptimization(params, net, content_losses, style_losses, prevPlusFlow_losses, 260 | img, frameIdx, run, num_iterations) 261 | 262 | -- Remove this iteration's content and temporal layers 263 | for i=#losses_indices, 1, -1 do 264 | if temporalLossEnabled or losses_type[i] == 'content' then 265 | additional_layers = additional_layers - 1 266 | net:remove(losses_indices[i] + additional_layers) 267 | end 268 | end 269 | 270 | assert(additional_layers == 0) 271 | 272 | end 273 | 274 | end 275 | 276 | end 277 | 278 | -- warp previous frame. 279 | -- Disocclusions at the borders will be filled with the VGG mean pixel, if pad_mean_pixel is true. 280 | function readPrevImageWarped(idx, params, run, pad_mean_pixel) 281 | local flowFileName = getFormatedFlowFileName(params.backwardFlow_pattern, idx-1, idx) 282 | print(string.format('Reading backward flow file "%s".', flowFileName)) 283 | local flow = flowFile.load(flowFileName) 284 | local prevImg = image.load(build_OutFilename(params, idx-1, run), 3) 285 | local result = nil 286 | if pad_mean_pixel then 287 | local mean_pixel = torch.DoubleTensor({123.68/256.0, 116.779/256.0, 103.939/256.0}) 288 | result = image.warp(prevImg, flow, 'bilinear', true, 'pad', -1) 289 | for x=1, result:size(2) do 290 | for y=1, result:size(3) do 291 | if result[1][x][y] == -1 and result[2][x][y] == -1 and result[3][x][y] == -1 then 292 | result[1][x][y] = mean_pixel[1] 293 | result[2][x][y] = mean_pixel[2] 294 | result[3][x][y] = mean_pixel[3] 295 | end 296 | end 297 | end 298 | else 299 | result = image.warp(prevImg, flow) 300 | end 301 | return result 302 | end 303 | 304 | -- warp following frame. 305 | -- Disocclusions at the borders will be filled with the VGG mean pixel, if pad_mean_pixel is true. 306 | function readNextImageWarped(idx, params, run, pad_mean_pixel) 307 | local flowFileName = getFormatedFlowFileName(params.forwardFlow_pattern, idx+1, idx) 308 | print(string.format('Reading forward flow file "%s".', flowFileName)) 309 | local flow = flowFile.load(flowFileName) 310 | local nextImg = image.load(build_OutFilename(params, idx+1, run), 3) 311 | if pad_mean_pixel then 312 | local mean_pixel = torch.DoubleTensor({123.68/256.0, 116.779/256.0, 103.939/256.0}) 313 | result = image.warp(nextImg, flow, 'bilinear', true, 'pad', -1) 314 | for x=1, result:size(2) do 315 | for y=1, result:size(3) do 316 | if result[1][x][y] == -1 and result[2][x][y] == -1 and result[3][x][y] == -1 then 317 | result[1][x][y] = mean_pixel[1] 318 | result[2][x][y] = mean_pixel[2] 319 | result[3][x][y] = mean_pixel[3] 320 | end 321 | end 322 | end 323 | else 324 | result = image.warp(nextImg, flow) 325 | end 326 | return result 327 | end 328 | 329 | local tmpParams = cmd:parse(arg) 330 | local params = nil 331 | local file = io.open(tmpParams.args, 'r') 332 | 333 | if tmpParams.args == '' or file == nil then 334 | params = cmd:parse(arg) 335 | else 336 | local args = {} 337 | io.input(file) 338 | local argPos = 1 339 | while true do 340 | local line = io.read() 341 | if line == nil then break end 342 | if line:sub(0, 1) == '-' then 343 | local splits = str_split(line, " ", 2) 344 | args[argPos] = splits[1] 345 | args[argPos + 1] = splits[2] 346 | argPos = argPos + 2 347 | end 348 | end 349 | for i=1, #arg do 350 | args[argPos] = arg[i] 351 | argPos = argPos + 1 352 | end 353 | params = cmd:parse(args) 354 | io.close(file) 355 | end 356 | 357 | main(params) 358 | -------------------------------------------------------------------------------- /artistic_video.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | require 'image' 4 | require 'loadcaffe' 5 | require 'artistic_video_core' 6 | 7 | local flowFile = require 'flowFileLoader' 8 | 9 | -------------------------------------------------------------------------------- 10 | 11 | local cmd = torch.CmdLine() 12 | 13 | -- Basic options 14 | cmd:option('-style_image', 'example/seated-nude.jpg', 15 | 'Style target image') 16 | cmd:option('-style_blend_weights', 'nil') 17 | cmd:option('-content_pattern', 'example/marple8_%02d.ppm', 18 | 'Content target pattern') 19 | cmd:option('-num_images', 0, 'Number of content images. Set 0 for autodetect.') 20 | cmd:option('-start_number', 1, 'Frame index to start with') 21 | cmd:option('-continue_with', 1, 'Continue with the given frame index.') 22 | cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1') 23 | cmd:option('-number_format', '%d', 'Number format of the output images.') 24 | 25 | --Flow options 26 | cmd:option('-flow_pattern', 'example/deepflow/backward_[%d]_{%d}.flo', 27 | 'Optical flow files pattern') 28 | cmd:option('-flowWeight_pattern', 'example/deepflow/reliable_[%d]_{%d}.pgm', 29 | 'Optical flow weight files pattern.') 30 | cmd:option('-flow_relative_indices', '1', 'Use flow from the given indices.') 31 | cmd:option('-use_flow_every', -1, 'Uses flow from the given index and every multiple of that; -1 to to disable.') 32 | cmd:option('-invert_flowWeights', 0, 'Invert flow weights given by flowWeight_pattern.') 33 | 34 | -- Optimization options 35 | cmd:option('-content_weight', 5e0) 36 | cmd:option('-style_weight', 1e2) 37 | cmd:option('-temporal_weight', 1e3) 38 | cmd:option('-tv_weight', 1e-3) 39 | cmd:option('-temporal_loss_criterion', 'mse', 'mse|smoothl1') 40 | cmd:option('-num_iterations', '2000,1000', 41 | 'Can be set separately for the first and for subsequent iterations, separated by comma, or one value for all.') 42 | cmd:option('-tol_loss_relative', 0.0001, 'Stop if relative change of the loss function is below this value') 43 | cmd:option('-tol_loss_relative_interval', 50, 'Interval between two loss comparisons') 44 | cmd:option('-normalize_gradients', false) 45 | cmd:option('-init', 'random,prevWarped', 'random|image,random|image|prev|prevWarped') 46 | cmd:option('-optimizer', 'lbfgs', 'lbfgs|adam') 47 | cmd:option('-learning_rate', 1e1) 48 | 49 | -- Output options 50 | cmd:option('-print_iter', 100) 51 | cmd:option('-save_iter', 0) 52 | cmd:option('-output_image', 'out.png') 53 | cmd:option('-output_folder', '') 54 | cmd:option('-save_init', false, 'Whether the initialization image should be saved (for debugging purposes).') 55 | 56 | -- Other options 57 | cmd:option('-style_scale', 1.0) 58 | cmd:option('-pooling', 'max', 'max|avg') 59 | cmd:option('-proto_file', 'models/VGG_ILSVRC_19_layers_deploy.prototxt') 60 | cmd:option('-model_file', 'models/VGG_ILSVRC_19_layers.caffemodel') 61 | cmd:option('-backend', 'nn', 'nn|cudnn|clnn') 62 | cmd:option('-cudnn_autotune', false) 63 | cmd:option('-seed', -1) 64 | cmd:option('-content_layers', 'relu4_2', 'layers for content') 65 | cmd:option('-style_layers', 'relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', 'layers for style') 66 | cmd:option('-args', '', 'Arguments in a file, one argument per line') 67 | 68 | -- Advanced options (changing them is usually not required) 69 | cmd:option('-combine_flowWeights_method', 'closestFirst', 70 | 'Which long-term weighting scheme to use: normalize or closestFirst. Deafult and recommended: closestFirst') 71 | 72 | function nn.SpatialConvolutionMM:accGradParameters() 73 | -- nop. not needed by our net 74 | end 75 | 76 | local function main(params) 77 | if params.gpu >= 0 then 78 | if params.backend ~= 'clnn' then 79 | require 'cutorch' 80 | require 'cunn' 81 | cutorch.setDevice(params.gpu + 1) 82 | else 83 | require 'clnn' 84 | require 'cltorch' 85 | cltorch.setDevice(params.gpu + 1) 86 | end 87 | else 88 | params.backend = 'nn' 89 | end 90 | 91 | if params.backend == 'cudnn' then 92 | require 'cudnn' 93 | if params.cudnn_autotune then 94 | cudnn.benchmark = true 95 | end 96 | cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop 97 | end 98 | 99 | local loadcaffe_backend = params.backend 100 | if params.backend == 'clnn' then loadcaffe_backend = 'nn' end 101 | local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float() 102 | cnn = MaybePutOnGPU(cnn, params) 103 | 104 | local style_images_caffe = getStyleImages(params) 105 | 106 | -- Set up the network, inserting style losses. Content and temporal loss will be inserted in each iteration. 107 | local net, style_losses, losses_indices, losses_type = buildNet(cnn, params, style_images_caffe) 108 | 109 | -- We don't need the base CNN anymore, so clean it up to save memory. 110 | cnn = nil 111 | for i=1,#net.modules do 112 | local module = net.modules[i] 113 | if torch.type(module) == 'nn.SpatialConvolutionMM' then 114 | -- remote these, not used, but uses gpu memory 115 | module.gradWeight = nil 116 | module.gradBias = nil 117 | end 118 | end 119 | collectgarbage() 120 | 121 | -- There can be different setting for the first frame and for subsequent frames. 122 | local num_iterations_split = params.num_iterations:split(",") 123 | local numIters_first, numIters_subseq = num_iterations_split[1], num_iterations_split[2] or num_iterations_split[1] 124 | local init_split = params.init:split(",") 125 | local init_first, init_subseq = init_split[1], init_split[2] or init_split[1] 126 | 127 | local firstImg = nil 128 | local flow_relative_indices_split = params.flow_relative_indices:split(",") 129 | 130 | local num_images = params.num_images 131 | if num_images == 0 then 132 | num_images = calcNumberOfContentImages(params) 133 | print("Detected " .. num_images .. " content images.") 134 | end 135 | 136 | -- Iterate over all frames in the video sequence 137 | for frameIdx=params.start_number + params.continue_with - 1, params.start_number + num_images - 1 do 138 | 139 | -- Set seed 140 | if params.seed >= 0 then 141 | torch.manualSeed(params.seed) 142 | end 143 | 144 | local content_image = getContentImage(frameIdx, params) 145 | if content_image == nil then 146 | print("No more frames.") 147 | do return end 148 | end 149 | local content_losses, temporal_losses = {}, {} 150 | local additional_layers = 0 151 | local num_iterations = frameIdx == params.start_number and tonumber(numIters_first) or tonumber(numIters_subseq) 152 | local init = frameIdx == params.start_number and init_first or init_subseq 153 | -- stores previous image indices used for the temporal constraint 154 | local J = {} 155 | -- stores previous image(s) warped 156 | local imgsWarped = {} 157 | 158 | -- Calculate from which indices we need a warped image 159 | if frameIdx > params.start_number and params.temporal_weight ~= 0 then 160 | for i=1, #flow_relative_indices_split do 161 | local prevIndex = frameIdx - tonumber(flow_relative_indices_split[i]) 162 | if prevIndex >= params.start_number then 163 | table.insert(J, frameIdx - tonumber(flow_relative_indices_split[i])) 164 | end 165 | end 166 | if params.use_flow_every > 0 then 167 | for prevIndex=frameIdx - params.use_flow_every, params.start_number, -1 * params.use_flow_every do 168 | if not tabl_contains(J, prevIndex) then 169 | table.insert(J, prevIndex) 170 | end 171 | end 172 | end 173 | -- Sort table descending, usefull to compute the long-term weights 174 | table.sort(J, function(a,b) return a>b end) 175 | -- Read the optical flow(s) and warp the previous image(s) 176 | for j=1, #J do 177 | local prevIndex = J[j] 178 | local flowFileName = getFormatedFlowFileName(params.flow_pattern, math.abs(prevIndex), math.abs(frameIdx)) 179 | print(string.format('Reading flow file "%s".', flowFileName)) 180 | local flow = flowFile.load(flowFileName) 181 | local fileName = build_OutFilename(params, math.abs(prevIndex - params.start_number + 1), -1) 182 | local imgWarped = warpImage(image.load(fileName, 3), flow) 183 | imgWarped = preprocess(imgWarped):float() 184 | imgWarped = MaybePutOnGPU(imgWarped, params) 185 | table.insert(imgsWarped, imgWarped) 186 | end 187 | end 188 | 189 | -- Add content and temporal loss for this iteration. Style loss is already included in the net. 190 | for i=1, #losses_indices do 191 | if losses_type[i] == 'content' then 192 | local loss_module = getContentLossModuleForLayer(net, 193 | losses_indices[i] + additional_layers, content_image, params) 194 | net:insert(loss_module, losses_indices[i] + additional_layers) 195 | table.insert(content_losses, loss_module) 196 | additional_layers = additional_layers + 1 197 | elseif losses_type[i] == 'prevPlusFlow' and frameIdx > params.start_number then 198 | for j=1, #J do 199 | local loss_module = getWeightedContentLossModuleForLayer(net, 200 | losses_indices[i] + additional_layers, imgsWarped[j], 201 | params, nil) 202 | net:insert(loss_module, losses_indices[i] + additional_layers) 203 | table.insert(temporal_losses, loss_module) 204 | additional_layers = additional_layers + 1 205 | end 206 | elseif losses_type[i] == 'prevPlusFlowWeighted' and frameIdx > params.start_number then 207 | local flowWeightsTabl = {} 208 | -- Read all flow weights 209 | for j=1, #J do 210 | local weightsFileName = getFormatedFlowFileName(params.flowWeight_pattern, J[j], math.abs(frameIdx)) 211 | print(string.format('Reading flowWeights file "%s".', weightsFileName)) 212 | table.insert(flowWeightsTabl, image.load(weightsFileName):float()) 213 | end 214 | -- Preprocess flow weights, calculate long-term weights 215 | processFlowWeights(flowWeightsTabl, params.combine_flowWeights_method, params.invert_flowWeights) 216 | -- Create loss modules, one for each previous frame warped 217 | for j=1, #J do 218 | local flowWeights = flowWeightsTabl[j] 219 | flowWeights = flowWeights:expand(3, flowWeights:size(2), flowWeights:size(3)) 220 | flowWeights = MaybePutOnGPU(flowWeights, params) 221 | local loss_module = getWeightedContentLossModuleForLayer(net, 222 | losses_indices[i] + additional_layers, imgsWarped[j], 223 | params, flowWeights) 224 | net:insert(loss_module, losses_indices[i] + additional_layers) 225 | table.insert(temporal_losses, loss_module) 226 | additional_layers = additional_layers + 1 227 | end 228 | end 229 | end 230 | 231 | -- Initialization 232 | local img = nil 233 | if init == 'random' then 234 | img = torch.randn(content_image:size()):float():mul(0.001) 235 | elseif init == 'image' then 236 | img = content_image:clone():float() 237 | elseif init == 'prevWarped' and frameIdx > params.start_number then 238 | local flowFileName = getFormatedFlowFileName(params.flow_pattern, math.abs(frameIdx - 1), math.abs(frameIdx)) 239 | print(string.format('Reading flow file "%s".', flowFileName)) 240 | local flow = flowFile.load(flowFileName) 241 | local fileName = build_OutFilename(params, math.abs(frameIdx - params.start_number), -1) 242 | img = warpImage(image.load(fileName, 3), flow) 243 | img = preprocess(img):float() 244 | elseif init == 'prev' and frameIdx > params.start_number then 245 | local fileName = build_OutFilename(params, math.abs(frameIdx - params.start_number), -1) 246 | img = image.load(fileName, 3) 247 | img = preprocess(img):float() 248 | elseif init == 'first' then 249 | img = firstImg:clone():float() 250 | else 251 | print('ERROR: Invalid initialization method.') 252 | os.exit() 253 | end 254 | img = MaybePutOnGPU(img, params) 255 | if params.save_init then 256 | save_image(img, 257 | string.format('%sinit-' .. params.number_format .. '.png', 258 | params.output_folder, math.abs(frameIdx - params.start_number + 1))) 259 | end 260 | 261 | -- Run the optimization to stylize the image, save the result to disk 262 | runOptimization(params, net, content_losses, style_losses, temporal_losses, img, frameIdx, -1, num_iterations) 263 | 264 | if frameIdx == params.start_number then 265 | firstImg = img:clone():float() 266 | end 267 | 268 | -- Remove this iteration's content and temporal layers 269 | for i=#losses_indices, 1, -1 do 270 | if frameIdx > params.start_number or losses_type[i] == 'content' then 271 | if losses_type[i] == 'prevPlusFlowWeighted' or losses_type[i] == 'prevPlusFlow' then 272 | for j=1, #J do 273 | additional_layers = additional_layers - 1 274 | net:remove(losses_indices[i] + additional_layers) 275 | end 276 | else 277 | additional_layers = additional_layers - 1 278 | net:remove(losses_indices[i] + additional_layers) 279 | end 280 | end 281 | end 282 | 283 | -- Ensure that all layer have been removed correctly 284 | assert(additional_layers == 0) 285 | 286 | end 287 | end 288 | 289 | -- warp a given image according to the given optical flow. 290 | -- Disocclusions at the borders will be filled with the VGG mean pixel. 291 | function warpImage(img, flow) 292 | local mean_pixel = torch.DoubleTensor({123.68/256.0, 116.779/256.0, 103.939/256.0}) 293 | result = image.warp(img, flow, 'bilinear', true, 'pad', -1) 294 | for x=1, result:size(2) do 295 | for y=1, result:size(3) do 296 | if result[1][x][y] == -1 and result[2][x][y] == -1 and result[3][x][y] == -1 then 297 | result[1][x][y] = mean_pixel[1] 298 | result[2][x][y] = mean_pixel[2] 299 | result[3][x][y] = mean_pixel[3] 300 | end 301 | end 302 | end 303 | return result 304 | end 305 | 306 | -- Creates long-term flow weights 307 | function processFlowWeights(flowWeightsTabl, method, invert) 308 | if invert == 1 then 309 | for j=1, #flowWeightsTabl do 310 | flowWeightsTabl[j]:apply(function(x) return 1 - x end) 311 | end 312 | end 313 | if method == 'normalize' then 314 | -- Normalize so that the weights sum up to max 1 315 | local sum = tabl_sum(flowWeightsTabl) 316 | sum:cmax(1) 317 | for j=1, #flowWeightsTabl do 318 | flowWeightsTabl[j]:cdiv(sum) 319 | end 320 | elseif method == 'closestFirst' then 321 | -- Take the closest previous frame(s). 322 | for j=2, #flowWeightsTabl do 323 | for k=1, j-1 do 324 | flowWeightsTabl[j]:add(-1, flowWeightsTabl[j-k]) 325 | end 326 | flowWeightsTabl[j]:cmax(0) 327 | end 328 | end 329 | end 330 | 331 | local tmpParams = cmd:parse(arg) 332 | local params = nil 333 | local file = io.open(tmpParams.args, 'r') 334 | 335 | if tmpParams.args == '' or file == nil then 336 | params = cmd:parse(arg) 337 | else 338 | local args = {} 339 | io.input(file) 340 | local argPos = 1 341 | while true do 342 | local line = io.read() 343 | if line == nil then break end 344 | if line:sub(0, 1) == '-' then 345 | local splits = str_split(line, " ", 2) 346 | args[argPos] = splits[1] 347 | args[argPos + 1] = splits[2] 348 | argPos = argPos + 2 349 | end 350 | end 351 | for i=1, #arg do 352 | args[argPos] = arg[i] 353 | argPos = argPos + 1 354 | end 355 | params = cmd:parse(args) 356 | io.close(file) 357 | end 358 | 359 | main(params) 360 | -------------------------------------------------------------------------------- /consistencyChecker/CVector.h: -------------------------------------------------------------------------------- 1 | // CVector 2 | // A one-dimensional array including basic vector operations 3 | // 4 | // Author: Thomas Brox 5 | // Last change: 23.05.2005 6 | //------------------------------------------------------------------------- 7 | #ifndef CVECTOR_H 8 | #define CVECTOR_H 9 | 10 | #include 11 | #include 12 | 13 | template class CMatrix; 14 | template class CTensor; 15 | 16 | template 17 | class CVector { 18 | public: 19 | // constructor 20 | inline CVector(); 21 | // constructor 22 | inline CVector(const int aSize); 23 | // copy constructor 24 | CVector(const CVector& aCopyFrom); 25 | // constructor (from array) 26 | CVector(const T* aPointer, const int aSize); 27 | // constructor with implicit filling 28 | CVector(const int aSize, const T aFillValue); 29 | // destructor 30 | virtual ~CVector(); 31 | 32 | // Changes the size of the vector (data is lost) 33 | void setSize(int aSize); 34 | // Fills the vector with the specified value (see also operator=) 35 | void fill(const T aValue); 36 | // Appends the values of another vector 37 | void append(CVector& aVector); 38 | // Normalizes the length of the vector to 1 39 | void normalize(); 40 | // Normalizes the component sum to 1 41 | void normalizeSum(); 42 | // Reads values from a text file 43 | void readFromTXT(const char* aFilename); 44 | // Writes values to a text file 45 | void writeToTXT(char* aFilename); 46 | // Returns the sum of all values 47 | T sum(); 48 | // Returns the minimum value 49 | T min(); 50 | // Returns the maximum value 51 | T max(); 52 | // Returns the Euclidean norm 53 | T norm(); 54 | 55 | // Converts vector to homogeneous coordinates, i.e., all components are divided by last component 56 | CVector& homogen(); 57 | // Remove the last component 58 | inline void homogen_nD(); 59 | // Computes the cross product between this vector and aVector 60 | void cross(CVector& aVector); 61 | 62 | // Gives full access to the vector's values 63 | inline T& operator()(const int aIndex) const; 64 | inline T& operator[](const int aIndex) const; 65 | // Fills the vector with the specified value (equivalent to fill) 66 | inline CVector& operator=(const T aValue); 67 | // Copies a vector into this vector (size might change) 68 | CVector& operator=(const CVector& aCopyFrom); 69 | // Copies values from a matrix to the vector (size might change) 70 | CVector& operator=(const CMatrix& aCopyFrom); 71 | // Copies values from a tensor to the vector (size might change) 72 | CVector& operator=(const CTensor& aCopyFrom); 73 | // Adds another vector 74 | CVector& operator+=(const CVector& aVector); 75 | // Substracts another vector 76 | CVector& operator-=(const CVector& aVector); 77 | // Multiplies the vector with a scalar 78 | CVector& operator*=(const T aValue); 79 | // Scalar product 80 | T operator*=(const CVector& aVector); 81 | // Checks (non-)equivalence to another vector 82 | bool operator==(const CVector& aVector); 83 | inline bool operator!=(const CVector& aVector); 84 | 85 | // Gives access to the vector's size 86 | inline int size() const; 87 | // Gives access to the internal data representation 88 | inline T* data() const {return mData;} 89 | protected: 90 | int mSize; 91 | T* mData; 92 | }; 93 | 94 | // Adds two vectors 95 | template CVector operator+(const CVector& vec1, const CVector& vec2); 96 | // Substracts two vectors 97 | template CVector operator-(const CVector& vec1, const CVector& vec2); 98 | // Multiplies vector with a scalar 99 | template CVector operator*(const CVector& aVector, const T aValue); 100 | template CVector operator*(const T aValue, const CVector& aVector); 101 | // Computes the scalar product of two vectors 102 | template T operator*(const CVector& vec1, const CVector& vec2); 103 | // Computes cross product of two vectors 104 | template CVector operator/(const CVector& vec1, const CVector& vec2); 105 | // Sends the vector to an output stream 106 | template std::ostream& operator<<(std::ostream& aStream, const CVector& aVector); 107 | 108 | // Exceptions thrown by CVector-------------------------------------------- 109 | 110 | // Thrown if one tries to access an element of a vector which is out of 111 | // the vector's bounds 112 | struct EVectorRangeOverflow { 113 | EVectorRangeOverflow(const int aIndex) { 114 | using namespace std; 115 | cerr << "Exception EVectorRangeOverflow: Index = " << aIndex << endl; 116 | } 117 | }; 118 | 119 | struct EVectorIncompatibleSize { 120 | EVectorIncompatibleSize(int aSize1, int aSize2) { 121 | using namespace std; 122 | cerr << "Exception EVectorIncompatibleSize: " << aSize1 << " <> " << aSize2 << endl; 123 | } 124 | }; 125 | 126 | 127 | // I M P L E M E N T A T I O N -------------------------------------------- 128 | // 129 | // You might wonder why there is implementation code in a header file. 130 | // The reason is that not all C++ compilers yet manage separate compilation 131 | // of templates. Inline functions cannot be compiled separately anyway. 132 | // So in this case the whole implementation code is added to the header 133 | // file. 134 | // Users of CVector should ignore everything that's beyond this line. 135 | // ------------------------------------------------------------------------ 136 | 137 | // P U B L I C ------------------------------------------------------------ 138 | // constructor 139 | template 140 | inline CVector::CVector() : mSize(0) { 141 | mData = new T[0]; 142 | } 143 | 144 | // constructor 145 | template 146 | inline CVector::CVector(const int aSize) 147 | : mSize(aSize) { 148 | mData = new T[aSize]; 149 | } 150 | 151 | // copy constructor 152 | template 153 | CVector::CVector(const CVector& aCopyFrom) 154 | : mSize(aCopyFrom.mSize) { 155 | mData = new T[mSize]; 156 | for (int i = 0; i < mSize; i++) 157 | mData[i] = aCopyFrom.mData[i]; 158 | } 159 | 160 | // constructor (from array) 161 | template 162 | CVector::CVector(const T* aPointer, const int aSize) 163 | : mSize(aSize) { 164 | mData = new T[mSize]; 165 | for (int i = 0; i < mSize; i++) 166 | mData[i] = aPointer[i]; 167 | } 168 | 169 | // constructor with implicit filling 170 | template 171 | CVector::CVector(const int aSize, const T aFillValue) 172 | : mSize(aSize) { 173 | mData = new T[aSize]; 174 | fill(aFillValue); 175 | } 176 | 177 | // destructor 178 | template 179 | CVector::~CVector() { 180 | delete[] mData; 181 | } 182 | 183 | // setSize 184 | template 185 | void CVector::setSize(int aSize) { 186 | if (mData != 0) delete[] mData; 187 | mData = new T[aSize]; 188 | mSize = aSize; 189 | } 190 | 191 | // fill 192 | template 193 | void CVector::fill(const T aValue) { 194 | for (register int i = 0; i < mSize; i++) 195 | mData[i] = aValue; 196 | } 197 | 198 | // append 199 | template 200 | void CVector::append(CVector& aVector) { 201 | T* aNewData = new T[mSize+aVector.size()]; 202 | for (int i = 0; i < mSize; i++) 203 | aNewData[i] = mData[i]; 204 | for (int i = 0; i < aVector.size(); i++) 205 | aNewData[i+mSize] = aVector(i); 206 | mSize += aVector.size(); 207 | delete[] mData; 208 | mData = aNewData; 209 | } 210 | 211 | // normalize 212 | template 213 | void CVector::normalize() { 214 | T aSum = 0; 215 | for (register int i = 0; i < mSize; i++) 216 | aSum += mData[i]*mData[i]; 217 | if (aSum == 0) return; 218 | aSum = 1.0/sqrt(aSum); 219 | for (register int i = 0; i < mSize; i++) 220 | mData[i] *= aSum; 221 | } 222 | 223 | // normalizeSum 224 | template 225 | void CVector::normalizeSum() { 226 | T aSum = 0; 227 | for (register int i = 0; i < mSize; i++) 228 | aSum += mData[i]; 229 | if (aSum == 0) return; 230 | aSum = 1.0/aSum; 231 | for (register int i = 0; i < mSize; i++) 232 | mData[i] *= aSum; 233 | } 234 | 235 | // readFromTXT 236 | template 237 | void CVector::readFromTXT(const char* aFilename) { 238 | std::ifstream aStream(aFilename); 239 | mSize = 0; 240 | float aDummy; 241 | while (!aStream.eof()) { 242 | aStream >> aDummy; 243 | mSize++; 244 | } 245 | aStream.close(); 246 | std::ifstream aStream2(aFilename); 247 | delete mData; 248 | mData = new T[mSize]; 249 | for (int i = 0; i < mSize; i++) 250 | aStream2 >> mData[i]; 251 | } 252 | 253 | // writeToTXT 254 | template 255 | void CVector::writeToTXT(char* aFilename) { 256 | std::ofstream aStream(aFilename); 257 | for (int i = 0; i < mSize; i++) 258 | aStream << mData[i] << std::endl; 259 | } 260 | 261 | // sum 262 | template 263 | T CVector::sum() { 264 | T val = mData[0]; 265 | for (int i = 1; i < mSize; i++) 266 | val += mData[i]; 267 | return val; 268 | } 269 | 270 | // min 271 | template 272 | T CVector::min() { 273 | T bestValue = mData[0]; 274 | for (int i = 1; i < mSize; i++) 275 | if (mData[i] < bestValue) bestValue = mData[i]; 276 | return bestValue; 277 | } 278 | 279 | // max 280 | template 281 | T CVector::max() { 282 | T bestValue = mData[0]; 283 | for (int i = 1; i < mSize; i++) 284 | if (mData[i] > bestValue) bestValue = mData[i]; 285 | return bestValue; 286 | } 287 | 288 | // norm 289 | template 290 | T CVector::norm() { 291 | T aSum = 0.0; 292 | for (int i = 0; i < mSize; i++) 293 | aSum += mData[i]*mData[i]; 294 | return sqrt(aSum); 295 | } 296 | 297 | // homogen 298 | template 299 | CVector& CVector::homogen() { 300 | if (mSize > 1 && mData[mSize-1] != 0) { 301 | T invVal = 1.0/mData[mSize-1]; 302 | for (int i = 0; i < mSize; i++) 303 | mData[i] *= invVal; 304 | } 305 | return (*this); 306 | } 307 | 308 | // homogen_nD 309 | template 310 | inline void CVector::homogen_nD() { 311 | mSize--; 312 | } 313 | 314 | // cross 315 | template 316 | void CVector::cross(CVector& aVector) { 317 | T aHelp0 = aVector(2)*mData[1] - aVector(1)*mData[2]; 318 | T aHelp1 = aVector(0)*mData[2] - aVector(2)*mData[0]; 319 | T aHelp2 = aVector(1)*mData[0] - aVector(0)*mData[1]; 320 | mData[0] = aHelp0; 321 | mData[1] = aHelp1; 322 | mData[2] = aHelp2; 323 | } 324 | 325 | // operator() 326 | template 327 | inline T& CVector::operator()(const int aIndex) const { 328 | #ifdef _DEBUG 329 | if (aIndex >= mSize || aIndex < 0) 330 | throw EVectorRangeOverflow(aIndex); 331 | #endif 332 | return mData[aIndex]; 333 | } 334 | 335 | // operator[] 336 | template 337 | inline T& CVector::operator[](const int aIndex) const { 338 | return operator()(aIndex); 339 | } 340 | 341 | // operator= 342 | template 343 | inline CVector& CVector::operator=(const T aValue) { 344 | fill(aValue); 345 | return *this; 346 | } 347 | 348 | template 349 | CVector& CVector::operator=(const CVector& aCopyFrom) { 350 | if (this != &aCopyFrom) { 351 | if (mSize != aCopyFrom.size()) { 352 | delete[] mData; 353 | mSize = aCopyFrom.size(); 354 | mData = new T[mSize]; 355 | } 356 | for (register int i = 0; i < mSize; i++) 357 | mData[i] = aCopyFrom.mData[i]; 358 | } 359 | return *this; 360 | } 361 | 362 | template 363 | CVector& CVector::operator=(const CMatrix& aCopyFrom) { 364 | if (mSize != aCopyFrom.size()) { 365 | delete[] mData; 366 | mSize = aCopyFrom.size(); 367 | mData = new T[mSize]; 368 | } 369 | for (register int i = 0; i < mSize; i++) 370 | mData[i] = aCopyFrom.data()[i]; 371 | return *this; 372 | } 373 | 374 | template 375 | CVector& CVector::operator=(const CTensor& aCopyFrom) { 376 | if (mSize != aCopyFrom.size()) { 377 | delete[] mData; 378 | mSize = aCopyFrom.size(); 379 | mData = new T[mSize]; 380 | } 381 | for (register int i = 0; i < mSize; i++) 382 | mData[i] = aCopyFrom.data()[i]; 383 | return *this; 384 | } 385 | 386 | // operator += 387 | template 388 | CVector& CVector::operator+=(const CVector& aVector) { 389 | #ifdef _DEBUG 390 | if (mSize != aVector.size()) throw EVectorIncompatibleSize(mSize,aVector.size()); 391 | #endif 392 | for (int i = 0; i < mSize; i++) 393 | mData[i] += aVector(i); 394 | return *this; 395 | } 396 | 397 | // operator -= 398 | template 399 | CVector& CVector::operator-=(const CVector& aVector) { 400 | #ifdef _DEBUG 401 | if (mSize != aVector.size()) throw EVectorIncompatibleSize(mSize,aVector.size()); 402 | #endif 403 | for (int i = 0; i < mSize; i++) 404 | mData[i] -= aVector(i); 405 | return *this; 406 | } 407 | 408 | // operator *= 409 | template 410 | CVector& CVector::operator*=(const T aValue) { 411 | for (int i = 0; i < mSize; i++) 412 | mData[i] *= aValue; 413 | return *this; 414 | } 415 | 416 | template 417 | T CVector::operator*=(const CVector& aVector) { 418 | #ifdef _DEBUG 419 | if (mSize != aVector.size()) throw EVectorIncompatibleSize(mSize,aVector.size()); 420 | #endif 421 | T aSum = 0.0; 422 | for (int i = 0; i < mSize; i++) 423 | aSum += mData[i]*aVector(i); 424 | return aSum; 425 | } 426 | 427 | // operator == 428 | template 429 | bool CVector::operator==(const CVector& aVector) { 430 | if (mSize != aVector.size()) return false; 431 | int i = 0; 432 | while (i < mSize && aVector(i) == mData[i]) 433 | i++; 434 | return (i == mSize); 435 | } 436 | 437 | // operator != 438 | template 439 | inline bool CVector::operator!=(const CVector& aVector) { 440 | return !((*this)==aVector); 441 | } 442 | 443 | // size 444 | template 445 | inline int CVector::size() const { 446 | return mSize; 447 | } 448 | 449 | // N O N - M E M B E R F U N C T I O N S ------------------------------------- 450 | 451 | // operator + 452 | template 453 | CVector operator+(const CVector& vec1, const CVector& vec2) { 454 | #ifdef _DEBUG 455 | if (vec1.size() != vec2.size()) throw EVectorIncompatibleSize(vec1.size(),vec2.size()); 456 | #endif 457 | CVector result(vec1.size()); 458 | for (int i = 0; i < vec1.size(); i++) 459 | result(i) = vec1[i]+vec2[i]; 460 | return result; 461 | } 462 | 463 | // operator - 464 | template 465 | CVector operator-(const CVector& vec1, const CVector& vec2) { 466 | #ifdef _DEBUG 467 | if (vec1.size() != vec2.size()) throw EVectorIncompatibleSize(vec1.size(),vec2.size()); 468 | #endif 469 | CVector result(vec1.size()); 470 | for (int i = 0; i < vec1.size(); i++) 471 | result(i) = vec1(i)-vec2(i); 472 | return result; 473 | } 474 | 475 | // operator * 476 | template 477 | CVector operator*(const T aValue, const CVector& aVector) { 478 | CVector result(aVector.size()); 479 | for (int i = 0; i < aVector.size(); i++) 480 | result(i) = aValue*aVector(i); 481 | return result; 482 | } 483 | 484 | template 485 | CVector operator*(const CVector& aVector, const T aValue) { 486 | return operator*(aValue,aVector); 487 | } 488 | 489 | template 490 | T operator*(const CVector& vec1, const CVector& vec2) { 491 | #ifdef _DEBUG 492 | if (vec1.size() != vec2.size()) throw EVectorIncompatibleSize(vec1.size(),vec2.size()); 493 | #endif 494 | T aSum = 0.0; 495 | for (int i = 0; i < vec1.size(); i++) 496 | aSum += vec1(i)*vec2(i); 497 | return aSum; 498 | } 499 | 500 | // operator / 501 | template 502 | CVector operator/(const CVector& vec1, const CVector& vec2) { 503 | CVector result(3); 504 | result[0]=vec1[1]*vec2[2] - vec1[2]*vec2[1]; 505 | result[1]=vec1[2]*vec2[0] - vec1[0]*vec2[2]; 506 | result[2]=vec1[0]*vec2[1] - vec1[1]*vec2[0]; 507 | return result; 508 | } 509 | 510 | // operator << 511 | template 512 | std::ostream& operator<<(std::ostream& aStream, const CVector& aVector) { 513 | for (int i = 0; i < aVector.size(); i++) 514 | aStream << aVector(i) << '|'; 515 | aStream << std::endl; 516 | return aStream; 517 | } 518 | 519 | #endif 520 | -------------------------------------------------------------------------------- /artistic_video_core.lua: -------------------------------------------------------------------------------- 1 | require 'optim' 2 | 3 | -- modified to include a threshold for relative changes in the loss function as stopping criterion 4 | local lbfgs_mod = require 'lbfgs' 5 | 6 | --- 7 | --- MAIN FUNCTIONS 8 | --- 9 | 10 | function runOptimization(params, net, content_losses, style_losses, temporal_losses, 11 | img, frameIdx, runIdx, max_iter) 12 | local isMultiPass = (runIdx ~= -1) 13 | 14 | -- Run it through the network once to get the proper size for the gradient 15 | -- All the gradients will come from the extra loss modules, so we just pass 16 | -- zeros into the top of the net on the backward pass. 17 | local y = net:forward(img) 18 | local dy = img.new(#y):zero() 19 | 20 | -- Declaring this here lets us access it in maybe_print 21 | local optim_state = nil 22 | if params.optimizer == 'lbfgs' then 23 | optim_state = { 24 | maxIter = max_iter, 25 | tolFunRelative = params.tol_loss_relative, 26 | tolFunRelativeInterval = params.tol_loss_relative_interval, 27 | verbose=true, 28 | } 29 | elseif params.optimizer == 'adam' then 30 | optim_state = { 31 | learningRate = params.learning_rate, 32 | } 33 | else 34 | error(string.format('Unrecognized optimizer "%s"', params.optimizer)) 35 | end 36 | 37 | local function maybe_print(t, loss, alwaysPrint) 38 | local should_print = (params.print_iter > 0 and t % params.print_iter == 0) or alwaysPrint 39 | if should_print then 40 | print(string.format('Iteration %d / %d', t, max_iter)) 41 | for i, loss_module in ipairs(content_losses) do 42 | print(string.format(' Content %d loss: %f', i, loss_module.loss)) 43 | end 44 | for i, loss_module in ipairs(temporal_losses) do 45 | print(string.format(' Temporal %d loss: %f', i, loss_module.loss)) 46 | end 47 | for i, loss_module in ipairs(style_losses) do 48 | print(string.format(' Style %d loss: %f', i, loss_module.loss)) 49 | end 50 | print(string.format(' Total loss: %f', loss)) 51 | end 52 | end 53 | 54 | local function print_end(t) 55 | --- calculate total loss 56 | local loss = 0 57 | for _, mod in ipairs(content_losses) do 58 | loss = loss + mod.loss 59 | end 60 | for _, mod in ipairs(temporal_losses) do 61 | loss = loss + mod.loss 62 | end 63 | for _, mod in ipairs(style_losses) do 64 | loss = loss + mod.loss 65 | end 66 | -- print informations 67 | maybe_print(t, loss, true) 68 | end 69 | 70 | local function maybe_save(t, isEnd) 71 | local should_save_intermed = params.save_iter > 0 and t % params.save_iter == 0 72 | local should_save_end = t == max_iter or isEnd 73 | if should_save_intermed or should_save_end then 74 | local filename = nil 75 | if isMultiPass then 76 | filename = build_OutFilename(params, frameIdx, runIdx) 77 | else 78 | filename = build_OutFilename(params, math.abs(frameIdx - params.start_number + 1), should_save_end and -1 or t) 79 | end 80 | save_image(img, filename) 81 | end 82 | end 83 | 84 | -- Function to evaluate loss and gradient. We run the net forward and 85 | -- backward to get the gradient, and sum up losses from the loss modules. 86 | -- optim.lbfgs internally handles iteration and calls this fucntion many 87 | -- times, so we manually count the number of iterations to handle printing 88 | -- and saving intermediate results. 89 | local num_calls = 0 90 | local function feval(x) 91 | num_calls = num_calls + 1 92 | net:forward(x) 93 | local grad = net:backward(x, dy) 94 | local loss = 0 95 | for _, mod in ipairs(content_losses) do 96 | loss = loss + mod.loss 97 | end 98 | for _, mod in ipairs(temporal_losses) do 99 | loss = loss + mod.loss 100 | end 101 | for _, mod in ipairs(style_losses) do 102 | loss = loss + mod.loss 103 | end 104 | maybe_print(num_calls, loss, false) 105 | -- Only need to print if single-pass algorithm is used. 106 | if not isMultiPass then 107 | maybe_save(num_calls, false) 108 | end 109 | 110 | collectgarbage() 111 | -- optim.lbfgs expects a vector for gradients 112 | return loss, grad:view(grad:nElement()) 113 | end 114 | 115 | start_time = os.time() 116 | 117 | -- Run optimization. 118 | if params.optimizer == 'lbfgs' then 119 | print('Running optimization with L-BFGS') 120 | local x, losses = lbfgs_mod.optimize(feval, img, optim_state) 121 | elseif params.optimizer == 'adam' then 122 | print('Running optimization with ADAM') 123 | for t = 1, max_iter do 124 | local x, losses = optim.adam(feval, img, optim_state) 125 | end 126 | end 127 | 128 | end_time = os.time() 129 | elapsed_time = os.difftime(end_time-start_time) 130 | print("Running time: " .. elapsed_time .. "s") 131 | 132 | print_end(num_calls) 133 | maybe_save(num_calls, true) 134 | end 135 | 136 | -- Rebuild the network, insert style loss and return the indices for content and temporal loss 137 | function buildNet(cnn, params, style_images_caffe) 138 | -- Handle style blending weights for multiple style inputs 139 | local style_blend_weights = nil 140 | if params.style_blend_weights == 'nil' then 141 | -- Style blending not specified, so use equal weighting 142 | style_blend_weights = {} 143 | for i = 1, #style_images_caffe do 144 | table.insert(style_blend_weights, 1.0) 145 | end 146 | else 147 | style_blend_weights = params.style_blend_weights:split(',') 148 | assert(#style_blend_weights == #style_images_caffe, 149 | '-style_blend_weights and -style_images must have the same number of elements') 150 | end 151 | -- Normalize the style blending weights so they sum to 1 152 | local style_blend_sum = 0 153 | for i = 1, #style_blend_weights do 154 | style_blend_weights[i] = tonumber(style_blend_weights[i]) 155 | style_blend_sum = style_blend_sum + style_blend_weights[i] 156 | end 157 | for i = 1, #style_blend_weights do 158 | style_blend_weights[i] = style_blend_weights[i] / style_blend_sum 159 | end 160 | 161 | local content_layers = params.content_layers:split(",") 162 | local style_layers = params.style_layers:split(",") 163 | -- Which layer to use for the temporal loss. By default, it uses a pixel based loss, masked by the certainty 164 | --(indicated by initWeighted). 165 | local temporal_layers = params.temporal_weight > 0 and {'initWeighted'} or {} 166 | 167 | local style_losses = {} 168 | local contentLike_layers_indices = {} 169 | local contentLike_layers_type = {} 170 | 171 | local next_content_i, next_style_i, next_temporal_i = 1, 1, 1 172 | local current_layer_index = 1 173 | local net = nn.Sequential() 174 | 175 | -- Set up pixel based loss. 176 | if temporal_layers[next_temporal_i] == 'init' or temporal_layers[next_temporal_i] == 'initWeighted' then 177 | print("Setting up temporal consistency.") 178 | table.insert(contentLike_layers_indices, current_layer_index) 179 | table.insert(contentLike_layers_type, 180 | (temporal_layers[next_temporal_i] == 'initWeighted') and 'prevPlusFlowWeighted' or 'prevPlusFlow') 181 | next_temporal_i = next_temporal_i + 1 182 | end 183 | 184 | -- Set up other loss modules. 185 | -- For content loss, only remember the indices at which they are inserted, because the content changes for each frame. 186 | if params.tv_weight > 0 then 187 | local tv_mod = nn.TVLoss(params.tv_weight):float() 188 | tv_mod = MaybePutOnGPU(tv_mod, params) 189 | net:add(tv_mod) 190 | current_layer_index = current_layer_index + 1 191 | end 192 | for i = 1, #cnn do 193 | if next_content_i <= #content_layers or next_style_i <= #style_layers or next_temporal_i <= #temporal_layers then 194 | local layer = cnn:get(i) 195 | local name = layer.name 196 | local layer_type = torch.type(layer) 197 | local is_pooling = (layer_type == 'cudnn.SpatialMaxPooling' or layer_type == 'nn.SpatialMaxPooling') 198 | if is_pooling and params.pooling == 'avg' then 199 | assert(layer.padW == 0 and layer.padH == 0) 200 | local kW, kH = layer.kW, layer.kH 201 | local dW, dH = layer.dW, layer.dH 202 | local avg_pool_layer = nn.SpatialAveragePooling(kW, kH, dW, dH):float() 203 | avg_pool_layer = MaybePutOnGPU(avg_pool_layer, params) 204 | local msg = 'Replacing max pooling at layer %d with average pooling' 205 | print(string.format(msg, i)) 206 | net:add(avg_pool_layer) 207 | else 208 | net:add(layer) 209 | end 210 | current_layer_index = current_layer_index + 1 211 | if name == content_layers[next_content_i] then 212 | print("Setting up content layer", i, ":", layer.name) 213 | table.insert(contentLike_layers_indices, current_layer_index) 214 | table.insert(contentLike_layers_type, 'content') 215 | next_content_i = next_content_i + 1 216 | end 217 | if name == temporal_layers[next_temporal_i] then 218 | print("Setting up temporal layer", i, ":", layer.name) 219 | table.insert(contentLike_layers_indices, current_layer_index) 220 | table.insert(contentLike_layers_type, 'prevPlusFlow') 221 | next_temporal_i = next_temporal_i + 1 222 | end 223 | if name == style_layers[next_style_i] then 224 | print("Setting up style layer ", i, ":", layer.name) 225 | local gram = GramMatrix():float() 226 | gram = MaybePutOnGPU(gram, params) 227 | local target = nil 228 | for i = 1, #style_images_caffe do 229 | local target_features = net:forward(style_images_caffe[i]):clone() 230 | local target_i = gram:forward(target_features):clone() 231 | target_i:div(target_features:nElement()) 232 | target_i:mul(style_blend_weights[i]) 233 | if i == 1 then 234 | target = target_i 235 | else 236 | target:add(target_i) 237 | end 238 | end 239 | local norm = params.normalize_gradients 240 | local loss_module = nn.StyleLoss(params.style_weight, target, norm):float() 241 | loss_module = MaybePutOnGPU(loss_module, params) 242 | net:add(loss_module) 243 | current_layer_index = current_layer_index + 1 244 | table.insert(style_losses, loss_module) 245 | next_style_i = next_style_i + 1 246 | end 247 | end 248 | end 249 | return net, style_losses, contentLike_layers_indices, contentLike_layers_type 250 | end 251 | 252 | -- 253 | -- LOSS MODULES 254 | -- 255 | 256 | -- Define an nn Module to compute content loss in-place 257 | local ContentLoss, parent = torch.class('nn.ContentLoss', 'nn.Module') 258 | 259 | function ContentLoss:__init(strength, target, normalize) 260 | parent.__init(self) 261 | self.strength = strength 262 | self.target = target 263 | self.normalize = normalize or false 264 | self.loss = 0 265 | self.crit = nn.MSECriterion() 266 | end 267 | 268 | function ContentLoss:updateOutput(input) 269 | if input:nElement() == self.target:nElement() then 270 | self.loss = self.crit:forward(input, self.target) * self.strength 271 | else 272 | print('WARNING: Skipping content loss') 273 | end 274 | self.output = input 275 | return self.output 276 | end 277 | 278 | function ContentLoss:updateGradInput(input, gradOutput) 279 | if input:nElement() == self.target:nElement() then 280 | self.gradInput = self.crit:backward(input, self.target) 281 | end 282 | if self.normalize then 283 | self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8) 284 | end 285 | self.gradInput:mul(self.strength) 286 | self.gradInput:add(gradOutput) 287 | return self.gradInput 288 | end 289 | 290 | -- Define an nn Module to compute content loss in-place 291 | local WeightedContentLoss, parent = torch.class('nn.WeightedContentLoss', 'nn.Module') 292 | 293 | function WeightedContentLoss:__init(strength, target, weights, normalize, loss_criterion) 294 | parent.__init(self) 295 | self.strength = strength 296 | if weights ~= nil then 297 | -- Take square root of the weights, because of the way the weights are applied 298 | -- to the mean square error function. We want w*(error^2), but we can only 299 | -- do (w*error)^2 = w^2 * error^2 300 | self.weights = torch.sqrt(weights) 301 | self.target = torch.cmul(target, self.weights) 302 | else 303 | self.target = target 304 | self.weights = nil 305 | end 306 | self.normalize = normalize or false 307 | self.loss = 0 308 | if loss_criterion == 'mse' then 309 | self.crit = nn.MSECriterion() 310 | elseif loss_criterion == 'smoothl1' then 311 | self.crit = nn.SmoothL1Criterion() 312 | else 313 | print('WARNING: Unknown flow loss criterion. Using MSE.') 314 | self.crit = nn.MSECriterion() 315 | end 316 | end 317 | 318 | function WeightedContentLoss:updateOutput(input) 319 | if input:nElement() == self.target:nElement() then 320 | self.loss = self.crit:forward(input, self.target) * self.strength 321 | if self.weights ~= nil then 322 | self.loss = self.crit:forward(torch.cmul(input, self.weights), self.target) * self.strength 323 | else 324 | self.loss = self.crit:forward(input, self.target) * self.strength 325 | end 326 | else 327 | print('WARNING: Skipping content loss') 328 | end 329 | self.output = input 330 | return self.output 331 | end 332 | 333 | function WeightedContentLoss:updateGradInput(input, gradOutput) 334 | if input:nElement() == self.target:nElement() then 335 | if self.weights ~= nil then 336 | self.gradInput = self.crit:backward(torch.cmul(input, self.weights), self.target) 337 | else 338 | self.gradInput = self.crit:backward(input, self.target) 339 | end 340 | end 341 | if self.normalize then 342 | self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8) 343 | end 344 | self.gradInput:mul(self.strength) 345 | self.gradInput:add(gradOutput) 346 | return self.gradInput 347 | end 348 | 349 | -- Returns a network that computes the CxC Gram matrix from inputs 350 | -- of size C x H x W 351 | function GramMatrix() 352 | local net = nn.Sequential() 353 | net:add(nn.View(-1):setNumInputDims(2)) 354 | local concat = nn.ConcatTable() 355 | concat:add(nn.Identity()) 356 | concat:add(nn.Identity()) 357 | net:add(concat) 358 | net:add(nn.MM(false, true)) 359 | return net 360 | end 361 | 362 | 363 | -- Define an nn Module to compute style loss in-place 364 | local StyleLoss, parent = torch.class('nn.StyleLoss', 'nn.Module') 365 | 366 | function StyleLoss:__init(strength, target, normalize) 367 | parent.__init(self) 368 | self.normalize = normalize or false 369 | self.strength = strength 370 | self.target = target 371 | self.loss = 0 372 | 373 | self.gram = GramMatrix() 374 | self.G = nil 375 | self.crit = nn.MSECriterion() 376 | end 377 | 378 | function StyleLoss:updateOutput(input) 379 | self.G = self.gram:forward(input) 380 | self.G:div(input:nElement()) 381 | self.loss = self.crit:forward(self.G, self.target) 382 | self.loss = self.loss * self.strength 383 | self.output = input 384 | return self.output 385 | end 386 | 387 | function StyleLoss:updateGradInput(input, gradOutput) 388 | local dG = self.crit:backward(self.G, self.target) 389 | dG:div(input:nElement()) 390 | self.gradInput = self.gram:backward(input, dG) 391 | if self.normalize then 392 | self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8) 393 | end 394 | self.gradInput:mul(self.strength) 395 | self.gradInput:add(gradOutput) 396 | return self.gradInput 397 | end 398 | 399 | 400 | local TVLoss, parent = torch.class('nn.TVLoss', 'nn.Module') 401 | 402 | function TVLoss:__init(strength) 403 | parent.__init(self) 404 | self.strength = strength 405 | self.x_diff = torch.Tensor() 406 | self.y_diff = torch.Tensor() 407 | end 408 | 409 | function TVLoss:updateOutput(input) 410 | self.output = input 411 | return self.output 412 | end 413 | 414 | -- TV loss backward pass inspired by kaishengtai/neuralart 415 | function TVLoss:updateGradInput(input, gradOutput) 416 | self.gradInput:resizeAs(input):zero() 417 | local C, H, W = input:size(1), input:size(2), input:size(3) 418 | self.x_diff:resize(3, H - 1, W - 1) 419 | self.y_diff:resize(3, H - 1, W - 1) 420 | self.x_diff:copy(input[{{}, {1, -2}, {1, -2}}]) 421 | self.x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}]) 422 | self.y_diff:copy(input[{{}, {1, -2}, {1, -2}}]) 423 | self.y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}]) 424 | self.gradInput[{{}, {1, -2}, {1, -2}}]:add(self.x_diff):add(self.y_diff) 425 | self.gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, self.x_diff) 426 | self.gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, self.y_diff) 427 | self.gradInput:mul(self.strength) 428 | self.gradInput:add(gradOutput) 429 | return self.gradInput 430 | end 431 | 432 | function getContentLossModuleForLayer(net, layer_idx, target_img, params) 433 | local tmpNet = nn.Sequential() 434 | for i = 1, layer_idx-1 do 435 | local layer = net:get(i) 436 | tmpNet:add(layer) 437 | end 438 | local target = tmpNet:forward(target_img):clone() 439 | local loss_module = nn.ContentLoss(params.content_weight, target, params.normalize_gradients):float() 440 | loss_module = MaybePutOnGPU(loss_module, params) 441 | return loss_module 442 | end 443 | 444 | function getWeightedContentLossModuleForLayer(net, layer_idx, target_img, params, weights) 445 | local tmpNet = nn.Sequential() 446 | for i = 1, layer_idx-1 do 447 | local layer = net:get(i) 448 | tmpNet:add(layer) 449 | end 450 | local target = tmpNet:forward(target_img):clone() 451 | local loss_module = nn.WeightedContentLoss(params.temporal_weight, target, weights, 452 | params.normalize_gradients, params.temporal_loss_criterion):float() 453 | loss_module = MaybePutOnGPU(loss_module, params) 454 | return loss_module 455 | end 456 | 457 | --- 458 | --- HELPER FUNCTIONS 459 | --- 460 | 461 | function MaybePutOnGPU(obj, params) 462 | if params.gpu >= 0 then 463 | if params.backend ~= 'clnn' then 464 | return obj:cuda() 465 | else 466 | return obj:cl() 467 | end 468 | end 469 | return obj 470 | end 471 | 472 | -- Preprocess an image before passing it to a Caffe model. 473 | -- We need to rescale from [0, 1] to [0, 255], convert from RGB to BGR, 474 | -- and subtract the mean pixel. 475 | function preprocess(img) 476 | local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68}) 477 | local perm = torch.LongTensor{3, 2, 1} 478 | img = img:index(1, perm):mul(256.0) 479 | mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img) 480 | img:add(-1, mean_pixel) 481 | return img 482 | end 483 | 484 | -- Undo the above preprocessing. 485 | function deprocess(img) 486 | local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68}) 487 | mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img) 488 | img = img + mean_pixel 489 | local perm = torch.LongTensor{3, 2, 1} 490 | img = img:index(1, perm):div(256.0) 491 | return img 492 | end 493 | 494 | function save_image(img, fileName) 495 | local disp = deprocess(img:double()) 496 | disp = image.minmax{tensor=disp, min=0, max=1} 497 | image.save(fileName, disp) 498 | end 499 | 500 | -- Checks whether a table contains a specific value 501 | function tabl_contains(tabl, val) 502 | for i=1,#tabl do 503 | if tabl[i] == val then 504 | return true 505 | end 506 | end 507 | return false 508 | end 509 | 510 | -- Sums up all element in a given table 511 | function tabl_sum(t) 512 | local sum = t[1]:clone() 513 | for i=2, #t do 514 | sum:add(t[i]) 515 | end 516 | return sum 517 | end 518 | 519 | function str_split(str, delim, maxNb) 520 | -- Eliminate bad cases... 521 | if string.find(str, delim) == nil then 522 | return { str } 523 | end 524 | if maxNb == nil or maxNb < 1 then 525 | maxNb = 0 -- No limit 526 | end 527 | local result = {} 528 | local pat = "(.-)" .. delim .. "()" 529 | local nb = 1 530 | local lastPos 531 | for part, pos in string.gfind(str, pat) do 532 | result[nb] = part 533 | lastPos = pos 534 | nb = nb + 1 535 | if nb == maxNb then break end 536 | end 537 | -- Handle the last field 538 | result[nb] = string.sub(str, lastPos) 539 | return result 540 | end 541 | 542 | function fileExists(name) 543 | local f=io.open(name,"r") 544 | if f~=nil then io.close(f) return true else return false end 545 | end 546 | 547 | function calcNumberOfContentImages(params) 548 | local frameIdx = 1 549 | while frameIdx < 100000 do 550 | local fileName = string.format(params.content_pattern, frameIdx + params.start_number) 551 | if not fileExists(fileName) then return frameIdx end 552 | frameIdx = frameIdx + 1 553 | end 554 | -- If there are too many content frames, something may be wrong. 555 | return 0 556 | end 557 | 558 | function build_OutFilename(params, image_number, iterationOrRun) 559 | local ext = paths.extname(params.output_image) 560 | local basename = paths.basename(params.output_image, ext) 561 | local fileNameBase = '%s%s-' .. params.number_format 562 | if iterationOrRun == -1 then 563 | return string.format(fileNameBase .. '.%s', 564 | params.output_folder, basename, image_number, ext) 565 | else 566 | return string.format(fileNameBase .. '_%d.%s', 567 | params.output_folder, basename, image_number, iterationOrRun, ext) 568 | end 569 | end 570 | 571 | function getFormatedFlowFileName(pattern, fromIndex, toIndex) 572 | local flowFileName = pattern 573 | flowFileName = string.gsub(flowFileName, '{(.-)}', 574 | function(a) return string.format(a, fromIndex) end ) 575 | flowFileName = string.gsub(flowFileName, '%[(.-)%]', 576 | function(a) return string.format(a, toIndex) end ) 577 | return flowFileName 578 | end 579 | 580 | function getContentImage(frameIdx, params) 581 | local fileName = string.format(params.content_pattern, frameIdx) 582 | if not fileExists(fileName) then return nil end 583 | local content_image = image.load(string.format(params.content_pattern, frameIdx), 3) 584 | content_image = preprocess(content_image):float() 585 | content_image = MaybePutOnGPU(content_image, params) 586 | return content_image 587 | end 588 | 589 | function getStyleImages(params) 590 | -- Needed to read content image size 591 | local firstContentImg = image.load(string.format(params.content_pattern, params.start_number), 3) 592 | local style_image_list = params.style_image:split(',') 593 | local style_images_caffe = {} 594 | for _, img_path in ipairs(style_image_list) do 595 | local img = image.load(img_path, 3) 596 | -- Scale the style image so that it's area equals the area of the content image multiplied by the style scale. 597 | local img_scale = math.sqrt(firstContentImg:size(2) * firstContentImg:size(3) / (img:size(3) * img:size(2))) 598 | * params.style_scale 599 | img = image.scale(img, img:size(3) * img_scale, img:size(2) * img_scale, 'bilinear') 600 | print("Style image size: " .. img:size(3) .. " x " .. img:size(2)) 601 | local img_caffe = preprocess(img):float() 602 | table.insert(style_images_caffe, img_caffe) 603 | end 604 | 605 | for i = 1, #style_images_caffe do 606 | style_images_caffe[i] = MaybePutOnGPU(style_images_caffe[i], params) 607 | end 608 | 609 | return style_images_caffe 610 | end 611 | -------------------------------------------------------------------------------- /consistencyChecker/CTensor4D.h: -------------------------------------------------------------------------------- 1 | // CTensor4D 2 | // A four-dimensional array 3 | // 4 | // Author: Thomas Brox 5 | // Last change: 05.11.2001 6 | //------------------------------------------------------------------------- 7 | // Note: 8 | // There is a difference between the GNU Compiler's STL and the standard 9 | // concerning the definition and usage of string streams as well as substrings. 10 | // Thus if using a GNU Compiler you should write #define GNU_COMPILER at the 11 | // beginning of your program. 12 | // 13 | // Another Note: 14 | // Linker problems occured in connection with from the STL. 15 | // In this case you should include this file in a namespace. 16 | // Example: 17 | // namespace NTensor4D { 18 | // #include 19 | // } 20 | // After including other packages you can then write: 21 | // using namespace NTensor4D; 22 | 23 | #ifndef CTENSOR4D_H 24 | #define CTENSOR4D_H 25 | 26 | #include 27 | #include 28 | #include 29 | #ifdef GNU_COMPILER 30 | #include 31 | #else 32 | #include 33 | #endif 34 | #include "CTensor.h" 35 | 36 | template 37 | class CTensor4D { 38 | public: 39 | // constructor 40 | inline CTensor4D(); 41 | inline CTensor4D(const int aXSize, const int aYSize, const int aZSize, const int aASize); 42 | // copy constructor 43 | CTensor4D(const CTensor4D& aCopyFrom); 44 | // constructor with implicit filling 45 | CTensor4D(const int aXSize, const int aYSize, const int aZSize, const int aASize, const T aFillValue); 46 | // destructor 47 | virtual ~CTensor4D(); 48 | 49 | // Changes the size of the tensor, data will be lost 50 | void setSize(int aXSize, int aYSize, int aZSize, int aASize); 51 | // Downsamples the tensor 52 | void downsample(int aNewXSize, int aNewYSize); 53 | void downsample(int aNewXSize, int aNewYSize, int aNewZSize); 54 | // Upsamples the tensor 55 | void upsample(int aNewXSize, int aNewYSize); 56 | void upsampleBilinear(int aNewXSize, int aNewYSize); 57 | void upsampleTrilinear(int aNewXSize, int aNewYSize, int aNewZSize); 58 | // Fills the tensor with the value aValue (see also operator =) 59 | void fill(const T aValue); 60 | // Copies a box from the tensor into aResult, the size of aResult will be adjusted 61 | void cut(CTensor4D& aResult, int x1, int y1, int z1, int a1, int x2, int y2, int z2, int a2); 62 | // Reads data from a list of PPM or PGM files given in a text file 63 | void readFromFile(char* aFilename); 64 | // Writes a set of colour images to a large PPM image 65 | void writeToPPM(const char* aFilename, int aCols = 0, int aRows = 0); 66 | 67 | // Gives full access to tensor's values 68 | inline T& operator()(const int ax, const int ay, const int az, const int aa) const; 69 | // Read access with bilinear interpolation 70 | CVector operator()(const float ax, const float ay, const int aa) const; 71 | // Fills the tensor with the value aValue (equivalent to fill()) 72 | inline CTensor4D& operator=(const T aValue); 73 | // Copies the tensor aCopyFrom to this tensor (size of tensor might change) 74 | CTensor4D& operator=(const CTensor4D& aCopyFrom); 75 | // Multiplication with a scalar 76 | CTensor4D& operator*=(const T aValue); 77 | // Component-wise addition 78 | CTensor4D& operator+=(const CTensor4D& aTensor); 79 | 80 | // Gives access to the tensor's size 81 | inline int xSize() const; 82 | inline int ySize() const; 83 | inline int zSize() const; 84 | inline int aSize() const; 85 | inline int size() const; 86 | // Returns the aath layer of the 4D-tensor as 3D-tensor 87 | CTensor getTensor3D(const int aa) const; 88 | // Removes one dimension and returns the resulting 3D-tensor 89 | void getTensor3D(CTensor& aTensor, int aIndex, int aDim = 3) const; 90 | // Copies the components of a 3D-tensor in the aDimth layer of the 4D-tensor 91 | void putTensor3D(CTensor& aTensor, int aIndex, int aDim = 3); 92 | // Removes two dimensions and returns the resulting matrix 93 | void getMatrix(CMatrix& aMatrix, int aZIndex, int aAIndex) const; 94 | // Copies the components of a 3D-tensor in the aDimth layer of the 4D-tensor 95 | void putMatrix(CMatrix& aMatrix, int aZIndex, int aAIndex); 96 | // Gives access to the internal data representation (use sparingly) 97 | inline T* data() const; 98 | protected: 99 | int mXSize,mYSize,mZSize,mASize; 100 | T *mData; 101 | }; 102 | 103 | // Provides basic output functionality (only appropriate for very small tensors) 104 | template std::ostream& operator<<(std::ostream& aStream, const CTensor4D& aTensor); 105 | 106 | // Exceptions thrown by CTensor------------------------------------------------- 107 | 108 | // Thrown when one tries to access an element of a tensor which is out of 109 | // the tensor's bounds 110 | struct ETensor4DRangeOverflow { 111 | ETensor4DRangeOverflow(const int ax, const int ay, const int az, const int aa) { 112 | using namespace std; 113 | cerr << "Exception ETensor4DRangeOverflow: x = " << ax << ", y = " << ay << ", z = " << az << ", a = " << aa << endl; 114 | } 115 | }; 116 | 117 | // Thrown from getTensor3D if the parameter's size does not match with the size 118 | // of this tensor 119 | struct ETensor4DIncompatibleSize { 120 | ETensor4DIncompatibleSize(int ax, int ay, int az, int ax2, int ay2, int az2) { 121 | using namespace std; 122 | cerr << "Exception ETensor4DIncompatibleSize: x = " << ax << ":" << ax2; 123 | cerr << ", y = " << ay << ":" << ay2; 124 | cerr << ", z = " << az << ":" << az2 << endl; 125 | } 126 | }; 127 | 128 | // Thrown from readFromFile if the file format is unknown 129 | struct ETensor4DInvalidFileFormat { 130 | ETensor4DInvalidFileFormat() { 131 | using namespace std; 132 | cerr << "Exception ETensor4DInvalidFileFormat" << endl; 133 | } 134 | }; 135 | 136 | // I M P L E M E N T A T I O N -------------------------------------------- 137 | // 138 | // You might wonder why there is implementation code in a header file. 139 | // The reason is that not all C++ compilers yet manage separate compilation 140 | // of templates. Inline functions cannot be compiled separately anyway. 141 | // So in this case the whole implementation code is added to the header 142 | // file. 143 | // Users of CTensor4D should ignore everything that's beyond this line :) 144 | // ------------------------------------------------------------------------ 145 | 146 | // P U B L I C ------------------------------------------------------------ 147 | 148 | // constructor 149 | template 150 | inline CTensor4D::CTensor4D() { 151 | mData = 0; mXSize = 0; mYSize = 0; mZSize = 0; mASize = 0; 152 | } 153 | 154 | // constructor 155 | template 156 | inline CTensor4D::CTensor4D(const int aXSize, const int aYSize, const int aZSize, const int aASize) 157 | : mXSize(aXSize), mYSize(aYSize), mZSize(aZSize), mASize(aASize) { 158 | mData = new T[aXSize*aYSize*aZSize*aASize]; 159 | } 160 | 161 | // copy constructor 162 | template 163 | CTensor4D::CTensor4D(const CTensor4D& aCopyFrom) 164 | : mXSize(aCopyFrom.mXSize), mYSize(aCopyFrom.mYSize), mZSize(aCopyFrom.mZSize), mASize(aCopyFrom.mASize) { 165 | int wholeSize = mXSize*mYSize*mZSize*mASize; 166 | mData = new T[wholeSize]; 167 | for (register int i = 0; i < wholeSize; i++) 168 | mData[i] = aCopyFrom.mData[i]; 169 | } 170 | 171 | // constructor with implicit filling 172 | template 173 | CTensor4D::CTensor4D(const int aXSize, const int aYSize, const int aZSize, const int aASize, const T aFillValue) 174 | : mXSize(aXSize), mYSize(aYSize), mZSize(aZSize), mASize(aASize) { 175 | mData = new T[aXSize*aYSize*aZSize*aASize]; 176 | fill(aFillValue); 177 | } 178 | 179 | // destructor 180 | template 181 | CTensor4D::~CTensor4D() { 182 | delete[] mData; 183 | } 184 | 185 | // setSize 186 | template 187 | void CTensor4D::setSize(int aXSize, int aYSize, int aZSize, int aASize) { 188 | if (mData != 0) delete[] mData; 189 | mData = new T[aXSize*aYSize*aZSize*aASize]; 190 | mXSize = aXSize; 191 | mYSize = aYSize; 192 | mZSize = aZSize; 193 | mASize = aASize; 194 | } 195 | 196 | //downsample 197 | template 198 | void CTensor4D::downsample(int aNewXSize, int aNewYSize) { 199 | T* mData2 = new T[aNewXSize*aNewYSize*mZSize*mASize]; 200 | int aSize = aNewXSize*aNewYSize; 201 | for (int a = 0; a < mASize; a++) 202 | for (int z = 0; z < mZSize; z++) { 203 | CMatrix aTemp(mXSize,mYSize); 204 | getMatrix(aTemp,z,a); 205 | aTemp.downsample(aNewXSize,aNewYSize); 206 | for (int i = 0; i < aSize; i++) 207 | mData2[i+(a*mZSize+z)*aSize] = aTemp.data()[i]; 208 | } 209 | delete[] mData; 210 | mData = mData2; 211 | mXSize = aNewXSize; 212 | mYSize = aNewYSize; 213 | } 214 | 215 | template 216 | void CTensor4D::downsample(int aNewXSize, int aNewYSize, int aNewZSize) { 217 | T* mData2 = new T[aNewXSize*aNewYSize*aNewZSize*mASize]; 218 | int aSize = aNewXSize*aNewYSize*aNewZSize; 219 | for (int a = 0; a < mASize; a++) { 220 | CTensor aTemp(mXSize,mYSize,mZSize); 221 | getTensor3D(aTemp,a); 222 | aTemp.downsample(aNewXSize,aNewYSize,aNewZSize); 223 | for (int i = 0; i < aSize; i++) 224 | mData2[i+a*aSize] = aTemp.data()[i]; 225 | } 226 | delete[] mData; 227 | mData = mData2; 228 | mXSize = aNewXSize; 229 | mYSize = aNewYSize; 230 | mZSize = aNewZSize; 231 | } 232 | 233 | // upsample 234 | template 235 | void CTensor4D::upsample(int aNewXSize, int aNewYSize) { 236 | T* mData2 = new T[aNewXSize*aNewYSize*mZSize*mASize]; 237 | int aSize = aNewXSize*aNewYSize; 238 | for (int a = 0; a < mASize; a++) 239 | for (int z = 0; z < mZSize; z++) { 240 | CMatrix aTemp(mXSize,mYSize); 241 | getMatrix(aTemp,z,a); 242 | aTemp.upsample(aNewXSize,aNewYSize); 243 | for (int i = 0; i < aSize; i++) 244 | mData2[i+(a*mZSize+z)*aSize] = aTemp.data()[i]; 245 | } 246 | delete[] mData; 247 | mData = mData2; 248 | mXSize = aNewXSize; 249 | mYSize = aNewYSize; 250 | } 251 | 252 | // upsampleBilinear 253 | template 254 | void CTensor4D::upsampleBilinear(int aNewXSize, int aNewYSize) { 255 | T* mData2 = new T[aNewXSize*aNewYSize*mZSize*mASize]; 256 | int aSize = aNewXSize*aNewYSize; 257 | for (int a = 0; a < mASize; a++) 258 | for (int z = 0; z < mZSize; z++) { 259 | CMatrix aTemp(mXSize,mYSize); 260 | getMatrix(aTemp,z,a); 261 | aTemp.upsampleBilinear(aNewXSize,aNewYSize); 262 | for (int i = 0; i < aSize; i++) 263 | mData2[i+(a*mZSize+z)*aSize] = aTemp.data()[i]; 264 | } 265 | delete[] mData; 266 | mData = mData2; 267 | mXSize = aNewXSize; 268 | mYSize = aNewYSize; 269 | } 270 | 271 | // upsampleTrilinear 272 | template 273 | void CTensor4D::upsampleTrilinear(int aNewXSize, int aNewYSize, int aNewZSize) { 274 | T* mData2 = new T[aNewXSize*aNewYSize*aNewZSize*mASize]; 275 | int aSize = aNewXSize*aNewYSize*aNewZSize; 276 | for (int a = 0; a < mASize; a++) { 277 | CTensor aTemp(mXSize,mYSize,mZSize); 278 | getTensor3D(aTemp,a); 279 | aTemp.upsampleTrilinear(aNewXSize,aNewYSize,aNewZSize); 280 | for (int i = 0; i < aSize; i++) 281 | mData2[i+a*aSize] = aTemp.data()[i]; 282 | } 283 | delete[] mData; 284 | mData = mData2; 285 | mXSize = aNewXSize; 286 | mYSize = aNewYSize; 287 | mZSize = aNewZSize; 288 | } 289 | 290 | // fill 291 | template 292 | void CTensor4D::fill(const T aValue) { 293 | int wholeSize = mXSize*mYSize*mZSize*mASize; 294 | for (register int i = 0; i < wholeSize; i++) 295 | mData[i] = aValue; 296 | } 297 | 298 | // cut 299 | template 300 | void CTensor4D::cut(CTensor4D& aResult, int x1, int y1, int z1, int a1, int x2, int y2, int z2, int a2) { 301 | aResult.mXSize = x2-x1+1; 302 | aResult.mYSize = y2-y1+1; 303 | aResult.mZSize = z2-z1+1; 304 | aResult.mASize = a2-a1+1; 305 | delete[] aResult.mData; 306 | aResult.mData = new T[aResult.mXSize*aResult.mYSize*aResult.mZSize*aResult.mASize]; 307 | for (int a = a1; a <= a2; a++) 308 | for (int z = z1; z <= z2; z++) 309 | for (int y = y1; y <= y2; y++) 310 | for (int x = x1; x <= x2; x++) 311 | aResult(x-x1,y-y1,z-z1,a-a1) = operator()(x,y,z,a); 312 | } 313 | 314 | // readFromFile 315 | template 316 | void CTensor4D::readFromFile(char* aFilename) { 317 | if (mData != 0) delete[] mData; 318 | std::string s; 319 | std::string aPath = aFilename; 320 | aPath.erase(aPath.find_last_of('\\')+1,100); 321 | mASize = 0; 322 | { 323 | std::ifstream aStream(aFilename); 324 | while (!aStream.eof()) { 325 | aStream >> s; 326 | if (s != "") { 327 | mASize++; 328 | if (mASize == 1) { 329 | s.erase(0,s.find_last_of('.')); 330 | if (s == ".ppm" || s == ".PPM") mZSize = 3; 331 | else if (s == ".pgm" || s == ".PGM") mZSize = 1; 332 | else throw ETensor4DInvalidFileFormat(); 333 | } 334 | } 335 | } 336 | } 337 | std::ifstream aStream(aFilename); 338 | aStream >> s; 339 | s = aPath+s; 340 | // PGM 341 | if (mZSize == 1) { 342 | CMatrix aTemp; 343 | aTemp.readFromPGM(s.c_str()); 344 | mXSize = aTemp.xSize(); 345 | mYSize = aTemp.ySize(); 346 | int aSize = mXSize*mYSize; 347 | mData = new T[aSize*mASize]; 348 | for (int i = 0; i < aSize; i++) 349 | mData[i] = aTemp.data()[i]; 350 | for (int a = 1; a < mASize; a++) { 351 | aStream >> s; 352 | s = aPath+s; 353 | aTemp.readFromPGM(s.c_str()); 354 | for (int i = 0; i < aSize; i++) 355 | mData[i+a*aSize] = aTemp.data()[i]; 356 | } 357 | } 358 | // PPM 359 | else { 360 | CTensor aTemp; 361 | aTemp.readFromPPM(s.c_str()); 362 | mXSize = aTemp.xSize(); 363 | mYSize = aTemp.ySize(); 364 | int aSize = 3*mXSize*mYSize; 365 | mData = new T[aSize*mASize]; 366 | for (int i = 0; i < aSize; i++) 367 | mData[i] = aTemp.data()[i]; 368 | for (int a = 1; a < mASize; a++) { 369 | aStream >> s; 370 | s = aPath+s; 371 | aTemp.readFromPPM(s.c_str()); 372 | for (int i = 0; i < aSize; i++) 373 | mData[i+a*aSize] = aTemp.data()[i]; 374 | } 375 | } 376 | } 377 | 378 | // writeToPPM 379 | template 380 | void CTensor4D::writeToPPM(const char* aFilename, int aCols, int aRows) { 381 | int rows = (int)floor(sqrt(mASize)); 382 | if (aRows != 0) rows = aRows; 383 | int cols = (int)ceil(mASize*1.0/rows); 384 | if (aCols != 0) cols = aCols; 385 | FILE* outimage = fopen(aFilename, "wb"); 386 | fprintf(outimage, "P6 \n"); 387 | fprintf(outimage, "%ld %ld \n255\n", cols*mXSize,rows*mYSize); 388 | for (int r = 0; r < rows; r++) 389 | for (int y = 0; y < mYSize; y++) 390 | for (int c = 0; c < cols; c++) 391 | for (int x = 0; x < mXSize; x++) { 392 | unsigned char aHelp; 393 | if (r*cols+c >= mASize) aHelp = 0; 394 | else aHelp = (unsigned char)operator()(x,y,0,r*cols+c); 395 | fwrite (&aHelp, sizeof(unsigned char), 1, outimage); 396 | if (r*cols+c >= mASize) aHelp = 0; 397 | else aHelp = (unsigned char)operator()(x,y,1,r*cols+c); 398 | fwrite (&aHelp, sizeof(unsigned char), 1, outimage); 399 | if (r*cols+c >= mASize) aHelp = 0; 400 | else aHelp = (unsigned char)operator()(x,y,2,r*cols+c); 401 | fwrite (&aHelp, sizeof(unsigned char), 1, outimage); 402 | } 403 | fclose(outimage); 404 | } 405 | 406 | // operator () 407 | template 408 | inline T& CTensor4D::operator()(const int ax, const int ay, const int az, const int aa) const { 409 | #ifdef DEBUG 410 | if (ax >= mXSize || ay >= mYSize || az >= mZSize || aa >= mASize || ax < 0 || ay < 0 || az < 0 || aa < 0) 411 | throw ETensorRangeOverflow(ax,ay,az,aa); 412 | #endif 413 | return mData[mXSize*(mYSize*(mZSize*aa+az)+ay)+ax]; 414 | } 415 | 416 | template 417 | CVector CTensor4D::operator()(const float ax, const float ay, const int aa) const { 418 | CVector aResult(mZSize); 419 | int x1 = (int)ax; 420 | int y1 = (int)ay; 421 | int x2 = x1+1; 422 | int y2 = y1+1; 423 | #ifdef _DEBUG 424 | if (x2 >= mXSize || y2 >= mYSize || x1 < 0 || y1 < 0) throw ETensorRangeOverflow(ax,ay,0); 425 | #endif 426 | float alphaX = ax-x1; float alphaXTrans = 1.0-alphaX; 427 | float alphaY = ay-y1; float alphaYTrans = 1.0-alphaY; 428 | for (int k = 0; k < mZSize; k++) { 429 | float a = alphaXTrans*operator()(x1,y1,k,aa)+alphaX*operator()(x2,y1,k,aa); 430 | float b = alphaXTrans*operator()(x1,y2,k,aa)+alphaX*operator()(x2,y2,k,aa); 431 | aResult(k) = alphaYTrans*a+alphaY*b; 432 | } 433 | return aResult; 434 | } 435 | 436 | // operator = 437 | template 438 | inline CTensor4D& CTensor4D::operator=(const T aValue) { 439 | fill(aValue); 440 | return *this; 441 | } 442 | 443 | template 444 | CTensor4D& CTensor4D::operator=(const CTensor4D& aCopyFrom) { 445 | if (this != &aCopyFrom) { 446 | if (mData != 0) delete[] mData; 447 | mXSize = aCopyFrom.mXSize; 448 | mYSize = aCopyFrom.mYSize; 449 | mZSize = aCopyFrom.mZSize; 450 | mASize = aCopyFrom.mASize; 451 | int wholeSize = mXSize*mYSize*mZSize*mASize; 452 | mData = new T[wholeSize]; 453 | for (register int i = 0; i < wholeSize; i++) 454 | mData[i] = aCopyFrom.mData[i]; 455 | } 456 | return *this; 457 | } 458 | 459 | // operator *= 460 | template 461 | CTensor4D& CTensor4D::operator*=(const T aValue) { 462 | int wholeSize = mXSize*mYSize*mZSize*mASize; 463 | for (int i = 0; i < wholeSize; i++) 464 | mData[i] *= aValue; 465 | return *this; 466 | } 467 | 468 | // operator += 469 | template 470 | CTensor4D& CTensor4D::operator+=(const CTensor4D& aTensor) { 471 | #ifdef _DEBUG 472 | if (mXSize != aTensor.mXSize || mYSize != aTensor.mYSize || mZSize != aTensor.mZSize || mASize != aTensor.mASize) 473 | throw ETensorIncompatibleSize(mXSize,mYSize,mZSize); 474 | #endif 475 | int wholeSize = size(); 476 | for (int i = 0; i < wholeSize; i++) 477 | mData[i] += aTensor.mData[i]; 478 | return *this; 479 | } 480 | 481 | // xSize 482 | template 483 | inline int CTensor4D::xSize() const { 484 | 485 | return mXSize; 486 | } 487 | 488 | // ySize 489 | template 490 | inline int CTensor4D::ySize() const { 491 | return mYSize; 492 | } 493 | 494 | // zSize 495 | template 496 | inline int CTensor4D::zSize() const { 497 | return mZSize; 498 | } 499 | 500 | // aSize 501 | template 502 | inline int CTensor4D::aSize() const { 503 | return mASize; 504 | } 505 | 506 | // size 507 | template 508 | inline int CTensor4D::size() const { 509 | return mXSize*mYSize*mZSize*mASize; 510 | } 511 | 512 | // getTensor3D 513 | template 514 | CTensor CTensor4D::getTensor3D(const int aa) const { 515 | CTensor aTemp(mXSize,mYSize,mZSize); 516 | int aTensorSize = mXSize*mYSize*mZSize; 517 | int aOffset = aa*aTensorSize; 518 | for (int i = 0; i < aTensorSize; i++) 519 | aTemp.data()[i] = mData[i+aOffset]; 520 | return aTemp; 521 | } 522 | 523 | // getTensor3D 524 | template 525 | void CTensor4D::getTensor3D(CTensor& aTensor, int aIndex, int aDim) const { 526 | int aSize; 527 | int aOffset; 528 | switch (aDim) { 529 | case 3: 530 | if (aTensor.xSize() != mXSize || aTensor.ySize() != mYSize || aTensor.zSize() != mZSize) 531 | throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mYSize,mZSize); 532 | aSize = mXSize*mYSize*mZSize; 533 | aOffset = aIndex*aSize; 534 | for (int i = 0; i < aSize; i++) 535 | aTensor.data()[i] = mData[i+aOffset]; 536 | break; 537 | case 2: 538 | if (aTensor.xSize() != mXSize || aTensor.ySize() != mYSize || aTensor.zSize() != mASize) 539 | throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mYSize,mASize); 540 | aSize = mXSize*mYSize; 541 | aOffset = aIndex*aSize; 542 | for (int a = 0; a < mASize; a++) 543 | for (int i = 0; i < aSize; i++) 544 | aTensor.data()[i+a*aSize] = mData[i+aOffset+a*aSize*mZSize]; 545 | break; 546 | case 1: 547 | if (aTensor.xSize() != mXSize || aTensor.ySize() != mZSize || aTensor.zSize() != mASize) 548 | throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mZSize,mASize); 549 | for (int a = 0; a < mASize; a++) 550 | for (int z = 0; z < mZSize; z++) 551 | for (int x = 0; x < mXSize; x++) 552 | aTensor(x,z,a) = operator()(x,aIndex,z,a); 553 | break; 554 | case 0: 555 | if (aTensor.xSize() != mYSize || aTensor.ySize() != mZSize || aTensor.zSize() != mASize) 556 | throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mYSize,mZSize,mASize); 557 | for (int a = 0; a < mASize; a++) 558 | for (int z = 0; z < mZSize; z++) 559 | for (int y = 0; y < mYSize; y++) 560 | aTensor(y,z,a) = operator()(aIndex,y,z,a); 561 | break; 562 | default: getTensor3D(aTensor,aIndex); 563 | } 564 | } 565 | 566 | // putTensor3D 567 | template 568 | void CTensor4D::putTensor3D(CTensor& aTensor, int aIndex, int aDim) { 569 | int aSize; 570 | int aOffset; 571 | switch (aDim) { 572 | case 3: 573 | if (aTensor.xSize() != mXSize || aTensor.ySize() != mYSize || aTensor.zSize() != mZSize) 574 | throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mYSize,mZSize); 575 | aSize = mXSize*mYSize*mZSize; 576 | aOffset = aIndex*aSize; 577 | for (int i = 0; i < aSize; i++) 578 | mData[i+aOffset] = aTensor.data()[i]; 579 | break; 580 | case 2: 581 | if (aTensor.xSize() != mXSize || aTensor.ySize() != mYSize || aTensor.zSize() != mASize) 582 | throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mYSize,mASize); 583 | aSize = mXSize*mYSize; 584 | aOffset = aIndex*aSize; 585 | for (int a = 0; a < mASize; a++) 586 | for (int i = 0; i < aSize; i++) 587 | mData[i+aOffset+a*aSize*mZSize] = aTensor.data()[i+a*aSize]; 588 | break; 589 | case 1: 590 | if (aTensor.xSize() != mXSize || aTensor.ySize() != mZSize || aTensor.zSize() != mASize) 591 | throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mZSize,mASize); 592 | for (int a = 0; a < mASize; a++) 593 | for (int z = 0; z < mZSize; z++) 594 | for (int x = 0; x < mXSize; x++) 595 | operator()(x,aIndex,z,a) = aTensor(x,z,a); 596 | break; 597 | case 0: 598 | if (aTensor.xSize() != mYSize || aTensor.ySize() != mZSize || aTensor.zSize() != mASize) 599 | throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mYSize,mZSize,mASize); 600 | for (int a = 0; a < mASize; a++) 601 | for (int z = 0; z < mZSize; z++) 602 | for (int y = 0; y < mYSize; y++) 603 | operator()(aIndex,y,z,a) = aTensor(y,z,a); 604 | break; 605 | default: putTensor3D(aTensor,aIndex); 606 | } 607 | } 608 | 609 | // getMatrix 610 | template 611 | void CTensor4D::getMatrix(CMatrix& aMatrix, int aZIndex, int aAIndex) const { 612 | if (aMatrix.xSize() != mXSize || aMatrix.ySize() != mYSize) 613 | throw ETensor4DIncompatibleSize(aMatrix.xSize(),aMatrix.ySize(),1,mXSize,mYSize,1); 614 | int aSize = mXSize*mYSize; 615 | int aOffset = aSize*(aAIndex*mZSize+aZIndex); 616 | for (int i = 0; i < aSize; i++) 617 | aMatrix.data()[i] = mData[i+aOffset]; 618 | } 619 | 620 | // putMatrix 621 | template 622 | void CTensor4D::putMatrix(CMatrix& aMatrix, int aZIndex, int aAIndex) { 623 | if (aMatrix.xSize() != mXSize || aMatrix.ySize() != mYSize) 624 | throw ETensor4DIncompatibleSize(aMatrix.xSize(),aMatrix.ySize(),1,mXSize,mYSize,1); 625 | int aSize = mXSize*mYSize; 626 | int aOffset = aSize*(aAIndex*mZSize+aZIndex); 627 | for (int i = 0; i < aSize; i++) 628 | mData[i+aOffset] = aMatrix.data()[i]; 629 | } 630 | 631 | // data() 632 | template 633 | inline T* CTensor4D::data() const { 634 | return mData; 635 | } 636 | 637 | // N O N - M E M B E R F U N C T I O N S -------------------------------------- 638 | 639 | // operator << 640 | template 641 | std::ostream& operator<<(std::ostream& aStream, const CTensor4D& aTensor) { 642 | for (int a = 0; a < aTensor.aSize(); a++) { 643 | for (int z = 0; z < aTensor.zSize(); z++) { 644 | for (int y = 0; y < aTensor.ySize(); y++) { 645 | for (int x = 0; x < aTensor.xSize(); x++) 646 | aStream << aTensor(x,y,z) << ' '; 647 | aStream << std::endl; 648 | } 649 | aStream << std::endl; 650 | } 651 | aStream << std::endl; 652 | } 653 | return aStream; 654 | } 655 | 656 | #endif 657 | -------------------------------------------------------------------------------- /consistencyChecker/CTensor.h: -------------------------------------------------------------------------------- 1 | // CTensor 2 | // A three-dimensional array 3 | // 4 | // Author: Thomas Brox 5 | 6 | #ifndef CTENSOR_H 7 | #define CTENSOR_H 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | inline int int_min(int x, int& y) { return (x 20 | class CTensor { 21 | public: 22 | // standard constructor 23 | inline CTensor(); 24 | // constructor 25 | inline CTensor(const int aXSize, const int aYSize, const int aZSize); 26 | // copy constructor 27 | CTensor(const CTensor& aCopyFrom); 28 | // constructor with implicit filling 29 | CTensor(const int aXSize, const int aYSize, const int aZSize, const T aFillValue); 30 | // destructor 31 | virtual ~CTensor(); 32 | 33 | // Changes the size of the tensor, data will be lost 34 | void setSize(int aXSize, int aYSize, int aZSize); 35 | // Downsamples the tensor 36 | void downsample(int aNewXSize, int aNewYSize); 37 | void downsample(int aNewXSize, int aNewYSize, CMatrix& aConfidence); 38 | void downsample(int aNewXSize, int aNewYSize, CTensor& aConfidence); 39 | // Upsamples the tensor 40 | void upsample(int aNewXSize, int aNewYSize); 41 | void upsampleBilinear(int aNewXSize, int aNewYSize); 42 | // Fills the tensor with the value aValue (see also operator =) 43 | void fill(const T aValue); 44 | // Fills a rectangular area with the value aValue 45 | void fillRect(const CVector& aValue, int ax1, int ay1, int ax2, int ay2); 46 | // Copies a box from the tensor into aResult, the size of aResult will be adjusted 47 | void cut(CTensor& aResult, int x1, int y1, int z1, int x2, int y2, int z2); 48 | // Copies aCopyFrom at a certain position of the tensor 49 | void paste(CTensor& aCopyFrom, int ax, int ay, int az); 50 | // Mirrors the boundaries, aFrom is the distance from the boundaries where the pixels are copied from, 51 | // aTo is the distance from the boundaries they are copied to 52 | void mirrorLayers(int aFrom, int aTo); 53 | // Transforms the values so that they are all between aMin and aMax 54 | // aInitialMin/Max are initializations for seeking the minimum and maximum, change if your 55 | // data is not in this range or the data type T cannot hold these values 56 | void normalizeEach(T aMin, T aMax, T aInitialMin = -30000, T aInitialMax = 30000); 57 | void normalize(T aMin, T aMax, int aChannel, T aInitialMin = -30000, T aInitialMax = 30000); 58 | void normalize(T aMin, T aMax, T aInitialMin = -30000, T aInitialMax = 30000); 59 | // Converts from RGB to CIELab color space and vice-versa 60 | void rgbToCielab(); 61 | void cielabToRGB(); 62 | // Draws a line into the image (only for mZSize = 3) 63 | void drawLine(int dStartX, int dStartY, int dEndX, int dEndY, T aValue1 = 255, T aValue2 = 255, T aValue3 = 255); 64 | void drawRect(int dStartX, int dStartY, int dEndX, int dEndY, T aValue1 = 255, T aValue2 = 255, T aValue3 = 255); 65 | 66 | // Applies a similarity transform (translation, rotation, scaling) to the image 67 | void applySimilarityTransform(CTensor& aWarped, CMatrix& aOutside, float tx, float ty, float cx, float cy, float phi, float scale); 68 | // Applies a homography (linear projective transformation) to the image 69 | void applyHomography(CTensor& aWarped, CMatrix& aOutside, const CMatrix& H); 70 | 71 | // Reads the tensor from a file in Mathematica format 72 | void readFromMathematicaFile(const char* aFilename); 73 | // Writes the tensor to a file in Mathematica format 74 | void writeToMathematicaFile(const char* aFilename); 75 | // Reads the tensor from a movie file in IM format 76 | void readFromIMFile(const char* aFilename); 77 | // Writes the tensor to a movie file in IM format 78 | void writeToIMFile(const char* aFilename); 79 | // Reads an image from a PGM file 80 | void readFromPGM(const char* aFilename); 81 | // Writes the tensor in PGM-Format 82 | void writeToPGM(const char* aFilename); 83 | // Extends a XxYx1 tensor to a XxYx3 tensor with three identical layers 84 | void makeColorTensor(); 85 | // Reads a color image from a PPM file 86 | void readFromPPM(const char* aFilename); 87 | // Writes the tensor in PPM-Format 88 | void writeToPPM(const char* aFilename); 89 | // Reads the tensor from a PDM file 90 | void readFromPDM(const char* aFilename); 91 | // Writes the tensor in PDM-Format 92 | void writeToPDM(const char* aFilename, char aFeatureType); 93 | 94 | // Gives full access to tensor's values 95 | inline T& operator()(const int ax, const int ay, const int az) const; 96 | // Read access with bilinear interpolation 97 | CVector operator()(const float ax, const float ay) const; 98 | // Fills the tensor with the value aValue (equivalent to fill()) 99 | inline CTensor& operator=(const T aValue); 100 | // Copies the tensor aCopyFrom to this tensor (size of tensor might change) 101 | CTensor& operator=(const CTensor& aCopyFrom); 102 | // Adds a tensor of same size 103 | CTensor& operator+=(const CTensor& aMatrix); 104 | // Adds a constant to the tensor 105 | CTensor& operator+=(const T aValue); 106 | // Multiplication with a scalar 107 | CTensor& operator*=(const T aValue); 108 | 109 | // Returns the minimum value 110 | T min() const; 111 | // Returns the maximum value 112 | T max() const; 113 | // Returns the average value 114 | T avg() const; 115 | // Returns the average value of a specific layer 116 | T avg(int az) const; 117 | // Gives access to the tensor's size 118 | inline int xSize() const; 119 | inline int ySize() const; 120 | inline int zSize() const; 121 | inline int size() const; 122 | // Returns the az layer of the tensor as matrix (slow and fast version) 123 | CMatrix getMatrix(const int az) const; 124 | void getMatrix(CMatrix& aMatrix, const int az) const; 125 | // Copies the matrix components of aMatrix into the az layer of the tensor 126 | void putMatrix(CMatrix& aMatrix, const int az); 127 | // Gives access to the internal data representation (use sparingly) 128 | inline T* data() const; 129 | 130 | // Possible interpretations of the third tensor dimension for PDM format 131 | static const char cSpacial = 'S'; 132 | static const char cVector = 'V'; 133 | static const char cColor = 'C'; 134 | static const char cSymmetricMatrix = 'Y'; 135 | protected: 136 | int mXSize,mYSize,mZSize; 137 | T *mData; 138 | }; 139 | 140 | // Provides basic output functionality (only appropriate for very small tensors) 141 | template std::ostream& operator<<(std::ostream& aStream, const CTensor& aTensor); 142 | 143 | // Exceptions thrown by CTensor------------------------------------------------- 144 | 145 | // Thrown when one tries to access an element of a tensor which is out of 146 | // the tensor's bounds 147 | struct ETensorRangeOverflow { 148 | ETensorRangeOverflow(const int ax, const int ay, const int az) { 149 | using namespace std; 150 | cerr << "Exception ETensorRangeOverflow: x = " << ax << ", y = " << ay << ", z = " << az << endl; 151 | } 152 | }; 153 | 154 | // Thrown when the size of a tensor does not match the needed size for a certain operation 155 | struct ETensorIncompatibleSize { 156 | ETensorIncompatibleSize(int ax, int ay, int ax2, int ay2) { 157 | using namespace std; 158 | cerr << "Exception ETensorIncompatibleSize: x = " << ax << ":" << ax2; 159 | cerr << ", y = " << ay << ":" << ay2 << endl; 160 | } 161 | ETensorIncompatibleSize(int ax, int ay, int az) { 162 | std::cerr << "Exception ETensorIncompatibleTensorSize: x = " << ax << ", y = " << ay << ", z= " << az << std::endl; 163 | } 164 | }; 165 | 166 | // I M P L E M E N T A T I O N -------------------------------------------- 167 | // 168 | // You might wonder why there is implementation code in a header file. 169 | // The reason is that not all C++ compilers yet manage separate compilation 170 | // of templates. Inline functions cannot be compiled separately anyway. 171 | // So in this case the whole implementation code is added to the header 172 | // file. 173 | // Users of CTensor should ignore everything that's beyond this line :) 174 | // ------------------------------------------------------------------------ 175 | 176 | // P U B L I C ------------------------------------------------------------ 177 | 178 | // standard constructor 179 | template 180 | inline CTensor::CTensor() { 181 | mData = 0; 182 | mXSize = mYSize = mZSize = 0; 183 | } 184 | 185 | // constructor 186 | template 187 | inline CTensor::CTensor(const int aXSize, const int aYSize, const int aZSize) 188 | : mXSize(aXSize), mYSize(aYSize), mZSize(aZSize) { 189 | mData = new T[aXSize*aYSize*aZSize]; 190 | } 191 | 192 | // copy constructor 193 | template 194 | CTensor::CTensor(const CTensor& aCopyFrom) 195 | : mXSize(aCopyFrom.mXSize), mYSize(aCopyFrom.mYSize), mZSize(aCopyFrom.mZSize) { 196 | int wholeSize = mXSize*mYSize*mZSize; 197 | mData = new T[wholeSize]; 198 | for (register int i = 0; i < wholeSize; i++) 199 | mData[i] = aCopyFrom.mData[i]; 200 | } 201 | 202 | // constructor with implicit filling 203 | template 204 | CTensor::CTensor(const int aXSize, const int aYSize, const int aZSize, const T aFillValue) 205 | : mXSize(aXSize), mYSize(aYSize), mZSize(aZSize) { 206 | mData = new T[aXSize*aYSize*aZSize]; 207 | fill(aFillValue); 208 | } 209 | 210 | // destructor 211 | template 212 | CTensor::~CTensor() { 213 | delete[] mData; 214 | } 215 | 216 | // setSize 217 | template 218 | void CTensor::setSize(int aXSize, int aYSize, int aZSize) { 219 | if (mData != 0) delete[] mData; 220 | mData = new T[aXSize*aYSize*aZSize]; 221 | mXSize = aXSize; 222 | mYSize = aYSize; 223 | mZSize = aZSize; 224 | } 225 | 226 | //downsample 227 | template 228 | void CTensor::downsample(int aNewXSize, int aNewYSize) { 229 | T* mData2 = new T[aNewXSize*aNewYSize*mZSize]; 230 | int aSize = aNewXSize*aNewYSize; 231 | for (int z = 0; z < mZSize; z++) { 232 | CMatrix aTemp(mXSize,mYSize); 233 | getMatrix(aTemp,z); 234 | aTemp.downsample(aNewXSize,aNewYSize); 235 | for (int i = 0; i < aSize; i++) 236 | mData2[i+z*aSize] = aTemp.data()[i]; 237 | } 238 | delete[] mData; 239 | mData = mData2; 240 | mXSize = aNewXSize; 241 | mYSize = aNewYSize; 242 | } 243 | 244 | template 245 | void CTensor::downsample(int aNewXSize, int aNewYSize, CMatrix& aConfidence) { 246 | T* mData2 = new T[aNewXSize*aNewYSize*mZSize]; 247 | int aSize = aNewXSize*aNewYSize; 248 | for (int z = 0; z < mZSize; z++) { 249 | CMatrix aTemp(mXSize,mYSize); 250 | getMatrix(aTemp,z); 251 | aTemp.downsample(aNewXSize,aNewYSize,aConfidence); 252 | for (int i = 0; i < aSize; i++) 253 | mData2[i+z*aSize] = aTemp.data()[i]; 254 | } 255 | delete[] mData; 256 | mData = mData2; 257 | mXSize = aNewXSize; 258 | mYSize = aNewYSize; 259 | } 260 | 261 | template 262 | void CTensor::downsample(int aNewXSize, int aNewYSize, CTensor& aConfidence) { 263 | T* mData2 = new T[aNewXSize*aNewYSize*mZSize]; 264 | int aSize = aNewXSize*aNewYSize; 265 | CMatrix aConf(mXSize,mYSize); 266 | for (int z = 0; z < mZSize; z++) { 267 | CMatrix aTemp(mXSize,mYSize); 268 | getMatrix(aTemp,z); 269 | aConfidence.getMatrix(aConf,z); 270 | aTemp.downsample(aNewXSize,aNewYSize,aConf); 271 | for (int i = 0; i < aSize; i++) 272 | mData2[i+z*aSize] = aTemp.data()[i]; 273 | } 274 | delete[] mData; 275 | mData = mData2; 276 | mXSize = aNewXSize; 277 | mYSize = aNewYSize; 278 | } 279 | 280 | // upsample 281 | template 282 | void CTensor::upsample(int aNewXSize, int aNewYSize) { 283 | T* mData2 = new T[aNewXSize*aNewYSize*mZSize]; 284 | int aSize = aNewXSize*aNewYSize; 285 | for (int z = 0; z < mZSize; z++) { 286 | CMatrix aTemp(mXSize,mYSize); 287 | getMatrix(aTemp,z); 288 | aTemp.upsample(aNewXSize,aNewYSize); 289 | for (int i = 0; i < aSize; i++) 290 | mData2[i+z*aSize] = aTemp.data()[i]; 291 | } 292 | delete[] mData; 293 | mData = mData2; 294 | mXSize = aNewXSize; 295 | mYSize = aNewYSize; 296 | } 297 | 298 | // upsampleBilinear 299 | template 300 | void CTensor::upsampleBilinear(int aNewXSize, int aNewYSize) { 301 | T* mData2 = new T[aNewXSize*aNewYSize*mZSize]; 302 | int aSize = aNewXSize*aNewYSize; 303 | for (int z = 0; z < mZSize; z++) { 304 | CMatrix aTemp(mXSize,mYSize); 305 | getMatrix(aTemp,z); 306 | aTemp.upsampleBilinear(aNewXSize,aNewYSize); 307 | for (int i = 0; i < aSize; i++) 308 | mData2[i+z*aSize] = aTemp.data()[i]; 309 | } 310 | delete[] mData; 311 | mData = mData2; 312 | mXSize = aNewXSize; 313 | mYSize = aNewYSize; 314 | } 315 | 316 | // fill 317 | template 318 | void CTensor::fill(const T aValue) { 319 | int wholeSize = mXSize*mYSize*mZSize; 320 | for (register int i = 0; i < wholeSize; i++) 321 | mData[i] = aValue; 322 | } 323 | 324 | // fillRect 325 | template 326 | void CTensor::fillRect(const CVector& aValue, int ax1, int ay1, int ax2, int ay2) { 327 | for (int z = 0; z < mZSize; z++) { 328 | T val = aValue(z); 329 | for (int y = int_max(0,ay1); y <= int_min(ySize()-1,ay2); y++) 330 | for (register int x = int_max(0,ax1); x <= int_min(xSize()-1,ax2); x++) 331 | operator()(x,y,z) = val; 332 | } 333 | } 334 | 335 | // cut 336 | template 337 | void CTensor::cut(CTensor& aResult, int x1, int y1, int z1, int x2, int y2, int z2) { 338 | aResult.mXSize = x2-x1+1; 339 | aResult.mYSize = y2-y1+1; 340 | aResult.mZSize = z2-z1+1; 341 | delete[] aResult.mData; 342 | aResult.mData = new T[aResult.mXSize*aResult.mYSize*aResult.mZSize]; 343 | for (int z = z1; z <= z2; z++) 344 | for (int y = y1; y <= y2; y++) 345 | for (int x = x1; x <= x2; x++) 346 | aResult(x-x1,y-y1,z-z1) = operator()(x,y,z); 347 | } 348 | 349 | // paste 350 | template 351 | void CTensor::paste(CTensor& aCopyFrom, int ax, int ay, int az) { 352 | for (int z = 0; z < aCopyFrom.zSize(); z++) 353 | for (int y = 0; y < aCopyFrom.ySize(); y++) 354 | for (int x = 0; x < aCopyFrom.xSize(); x++) 355 | operator()(ax+x,ay+y,az+z) = aCopyFrom(x,y,z); 356 | } 357 | 358 | // mirrorLayers 359 | template 360 | void CTensor::mirrorLayers(int aFrom, int aTo) { 361 | for (int z = 0; z < mZSize; z++) { 362 | int aToXIndex = mXSize-aTo-1; 363 | int aToYIndex = mYSize-aTo-1; 364 | int aFromXIndex = mXSize-aFrom-1; 365 | int aFromYIndex = mYSize-aFrom-1; 366 | for (int y = aFrom; y <= aFromYIndex; y++) { 367 | operator()(aTo,y,z) = operator()(aFrom,y,z); 368 | operator()(aToXIndex,y,z) = operator()(aFromXIndex,y,z); 369 | } 370 | for (int x = aTo; x <= aToXIndex; x++) { 371 | operator()(x,aTo,z) = operator()(x,aFrom,z); 372 | operator()(x,aToYIndex,z) = operator()(x,aFromYIndex,z); 373 | } 374 | } 375 | } 376 | 377 | // normalize 378 | template 379 | void CTensor::normalizeEach(T aMin, T aMax, T aInitialMin, T aInitialMax) { 380 | for (int k = 0; k < mZSize; k++) 381 | normalize(aMin,aMax,k,aInitialMin,aInitialMax); 382 | } 383 | 384 | template 385 | void CTensor::normalize(T aMin, T aMax, int aChannel, T aInitialMin, T aInitialMax) { 386 | int aChannelSize = mXSize*mYSize; 387 | T aCurrentMin = aInitialMax; 388 | T aCurrentMax = aInitialMin; 389 | int aIndex = aChannelSize*aChannel; 390 | for (int i = 0; i < aChannelSize; i++) { 391 | if (mData[aIndex] > aCurrentMax) aCurrentMax = mData[aIndex]; 392 | else if (mData[aIndex] < aCurrentMin) aCurrentMin = mData[aIndex]; 393 | aIndex++; 394 | } 395 | T aTemp1 = aCurrentMin - aMin; 396 | T aTemp2 = (aCurrentMax-aCurrentMin); 397 | if (aTemp2 == 0) aTemp2 = 1; 398 | else aTemp2 = (aMax-aMin)/aTemp2; 399 | aIndex = aChannelSize*aChannel; 400 | for (int i = 0; i < aChannelSize; i++) { 401 | mData[aIndex] -= aTemp1; 402 | mData[aIndex] *= aTemp2; 403 | aIndex++; 404 | } 405 | } 406 | 407 | // drawLine 408 | template 409 | void CTensor::drawLine(int dStartX, int dStartY, int dEndX, int dEndY, T aValue1, T aValue2, T aValue3) { 410 | int aOffset1 = mXSize*mYSize; 411 | int aOffset2 = 2*aOffset1; 412 | // vertical line 413 | if (dStartX == dEndX) { 414 | if (dStartX < 0 || dStartX >= mXSize) return; 415 | int x = dStartX; 416 | if (dStartY < dEndY) { 417 | for (int y = dStartY; y <= dEndY; y++) 418 | if (y >= 0 && y < mYSize) { 419 | mData[x+y*mXSize] = aValue1; 420 | mData[x+y*mXSize+aOffset1] = aValue2; 421 | mData[x+y*mXSize+aOffset2] = aValue3; 422 | } 423 | } 424 | else { 425 | for (int y = dStartY; y >= dEndY; y--) 426 | if (y >= 0 && y < mYSize) { 427 | mData[x+y*mXSize] = aValue1; 428 | mData[x+y*mXSize+aOffset1] = aValue2; 429 | mData[x+y*mXSize+aOffset2] = aValue3; 430 | } 431 | } 432 | return; 433 | } 434 | // horizontal line 435 | if (dStartY == dEndY) { 436 | if (dStartY < 0 || dStartY >= mYSize) return; 437 | int y = dStartY; 438 | if (dStartX < dEndX) { 439 | for (int x = dStartX; x <= dEndX; x++) 440 | if (x >= 0 && x < mXSize) { 441 | mData[x+y*mXSize] = aValue1; 442 | mData[x+y*mXSize+aOffset1] = aValue2; 443 | mData[x+y*mXSize+aOffset2] = aValue3; 444 | } 445 | } 446 | else { 447 | for (int x = dStartX; x >= dEndX; x--) 448 | if (x >= 0 && x < mXSize) { 449 | mData[x+y*mXSize] = aValue1; 450 | mData[x+y*mXSize+aOffset1] = aValue2; 451 | mData[x+y*mXSize+aOffset2] = aValue3; 452 | } 453 | } 454 | return; 455 | } 456 | float m = float(dStartY - dEndY) / float(dStartX - dEndX); 457 | float invm = 1.0/m; 458 | if (fabs(m) > 1.0) { 459 | if (dEndY > dStartY) { 460 | for (int y = dStartY; y <= dEndY; y++) { 461 | int x = (int)(0.5+dStartX+(y-dStartY)*invm); 462 | if (x >= 0 && x < mXSize && y >= 0 && y < mYSize) { 463 | mData[x+y*mXSize] = aValue1; 464 | mData[x+y*mXSize+aOffset1] = aValue2; 465 | mData[x+y*mXSize+aOffset2] = aValue3; 466 | } 467 | } 468 | } 469 | else { 470 | for (int y = dStartY; y >= dEndY; y--) { 471 | int x = (int)(0.5+dStartX+(y-dStartY)*invm); 472 | if (x >= 0 && x < mXSize && y >= 0 && y < mYSize) { 473 | mData[x+y*mXSize] = aValue1; 474 | mData[x+y*mXSize+aOffset1] = aValue2; 475 | mData[x+y*mXSize+aOffset2] = aValue3; 476 | } 477 | } 478 | } 479 | } 480 | else { 481 | if (dEndX > dStartX) { 482 | for (int x = dStartX; x <= dEndX; x++) { 483 | int y = (int)(0.5+dStartY+(x-dStartX)*m); 484 | if (x >= 0 && x < mXSize && y >= 0 && y < mYSize) { 485 | mData[x+y*mXSize] = aValue1; 486 | mData[x+y*mXSize+aOffset1] = aValue2; 487 | mData[x+y*mXSize+aOffset2] = aValue3; 488 | } 489 | } 490 | } 491 | else { 492 | for (int x = dStartX; x >= dEndX; x--) { 493 | int y = (int)(0.5+dStartY+(x-dStartX)*m); 494 | if (x >= 0 && x < mXSize && y >= 0 && y < mYSize) { 495 | mData[x+y*mXSize] = aValue1; 496 | mData[x+y*mXSize+aOffset1] = aValue2; 497 | mData[x+y*mXSize+aOffset2] = aValue3; 498 | } 499 | } 500 | } 501 | } 502 | } 503 | 504 | // drawRect 505 | template 506 | void CTensor::drawRect(int dStartX, int dStartY, int dEndX, int dEndY, T aValue1, T aValue2, T aValue3) { 507 | drawLine(dStartX,dStartY,dEndX,dStartY,aValue1,aValue2,aValue3); 508 | drawLine(dStartX,dEndY,dEndX,dEndY,aValue1,aValue2,aValue3); 509 | drawLine(dStartX,dStartY,dStartX,dEndY,aValue1,aValue2,aValue3); 510 | drawLine(dEndX,dStartY,dEndX,dEndY,aValue1,aValue2,aValue3); 511 | } 512 | 513 | template 514 | void CTensor::normalize(T aMin, T aMax, T aInitialMin, T aInitialMax) { 515 | int aSize = mXSize*mYSize*mZSize; 516 | T aCurrentMin = aInitialMax; 517 | T aCurrentMax = aInitialMin; 518 | for (int i = 0; i < aSize; i++) { 519 | if (mData[i] > aCurrentMax) aCurrentMax = mData[i]; 520 | else if (mData[i] < aCurrentMin) aCurrentMin = mData[i]; 521 | } 522 | T aTemp1 = aCurrentMin - aMin; 523 | T aTemp2 = (aCurrentMax-aCurrentMin); 524 | if (aTemp2 == 0) aTemp2 = 1; 525 | else aTemp2 = (aMax-aMin)/aTemp2; 526 | for (int i = 0; i < aSize; i++) { 527 | mData[i] -= aTemp1; 528 | mData[i] *= aTemp2; 529 | } 530 | } 531 | 532 | template 533 | void CTensor::rgbToCielab() { 534 | for (int y = 0; y < mYSize; y++) 535 | for (int x = 0; x < mXSize; x++) { 536 | float R = operator()(x,y,0)*0.003921569; 537 | float G = operator()(x,y,1)*0.003921569; 538 | float B = operator()(x,y,2)*0.003921569; 539 | if (R>0.0031308) R = pow((R + 0.055)*0.9478673, 2.4); else R *= 0.077399381; 540 | if (G>0.0031308) G = pow((G + 0.055)*0.9478673, 2.4); else G *= 0.077399381; 541 | if (B>0.0031308) B = pow((B + 0.055)*0.9478673, 2.4); else B *= 0.077399381; 542 | //Observer. = 2?, Illuminant = D65 543 | float X = R * 0.4124 + G * 0.3576 + B * 0.1805; 544 | float Y = R * 0.2126 + G * 0.7152 + B * 0.0722; 545 | float Z = R * 0.0193 + G * 0.1192 + B * 0.9505; 546 | X *= 1.052111; 547 | Z *= 0.918417; 548 | if (X > 0.008856) X = pow(X,0.33333333333); else X = 7.787*X + 0.137931034; 549 | if (Y > 0.008856) Y = pow(Y,0.33333333333); else Y = 7.787*Y + 0.137931034; 550 | if (Z > 0.008856) Z = pow(Z,0.33333333333); else Z = 7.787*Z + 0.137931034; 551 | operator()(x,y,0) = 1000.0*((295.8*Y) - 40.8)/255.0; 552 | operator()(x,y,1) = 128.0+637.5*(X-Y); 553 | operator()(x,y,2) = 128.0+255.0*(Y-Z); 554 | } 555 | } 556 | 557 | template 558 | void CTensor::cielabToRGB() { 559 | for (int y = 0; y < mYSize; y++) 560 | for (int x = 0; x < mXSize; x++) { 561 | float L = operator()(x,y,0)*0.255; 562 | float A = operator()(x,y,1); 563 | float B = operator()(x,y,2); 564 | float Y = (L+40.8)*0.00338066; 565 | float X = (A-128.0+637.5*Y)*0.0015686; 566 | float Z = (128.0+255.0*Y-B)*0.00392157; 567 | float temp = Y*Y*Y; 568 | if (temp > 0.008856) Y = temp; 569 | else Y = (Y-0.137931034)*0.12842; 570 | temp = X*X*X; 571 | if (temp > 0.008856) X = temp; 572 | else X = (X-0.137931034)*0.12842; 573 | temp = Z*Z*Z; 574 | if (temp > 0.008856) Z = temp; 575 | else Z = (Z-0.137931034)*0.12842; 576 | X *= 0.95047; 577 | Y *= 1.0; 578 | Z *= 1.08883; 579 | float r = 3.2406*X-1.5372*Y-0.4986*Z; 580 | float g = -0.9689*X+1.8758*Y+0.0415*Z; 581 | float b = 0.0557*X-0.204*Y+1.057*Z; 582 | if (r < 0) r = 0; 583 | temp = 1.055*pow(r,0.41667)-0.055; 584 | if (temp > 0.0031308) r = temp; 585 | else r *= 12.92; 586 | if (g < 0) g = 0; 587 | temp = 1.055*pow(g,0.41667)-0.055; 588 | if (temp > 0.0031308) g = temp; 589 | else g *= 12.92; 590 | if (b < 0) b = 0; 591 | temp = 1.055*pow(b,0.41667)-0.055; 592 | if (temp > 0.0031308) b = temp; 593 | else b *= 12.92; 594 | operator()(x,y,0) = 255.0*r; 595 | operator()(x,y,1) = 255.0*g; 596 | operator()(x,y,2) = 255.0*b; 597 | } 598 | } 599 | 600 | // applySimilarityTransform 601 | template 602 | void CTensor::applySimilarityTransform(CTensor& aWarped, CMatrix& aOutside, float tx, float ty, float cx, float cy, float phi, float scale) { 603 | float cosphi = scale*cos(phi); 604 | float sinphi = scale*sin(phi); 605 | int aSize = mXSize*mYSize; 606 | int aWarpedSize = aWarped.xSize()*aWarped.ySize(); 607 | float ctx = cx+tx-cx*cosphi+cy*sinphi; 608 | float cty = cy+ty-cy*cosphi-cx*sinphi; 609 | aOutside = false; 610 | int i = 0; 611 | for (int y = 0; y < aWarped.ySize(); y++) 612 | for (int x = 0; x < aWarped.xSize(); x++,i++) { 613 | float xf = x; float yf = y; 614 | float ax = xf*cosphi-yf*sinphi+ctx; 615 | float ay = yf*cosphi+xf*sinphi+cty; 616 | int x1 = (int)ax; int y1 = (int)ay; 617 | float alphaX = ax-x1; float alphaY = ay-y1; 618 | float betaX = 1.0-alphaX; float betaY = 1.0-alphaY; 619 | if (x1 < 0 || y1 < 0 || x1+1 >= mXSize || y1+1 >= mYSize) aOutside.data()[i] = true; 620 | else { 621 | int j = y1*mXSize+x1; 622 | for (int k = 0; k < mZSize; k++) { 623 | float a = betaX*mData[j] +alphaX*mData[j+1]; 624 | float b = betaX*mData[j+mXSize]+alphaX*mData[j+1+mXSize]; 625 | aWarped.data()[i+k*aWarpedSize] = betaY*a+alphaY*b; 626 | j += aSize; 627 | } 628 | } 629 | } 630 | } 631 | 632 | // applyHomography 633 | template 634 | void CTensor::applyHomography(CTensor& aWarped, CMatrix& aOutside, const CMatrix& H) { 635 | int aSize = mXSize*mYSize; 636 | int aWarpedSize = aWarped.xSize()*aWarped.ySize(); 637 | aOutside = false; 638 | int i = 0; 639 | for (int y = 0; y < aWarped.ySize(); y++) 640 | for (int x = 0; x < aWarped.xSize(); x++,i++) { 641 | float xf = x; float yf = y; 642 | float ax = H.data()[0]*xf+H.data()[1]*yf+H.data()[2]; 643 | float ay = H.data()[3]*xf+H.data()[4]*yf+H.data()[5]; 644 | float az = H.data()[6]*xf+H.data()[7]*yf+H.data()[8]; 645 | float invaz = 1.0/az; 646 | ax *= invaz; ay *= invaz; 647 | int x1 = (int)ax; int y1 = (int)ay; 648 | float alphaX = ax-x1; float alphaY = ay-y1; 649 | float betaX = 1.0-alphaX; float betaY = 1.0-alphaY; 650 | if (x1 < 0 || y1 < 0 || x1+1 >= mXSize || y1+1 >= mYSize) aOutside.data()[i] = true; 651 | else { 652 | int j = y1*mXSize+x1; 653 | for (int k = 0; k < mZSize; k++) { 654 | float a = betaX*mData[j] +alphaX*mData[j+1]; 655 | float b = betaX*mData[j+mXSize]+alphaX*mData[j+1+mXSize]; 656 | aWarped.data()[i+k*aWarpedSize] = betaY*a+alphaY*b; 657 | j += aSize; 658 | } 659 | } 660 | } 661 | } 662 | 663 | // ----------------------------------------------------------------------------- 664 | // File I/O 665 | // ----------------------------------------------------------------------------- 666 | 667 | // readFromMathematicaFile 668 | template 669 | void CTensor::readFromMathematicaFile(const char* aFilename) { 670 | using namespace std; 671 | // Read the whole file and store data in aData 672 | // Ignore blanks, tabs and lines 673 | // Also ignore Mathematica comments (* ... *) 674 | ifstream aStream(aFilename); 675 | string aData; 676 | char aChar; 677 | bool aBracketFound = false; 678 | bool aStarFound = false; 679 | bool aCommentFound = false; 680 | while (aStream.get(aChar)) 681 | if (aChar != ' ' && aChar != '\t' && aChar != '\n') { 682 | if (aCommentFound) { 683 | if (!aStarFound && aChar == '*') aStarFound = true; 684 | else { 685 | if (aStarFound && aChar == ')') aCommentFound = false; 686 | aStarFound = false; 687 | } 688 | } 689 | else { 690 | if (!aBracketFound && aChar == '(') aBracketFound = true; 691 | else { 692 | if (aBracketFound && aChar == '*') aCommentFound = true; 693 | else aData += aChar; 694 | aBracketFound = false; 695 | } 696 | } 697 | } 698 | // Count the number of braces and double braces to figure out z- and y-Size of tensor 699 | int aDoubleBraceCount = 0; 700 | int aBraceCount = 0; 701 | int aPos = 0; 702 | while ((aPos = aData.find_first_of('{',aPos)+1) > 0) { 703 | aBraceCount++; 704 | if (aData[aPos] == '{' && aData[aPos+1] != '{') aDoubleBraceCount++; 705 | } 706 | // Count the number of commas in the first section to figure out xSize of tensor 707 | int aCommaCount = 0; 708 | aPos = 0; 709 | while (aData[aPos] != '}') { 710 | if (aData[aPos] == ',') aCommaCount++; 711 | aPos++; 712 | } 713 | // Adapt size of tensor 714 | if (mData != 0) delete[] mData; 715 | mXSize = aCommaCount+1; 716 | mYSize = (aBraceCount-1-aDoubleBraceCount) / aDoubleBraceCount; 717 | mZSize = aDoubleBraceCount; 718 | mData = new T[mXSize*mYSize*mZSize]; 719 | // Analyse file --------------- 720 | aPos = 0; 721 | if (aData[aPos] != '{') throw EInvalidFileFormat("Mathematica"); 722 | aPos++; 723 | for (int z = 0; z < mZSize; z++) { 724 | if (aData[aPos] != '{') throw EInvalidFileFormat("Mathematica"); 725 | aPos++; 726 | for (int y = 0; y < mYSize; y++) { 727 | if (aData[aPos] != '{') throw EInvalidFileFormat("Mathematica"); 728 | aPos++; 729 | for (int x = 0; x < mXSize; x++) { 730 | int oldPos = aPos; 731 | if (x+1 < mXSize) aPos = aData.find_first_of(',',aPos); 732 | else aPos = aData.find_first_of('}',aPos); 733 | #ifdef GNU_COMPILER 734 | string s = aData.substr(oldPos,aPos-oldPos); 735 | istrstream is(s.c_str()); 736 | #else 737 | string s = aData.substr(oldPos,aPos-oldPos); 738 | istringstream is(s); 739 | #endif 740 | T aItem; 741 | is >> aItem; 742 | operator()(x,y,z) = aItem; 743 | aPos++; 744 | } 745 | if (y+1 < mYSize) { 746 | if (aData[aPos] != ',') throw EInvalidFileFormat("Mathematica"); 747 | aPos++; 748 | while (aData[aPos] != '{') 749 | aPos++; 750 | } 751 | } 752 | aPos++; 753 | if (z+1 < mZSize) { 754 | if (aData[aPos] != ',') throw EInvalidFileFormat("Mathematica"); 755 | aPos++; 756 | while (aData[aPos] != '{') 757 | aPos++; 758 | } 759 | } 760 | } 761 | 762 | // writeToMathematicaFile 763 | template 764 | void CTensor::writeToMathematicaFile(const char* aFilename) { 765 | using namespace std; 766 | ofstream aStream(aFilename); 767 | aStream << '{'; 768 | for (int z = 0; z < mZSize; z++) { 769 | aStream << '{'; 770 | for (int y = 0; y < mYSize; y++) { 771 | aStream << '{'; 772 | for (int x = 0; x < mXSize; x++) { 773 | aStream << operator()(x,y,z); 774 | if (x+1 < mXSize) aStream << ','; 775 | } 776 | aStream << '}'; 777 | if (y+1 < mYSize) aStream << ",\n"; 778 | } 779 | aStream << '}'; 780 | if (z+1 < mZSize) aStream << ",\n"; 781 | } 782 | aStream << '}'; 783 | } 784 | 785 | // readFromIMFile 786 | template 787 | void CTensor::readFromIMFile(const char* aFilename) { 788 | FILE *aStream; 789 | aStream = fopen(aFilename,"rb"); 790 | // Read image data 791 | for (int i = 0; i < mXSize*mYSize*mZSize; i++) 792 | mData[i] = getc(aStream); 793 | fclose(aStream); 794 | } 795 | 796 | // writeToIMFile 797 | template 798 | void CTensor::writeToIMFile(const char *aFilename) { 799 | FILE *aStream; 800 | aStream = fopen(aFilename,"wb"); 801 | // write data 802 | for (int i = 0; i < mXSize*mYSize*mZSize; i++) { 803 | char dummy = (char)mData[i]; 804 | fwrite(&dummy,1,1,aStream); 805 | } 806 | fclose(aStream); 807 | } 808 | 809 | // readFromPGM 810 | template 811 | void CTensor::readFromPGM(const char* aFilename) { 812 | FILE *aStream; 813 | aStream = fopen(aFilename,"rb"); 814 | if (aStream == 0) std::cerr << "File not found: " << aFilename << std::endl; 815 | int dummy; 816 | // Find beginning of file (P5) 817 | while (getc(aStream) != 'P'); 818 | if (getc(aStream) != '5') throw EInvalidFileFormat("PGM"); 819 | do 820 | dummy = getc(aStream); 821 | while (dummy != '\n' && dummy != ' '); 822 | // Remove comments and empty lines 823 | dummy = getc(aStream); 824 | while (dummy == '#') { 825 | while (getc(aStream) != '\n'); 826 | dummy = getc(aStream); 827 | } 828 | while (dummy == '\n') 829 | dummy = getc(aStream); 830 | // Read image size 831 | mXSize = dummy-48; 832 | while ((dummy = getc(aStream)) >= 48 && dummy < 58) 833 | mXSize = 10*mXSize+dummy-48; 834 | while ((dummy = getc(aStream)) < 48 || dummy >= 58); 835 | mYSize = dummy-48; 836 | while ((dummy = getc(aStream)) >= 48 && dummy < 58) 837 | mYSize = 10*mYSize+dummy-48; 838 | mZSize = 1; 839 | while (dummy != '\n' && dummy != ' ') 840 | dummy = getc(aStream); 841 | while (dummy != '\n' && dummy != ' ') 842 | dummy = getc(aStream); 843 | // Adjust size of data structure 844 | delete[] mData; 845 | mData = new T[mXSize*mYSize]; 846 | // Read image data 847 | for (int i = 0; i < mXSize*mYSize; i++) 848 | mData[i] = getc(aStream); 849 | fclose(aStream); 850 | } 851 | 852 | // writeToPGM 853 | template 854 | void CTensor::writeToPGM(const char* aFilename) { 855 | int rows = (int)floor(sqrt(mZSize)); 856 | int cols = (int)ceil(mZSize*1.0/rows); 857 | FILE* outimage = fopen(aFilename, "wb"); 858 | fprintf(outimage, "P5 \n"); 859 | fprintf(outimage, "%ld %ld \n255\n", cols*mXSize,rows*mYSize); 860 | for (int r = 0; r < rows; r++) 861 | for (int y = 0; y < mYSize; y++) 862 | for (int c = 0; c < cols; c++) 863 | for (int x = 0; x < mXSize; x++) { 864 | unsigned char aHelp; 865 | if (r*cols+c >= mZSize) aHelp = 0; 866 | else aHelp = (unsigned char)operator()(x,y,r*cols+c); 867 | fwrite (&aHelp, sizeof(unsigned char), 1, outimage); 868 | } 869 | fclose(outimage); 870 | } 871 | 872 | // makeColorTensor 873 | template 874 | void CTensor::makeColorTensor() { 875 | if (mZSize != 1) return; 876 | int aSize = mXSize*mYSize; 877 | int a2Size = 2*aSize; 878 | T* aNewData = new T[aSize*3]; 879 | for (int i = 0; i < aSize; i++) 880 | aNewData[i] = aNewData[i+aSize] = aNewData[i+a2Size] = mData[i]; 881 | mZSize = 3; 882 | delete[] mData; 883 | mData = aNewData; 884 | } 885 | 886 | // readFromPPM 887 | template 888 | void CTensor::readFromPPM(const char* aFilename) { 889 | FILE *aStream; 890 | aStream = fopen(aFilename,"rb"); 891 | if (aStream == 0) 892 | std::cerr << "File not found: " << aFilename << std::endl; 893 | int dummy; 894 | // Find beginning of file (P6) 895 | while (getc(aStream) != 'P'); 896 | dummy = getc(aStream); 897 | if (dummy == '5') mZSize = 1; 898 | else if (dummy == '6') mZSize = 3; 899 | else throw EInvalidFileFormat("PPM"); 900 | do dummy = getc(aStream); while (dummy != '\n' && dummy != ' '); 901 | // Remove comments and empty lines 902 | dummy = getc(aStream); 903 | while (dummy == '#') { 904 | while (getc(aStream) != '\n'); 905 | dummy = getc(aStream); 906 | } 907 | while (dummy == '\n') 908 | dummy = getc(aStream); 909 | // Read image size 910 | mXSize = dummy-48; 911 | while ((dummy = getc(aStream)) >= 48 && dummy < 58) 912 | mXSize = 10*mXSize+dummy-48; 913 | while ((dummy = getc(aStream)) < 48 || dummy >= 58); 914 | mYSize = dummy-48; 915 | while ((dummy = getc(aStream)) >= 48 && dummy < 58) 916 | mYSize = 10*mYSize+dummy-48; 917 | while (dummy != '\n' && dummy != ' ') 918 | dummy = getc(aStream); 919 | while (dummy < 48 || dummy >= 58) dummy = getc(aStream); 920 | while ((dummy = getc(aStream)) >= 48 && dummy < 58); 921 | if (dummy != '\n') while (getc(aStream) != '\n'); 922 | // Adjust size of data structure 923 | delete[] mData; 924 | mData = new T[mXSize*mYSize*mZSize]; 925 | // Read image data 926 | int aSize = mXSize*mYSize; 927 | if (mZSize == 1) 928 | for (int i = 0; i < aSize; i++) 929 | mData[i] = getc(aStream); 930 | else { 931 | int aSizeTwice = aSize+aSize; 932 | for (int i = 0; i < aSize; i++) { 933 | mData[i] = getc(aStream); 934 | mData[i+aSize] = getc(aStream); 935 | mData[i+aSizeTwice] = getc(aStream); 936 | } 937 | } 938 | fclose(aStream); 939 | } 940 | 941 | // writeToPPM 942 | template 943 | void CTensor::writeToPPM(const char* aFilename) { 944 | FILE* outimage = fopen(aFilename, "wb"); 945 | fprintf(outimage, "P6 \n"); 946 | fprintf(outimage, "%d %d \n255\n", mXSize,mYSize); 947 | for (int y = 0; y < mYSize; y++) 948 | for (int x = 0; x < mXSize; x++) { 949 | unsigned char aHelp = (unsigned char)operator()(x,y,0); 950 | fwrite (&aHelp, sizeof(unsigned char), 1, outimage); 951 | aHelp = (unsigned char)operator()(x,y,1); 952 | fwrite (&aHelp, sizeof(unsigned char), 1, outimage); 953 | aHelp = (unsigned char)operator()(x,y,2); 954 | fwrite (&aHelp, sizeof(unsigned char), 1, outimage); 955 | } 956 | fclose(outimage); 957 | } 958 | 959 | // readFromPDM 960 | template 961 | void CTensor::readFromPDM(const char* aFilename) { 962 | std::ifstream aStream(aFilename); 963 | std::string s; 964 | // Read header 965 | aStream >> s; 966 | if (s != "P9") throw EInvalidFileFormat("PDM"); 967 | char aFeatureType; 968 | aStream >> aFeatureType; 969 | aStream >> s; 970 | aStream >> mXSize; 971 | aStream >> mYSize; 972 | aStream >> mZSize; 973 | aStream >> s; 974 | // Adjust size of data structure 975 | delete[] mData; 976 | mData = new T[mXSize*mYSize*mZSize]; 977 | // Read data 978 | for (int i = 0; i < mXSize*mYSize*mZSize; i++) 979 | aStream >> mData[i]; 980 | } 981 | 982 | // writeToPDM 983 | template 984 | void CTensor::writeToPDM(const char* aFilename, char aFeatureType) { 985 | std::ofstream aStream(aFilename); 986 | // write header 987 | aStream << "P9" << std::endl; 988 | aStream << aFeatureType << "SS" << std::endl; 989 | aStream << mZSize << ' ' << mYSize << ' ' << mXSize << std::endl; 990 | aStream << "F" << std::endl; 991 | // write data 992 | for (int i = 0; i < mXSize*mYSize*mZSize; i++) { 993 | aStream << mData[i]; 994 | if (i % 8 == 0) aStream << std::endl; 995 | else aStream << ' '; 996 | } 997 | } 998 | 999 | // operator () 1000 | template 1001 | inline T& CTensor::operator()(const int ax, const int ay, const int az) const { 1002 | #ifdef _DEBUG 1003 | if (ax >= mXSize || ay >= mYSize || az >= mZSize || ax < 0 || ay < 0 || az < 0) 1004 | throw ETensorRangeOverflow(ax,ay,az); 1005 | #endif 1006 | return mData[mXSize*(mYSize*az+ay)+ax]; 1007 | } 1008 | 1009 | template 1010 | CVector CTensor::operator()(const float ax, const float ay) const { 1011 | CVector aResult(mZSize); 1012 | int x1 = (int)ax; 1013 | int y1 = (int)ay; 1014 | int x2 = x1+1; 1015 | int y2 = y1+1; 1016 | #ifdef _DEBUG 1017 | if (x2 >= mXSize || y2 >= mYSize || x1 < 0 || y1 < 0) throw ETensorRangeOverflow(ax,ay,0); 1018 | #endif 1019 | float alphaX = ax-x1; float alphaXTrans = 1.0-alphaX; 1020 | float alphaY = ay-y1; float alphaYTrans = 1.0-alphaY; 1021 | for (int k = 0; k < mZSize; k++) { 1022 | float a = alphaXTrans*operator()(x1,y1,k)+alphaX*operator()(x2,y1,k); 1023 | float b = alphaXTrans*operator()(x1,y2,k)+alphaX*operator()(x2,y2,k); 1024 | aResult(k) = alphaYTrans*a+alphaY*b; 1025 | } 1026 | return aResult; 1027 | } 1028 | 1029 | // operator = 1030 | template 1031 | inline CTensor& CTensor::operator=(const T aValue) { 1032 | fill(aValue); 1033 | return *this; 1034 | } 1035 | 1036 | template 1037 | CTensor& CTensor::operator=(const CTensor& aCopyFrom) { 1038 | if (this != &aCopyFrom) { 1039 | delete[] mData; 1040 | if (aCopyFrom.mData == 0) { 1041 | mData = 0; mXSize = 0; mYSize = 0; mZSize = 0; 1042 | } 1043 | else { 1044 | mXSize = aCopyFrom.mXSize; 1045 | mYSize = aCopyFrom.mYSize; 1046 | mZSize = aCopyFrom.mZSize; 1047 | int wholeSize = mXSize*mYSize*mZSize; 1048 | mData = new T[wholeSize]; 1049 | for (register int i = 0; i < wholeSize; i++) 1050 | mData[i] = aCopyFrom.mData[i]; 1051 | } 1052 | } 1053 | return *this; 1054 | } 1055 | 1056 | // operator += 1057 | template 1058 | CTensor& CTensor::operator+=(const CTensor& aTensor) { 1059 | #ifdef _DEBUG 1060 | if (mXSize != aTensor.mXSize || mYSize != aTensor.mYSize || mZSize != aTensor.mZSize) 1061 | throw ETensorIncompatibleSize(mXSize,mYSize,mZSize); 1062 | #endif 1063 | int wholeSize = size(); 1064 | for (int i = 0; i < wholeSize; i++) 1065 | mData[i] += aTensor.mData[i]; 1066 | return *this; 1067 | } 1068 | 1069 | // operator += 1070 | template 1071 | CTensor& CTensor::operator+=(const T aValue) { 1072 | int wholeSize = mXSize*mYSize*mZSize; 1073 | for (int i = 0; i < wholeSize; i++) 1074 | mData[i] += aValue; 1075 | return *this; 1076 | } 1077 | 1078 | // operator *= 1079 | template 1080 | CTensor& CTensor::operator*=(const T aValue) { 1081 | int wholeSize = mXSize*mYSize*mZSize; 1082 | for (int i = 0; i < wholeSize; i++) 1083 | mData[i] *= aValue; 1084 | return *this; 1085 | } 1086 | 1087 | // min 1088 | template 1089 | T CTensor::min() const { 1090 | T aMin = mData[0]; 1091 | int aSize = mXSize*mYSize*mZSize; 1092 | for (int i = 1; i < aSize; i++) 1093 | if (mData[i] < aMin) aMin = mData[i]; 1094 | return aMin; 1095 | } 1096 | 1097 | // max 1098 | template 1099 | T CTensor::max() const { 1100 | T aMax = mData[0]; 1101 | int aSize = mXSize*mYSize*mZSize; 1102 | for (int i = 1; i < aSize; i++) 1103 | if (mData[i] > aMax) aMax = mData[i]; 1104 | return aMax; 1105 | } 1106 | 1107 | // avg 1108 | template 1109 | T CTensor::avg() const { 1110 | T aAvg = 0; 1111 | for (int z = 0; z < mZSize; z++) 1112 | aAvg += avg(z); 1113 | return aAvg/mZSize; 1114 | } 1115 | 1116 | template 1117 | T CTensor::avg(int az) const { 1118 | T aAvg = 0; 1119 | int aSize = mXSize*mYSize; 1120 | int aTemp = (az+1)*aSize; 1121 | for (int i = az*aSize; i < aTemp; i++) 1122 | aAvg += mData[i]; 1123 | return aAvg/aSize; 1124 | } 1125 | 1126 | // xSize 1127 | template 1128 | inline int CTensor::xSize() const { 1129 | return mXSize; 1130 | } 1131 | 1132 | // ySize 1133 | template 1134 | inline int CTensor::ySize() const { 1135 | return mYSize; 1136 | } 1137 | 1138 | // zSize 1139 | template 1140 | inline int CTensor::zSize() const { 1141 | return mZSize; 1142 | } 1143 | 1144 | // size 1145 | template 1146 | inline int CTensor::size() const { 1147 | return mXSize*mYSize*mZSize; 1148 | } 1149 | 1150 | // getMatrix 1151 | template 1152 | CMatrix CTensor::getMatrix(const int az) const { 1153 | CMatrix aTemp(mXSize,mYSize); 1154 | int aMatrixSize = mXSize*mYSize; 1155 | int aOffset = az*aMatrixSize; 1156 | for (int i = 0; i < aMatrixSize; i++) 1157 | aTemp.data()[i] = mData[i+aOffset]; 1158 | return aTemp; 1159 | } 1160 | 1161 | // getMatrix 1162 | template 1163 | void CTensor::getMatrix(CMatrix& aMatrix, const int az) const { 1164 | if (aMatrix.xSize() != mXSize || aMatrix.ySize() != mYSize) 1165 | throw ETensorIncompatibleSize(aMatrix.xSize(),aMatrix.ySize(),mXSize,mYSize); 1166 | int aMatrixSize = mXSize*mYSize; 1167 | int aOffset = az*aMatrixSize; 1168 | for (int i = 0; i < aMatrixSize; i++) 1169 | aMatrix.data()[i] = mData[i+aOffset]; 1170 | } 1171 | 1172 | // putMatrix 1173 | template 1174 | void CTensor::putMatrix(CMatrix& aMatrix, const int az) { 1175 | if (aMatrix.xSize() != mXSize || aMatrix.ySize() != mYSize) 1176 | throw ETensorIncompatibleSize(aMatrix.xSize(),aMatrix.ySize(),mXSize,mYSize); 1177 | int aMatrixSize = mXSize*mYSize; 1178 | int aOffset = az*aMatrixSize; 1179 | for (int i = 0; i < aMatrixSize; i++) 1180 | mData[i+aOffset] = aMatrix.data()[i]; 1181 | } 1182 | 1183 | // data() 1184 | template 1185 | inline T* CTensor::data() const { 1186 | return mData; 1187 | } 1188 | 1189 | // N O N - M E M B E R F U N C T I O N S -------------------------------------- 1190 | 1191 | // operator << 1192 | template 1193 | std::ostream& operator<<(std::ostream& aStream, const CTensor& aTensor) { 1194 | for (int z = 0; z < aTensor.zSize(); z++) { 1195 | for (int y = 0; y < aTensor.ySize(); y++) { 1196 | for (int x = 0; x < aTensor.xSize(); x++) 1197 | aStream << aTensor(x,y,z) << ' '; 1198 | aStream << std::endl; 1199 | } 1200 | aStream << std::endl; 1201 | } 1202 | return aStream; 1203 | } 1204 | 1205 | #endif 1206 | --------------------------------------------------------------------------------