├── example
    ├── marple8_01.ppm
    ├── marple8_02.ppm
    ├── marple8_03.ppm
    ├── marple8_04.ppm
    ├── marple8_05.ppm
    ├── seated-nude.jpg
    └── deepflow
    │   ├── forward_1_2.flo
    │   ├── forward_2_3.flo
    │   ├── forward_3_4.flo
    │   ├── forward_4_5.flo
    │   ├── backward_2_1.flo
    │   ├── backward_3_2.flo
    │   ├── backward_4_3.flo
    │   ├── backward_5_4.flo
    │   ├── reliable_1_2.pgm
    │   ├── reliable_2_1.pgm
    │   ├── reliable_2_3.pgm
    │   ├── reliable_3_2.pgm
    │   ├── reliable_3_4.pgm
    │   ├── reliable_4_3.pgm
    │   ├── reliable_4_5.pgm
    │   └── reliable_5_4.pgm
├── consistencyChecker
    ├── NMath.cpp
    ├── Makefile
    ├── consistencyChecker.cpp
    ├── NMath.h
    ├── CVector.h
    ├── CTensor4D.h
    └── CTensor.h
├── .gitignore
├── run-deepflow.sh
├── models
    └── download_models.sh
├── flowFileLoader.lua
├── makeOptFlow.sh
├── stylizeVideo.sh
├── LICENSE
├── lbfgs.lua
├── README.md
├── artistic_video_multiPass.lua
├── artistic_video.lua
└── artistic_video_core.lua


/example/marple8_01.ppm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_01.ppm


--------------------------------------------------------------------------------
/example/marple8_02.ppm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_02.ppm


--------------------------------------------------------------------------------
/example/marple8_03.ppm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_03.ppm


--------------------------------------------------------------------------------
/example/marple8_04.ppm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_04.ppm


--------------------------------------------------------------------------------
/example/marple8_05.ppm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/marple8_05.ppm


--------------------------------------------------------------------------------
/example/seated-nude.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/seated-nude.jpg


--------------------------------------------------------------------------------
/consistencyChecker/NMath.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/consistencyChecker/NMath.cpp


--------------------------------------------------------------------------------
/consistencyChecker/Makefile:
--------------------------------------------------------------------------------
# Builds the consistencyChecker binary that makeOptFlow.sh invokes.
# `default` is declared phony so a stray file named "default" can never
# suppress the build; the real target lists its sources so make only
# recompiles when one of them changed.
.PHONY: default
default: consistencyChecker

consistencyChecker: consistencyChecker.cpp NMath.cpp $(wildcard *.h)
	g++ -O3 -fPIC consistencyChecker.cpp NMath.cpp -I. -o consistencyChecker -L.
3 | 
4 | 


--------------------------------------------------------------------------------
/example/deepflow/forward_1_2.flo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/forward_1_2.flo


--------------------------------------------------------------------------------
/example/deepflow/forward_2_3.flo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/forward_2_3.flo


--------------------------------------------------------------------------------
/example/deepflow/forward_3_4.flo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/forward_3_4.flo


--------------------------------------------------------------------------------
/example/deepflow/forward_4_5.flo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/forward_4_5.flo


--------------------------------------------------------------------------------
/example/deepflow/backward_2_1.flo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/backward_2_1.flo


--------------------------------------------------------------------------------
/example/deepflow/backward_3_2.flo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/backward_3_2.flo


--------------------------------------------------------------------------------
/example/deepflow/backward_4_3.flo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/backward_4_3.flo


--------------------------------------------------------------------------------
/example/deepflow/backward_5_4.flo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/backward_5_4.flo


--------------------------------------------------------------------------------
/example/deepflow/reliable_1_2.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_1_2.pgm


--------------------------------------------------------------------------------
/example/deepflow/reliable_2_1.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_2_1.pgm


--------------------------------------------------------------------------------
/example/deepflow/reliable_2_3.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_2_3.pgm


--------------------------------------------------------------------------------
/example/deepflow/reliable_3_2.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_3_2.pgm


--------------------------------------------------------------------------------
/example/deepflow/reliable_3_4.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_3_4.pgm


--------------------------------------------------------------------------------
/example/deepflow/reliable_4_3.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_4_3.pgm


--------------------------------------------------------------------------------
/example/deepflow/reliable_4_5.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_4_5.pgm


--------------------------------------------------------------------------------
/example/deepflow/reliable_5_4.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manuelruder/artistic-videos/HEAD/example/deepflow/reliable_5_4.pgm


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.swp
 2 | out*.png
 3 | *.png
 4 | *.jpg
 5 | *.prototxt*
 6 | *.caffemodel
 7 | models/
 8 | !models/download_models.sh
 9 | deepflow2-static
10 | deepmatching-static
11 | consistencyChecker/consistencyChecker


--------------------------------------------------------------------------------
/run-deepflow.sh:
--------------------------------------------------------------------------------
 1 | if [ "$#" -ne 3 ]; then
 2 |   echo "This is an auxiliary script for makeOptFlow.sh. No need to call this script directly."
 3 |   exit 1
 4 | fi
 5 | if [ ! -f deepmatching-static ] && [ ! -f deepflow2-static ]; then
 6 |   echo "Place deepflow2-static and deepmatching-static in this directory."
 7 |   exit 1
 8 | fi
 9 | 
10 | ./deepmatching-static $1 $2 -nt 0 | ./deepflow2-static $1 $2 $3 -match


--------------------------------------------------------------------------------
/models/download_models.sh:
--------------------------------------------------------------------------------
# Downloads the VGG-19 prototxt and caffemodel files into ./models.
# The relative "cd models" means the script has to be started from the
# directory that contains models/.
# Abort if that directory is missing; otherwise the files would be downloaded
# into the current directory and the trailing "cd .." would move one level up.
cd models || exit 1
wget -c https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt
# NOTE(review): certificate verification is disabled for this download, so the
# file's origin is not authenticated. Re-enable it if the host's certificate allows.
wget -c --no-check-certificate https://bethgelab.org/media/uploads/deeptextures/vgg_normalised.caffemodel
wget -c http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel
cd ..
6 | 


--------------------------------------------------------------------------------
/flowFileLoader.lua:
--------------------------------------------------------------------------------
 1 | require 'torch'
 2 | require 'image'
 3 | 
 4 | --[[
 5 |   Reads a flow field from a binary flow file.
 6 | 
 7 |    bytes   contents
 8 |     0-3     tag: "PIEH" in ASCII, which in little endian happens to be the float 202021.25
 9 |             (just a sanity check that floats are represented correctly)
10 |     4-7     width as an integer
11 |     8-11    height as an integer
12 |     12-end  data (width*height*2*4 bytes total)
13 | --]]
-- Loads the flow file `fileName` and returns a 2xHxW tensor whose first plane
-- holds the second (y) component and whose second plane holds the first (x)
-- component of each stored flow vector.
-- Raises an error if the file does not start with the expected tag.
local function flowFileLoader_load(fileName)
  -- The bytes "PIEH" decode to this float when the byte order matches.
  local FLOW_TAG = 202021.25

  local flowFile = torch.DiskFile(fileName, 'r')
  flowFile:binary()
  local tag = flowFile:readFloat()
  if tag ~= FLOW_TAG then
    flowFile:close()
    error(string.format("'%s' is not a valid flow file (unexpected tag %s)",
                        fileName, tostring(tag)))
  end
  local W = flowFile:readInt()
  local H = flowFile:readInt()
  local elems_in_dim = H * W
  local storage = flowFile:readFloat(2 * elems_in_dim)
  -- Everything has been read; release the handle before converting the data.
  flowFile:close()

  -- image.warp needs 2xHxW, and also expects (y, x) for some reason...
  local flow = torch.Tensor(2, H, W)
  local raw_flow = torch.data(flow)
  for y=0, H - 1 do
    for x=0, W - 1 do
      local shift = y * W + x
      -- The file interleaves the two components per pixel; `storage` is
      -- 1-indexed while the raw data pointer is 0-indexed.
      raw_flow[elems_in_dim + shift] = storage[2 * shift + 1]
      raw_flow[shift] = storage[2 * shift + 2]
    end
  end
  return flow
end
35 | 
36 | return {
37 |   load = flowFileLoader_load
38 | }
39 | 


--------------------------------------------------------------------------------
/makeOptFlow.sh:
--------------------------------------------------------------------------------
 1 | # Specify the path to the optical flow utility here.
 2 | # Also check line 44 and 47 whether the arguments are in the correct order.
 3 | flowCommandLine="bash run-deepflow.sh"
 4 | 
 5 | if [ -z "$flowCommandLine" ]; then
 6 |   echo "Please open makeOptFlow.sh and specify the command line for computing the optical flow."
 7 |   exit 1
 8 | fi
 9 | 
10 | if [ ! -f ./consistencyChecker/consistencyChecker ]; then
11 |   if [ ! -f ./consistencyChecker/Makefile ]; then
12 |     echo "Consistency checker makefile not found."
13 |     exit 1
14 |   fi
15 |   cd consistencyChecker/
16 |   make
17 |   cd ..
18 | fi
19 | 
20 | filePattern=$1
21 | folderName=$2
22 | startFrame=${3:-1}
23 | stepSize=${4:-1}
24 | 
25 | if [ "$#" -le 1 ]; then
26 |    echo "Usage: ./makeOptFlow <filePattern> <outputFolder> [<startNumber> [<stepSize>]]"
27 |    echo -e "\tfilePattern:\tFilename pattern of the frames of the videos."
28 |    echo -e "\toutputFolder:\tOutput folder."
29 |    echo -e "\tstartNumber:\tThe index of the first frame. Default: 1"
30 |    echo -e "\tstepSize:\tThe step size to create long-term flow. Default: 1"
31 |    exit 1
32 | fi
33 | 
34 | i=$[$startFrame]
35 | j=$[$startFrame + $stepSize]
36 | 
37 | mkdir -p "${folderName}"
38 | 
39 | while true; do
40 |   file1=$(printf "$filePattern" "$i")
41 |   file2=$(printf "$filePattern" "$j")
42 |   if [ -a $file2 ]; then
43 |     if [ ! -f ${folderName}/forward_${i}_${j}.flo ]; then
44 |       eval $flowCommandLine "$file1" "$file2" "${folderName}/forward_${i}_${j}.flo"
45 |     fi
46 |     if [ ! -f ${folderName}/backward_${j}_${i}.flo ]; then
47 |       eval $flowCommandLine "$file2" "$file1" "${folderName}/backward_${j}_${i}.flo"
48 |     fi
49 |     ./consistencyChecker/consistencyChecker "${folderName}/backward_${j}_${i}.flo" "${folderName}/forward_${i}_${j}.flo" "${folderName}/reliable_${j}_${i}.pgm"
50 |     ./consistencyChecker/consistencyChecker "${folderName}/forward_${i}_${j}.flo" "${folderName}/backward_${j}_${i}.flo" "${folderName}/reliable_${i}_${j}.pgm"
51 |   else
52 |     break
53 |   fi
54 |   i=$[$i +1]
55 |   j=$[$j +1]
56 | done
57 | 


--------------------------------------------------------------------------------
/stylizeVideo.sh:
--------------------------------------------------------------------------------
 1 | set -e
 2 | # Get a carriage return into `cr`
 3 | cr=`echo $'\n.'`
 4 | cr=${cr%.}
 5 | 
 6 | 
 7 | # Find out whether ffmpeg or avconv is installed on the system
 8 | FFMPEG=ffmpeg
 9 | command -v $FFMPEG >/dev/null 2>&1 || {
10 |   FFMPEG=avconv
11 |   command -v $FFMPEG >/dev/null 2>&1 || {
12 |     echo >&2 "This script requires either ffmpeg or avconv installed.  Aborting."; exit 1;
13 |   }
14 | }
15 | 
16 | if [ "$#" -le 1 ]; then
17 |    echo "Usage: ./stylizeVideo <path_to_video> <path_to_style_image>"
18 |    exit 1
19 | fi
20 | 
21 | # Parse arguments
22 | filename=$(basename "$1")
23 | extension="${filename##*.}"
24 | filename="${filename%.*}"
25 | filename=${filename//[%]/x}
26 | style_image=$2
27 | 
28 | # Create output folder
29 | mkdir -p $filename
30 | 
31 | 
32 | echo ""
33 | read -p "Which backend do you want to use? \
34 | For Nvidia GPU, use cudnn if available, otherwise nn. \
35 | For non-Nvidia GPU, use clnn. Note: You have to have the given backend installed in order to use it. [nn] $cr > " backend
36 | backend=${backend:-nn}
37 | 
38 | if [ "$backend" == "cudnn" ]; then
39 |   echo ""
40 |   read -p "This algorithm needs a lot of memory. \
41 |   For a resolution of 450x350 you'll need roughly 2GB VRAM. \
42 |   VRAM usage increases linear with resolution. \
43 |   Please enter a resolution at which the video should be processed, \
44 |   in the format w:h, or leave blank to use the original resolution $cr > " resolution
45 | elif [ "$backend" = "nn" ] || [ "$backend" = "clnn" ]; then
46 |   echo ""
47 |   read -p "This algorithm needs a lot of memory. \
48 |   For a resolution of 450x350 you'll need roughly 4GB VRAM. \
49 |   VRAM usage increases linear with resolution. \
50 |   Maximum recommended resolution with a Titan X 12GB: 960:540. \
51 |   Please enter a resolution at which the video should be processed, \
52 |   in the format w:h, or leave blank to use the original resolution $cr > " resolution
53 | else
54 |   echo "Unknown backend."
55 |   exit 1
56 | fi
57 | 
58 | # Save frames of the video as individual image files
59 | if [ -z $resolution ]; then
60 |   $FFMPEG -i $1 ${filename}/frame_%04d.ppm
61 |   resolution=default
62 | else
63 |   $FFMPEG -i $1 -vf scale=$resolution ${filename}/frame_%04d.ppm
64 | fi
65 | 
66 | echo ""
67 | read -p "How much do you want to weight the style reconstruction term? \
68 | Default value: 1e2 for a resolution of 450x350. Increase for a higher resolution. \
69 | [1e2] $cr > " style_weight
70 | style_weight=${style_weight:-1e2}
71 | 
72 | temporal_weight=1e3
73 | 
74 | echo ""
75 | read -p "Enter the zero-indexed ID of the GPU to use, or -1 for CPU mode (very slow!).\
76 |  [0] $cr > " gpu
77 | gpu=${gpu:-0}
78 | 
79 | echo ""
80 | echo "Computing optical flow. This may take a while..."
81 | bash makeOptFlow.sh ./${filename}/frame_%04d.ppm ./${filename}/flow_$resolution
82 | 
83 | # Perform style transfer
84 | th artistic_video.lua \
85 | -content_pattern ${filename}/frame_%04d.ppm \
86 | -flow_pattern ${filename}/flow_${resolution}/backward_[%d]_{%d}.flo \
87 | -flowWeight_pattern ${filename}/flow_${resolution}/reliable_[%d]_{%d}.pgm \
88 | -style_weight $style_weight \
89 | -temporal_weight $temporal_weight \
90 | -output_folder ${filename}/ \
91 | -style_image $style_image \
92 | -backend $backend \
93 | -gpu $gpu \
94 | -cudnn_autotune \
95 | -number_format %04d
96 | 
97 | # Create video from output images.
98 | $FFMPEG -i ${filename}/out-%04d.png ${filename}-stylized.$extension


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This code is for non-profit use only. Any commercial use is 
 2 | prohibited.
 3 | 
 4 | (c) Manuel Ruder, Alexey Dosovitskiy, Thomas Brox 2016
 5 | 
 6 | If you use this program, you should cite the following paper:
 7 | 
 8 | M. Ruder, A. Dosovitskiy, T. Brox (2016). "Artistic style transfer for videos". arXiv:1604.08610
 9 | 
10 | 
11 | 
12 | This code is partially based on the neural-style code by Justin Johnson,
13 | which is covered by the following copyright and permission notice:
14 | 
15 | ******************************************************************************
16 | The MIT License (MIT)
17 | 
18 | Copyright (c) 2015 Justin Johnson
19 | 
20 | Permission is hereby granted, free of charge, to any person obtaining a copy
21 | of this software and associated documentation files (the "Software"), to deal
22 | in the Software without restriction, including without limitation the rights
23 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
24 | copies of the Software, and to permit persons to whom the Software is
25 | furnished to do so, subject to the following conditions:
26 | 
27 | The above copyright notice and this permission notice shall be included in all
28 | copies or substantial portions of the Software.
29 | 
30 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
31 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
32 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
33 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
34 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
35 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 | SOFTWARE.
37 | ******************************************************************************
38 | 
39 | 
40 | 
41 | The present "lbfgs.lua" is a modified version of "lbfgs.lua" included in the
42 | Torch "Optimization package", which is covered by the following copyright and
43 | permission notice:
44 | 
45 | ******************************************************************************
46 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
47 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
48 | Copyright (c) 2011-2013 NYU (Clement Farabet)
49 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
50 | Copyright (c) 2006      Idiap Research Institute (Samy Bengio)
51 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
52 | 
53 | All rights reserved.
54 | 
55 | Redistribution and use in source and binary forms, with or without
56 | modification, are permitted provided that the following conditions are met:
57 | 
58 | 1. Redistributions of source code must retain the above copyright
59 |    notice, this list of conditions and the following disclaimer.
60 | 
61 | 2. Redistributions in binary form must reproduce the above copyright
62 |    notice, this list of conditions and the following disclaimer in the
63 |    documentation and/or other materials provided with the distribution.
64 | 
65 | 3. Neither the names of NEC Laboratories American and IDIAP Research
66 |    Institute nor the names of its contributors may be used to endorse or
67 |    promote products derived from this software without specific prior
68 |    written permission.
69 | 
70 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
71 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
72 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
73 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
74 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
75 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
76 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
77 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
78 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
79 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
80 | POSSIBILITY OF SUCH DAMAGE.
81 | ******************************************************************************


--------------------------------------------------------------------------------
/consistencyChecker/consistencyChecker.cpp:
--------------------------------------------------------------------------------
  1 | // consistencyChecker
  2 | // Check consistency of forward flow via backward flow.
  3 | //
  4 | // (c) Manuel Ruder, Alexey Dosovitskiy, Thomas Brox 2016
  5 | 
  6 | #include <algorithm>
  7 | #include <assert.h>
  8 | #include "CTensor.h"
  9 | #include "CFilter.h"
 10 | 
 11 | // Which certainty value motion boundaries should get. Value between 0 (uncertain) and 255 (certain).
 12 | #define MOTION_BOUNDARIE_VALUE 0
 13 | 
 14 | // The amount of Gaussian smoothing that should be applied. Set to 0 to disable smoothing.
 15 | #define SMOOTH_STRENGH 0.8
 16 | 
 17 | // readMiddlebury
 18 | bool readMiddlebury(const char* filename, CTensor<float>& flow) {
 19 |   FILE *stream = fopen(filename, "rb");
 20 |   if (stream == 0) {
 21 |     std::cout << "Could not open " << filename << std::endl;
 22 |     return false;
 23 |   }
 24 |   float help;
 25 |   int dummy;
 26 |   dummy = fread(&help,sizeof(float),1,stream);
 27 |   int aXSize,aYSize;
 28 |   dummy = fread(&aXSize,sizeof(int),1,stream);
 29 |   dummy = fread(&aYSize,sizeof(int),1,stream);
 30 |   flow.setSize(aXSize,aYSize,2);
 31 |   for (int y = 0; y < flow.ySize(); y++)
 32 |     for (int x = 0; x < flow.xSize(); x++) {
 33 |       dummy = fread(&flow(x,y,0),sizeof(float),1,stream);
 34 |       dummy = fread(&flow(x,y,1),sizeof(float),1,stream);
 35 |     }
 36 |   fclose(stream);
 37 |   return true;
 38 | }
 39 | 
// checkConsistency
// Marks pixels of flow1 as unreliable in `reliable`. Entries are only
// overwritten, never reset, so the caller's initial value remains wherever no
// test below fires. For each pixel (ax,ay):
//   - 0                        if the flow1 target (or its bilinear neighbourhood) leaves the image,
//   - -255                     if following flow1 and then flow2 does not lead back close to (ax,ay),
//   - MOTION_BOUNDARIE_VALUE   if the squared filter response of flow1 exceeds a threshold.
// argc and args are not used in this function.
void checkConsistency(const CTensor<float>& flow1, const CTensor<float>& flow2, CMatrix<float>& reliable, int argc, char** args) {
  int xSize = flow1.xSize(), ySize = flow1.ySize();
  int size = xSize * ySize;
  // dx/dy receive flow1 filtered with a CDerivative<float>(3) in one direction each.
  // NOTE(review): presumably the x- and y-derivatives of both flow channels;
  // NFilter/CDerivative are not visible here - confirm.
  CTensor<float> dx(xSize,ySize,2);
  CTensor<float> dy(xSize,ySize,2);
  CDerivative<float> derivative(3);
  NFilter::filter(flow1,dx,derivative,1,1);
  NFilter::filter(flow1,dy,1,derivative,1);
  // motionEdge = sum of the squared filter responses of both channels.
  // Indexing data()[i] and data()[size+i] assumes the two channels are stored
  // back to back, `size` elements each - TODO confirm against CTensor.
  CMatrix<float> motionEdge(xSize,ySize,0);
  for (int i = 0; i < size; i++) {
    motionEdge.data()[i] += dx.data()[i]*dx.data()[i];
    motionEdge.data()[i] += dx.data()[size+i]*dx.data()[size+i];
    motionEdge.data()[i] += dy.data()[i]*dy.data()[i];
    motionEdge.data()[i] += dy.data()[size+i]*dy.data()[size+i];
  }

  for (int ay = 0; ay < flow1.ySize(); ay++)
    for (int ax = 0; ax < flow1.xSize(); ax++) {
      // (bx,by): position reached from (ax,ay) by following flow1.
      float bx = ax+flow1(ax, ay, 0);
      float by = ay+flow1(ax, ay, 1);
      // Integer corners of the cell that contains (bx,by).
      int x1 = floor(bx);
      int y1 = floor(by);
      int x2 = x1 + 1;
      int y2 = y1 + 1;
      // All four corners must lie inside the image for the interpolation below.
      if (x1 < 0 || x2 >= xSize || y1 < 0 || y2 >= ySize)
      { reliable(ax, ay) = 0.0f; continue; }
      // Bilinear interpolation of flow2 at (bx,by): (u,v).
      float alphaX = bx-x1; float alphaY = by-y1;
      float a = (1.0-alphaX) * flow2(x1, y1, 0) + alphaX * flow2(x2, y1, 0);
      float b = (1.0-alphaX) * flow2(x1, y2, 0) + alphaX * flow2(x2, y2, 0);
      float u = (1.0-alphaY)*a+alphaY*b;
      a = (1.0-alphaX) * flow2(x1, y1, 1) + alphaX * flow2(x2, y1, 1);
      b = (1.0-alphaX) * flow2(x1, y2, 1) + alphaX * flow2(x2, y2, 1);
      float v = (1.0-alphaY)*a+alphaY*b;
      // (cx,cy): position reached after additionally following the interpolated flow2.
      float cx = bx+u;
      float cy = by+v;
      float u2 = flow1(ax,ay,0);
      float v2 = flow1(ax,ay,1);
      // Inconsistent if the round trip ends too far from the start; the
      // tolerance grows with the squared magnitudes of both flow vectors.
      if (((cx-ax) * (cx-ax) + (cy-ay) * (cy-ay)) >= 0.01*(u2*u2 + v2*v2 + u*u + v*v) + 0.5f) {
        // Set to a negative value so that when smoothing is applied the smoothing goes "to the outside".
        // Afterwards, we clip values below 0.
        reliable(ax, ay) = -255.0f;
        continue;
      }
      // Large filter response relative to the flow magnitude: treated as a motion boundary.
      if (motionEdge(ax, ay) > 0.01 * (u2*u2+v2*v2) + 0.002f) {
        reliable(ax, ay) = MOTION_BOUNDARIE_VALUE;
        continue;
      }
    }
}
 89 | 
 90 | int main(int argc, char** args) {
 91 |   assert(argc >= 4);
 92 | 
 93 |   CTensor<float> flow1,flow2;
 94 |   readMiddlebury(args[1], flow1);
 95 |   readMiddlebury(args[2], flow2);
 96 |   
 97 |   assert(flow1.xSize() == flow2.xSize());
 98 |   assert(flow1.ySize() == flow2.ySize());
 99 |   
100 |   int xSize = flow1.xSize(), ySize = flow1.ySize();
101 |   
102 |   // Check consistency of forward flow via backward flow and exlucde motion boundaries
103 |   CMatrix<float> reliable(xSize, ySize, 255.0f);
104 |   checkConsistency(flow1, flow2, reliable, argc, args);
105 |   
106 |   if (SMOOTH_STRENGH > 0) {
107 |     CSmooth<float> smooth(SMOOTH_STRENGH, 2.0f);
108 |     NFilter::filter(reliable, smooth, smooth);
109 |   }
110 |   reliable.clip(0.0f, 255.0f);
111 | 
112 |   reliable.writeToPGM(args[3]);
113 | }


--------------------------------------------------------------------------------
/consistencyChecker/NMath.h:
--------------------------------------------------------------------------------
  1 | // NMath
  2 | // A collection of mathematical functions and numerical algorithms
  3 | //
  4 | // Author: Thomas Brox
  5 | 
  6 | #ifndef NMathH
  7 | #define NMathH
  8 | 
  9 | #include <math.h>
 10 | #include <stdlib.h>
 11 | #include <CVector.h>
 12 | #include <CMatrix.h>
 13 | 
namespace NMath {
  // Returns the faculty of a number
  // NOTE(review): "faculty" presumably means the factorial n!; the implementation is not in this header - confirm.
  int faculty(int n);
  // Computes the binomial coefficient of two numbers
  int binCoeff(const int n, const int k);
  // Returns the angle of the line connecting (x1,y1) with (x2,y2)
  float tangent(const float x1, const float y1, const float x2, const float y2);
  // Absolute for floating points
  inline float abs(const float aValue);
  // Computes min or max value of two numbers
  inline float min(float aVal1, float aVal2);
  inline float max(float aVal1, float aVal2);
  inline int min(int aVal1, int aVal2);
  inline int max(int aVal1, int aVal2);
  // Computes the sign of a value
  inline float sign(float aVal);
  // minmod function (see description in implementation)
  inline float minmod(float a, float b, float c);
  // Computes the difference between two angles respecting the cyclic property of an angle
  // The result is always between 0 and Pi
  float absAngleDifference(const float aFirstAngle, const float aSecondAngle);
  // Computes the difference between two angles aFirstAngle - aSecondAngle
  // respecting the cyclic property of an angle
  // The result is between -Pi and Pi
  float angleDifference(const float aFirstAngle, const float aSecondAngle);
  // Computes the sum of two angles respecting the cyclic property of an angle
  // The result is between -Pi and Pi
  float angleSum(const float aFirstAngle, const float aSecondAngle);
  // Rounds to the nearest integer
  int round(const float aValue);
  // Computes the arctan with results between 0 and 2*Pi
  inline float arctan(float x, float y);

  // Computes [0,1] uniformly distributed random number
  inline float random();
  // Computes N(0,1) distributed random number
  inline float randomGauss();

  extern const float Pi;

  // Computes a principal axis transformation
  // Eigenvectors are in the rows of aEigenvectors
  void PATransformation(const CMatrix<float>& aMatrix, CVector<float>& aEigenvalues, CMatrix<float>& aEigenvectors, bool aOrdering = true);
  // Computes the principal axis backtransformation
  void PABacktransformation(const CMatrix<float>& aEigenVectors, const CVector<float>& aEigenValues, CMatrix<float>& aMatrix);
  // Computes a singular value decomposition A=USV^T
  // Input: U MxN matrix
  // Output: U MxN matrix, S NxN diagonal matrix, V NxN matrix
  // NOTE(review): V was originally documented as "diagonal"; in a standard SVD V is orthogonal - confirm against the implementation.
  void svd(CMatrix<float>& U, CMatrix<float>& S, CMatrix<float>& V, bool aOrdering = true, int aIterations = 20);
  // Reassembles A = USV^T, Result in U
  void svdBack(CMatrix<float>& U, const CMatrix<float>& S, const CMatrix<float>& V);
  // Applies the Householder method to A and b, i.e., A is transformed into an upper triangular matrix
  void householder(CMatrix<float>& A, CVector<float>& b);
  // Computes least squares solution of an overdetermined linear system Ax=b using the Householder method
  CVector<float> leastSquares(CMatrix<float>& A, CVector<float>& b);
  // Inverts a square matrix by eigenvalue decomposition,
  // eigenvalues smaller than aReg are replaced by aReg
  void invRegularized(CMatrix<float>& A, int aReg);
  // Given a positive-definite symmetric matrix A, this routine constructs A = LL^T.
  // Only the upper triangle of A need be given. L is returned in the lower triangle.
  void cholesky(CMatrix<float>& A);
  // Solves L*aOut = aIn when L is a lower triangular matrix (e.g. result from cholesky)
  void triangularSolve(CMatrix<float>& L, CVector<float>& aIn, CVector<float>& aOut);
  void triangularSolve(CMatrix<float>& L, CMatrix<float>& aIn, CMatrix<float>& aOut);
  // Solves L^T*aOut = aIn when L is a lower triangular matrix (e.g. result from cholesky)
  void triangularSolveTransposed(CMatrix<float>& L, CVector<float>& aIn, CVector<float>& aOut);
  void triangularSolveTransposed(CMatrix<float>& L, CMatrix<float>& aIn, CMatrix<float>& aOut);
  // Computes the inverse of a matrix, given its cholesky decomposition L (lower triangle)
  void choleskyInv(const CMatrix<float>& L, CMatrix<float>& aInv);
  // Creates the rotation matrix RzRyRx and extends it to a 4x4 RBM matrix with translation 0
  void eulerAngles(float rx, float ry, float rz, CMatrix<float>& A);
  // Transforms a rigid body motion in matrix representation to a twist representation
  void RBM2Twist(CVector<float> &T, CMatrix<float>& RBM); 
}
 88 | 
 89 | // I M P L E M E N T A T I O N -------------------------------------------------
 90 | // Inline functions have to be implemented directly in the header file
 91 | 
 92 | namespace NMath {
 93 | 
 94 |   // abs
 95 |   inline float abs(const float aValue) {
 96 |     if (aValue >= 0) return aValue;
 97 |     else return -aValue;
 98 |   }
 99 | 
100 |   // min
101 |   inline float min(float aVal1, float aVal2) {
102 |     if (aVal1 < aVal2) return aVal1;
103 |     else return aVal2;
104 |   }
105 | 
106 |   // max
107 |   inline float max(float aVal1, float aVal2) {
108 |     if (aVal1 > aVal2) return aVal1;
109 |     else return aVal2;
110 |   }
111 | 
112 |   // min
113 |   inline int min(int aVal1, int aVal2) {
114 |     if (aVal1 < aVal2) return aVal1;
115 |     else return aVal2;
116 |   }
117 | 
118 |   // max
119 |   inline int max(int aVal1, int aVal2) {
120 |     if (aVal1 > aVal2) return aVal1;
121 |     else return aVal2;
122 |   }
123 | 
124 |   // sign
125 |   inline float sign(float aVal) {
126 |     if (aVal > 0) return 1.0;
127 |     else return -1.0;
128 |   }
129 | 
130 |   // minmod function:
131 |   //     0,                       if any of the a, b, c are 0 or of opposite sign
132 |   //     sign(a) min(|a|,|b|,|c|) else
133 |   inline float minmod(float a, float b, float c) {
134 |     if ((sign(a) == sign(b)) && (sign(b) == sign(c)) && (a != 0.0)) {
135 |       float aMin = fabs(a);
136 |       if (fabs(b) < aMin) aMin = fabs(b);
137 |       if (fabs(c) < aMin) aMin = fabs(c);
138 |       return sign(a)*aMin;
139 |     }
140 |     else return 0.0;
141 |   }
142 | 
143 |   // arctan
144 |   inline float arctan(float x, float y) {
145 |     if (x == 0.0)
146 |       if (y >= 0.0) return 0.5 * 3.1415926536;
147 |       else return 1.5 * 3.1415926536;
148 |     else if (x > 0.0)
149 |       if (y >= 0.0) return atan (y/x);
150 |       else return 2.0 * 3.1415926536 + atan (y/x);
151 |     else return 3.1415926536 + atan (y/x);
152 |   }
153 | 
154 |   // random
155 |   inline float random() {
156 |     return (float)rand()/RAND_MAX;
157 |   }
158 | 
159 |   // randomGauss
160 |   inline float randomGauss() {
161 |     // Draw two [0,1]-uniformly distributed numbers a and b
162 |     float a = random();
163 |     float b = random();
164 |     // assemble a N(0,1) number c according to Box-Muller */
165 |     if (a > 0.0) return sqrt(-2.0*log(a)) * cos(2.0*3.1415926536*b);
166 |     else return 0;
167 |   }
168 | 
169 | }
170 | #endif
171 | 


--------------------------------------------------------------------------------
/lbfgs.lua:
--------------------------------------------------------------------------------
  1 | --[[ An implementation of L-BFGS, heavily inspired by minFunc (Mark Schmidt)
  2 | 
  3 | This implementation of L-BFGS relies on a user-provided line
  4 | search function (state.lineSearch). If this function is not
  5 | provided, then a simple learningRate is used to produce fixed
  6 | size steps. Fixed size steps are much less costly than line
  7 | searches, and can be useful for stochastic problems.
  8 | 
  9 | The learning rate is used even when a line search is provided.
 10 | This is also useful for large-scale stochastic problems, where
 11 | opfunc is a noisy approximation of f(x). In that case, the learning
 12 | rate allows a reduction of confidence in the step size.
 13 | 
 14 | ARGS:
 15 | 
 16 | - `opfunc` : a function that takes a single input (X), the point of
 17 |          evaluation, and returns f(X) and df/dX
 18 | - `x` : the initial point
 19 | - `state` : a table describing the state of the optimizer; after each
 20 |          call the state is modified
 21 | - `state.maxIter` : Maximum number of iterations allowed
 22 | - `state.maxEval` : Maximum number of function evaluations
 23 | - `state.tolFun` : Termination tolerance on the first-order optimality
 24 | - `state.tolX` : Termination tol on progress in terms of func/param changes
 25 | - `state.lineSearch` : A line search function
 26 | - `state.learningRate` : If no line search provided, then a fixed step size is used
 27 | 
 28 | RETURN:
 29 | - `x*` : the new `x` vector, at the optimal point
 30 | - `f`  : a table of all function values: 
 31 |      `f[1]` is the value of the function before any optimization and
 32 |      `f[#f]` is the final fully optimized value, at `x*`
 33 | 
 34 | (Clement Farabet, 2012)
 35 | ]]
 36 | function optim.lbfgs(opfunc, x, config, state)
 37 |    -- get/update state
 38 |    local config = config or {}
 39 |    local state = state or config
 40 |    local maxIter = tonumber(config.maxIter) or 20
 41 |    local maxEval = tonumber(config.maxEval) or maxIter*1.25
 42 |    local tolFun = config.tolFun or 1e-5
 43 |    local tolFunRelative = tonumber(config.tolFunRelative) or 0
 44 |    local tolFunRelativeInterval = tonumber(config.tolFunRelativeInterval) or 100
 45 |    local tolX = config.tolX or 1e-9
 46 |    local nCorrection = config.nCorrection or 100
 47 |    local lineSearch = config.lineSearch
 48 |    local lineSearchOpts = config.lineSearchOptions
 49 |    local learningRate = config.learningRate or 1
 50 |    local isverbose = config.verbose or false
 51 |    
 52 |    state.funcEval = state.funcEval or 0
 53 |    state.nIter = state.nIter or 0
 54 | 
 55 |    -- verbose function
 56 |    local verbose
 57 |    if isverbose then
 58 |       verbose = function(...) print('<optim.lbfgs> ', ...) end
 59 |    else
 60 |       verbose = function() end
 61 |    end
 62 | 
 63 |    -- import some functions
 64 |    local abs = math.abs
 65 |    local min = math.min
 66 | 
 67 |    -- evaluate initial f(x) and df/dx
 68 |    local f,g = opfunc(x)
 69 |    local f_hist = {f}
 70 |    local currentFuncEval = 1
 71 |    state.funcEval = state.funcEval + 1
 72 |    local p = g:size(1)
 73 | 
 74 |    -- check optimality of initial point
 75 |    state.tmp1 = state.tmp1 or g.new(g:size()):zero(); local tmp1 = state.tmp1
 76 |    tmp1:copy(g):abs()
 77 |    if tmp1:sum() <= tolFun then
 78 |       -- optimality condition below tolFun
 79 |       verbose('optimality condition below tolFun')
 80 |       return x,f_hist
 81 |    end
 82 | 
 83 |    if not state.dir_bufs then
 84 |       -- reusable buffers for y's and s's, and their histories
 85 |       verbose('creating recyclable direction/step/history buffers')
 86 |       state.dir_bufs = state.dir_bufs or g.new(nCorrection+1, p):split(1)
 87 |       state.stp_bufs = state.stp_bufs or g.new(nCorrection+1, p):split(1)
 88 |       for i=1,#state.dir_bufs do
 89 |          state.dir_bufs[i] = state.dir_bufs[i]:squeeze(1)
 90 |          state.stp_bufs[i] = state.stp_bufs[i]:squeeze(1)
 91 |       end
 92 |    end
 93 | 
 94 |    -- variables cached in state (for tracing)
 95 |    local d = state.d
 96 |    local t = state.t
 97 |    local old_dirs = state.old_dirs
 98 |    local old_stps = state.old_stps
 99 |    local Hdiag = state.Hdiag
100 |    local g_old = state.g_old
101 |    local f_old = state.f_old
102 |    local f_past = nil
103 | 
104 |    -- optimize for a max of maxIter iterations
105 |    local nIter = 0
106 |    while nIter < maxIter do
107 |       -- keep track of nb of iterations
108 |       nIter = nIter + 1
109 |       state.nIter = state.nIter + 1
110 | 
111 |       ------------------------------------------------------------
112 |       -- compute gradient descent direction
113 |       ------------------------------------------------------------
114 |       if state.nIter == 1 then
115 |          d = g:clone():mul(-1) -- -g
116 |          old_dirs = {}
117 |          old_stps = {}
118 |          Hdiag = 1
119 |       else
120 |          -- do lbfgs update (update memory)
121 |          local y = table.remove(state.dir_bufs)  -- pop
122 |          local s = table.remove(state.stp_bufs)
123 |          y:add(g, -1, g_old)  -- g - g_old
124 |          s:mul(d, t)          -- d*t
125 |          local ys = y:dot(s)  -- y*s
126 |          if ys > 1e-10 then
127 |             -- updating memory
128 |             if #old_dirs == nCorrection then
129 |                -- shift history by one (limited-memory)
130 |                local removed1 = table.remove(old_dirs, 1)
131 |                local removed2 = table.remove(old_stps, 1)
132 |                table.insert(state.dir_bufs, removed1)
133 |                table.insert(state.stp_bufs, removed2)
134 |             end
135 | 
136 |             -- store new direction/step
137 |             table.insert(old_dirs, s)
138 |             table.insert(old_stps, y)
139 | 
140 |             -- update scale of initial Hessian approximation
141 |             Hdiag = ys / y:dot(y)  -- (y*y)
142 |          else
143 |             -- put y and s back into the buffer pool
144 |             table.insert(state.dir_bufs, y)
145 |             table.insert(state.stp_bufs, s)
146 |          end
147 | 
148 |          -- compute the approximate (L-BFGS) inverse Hessian 
149 |          -- multiplied by the gradient
150 |          local k = #old_dirs
151 | 
152 |          -- need to be accessed element-by-element, so don't re-type tensor:
153 |          state.ro = state.ro or torch.Tensor(nCorrection); local ro = state.ro
154 |          for i = 1,k do
155 |             ro[i] = 1 / old_stps[i]:dot(old_dirs[i])
156 |          end
157 | 
158 |          -- iteration in L-BFGS loop collapsed to use just one buffer
159 |          local q = tmp1  -- reuse tmp1 for the q buffer
160 |          -- need to be accessed element-by-element, so don't re-type tensor:
161 |          state.al = state.al or torch.zeros(nCorrection) local al = state.al
162 | 
163 |          q:mul(g, -1)  -- -g
164 |          for i = k,1,-1 do
165 |             al[i] = old_dirs[i]:dot(q) * ro[i]
166 |             q:add(-al[i], old_stps[i])
167 |          end
168 | 
169 |          -- multiply by initial Hessian
170 |          r = d  -- share the same buffer, since we don't need the old d
171 |          r:mul(q, Hdiag)  -- q[1] * Hdiag
172 |          for i = 1,k do
173 |             local be_i = old_stps[i]:dot(r) * ro[i]
174 |             r:add(al[i]-be_i, old_dirs[i])
175 |          end
176 |          -- final direction is in r/d (same object)
177 |       end
178 |       g_old = g_old or g:clone()
179 |       g_old:copy(g)
180 |       f_old = f
181 | 
182 |       ------------------------------------------------------------
183 |       -- compute step length
184 |       ------------------------------------------------------------
185 |       -- directional derivative
186 |       local gtd = g:dot(d)  -- g * d
187 | 
188 |       -- check that progress can be made along that direction
189 |       if gtd > -tolX then
190 |          break
191 |       end
192 | 
193 |       -- reset initial guess for step size
194 |       if state.nIter == 1 then
195 |          tmp1:copy(g):abs()
196 |          t = min(1,1/tmp1:sum()) * learningRate
197 |       else
198 |          t = learningRate
199 |       end
200 | 
201 |       -- optional line search: user function
202 |       local lsFuncEval = 0
203 |       if lineSearch and type(lineSearch) == 'function' then
204 |          -- perform line search, using user function
205 |          f,g,x,t,lsFuncEval = lineSearch(opfunc,x,t,d,f,g,gtd,lineSearchOpts)
206 |          table.insert(f_hist, f)
207 |       else
208 |          -- no line search, simply move with fixed-step
209 |          x:add(t,d)
210 |          if nIter ~= maxIter then
211 |             -- re-evaluate function only if not in last iteration
212 |             -- the reason we do this: in a stochastic setting,
213 |             -- no use to re-evaluate that function here
214 |             f,g = opfunc(x)
215 |             lsFuncEval = 1
216 |             table.insert(f_hist, f)
217 |          end
218 |       end
219 | 
220 |       -- update func eval
221 |       currentFuncEval = currentFuncEval + lsFuncEval
222 |       state.funcEval = state.funcEval + lsFuncEval
223 | 
224 |       ------------------------------------------------------------
225 |       -- check conditions
226 |       ------------------------------------------------------------
227 |       if nIter == maxIter then
228 |          -- no use to run tests
229 |          verbose('reached max number of iterations')
230 |          break
231 |       end
232 | 
233 |       if currentFuncEval >= maxEval then
234 |          -- max nb of function evals
235 |          verbose('max nb of function evals')
236 |          break
237 |       end
238 | 
239 |       tmp1:copy(g):abs()
240 |       if tmp1:sum() <= tolFun then
241 |          -- check optimality
242 |          verbose('optimality condition below tolFun')
243 |          break
244 |       end
245 | 
246 |       tmp1:copy(d):mul(t):abs()
247 |       if tmp1:sum() <= tolX then
248 |          -- step size below tolX
249 |          verbose('step size below tolX')
250 |          break
251 |       end
252 | 
253 |       if abs(f-f_old) < tolX then
254 |          -- function value changing less than tolX
255 |          verbose('function value changing less than tolX')
256 |          break
257 |       end
258 |       
259 |       if nIter % tolFunRelativeInterval == 0 then
260 |         if f_past ~= nil and (abs(f-f_past) / f_past) < tolFunRelative then
261 |           verbose('relative change in function value is less than tolFunRelative')
262 |           break
263 |         end
264 |         f_past = f
265 |       end
266 | 	
267 |    end
268 | 
269 |    -- save state
270 |    state.old_dirs = old_dirs
271 |    state.old_stps = old_stps
272 |    state.Hdiag = Hdiag
273 |    state.g_old = g_old
274 |    state.f_old = f_old
275 |    state.t = t
276 |    state.d = d
277 | 
278 |    -- return optimal x, and history of f(x)
279 |    return x,f_hist,currentFuncEval
280 | end
281 | 
-- Module value returned by require: exposes the optim.lbfgs function defined
-- in this file under the key `optimize`.
return {
  optimize = optim.lbfgs
}
285 |   


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # artistic-videos
  2 | 
  3 | This is the torch implementation for the paper "[Artistic style transfer for videos](http://arxiv.org/abs/1604.08610)", based on neural-style code by Justin Johnson https://github.com/jcjohnson/neural-style .
  4 | 
Our algorithm transfers the style from one image (for example, a painting) to a whole video sequence and generates consistent and stable stylized video sequences.
  6 | 
**UPDATE:** A much faster version which runs in under one second per frame is available at [fast-artistic-videos](https://github.com/manuelruder/fast-artistic-videos), but it only works for precomputed style templates. This repository allows arbitrary styles, but needs several minutes per frame.
  8 | 
  9 | **Example video:**
 10 | 
 11 | [![Artistic style transfer for videos](http://img.youtube.com/vi/Khuj4ASldmU/0.jpg)](https://www.youtube.com/watch?v=Khuj4ASldmU "Artistic style transfer for videos")
 12 | 
 13 | ## Contact
 14 | 
 15 | For issues or questions related to this implementation, please use the [issue tracker](https://github.com/manuelruder/artistic-videos/issues).
 16 | For everything else, including licensing issues, please email us. Our contact details can be found in [our paper](http://arxiv.org/pdf/1604.08610.pdf).
 17 | 
 18 | ## Setup
 19 | 
 20 | Tested with Ubuntu 14.04.
 21 | 
 22 | * Install torch7, loadcaffe and the CUDA backend (otherwise you have to use CPU mode which is horribly slow) and download the VGG model, as described by jcjohnson: [neural-style#setup](https://github.com/jcjohnson/neural-style#setup). Optional: Install cuDNN. This requires registration as a developer with NVIDIA, but significantly reduces memory usage. For non-Nvidia GPUs you can also use the OpenCL backend.
* To use the temporal consistency constraints, you need a utility which estimates the [optical flow](https://en.wikipedia.org/wiki/Optical_flow) between two images. You can use [DeepFlow](http://lear.inrialpes.fr/src/deepflow/) which we also used in our paper. In this case, just download both DeepFlow and DeepMatching (CPU version) from their website and place the static binaries (`deepmatching-static` and `deepflow2-static`) in the main directory of this repository. Then, the scripts included in this repository can be used to generate the optical flow for all frames as well as the certainty of the flow field. If you want to use a different optical flow algorithm, specify the path to your optical flow utility in the first line of `makeOptFlow.sh`; the flow files have to be created in the [middlebury file format](http://vision.middlebury.edu/flow/code/flow-code/README.txt).
 24 | 
 25 | ## Requirements
 26 | 
 27 | A fast GPU with a large amount of video memory is recommended to execute this script. The ability to run in CPU mode is impractical due to the enormous running time.
 28 | 
 29 | For a resolution of 450x350, you will need at least a 4GB GPU (around 3,5 GB memory usage). If you use cuDNN, a 2GB GPU is sufficient (around 1,7GB memory usage). Memory usage scales linearly with resolution, so if you experience an out of memory error, downscale the video.
 30 | 
 31 | Other ways to reduce memory footprint are to use the ADAM optimizer instead of L-BFGS and/or to use the NIN Imagenet model instead of VGG-19. However, we didn't test our method with either of these and you will likely get inferior results.
 32 | 
 33 | ## Simple style transfer
 34 | 
 35 | To perform style transfer with mostly the default parameters, execute `stylizeVideo.sh <path_to_video> <path_to_style_image>`. This script will perform all the steps necessary to create a stylized version of the video. Note: You have to have ffmpeg (or libav-tools for Ubuntu 14.10 and earlier) installed.
 36 | 
 37 | A more advanced version of this script can be found in NameRX's fork which computes optical flow in parallel to the video stylization for improved performance: [NameRX/artistic-videos](https://github.com/NameRX/artistic-videos)
 38 | 
 39 | ## FAQ
 40 | 
See [here](https://github.com/manuelruder/artistic-videos/issues?q=label%3Aquestion) for a list of frequently asked questions.
 42 | 
 43 | ## Advanced Usage
 44 | 
 45 | Please read the script `stylizeVideo.sh` to see which steps you have to perform in advance exactly. Basically you have to save the frames of the video as individual image files and you have to compute the optical flow between all adjacent frames as well as the certainty of the flow field (both can be accomplished with `makeOptFlow.sh`).
 46 | 
 47 | There are two versions of this algorithm, a single-pass and a multi-pass version. The multi-pass version yields better results in case of strong camera motion, but needs more iterations per frame.
 48 | 
 49 | Basic usage:
 50 | 
 51 | ```
 52 | th artistic_video.lua <arguments> [-args <fileName>]
 53 | ```
 54 | 
 55 | ```
 56 | th artistic_video_multiPass.lua <arguments> [-args <fileName>]
 57 | ```
 58 | 
 59 | Arguments can be given by command line and/or written in a file with one argument per line. Specify the path to this file through the option `-args`. Arguments given by command line will override arguments written in this file.
 60 | 
 61 | **Basic arguments**:
 62 | * `-style_image`: The style image.
 63 | * `-content_pattern`: A file path pattern for the individual frames of the videos, for example `frame_%04d.png`.
 64 | * `-num_images`: The number of frames. Set to `0` to process all available frames.
 65 | * `-start_number`: The index of the first frame. Default: 1
 66 | * `-gpu`: Zero-indexed ID of the GPU to use; for CPU mode set `-gpu` to -1.
 67 | 
 68 | **Arguments for the single-pass algorithm** (only present in `artistic_video.lua`)
 69 | * `-flow_pattern`: A file path pattern for files that store the backward flow between the frames. The placeholder in square brackets refers to the frame position where the optical flow starts and the placeholder in braces refers to the frame index where the optical flow points to. For example `flow_[%02d]_{%02d}.flo` means the flow files are named *flow_02_01.flo*, *flow_03_02.flo*, etc. If you use the script included in this repository (makeOptFlow.sh), the filename pattern will be `backward_[%d]_{%d}.flo`.
* `-flowWeight_pattern`: A file path pattern for the weights / certainty of the flow field. These files should be grey scale images where a white pixel indicates a high flow weight and a black pixel a low weight, respectively. Same format as above. If you use the script, the filename pattern will be `reliable_[%d]_{%d}.pgm`.
 71 | * `-flow_relative_indices`: The indices for the long-term consistency constraint as comma-separated list. Indices should be relative to the current frame. For example `1,2,4` means it uses frames *i-1*,*i-2* and *i-4* warped for current frame at position *i* as consistency constraint. Default value is 1 which means it uses short-term consistency only. If you use non-default values, you have to compute the corresponding long-term flow as well.
 72 | 
 73 | **Arguments for the multi-pass algorithm** (only present in `artistic_video_multiPass.lua`)
 74 | * `-forwardFlow_pattern`: A file path pattern for the forward flow. Same format as in `-flow_pattern`.
 75 | * `-backwardFlow_pattern`: A file path pattern for the backward flow. Same format as above.
* `-forwardFlow_weight_pattern`: A file path pattern for the weights / certainty of the forward flow field. Same format as above.
* `-backwardFlow_weight_pattern`: A file path pattern for the weights / certainty of the backward flow field. Same format as above.
 78 | * `-num_passes`: Number of passes. Default: 15.
 79 | * `-use_temporalLoss_after`: Uses temporal consistency loss in given pass and afterwards. Default: `8`.
 80 | * `-blendWeight`: The blending factor of the previous stylized frame. The higher this value, the stronger the temporal consistency. Default value is `1` which means that the previous stylized frame is blended equally with the current frame.
 81 | 
 82 | **Optimization options**:
 83 | * `-content_weight`: How much to weight the content reconstruction term. Default is 5e0.
 84 | * `-style_weight`: How much to weight the style reconstruction term. Default is 1e2.
 85 | * `-temporal_weight`: How much to weight the temporal consistency loss. Default is 1e3. Set to 0 to disable the temporal consistency loss.
* `-temporal_loss_criterion`: Which error function is used for the temporal consistency loss. Can be either `mse` for the mean squared error or `smoothl1` for the [smooth L1 criterion](https://github.com/torch/nn/blob/master/doc/criterion.md#nn.SmoothL1Criterion).
 87 | * `-tv_weight`: Weight of total-variation (TV) regularization; this helps to smooth the image.
 88 |   Default is 1e-3. Set to 0 to disable TV regularization.
 89 | * `-num_iterations`:
 90 |   * Single-pass: Two comma-separated values for the maximum number of iterations for the first frame and for subsequent frames. Default is 2000,1000.
 91 |   * Multi-pass: A single value for the number of iterations *per pass*.
 92 | * `-tol_loss_relative`: Stop if the relative change of the loss function in an interval of `tol_loss_relative_interval` iterations falls below this threshold. Default is `0.0001` which means that the optimizer stops if the loss function changes less than 0.01% in the given interval. Meaningful values are between `0.001` and `0.0001` in the default interval.
* `-tol_loss_relative_interval`: See above. Default value: `50`.
 94 | * `-init`:
 95 |   * Single-pass: Two comma-separated values for the initialization method for the first frame and for subsequent frames; one of `random`, `image`, `prev` or `prevWarped`.
 96 |   Default is `random,prevWarped` which uses a noise initialization for the first frame and the previous stylized frame warped for subsequent frames. `image` initializes with the content frames. `prev` initializes with the previous stylized frames without warping.
 97 |   * Multi-pass: One value for the initialization method. Either `random` or `image`.
 98 | * `-optimizer`: The optimization algorithm to use; either `lbfgs` or `adam`; default is `lbfgs`.
 99 |   L-BFGS tends to give better results, but uses more memory. Switching to ADAM will reduce memory usage;
100 |   when using ADAM you will probably need to play with other parameters to get good results, especially
101 |   the style weight, content weight, and learning rate; you may also want to normalize gradients when
102 |   using ADAM.
103 | * `-learning_rate`: Learning rate to use with the ADAM optimizer. Default is 1e1.
104 | * `-normalize_gradients`: If this flag is present, style and content gradients from each layer will be
105 |   L1 normalized. Idea from [andersbll/neural_artistic_style](https://github.com/andersbll/neural_artistic_style).
106 | 
107 | **Output options**:
108 | * `-output_image`: Name of the output image. Default is `out.png` which will produce output images of the form *out-\<frameIdx\>.png* for the single-pass and *out-\<frameIdx\>_\<passIdx\>.png* for the multi-pass algorithm.
109 | * `-number_format`: Which number format to use for the output image. For example `%04d` adds up to three leading zeros. Some users reported that ffmpeg may use lexicographical sorting in some cases; therefore the output frames would be combined in the wrong order without leading zeros. Default: `%d`.
110 | * `-output_folder`: Directory where the output images should be saved. Must end with a slash.
111 | * `-print_iter`: Print progress every `print_iter` iterations. Set to 0 to disable printing.
112 | * `-save_iter`: Save the image every `save_iter` iterations. Set to 0 to disable saving intermediate results.
113 | * `-save_init`: If this option is present, the initialization image will be saved.
114 | 
115 | **Other arguments**:
116 | * `-content_layers`: Comma-separated list of layer names to use for content reconstruction.
117 |   Default is `relu4_2`.
* `-style_layers`: Comma-separated list of layer names to use for style reconstruction.
119 |   Default is `relu1_1,relu2_1,relu3_1,relu4_1,relu5_1`.
120 | * `-style_blend_weights`: The weight for blending the style of multiple style images, as a
121 |   comma-separated list, such as `-style_blend_weights 3,7`. By default, all style images
122 |   are equally weighted.
123 | * `-style_scale`: Scale at which to extract features from the style image, relative to the size of the content video. Default is `1.0`.
124 | * `-proto_file`: Path to the `deploy.txt` file for the VGG Caffe model.
125 | * `-model_file`: Path to the `.caffemodel` file for the VGG Caffe model.
126 |   Default is the original VGG-19 model; you can also try the normalized VGG-19 model used in the paper.
127 | * `-pooling`: The type of pooling layers to use; one of `max` or `avg`. Default is `max`.
128 |   The VGG-19 models uses max pooling layers, but Gatys et al. mentioned that replacing these layers with average
129 |   pooling layers can improve the results. We haven't been able to get good results using average pooling, but
130 |   the option is here.
131 | * `-backend`: `nn`, `cudnn` or `clnn`. Default is `nn`. `cudnn` requires
132 |   [cudnn.torch](https://github.com/soumith/cudnn.torch) and may reduce memory usage.
133 |   `clnn` requires [cltorch](https://github.com/hughperkins/cltorch) and [clnn](https://github.com/hughperkins/clnn).
134 | * `-cudnn_autotune`: When using the cuDNN backend, pass this flag to use the built-in cuDNN autotuner to select
135 |   the best convolution algorithms for your architecture. This will make the first iteration a bit slower and can
136 |   take a bit more memory, but may significantly speed up the cuDNN backend.
137 | 
138 | ## Acknowledgement
139 | * This work was inspired by the paper [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576) by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge, which introduced an approach for style transfer in still images.
140 | * Our implementation is based on Justin Johnson's implementation [neural-style](https://github.com/jcjohnson/neural-style).
141 | 
142 | ## Citation
143 | 
144 | If you use this code or its parts in your research, please cite the following paper:
145 | 
146 | ```
147 | @inproceedings{RuderDB2016,
148 |   author = {Manuel Ruder and Alexey Dosovitskiy and Thomas Brox},
149 |   title = {Artistic Style Transfer for Videos},
150 |   booktitle = {German Conference on Pattern Recognition},
151 |   pages     = {26--36},
152 |   year      = {2016},
153 | }
154 | ```
155 | 


--------------------------------------------------------------------------------
/artistic_video_multiPass.lua:
--------------------------------------------------------------------------------
  1 | require 'torch'
  2 | require 'nn'
  3 | require 'image'
  4 | require 'optim'
  5 | require 'loadcaffe'
  6 | require 'artistic_video_core'
  7 | 
  8 | local flowFile = require 'flowFileLoader'
  9 | 
 10 | --------------------------------------------------------------------------------
 11 | 
-- Command-line option declarations for the multi-pass script. Each cmd:option
-- call registers an option name, its default value and (optionally) a help text.
local cmd = torch.CmdLine()

-- Basic options
cmd:option('-style_image', 'example/seated-nude.jpg',
           'Style target image')
-- NOTE: the default is the string 'nil', not the Lua value nil
cmd:option('-style_blend_weights', 'nil')
cmd:option('-content_pattern', 'example/marple8_%02d.ppm',
           'Content target pattern')
cmd:option('-num_images', 0, 'Number of content images. Set 0 for autodetect.')
cmd:option('-start_number', 1, 'Frame index to start with')
cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1')
cmd:option('-number_format', '%d', 'Number format of the output images.')

-- Flow options
-- The forward and backward weight patterns share the same default; the
-- "from"/"to" indices substituted into the pattern select the actual file.
cmd:option('-forwardFlow_pattern', 'example/deepflow/forward_[%d]_{%d}.flo',
           'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.')
cmd:option('-backwardFlow_pattern', 'example/deepflow/backward_[%d]_{%d}.flo',
           'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.')
cmd:option('-forwardFlow_weight_pattern', 'example/deepflow/reliable_[%d]_{%d}.pgm',
           'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.')
cmd:option('-backwardFlow_weight_pattern', 'example/deepflow/reliable_[%d]_{%d}.pgm',
           'Flow file pattern. [.] will be replaced with the "from"-index, {.} with the "to"-index.')

-- Multi-pass options
-- Per the README: -blendWeight is the blending factor of the previous stylized
-- frame (higher means stronger temporal consistency), and
-- -use_temporalLoss_after enables the temporal consistency loss in the given
-- pass and afterwards.
-- NOTE(review): -blendWeight_lastPass and -continue_with_pass have no help
-- text here and are not described in the README; presumably the blend weight
-- used in the final pass and the pass index to resume from -- confirm against
-- the code that reads them.
cmd:option('-blendWeight', 1.0, '')
cmd:option('-blendWeight_lastPass', 0.0, '')
cmd:option('-use_temporalLoss_after', 8, '')
cmd:option('-num_passes', 15, 'Number of passes')
cmd:option('-continue_with_pass', 1, '')

-- Optimization options
cmd:option('-content_weight', 5e0)
cmd:option('-style_weight', 1e2)
cmd:option('-temporal_weight', 5e2)
cmd:option('-tv_weight', 1e-3)
cmd:option('-temporal_loss_criterion', 'mse', 'mse|smoothl1')
cmd:option('-num_iterations', 100, 'Number of iterations per pass')
cmd:option('-tol_loss_relative', 0, 'stop if relative change of the loss function is below this value')
cmd:option('-tol_loss_relative_interval', 100, 'interval between two function comparisons')
cmd:option('-normalize_gradients', false)
cmd:option('-init', 'random', 'random|image|prevWarped')
cmd:option('-optimizer', 'lbfgs', 'lbfgs|adam')
cmd:option('-learning_rate', 1e1)

-- Output options
cmd:option('-print_iter', 50)
cmd:option('-save_iter', 0)
cmd:option('-output_image', 'out.png')
cmd:option('-output_folder', '')
cmd:option('-save_init', false, 'Whether the initialization image should be saved (for debugging purposes).')

-- Other options
cmd:option('-style_scale', 1.0)
cmd:option('-pooling', 'max', 'max|avg')
cmd:option('-proto_file', 'models/VGG_ILSVRC_19_layers_deploy.prototxt')
cmd:option('-model_file', 'models/VGG_ILSVRC_19_layers.caffemodel')
cmd:option('-backend', 'nn', 'nn|cudnn|clnn')
cmd:option('-cudnn_autotune', false)
cmd:option('-seed', -1)

-- Names of the network layers used for the content and style losses
cmd:option('-content_layers', 'relu4_2', 'layers for content')
cmd:option('-style_layers', 'relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', 'layers for style')

-- Optional file holding further arguments, one per line
cmd:option('-args', '', 'Arguments in a file, one argument per line')
 76 | 
 77 | -- Parameter gradients are not needed by our net, so accumulating them is replaced by a no-op.
 78 | nn.SpatialConvolutionMM.accGradParameters = function(self)
 79 | end
 80 | 
 81 | local function main(params)
 82 |   -- Entry point of the multi-pass algorithm: loads the CNN, builds the loss network and then
 83 |   -- processes all frames once per pass, alternating between forward and backward direction.
 84 |   if params.gpu >= 0 then
 85 |     if params.backend ~= 'clnn' then
 86 |       require 'cutorch'
 87 |       require 'cunn'
 88 |       cutorch.setDevice(params.gpu + 1)
 89 |     else
 90 |       require 'clnn'
 91 |       require 'cltorch'
 92 |       cltorch.setDevice(params.gpu + 1)
 93 |     end
 94 |   else
 95 |     params.backend = 'nn'
 96 |   end
 97 | 
 98 |   if params.backend == 'cudnn' then
 99 |     require 'cudnn'
100 |     if params.cudnn_autotune then
101 |       cudnn.benchmark = true
102 |     end
103 |     cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop
104 |   end
105 | 
106 |   local loadcaffe_backend = params.backend
107 |   if params.backend == 'clnn' then loadcaffe_backend = 'nn' end
108 |   local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float()
109 |   cnn = MaybePutOnGPU(cnn, params)
110 | 
111 |   local num_images = params.num_images
112 |   if num_images == 0 then
113 |     num_images = calcNumberOfContentImages(params)
114 |     print("Detected " .. num_images .. " content images.")
115 |   end
116 |   local end_image_idx = num_images + params.start_number - 1
117 | 
118 |   local style_images_caffe = getStyleImages(params)
119 | 
120 |   -- Set up the network. Content and temporal loss modules are inserted for each frame below.
121 |   local net, style_losses, losses_indices, losses_type = buildNet(cnn, params, style_images_caffe)
122 | 
123 |   -- We don't need the base CNN anymore, so clean it up to save memory.
124 |   cnn = nil
125 |   for i=1,#net.modules do
126 |     local module = net.modules[i]
127 |     if torch.type(module) == 'nn.SpatialConvolutionMM' then
128 |         -- remove these, not used, but uses gpu memory
129 |         module.gradWeight = nil
130 |         module.gradBias = nil
131 |     end
132 |   end
133 |   collectgarbage()
134 | 
135 |   local img = nil
136 |   -- Initialize the image
137 |   if params.seed >= 0 then
138 |       torch.manualSeed(params.seed)
139 |   end
140 |   local content_size = image.load(string.format(params.content_pattern, params.start_number), 3):size()
141 |   local randImg = torch.randn(content_size):mul(0.001)
142 | 
143 |   for run=params.continue_with_pass, params.num_passes do
144 | 
145 |     -- Odd passes go forward through the frames, even passes go backward.
146 |     local flag = run % 2
147 |     local start = (flag == 0) and end_image_idx or params.start_number
148 |     local endp = (flag == 0) and params.start_number or end_image_idx
149 |     local incr = (flag == 0) and -1 or 1
150 | 
151 |     for frameIdx=start,endp, incr do
152 | 
153 |       local content_image_caffe = getContentImage(frameIdx, params)
154 |       local content_losses, prevPlusFlow_losses = {}, {}
155 |       local additional_layers = 0
156 |       local num_iterations = params.num_iterations
157 | 
158 |       -- Previous and following frame warped
159 |       local prevImageWarped, nextImageWarped = nil, nil
160 |       -- The warped frame which will be used for temporal consistency.
161 |       local imageWarped = nil
162 |       -- Preprocessed version of "imageWarped"; created lazily so that it is preprocessed only once.
163 |       local imageWarpedCaffe = nil
164 | 
165 |       -- Find out if we are forward or backward pass, and set "imageWarped" accordingly.
166 |       if frameIdx > params.start_number then
167 |         prevImageWarped = readPrevImageWarped(frameIdx, params, run - (1 - flag), false)
168 |       end
169 |       if run > 1 and frameIdx < end_image_idx then
170 |         nextImageWarped = readNextImageWarped(frameIdx, params, run - flag, false)
171 |       end
172 |       if flag == 1 then imageWarped = prevImageWarped end
173 |       if flag == 0 then imageWarped = nextImageWarped end
174 | 
175 |       local temporalLossEnabled = run >= params.use_temporalLoss_after and imageWarped ~= nil
176 | 
177 |       -- add layers for this iteration
178 |       for i=1, #losses_indices do
179 |         if losses_type[i] == 'content'  then
180 |           local content_loss = getContentLossModuleForLayer(net,
181 |             losses_indices[i] + additional_layers,
182 |             content_image_caffe, params)
183 |           net:insert(content_loss, losses_indices[i] + additional_layers)
184 |           additional_layers = additional_layers + 1
185 |           table.insert(content_losses, content_loss)
186 |         elseif temporalLossEnabled then
187 |           if imageWarpedCaffe == nil then
188 |             imageWarpedCaffe = MaybePutOnGPU(preprocess(imageWarped):float(), params)
189 |           end
190 |           local flowWeights = nil
191 |           if losses_type[i] == 'prevPlusFlowWeighted' then
192 |             local weightsFileName = nil
193 |             if flag == 1 then
194 |               weightsFileName = getFormatedFlowFileName(params.backwardFlow_weight_pattern, frameIdx-1, frameIdx)
195 |             else
196 |               weightsFileName = getFormatedFlowFileName(params.forwardFlow_weight_pattern, frameIdx+1, frameIdx)
197 |             end
198 |             print(string.format('Reading flowWeights file "%s".', weightsFileName))
199 |             flowWeights = image.load(weightsFileName):float()
200 |             flowWeights = flowWeights:expand(3, flowWeights:size(2), flowWeights:size(3))
201 |             flowWeights = MaybePutOnGPU(flowWeights, params)
202 |           end
203 |           local loss_module = getWeightedContentLossModuleForLayer(net,
204 |             losses_indices[i] + additional_layers, imageWarpedCaffe,
205 |             params, flowWeights)
206 |           net:insert(loss_module, losses_indices[i] + additional_layers)
207 |           table.insert(prevPlusFlow_losses, loss_module)
208 |           additional_layers = additional_layers + 1
209 |         end
210 |       end
211 | 
212 |       if run == 1 then
213 |         -- For the first run, process the frames independently
214 |         if frameIdx == params.start_number or params.init == 'random' then
215 |           img = randImg:clone():float()
216 |         elseif params.init == 'image' then
217 |           img = content_image_caffe:clone():float()
218 |         elseif params.init == 'prevWarped' then
219 |           local prevImageWarpedWithPad = readPrevImageWarped(frameIdx, params, run - (1 - flag), true)
220 |           img = preprocess(prevImageWarpedWithPad):float()
221 |         else
222 |           print('Unknown initialization method.')
223 |           os.exit()
224 |         end
225 |       else
226 |         -- For subsequent runs, blend neighboring frames into the current frame
227 |         img = image.load(build_OutFilename(params, frameIdx, run - 1), 3)
228 |         -- Make sure to correctly normalize the result
229 |         local divisor = torch.zeros(content_image_caffe:size())
230 |         divisor:add(1)
231 |         if frameIdx > params.start_number then
232 |           local weightsFileName = getFormatedFlowFileName(params.backwardFlow_weight_pattern, frameIdx-1, frameIdx)
233 |           print(string.format('Reading flowWeights file "%s".', weightsFileName))
234 |           local prevImageWeights = image.load(weightsFileName)
235 |           -- Scale before expanding: expand() shares one plane between all channels, so an in-place mul on the expanded view would apply the factor once per channel.
236 |           prevImageWeights:mul(flag == 1 and params.blendWeight or params.blendWeight_lastPass)
237 |           prevImageWeights = prevImageWeights:expand(3, prevImageWeights:size(2), prevImageWeights:size(3))
238 |           img:add(torch.cmul(prevImageWarped, prevImageWeights))
239 |           divisor:add(prevImageWeights)
240 |         end
241 |         if frameIdx < end_image_idx then
242 |           local weightsFileName = getFormatedFlowFileName(params.forwardFlow_weight_pattern, frameIdx+1, frameIdx)
243 |           print(string.format('Reading flowWeights file "%s".', weightsFileName))
244 |           local nextImageWeights = image.load(weightsFileName)
245 |           -- Scale before expanding, for the same reason as for the previous frame's weights.
246 |           nextImageWeights:mul(flag == 0 and params.blendWeight or params.blendWeight_lastPass)
247 |           nextImageWeights = nextImageWeights:expand(3, nextImageWeights:size(2), nextImageWeights:size(3))
248 |           img:add(torch.cmul(nextImageWarped, nextImageWeights))
249 |           divisor:add(nextImageWeights)
250 |         end
251 |         img:cdiv(divisor)
252 |         img = preprocess(img):float()
253 |       end
254 | 
255 |       img = MaybePutOnGPU(img, params)
256 | 
257 |       if params.save_init then
258 |         save_image(img, params.output_folder .. string.format(
259 |           'init-' .. params.number_format .. '_%d.png', frameIdx, run))
260 |       end
261 | 
262 |       -- Run the optimization for some iterations, save the result to disk
263 |       runOptimization(params, net, content_losses, style_losses, prevPlusFlow_losses,
264 |           img, frameIdx, run, num_iterations)
265 | 
266 |       -- Remove this iteration's content and temporal layers
267 |       for i=#losses_indices, 1, -1 do
268 |         if temporalLossEnabled or losses_type[i] == 'content' then
269 |           additional_layers = additional_layers - 1
270 |           net:remove(losses_indices[i] + additional_layers)
271 |         end
272 |       end
273 |       assert(additional_layers == 0)
274 |     end
275 |   end
276 | end
277 | 
278 | -- Loads the output image of frame idx-1 from the given run and warps it with the backward flow.
279 | -- If pad_mean_pixel is true, disocclusions at the borders are filled with the VGG mean pixel.
280 | function readPrevImageWarped(idx, params, run, pad_mean_pixel)
281 |   local flowPath = getFormatedFlowFileName(params.backwardFlow_pattern, idx-1, idx)
282 |   print(string.format('Reading backward flow file "%s".', flowPath))
283 |   local backwardFlow = flowFile.load(flowPath)
284 |   local previousFrame = image.load(build_OutFilename(params, idx-1, run), 3)
285 |   if not pad_mean_pixel then
286 |     return image.warp(previousFrame, backwardFlow)
287 |   end
288 |   local fill = torch.DoubleTensor({123.68/256.0, 116.779/256.0, 103.939/256.0})
289 |   -- Pad with -1, so that the padded pixels can be recognized and replaced afterwards.
290 |   local warped = image.warp(previousFrame, backwardFlow, 'bilinear', true, 'pad', -1)
291 |   for row=1, warped:size(2) do
292 |     for col=1, warped:size(3) do
293 |       local isPadded = warped[1][row][col] == -1 and warped[2][row][col] == -1 and warped[3][row][col] == -1
294 |       if isPadded then
295 |         for channel=1, 3 do
296 |           warped[channel][row][col] = fill[channel]
297 |         end
298 |       end
299 |     end
300 |   end
301 |   return warped
302 | end
303 | 
304 | -- Warps the following frame: loads the output image of frame idx+1 from the given run and
305 | -- warps it with the forward flow (params.forwardFlow_pattern).
306 | -- Disocclusions at the borders will be filled with the VGG mean pixel, if pad_mean_pixel is true.
307 | function readNextImageWarped(idx, params, run, pad_mean_pixel)
308 |   local flowFileName = getFormatedFlowFileName(params.forwardFlow_pattern, idx+1, idx)
309 |   print(string.format('Reading forward flow file "%s".', flowFileName))
310 |   local flow = flowFile.load(flowFileName)
311 |   local nextImg = image.load(build_OutFilename(params, idx+1, run), 3)
312 |   if not pad_mean_pixel then
313 |     return image.warp(nextImg, flow)
314 |   end
315 |   local mean_pixel = torch.DoubleTensor({123.68/256.0, 116.779/256.0, 103.939/256.0})
316 |   local result = image.warp(nextImg, flow, 'bilinear', true, 'pad', -1)
317 |   for x=1, result:size(2) do
318 |     for y=1, result:size(3) do
319 |       if result[1][x][y] == -1 and result[2][x][y] == -1 and result[3][x][y] == -1 then
320 |         result[1][x][y] = mean_pixel[1]
321 |         result[2][x][y] = mean_pixel[2]
322 |         result[3][x][y] = mean_pixel[3]
323 |       end
324 |     end
325 |   end
326 |   return result
327 | end
328 | 
329 | -- Parse the command line once to find out whether an argument file was given (-args).
330 | local tmpParams = cmd:parse(arg)
331 | local params = nil
332 | local file = io.open(tmpParams.args, 'r')
333 | 
334 | if tmpParams.args == '' or file == nil  then
335 |   params = cmd:parse(arg)
336 | else
337 |   -- Arguments from the file are placed before the command line arguments.
338 |   local args = {}
339 |   for line in file:lines() do
340 |     if line:sub(1, 1) == '-' then
341 |       local splits = str_split(line, " ", 2)
342 |       -- Append the option name and, only if present, its value. Storing a nil value (option
343 |       -- without value) would leave a hole in the table, which makes its length ill-defined.
344 |       for s=1, math.min(#splits, 2) do
345 |         args[#args + 1] = splits[s]
346 |       end
347 |     end
348 |   end
349 |   io.close(file)
350 |   -- Append the actual command line arguments.
351 |   for i=1, #arg do
352 |     args[#args + 1] = arg[i]
353 |   end
354 |   params = cmd:parse(args)
355 | end
356 | 
357 | main(params)
358 | 


--------------------------------------------------------------------------------
/artistic_video.lua:
--------------------------------------------------------------------------------
  1 | require 'torch'
  2 | require 'nn'
  3 | require 'image'
  4 | require 'loadcaffe'
  5 | require 'artistic_video_core'
  6 | 
  7 | local flowFile = require 'flowFileLoader'
  8 | 
  9 | --------------------------------------------------------------------------------
 10 | 
 11 | local cmd = torch.CmdLine()
 12 | 
 13 | -- Basic options
 14 | cmd:option('-style_image', 'example/seated-nude.jpg',
 15 |            'Style target image')
 16 | cmd:option('-style_blend_weights', 'nil')
 17 | cmd:option('-content_pattern', 'example/marple8_%02d.ppm',
 18 |            'Content target pattern')
 19 | cmd:option('-num_images', 0, 'Number of content images. Set 0 for autodetect.')
 20 | cmd:option('-start_number', 1, 'Frame index to start with')
 21 | cmd:option('-continue_with', 1, 'Continue with the given frame index.')
 22 | cmd:option('-gpu', 0, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1')
 23 | cmd:option('-number_format', '%d', 'Number format of the output images.')
 24 | 
 25 | --Flow options
 26 | cmd:option('-flow_pattern', 'example/deepflow/backward_[%d]_{%d}.flo',
 27 |            'Optical flow files pattern')
 28 | cmd:option('-flowWeight_pattern', 'example/deepflow/reliable_[%d]_{%d}.pgm',
 29 |            'Optical flow weight files pattern.')
 30 | cmd:option('-flow_relative_indices', '1', 'Use flow from the given indices.')
 31 | cmd:option('-use_flow_every', -1, 'Uses flow from the given index and every multiple of that; -1 to disable.')
 32 | cmd:option('-invert_flowWeights', 0, 'Invert flow weights given by flowWeight_pattern.')
 33 | 
 34 | -- Optimization options
 35 | cmd:option('-content_weight', 5e0)
 36 | cmd:option('-style_weight', 1e2)
 37 | cmd:option('-temporal_weight', 1e3)
 38 | cmd:option('-tv_weight', 1e-3)
 39 | cmd:option('-temporal_loss_criterion', 'mse', 'mse|smoothl1')
 40 | cmd:option('-num_iterations', '2000,1000',
 41 |            'Can be set separately for the first and for subsequent iterations, separated by comma, or one value for all.')
 42 | cmd:option('-tol_loss_relative', 0.0001, 'Stop if relative change of the loss function is below this value')
 43 | cmd:option('-tol_loss_relative_interval', 50, 'Interval between two loss comparisons')
 44 | cmd:option('-normalize_gradients', false)
 45 | cmd:option('-init', 'random,prevWarped', 'random|image,random|image|prev|prevWarped|first')
 46 | cmd:option('-optimizer', 'lbfgs', 'lbfgs|adam')
 47 | cmd:option('-learning_rate', 1e1)
 48 | 
 49 | -- Output options
 50 | cmd:option('-print_iter', 100)
 51 | cmd:option('-save_iter', 0)
 52 | cmd:option('-output_image', 'out.png')
 53 | cmd:option('-output_folder', '')
 54 | cmd:option('-save_init', false, 'Whether the initialization image should be saved (for debugging purposes).')
 55 | 
 56 | -- Other options
 57 | cmd:option('-style_scale', 1.0)
 58 | cmd:option('-pooling', 'max', 'max|avg')
 59 | cmd:option('-proto_file', 'models/VGG_ILSVRC_19_layers_deploy.prototxt')
 60 | cmd:option('-model_file', 'models/VGG_ILSVRC_19_layers.caffemodel')
 61 | cmd:option('-backend', 'nn', 'nn|cudnn|clnn')
 62 | cmd:option('-cudnn_autotune', false)
 63 | cmd:option('-seed', -1)
 64 | cmd:option('-content_layers', 'relu4_2', 'layers for content')
 65 | cmd:option('-style_layers', 'relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', 'layers for style')
 66 | cmd:option('-args', '', 'Arguments in a file, one argument per line')
 67 | 
 68 | -- Advanced options (changing them is usually not required)
 69 | cmd:option('-combine_flowWeights_method', 'closestFirst',
 70 |            'Which long-term weighting scheme to use: normalize or closestFirst. Default and recommended: closestFirst')
 70 |            'Which long-term weighting scheme to use: normalize or closestFirst. Deafult and recommended: closestFirst')
 71 | 
 72 | -- Parameter gradients are not needed by our net, so accumulating them is replaced by a no-op.
 73 | nn.SpatialConvolutionMM.accGradParameters = function(self)
 74 | end
 75 | 
 76 | local function main(params) -- Processes the content frames one after another and runs the optimization for each of them.
 77 |   if params.gpu >= 0 then
 78 |     if params.backend ~= 'clnn' then
 79 |       require 'cutorch'
 80 |       require 'cunn'
 81 |       cutorch.setDevice(params.gpu + 1)
 82 |     else
 83 |       require 'clnn'
 84 |       require 'cltorch'
 85 |       cltorch.setDevice(params.gpu + 1)
 86 |     end
 87 |   else
 88 |     params.backend = 'nn'
 89 |   end
 90 | 
 91 |   if params.backend == 'cudnn' then
 92 |     require 'cudnn'
 93 |     if params.cudnn_autotune then 
 94 |       cudnn.benchmark = true
 95 |     end
 96 |     cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop
 97 |   end
 98 | 
 99 |   local loadcaffe_backend = params.backend
100 |   if params.backend == 'clnn' then loadcaffe_backend = 'nn' end
101 |   local cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):float()
102 |   cnn = MaybePutOnGPU(cnn, params)
103 | 
104 |   local style_images_caffe = getStyleImages(params)
105 | 
106 |   -- Set up the network, inserting style losses. Content and temporal loss will be inserted in each iteration.
107 |   local net, style_losses, losses_indices, losses_type = buildNet(cnn, params, style_images_caffe)
108 | 
109 |   -- We don't need the base CNN anymore, so clean it up to save memory.
110 |   cnn = nil
111 |   for i=1,#net.modules do
112 |     local module = net.modules[i]
113 |     if torch.type(module) == 'nn.SpatialConvolutionMM' then
114 |         -- remove these, not used, but uses gpu memory
115 |         module.gradWeight = nil
116 |         module.gradBias = nil
117 |     end
118 |   end
119 |   collectgarbage()
120 | 
121 |   -- There can be different settings for the first frame and for subsequent frames (comma-separated; a single value is used for both).
122 |   local num_iterations_split = params.num_iterations:split(",")
123 |   local numIters_first, numIters_subseq = num_iterations_split[1], num_iterations_split[2] or num_iterations_split[1]
124 |   local init_split = params.init:split(",")
125 |   local init_first, init_subseq = init_split[1], init_split[2] or init_split[1]
126 |   
127 |   local firstImg = nil
128 |   local flow_relative_indices_split = params.flow_relative_indices:split(",")
129 | 
130 |   local num_images = params.num_images
131 |   if num_images == 0 then
132 |     num_images = calcNumberOfContentImages(params)
133 |     print("Detected " .. num_images .. " content images.")
134 |   end
135 | 
136 |   -- Iterate over all frames in the video sequence, starting with the frame selected by -continue_with
137 |   for frameIdx=params.start_number + params.continue_with - 1, params.start_number + num_images - 1 do
138 | 
139 |     -- Set seed
140 |     if params.seed >= 0 then
141 |       torch.manualSeed(params.seed)
142 |     end
143 | 
144 |     local content_image = getContentImage(frameIdx, params)
145 |     if content_image == nil then
146 |       print("No more frames.")
147 |       do return end
148 |     end
149 |     local content_losses, temporal_losses = {}, {}
150 |     local additional_layers = 0
151 |     local num_iterations = frameIdx == params.start_number and tonumber(numIters_first) or tonumber(numIters_subseq)
152 |     local init = frameIdx == params.start_number and init_first or init_subseq
153 |     -- stores previous image indices used for the temporal constraint
154 |     local J = {}
155 |     -- stores previous image(s) warped, in the same order as J
156 |     local imgsWarped = {}
157 |     
158 |     -- Calculate from which indices we need a warped image
159 |     if frameIdx > params.start_number and params.temporal_weight ~= 0 then
160 |       for i=1, #flow_relative_indices_split do
161 |         local prevIndex = frameIdx - tonumber(flow_relative_indices_split[i])
162 |         if prevIndex >= params.start_number then 
163 |           table.insert(J, frameIdx - tonumber(flow_relative_indices_split[i]))
164 |         end
165 |       end
166 |       if params.use_flow_every > 0 then
167 |         for prevIndex=frameIdx - params.use_flow_every, params.start_number, -1 * params.use_flow_every do
168 |           if not tabl_contains(J, prevIndex) then
169 |             table.insert(J, prevIndex)
170 |           end
171 |         end
172 |       end
173 |       -- Sort table descending, useful to compute the long-term weights
174 |       table.sort(J, function(a,b) return a>b end)
175 |       -- Read the optical flow(s) and warp the previous image(s)
176 |       for j=1, #J do
177 |         local prevIndex = J[j]
178 |         local flowFileName = getFormatedFlowFileName(params.flow_pattern, math.abs(prevIndex), math.abs(frameIdx))
179 |         print(string.format('Reading flow file "%s".', flowFileName))
180 |         local flow = flowFile.load(flowFileName)
181 |         local fileName = build_OutFilename(params, math.abs(prevIndex - params.start_number + 1), -1)
182 |         local imgWarped = warpImage(image.load(fileName, 3), flow)
183 |         imgWarped = preprocess(imgWarped):float()
184 |         imgWarped = MaybePutOnGPU(imgWarped, params)
185 |         table.insert(imgsWarped, imgWarped)
186 |       end
187 |     end
188 | 
189 |     -- Add content and temporal loss for this iteration. Style loss is already included in the net.
190 |     for i=1, #losses_indices do
191 |       if losses_type[i] == 'content'  then
192 |         local loss_module = getContentLossModuleForLayer(net,
193 |           losses_indices[i] + additional_layers, content_image, params)
194 |         net:insert(loss_module, losses_indices[i] + additional_layers)
195 |         table.insert(content_losses, loss_module)
196 |         additional_layers = additional_layers + 1
197 |       elseif losses_type[i] == 'prevPlusFlow' and frameIdx > params.start_number then
198 |         for j=1, #J do
199 |           local loss_module = getWeightedContentLossModuleForLayer(net,
200 |             losses_indices[i] + additional_layers, imgsWarped[j],
201 |             params, nil)
202 |           net:insert(loss_module, losses_indices[i] + additional_layers)
203 |           table.insert(temporal_losses, loss_module)
204 |           additional_layers = additional_layers + 1
205 |         end
206 |       elseif losses_type[i] == 'prevPlusFlowWeighted' and frameIdx > params.start_number then
207 |         local flowWeightsTabl = {}
208 |         -- Read all flow weights
209 |         for j=1, #J do
210 |           local weightsFileName = getFormatedFlowFileName(params.flowWeight_pattern, J[j], math.abs(frameIdx))
211 |           print(string.format('Reading flowWeights file "%s".', weightsFileName))
212 |           table.insert(flowWeightsTabl, image.load(weightsFileName):float())
213 |         end
214 |         -- Preprocess flow weights, calculate long-term weights
215 |         processFlowWeights(flowWeightsTabl, params.combine_flowWeights_method, params.invert_flowWeights)
216 |         -- Create loss modules, one for each previous frame warped
217 |         for j=1, #J do
218 |           local flowWeights = flowWeightsTabl[j]
219 |           flowWeights = flowWeights:expand(3, flowWeights:size(2), flowWeights:size(3))
220 |           flowWeights = MaybePutOnGPU(flowWeights, params)
221 |           local loss_module = getWeightedContentLossModuleForLayer(net,
222 |             losses_indices[i] + additional_layers, imgsWarped[j],
223 |             params, flowWeights)
224 |           net:insert(loss_module, losses_indices[i] + additional_layers)
225 |           table.insert(temporal_losses, loss_module)
226 |           additional_layers = additional_layers + 1
227 |         end
228 |       end
229 |     end
230 | 
231 |     -- Initialization (the method may differ between the first frame and subsequent frames, see "init" above)
232 |     local img = nil
233 |     if init == 'random' then
234 |       img = torch.randn(content_image:size()):float():mul(0.001)
235 |     elseif init == 'image' then
236 |       img = content_image:clone():float()
237 |     elseif init == 'prevWarped' and frameIdx > params.start_number then
238 |       local flowFileName = getFormatedFlowFileName(params.flow_pattern, math.abs(frameIdx - 1), math.abs(frameIdx))
239 |       print(string.format('Reading flow file "%s".', flowFileName))
240 |       local flow = flowFile.load(flowFileName)
241 |       local fileName = build_OutFilename(params, math.abs(frameIdx - params.start_number), -1)
242 |       img = warpImage(image.load(fileName, 3), flow)
243 |       img = preprocess(img):float()
244 |     elseif init == 'prev' and frameIdx > params.start_number then
245 |       local fileName = build_OutFilename(params, math.abs(frameIdx - params.start_number), -1)
246 |       img = image.load(fileName, 3)
247 |       img = preprocess(img):float()
248 |     elseif init == 'first' then
249 |       img = firstImg:clone():float()
250 |     else
251 |       print('ERROR: Invalid initialization method.')
252 |       os.exit()
253 |     end
254 |     img = MaybePutOnGPU(img, params)
255 |     if params.save_init then
256 |       save_image(img,
257 |         string.format('%sinit-' .. params.number_format .. '.png',
258 |           params.output_folder, math.abs(frameIdx - params.start_number + 1)))
259 |     end
260 | 
261 |     -- Run the optimization to stylize the image, save the result to disk
262 |     runOptimization(params, net, content_losses, style_losses, temporal_losses, img, frameIdx, -1, num_iterations)
263 |     -- Keep a copy of the first frame's image (as left by runOptimization); it is used by the 'first' initialization.
264 |     if frameIdx == params.start_number then
265 |       firstImg = img:clone():float()
266 |     end
267 |     
268 |     -- Remove this iteration's content and temporal layers (in reverse order of insertion)
269 |     for i=#losses_indices, 1, -1 do
270 |       if frameIdx > params.start_number or losses_type[i] == 'content' then
271 |         if losses_type[i] == 'prevPlusFlowWeighted' or losses_type[i] == 'prevPlusFlow' then
272 |           for j=1, #J do
273 |             additional_layers = additional_layers - 1
274 |             net:remove(losses_indices[i] + additional_layers)
275 |           end
276 |         else
277 |           additional_layers = additional_layers - 1
278 |           net:remove(losses_indices[i] + additional_layers)
279 |         end
280 |       end
281 |     end
282 |     
283 |     -- Ensure that all layers have been removed correctly
284 |     assert(additional_layers == 0)
285 |     
286 |   end
287 | end
288 | 
289 | -- Warps a given image according to the given optical flow and returns the warped image.
290 | -- Disocclusions at the borders (padded with -1 by image.warp) will be filled with the VGG mean pixel.
291 | function warpImage(img, flow)
292 |   local mean_pixel = torch.DoubleTensor({123.68/256.0, 116.779/256.0, 103.939/256.0})
293 |   local result = image.warp(img, flow, 'bilinear', true, 'pad', -1)
294 |   for x=1, result:size(2) do
295 |     for y=1, result:size(3) do
296 |       if result[1][x][y] == -1 and result[2][x][y] == -1 and result[3][x][y] == -1 then
297 |         result[1][x][y] = mean_pixel[1]
298 |         result[2][x][y] = mean_pixel[2]
299 |         result[3][x][y] = mean_pixel[3]
300 |       end
301 |     end
302 |   end
303 |   return result
304 | end
305 | 
306 | -- Creates long-term flow weights. The tensors in flowWeightsTabl are modified in place.
307 | function processFlowWeights(flowWeightsTabl, method, invert)
308 |   if invert == 1 then
309 |     local function complement(w) return 1 - w end
310 |     for _, weights in ipairs(flowWeightsTabl) do
311 |       weights:apply(complement)
312 |     end
313 |   end
314 |   if method == 'closestFirst' then
315 |     -- Take the closest previous frame(s): subtract all preceding entries of the table, then clamp at zero.
316 |     for j=2, #flowWeightsTabl do
317 |       local current = flowWeightsTabl[j]
318 |       for closer=j-1, 1, -1 do
319 |         current:add(-1, flowWeightsTabl[closer])
320 |       end
321 |       current:cmax(0)
322 |     end
323 |   elseif method == 'normalize' then
324 |     -- Normalize so that the weights sum up to max 1
325 |     local total = tabl_sum(flowWeightsTabl)
326 |     total:cmax(1)
327 |     for _, weights in ipairs(flowWeightsTabl) do weights:cdiv(total) end
328 |   end
329 | end
330 | 
331 | -- Parse the command line once to find out whether an argument file was given (-args).
332 | local tmpParams = cmd:parse(arg)
333 | local params = nil
334 | local file = io.open(tmpParams.args, 'r')
335 | 
336 | if tmpParams.args == '' or file == nil  then
337 |   params = cmd:parse(arg)
338 | else
339 |   -- Arguments from the file are placed before the command line arguments.
340 |   local args = {}
341 |   for line in file:lines() do
342 |     if line:sub(1, 1) == '-' then
343 |       local splits = str_split(line, " ", 2)
344 |       -- Append the option name and, only if present, its value. Storing a nil value (option
345 |       -- without value) would leave a hole in the table, which makes its length ill-defined.
346 |       for s=1, math.min(#splits, 2) do
347 |         args[#args + 1] = splits[s]
348 |       end
349 |     end
350 |   end
351 |   io.close(file)
352 |   -- Append the actual command line arguments.
353 |   for i=1, #arg do
354 |     args[#args + 1] = arg[i]
355 |   end
356 |   params = cmd:parse(args)
357 | end
358 | 
359 | main(params)
360 | 


--------------------------------------------------------------------------------
/consistencyChecker/CVector.h:
--------------------------------------------------------------------------------
  1 | // CVector
  2 | // A one-dimensional array including basic vector operations
  3 | //
  4 | // Author: Thomas Brox
  5 | // Last change: 23.05.2005
  6 | //-------------------------------------------------------------------------
  7 | #ifndef CVECTOR_H
  8 | #define CVECTOR_H
  9 | 
 10 | #include <iostream>
 11 | #include <fstream>
 12 | 
 13 | template <class T> class CMatrix;
 14 | template <class T> class CTensor;
 15 | 
 16 | template <class T>
 17 | class CVector { // one-dimensional array of T including basic vector operations
 18 | public:
 19 |   // default constructor
 20 |   inline CVector();
 21 |   // constructor taking the size of the vector
 22 |   inline CVector(const int aSize);
 23 |   // copy constructor
 24 |   CVector(const CVector<T>& aCopyFrom);
 25 |   // constructor (from array), takes a pointer to the values and the number of values
 26 |   CVector(const T* aPointer, const int aSize);
 27 |   // constructor with implicit filling, takes the size and the fill value
 28 |   CVector(const int aSize, const T aFillValue);
 29 |   // destructor (virtual)
 30 |   virtual ~CVector();
 31 | 
 32 |   // Changes the size of the vector (data is lost)
 33 |   void setSize(int aSize);
 34 |   // Fills the vector with the specified value (see also operator=)
 35 |   void fill(const T aValue);
 36 |   // Appends the values of another vector
 37 |   void append(CVector<T>& aVector);
 38 |   // Normalizes the length of the vector to 1
 39 |   void normalize();
 40 |   // Normalizes the component sum to 1
 41 |   void normalizeSum();
 42 |   // Reads values from a text file
 43 |   void readFromTXT(const char* aFilename);
 44 |   // Writes values to a text file
 45 |   void writeToTXT(char* aFilename);
 46 |   // Returns the sum of all values
 47 |   T sum();
 48 |   // Returns the minimum value
 49 |   T min();
 50 |   // Returns the maximum value
 51 |   T max();
 52 |   // Returns the Euclidean norm
 53 |   T norm();
 54 | 
 55 |   // Converts vector to homogeneous coordinates, i.e., all components are divided by last component
 56 |   CVector<T>& homogen();
 57 |   // Remove the last component
 58 |   inline void homogen_nD();
 59 |   // Computes the cross product between this vector and aVector
 60 |   void cross(CVector<T>& aVector);
 61 | 
 62 |   // Gives full access to the vector's values (both operators return a non-const reference, even on a const vector)
 63 |   inline T& operator()(const int aIndex) const;
 64 |   inline T& operator[](const int aIndex) const;
 65 |   // Fills the vector with the specified value (equivalent to fill)
 66 |   inline CVector<T>& operator=(const T aValue);
 67 |   // Copies a vector into this vector (size might change)
 68 |   CVector<T>& operator=(const CVector<T>& aCopyFrom);
 69 |   // Copies values from a matrix to the vector (size might change)
 70 |   CVector<T>& operator=(const CMatrix<T>& aCopyFrom);
 71 |   // Copies values from a tensor to the vector (size might change)
 72 |   CVector<T>& operator=(const CTensor<T>& aCopyFrom);
 73 |   // Adds another vector
 74 |   CVector<T>& operator+=(const CVector<T>& aVector);
 75 |   // Subtracts another vector
 76 |   CVector<T>& operator-=(const CVector<T>& aVector);
 77 |   // Multiplies the vector with a scalar
 78 |   CVector<T>& operator*=(const T aValue);
 79 |   // Scalar product (note: declared as operator*= but returns a value of type T)
 80 |   T operator*=(const CVector<T>& aVector);
 81 |   // Checks (non-)equivalence to another vector
 82 |   bool operator==(const CVector<T>& aVector);
 83 |   inline bool operator!=(const CVector<T>& aVector);
 84 | 
 85 |   // Gives access to the vector's size
 86 |   inline int size() const;
 87 |   // Gives access to the internal data representation (returns the raw pointer mData)
 88 |   inline T* data() const {return mData;}
 89 | protected:
 90 |   int mSize; // presumably the number of elements reported by size() - confirm in the implementation
 91 |   T* mData;  // pointer to the values, exposed through data()
 92 | };
 93 | 
// Adds two vectors component-wise and returns the result as a new vector
template <class T> CVector<T> operator+(const CVector<T>& vec1, const CVector<T>& vec2);
// Subtracts two vectors component-wise (vec1 - vec2) and returns the result as a new vector
template <class T> CVector<T> operator-(const CVector<T>& vec1, const CVector<T>& vec2);
// Multiplies vector with a scalar (both argument orders are supported)
template <class T> CVector<T> operator*(const CVector<T>& aVector, const T aValue);
template <class T> CVector<T> operator*(const T aValue, const CVector<T>& aVector);
// Computes the scalar product of two vectors
template <class T> T operator*(const CVector<T>& vec1, const CVector<T>& vec2);
// Computes cross product of two vectors (note: operator/ is not a division here)
template <class T> CVector<T> operator/(const CVector<T>& vec1, const CVector<T>& vec2);
// Sends the vector to an output stream
template <class T> std::ostream& operator<<(std::ostream& aStream, const CVector<T>& aVector);
107 | 
108 | // Exceptions thrown by CVector--------------------------------------------
109 | 
110 | // Thrown if one tries to access an element of a vector which is out of
111 | // the vector's bounds
// Exception type for out-of-range element access.
// Constructing it reports the offending index on std::cerr.
struct EVectorRangeOverflow {
  EVectorRangeOverflow(const int aIndex) {
    std::cerr << "Exception EVectorRangeOverflow: Index = " << aIndex << std::endl;
  }
};
118 | 
// Exception type for operations on two vectors of different size.
// Constructing it reports both sizes on std::cerr.
struct EVectorIncompatibleSize {
  EVectorIncompatibleSize(int aSize1, int aSize2) {
    std::cerr << "Exception EVectorIncompatibleSize: " << aSize1 << " <> " << aSize2 << std::endl;
  }
};
125 | 
126 | 
127 | // I M P L E M E N T A T I O N --------------------------------------------
128 | //
129 | // You might wonder why there is implementation code in a header file.
130 | // The reason is that not all C++ compilers yet manage separate compilation
131 | // of templates. Inline functions cannot be compiled separately anyway.
132 | // So in this case the whole implementation code is added to the header
133 | // file.
134 | // Users of CVector should ignore everything that's beyond this line.
135 | // ------------------------------------------------------------------------
136 | 
137 | // P U B L I C ------------------------------------------------------------
138 | // constructor
139 | template <class T>
140 | inline CVector<T>::CVector() : mSize(0) {
141 |   mData = new T[0];
142 | }
143 | 
144 | // constructor
145 | template <class T>
146 | inline CVector<T>::CVector(const int aSize)
147 |   : mSize(aSize) {
148 |   mData = new T[aSize];
149 | }
150 | 
151 | // copy constructor
152 | template <class T>
153 | CVector<T>::CVector(const CVector<T>& aCopyFrom)
154 |   : mSize(aCopyFrom.mSize) {
155 |   mData = new T[mSize];
156 |   for (int i = 0; i < mSize; i++)
157 |     mData[i] = aCopyFrom.mData[i];
158 | }
159 | 
160 | // constructor (from array)
161 | template <class T>
162 | CVector<T>::CVector(const T* aPointer, const int aSize)
163 |   : mSize(aSize) {
164 |   mData = new T[mSize];
165 |   for (int i = 0; i < mSize; i++)
166 |     mData[i] = aPointer[i];
167 | }
168 | 
169 | // constructor with implicit filling
170 | template <class T>
171 | CVector<T>::CVector(const int aSize, const T aFillValue)
172 |   : mSize(aSize) {
173 |   mData = new T[aSize];
174 |   fill(aFillValue);
175 | }
176 | 
177 | // destructor
178 | template <class T>
179 | CVector<T>::~CVector() {
180 |   delete[] mData;
181 | }
182 | 
183 | // setSize
184 | template <class T>
185 | void CVector<T>::setSize(int aSize) {
186 |   if (mData != 0) delete[] mData;
187 |   mData = new T[aSize];
188 |   mSize = aSize;
189 | }
190 | 
191 | // fill
192 | template <class T>
193 | void CVector<T>::fill(const T aValue) {
194 |   for (register int i = 0; i < mSize; i++)
195 |     mData[i] = aValue;
196 | }
197 | 
198 | // append
199 | template <class T>
200 | void CVector<T>::append(CVector<T>& aVector) {
201 |   T* aNewData = new T[mSize+aVector.size()];
202 |   for (int i = 0; i < mSize; i++)
203 |     aNewData[i] = mData[i];
204 |   for (int i = 0; i < aVector.size(); i++)
205 |     aNewData[i+mSize] = aVector(i);
206 |   mSize += aVector.size();
207 |   delete[] mData;
208 |   mData = aNewData;
209 | }
210 | 
211 | // normalize
212 | template <class T>
213 | void CVector<T>::normalize() {
214 |   T aSum = 0;
215 |   for (register int i = 0; i < mSize; i++)
216 |     aSum += mData[i]*mData[i];
217 |   if (aSum == 0) return;
218 |   aSum = 1.0/sqrt(aSum);
219 |   for (register int i = 0; i < mSize; i++)
220 |     mData[i] *= aSum;
221 | }
222 | 
223 | // normalizeSum
224 | template <class T>
225 | void CVector<T>::normalizeSum() {
226 |   T aSum = 0;
227 |   for (register int i = 0; i < mSize; i++)
228 |     aSum += mData[i];
229 |   if (aSum == 0) return;
230 |   aSum = 1.0/aSum;
231 |   for (register int i = 0; i < mSize; i++)
232 |     mData[i] *= aSum;
233 | }
234 | 
235 | // readFromTXT
236 | template<class T>
237 | void CVector<T>::readFromTXT(const char* aFilename) {
238 |   std::ifstream aStream(aFilename);
239 |   mSize = 0;
240 |   float aDummy;
241 |   while (!aStream.eof()) {
242 |     aStream >> aDummy;
243 |     mSize++;
244 |   }
245 |   aStream.close();
246 |   std::ifstream aStream2(aFilename);
247 |   delete mData;
248 |   mData = new T[mSize];
249 |   for (int i = 0; i < mSize; i++)
250 |     aStream2 >> mData[i];
251 | }
252 | 
253 | // writeToTXT
254 | template<class T>
255 | void CVector<T>::writeToTXT(char* aFilename) {
256 |   std::ofstream aStream(aFilename);
257 |   for (int i = 0; i < mSize; i++)
258 |     aStream << mData[i] << std::endl;
259 | }
260 | 
261 | // sum
262 | template <class T>
263 | T CVector<T>::sum() {
264 |   T val = mData[0];
265 |   for (int i = 1; i < mSize; i++)
266 |     val += mData[i];
267 |   return val;
268 | }
269 | 
270 | // min
271 | template <class T>
272 | T CVector<T>::min() {
273 |   T bestValue = mData[0];
274 |   for (int i = 1; i < mSize; i++)
275 |     if (mData[i] < bestValue) bestValue = mData[i];
276 |   return bestValue;
277 | }
278 | 
279 | // max
280 | template <class T>
281 | T CVector<T>::max() {
282 |   T bestValue = mData[0];
283 |   for (int i = 1; i < mSize; i++)
284 |     if (mData[i] > bestValue) bestValue = mData[i];
285 |   return bestValue;
286 | }
287 | 
288 | // norm
289 | template <class T>
290 | T CVector<T>::norm() {
291 |   T aSum = 0.0;
292 |   for (int i = 0; i < mSize; i++)
293 |     aSum += mData[i]*mData[i];
294 |   return sqrt(aSum);
295 | }
296 | 
297 | // homogen
298 | template <class T>
299 | CVector<T>& CVector<T>::homogen() {
300 |   if (mSize > 1 && mData[mSize-1] != 0) {
301 |     T invVal = 1.0/mData[mSize-1];
302 |   	for (int i = 0; i < mSize; i++)
303 |       mData[i] *= invVal;
304 |   }
305 |   return (*this);
306 | }
307 | 
308 | // homogen_nD
309 | template <class T>
310 | inline void CVector<T>::homogen_nD() {
311 |   mSize--;
312 | }
313 | 
314 | // cross
315 | template <class T>
316 | void CVector<T>::cross(CVector<T>& aVector) {
317 |   T aHelp0 = aVector(2)*mData[1] - aVector(1)*mData[2];
318 |   T aHelp1 = aVector(0)*mData[2] - aVector(2)*mData[0];
319 |   T aHelp2 = aVector(1)*mData[0] - aVector(0)*mData[1];
320 |   mData[0] = aHelp0;
321 |   mData[1] = aHelp1;
322 |   mData[2] = aHelp2;
323 | }
324 | 
325 | // operator()
326 | template <class T>
327 | inline T& CVector<T>::operator()(const int aIndex) const {
328 |   #ifdef _DEBUG
329 |     if (aIndex >= mSize || aIndex < 0)
330 |       throw EVectorRangeOverflow(aIndex);
331 |   #endif
332 |   return mData[aIndex];
333 | }
334 | 
335 | // operator[]
336 | template <class T>
337 | inline T& CVector<T>::operator[](const int aIndex) const {
338 |   return operator()(aIndex);
339 | }
340 | 
341 | // operator=
342 | template <class T>
343 | inline CVector<T>& CVector<T>::operator=(const T aValue) {
344 |   fill(aValue);
345 |   return *this;
346 | }
347 | 
348 | template <class T>
349 | CVector<T>& CVector<T>::operator=(const CVector<T>& aCopyFrom) {
350 |   if (this != &aCopyFrom) {
351 |     if (mSize != aCopyFrom.size()) {
352 |       delete[] mData;
353 |       mSize = aCopyFrom.size();
354 |       mData = new T[mSize];
355 |     }
356 |     for (register int i = 0; i < mSize; i++)
357 |       mData[i] = aCopyFrom.mData[i];
358 |   }
359 |   return *this;
360 | }
361 | 
362 | template <class T>
363 | CVector<T>& CVector<T>::operator=(const CMatrix<T>& aCopyFrom) {
364 |   if (mSize != aCopyFrom.size()) {
365 |     delete[] mData;
366 |     mSize = aCopyFrom.size();
367 |     mData = new T[mSize];
368 |   }
369 |   for (register int i = 0; i < mSize; i++)
370 |     mData[i] = aCopyFrom.data()[i];
371 |   return *this;
372 | }
373 | 
374 | template <class T>
375 | CVector<T>& CVector<T>::operator=(const CTensor<T>& aCopyFrom) {
376 |   if (mSize != aCopyFrom.size()) {
377 |     delete[] mData;
378 |     mSize = aCopyFrom.size();
379 |     mData = new T[mSize];
380 |   }
381 |   for (register int i = 0; i < mSize; i++)
382 |     mData[i] = aCopyFrom.data()[i];
383 |   return *this;
384 | }
385 | 
386 | // operator +=
387 | template <class T>
388 | CVector<T>& CVector<T>::operator+=(const CVector<T>& aVector) {
389 |   #ifdef _DEBUG
390 |   if (mSize != aVector.size()) throw EVectorIncompatibleSize(mSize,aVector.size());
391 |   #endif
392 |   for (int i = 0; i < mSize; i++)
393 |     mData[i] += aVector(i);
394 |   return *this;
395 | }
396 | 
397 | // operator -=
398 | template <class T>
399 | CVector<T>& CVector<T>::operator-=(const CVector<T>& aVector) {
400 |   #ifdef _DEBUG
401 |   if (mSize != aVector.size()) throw EVectorIncompatibleSize(mSize,aVector.size());
402 |   #endif
403 |   for (int i = 0; i < mSize; i++)
404 |     mData[i] -= aVector(i);
405 |   return *this;
406 | }
407 | 
408 | // operator *=
409 | template <class T>
410 | CVector<T>& CVector<T>::operator*=(const T aValue) {
411 |   for (int i = 0; i < mSize; i++)
412 |     mData[i] *= aValue;
413 |   return *this;
414 | }
415 | 
416 | template <class T>
417 | T CVector<T>::operator*=(const CVector<T>& aVector) {
418 |   #ifdef _DEBUG
419 |   if (mSize != aVector.size()) throw EVectorIncompatibleSize(mSize,aVector.size());
420 |   #endif
421 |   T aSum = 0.0;
422 |   for (int i = 0; i < mSize; i++)
423 |     aSum += mData[i]*aVector(i);
424 |   return aSum;
425 | }
426 | 
427 | // operator ==
428 | template <class T>
429 | bool CVector<T>::operator==(const CVector<T>& aVector) {
430 |   if (mSize != aVector.size()) return false;
431 |   int i = 0;
432 |   while (i < mSize && aVector(i) == mData[i])
433 |     i++;
434 |   return (i == mSize);
435 | }
436 | 
437 | // operator !=
438 | template <class T>
439 | inline bool CVector<T>::operator!=(const CVector<T>& aVector) {
440 |   return !((*this)==aVector);
441 | }
442 | 
443 | // size
444 | template <class T>
445 | inline int CVector<T>::size() const {
446 |   return mSize;
447 | }
448 | 
449 | // N O N - M E M B E R   F U N C T I O N S -------------------------------------
450 | 
451 | // operator +
452 | template <class T>
453 | CVector<T> operator+(const CVector<T>& vec1, const CVector<T>& vec2) {
454 |   #ifdef _DEBUG
455 |   if (vec1.size() != vec2.size()) throw EVectorIncompatibleSize(vec1.size(),vec2.size());
456 |   #endif
457 |   CVector<T> result(vec1.size());
458 |   for (int i = 0; i < vec1.size(); i++)
459 |     result(i) = vec1[i]+vec2[i];
460 |   return result;
461 | }
462 | 
463 | // operator -
464 | template <class T>
465 | CVector<T> operator-(const CVector<T>& vec1, const CVector<T>& vec2) {
466 |   #ifdef _DEBUG
467 |   if (vec1.size() != vec2.size()) throw EVectorIncompatibleSize(vec1.size(),vec2.size());
468 |   #endif
469 |   CVector<T> result(vec1.size());
470 |   for (int i = 0; i < vec1.size(); i++)
471 |     result(i) = vec1(i)-vec2(i);
472 |   return result;
473 | }
474 | 
475 | // operator *
476 | template <class T>
477 | CVector<T> operator*(const T aValue, const CVector<T>& aVector) {
478 |   CVector<T> result(aVector.size());
479 |   for (int i = 0; i < aVector.size(); i++)
480 |     result(i) = aValue*aVector(i);
481 |   return result;
482 | }
483 | 
484 | template <class T>
485 | CVector<T> operator*(const CVector<T>& aVector, const T aValue) {
486 |   return operator*(aValue,aVector);
487 | }
488 | 
489 | template <class T>
490 | T operator*(const CVector<T>& vec1, const CVector<T>& vec2) {
491 |   #ifdef _DEBUG
492 |   if (vec1.size() != vec2.size()) throw EVectorIncompatibleSize(vec1.size(),vec2.size());
493 |   #endif
494 |   T aSum = 0.0;
495 |   for (int i = 0; i < vec1.size(); i++)
496 |     aSum += vec1(i)*vec2(i);
497 |   return aSum;
498 | }
499 | 
500 | // operator /
501 | template <class T>
502 | CVector<T> operator/(const CVector<T>& vec1, const CVector<T>& vec2) {
503 |   CVector<T> result(3);
504 |   result[0]=vec1[1]*vec2[2] - vec1[2]*vec2[1];
505 |   result[1]=vec1[2]*vec2[0] - vec1[0]*vec2[2];
506 |   result[2]=vec1[0]*vec2[1] - vec1[1]*vec2[0];
507 |   return result;
508 | }
509 | 
510 | // operator <<
511 | template <class T>
512 | std::ostream& operator<<(std::ostream& aStream, const CVector<T>& aVector) {
513 |   for (int i = 0; i < aVector.size(); i++)
514 |     aStream << aVector(i) << '|';
515 |   aStream << std::endl;
516 |   return aStream;
517 | }
518 | 
519 | #endif
520 | 


--------------------------------------------------------------------------------
/artistic_video_core.lua:
--------------------------------------------------------------------------------
  1 | require 'optim'
  2 | 
  3 | -- modified to include a threshold for relative changes in the loss function as stopping criterion
  4 | local lbfgs_mod = require 'lbfgs'
  5 | 
  6 | ---
  7 | --- MAIN FUNCTIONS
  8 | ---
  9 | 
 10 | function runOptimization(params, net, content_losses, style_losses, temporal_losses,
 11 |     img, frameIdx, runIdx, max_iter)
 12 |   local isMultiPass = (runIdx ~= -1)
 13 | 
 14 |   -- Run it through the network once to get the proper size for the gradient
 15 |   -- All the gradients will come from the extra loss modules, so we just pass
 16 |   -- zeros into the top of the net on the backward pass.
 17 |   local y = net:forward(img)
 18 |   local dy = img.new(#y):zero()
 19 | 
 20 |   -- Declaring this here lets us access it in maybe_print
 21 |   local optim_state = nil
 22 |   if params.optimizer == 'lbfgs' then
 23 |     optim_state = {
 24 |       maxIter = max_iter,
 25 |       tolFunRelative = params.tol_loss_relative,
 26 |       tolFunRelativeInterval = params.tol_loss_relative_interval,
 27 |       verbose=true,
 28 |     }
 29 |   elseif params.optimizer == 'adam' then
 30 |     optim_state = {
 31 |       learningRate = params.learning_rate,
 32 |     }
 33 |   else
 34 |     error(string.format('Unrecognized optimizer "%s"', params.optimizer))
 35 |   end
 36 | 
 37 |   local function maybe_print(t, loss, alwaysPrint)
 38 |     local should_print = (params.print_iter > 0 and t % params.print_iter == 0) or alwaysPrint
 39 |     if should_print then
 40 |       print(string.format('Iteration %d / %d', t, max_iter))
 41 |       for i, loss_module in ipairs(content_losses) do
 42 |         print(string.format('  Content %d loss: %f', i, loss_module.loss))
 43 |       end
 44 |       for i, loss_module in ipairs(temporal_losses) do
 45 |         print(string.format('  Temporal %d loss: %f', i, loss_module.loss))
 46 |       end
 47 |       for i, loss_module in ipairs(style_losses) do
 48 |         print(string.format('  Style %d loss: %f', i, loss_module.loss))
 49 |       end
 50 |       print(string.format('  Total loss: %f', loss))
 51 |     end
 52 |   end
 53 | 
 54 |   local function print_end(t)
 55 |     --- calculate total loss
 56 |     local loss = 0
 57 |     for _, mod in ipairs(content_losses) do
 58 |       loss = loss + mod.loss
 59 |     end
 60 |     for _, mod in ipairs(temporal_losses) do
 61 |       loss = loss + mod.loss
 62 |     end
 63 |     for _, mod in ipairs(style_losses) do
 64 |       loss = loss + mod.loss
 65 |     end
 66 |     -- print informations
 67 |     maybe_print(t, loss, true)
 68 |   end
 69 | 
 70 |   local function maybe_save(t, isEnd)
 71 |     local should_save_intermed = params.save_iter > 0 and t % params.save_iter == 0
 72 |     local should_save_end = t == max_iter or isEnd
 73 |     if should_save_intermed or should_save_end then
 74 |       local filename = nil
 75 |       if isMultiPass then
 76 |         filename = build_OutFilename(params, frameIdx, runIdx)
 77 |       else
 78 |         filename = build_OutFilename(params, math.abs(frameIdx - params.start_number + 1), should_save_end and -1 or t)
 79 |       end
 80 |       save_image(img, filename)
 81 |     end
 82 |   end
 83 | 
 84 |   -- Function to evaluate loss and gradient. We run the net forward and
 85 |   -- backward to get the gradient, and sum up losses from the loss modules.
 86 |   -- optim.lbfgs internally handles iteration and calls this fucntion many
 87 |   -- times, so we manually count the number of iterations to handle printing
 88 |   -- and saving intermediate results.
 89 |   local num_calls = 0
 90 |   local function feval(x)
 91 |     num_calls = num_calls + 1
 92 |     net:forward(x)
 93 |     local grad = net:backward(x, dy)
 94 |     local loss = 0
 95 |     for _, mod in ipairs(content_losses) do
 96 |       loss = loss + mod.loss
 97 |     end
 98 |     for _, mod in ipairs(temporal_losses) do
 99 |       loss = loss + mod.loss
100 |     end
101 |     for _, mod in ipairs(style_losses) do
102 |       loss = loss + mod.loss
103 |     end
104 |     maybe_print(num_calls, loss, false)
105 |     -- Only need to print if single-pass algorithm is used.
106 |     if not isMultiPass then 
107 |       maybe_save(num_calls, false)
108 |     end
109 | 
110 |     collectgarbage()
111 |     -- optim.lbfgs expects a vector for gradients
112 |     return loss, grad:view(grad:nElement())
113 |   end
114 | 
115 |   start_time = os.time()
116 |   
117 |   -- Run optimization.
118 |   if params.optimizer == 'lbfgs' then
119 |     print('Running optimization with L-BFGS')
120 |     local x, losses = lbfgs_mod.optimize(feval, img, optim_state)
121 |   elseif params.optimizer == 'adam' then
122 |     print('Running optimization with ADAM')
123 |     for t = 1, max_iter do
124 |       local x, losses = optim.adam(feval, img, optim_state)
125 |     end
126 |   end
127 |   
128 |   end_time = os.time()
129 |   elapsed_time = os.difftime(end_time-start_time)
130 |   print("Running time: " .. elapsed_time .. "s")
131 |   
132 |   print_end(num_calls)
133 |   maybe_save(num_calls, true)
134 | end
135 | 
136 | -- Rebuild the network, insert style loss and return the indices for content and temporal loss
-- Builds a new nn.Sequential from the layers of `cnn` and inserts a style loss
-- module after every layer listed in params.style_layers. For content and
-- temporal losses only the insertion positions are recorded.
--
-- cnn                 source network whose layers are reused
-- params              option table; reads style_blend_weights, content_layers,
--                     style_layers, temporal_weight, tv_weight, pooling,
--                     normalize_gradients and style_weight
-- style_images_caffe  list of style images that are forwarded through the net
--
-- Returns: net, style_losses, contentLike_layers_indices, contentLike_layers_type
-- (the last two are parallel lists: position and kind of each content-like loss).
function buildNet(cnn, params, style_images_caffe)
   -- Handle style blending weights for multiple style inputs
  local style_blend_weights = nil
  if params.style_blend_weights == 'nil' then
    -- Style blending not specified, so use equal weighting
    style_blend_weights = {}
    for i = 1, #style_images_caffe do
      table.insert(style_blend_weights, 1.0)
    end
  else
    style_blend_weights = params.style_blend_weights:split(',')
    assert(#style_blend_weights == #style_images_caffe,
      '-style_blend_weights and -style_images must have the same number of elements')
  end
  -- Normalize the style blending weights so they sum to 1
  local style_blend_sum = 0
  for i = 1, #style_blend_weights do
    style_blend_weights[i] = tonumber(style_blend_weights[i])
    style_blend_sum = style_blend_sum + style_blend_weights[i]
  end
  for i = 1, #style_blend_weights do
    style_blend_weights[i] = style_blend_weights[i] / style_blend_sum
  end
  
  local content_layers = params.content_layers:split(",")
  local style_layers = params.style_layers:split(",")
  -- Which layer to use for the temporal loss. By default, it uses a pixel based loss, masked by the certainty
  --(indicated by initWeighted).
  local temporal_layers = params.temporal_weight > 0 and {'initWeighted'} or {}
  
  local style_losses = {}
  local contentLike_layers_indices = {}
  local contentLike_layers_type = {}
  
  -- Positions of the next not yet handled entry in each of the layer lists
  local next_content_i, next_style_i, next_temporal_i = 1, 1, 1
  -- Position in `net` at which the next module would be inserted
  local current_layer_index = 1
  local net = nn.Sequential()
  
  -- Set up pixel based loss.
  if temporal_layers[next_temporal_i] == 'init' or temporal_layers[next_temporal_i] == 'initWeighted'  then
    print("Setting up temporal consistency.")
    table.insert(contentLike_layers_indices, current_layer_index)
    table.insert(contentLike_layers_type,
      (temporal_layers[next_temporal_i] == 'initWeighted') and 'prevPlusFlowWeighted' or 'prevPlusFlow')
    next_temporal_i = next_temporal_i + 1
  end
  
  -- Set up other loss modules.
  -- For content loss, only remember the indices at which they are inserted, because the content changes for each frame.
  if params.tv_weight > 0 then
    local tv_mod = nn.TVLoss(params.tv_weight):float()
    tv_mod = MaybePutOnGPU(tv_mod, params) 
    net:add(tv_mod)
    current_layer_index = current_layer_index + 1
  end
  for i = 1, #cnn do
    -- Layers of cnn are only taken over as long as at least one layer list
    -- still has unhandled entries.
    if next_content_i <= #content_layers or next_style_i <= #style_layers or next_temporal_i <= #temporal_layers then
      local layer = cnn:get(i)
      local name = layer.name
      local layer_type = torch.type(layer)
      local is_pooling = (layer_type == 'cudnn.SpatialMaxPooling' or layer_type == 'nn.SpatialMaxPooling')
      if is_pooling and params.pooling == 'avg' then
        assert(layer.padW == 0 and layer.padH == 0)
        local kW, kH = layer.kW, layer.kH
        local dW, dH = layer.dW, layer.dH
        local avg_pool_layer = nn.SpatialAveragePooling(kW, kH, dW, dH):float()
        avg_pool_layer = MaybePutOnGPU(avg_pool_layer, params)
        local msg = 'Replacing max pooling at layer %d with average pooling'
        print(string.format(msg, i))
        net:add(avg_pool_layer)
      else
        net:add(layer)
      end
      current_layer_index = current_layer_index + 1
      -- NOTE(review): the three comparisons below index the layer lists without
      -- checking for exhaustion; if a layer had no name while a list is
      -- exhausted, nil == nil would match. Confirm that all layers of cnn are named.
      if name == content_layers[next_content_i] then
        print("Setting up content layer", i, ":", layer.name)
        table.insert(contentLike_layers_indices, current_layer_index)
        table.insert(contentLike_layers_type, 'content')
        next_content_i = next_content_i + 1
      end
      if name == temporal_layers[next_temporal_i] then
        print("Setting up temporal layer", i, ":", layer.name)
        table.insert(contentLike_layers_indices, current_layer_index)
        table.insert(contentLike_layers_type, 'prevPlusFlow')
        next_temporal_i = next_temporal_i + 1
      end
      if name == style_layers[next_style_i] then
        print("Setting up style layer  ", i, ":", layer.name)
        local gram = GramMatrix():float()
        gram = MaybePutOnGPU(gram, params)
        -- Blended Gram matrix target over all style images
        local target = nil
        -- Note: this loop variable shadows the layer index i of the outer loop
        for i = 1, #style_images_caffe do
          -- The style image is forwarded through the net built so far
          local target_features = net:forward(style_images_caffe[i]):clone()
          local target_i = gram:forward(target_features):clone()
          target_i:div(target_features:nElement())
          target_i:mul(style_blend_weights[i])
          if i == 1 then
            target = target_i
          else
            target:add(target_i)
          end
        end
        local norm = params.normalize_gradients
        local loss_module = nn.StyleLoss(params.style_weight, target, norm):float()
        loss_module = MaybePutOnGPU(loss_module, params)
        net:add(loss_module)
        current_layer_index = current_layer_index + 1
        table.insert(style_losses, loss_module)
        next_style_i = next_style_i + 1
      end
    end
  end
  return net, style_losses, contentLike_layers_indices, contentLike_layers_type
end
251 | 
252 | --
253 | -- LOSS MODULES
254 | --
255 | 
256 | -- Define an nn Module to compute content loss in-place
local ContentLoss, parent = torch.class('nn.ContentLoss', 'nn.Module')

-- strength   factor applied to the loss and to its gradient
-- target     tensor the input is compared against
-- normalize  if true, the gradient is divided by its L1 norm
function ContentLoss:__init(strength, target, normalize)
  parent.__init(self)
  self.strength = strength
  self.target = target
  self.normalize = normalize or false
  self.loss = 0
  self.crit = nn.MSECriterion()
end

-- Stores the scaled MSE between input and target in self.loss and passes
-- the input through unchanged. If the element counts differ, the loss is
-- skipped and reported as 0.
function ContentLoss:updateOutput(input)
  if input:nElement() == self.target:nElement() then
    self.loss = self.crit:forward(input, self.target) * self.strength
  else
    print('WARNING: Skipping content loss')
    -- Do not keep the loss value of an earlier, matching input
    self.loss = 0
  end
  self.output = input
  return self.output
end

-- Adds the gradient of the content loss to gradOutput.
function ContentLoss:updateGradInput(input, gradOutput)
  if input:nElement() ~= self.target:nElement() then
    -- The loss was skipped in the forward pass, so it contributes no
    -- gradient; without this, a stale (or empty) gradInput would be
    -- rescaled and added to gradOutput.
    self.gradInput = gradOutput
    return self.gradInput
  end
  self.gradInput = self.crit:backward(input, self.target)
  if self.normalize then
    self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
  end
  self.gradInput:mul(self.strength)
  self.gradInput:add(gradOutput)
  return self.gradInput
end
289 | 
-- Define an nn Module to compute a per-element weighted content loss in-place
local WeightedContentLoss, parent = torch.class('nn.WeightedContentLoss', 'nn.Module')

-- strength        factor applied to the loss and to its gradient
-- target          tensor the input is compared against
-- weights         per-element weights, or nil for an unweighted loss
-- normalize       if true, the gradient is divided by its L1 norm
-- loss_criterion  'mse' or 'smoothl1'; anything else falls back to MSE
function WeightedContentLoss:__init(strength, target, weights, normalize, loss_criterion)
  parent.__init(self)
  self.strength = strength
  if weights ~= nil then
    -- Take square root of the weights, because of the way the weights are applied
    -- to the mean square error function. We want w*(error^2), but we can only
    -- do (w*error)^2 = w^2 * error^2
    self.weights = torch.sqrt(weights)
    self.target = torch.cmul(target, self.weights)
  else
    self.target = target
    self.weights = nil
  end
  self.normalize = normalize or false
  self.loss = 0
  if loss_criterion == 'mse' then
    self.crit = nn.MSECriterion()
  elseif loss_criterion == 'smoothl1' then
    self.crit = nn.SmoothL1Criterion()
  else
    print('WARNING: Unknown flow loss criterion. Using MSE.')
    self.crit = nn.MSECriterion()
  end
end

-- Stores the scaled criterion value between the (weighted) input and the
-- (weighted) target in self.loss and passes the input through unchanged.
-- If the element counts differ, the loss is skipped and reported as 0.
function WeightedContentLoss:updateOutput(input)
  if input:nElement() == self.target:nElement() then
    -- The criterion is evaluated exactly once, on the weighted input if
    -- weights are present.
    if self.weights ~= nil then
      self.loss = self.crit:forward(torch.cmul(input, self.weights), self.target) * self.strength
    else
      self.loss = self.crit:forward(input, self.target) * self.strength
    end
  else
    print('WARNING: Skipping content loss')
    -- Do not keep the loss value of an earlier, matching input
    self.loss = 0
  end
  self.output = input
  return self.output
end

-- Adds the gradient of the weighted content loss to gradOutput.
function WeightedContentLoss:updateGradInput(input, gradOutput)
  if input:nElement() ~= self.target:nElement() then
    -- The loss was skipped in the forward pass, so it contributes no
    -- gradient; without this, a stale (or empty) gradInput would be
    -- rescaled and added to gradOutput.
    self.gradInput = gradOutput
    return self.gradInput
  end
  if self.weights ~= nil then
    self.gradInput = self.crit:backward(torch.cmul(input, self.weights), self.target)
    -- Chain rule: the criterion sees weights*input, so its gradient with
    -- respect to the input has to be multiplied by the weights once more.
    -- For weights that are 0 or 1 this leaves the values unchanged.
    self.gradInput:cmul(self.weights)
  else
    self.gradInput = self.crit:backward(input, self.target)
  end
  if self.normalize then
    self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
  end
  self.gradInput:mul(self.strength)
  self.gradInput:add(gradOutput)
  return self.gradInput
end
348 | 
349 | -- Returns a network that computes the CxC Gram matrix from inputs
350 | -- of size C x H x W
-- Builds a small network that flattens the two trailing input dimensions
-- and multiplies the resulting matrix with its own transpose.
function GramMatrix()
  -- Hands the same flattened features to both inputs of the matrix product
  local duplicate = nn.ConcatTable()
  duplicate:add(nn.Identity())
  duplicate:add(nn.Identity())

  local gram_net = nn.Sequential()
  gram_net:add(nn.View(-1):setNumInputDims(2))
  gram_net:add(duplicate)
  -- MM(false, true): first operand as is, second operand transposed
  gram_net:add(nn.MM(false, true))
  return gram_net
end
361 | 
362 | 
363 | -- Define an nn Module to compute style loss in-place
local StyleLoss, parent = torch.class('nn.StyleLoss', 'nn.Module')

-- strength   factor applied to the loss and to its gradient
-- target     Gram matrix the input's Gram matrix is compared against
-- normalize  if true, the gradient is divided by its L1 norm
function StyleLoss:__init(strength, target, normalize)
  parent.__init(self)
  self.normalize = normalize or false
  self.strength = strength
  self.target = target
  self.loss = 0

  self.gram = GramMatrix()
  self.G = nil
  self.crit = nn.MSECriterion()
end

-- Computes the Gram matrix of the input (divided by the number of input
-- elements), stores the scaled MSE to the target in self.loss and passes
-- the input through unchanged.
function StyleLoss:updateOutput(input)
  local gram_matrix = self.gram:forward(input)
  gram_matrix:div(input:nElement())
  self.G = gram_matrix
  self.loss = self.crit:forward(self.G, self.target) * self.strength
  self.output = input
  return self.output
end

-- Propagates the gradient of the style loss back through the Gram matrix
-- network and adds it to gradOutput.
function StyleLoss:updateGradInput(input, gradOutput)
  local gram_grad = self.crit:backward(self.G, self.target)
  -- Accounts for the division by the element count in the forward pass
  gram_grad:div(input:nElement())
  local input_grad = self.gram:backward(input, gram_grad)
  self.gradInput = input_grad
  if self.normalize then
    input_grad:div(torch.norm(input_grad, 1) + 1e-8)
  end
  input_grad:mul(self.strength)
  input_grad:add(gradOutput)
  return self.gradInput
end
398 | 
399 | 
-- nn Module that adds a total variation gradient to the backward pass.
-- The forward pass is the identity; no loss value is computed.
local TVLoss, parent = torch.class('nn.TVLoss', 'nn.Module')

-- strength  factor applied to the total variation gradient
function TVLoss:__init(strength)
  parent.__init(self)
  self.strength = strength
  -- Buffers for the horizontal and vertical neighbor differences
  self.x_diff = torch.Tensor()
  self.y_diff = torch.Tensor()
end

function TVLoss:updateOutput(input)
  self.output = input
  return self.output
end

-- TV loss backward pass inspired by kaishengtai/neuralart
-- The input is indexed as C x H x W.
function TVLoss:updateGradInput(input, gradOutput)
  self.gradInput:resizeAs(input):zero()
  local C, H, W = input:size(1), input:size(2), input:size(3)
  -- Size the buffers by the actual channel count instead of a fixed 3, so
  -- that the copies below also work for inputs with another number of channels
  self.x_diff:resize(C, H - 1, W - 1)
  self.y_diff:resize(C, H - 1, W - 1)
  -- Differences between each pixel and its right / lower neighbor
  self.x_diff:copy(input[{{}, {1, -2}, {1, -2}}])
  self.x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}])
  self.y_diff:copy(input[{{}, {1, -2}, {1, -2}}])
  self.y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}])
  self.gradInput[{{}, {1, -2}, {1, -2}}]:add(self.x_diff):add(self.y_diff)
  self.gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, self.x_diff)
  self.gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, self.y_diff)
  self.gradInput:mul(self.strength)
  self.gradInput:add(gradOutput)
  return self.gradInput
end
431 | 
-- Creates a ContentLoss module whose target is the activation that the
-- layers in front of layer_idx produce for target_img.
function getContentLossModuleForLayer(net, layer_idx, target_img, params)
  -- Sub-network made of the layers 1 .. layer_idx-1 of net.
  local prefix = nn.Sequential()
  for i = 1, layer_idx-1 do
    prefix:add(net:get(i))
  end
  local features = prefix:forward(target_img):clone()
  local module = nn.ContentLoss(params.content_weight, features, params.normalize_gradients)
  module = module:float()
  local placed = MaybePutOnGPU(module, params)
  return placed
end
443 | 
-- Creates a WeightedContentLoss module (weighted with params.temporal_weight
-- and the given per-element weights) whose target is the activation that
-- the layers in front of layer_idx produce for target_img.
function getWeightedContentLossModuleForLayer(net, layer_idx, target_img, params, weights)
  -- Sub-network made of the layers 1 .. layer_idx-1 of net.
  local prefix = nn.Sequential()
  for i = 1, layer_idx-1 do
    prefix:add(net:get(i))
  end
  local features = prefix:forward(target_img):clone()
  local module = nn.WeightedContentLoss(
    params.temporal_weight,
    features,
    weights,
    params.normalize_gradients,
    params.temporal_loss_criterion)
  module = module:float()
  local placed = MaybePutOnGPU(module, params)
  return placed
end
456 | 
457 | ---
458 | --- HELPER FUNCTIONS
459 | ---
460 | 
-- Moves obj to the GPU when params.gpu is non-negative: via :cl() for the
-- 'clnn' backend and via :cuda() for every other backend. Otherwise obj is
-- returned unchanged.
function MaybePutOnGPU(obj, params)
  if not (params.gpu >= 0) then
    return obj
  end
  if params.backend == 'clnn' then
    return obj:cl()
  end
  return obj:cuda()
end
471 | 
-- Converts an image into the representation expected by a Caffe model:
-- the channels are reordered from RGB to BGR, the values are scaled by 256
-- and the mean pixel is subtracted. The argument itself is not modified.
function preprocess(img)
  local bgr = img:index(1, torch.LongTensor{3, 2, 1})
  bgr:mul(256.0)
  local mean = torch.DoubleTensor({103.939, 116.779, 123.68})
  bgr:add(-1, mean:view(3, 1, 1):expandAs(bgr))
  return bgr
end
483 | 
-- Inverse of preprocess: adds the mean pixel back, reorders the channels
-- from BGR to RGB and divides the values by 256.
function deprocess(img)
  local mean = torch.DoubleTensor({103.939, 116.779, 123.68})
  local shifted = img + mean:view(3, 1, 1):expandAs(img)
  local rgb = shifted:index(1, torch.LongTensor{3, 2, 1})
  rgb:div(256.0)
  return rgb
end
493 | 
-- Deprocesses img, passes it through image.minmax with the bounds 0 and 1
-- and writes the result to fileName.
function save_image(img, fileName)
  local restored = deprocess(img:double())
  local displayable = image.minmax{tensor=restored, min=0, max=1}
  image.save(fileName, displayable)
end
499 | 
-- Returns true if val occurs among the array entries of tabl, else false.
function tabl_contains(tabl, val)
  local found = false
  for idx = 1, #tabl do
    if tabl[idx] == val then
      found = true
      break
    end
  end
  return found
end
509 | 
-- Adds up all elements of the array t. The sum is accumulated in a clone of
-- the first element, so the elements of t stay untouched.
function tabl_sum(t)
  local total = t[1]:clone()
  local count = #t
  for idx = 2, count do
    total:add(t[idx])
  end
  return total
end
518 | 
-- Splits str at every occurrence of delim and returns the fields as an array.
--   str:   the string to split
--   delim: the separator; it is used as a Lua pattern, so magic characters
--          have to be escaped by the caller
--   maxNb: optional upper bound on the number of returned fields; the last
--          field then holds the unsplit remainder. nil or values below 1
--          mean "no limit".
function str_split(str, delim, maxNb)
    -- Eliminate bad cases...
    if string.find(str, delim) == nil then
        return { str }
    end
    if maxNb == nil or maxNb < 1 then
        maxNb = 0    -- No limit
    end
    local result = {}
    local pat = "(.-)" .. delim .. "()"
    local nb = 1
    local lastPos = 1
    -- string.gmatch replaces string.gfind, which is deprecated since Lua 5.1
    -- and no longer available in later versions.
    for part, pos in string.gmatch(str, pat) do
        -- Testing the limit before storing a field makes maxNb == 1 return
        -- the whole string instead of silently splitting without limit.
        if nb == maxNb then break end
        result[nb] = part
        lastPos = pos
        nb = nb + 1
    end
    -- Handle the last field
    result[nb] = string.sub(str, lastPos)
    return result
end
541 | 
-- Returns true if the file called name can be opened for reading.
function fileExists(name)
   local handle = io.open(name, "r")
   if handle == nil then
      return false
   end
   io.close(handle)
   return true
end
546 | 
-- Probes the files produced by params.content_pattern for the frame numbers
-- params.start_number + 1, + 2, ... and returns the first offset for which
-- no file exists. Returns 0 if 99999 consecutive files were found, because
-- so many content frames indicate that something is wrong.
function calcNumberOfContentImages(params)
  for frameIdx = 1, 99999 do
    local fileName = string.format(params.content_pattern, frameIdx + params.start_number)
    if not fileExists(fileName) then
      return frameIdx
    end
  end
  return 0
end
557 | 
-- Builds the name of an output image from params.output_folder, the base
-- name and extension of params.output_image and the image number, which is
-- formatted with params.number_format. Unless iterationOrRun is -1, it is
-- appended to the name as "_<iterationOrRun>".
function build_OutFilename(params, image_number, iterationOrRun)
  local ext = paths.extname(params.output_image)
  local basename = paths.basename(params.output_image, ext)
  local prefixFormat = '%s%s-' .. params.number_format
  if iterationOrRun ~= -1 then
    return string.format(prefixFormat .. '_%d.%s',
      params.output_folder, basename, image_number, iterationOrRun, ext)
  end
  return string.format(prefixFormat .. '.%s',
    params.output_folder, basename, image_number, ext)
end
570 | 
-- Expands the placeholders of a flow file pattern: the format specifier
-- inside {...} is replaced by the formatted fromIndex, the one inside [...]
-- by the formatted toIndex.
function getFormatedFlowFileName(pattern, fromIndex, toIndex)
  -- Creates the gsub callback that formats the captured specifier with index.
  local function formatter(index)
    return function(spec)
      return string.format(spec, index)
    end
  end
  local withFrom = string.gsub(pattern, '{(.-)}', formatter(fromIndex))
  local withBoth = string.gsub(withFrom, '%[(.-)%]', formatter(toIndex))
  return withBoth
end
579 | 
-- Loads the content frame with number frameIdx as a 3-channel image,
-- preprocesses it, converts it to float and moves it to the GPU if
-- requested. Returns nil if the frame file does not exist.
function getContentImage(frameIdx, params)
  local fileName = string.format(params.content_pattern, frameIdx)
  if fileExists(fileName) then
    local frame = image.load(fileName, 3)
    frame = preprocess(frame):float()
    frame = MaybePutOnGPU(frame, params)
    return frame
  end
  return nil
end
588 | 
-- Loads all style images listed (comma separated) in params.style_image,
-- rescales and preprocesses them, and returns them as an array of float
-- tensors, moved to the GPU if requested.
function getStyleImages(params)
  -- The first content frame is only loaded to learn the content image size.
  local firstContentImg = image.load(string.format(params.content_pattern, params.start_number), 3)
  local style_images_caffe = {}
  for _, img_path in ipairs(params.style_image:split(',')) do
    local img = image.load(img_path, 3)
    -- Scale the style image so that its area equals the area of the content
    -- image, then apply the user supplied style scale to both sides.
    local area_ratio = firstContentImg:size(2) * firstContentImg:size(3) / (img:size(3) * img:size(2))
    local img_scale = math.sqrt(area_ratio) * params.style_scale
    img = image.scale(img, img:size(3) * img_scale, img:size(2) * img_scale, 'bilinear')
    print("Style image size: " .. img:size(3) .. " x " .. img:size(2))
    style_images_caffe[#style_images_caffe + 1] = preprocess(img):float()
  end

  for i = 1, #style_images_caffe do
    local onDevice = MaybePutOnGPU(style_images_caffe[i], params)
    style_images_caffe[i] = onDevice
  end

  return style_images_caffe
end
611 | 


--------------------------------------------------------------------------------
/consistencyChecker/CTensor4D.h:
--------------------------------------------------------------------------------
  1 | // CTensor4D
  2 | // A four-dimensional array
  3 | //
  4 | // Author: Thomas Brox
  5 | // Last change: 05.11.2001
  6 | //-------------------------------------------------------------------------
  7 | // Note:
  8 | // There is a difference between the GNU Compiler's STL and the standard
  9 | // concerning the definition and usage of string streams as well as substrings.
 10 | // Thus if using a GNU Compiler you should write #define GNU_COMPILER at the
 11 | // beginning of your program.
 12 | //
 13 | // Another Note:
 14 | // Linker problems occurred in connection with <vector> from the STL.
 15 | // In this case you should include this file in a namespace.
 16 | // Example:
 17 | // namespace NTensor4D {
 18 | //   #include <CTensor4D.h>
 19 | // }
 20 | // After including other packages you can then write:
 21 | // using namespace NTensor4D;
 22 | 
 23 | #ifndef CTENSOR4D_H
 24 | #define CTENSOR4D_H
 25 | 
 26 | #include <iostream>
 27 | #include <fstream>
 28 | #include <string>
 29 | #ifdef GNU_COMPILER
 30 |   #include <strstream>
 31 | #else
 32 |   #include <sstream>
 33 | #endif
 34 | #include "CTensor.h"
 35 | 
 36 | template <class T>
 37 | class CTensor4D {
 38 | public:
 39 |   // constructors (the default constructor creates an empty tensor, the sized one allocates storage without filling it)
 40 |   inline CTensor4D();
 41 |   inline CTensor4D(const int aXSize, const int aYSize, const int aZSize, const int aASize);
 42 |   // copy constructor (copies all values of aCopyFrom)
 43 |   CTensor4D(const CTensor4D<T>& aCopyFrom);
 44 |   // constructor with implicit filling
 45 |   CTensor4D(const int aXSize, const int aYSize, const int aZSize, const int aASize, const T aFillValue);
 46 |   // destructor
 47 |   virtual ~CTensor4D();
 48 | 
 49 |   // Changes the size of the tensor, data will be lost
 50 |   void setSize(int aXSize, int aYSize, int aZSize, int aASize);
 51 |   // Downsamples the tensor (in x and y, or in x, y and z)
 52 |   void downsample(int aNewXSize, int aNewYSize);
 53 |   void downsample(int aNewXSize, int aNewYSize, int aNewZSize);
 54 |   // Upsamples the tensor (in x and y, or in x, y and z)
 55 |   void upsample(int aNewXSize, int aNewYSize);
 56 |   void upsampleBilinear(int aNewXSize, int aNewYSize);
 57 |   void upsampleTrilinear(int aNewXSize, int aNewYSize, int aNewZSize);
 58 |   // Fills the tensor with the value aValue (see also operator =)
 59 |   void fill(const T aValue);
 60 |   // Copies a box from the tensor into aResult, the size of aResult will be adjusted (both corners are inclusive)
 61 |   void cut(CTensor4D<T>& aResult, int x1, int y1, int z1, int a1, int x2, int y2, int z2, int a2);
 62 |   // Reads data from a list of PPM or PGM files given in a text file
 63 |   void readFromFile(char* aFilename);
 64 |   // Writes a set of colour images to a large PPM image (0 for aCols/aRows selects the grid size automatically)
 65 |   void writeToPPM(const char* aFilename, int aCols = 0, int aRows = 0);
 66 | 
 67 |   // Gives full access to tensor's values (bounds are only checked if DEBUG is defined)
 68 |   inline T& operator()(const int ax, const int ay, const int az, const int aa) const;
 69 |   // Read access with bilinear interpolation in x and y, returns one value per z component of layer aa
 70 |   CVector<T> operator()(const float ax, const float ay, const int aa) const;
 71 |   // Fills the tensor with the value aValue (equivalent to fill())
 72 |   inline CTensor4D<T>& operator=(const T aValue);
 73 |   // Copies the tensor aCopyFrom to this tensor (size of tensor might change)
 74 |   CTensor4D<T>& operator=(const CTensor4D<T>& aCopyFrom);
 75 |   // Multiplication with a scalar
 76 |   CTensor4D<T>& operator*=(const T aValue);
 77 |   // Component-wise addition
 78 |   CTensor4D<T>& operator+=(const CTensor4D<T>& aTensor);
 79 | 
 80 |   // Gives access to the tensor's size (size() is the total number of values)
 81 |   inline int xSize() const;
 82 |   inline int ySize() const;
 83 |   inline int zSize() const;
 84 |   inline int aSize() const;
 85 |   inline int size() const;
 86 |   // Returns the aath layer of the 4D-tensor as 3D-tensor
 87 |   CTensor<T> getTensor3D(const int aa) const;
 88 |   // Removes one dimension (aDim: 0 = x, 1 = y, 2 = z, 3 = a) at position aIndex and returns the resulting 3D-tensor in aTensor
 89 |   void getTensor3D(CTensor<T>& aTensor, int aIndex, int aDim = 3) const;
 90 |   // Copies the components of a 3D-tensor into the slice aIndex along dimension aDim of the 4D-tensor
 91 |   void putTensor3D(CTensor<T>& aTensor, int aIndex, int aDim = 3);
 92 |   // Removes two dimensions and returns the resulting matrix
 93 |   void getMatrix(CMatrix<T>& aMatrix, int aZIndex, int aAIndex) const;
 94 |   // Copies the components of a matrix into the slice (aZIndex, aAIndex) of the 4D-tensor
 95 |   void putMatrix(CMatrix<T>& aMatrix, int aZIndex, int aAIndex);
 96 |   // Gives access to the internal data representation (use sparingly)
 97 |   inline T* data() const;
 98 | protected:
 99 |   int mXSize,mYSize,mZSize,mASize;
100 |   T *mData; // linear storage, x runs fastest: index = mXSize*(mYSize*(mZSize*a+z)+y)+x
101 | };
102 | 
103 | // Stream insertion operator: provides basic output functionality (only appropriate for very small tensors)
104 | template <class T> std::ostream& operator<<(std::ostream& aStream, const CTensor4D<T>& aTensor);
105 | 
106 | // Exceptions thrown by CTensor-------------------------------------------------
107 | 
// Exception type for accesses outside of the tensor's bounds. Constructing
// it reports the offending coordinates on std::cerr.
struct ETensor4DRangeOverflow {
  ETensor4DRangeOverflow(const int ax, const int ay, const int az, const int aa) {
    std::cerr << "Exception ETensor4DRangeOverflow: x = " << ax
              << ", y = " << ay
              << ", z = " << az
              << ", a = " << aa
              << std::endl;
  }
};
116 | 
// Exception type for operations whose operand does not have the size this
// tensor requires (e.g. getTensor3D). Constructing it reports both sizes,
// dimension by dimension, on std::cerr.
struct ETensor4DIncompatibleSize {
  ETensor4DIncompatibleSize(int ax, int ay, int az, int ax2, int ay2, int az2) {
    std::cerr << "Exception ETensor4DIncompatibleSize: x = " << ax << ":" << ax2
              << ", y = " << ay << ":" << ay2
              << ", z = " << az << ":" << az2
              << std::endl;
  }
};
127 | 
// Exception type used by readFromFile for files of unknown format.
// Constructing it writes a notice to std::cerr.
struct ETensor4DInvalidFileFormat {
  ETensor4DInvalidFileFormat() {
    std::cerr << "Exception ETensor4DInvalidFileFormat" << std::endl;
  }
};
135 | 
136 | // I M P L E M E N T A T I O N --------------------------------------------
137 | //
138 | // You might wonder why there is implementation code in a header file.
139 | // The reason is that not all C++ compilers yet manage separate compilation
140 | // of templates. Inline functions cannot be compiled separately anyway.
141 | // So in this case the whole implementation code is added to the header
142 | // file.
143 | // Users of CTensor4D should ignore everything that's beyond this line :)
144 | // ------------------------------------------------------------------------
145 | 
146 | // P U B L I C ------------------------------------------------------------
147 | 
148 | // constructor
149 | template <class T>
150 | inline CTensor4D<T>::CTensor4D() {
151 |   mData = 0; mXSize = 0; mYSize = 0; mZSize = 0; mASize = 0;
152 | }
153 | 
154 | // constructor
155 | template <class T>
156 | inline CTensor4D<T>::CTensor4D(const int aXSize, const int aYSize, const int aZSize, const int aASize)
157 |   : mXSize(aXSize), mYSize(aYSize), mZSize(aZSize), mASize(aASize) {
158 |   mData = new T[aXSize*aYSize*aZSize*aASize];
159 | }
160 | 
161 | // copy constructor
162 | template <class T>
163 | CTensor4D<T>::CTensor4D(const CTensor4D<T>& aCopyFrom)
164 |   : mXSize(aCopyFrom.mXSize), mYSize(aCopyFrom.mYSize), mZSize(aCopyFrom.mZSize), mASize(aCopyFrom.mASize) {
165 |   int wholeSize = mXSize*mYSize*mZSize*mASize;
166 |   mData = new T[wholeSize];
167 |   for (register int i = 0; i < wholeSize; i++)
168 |     mData[i] = aCopyFrom.mData[i];
169 | }
170 | 
171 | // constructor with implicit filling
172 | template <class T>
173 | CTensor4D<T>::CTensor4D(const int aXSize, const int aYSize, const int aZSize, const int aASize, const T aFillValue)
174 |   : mXSize(aXSize), mYSize(aYSize), mZSize(aZSize), mASize(aASize) {
175 |   mData = new T[aXSize*aYSize*aZSize*aASize];
176 |   fill(aFillValue);
177 | }
178 | 
179 | // destructor
180 | template <class T>
181 | CTensor4D<T>::~CTensor4D() {
182 |   delete[] mData;
183 | }
184 | 
185 | // setSize
186 | template <class T>
187 | void CTensor4D<T>::setSize(int aXSize, int aYSize, int aZSize, int aASize) {
188 |   if (mData != 0) delete[] mData;
189 |   mData = new T[aXSize*aYSize*aZSize*aASize];
190 |   mXSize = aXSize;
191 |   mYSize = aYSize;
192 |   mZSize = aZSize;
193 |   mASize = aASize;
194 | }
195 | 
196 | //downsample
197 | template <class T>
198 | void CTensor4D<T>::downsample(int aNewXSize, int aNewYSize) {
199 |   T* mData2 = new T[aNewXSize*aNewYSize*mZSize*mASize];
200 |   int aSize = aNewXSize*aNewYSize;
201 |   for (int a = 0; a < mASize; a++)
202 |     for (int z = 0; z < mZSize; z++) {
203 |       CMatrix<T> aTemp(mXSize,mYSize);
204 |       getMatrix(aTemp,z,a);
205 |       aTemp.downsample(aNewXSize,aNewYSize);
206 |       for (int i = 0; i < aSize; i++)
207 |         mData2[i+(a*mZSize+z)*aSize] = aTemp.data()[i];
208 |     }
209 |   delete[] mData;
210 |   mData = mData2;
211 |   mXSize = aNewXSize;
212 |   mYSize = aNewYSize;
213 | }
214 | 
215 | template <class T>
216 | void CTensor4D<T>::downsample(int aNewXSize, int aNewYSize, int aNewZSize) {
217 |   T* mData2 = new T[aNewXSize*aNewYSize*aNewZSize*mASize];
218 |   int aSize = aNewXSize*aNewYSize*aNewZSize;
219 |   for (int a = 0; a < mASize; a++) {
220 |     CTensor<T> aTemp(mXSize,mYSize,mZSize);
221 |     getTensor3D(aTemp,a);
222 |     aTemp.downsample(aNewXSize,aNewYSize,aNewZSize);
223 |     for (int i = 0; i < aSize; i++)
224 |       mData2[i+a*aSize] = aTemp.data()[i];
225 |   }
226 |   delete[] mData;
227 |   mData = mData2;
228 |   mXSize = aNewXSize;
229 |   mYSize = aNewYSize;
230 |   mZSize = aNewZSize;
231 | }
232 | 
233 | // upsample
234 | template <class T>
235 | void CTensor4D<T>::upsample(int aNewXSize, int aNewYSize) {
236 |   T* mData2 = new T[aNewXSize*aNewYSize*mZSize*mASize];
237 |   int aSize = aNewXSize*aNewYSize;
238 |   for (int a = 0; a < mASize; a++)
239 |     for (int z = 0; z < mZSize; z++) {
240 |       CMatrix<T> aTemp(mXSize,mYSize);
241 |       getMatrix(aTemp,z,a);
242 |       aTemp.upsample(aNewXSize,aNewYSize);
243 |       for (int i = 0; i < aSize; i++)
244 |         mData2[i+(a*mZSize+z)*aSize] = aTemp.data()[i];
245 |     }
246 |   delete[] mData;
247 |   mData = mData2;
248 |   mXSize = aNewXSize;
249 |   mYSize = aNewYSize;
250 | }
251 | 
252 | // upsampleBilinear
253 | template <class T>
254 | void CTensor4D<T>::upsampleBilinear(int aNewXSize, int aNewYSize) {
255 |   T* mData2 = new T[aNewXSize*aNewYSize*mZSize*mASize];
256 |   int aSize = aNewXSize*aNewYSize;
257 |   for (int a = 0; a < mASize; a++)
258 |     for (int z = 0; z < mZSize; z++) {
259 |       CMatrix<T> aTemp(mXSize,mYSize);
260 |       getMatrix(aTemp,z,a);
261 |       aTemp.upsampleBilinear(aNewXSize,aNewYSize);
262 |       for (int i = 0; i < aSize; i++)
263 |         mData2[i+(a*mZSize+z)*aSize] = aTemp.data()[i];
264 |     }
265 |   delete[] mData;
266 |   mData = mData2;
267 |   mXSize = aNewXSize;
268 |   mYSize = aNewYSize;
269 | }
270 | 
271 | // upsampleTrilinear
272 | template <class T>
273 | void CTensor4D<T>::upsampleTrilinear(int aNewXSize, int aNewYSize, int aNewZSize) {
274 |   T* mData2 = new T[aNewXSize*aNewYSize*aNewZSize*mASize];
275 |   int aSize = aNewXSize*aNewYSize*aNewZSize;
276 |   for (int a = 0; a < mASize; a++) {
277 |     CTensor<T> aTemp(mXSize,mYSize,mZSize);
278 |     getTensor3D(aTemp,a);
279 |     aTemp.upsampleTrilinear(aNewXSize,aNewYSize,aNewZSize);
280 |     for (int i = 0; i < aSize; i++)
281 |       mData2[i+a*aSize] = aTemp.data()[i];
282 |   }
283 |   delete[] mData;
284 |   mData = mData2;
285 |   mXSize = aNewXSize;
286 |   mYSize = aNewYSize;
287 |   mZSize = aNewZSize;
288 | }
289 | 
290 | // fill
291 | template <class T>
292 | void CTensor4D<T>::fill(const T aValue) {
293 |   int wholeSize = mXSize*mYSize*mZSize*mASize;
294 |   for (register int i = 0; i < wholeSize; i++)
295 |     mData[i] = aValue;
296 | }
297 | 
298 | // cut
299 | template <class T>
300 | void CTensor4D<T>::cut(CTensor4D<T>& aResult, int x1, int y1, int z1, int a1, int x2, int y2, int z2, int a2) {
301 |   aResult.mXSize = x2-x1+1;
302 |   aResult.mYSize = y2-y1+1;
303 |   aResult.mZSize = z2-z1+1;
304 |   aResult.mASize = a2-a1+1;
305 |   delete[] aResult.mData;
306 |   aResult.mData = new T[aResult.mXSize*aResult.mYSize*aResult.mZSize*aResult.mASize];
307 |   for (int a = a1; a <= a2; a++)
308 |     for (int z = z1; z <= z2; z++)
309 |       for (int y = y1; y <= y2; y++)
310 |         for (int x = x1; x <= x2; x++)
311 |           aResult(x-x1,y-y1,z-z1,a-a1) = operator()(x,y,z,a);
312 | }
313 | 
314 | // readFromFile
315 | template <class T>
316 | void CTensor4D<T>::readFromFile(char* aFilename) {
317 |   if (mData != 0) delete[] mData;
318 |   std::string s;
319 |   std::string aPath = aFilename;
320 |   aPath.erase(aPath.find_last_of('\\')+1,100);
321 |   mASize = 0;
322 |   {
323 |     std::ifstream aStream(aFilename);
324 |     while (!aStream.eof()) {
325 |       aStream >> s;
326 |       if (s != "") {
327 |         mASize++;
328 |         if (mASize == 1) {
329 |           s.erase(0,s.find_last_of('.'));
330 |           if (s == ".ppm" || s == ".PPM") mZSize = 3;
331 |           else if (s == ".pgm" || s == ".PGM") mZSize = 1;
332 |           else throw ETensor4DInvalidFileFormat();
333 |         }
334 |       }
335 |     }
336 |   }
337 |   std::ifstream aStream(aFilename);
338 |   aStream >> s;
339 |   s = aPath+s;
340 |   // PGM
341 |   if (mZSize == 1) {
342 |     CMatrix<float> aTemp;
343 |     aTemp.readFromPGM(s.c_str());
344 |     mXSize = aTemp.xSize();
345 |     mYSize = aTemp.ySize();
346 |     int aSize = mXSize*mYSize;
347 |     mData = new T[aSize*mASize];
348 |     for (int i = 0; i < aSize; i++)
349 |       mData[i] = aTemp.data()[i];
350 |     for (int a = 1; a < mASize; a++) {
351 |       aStream >> s;
352 |       s = aPath+s;
353 |       aTemp.readFromPGM(s.c_str());
354 |       for (int i = 0; i < aSize; i++)
355 |         mData[i+a*aSize] = aTemp.data()[i];
356 |     }
357 |   }
358 |   // PPM
359 |   else {
360 |     CTensor<float> aTemp;
361 |     aTemp.readFromPPM(s.c_str());
362 |     mXSize = aTemp.xSize();
363 |     mYSize = aTemp.ySize();
364 |     int aSize = 3*mXSize*mYSize;
365 |     mData = new T[aSize*mASize];
366 |     for (int i = 0; i < aSize; i++)
367 |       mData[i] = aTemp.data()[i];
368 |     for (int a = 1; a < mASize; a++) {
369 |       aStream >> s;
370 |       s = aPath+s;
371 |       aTemp.readFromPPM(s.c_str());
372 |       for (int i = 0; i < aSize; i++)
373 |         mData[i+a*aSize] = aTemp.data()[i];
374 |     }
375 |   }
376 | }
377 | 
378 | // writeToPPM
379 | template <class T>
380 | void CTensor4D<T>::writeToPPM(const char* aFilename, int aCols, int aRows) {
381 |   int rows = (int)floor(sqrt(mASize));
382 |   if (aRows != 0) rows = aRows;
383 |   int cols = (int)ceil(mASize*1.0/rows);
384 |   if (aCols != 0) cols = aCols;
385 |   FILE* outimage = fopen(aFilename, "wb");
386 |   fprintf(outimage, "P6 \n");
387 |   fprintf(outimage, "%ld %ld \n255\n", cols*mXSize,rows*mYSize);
388 |   for (int r = 0; r < rows; r++)
389 |     for (int y = 0; y < mYSize; y++)
390 |       for (int c = 0; c < cols; c++)
391 |         for (int x = 0; x < mXSize; x++) {
392 |           unsigned char aHelp;
393 |           if (r*cols+c >= mASize) aHelp = 0;
394 |           else aHelp = (unsigned char)operator()(x,y,0,r*cols+c);
395 |           fwrite (&aHelp, sizeof(unsigned char), 1, outimage);
396 |           if (r*cols+c >= mASize) aHelp = 0;
397 |           else aHelp = (unsigned char)operator()(x,y,1,r*cols+c);
398 |           fwrite (&aHelp, sizeof(unsigned char), 1, outimage);
399 |           if (r*cols+c >= mASize) aHelp = 0;
400 |           else aHelp = (unsigned char)operator()(x,y,2,r*cols+c);
401 |           fwrite (&aHelp, sizeof(unsigned char), 1, outimage);
402 |         }
403 |   fclose(outimage);
404 | }
405 | 
406 | // operator ()
407 | template <class T>
408 | inline T& CTensor4D<T>::operator()(const int ax, const int ay, const int az, const int aa) const {
409 |   #ifdef DEBUG
410 |     if (ax >= mXSize || ay >= mYSize || az >= mZSize || aa >= mASize || ax < 0 || ay < 0 || az < 0 || aa < 0)
411 |       throw ETensorRangeOverflow(ax,ay,az,aa);
412 |   #endif
413 |   return mData[mXSize*(mYSize*(mZSize*aa+az)+ay)+ax];
414 | }
415 | 
416 | template <class T>
417 | CVector<T> CTensor4D<T>::operator()(const float ax, const float ay, const int aa) const {
418 |   CVector<T> aResult(mZSize);
419 |   int x1 = (int)ax;
420 |   int y1 = (int)ay;
421 |   int x2 = x1+1;
422 |   int y2 = y1+1;
423 |   #ifdef _DEBUG
424 |   if (x2 >= mXSize || y2 >= mYSize || x1 < 0 || y1 < 0) throw ETensorRangeOverflow(ax,ay,0);
425 |   #endif
426 |   float alphaX = ax-x1; float alphaXTrans = 1.0-alphaX;
427 |   float alphaY = ay-y1; float alphaYTrans = 1.0-alphaY;
428 |   for (int k = 0; k < mZSize; k++) {
429 |     float a = alphaXTrans*operator()(x1,y1,k,aa)+alphaX*operator()(x2,y1,k,aa);
430 |     float b = alphaXTrans*operator()(x1,y2,k,aa)+alphaX*operator()(x2,y2,k,aa);
431 |     aResult(k) = alphaYTrans*a+alphaY*b;
432 |   }
433 |   return aResult;
434 | }
435 | 
436 | // operator =
437 | template <class T>
438 | inline CTensor4D<T>& CTensor4D<T>::operator=(const T aValue) {
439 |   fill(aValue);
440 |   return *this;
441 | }
442 | 
443 | template <class T>
444 | CTensor4D<T>& CTensor4D<T>::operator=(const CTensor4D<T>& aCopyFrom) {
445 |   if (this != &aCopyFrom) {
446 |     if (mData != 0) delete[] mData;
447 |     mXSize = aCopyFrom.mXSize;
448 |     mYSize = aCopyFrom.mYSize;
449 |     mZSize = aCopyFrom.mZSize;
450 |     mASize = aCopyFrom.mASize;
451 |     int wholeSize = mXSize*mYSize*mZSize*mASize;
452 |     mData = new T[wholeSize];
453 |     for (register int i = 0; i < wholeSize; i++)
454 |       mData[i] = aCopyFrom.mData[i];
455 |   }
456 |   return *this;
457 | }
458 | 
459 | // operator *=
460 | template <class T>
461 | CTensor4D<T>& CTensor4D<T>::operator*=(const T aValue) {
462 |   int wholeSize = mXSize*mYSize*mZSize*mASize;
463 |   for (int i = 0; i < wholeSize; i++)
464 |     mData[i] *= aValue;
465 |   return *this;
466 | }
467 | 
468 | // operator +=
469 | template <class T>
470 | CTensor4D<T>& CTensor4D<T>::operator+=(const CTensor4D<T>& aTensor) {
471 |   #ifdef _DEBUG
472 |   if (mXSize != aTensor.mXSize || mYSize != aTensor.mYSize || mZSize != aTensor.mZSize || mASize != aTensor.mASize)
473 |     throw ETensorIncompatibleSize(mXSize,mYSize,mZSize);
474 |   #endif
475 |   int wholeSize = size();
476 |   for (int i = 0; i < wholeSize; i++)
477 |     mData[i] += aTensor.mData[i];
478 |   return *this;
479 | }
480 | 
481 | // xSize
482 | template <class T>
483 | inline int CTensor4D<T>::xSize() const {
484 | 
485 |   return mXSize;
486 | }
487 | 
488 | // ySize
489 | template <class T>
490 | inline int CTensor4D<T>::ySize() const {
491 |   return mYSize;
492 | }
493 | 
494 | // zSize
495 | template <class T>
496 | inline int CTensor4D<T>::zSize() const {
497 |   return mZSize;
498 | }
499 | 
500 | // aSize
501 | template <class T>
502 | inline int CTensor4D<T>::aSize() const {
503 |   return mASize;
504 | }
505 | 
506 | // size
507 | template <class T>
508 | inline int CTensor4D<T>::size() const {
509 |   return mXSize*mYSize*mZSize*mASize;
510 | }
511 | 
512 | // getTensor3D
513 | template <class T>
514 | CTensor<T> CTensor4D<T>::getTensor3D(const int aa) const {
515 |   CTensor<T> aTemp(mXSize,mYSize,mZSize);
516 |   int aTensorSize = mXSize*mYSize*mZSize;
517 |   int aOffset = aa*aTensorSize;
518 |   for (int i = 0; i < aTensorSize; i++)
519 |     aTemp.data()[i] = mData[i+aOffset];
520 |   return aTemp;
521 | }
522 | 
523 | // getTensor3D
524 | template <class T>
525 | void CTensor4D<T>::getTensor3D(CTensor<T>& aTensor, int aIndex, int aDim) const {
526 |   int aSize;
527 |   int aOffset;
528 |   switch (aDim) {
529 |   case 3:
530 |     if (aTensor.xSize() != mXSize || aTensor.ySize() != mYSize || aTensor.zSize() != mZSize)
531 |       throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mYSize,mZSize);
532 |     aSize = mXSize*mYSize*mZSize;
533 |     aOffset = aIndex*aSize;
534 |     for (int i = 0; i < aSize; i++)
535 |       aTensor.data()[i] = mData[i+aOffset];
536 |     break;
537 |   case 2:
538 |     if (aTensor.xSize() != mXSize || aTensor.ySize() != mYSize || aTensor.zSize() != mASize)
539 |       throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mYSize,mASize);
540 |     aSize = mXSize*mYSize;
541 |     aOffset = aIndex*aSize;
542 |     for (int a = 0; a < mASize; a++) 
543 |       for (int i = 0; i < aSize; i++)
544 |         aTensor.data()[i+a*aSize] = mData[i+aOffset+a*aSize*mZSize];
545 |     break;
546 |   case 1:
547 |     if (aTensor.xSize() != mXSize || aTensor.ySize() != mZSize || aTensor.zSize() != mASize)
548 |       throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mZSize,mASize);
549 |     for (int a = 0; a < mASize; a++)
550 |       for (int z = 0; z < mZSize; z++)
551 |         for (int x = 0; x < mXSize; x++)
552 |           aTensor(x,z,a) = operator()(x,aIndex,z,a);
553 |     break;
554 |   case 0:
555 |     if (aTensor.xSize() != mYSize || aTensor.ySize() != mZSize || aTensor.zSize() != mASize)
556 |       throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mYSize,mZSize,mASize);
557 |     for (int a = 0; a < mASize; a++)
558 |       for (int z = 0; z < mZSize; z++)
559 |         for (int y = 0; y < mYSize; y++)
560 |           aTensor(y,z,a) = operator()(aIndex,y,z,a);
561 |     break;
562 |   default: getTensor3D(aTensor,aIndex);
563 |   }
564 | }
565 | 
566 | // putTensor3D
567 | template <class T>
568 | void CTensor4D<T>::putTensor3D(CTensor<T>& aTensor, int aIndex, int aDim) {
569 |   int aSize;
570 |   int aOffset;
571 |   switch (aDim) {
572 |   case 3:
573 |     if (aTensor.xSize() != mXSize || aTensor.ySize() != mYSize || aTensor.zSize() != mZSize)
574 |       throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mYSize,mZSize);
575 |     aSize = mXSize*mYSize*mZSize;
576 |     aOffset = aIndex*aSize;
577 |     for (int i = 0; i < aSize; i++)
578 |       mData[i+aOffset] = aTensor.data()[i];
579 |     break;
580 |   case 2:
581 |     if (aTensor.xSize() != mXSize || aTensor.ySize() != mYSize || aTensor.zSize() != mASize)
582 |       throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mYSize,mASize);
583 |     aSize = mXSize*mYSize;
584 |     aOffset = aIndex*aSize;
585 |     for (int a = 0; a < mASize; a++)
586 |       for (int i = 0; i < aSize; i++)
587 |         mData[i+aOffset+a*aSize*mZSize] = aTensor.data()[i+a*aSize];
588 |     break;
589 |   case 1:
590 |     if (aTensor.xSize() != mXSize || aTensor.ySize() != mZSize || aTensor.zSize() != mASize)
591 |       throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mXSize,mZSize,mASize);
592 |     for (int a = 0; a < mASize; a++)
593 |       for (int z = 0; z < mZSize; z++)
594 |         for (int x = 0; x < mXSize; x++)
595 |           operator()(x,aIndex,z,a) = aTensor(x,z,a);
596 |     break;
597 |   case 0:
598 |     if (aTensor.xSize() != mYSize || aTensor.ySize() != mZSize || aTensor.zSize() != mASize)
599 |       throw ETensor4DIncompatibleSize(aTensor.xSize(),aTensor.ySize(),aTensor.zSize(),mYSize,mZSize,mASize);
600 |     for (int a = 0; a < mASize; a++)
601 |       for (int z = 0; z < mZSize; z++)
602 |         for (int y = 0; y < mYSize; y++)
603 |           operator()(aIndex,y,z,a) = aTensor(y,z,a);
604 |     break;
605 |   default: putTensor3D(aTensor,aIndex);
606 |   }
607 | }
608 | 
609 | // getMatrix
610 | template <class T>
611 | void CTensor4D<T>::getMatrix(CMatrix<T>& aMatrix, int aZIndex, int aAIndex) const {
612 |   if (aMatrix.xSize() != mXSize || aMatrix.ySize() != mYSize)
613 |     throw ETensor4DIncompatibleSize(aMatrix.xSize(),aMatrix.ySize(),1,mXSize,mYSize,1);
614 |   int aSize = mXSize*mYSize;
615 |   int aOffset = aSize*(aAIndex*mZSize+aZIndex);
616 |   for (int i = 0; i < aSize; i++)
617 |     aMatrix.data()[i] = mData[i+aOffset];
618 | }
619 | 
620 | // putMatrix
621 | template <class T>
622 | void CTensor4D<T>::putMatrix(CMatrix<T>& aMatrix, int aZIndex, int aAIndex) {
623 |   if (aMatrix.xSize() != mXSize || aMatrix.ySize() != mYSize)
624 |     throw ETensor4DIncompatibleSize(aMatrix.xSize(),aMatrix.ySize(),1,mXSize,mYSize,1);
625 |   int aSize = mXSize*mYSize;
626 |   int aOffset = aSize*(aAIndex*mZSize+aZIndex);
627 |   for (int i = 0; i < aSize; i++)
628 |     mData[i+aOffset] = aMatrix.data()[i];
629 | }
630 | 
631 | // data()
632 | template <class T>
633 | inline T* CTensor4D<T>::data() const {
634 |   return mData;
635 | }
636 | 
637 | // N O N - M E M B E R  F U N C T I O N S --------------------------------------
638 | 
639 | // operator <<
640 | template <class T>
641 | std::ostream& operator<<(std::ostream& aStream, const CTensor4D<T>& aTensor) {
642 |   for (int a = 0; a < aTensor.aSize(); a++) {
643 |     for (int z = 0; z < aTensor.zSize(); z++) {
644 |       for (int y = 0; y < aTensor.ySize(); y++) {
645 |         for (int x = 0; x < aTensor.xSize(); x++)
646 |           aStream << aTensor(x,y,z) << ' ';
647 |         aStream << std::endl;
648 |       }
649 |       aStream << std::endl;
650 |     }
651 |     aStream << std::endl;
652 |   }
653 |   return aStream;
654 | }
655 | 
656 | #endif
657 | 


--------------------------------------------------------------------------------
/consistencyChecker/CTensor.h:
--------------------------------------------------------------------------------
   1 | // CTensor
   2 | // A three-dimensional array
   3 | //
   4 | // Author: Thomas Brox
   5 | 
   6 | #ifndef CTENSOR_H
   7 | #define CTENSOR_H
   8 | 
   9 | #include <iostream>
  10 | #include <fstream>
  11 | #include <string>
  12 | #include <sstream>
  13 | #include <CMatrix.h>
  14 | #include <NMath.h>
  15 | 
  16 | inline int int_min(int x, int& y) { return (x<y)?x:y; }
  17 | inline int int_max(int x, int& y) { return (x<y)?y:x; }
  18 | 
// A three-dimensional array of elements of type T with extents
// mXSize x mYSize x mZSize. The elements live in one buffer (mData) that the
// tensor allocates with new[] and releases with delete[] in its destructor.
template <class T>
class CTensor {
public:
  // standard constructor: creates an empty tensor (all sizes 0, no buffer)
  inline CTensor();
  // constructor: allocates the buffer for the given size without filling it
  inline CTensor(const int aXSize, const int aYSize, const int aZSize);
  // copy constructor: allocates an own buffer and copies all elements
  CTensor(const CTensor<T>& aCopyFrom);
  // constructor with implicit filling: every element is set to aFillValue
  CTensor(const int aXSize, const int aYSize, const int aZSize, const T aFillValue);
  // destructor: releases the buffer
  virtual ~CTensor();

  // Changes the size of the tensor, data will be lost
  void setSize(int aXSize, int aYSize, int aZSize);
  // Downsamples each z layer of the tensor to aNewXSize x aNewYSize
  // (the number of layers is unchanged). The overloads with aConfidence pass
  // the confidence matrix (or the matching layer of the confidence tensor)
  // on to CMatrix<T>::downsample.
  void downsample(int aNewXSize, int aNewYSize);
  void downsample(int aNewXSize, int aNewYSize, CMatrix<float>& aConfidence);
  void downsample(int aNewXSize, int aNewYSize, CTensor<float>& aConfidence);
  // Upsamples each z layer of the tensor to aNewXSize x aNewYSize
  void upsample(int aNewXSize, int aNewYSize);
  void upsampleBilinear(int aNewXSize, int aNewYSize);
  // Fills the tensor with the value aValue (see also operator =)
  void fill(const T aValue);
  // Fills a rectangular area with the value aValue; layer z receives aValue(z).
  // The rectangle (corners inclusive) is clipped to the tensor's x-y extent.
  void fillRect(const CVector<T>& aValue, int ax1, int ay1, int ax2, int ay2);
  // Copies a box from the tensor into aResult, the size of aResult will be adjusted.
  // Both corners (x1,y1,z1) and (x2,y2,z2) are part of the box.
  void cut(CTensor<T>& aResult, int x1, int y1, int z1, int x2, int y2, int z2);
  // Copies aCopyFrom at a certain position of the tensor:
  // element (0,0,0) of aCopyFrom is written to (ax,ay,az)
  void paste(CTensor<T>& aCopyFrom, int ax, int ay, int az);
  // Mirrors the boundaries, aFrom is the distance from the boundaries where the pixels are copied from,
  // aTo is the distance from the boundaries they are copied to
  void mirrorLayers(int aFrom, int aTo);
  // Transforms the values so that they are all between aMin and aMax
  // aInitialMin/Max are initializations for seeking the minimum and maximum, change if your
  // data is not in this range or the data type T cannot hold these values
  // normalizeEach treats every z layer separately, normalize with aChannel
  // treats only that layer, and the remaining overload treats the whole tensor at once.
  void normalizeEach(T aMin, T aMax, T aInitialMin = -30000, T aInitialMax = 30000);
  void normalize(T aMin, T aMax, int aChannel, T aInitialMin = -30000, T aInitialMax = 30000);
  void normalize(T aMin, T aMax, T aInitialMin = -30000, T aInitialMax = 30000);
  // Converts from RGB to CIELab color space and vice-versa
  // (both operate in place on layers 0, 1 and 2)
  void rgbToCielab();
  void cielabToRGB();
  // Draws a line into the image (only for mZSize = 3)
  // aValue1..aValue3 are written to layers 0..2; pixels outside the image are skipped
  void drawLine(int dStartX, int dStartY, int dEndX, int dEndY, T aValue1 = 255, T aValue2 = 255, T aValue3 = 255);
  // Draws the outline of an axis-aligned rectangle by means of four drawLine calls
  void drawRect(int dStartX, int dStartY, int dEndX, int dEndY, T aValue1 = 255, T aValue2 = 255, T aValue3 = 255);

  // Applies a similarity transform (translation, rotation, scaling) to the image
  // The result is written to aWarped; aOutside marks the pixels of aWarped
  // whose source position lies outside this tensor
  void applySimilarityTransform(CTensor<T>& aWarped, CMatrix<bool>& aOutside, float tx, float ty, float cx, float cy, float phi, float scale);
  // Applies a homography (linear projective transformation) to the image
  // H is read as nine consecutive values H.data()[0..8]
  void applyHomography(CTensor<T>& aWarped, CMatrix<bool>& aOutside, const CMatrix<float>& H);

  // Reads the tensor from a file in Mathematica format
  void readFromMathematicaFile(const char* aFilename);
  // Writes the tensor to a file in Mathematica format
  void writeToMathematicaFile(const char* aFilename);
  // Reads the tensor from a movie file in IM format
  void readFromIMFile(const char* aFilename);
  // Writes the tensor to a movie file in IM format
  void writeToIMFile(const char* aFilename);
  // Reads an image from a PGM file
  void readFromPGM(const char* aFilename);
  // Writes the tensor in PGM-Format
  void writeToPGM(const char* aFilename);
  // Extends a XxYx1 tensor to a XxYx3 tensor with three identical layers
  void makeColorTensor();
  // Reads a color image from a PPM file
  void readFromPPM(const char* aFilename);
  // Writes the tensor in PPM-Format
  void writeToPPM(const char* aFilename);
  // Reads the tensor from a PDM file
  void readFromPDM(const char* aFilename);
  // Writes the tensor in PDM-Format
  // (aFeatureType: presumably one of the c... constants declared below - confirm in the implementation)
  void writeToPDM(const char* aFilename, char aFeatureType);

  // Gives full access to tensor's values
  // (returns a non-const reference although the function itself is const)
  inline T& operator()(const int ax, const int ay, const int az) const;
  // Read access with bilinear interpolation
  CVector<T> operator()(const float ax, const float ay) const;
  // Fills the tensor with the value aValue (equivalent to fill())
  inline CTensor<T>& operator=(const T aValue);
  // Copies the tensor aCopyFrom to this tensor (size of tensor might change)
  CTensor<T>& operator=(const CTensor<T>& aCopyFrom);
  // Adds a tensor of same size
  CTensor<T>& operator+=(const CTensor<T>& aMatrix);
  // Adds a constant to the tensor
  CTensor<T>& operator+=(const T aValue);
  // Multiplication with a scalar
  CTensor<T>& operator*=(const T aValue);

  // Returns the minimum value
  T min() const;
  // Returns the maximum value
  T max() const;
  // Returns the average value
  T avg() const;
  // Returns the average value of a specific layer
  T avg(int az) const;
  // Gives access to the tensor's size
  inline int xSize() const;
  inline int ySize() const;
  inline int zSize() const;
  inline int size() const;
  // Returns the az layer of the tensor as matrix (slow and fast version)
  // The slow version returns a new matrix by value, the fast version writes
  // into an existing matrix
  CMatrix<T> getMatrix(const int az) const;
  void getMatrix(CMatrix<T>& aMatrix, const int az) const;
  // Copies the matrix components of aMatrix into the az layer of the tensor
  void putMatrix(CMatrix<T>& aMatrix, const int az);
  // Gives access to the internal data representation (use sparingly)
  inline T* data() const;

  // Possible interpretations of the third tensor dimension for PDM format
  static const char cSpacial = 'S';
  static const char cVector = 'V';
  static const char cColor = 'C';
  static const char cSymmetricMatrix = 'Y';
protected:
  // Extents of the tensor in x, y and z direction
  int mXSize,mYSize,mZSize;
  // Element buffer owned by the tensor (0 for a default-constructed tensor)
  T *mData;
};
 139 | 
 140 | // Provides basic output functionality (only appropriate for very small tensors)
 141 | template <class T> std::ostream& operator<<(std::ostream& aStream, const CTensor<T>& aTensor);
 142 | 
 143 | // Exceptions thrown by CTensor-------------------------------------------------
 144 | 
 145 | // Thrown when one tries to access an element of a tensor which is out of
 146 | // the tensor's bounds
 147 | struct ETensorRangeOverflow {
 148 |   ETensorRangeOverflow(const int ax, const int ay, const int az) {
 149 |     using namespace std;
 150 |     cerr << "Exception ETensorRangeOverflow: x = " << ax << ", y = " << ay << ", z = " << az << endl;
 151 |   }
 152 | };
 153 | 
 154 | // Thrown when the size of a tensor does not match the needed size for a certain operation
 155 | struct ETensorIncompatibleSize {
 156 |   ETensorIncompatibleSize(int ax, int ay, int ax2, int ay2) {
 157 |     using namespace std;
 158 |     cerr << "Exception ETensorIncompatibleSize: x = " << ax << ":" << ax2;
 159 |     cerr << ", y = " << ay << ":" << ay2 << endl;
 160 |   }
 161 |   ETensorIncompatibleSize(int ax, int ay, int az) {
 162 |     std::cerr << "Exception ETensorIncompatibleTensorSize: x = " << ax << ", y = " << ay << ", z= " << az << std::endl;
 163 |   }
 164 | };
 165 | 
 166 | // I M P L E M E N T A T I O N --------------------------------------------
 167 | //
 168 | // You might wonder why there is implementation code in a header file.
 169 | // The reason is that not all C++ compilers yet manage separate compilation
 170 | // of templates. Inline functions cannot be compiled separately anyway.
 171 | // So in this case the whole implementation code is added to the header
 172 | // file.
 173 | // Users of CTensor should ignore everything that's beyond this line :)
 174 | // ------------------------------------------------------------------------
 175 | 
 176 | // P U B L I C ------------------------------------------------------------
 177 | 
 178 | // standard constructor
 179 | template <class T>
 180 | inline CTensor<T>::CTensor() {
 181 |   mData = 0;
 182 |   mXSize = mYSize = mZSize = 0;
 183 | }
 184 | 
 185 | // constructor
 186 | template <class T>
 187 | inline CTensor<T>::CTensor(const int aXSize, const int aYSize, const int aZSize)
 188 |   : mXSize(aXSize), mYSize(aYSize), mZSize(aZSize) {
 189 |   mData = new T[aXSize*aYSize*aZSize];
 190 | }
 191 | 
 192 | // copy constructor
 193 | template <class T>
 194 | CTensor<T>::CTensor(const CTensor<T>& aCopyFrom)
 195 |   : mXSize(aCopyFrom.mXSize), mYSize(aCopyFrom.mYSize), mZSize(aCopyFrom.mZSize) {
 196 |   int wholeSize = mXSize*mYSize*mZSize;
 197 |   mData = new T[wholeSize];
 198 |   for (register int i = 0; i < wholeSize; i++)
 199 |     mData[i] = aCopyFrom.mData[i];
 200 | }
 201 | 
 202 | // constructor with implicit filling
 203 | template <class T>
 204 | CTensor<T>::CTensor(const int aXSize, const int aYSize, const int aZSize, const T aFillValue)
 205 |   : mXSize(aXSize), mYSize(aYSize), mZSize(aZSize) {
 206 |   mData = new T[aXSize*aYSize*aZSize];
 207 |   fill(aFillValue);
 208 | }
 209 | 
 210 | // destructor
 211 | template <class T>
 212 | CTensor<T>::~CTensor() {
 213 |   delete[] mData;
 214 | }
 215 | 
 216 | // setSize
 217 | template <class T>
 218 | void CTensor<T>::setSize(int aXSize, int aYSize, int aZSize) {
 219 |   if (mData != 0) delete[] mData;
 220 |   mData = new T[aXSize*aYSize*aZSize];
 221 |   mXSize = aXSize;
 222 |   mYSize = aYSize;
 223 |   mZSize = aZSize;
 224 | }
 225 | 
 226 | //downsample
 227 | template <class T>
 228 | void CTensor<T>::downsample(int aNewXSize, int aNewYSize) {
 229 |   T* mData2 = new T[aNewXSize*aNewYSize*mZSize];
 230 |   int aSize = aNewXSize*aNewYSize;
 231 |   for (int z = 0; z < mZSize; z++) {
 232 |     CMatrix<T> aTemp(mXSize,mYSize);
 233 |     getMatrix(aTemp,z);
 234 |     aTemp.downsample(aNewXSize,aNewYSize);
 235 |     for (int i = 0; i < aSize; i++)
 236 |       mData2[i+z*aSize] = aTemp.data()[i];
 237 |   }
 238 |   delete[] mData;
 239 |   mData = mData2;
 240 |   mXSize = aNewXSize;
 241 |   mYSize = aNewYSize;
 242 | }
 243 | 
 244 | template <class T>
 245 | void CTensor<T>::downsample(int aNewXSize, int aNewYSize, CMatrix<float>& aConfidence) {
 246 |   T* mData2 = new T[aNewXSize*aNewYSize*mZSize];
 247 |   int aSize = aNewXSize*aNewYSize;
 248 |   for (int z = 0; z < mZSize; z++) {
 249 |     CMatrix<T> aTemp(mXSize,mYSize);
 250 |     getMatrix(aTemp,z);
 251 |     aTemp.downsample(aNewXSize,aNewYSize,aConfidence);
 252 |     for (int i = 0; i < aSize; i++)
 253 |       mData2[i+z*aSize] = aTemp.data()[i];
 254 |   }
 255 |   delete[] mData;
 256 |   mData = mData2;
 257 |   mXSize = aNewXSize;
 258 |   mYSize = aNewYSize;
 259 | }
 260 | 
 261 | template <class T>
 262 | void CTensor<T>::downsample(int aNewXSize, int aNewYSize, CTensor<float>& aConfidence) {
 263 |   T* mData2 = new T[aNewXSize*aNewYSize*mZSize];
 264 |   int aSize = aNewXSize*aNewYSize;
 265 |   CMatrix<float> aConf(mXSize,mYSize);
 266 |   for (int z = 0; z < mZSize; z++) {
 267 |     CMatrix<T> aTemp(mXSize,mYSize);
 268 |     getMatrix(aTemp,z);
 269 |     aConfidence.getMatrix(aConf,z);
 270 |     aTemp.downsample(aNewXSize,aNewYSize,aConf);
 271 |     for (int i = 0; i < aSize; i++)
 272 |       mData2[i+z*aSize] = aTemp.data()[i];
 273 |   }
 274 |   delete[] mData;
 275 |   mData = mData2;
 276 |   mXSize = aNewXSize;
 277 |   mYSize = aNewYSize;
 278 | }
 279 | 
 280 | // upsample
 281 | template <class T>
 282 | void CTensor<T>::upsample(int aNewXSize, int aNewYSize) {
 283 |   T* mData2 = new T[aNewXSize*aNewYSize*mZSize];
 284 |   int aSize = aNewXSize*aNewYSize;
 285 |   for (int z = 0; z < mZSize; z++) {
 286 |     CMatrix<T> aTemp(mXSize,mYSize);
 287 |     getMatrix(aTemp,z);
 288 |     aTemp.upsample(aNewXSize,aNewYSize);
 289 |     for (int i = 0; i < aSize; i++)
 290 |       mData2[i+z*aSize] = aTemp.data()[i];
 291 |   }
 292 |   delete[] mData;
 293 |   mData = mData2;
 294 |   mXSize = aNewXSize;
 295 |   mYSize = aNewYSize;
 296 | }
 297 | 
 298 | // upsampleBilinear
 299 | template <class T>
 300 | void CTensor<T>::upsampleBilinear(int aNewXSize, int aNewYSize) {
 301 |   T* mData2 = new T[aNewXSize*aNewYSize*mZSize];
 302 |   int aSize = aNewXSize*aNewYSize;
 303 |   for (int z = 0; z < mZSize; z++) {
 304 |     CMatrix<T> aTemp(mXSize,mYSize);
 305 |     getMatrix(aTemp,z);
 306 |     aTemp.upsampleBilinear(aNewXSize,aNewYSize);
 307 |     for (int i = 0; i < aSize; i++)
 308 |       mData2[i+z*aSize] = aTemp.data()[i];
 309 |   }
 310 |   delete[] mData;
 311 |   mData = mData2;
 312 |   mXSize = aNewXSize;
 313 |   mYSize = aNewYSize;
 314 | }
 315 | 
 316 | // fill
 317 | template <class T>
 318 | void CTensor<T>::fill(const T aValue) {
 319 |   int wholeSize = mXSize*mYSize*mZSize;
 320 |   for (register int i = 0; i < wholeSize; i++)
 321 |     mData[i] = aValue;
 322 | }
 323 | 
 324 | // fillRect
 325 | template <class T>
 326 | void CTensor<T>::fillRect(const CVector<T>& aValue, int ax1, int ay1, int ax2, int ay2) {
 327 |   for (int z = 0; z < mZSize; z++) {
 328 |     T val = aValue(z);
 329 |     for (int y = int_max(0,ay1); y <= int_min(ySize()-1,ay2); y++)
 330 |       for (register int x = int_max(0,ax1); x <= int_min(xSize()-1,ax2); x++)
 331 |         operator()(x,y,z) = val;
 332 |   }
 333 | }
 334 | 
 335 | // cut
 336 | template <class T>
 337 | void CTensor<T>::cut(CTensor<T>& aResult, int x1, int y1, int z1, int x2, int y2, int z2) {
 338 |   aResult.mXSize = x2-x1+1;
 339 |   aResult.mYSize = y2-y1+1;
 340 |   aResult.mZSize = z2-z1+1;
 341 |   delete[] aResult.mData;
 342 |   aResult.mData = new T[aResult.mXSize*aResult.mYSize*aResult.mZSize];
 343 |   for (int z = z1; z <= z2; z++)
 344 |     for (int y = y1; y <= y2; y++)
 345 |       for (int x = x1; x <= x2; x++)
 346 |         aResult(x-x1,y-y1,z-z1) = operator()(x,y,z);
 347 | }
 348 | 
 349 | // paste
 350 | template <class T>
 351 | void CTensor<T>::paste(CTensor<T>& aCopyFrom, int ax, int ay, int az) {
 352 |   for (int z = 0; z < aCopyFrom.zSize(); z++)
 353 |     for (int y = 0; y < aCopyFrom.ySize(); y++)
 354 |       for (int x = 0; x < aCopyFrom.xSize(); x++)
 355 |         operator()(ax+x,ay+y,az+z) = aCopyFrom(x,y,z);
 356 | }
 357 | 
 358 | // mirrorLayers
 359 | template <class T>
 360 | void CTensor<T>::mirrorLayers(int aFrom, int aTo) {
 361 |   for (int z = 0; z < mZSize; z++) {
 362 |     int aToXIndex = mXSize-aTo-1;
 363 |     int aToYIndex = mYSize-aTo-1;
 364 |     int aFromXIndex = mXSize-aFrom-1;
 365 |     int aFromYIndex = mYSize-aFrom-1;
 366 |     for (int y = aFrom; y <= aFromYIndex; y++) {
 367 |       operator()(aTo,y,z) = operator()(aFrom,y,z);
 368 |       operator()(aToXIndex,y,z) = operator()(aFromXIndex,y,z);
 369 |     }
 370 |     for (int x = aTo; x <= aToXIndex; x++) {
 371 |       operator()(x,aTo,z) = operator()(x,aFrom,z);
 372 |       operator()(x,aToYIndex,z) = operator()(x,aFromYIndex,z);
 373 |     }
 374 |   }
 375 | }
 376 | 
 377 | // normalize
 378 | template <class T>
 379 | void CTensor<T>::normalizeEach(T aMin, T aMax, T aInitialMin, T aInitialMax) {
 380 |   for (int k = 0; k < mZSize; k++)
 381 |     normalize(aMin,aMax,k,aInitialMin,aInitialMax);
 382 | }
 383 | 
 384 | template <class T>
 385 | void CTensor<T>::normalize(T aMin, T aMax, int aChannel, T aInitialMin, T aInitialMax) {
 386 |   int aChannelSize = mXSize*mYSize;
 387 |   T aCurrentMin = aInitialMax;
 388 |   T aCurrentMax = aInitialMin;
 389 |   int aIndex = aChannelSize*aChannel;
 390 |   for (int i = 0; i < aChannelSize; i++) {
 391 |     if (mData[aIndex] > aCurrentMax) aCurrentMax = mData[aIndex];
 392 |     else if (mData[aIndex] < aCurrentMin) aCurrentMin = mData[aIndex];
 393 |     aIndex++;
 394 |   }
 395 |   T aTemp1 = aCurrentMin - aMin;
 396 |   T aTemp2 = (aCurrentMax-aCurrentMin);
 397 |   if (aTemp2 == 0) aTemp2 = 1;
 398 |   else aTemp2 = (aMax-aMin)/aTemp2;
 399 |   aIndex = aChannelSize*aChannel;
 400 |   for (int i = 0; i < aChannelSize; i++) {
 401 |     mData[aIndex] -= aTemp1;
 402 |     mData[aIndex] *= aTemp2;
 403 |     aIndex++;
 404 |   }
 405 | }
 406 | 
 407 | // drawLine
 408 | template <class T>
 409 | void CTensor<T>::drawLine(int dStartX, int dStartY, int dEndX, int dEndY, T aValue1, T aValue2, T aValue3) {
 410 |   int aOffset1 = mXSize*mYSize;
 411 |   int aOffset2 = 2*aOffset1;
 412 | 	// vertical line
 413 | 	if (dStartX == dEndX) {
 414 |     if (dStartX < 0 || dStartX >= mXSize)	return;
 415 | 		int x = dStartX;
 416 | 		if (dStartY < dEndY) {
 417 | 			for (int y = dStartY; y <= dEndY; y++)
 418 | 				if (y >= 0 && y < mYSize) {
 419 |           mData[x+y*mXSize] = aValue1;
 420 |           mData[x+y*mXSize+aOffset1] = aValue2;
 421 |           mData[x+y*mXSize+aOffset2] = aValue3;
 422 |         }
 423 |   	}
 424 | 		else {
 425 | 			for (int y = dStartY; y >= dEndY; y--)
 426 | 				if (y >= 0 && y < mYSize) {
 427 |           mData[x+y*mXSize] = aValue1;
 428 |           mData[x+y*mXSize+aOffset1] = aValue2;
 429 |           mData[x+y*mXSize+aOffset2] = aValue3;
 430 |         }
 431 |     }
 432 |     return;
 433 |   }
 434 | 	// horizontal line
 435 | 	if (dStartY == dEndY) {
 436 |     if (dStartY < 0 || dStartY >= mYSize) return;
 437 |  		int y = dStartY;
 438 | 		if (dStartX < dEndX) {
 439 | 			for (int x = dStartX; x <= dEndX; x++)
 440 | 				if (x >= 0 && x < mXSize) {
 441 |           mData[x+y*mXSize] = aValue1;
 442 |           mData[x+y*mXSize+aOffset1] = aValue2;
 443 |           mData[x+y*mXSize+aOffset2] = aValue3;
 444 |         }
 445 |   	}
 446 | 		else {
 447 | 			for (int x = dStartX; x >= dEndX; x--)
 448 | 				if (x >= 0 && x < mXSize) {
 449 |           mData[x+y*mXSize] = aValue1;
 450 |           mData[x+y*mXSize+aOffset1] = aValue2;
 451 |           mData[x+y*mXSize+aOffset2] = aValue3;
 452 |         }
 453 |     }
 454 |     return;
 455 |   }
 456 |   float m = float(dStartY - dEndY) / float(dStartX - dEndX);
 457 |   float invm = 1.0/m;
 458 |   if (fabs(m) > 1.0) {
 459 |     if (dEndY > dStartY) {
 460 |       for (int y = dStartY; y <= dEndY; y++) {
 461 |         int x = (int)(0.5+dStartX+(y-dStartY)*invm);
 462 |         if (x >= 0 && x < mXSize &&	y >= 0 && y < mYSize) {
 463 |           mData[x+y*mXSize] = aValue1;
 464 |           mData[x+y*mXSize+aOffset1] = aValue2;
 465 |           mData[x+y*mXSize+aOffset2] = aValue3;
 466 |         }
 467 |       }
 468 |     }
 469 |     else {
 470 |       for (int y = dStartY; y >= dEndY; y--) {
 471 |         int x = (int)(0.5+dStartX+(y-dStartY)*invm);
 472 |         if (x >= 0 && x < mXSize &&	y >= 0 && y < mYSize) {
 473 |           mData[x+y*mXSize] = aValue1;
 474 |           mData[x+y*mXSize+aOffset1] = aValue2;
 475 |           mData[x+y*mXSize+aOffset2] = aValue3;
 476 |         }
 477 |       }
 478 |     }
 479 |   }
 480 |   else {
 481 |     if (dEndX > dStartX) {
 482 |       for (int x = dStartX; x <= dEndX; x++) {
 483 |         int y = (int)(0.5+dStartY+(x-dStartX)*m);
 484 |         if (x >= 0 && x < mXSize &&	y >= 0 && y < mYSize) {
 485 |           mData[x+y*mXSize] = aValue1;
 486 |           mData[x+y*mXSize+aOffset1] = aValue2;
 487 |           mData[x+y*mXSize+aOffset2] = aValue3;
 488 |         }
 489 |       }
 490 |     }
 491 |     else {
 492 |       for (int x = dStartX; x >= dEndX; x--) {
 493 |         int y = (int)(0.5+dStartY+(x-dStartX)*m);
 494 |         if (x >= 0 && x < mXSize &&	y >= 0 && y < mYSize) {
 495 |           mData[x+y*mXSize] = aValue1;
 496 |           mData[x+y*mXSize+aOffset1] = aValue2;
 497 |           mData[x+y*mXSize+aOffset2] = aValue3;
 498 |         }
 499 |       }
 500 |     }
 501 |   }
 502 | }
 503 | 
 504 | // drawRect
 505 | template <class T>
 506 | void CTensor<T>::drawRect(int dStartX, int dStartY, int dEndX, int dEndY, T aValue1, T aValue2, T aValue3) {
 507 |   drawLine(dStartX,dStartY,dEndX,dStartY,aValue1,aValue2,aValue3);
 508 |   drawLine(dStartX,dEndY,dEndX,dEndY,aValue1,aValue2,aValue3);
 509 |   drawLine(dStartX,dStartY,dStartX,dEndY,aValue1,aValue2,aValue3);
 510 |   drawLine(dEndX,dStartY,dEndX,dEndY,aValue1,aValue2,aValue3);
 511 | }
 512 | 
 513 | template <class T>
 514 | void CTensor<T>::normalize(T aMin, T aMax, T aInitialMin, T aInitialMax) {
 515 |   int aSize = mXSize*mYSize*mZSize;
 516 |   T aCurrentMin = aInitialMax;
 517 |   T aCurrentMax = aInitialMin;
 518 |   for (int i = 0; i < aSize; i++) {
 519 |     if (mData[i] > aCurrentMax) aCurrentMax = mData[i];
 520 |     else if (mData[i] < aCurrentMin) aCurrentMin = mData[i];
 521 |   }
 522 |   T aTemp1 = aCurrentMin - aMin;
 523 |   T aTemp2 = (aCurrentMax-aCurrentMin);
 524 |   if (aTemp2 == 0) aTemp2 = 1;
 525 |   else aTemp2 = (aMax-aMin)/aTemp2;
 526 |   for (int i = 0; i < aSize; i++) {
 527 |     mData[i] -= aTemp1;
 528 |     mData[i] *= aTemp2;
 529 |   }
 530 | }
 531 | 
// rgbToCielab
// Converts the tensor in place from RGB to CIELab. Layers 0, 1 and 2 are read
// as R, G and B and overwritten with the L, a and b values computed below.
// The function does not check that the tensor has three layers.
template <class T>
void CTensor<T>::rgbToCielab() {
  for (int y = 0; y < mYSize; y++)
    for (int x = 0; x < mXSize; x++) {
      // Scale the inputs by 0.003921569 (about 1/255)
      float R = operator()(x,y,0)*0.003921569;
      float G = operator()(x,y,1)*0.003921569;
      float B = operator()(x,y,2)*0.003921569;
      // Gamma expansion: power 2.4 above the threshold, linear factor below it
      // (0.9478673 is about 1/1.055, 0.077399381 about 1/12.92).
      // NOTE(review): the threshold 0.0031308 is the one cielabToRGB uses for
      // the opposite direction - confirm that it is intended here.
      if (R>0.0031308) R = pow((R + 0.055)*0.9478673, 2.4); else R *= 0.077399381;
      if (G>0.0031308) G = pow((G + 0.055)*0.9478673, 2.4); else G *= 0.077399381;
      if (B>0.0031308) B = pow((B + 0.055)*0.9478673, 2.4); else B *= 0.077399381;
      // Linear RGB -> XYZ (Observer = 2 degrees, Illuminant = D65)
      float X = R * 0.4124 + G * 0.3576 + B * 0.1805;
      float Y = R * 0.2126 + G * 0.7152 + B * 0.0722;
      float Z = R * 0.0193 + G * 0.1192 + B * 0.9505;
      // Scale X and Z (1.052111 is about 1/0.95047, 0.918417 about 1/1.08883,
      // the factors cielabToRGB multiplies X and Z with)
      X *= 1.052111;
      Z *= 0.918417;
      // Cube root above the threshold, linear approximation below it
      if (X > 0.008856) X = pow(X,0.33333333333); else X = 7.787*X + 0.137931034;
      if (Y > 0.008856) Y = pow(Y,0.33333333333); else Y = 7.787*Y + 0.137931034;
      if (Z > 0.008856) Z = pow(Z,0.33333333333); else Z = 7.787*Z + 0.137931034;
      // Store L, a and b in the scaling used by this class; cielabToRGB
      // expects exactly this scaling and offset
      operator()(x,y,0) = 1000.0*((295.8*Y) - 40.8)/255.0;
      operator()(x,y,1) = 128.0+637.5*(X-Y);
      operator()(x,y,2) = 128.0+255.0*(Y-Z);
    }
}
 556 | 
// cielabToRGB
// Converts the tensor in place from CIELab (in the scaling written by
// rgbToCielab) back to RGB. Layers 0, 1 and 2 are read as L, a and b and
// overwritten with 255*r, 255*g and 255*b.
// The function does not check that the tensor has three layers.
template <class T>
void CTensor<T>::cielabToRGB() {
  for (int y = 0; y < mYSize; y++)
    for (int x = 0; x < mXSize; x++) {
      // Undo the scaling and the offsets applied by rgbToCielab
      float L = operator()(x,y,0)*0.255;
      float A = operator()(x,y,1);
      float B = operator()(x,y,2);
      float Y = (L+40.8)*0.00338066;
      float X = (A-128.0+637.5*Y)*0.0015686;
      float Z = (128.0+255.0*Y-B)*0.00392157;
      // Cube above the threshold, inverse of the linear approximation below it
      float temp = Y*Y*Y;
      if (temp > 0.008856) Y = temp;
      else Y = (Y-0.137931034)*0.12842;
      temp = X*X*X;
      if (temp > 0.008856) X = temp;
      else X = (X-0.137931034)*0.12842;
      temp = Z*Z*Z;
      if (temp > 0.008856) Z = temp;
      else Z = (Z-0.137931034)*0.12842;
      // Scale X, Y and Z (presumably the D65 reference white - confirm)
      X *= 0.95047;
      Y *= 1.0;
      Z *= 1.08883;
      // XYZ -> linear RGB
      float r = 3.2406*X-1.5372*Y-0.4986*Z;
      float g = -0.9689*X+1.8758*Y+0.0415*Z;
      float b = 0.0557*X-0.204*Y+1.057*Z;
      // Per channel: clamp negative values to 0, then apply the gamma curve
      // (power law if its result exceeds 0.0031308, factor 12.92 otherwise)
      if (r < 0) r = 0;
      temp = 1.055*pow(r,0.41667)-0.055;
      if (temp > 0.0031308) r = temp;
      else r *= 12.92;
      if (g < 0) g = 0;
      temp = 1.055*pow(g,0.41667)-0.055;
      if (temp > 0.0031308) g = temp;
      else g *= 12.92;
      if (b < 0) b = 0;
      temp = 1.055*pow(b,0.41667)-0.055;
      if (temp > 0.0031308) b = temp;
      else b *= 12.92;
      // Values above 1 are not clamped before the scaling
      operator()(x,y,0) = 255.0*r;
      operator()(x,y,1) = 255.0*g;
      operator()(x,y,2) = 255.0*b;
    }
}
 599 | 
 600 | // applySimilarityTransform
 601 | template <class T>
 602 | void CTensor<T>::applySimilarityTransform(CTensor<T>& aWarped, CMatrix<bool>& aOutside, float tx, float ty, float cx, float cy, float phi, float scale) {
 603 |   float cosphi = scale*cos(phi);
 604 |   float sinphi = scale*sin(phi);
 605 |   int aSize = mXSize*mYSize;
 606 |   int aWarpedSize = aWarped.xSize()*aWarped.ySize();
 607 |   float ctx = cx+tx-cx*cosphi+cy*sinphi;
 608 |   float cty = cy+ty-cy*cosphi-cx*sinphi;
 609 |   aOutside = false;
 610 |   int i = 0;
 611 |   for (int y = 0; y < aWarped.ySize(); y++)
 612 |     for (int x = 0; x < aWarped.xSize(); x++,i++) {
 613 |       float xf = x; float yf = y;
 614 |       float ax = xf*cosphi-yf*sinphi+ctx;
 615 |       float ay = yf*cosphi+xf*sinphi+cty;
 616 |       int x1 = (int)ax; int y1 = (int)ay;
 617 |       float alphaX = ax-x1; float alphaY = ay-y1;
 618 |       float betaX = 1.0-alphaX; float betaY = 1.0-alphaY;
 619 |       if (x1 < 0 || y1 < 0 || x1+1 >= mXSize || y1+1 >= mYSize) aOutside.data()[i] = true;
 620 |       else {
 621 |         int j = y1*mXSize+x1;
 622 |         for (int k = 0; k < mZSize; k++) {
 623 |           float a = betaX*mData[j]       +alphaX*mData[j+1];
 624 |           float b = betaX*mData[j+mXSize]+alphaX*mData[j+1+mXSize];
 625 |           aWarped.data()[i+k*aWarpedSize] = betaY*a+alphaY*b;
 626 |           j += aSize;
 627 |         }
 628 |       }
 629 |     }
 630 | }
 631 | 
 632 | // applyHomography
 633 | template <class T>
 634 | void CTensor<T>::applyHomography(CTensor<T>& aWarped, CMatrix<bool>& aOutside, const CMatrix<float>& H) {
 635 |   int aSize = mXSize*mYSize;
 636 |   int aWarpedSize = aWarped.xSize()*aWarped.ySize();
 637 |   aOutside = false;
 638 |   int i = 0;
 639 |   for (int y = 0; y < aWarped.ySize(); y++)
 640 |     for (int x = 0; x < aWarped.xSize(); x++,i++) {
 641 |       float xf = x; float yf = y;
 642 |       float ax = H.data()[0]*xf+H.data()[1]*yf+H.data()[2];
 643 |       float ay = H.data()[3]*xf+H.data()[4]*yf+H.data()[5];
 644 |       float az = H.data()[6]*xf+H.data()[7]*yf+H.data()[8];
 645 |       float invaz = 1.0/az;
 646 |       ax *= invaz; ay *= invaz;
 647 |       int x1 = (int)ax; int y1 = (int)ay;
 648 |       float alphaX = ax-x1; float alphaY = ay-y1;
 649 |       float betaX = 1.0-alphaX; float betaY = 1.0-alphaY;
 650 |       if (x1 < 0 || y1 < 0 || x1+1 >= mXSize || y1+1 >= mYSize) aOutside.data()[i] = true;
 651 |       else {
 652 |         int j = y1*mXSize+x1;
 653 |         for (int k = 0; k < mZSize; k++) {
 654 |           float a = betaX*mData[j]       +alphaX*mData[j+1];
 655 |           float b = betaX*mData[j+mXSize]+alphaX*mData[j+1+mXSize];
 656 |           aWarped.data()[i+k*aWarpedSize] = betaY*a+alphaY*b;
 657 |           j += aSize;
 658 |         }
 659 |       }
 660 |     }
 661 | }
 662 | 
 663 | // -----------------------------------------------------------------------------
 664 | // File I/O
 665 | // -----------------------------------------------------------------------------
 666 | 
 667 | // readFromMathematicaFile
 668 | template <class T>
 669 | void CTensor<T>::readFromMathematicaFile(const char* aFilename) {
 670 |   using namespace std;
 671 |   // Read the whole file and store data in aData
 672 |   // Ignore blanks, tabs and lines
 673 |   // Also ignore Mathematica comments (* ... *)
 674 |   ifstream aStream(aFilename);
 675 |   string aData;
 676 |   char aChar;
 677 |   bool aBracketFound = false;
 678 |   bool aStarFound = false;
 679 |   bool aCommentFound = false;
 680 |   while (aStream.get(aChar))
 681 |     if (aChar != ' ' && aChar != '\t' && aChar != '\n') {
 682 |       if (aCommentFound) {
 683 |         if (!aStarFound && aChar == '*') aStarFound = true;
 684 |         else {
 685 |           if (aStarFound && aChar == ')') aCommentFound = false;
 686 |           aStarFound = false;
 687 |         }
 688 |       }
 689 |       else {
 690 |         if (!aBracketFound && aChar == '(') aBracketFound = true;
 691 |         else {
 692 |           if (aBracketFound && aChar == '*') aCommentFound = true;
 693 |           else aData += aChar;
 694 |           aBracketFound = false;
 695 |         }
 696 |       }
 697 |     }
 698 |   // Count the number of braces and double braces to figure out z- and y-Size of tensor
 699 |   int aDoubleBraceCount = 0;
 700 |   int aBraceCount = 0;
 701 |   int aPos = 0;
 702 |   while ((aPos = aData.find_first_of('{',aPos)+1) > 0) {
 703 |     aBraceCount++;
 704 |     if (aData[aPos] == '{' && aData[aPos+1] != '{') aDoubleBraceCount++;
 705 |   }
 706 |   // Count the number of commas in the first section to figure out xSize of tensor
 707 |   int aCommaCount = 0;
 708 |   aPos = 0;
 709 |   while (aData[aPos] != '}') {
 710 |     if (aData[aPos] == ',') aCommaCount++;
 711 |     aPos++;
 712 |   }
 713 |   // Adapt size of tensor
 714 |   if (mData != 0) delete[] mData;
 715 |   mXSize = aCommaCount+1;
 716 |   mYSize = (aBraceCount-1-aDoubleBraceCount) / aDoubleBraceCount;
 717 |   mZSize = aDoubleBraceCount;
 718 |   mData = new T[mXSize*mYSize*mZSize];
 719 |   // Analyse file ---------------
 720 |   aPos = 0;
 721 |   if (aData[aPos] != '{') throw EInvalidFileFormat("Mathematica");
 722 |   aPos++;
 723 |   for (int z = 0; z < mZSize; z++) {
 724 |     if (aData[aPos] != '{') throw EInvalidFileFormat("Mathematica");
 725 |     aPos++;
 726 |     for (int y = 0; y < mYSize; y++) {
 727 |       if (aData[aPos] != '{') throw EInvalidFileFormat("Mathematica");
 728 |       aPos++;
 729 |       for (int x = 0; x < mXSize; x++) {
 730 |         int oldPos = aPos;
 731 |         if (x+1 < mXSize) aPos = aData.find_first_of(',',aPos);
 732 |         else aPos = aData.find_first_of('}',aPos);
 733 |         #ifdef GNU_COMPILER
 734 |         string s = aData.substr(oldPos,aPos-oldPos);
 735 |         istrstream is(s.c_str());
 736 |         #else
 737 |         string s = aData.substr(oldPos,aPos-oldPos);
 738 |         istringstream is(s);
 739 |         #endif
 740 |         T aItem;
 741 |         is >> aItem;
 742 |         operator()(x,y,z) = aItem;
 743 |         aPos++;
 744 |       }
 745 |       if (y+1 < mYSize) {
 746 |         if (aData[aPos] != ',') throw EInvalidFileFormat("Mathematica");
 747 |         aPos++;
 748 |         while (aData[aPos] != '{')
 749 |           aPos++;
 750 |       }
 751 |     }
 752 |     aPos++;
 753 |     if (z+1 < mZSize) {
 754 |       if (aData[aPos] != ',') throw EInvalidFileFormat("Mathematica");
 755 |       aPos++;
 756 |       while (aData[aPos] != '{')
 757 |         aPos++;
 758 |     }
 759 |   }
 760 | }
 761 | 
 762 | // writeToMathematicaFile
 763 | template <class T>
 764 | void CTensor<T>::writeToMathematicaFile(const char* aFilename) {
 765 |   using namespace std;
 766 |   ofstream aStream(aFilename);
 767 |   aStream << '{';
 768 |   for (int z = 0; z < mZSize; z++) {
 769 |     aStream << '{';
 770 |     for (int y = 0; y < mYSize; y++) {
 771 |       aStream << '{';
 772 |       for (int x = 0; x < mXSize; x++) {
 773 |         aStream << operator()(x,y,z);
 774 |         if (x+1 < mXSize) aStream << ',';
 775 |       }
 776 |       aStream << '}';
 777 |       if (y+1 < mYSize) aStream << ",\n";
 778 |     }
 779 |     aStream << '}';
 780 |     if (z+1 < mZSize) aStream << ",\n";
 781 |   }
 782 |   aStream << '}';
 783 | }
 784 | 
 785 | // readFromIMFile
 786 | template <class T>
 787 | void CTensor<T>::readFromIMFile(const char* aFilename) {
 788 |   FILE *aStream;
 789 |   aStream = fopen(aFilename,"rb");
 790 |   // Read image data
 791 |   for (int i = 0; i < mXSize*mYSize*mZSize; i++)
 792 |     mData[i] = getc(aStream);
 793 |   fclose(aStream);
 794 | }
 795 | 
 796 | // writeToIMFile
 797 | template <class T>
 798 | void CTensor<T>::writeToIMFile(const char *aFilename) {
 799 |   FILE *aStream;
 800 |   aStream = fopen(aFilename,"wb");
 801 |   // write data
 802 |   for (int i = 0; i < mXSize*mYSize*mZSize; i++) {
 803 |     char dummy = (char)mData[i];
 804 |     fwrite(&dummy,1,1,aStream);
 805 |   }
 806 |   fclose(aStream);
 807 | }
 808 | 
 809 | // readFromPGM
 810 | template <class T>
 811 | void CTensor<T>::readFromPGM(const char* aFilename) {
 812 |   FILE *aStream;
 813 |   aStream = fopen(aFilename,"rb");
 814 |   if (aStream == 0) std::cerr << "File not found: " << aFilename << std::endl;
 815 |   int dummy;
 816 |   // Find beginning of file (P5)
 817 |   while (getc(aStream) != 'P');
 818 |   if (getc(aStream) != '5') throw EInvalidFileFormat("PGM");
 819 |   do
 820 |     dummy = getc(aStream);
 821 |   while (dummy != '\n' && dummy != ' ');
 822 |   // Remove comments and empty lines
 823 |   dummy = getc(aStream);
 824 |   while (dummy == '#') {
 825 |     while (getc(aStream) != '\n');
 826 |     dummy = getc(aStream);
 827 |   }
 828 |   while (dummy == '\n')
 829 |     dummy = getc(aStream);
 830 |   // Read image size
 831 |   mXSize = dummy-48;
 832 |   while ((dummy = getc(aStream)) >= 48 && dummy < 58)
 833 |     mXSize = 10*mXSize+dummy-48;
 834 |   while ((dummy = getc(aStream)) < 48 || dummy >= 58);
 835 |   mYSize = dummy-48;
 836 |   while ((dummy = getc(aStream)) >= 48 && dummy < 58)
 837 |     mYSize = 10*mYSize+dummy-48;
 838 |   mZSize = 1;
 839 |   while (dummy != '\n' && dummy != ' ')
 840 |     dummy = getc(aStream);
 841 |   while (dummy != '\n' && dummy != ' ')
 842 |     dummy = getc(aStream);
 843 |   // Adjust size of data structure
 844 |   delete[] mData;
 845 |   mData = new T[mXSize*mYSize];
 846 |   // Read image data
 847 |   for (int i = 0; i < mXSize*mYSize; i++)
 848 |     mData[i] = getc(aStream);
 849 |   fclose(aStream);
 850 | }
 851 | 
 852 | // writeToPGM
 853 | template <class T>
 854 | void CTensor<T>::writeToPGM(const char* aFilename) {
 855 |   int rows = (int)floor(sqrt(mZSize));
 856 |   int cols = (int)ceil(mZSize*1.0/rows);
 857 |   FILE* outimage = fopen(aFilename, "wb");
 858 |   fprintf(outimage, "P5 \n");
 859 |   fprintf(outimage, "%ld %ld \n255\n", cols*mXSize,rows*mYSize);
 860 |   for (int r = 0; r < rows; r++)
 861 |     for (int y = 0; y < mYSize; y++)
 862 |       for (int c = 0; c < cols; c++)
 863 |         for (int x = 0; x < mXSize; x++) {
 864 |           unsigned char aHelp;
 865 |           if (r*cols+c >= mZSize) aHelp = 0;
 866 |           else aHelp = (unsigned char)operator()(x,y,r*cols+c);
 867 |           fwrite (&aHelp, sizeof(unsigned char), 1, outimage);
 868 |         }
 869 |   fclose(outimage);
 870 | }
 871 | 
 872 | // makeColorTensor
 873 | template <class T>
 874 | void CTensor<T>::makeColorTensor() {
 875 |   if (mZSize != 1) return;
 876 |   int aSize = mXSize*mYSize;
 877 |   int a2Size = 2*aSize;
 878 |   T* aNewData = new T[aSize*3];
 879 |   for (int i = 0; i < aSize; i++)
 880 |     aNewData[i] = aNewData[i+aSize] = aNewData[i+a2Size] = mData[i];
 881 |   mZSize = 3;
 882 |   delete[] mData;
 883 |   mData = aNewData;
 884 | }
 885 | 
 886 | // readFromPPM
 887 | template <class T>
 888 | void CTensor<T>::readFromPPM(const char* aFilename) {
 889 |   FILE *aStream;
 890 |   aStream = fopen(aFilename,"rb");
 891 |   if (aStream == 0)
 892 |     std::cerr << "File not found: " << aFilename << std::endl;
 893 |   int dummy;
 894 |   // Find beginning of file (P6)
 895 |   while (getc(aStream) != 'P');
 896 |   dummy = getc(aStream);
 897 |   if (dummy == '5') mZSize = 1;
 898 |   else if (dummy == '6') mZSize = 3;
 899 |   else throw EInvalidFileFormat("PPM");
 900 |   do dummy = getc(aStream); while (dummy != '\n' && dummy != ' ');
 901 |   // Remove comments and empty lines
 902 |   dummy = getc(aStream);
 903 |   while (dummy == '#') {
 904 |     while (getc(aStream) != '\n');
 905 |     dummy = getc(aStream);
 906 |   }
 907 |   while (dummy == '\n')
 908 |     dummy = getc(aStream);
 909 |   // Read image size
 910 |   mXSize = dummy-48;
 911 |   while ((dummy = getc(aStream)) >= 48 && dummy < 58)
 912 |     mXSize = 10*mXSize+dummy-48;
 913 |   while ((dummy = getc(aStream)) < 48 || dummy >= 58);
 914 |   mYSize = dummy-48;
 915 |   while ((dummy = getc(aStream)) >= 48 && dummy < 58)
 916 |     mYSize = 10*mYSize+dummy-48;
 917 |   while (dummy != '\n' && dummy != ' ')
 918 |     dummy = getc(aStream);
 919 |   while (dummy < 48 || dummy >= 58) dummy = getc(aStream);
 920 |   while ((dummy = getc(aStream)) >= 48 && dummy < 58);
 921 |   if (dummy != '\n') while (getc(aStream) != '\n');
 922 |   // Adjust size of data structure
 923 |   delete[] mData;
 924 |   mData = new T[mXSize*mYSize*mZSize];
 925 |   // Read image data
 926 |   int aSize = mXSize*mYSize;
 927 |   if (mZSize == 1)
 928 |     for (int i = 0; i < aSize; i++)
 929 |       mData[i] = getc(aStream);
 930 |   else {
 931 |     int aSizeTwice = aSize+aSize;
 932 |     for (int i = 0; i < aSize; i++) {
 933 |       mData[i] = getc(aStream);
 934 |       mData[i+aSize] = getc(aStream);
 935 |       mData[i+aSizeTwice] = getc(aStream);
 936 |     }
 937 |   }
 938 |   fclose(aStream);
 939 | }
 940 | 
 941 | // writeToPPM
 942 | template <class T>
 943 | void CTensor<T>::writeToPPM(const char* aFilename) {
 944 |   FILE* outimage = fopen(aFilename, "wb");
 945 |   fprintf(outimage, "P6 \n");
 946 |   fprintf(outimage, "%d %d \n255\n", mXSize,mYSize);
 947 |   for (int y = 0; y < mYSize; y++)
 948 |     for (int x = 0; x < mXSize; x++) {
 949 |       unsigned char aHelp = (unsigned char)operator()(x,y,0);
 950 |       fwrite (&aHelp, sizeof(unsigned char), 1, outimage);
 951 |       aHelp = (unsigned char)operator()(x,y,1);
 952 |       fwrite (&aHelp, sizeof(unsigned char), 1, outimage);
 953 |       aHelp = (unsigned char)operator()(x,y,2);
 954 |       fwrite (&aHelp, sizeof(unsigned char), 1, outimage);
 955 |     }
 956 |   fclose(outimage);
 957 | }
 958 | 
 959 | // readFromPDM
 960 | template <class T>
 961 | void CTensor<T>::readFromPDM(const char* aFilename) {
 962 |   std::ifstream aStream(aFilename);
 963 |   std::string s;
 964 |   // Read header
 965 |   aStream >> s;
 966 |   if (s != "P9") throw EInvalidFileFormat("PDM");
 967 |   char aFeatureType;
 968 |   aStream >> aFeatureType;
 969 |   aStream >> s;
 970 |   aStream >> mXSize;
 971 |   aStream >> mYSize;
 972 |   aStream >> mZSize;
 973 |   aStream >> s;
 974 |   // Adjust size of data structure
 975 |   delete[] mData;
 976 |   mData = new T[mXSize*mYSize*mZSize];
 977 |   // Read data
 978 |   for (int i = 0; i < mXSize*mYSize*mZSize; i++)
 979 |     aStream >> mData[i];
 980 | }
 981 | 
 982 | // writeToPDM
 983 | template <class T>
 984 | void CTensor<T>::writeToPDM(const char* aFilename, char aFeatureType) {
 985 |   std::ofstream aStream(aFilename);
 986 |   // write header
 987 |   aStream << "P9" << std::endl;
 988 |   aStream << aFeatureType << "SS" << std::endl;
 989 |   aStream << mZSize << ' ' << mYSize << ' ' << mXSize << std::endl;
 990 |   aStream << "F" << std::endl;
 991 |   // write data
 992 |   for (int i = 0; i < mXSize*mYSize*mZSize; i++) {
 993 |     aStream << mData[i];
 994 |     if (i % 8 == 0) aStream << std::endl;
 995 |     else aStream << ' ';
 996 |   }
 997 | }
 998 | 
 999 | // operator ()
1000 | template <class T>
1001 | inline T& CTensor<T>::operator()(const int ax, const int ay, const int az) const {
1002 |   #ifdef _DEBUG
1003 |     if (ax >= mXSize || ay >= mYSize || az >= mZSize || ax < 0 || ay < 0 || az < 0)
1004 |       throw ETensorRangeOverflow(ax,ay,az);
1005 |   #endif
1006 |   return mData[mXSize*(mYSize*az+ay)+ax];
1007 | }
1008 | 
1009 | template <class T>
1010 | CVector<T> CTensor<T>::operator()(const float ax, const float ay) const {
1011 |   CVector<T> aResult(mZSize);
1012 |   int x1 = (int)ax;
1013 |   int y1 = (int)ay;
1014 |   int x2 = x1+1;
1015 |   int y2 = y1+1;
1016 |   #ifdef _DEBUG
1017 |   if (x2 >= mXSize || y2 >= mYSize || x1 < 0 || y1 < 0) throw ETensorRangeOverflow(ax,ay,0);
1018 |   #endif
1019 |   float alphaX = ax-x1; float alphaXTrans = 1.0-alphaX;
1020 |   float alphaY = ay-y1; float alphaYTrans = 1.0-alphaY;
1021 |   for (int k = 0; k < mZSize; k++) {
1022 |     float a = alphaXTrans*operator()(x1,y1,k)+alphaX*operator()(x2,y1,k);
1023 |     float b = alphaXTrans*operator()(x1,y2,k)+alphaX*operator()(x2,y2,k);
1024 |     aResult(k) = alphaYTrans*a+alphaY*b;
1025 |   }
1026 |   return aResult;
1027 | }
1028 | 
1029 | // operator =
1030 | template <class T>
1031 | inline CTensor<T>& CTensor<T>::operator=(const T aValue) {
1032 |   fill(aValue);
1033 |   return *this;
1034 | }
1035 | 
1036 | template <class T>
1037 | CTensor<T>& CTensor<T>::operator=(const CTensor<T>& aCopyFrom) {
1038 |   if (this != &aCopyFrom) {
1039 |     delete[] mData;
1040 |     if (aCopyFrom.mData == 0) {
1041 |       mData = 0; mXSize = 0; mYSize = 0; mZSize = 0;
1042 |     }
1043 |     else {
1044 |       mXSize = aCopyFrom.mXSize;
1045 |       mYSize = aCopyFrom.mYSize;
1046 |       mZSize = aCopyFrom.mZSize;
1047 |       int wholeSize = mXSize*mYSize*mZSize;
1048 |       mData = new T[wholeSize];
1049 |       for (register int i = 0; i < wholeSize; i++)
1050 |         mData[i] = aCopyFrom.mData[i];
1051 |     }
1052 |   }
1053 |   return *this;
1054 | }
1055 | 
1056 | // operator +=
1057 | template <class T>
1058 | CTensor<T>& CTensor<T>::operator+=(const CTensor<T>& aTensor) {
1059 |   #ifdef _DEBUG
1060 |   if (mXSize != aTensor.mXSize || mYSize != aTensor.mYSize || mZSize != aTensor.mZSize)
1061 |     throw ETensorIncompatibleSize(mXSize,mYSize,mZSize);
1062 |   #endif
1063 |   int wholeSize = size();
1064 |   for (int i = 0; i < wholeSize; i++)
1065 |     mData[i] += aTensor.mData[i];
1066 |   return *this;
1067 | }
1068 | 
1069 | // operator +=
1070 | template <class T>
1071 | CTensor<T>& CTensor<T>::operator+=(const T aValue) {
1072 |   int wholeSize = mXSize*mYSize*mZSize;
1073 |   for (int i = 0; i < wholeSize; i++)
1074 |     mData[i] += aValue;
1075 |   return *this;
1076 | }
1077 | 
1078 | // operator *=
1079 | template <class T>
1080 | CTensor<T>& CTensor<T>::operator*=(const T aValue) {
1081 |   int wholeSize = mXSize*mYSize*mZSize;
1082 |   for (int i = 0; i < wholeSize; i++)
1083 |     mData[i] *= aValue;
1084 |   return *this;
1085 | }
1086 | 
1087 | // min
1088 | template <class T>
1089 | T CTensor<T>::min() const {
1090 |   T aMin = mData[0];
1091 |   int aSize = mXSize*mYSize*mZSize;
1092 |   for (int i = 1; i < aSize; i++)
1093 |     if (mData[i] < aMin) aMin = mData[i];
1094 |   return aMin;
1095 | }
1096 | 
1097 | // max
1098 | template <class T>
1099 | T CTensor<T>::max() const {
1100 |   T aMax = mData[0];
1101 |   int aSize = mXSize*mYSize*mZSize;
1102 |   for (int i = 1; i < aSize; i++)
1103 |     if (mData[i] > aMax) aMax = mData[i];
1104 |   return aMax;
1105 | }
1106 | 
1107 | // avg
1108 | template <class T>
1109 | T CTensor<T>::avg() const {
1110 |   T aAvg = 0;
1111 |   for (int z = 0; z < mZSize; z++)
1112 |     aAvg += avg(z);
1113 |   return aAvg/mZSize;
1114 | }
1115 | 
1116 | template <class T>
1117 | T CTensor<T>::avg(int az) const {
1118 |   T aAvg = 0;
1119 |   int aSize = mXSize*mYSize;
1120 |   int aTemp = (az+1)*aSize;
1121 |   for (int i = az*aSize; i < aTemp; i++) 
1122 |     aAvg += mData[i];
1123 |   return aAvg/aSize;
1124 | }
1125 | 
1126 | // xSize
1127 | template <class T>
1128 | inline int CTensor<T>::xSize() const {
1129 |   return mXSize;
1130 | }
1131 | 
1132 | // ySize
1133 | template <class T>
1134 | inline int CTensor<T>::ySize() const {
1135 |   return mYSize;
1136 | }
1137 | 
1138 | // zSize
1139 | template <class T>
1140 | inline int CTensor<T>::zSize() const {
1141 |   return mZSize;
1142 | }
1143 | 
1144 | // size
1145 | template <class T>
1146 | inline int CTensor<T>::size() const {
1147 |   return mXSize*mYSize*mZSize;
1148 | }
1149 | 
1150 | // getMatrix
1151 | template <class T>
1152 | CMatrix<T> CTensor<T>::getMatrix(const int az) const {
1153 |   CMatrix<T> aTemp(mXSize,mYSize);
1154 |   int aMatrixSize = mXSize*mYSize;
1155 |   int aOffset = az*aMatrixSize;
1156 |   for (int i = 0; i < aMatrixSize; i++)
1157 |     aTemp.data()[i] = mData[i+aOffset];
1158 |   return aTemp;
1159 | }
1160 | 
1161 | // getMatrix
1162 | template <class T>
1163 | void CTensor<T>::getMatrix(CMatrix<T>& aMatrix, const int az) const {
1164 |   if (aMatrix.xSize() != mXSize || aMatrix.ySize() != mYSize)
1165 |     throw ETensorIncompatibleSize(aMatrix.xSize(),aMatrix.ySize(),mXSize,mYSize);
1166 |   int aMatrixSize = mXSize*mYSize;
1167 |   int aOffset = az*aMatrixSize;
1168 |   for (int i = 0; i < aMatrixSize; i++)
1169 |     aMatrix.data()[i] = mData[i+aOffset];
1170 | }
1171 | 
1172 | // putMatrix
1173 | template <class T>
1174 | void CTensor<T>::putMatrix(CMatrix<T>& aMatrix, const int az) {
1175 |   if (aMatrix.xSize() != mXSize || aMatrix.ySize() != mYSize)
1176 |     throw ETensorIncompatibleSize(aMatrix.xSize(),aMatrix.ySize(),mXSize,mYSize);
1177 |   int aMatrixSize = mXSize*mYSize;
1178 |   int aOffset = az*aMatrixSize;
1179 |   for (int i = 0; i < aMatrixSize; i++)
1180 |     mData[i+aOffset] = aMatrix.data()[i];
1181 | }
1182 | 
1183 | // data()
1184 | template <class T>
1185 | inline T* CTensor<T>::data() const {
1186 |   return mData;
1187 | }
1188 | 
1189 | // N O N - M E M B E R  F U N C T I O N S --------------------------------------
1190 | 
1191 | // operator <<
1192 | template <class T>
1193 | std::ostream& operator<<(std::ostream& aStream, const CTensor<T>& aTensor) {
1194 |   for (int z = 0; z < aTensor.zSize(); z++) {
1195 |     for (int y = 0; y < aTensor.ySize(); y++) {
1196 |       for (int x = 0; x < aTensor.xSize(); x++)
1197 |         aStream << aTensor(x,y,z) << ' ';
1198 |       aStream << std::endl;
1199 |     }
1200 |     aStream << std::endl;
1201 |   }
1202 |   return aStream;
1203 | }
1204 | 
1205 | #endif
1206 | 


--------------------------------------------------------------------------------