├── .gitignore ├── LICENSE ├── MTNN ├── example │ ├── MNIST │ │ ├── ImageReader.cpp │ │ ├── ImageReader.h │ │ ├── LabelReader.cpp │ │ ├── LabelReader.h │ │ ├── Makefile │ │ ├── data │ │ │ ├── mnist.nn │ │ │ └── mnist_mse.dat │ │ └── main.cpp │ ├── Makefile │ ├── example.nn │ └── main.cpp └── include │ ├── ilayer.h │ ├── imatrix.h │ ├── neuralnet.h │ └── neuralnetanalyzer.h └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Compiled Dynamic libraries 8 | *.so 9 | *.dylib 10 | 11 | # Compiled Static libraries 12 | *.lai 13 | *.la 14 | *.a 15 | 16 | # Executables 17 | *.exe 18 | *.out 19 | *.app 20 | *.sdf 21 | *.sdf 22 | *.suo 23 | CNN/CNN.sdf 24 | CNN/CNN.v12.suo 25 | CNN/CNN/Debug/CNN.log 26 | CNN/CNN/Debug/CNN.tlog/CL.read.1.tlog 27 | CNN/CNN/Debug/CNN.tlog/CL.write.1.tlog 28 | *.ilk 29 | *.opensdf 30 | *.pdb 31 | *.tlog 32 | CNN/CNN/Debug/CNN.tlog/link.write.1.tlog 33 | CNN/CNN/Debug/CNN.tlog/link.command.1.tlog 34 | CNN/CNN/Debug/CNN.tlog/CNN.lastbuildstate 35 | CNN/CNN/Debug/CNN.tlog/CL.write.1.tlog 36 | CNN/CNN/Debug/CNN.tlog/link.command.1.tlog 37 | *.idb 38 | CNN/CNN/Debug/CNN.tlog/CL.write.1.tlog 39 | CNN/CNN/Debug/CNN.tlog/CL.write.1.tlog 40 | *.tlog 41 | *.tlog 42 | *.pdb 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 liammcinroy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, 
subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MTNN/example/MNIST/ImageReader.cpp: -------------------------------------------------------------------------------- 1 | #include "ImageReader.h" 2 | 3 | ImageReader::ImageReader() 4 | { 5 | file = std::ifstream("", std::ios::in | std::ios::binary); 6 | current = Matrix2D(defaultval); 7 | } 8 | 9 | ImageReader::ImageReader(const ImageReader &obj) 10 | { 11 | file = std::ifstream(obj.m_path, std::ios::in | std::ios::binary); 12 | m_path = obj.m_path; 13 | current = Matrix2D(defaultval); 14 | char c; 15 | for (size_t i = 0; i < 16; ++i) 16 | file >> c; 17 | next(); 18 | } 19 | 20 | ImageReader::ImageReader(const std::string &path) 21 | { 22 | file = std::ifstream(path, std::ios::in | std::ios::binary); 23 | m_path = path; 24 | current = Matrix2D(defaultval); 25 | char c; 26 | for (size_t i = 0; i < 16; ++i) 27 | file >> c; 28 | next(); 29 | } 30 | 31 | void ImageReader::next() 32 | { 33 | for (size_t j = 0; j < 29; ++j) 34 | { 35 | current.at(0, j) = defaultval; 36 | current.at(28, j) = defaultval; 37 | } 38 | 39 | char c; 40 | for (size_t i = 0; i < 28; ++i) 41 | { 42 | current.at(i, 0) = defaultval; 43 | current.at(i, 28) = defaultval; 44 | 45 | 46 | for (size_t j = 1; j < 29; ++j) 47 | { 48 | c = file.peek(); 49 
| if (c != '\0') 50 | current.at(i, j) = 1; 51 | else 52 | current.at(i, j) = defaultval; 53 | file.get(); 54 | } 55 | } 56 | ++index; 57 | } 58 | 59 | void ImageReader::catch_up(int i) 60 | { 61 | int diff = i - index; 62 | index = i; 63 | for (int j = 0; j < diff * 28 * 28; ++j) 64 | file.get(); 65 | } 66 | -------------------------------------------------------------------------------- /MTNN/example/MNIST/ImageReader.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "imatrix.h" 5 | 6 | class ImageReader 7 | { 8 | public: 9 | ImageReader(); 10 | ImageReader(const ImageReader &obj); 11 | ImageReader(const std::string &path); 12 | ~ImageReader() = default; 13 | void next(); 14 | void catch_up(int i); 15 | Matrix2D current; 16 | int index = 0; 17 | float defaultval = -1; 18 | private: 19 | std::ifstream file; 20 | std::string m_path; 21 | }; 22 | -------------------------------------------------------------------------------- /MTNN/example/MNIST/LabelReader.cpp: -------------------------------------------------------------------------------- 1 | #include "LabelReader.h" 2 | 3 | LabelReader::LabelReader() 4 | { 5 | file = std::ifstream("", std::ios::in | std::ios::binary); 6 | current = Matrix2D(defaultval); 7 | char c; 8 | } 9 | 10 | LabelReader::LabelReader(const LabelReader &obj) 11 | { 12 | file = std::ifstream(obj.m_path, std::ios::in | std::ios::binary); 13 | m_path = obj.m_path; 14 | current = Matrix2D(defaultval); 15 | char c; 16 | for (size_t i = 0; i < 8; ++i) 17 | file >> c; 18 | next(); 19 | } 20 | 21 | LabelReader::LabelReader(const std::string &path) 22 | { 23 | file = std::ifstream(path, std::ios::in | std::ios::binary); 24 | m_path = path; 25 | current = Matrix2D(defaultval); 26 | char c; 27 | for (size_t i = 0; i < 8; ++i) 28 | file >> c; 29 | next(); 30 | } 31 | 32 | void LabelReader::next() 33 | { 34 | char label; 35 | label = file.peek(); 36 | file.get(); 37 | for (size_t 
i = 0; i < 10; ++i) 38 | { 39 | if (i == label) 40 | current.at(i, 0) = 1; 41 | else 42 | current.at(i, 0) = defaultval; 43 | } 44 | ++index; 45 | } 46 | 47 | void LabelReader::catch_up(int i) 48 | { 49 | int diff = i - index; 50 | index = i; 51 | for (int j = 0; j < diff; ++j) 52 | file.get(); 53 | } 54 | -------------------------------------------------------------------------------- /MTNN/example/MNIST/LabelReader.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "imatrix.h" 5 | 6 | class LabelReader 7 | { 8 | public: 9 | LabelReader(); 10 | LabelReader(const LabelReader &obj); 11 | LabelReader(const std::string &path); 12 | ~LabelReader() = default; 13 | void next(); 14 | void catch_up(int i); 15 | Matrix2D current; 16 | int index; 17 | float defaultval = -1; 18 | private: 19 | std::ifstream file; 20 | std::string m_path; 21 | }; 22 | -------------------------------------------------------------------------------- /MTNN/example/MNIST/Makefile: -------------------------------------------------------------------------------- 1 | build: main.cpp 2 | g++ -O2 -std=gnu++14 -o main.out main.cpp ImageReader.cpp LabelReader.cpp -I../../include 3 | -------------------------------------------------------------------------------- /MTNN/example/MNIST/data/mnist.nn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liammcinroy/MetaTemplateNeuralNet/2e6d1a666d47fb4daa511bc4d1b70b686e9ed3bd/MTNN/example/MNIST/data/mnist.nn -------------------------------------------------------------------------------- /MTNN/example/MNIST/data/mnist_mse.dat: -------------------------------------------------------------------------------- 1 | 
1.18592,0.719703,0.562798,0.462016,0.434916,0.403896,0.39742,0.364705,0.373927,0.368347,0.336369,0.275784,0.301674,0.31054,0.315516,0.282027,0.282288,0.281618,0.276792,0.259816,0.272815,0.265772,0.249346,0.199551,0.214917,0.222364,0.219153,0.200593,0.198306,0.205018,0.198503,0.19704,0.195796,0.203516,0.188801,0.151796,0.166534,0.183598,0.18386,0.171242,0.159953,0.165743,0.16627,0.162242,0.169772,0.173795,0.153625,0.125822,0.135884,0.151688,0.145506,0.135199,0.138288,0.143483,0.136892,0.139264,0.136807,0.147167,0.13318,0.110794,0.115385,0.1383,0.129395,0.118035,0.123727,0.123776,0.123955,0.129505,0.125251,0.133157,0.123284,0.10005,0.10874,0.118332,0.120126,0.105559,0.109683,0.115461,0.108364,0.111546,0.112052,0.122438,0.113645,0.0879792,0.104213,0.10976,0.106201,0.102736,0.106695,0.113234,0.102738,0.0996554,0.112661,0.117391,0.103239,0.0834371,0.0968373,0.103731,0.0988821,0.0949914,0.09054,0.0933869,0.0970826,0.0993998,0.0998501,0.104498,0.0948267,0.0749574,0.0814812,0.100133,0.0931235,0.0873865,0.088529,0.097086,0.0898528,0.0936922,0.0916852,0.107388,0.0933393,0.0711998,0.081376,0.0915684,0.0905298,0.0838113,0.0831119,0.09128,0.0867418,0.0913563,0.0899569,0.0931735,0.0880415,0.0675934,0.0780889,0.0901773,0.0847412,0.0802608,0.0782941,0.0857512,0.0824763,0.0880158,0.0833721,0.0926156,0.0801906,0.0646205,0.0736672,0.083317,0.0822918,0.078405,0.0776078,0.079969,0.0767083,0.0835775,0.0813961,0.0928881,0.0795842,0.06478,0.0731061,0.0798023,0.0752581,0.0734253,0.0695429,0.0836484,0.078531,0.0821788,0.0779375,0.0865659,0.0768721,0.059872,0.0669832,0.0775408,0.0769362,0.0705675,0.0712864,0.0768159,0.0747791,0.0796458,0.0772728,0.0848223,0.0752675,0.060112,0.0632604,0.0767984,0.0729825,0.0677114,0.0677728,0.0747044,0.0725261,0.0744203,0.0753486,0.0837159,0.0712004,0.0558458,0.0638317,0.0739514,0.0680708,0.0659898,0.0662426,0.0694978,0.0695376,0.0777217,0.0681065,0.0801941,0.0693424,0.0546405,0.0650172,0.0759793,0.067856,0.0655245,0.061766,0.0710694,0.0685637,0.0718188,0.0695
32,0.0752679,0.0681638,0.0561477,0.0598215,0.0702345,0.0676447,0.0646688,0.0639482,0.0701174,0.0683008,0.0690524,0.0682792,0.0778311,0.0650891,0.0510188,0.0594456,0.0735985,0.063215,0.0616483,0.0633563,0.0692136,0.0667065,0.0682495,0.0670723,0.0777425,0.0702469,0.0529072,0.0578122,0.0668027,0.0631572,0.0608722,0.0620346,0.0693205,0.0626658,0.0700919,0.0686792,0.0746077,0.0684267,0.0501496,0.0580912,0.0696675,0.0668309,0.0588495,0.0618528,0.0669638,0.0665347,0.0672101,0.0688169,0.0735846,0.0645334,0.0538168,0.0573742,0.0676456,0.0649698,0.057604,0.058603,0.0659196,0.0609918,0.0672861,0.062807,0.0732929,0.0623635,0.0506795,0.0540961,0.0669202,0.0650006,0.0584498,0.0574156,0.0681541,0.0611299,0.06386,0.0628221,0.0709486,0.0601691,0.0478887,0.0562485,0.0658285,0.0626775,0.0610409,0.058026,0.0615237,0.0609235,0.0654044,0.0643271,0.066149,0.0592123,0.0485572,0.0559088,0.0656414,0.0605725,0.0565024,0.054084,0.0610448,0.0609743,0.0640804,0.0597947,0.069087,0.0624562,0.0458398,0.054035,0.0650627,0.0638228,0.0565644,0.055091,0.0613005,0.0598585,0.064811,0.0619676,0.0707771,0.0601788,0.0482793,0.0544918,0.0619236,0.0599117,0.0566238,0.0538903,0.0603749,0.0595591,0.0672418,0.0614932,0.0686386,0.06159,0.0477095,0.05383,0.0638801,0.0573554,0.0588686,0.0557515,0.0618962,0.0634405,0.0651069,0.0631564,0.070235,0.0621013,0.0465899,0.0544096,0.0635982,0.058914,0.0543467,0.0544071,0.0623052,0.059377,0.0624369,0.0590987,0.0740912,0.0584049,0.0499644,0.0505227,0.0648581,0.0596524,0.0555434,0.0571987,0.0611462,0.0598627,0.0616076,0.0607206,0.0716132,0.0580887,0.0413032,0.0536989,0.0655353,0.0587716,0.0602312,0.0550022,0.0626012,0.0539992,0.0620973,0.0597162,0.0689828,0.0593856,0.0470149,0.0527887,0.0639644,0.0580085,0.0571919,0.0543461,0.0579509,0.0572202,0.0609415,0.0593955,0.0640829,0.0599575,0.045939,0.0510559,0.0614295,0.0612686,0.0550927,0.05814,0.0617381,0.0581969,0.0647529,0.0628926,0.0656755,0.0585399,0.0480049,0.0517956,0.0624532,0.0569194,0.0558871,0.0558279,0.0583404,0.0591396,
0.0645075,0.059082,0.0646923,0.0563528,0.0471276,0.0533036,0.0601095,0.0583598,0.0577982,0.0534398,0.0591355,0.056382,0.0642828,0.0586033,0.067921,0.0609052,0.0469603,0.0530266,0.0619194,0.0559112,0.0572706,0.0514847,0.0605922,0.0563296,0.059854,0.059021,0.0658404,0.0581382,0.0425696,0.052864,0.0628598,0.058875,0.0543722,0.0535301,0.0622701,0.0570226,0.0589269,0.0597022,0.0657355,0.0587249,0.0428133,0.0515394,0.0617134,0.0559918,0.0550044,0.0507141,0.061632,0.0546184,0.0605229,0.059592,0.06612,0.0581389,0.0467759,0.0526427,0.0614926,0.0562069,0.0566355,0.0502273,0.0594624,0.0565727,0.060267,0.0592907,0.0610077,0.0571254,0.0460214,0.051047,0.0589291,0.0555094,0.0539021,0.0482182,0.0599,0.0520678,0.0584404,0.0588745,0.0647226,0.0556745,0.0485973,0.052686,0.0595344,0.0561882,0.0555763,0.0525824,0.0598511,0.0586774,0.0584653,0.0551212,0.0602892,0.0608833,0.0479862,0.0481945,0.0592555,0.0573679,0.0532027,0.0504622,0.0604514,0.0541146,0.0627457,0.0568671,0.0641278,0.0569902,0.0429728,0.0489121,0.0586217,0.0574724,0.0561666,0.0527607,0.0593429,0.0606055,0.0610486,0.0590259,0.0641621,0.0585234,0.0432925,0.0508149,0.0616993,0.0588346,0.0546012,0.0487035,0.0594085,0.0558162,0.0610652,0.0574247,0.0639186,0.0560641,0.045695,0.0506581,0.0605567,0.0574944,0.0535529,0.0506462,0.0625908,0.0577105,0.0562548,0.0556173,0.0671725,0.0564379,0.04336,0.0505462,0.0582066,0.0556761,0.0527179,0.0518695,0.0556724,0.057875,0.065236,0.061389,0.0679688,0.0570412,0.0417402,0.0494184,0.057275,0.0509317,0.052429,0.0485038,0.0541254,0.0583539,0.064523,0.0556647,0.0657781,0.0588053,0.0437023,0.049539,0.0597458,0.0512949,0.0543124,0.0508194,0.0596865,0.0572641,0.0646067,0.0595808,0.0661561,0.0564407,0.0457228,0.0469308,0.060025,0.0539675,0.0538371,0.0516171,0.0592468,0.0556339,0.0594308,0.0567224,0.0672256,0.0547568,0.0445824, -------------------------------------------------------------------------------- /MTNN/example/MNIST/main.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "imatrix.h" 6 | #include "ilayer.h" 7 | #include "neuralnet.h" 8 | #include "neuralnetanalyzer.h" 9 | 10 | #include "ImageReader.h" 11 | #include "LabelReader.h" 12 | 13 | //Output functions 14 | 15 | void normal_line(std::string s) 16 | { 17 | std::cout << s << std::endl; 18 | } 19 | 20 | void indented_line(std::string s) 21 | { 22 | std::cout << '\t' << s << std::endl; 23 | } 24 | 25 | //Distortion functions 26 | 27 | template 28 | Matrix2D convolve(Matrix2D& input, Matrix2D& kernel) 29 | { 30 | int N = (kernel_r - 1) / 2; 31 | int M = (kernel_c - 1) / 2; 32 | constexpr size_t out_r = (r - kernel_r) / s + 1; 33 | constexpr size_t out_c = (c - kernel_c) / s + 1; 34 | Matrix2D output = { 0 }; 35 | 36 | for (size_t i = N; i < (r - N); i += s)//change focus of kernel 37 | { 38 | for (size_t j = M; j < (c - M); j += s) 39 | { 40 | //iterate over kernel 41 | float sum = 0; 42 | for (int n = N; n >= -N; --n) 43 | for (int m = M; m >= -M; --m) 44 | sum += input.at(i - n, j - m) * kernel.at(N - n, N - m); 45 | output.at((i - N) / s, (j - N) / s) = sum; 46 | } 47 | } 48 | return output; 49 | } 50 | 51 | template 52 | FeatureMap<1, rows, cols> distort(Matrix2D& input, Matrix2D& kernel, float elasticity, float max_stretch, float max_rot) 53 | { 54 | //elastic map distort 55 | const int n = (kernel_size - 1) / 2; 56 | auto up_i = Matrix2D{}; 57 | auto up_j = Matrix2D{}; 58 | for (int i = 0; i < up_i.rows(); ++i) 59 | { 60 | for (int j = 0; j < up_i.cols(); ++j) 61 | { 62 | if (i > 2 * n - 1 && i < rows + 2 * n - 1 && j > 2 * n - 1 && j < cols + 2 * n - 1) 63 | { 64 | up_i.at(i, j) = 2.0f * rand() / RAND_MAX - 1; 65 | up_j.at(i, j) = 2.0f * rand() / RAND_MAX - 1; 66 | } 67 | else 68 | { 69 | up_i.at(i, j) = 0; 70 | up_j.at(i, j) = 0; 71 | } 72 | } 73 | } 74 | auto map_i = convolve(up_i, kernel); 75 | auto map_j = convolve(up_j, kernel); 76 | 
for (int i = 0; i < map_i.rows(); ++i) 77 | { 78 | for (int j = 0; j < map_i.cols(); ++j) 79 | { 80 | map_i.at(i, j) *= elasticity; 81 | map_j.at(i, j) *= elasticity; 82 | } 83 | } 84 | 85 | //affine 86 | float vertical_stretch = max_stretch * (2.0f * rand() / RAND_MAX - 1); 87 | float horizontal_stretch = max_stretch * (2.0f * rand() / RAND_MAX - 1); 88 | 89 | float angle = 3.1415926f * max_rot * (2.0f * rand() / RAND_MAX - 1) / 180; 90 | float sina = sin(angle); 91 | float cosa = cos(angle); 92 | 93 | int rows_mid = rows / 2; 94 | int cols_mid = cols / 2; 95 | 96 | for (int i = 0; i < map_i.rows(); ++i) 97 | { 98 | for (int j = 0; j < map_i.cols(); ++j) 99 | { 100 | map_i.at(i, j) -= vertical_stretch * (rows_mid - i) + (rows_mid - i) * (cosa - 1) + (j - cols_mid) * sina; 101 | map_j.at(i, j) += horizontal_stretch * (j - cols_mid) + (j - cols_mid) * (cosa - 1) - (rows_mid - i) * sina; 102 | } 103 | } 104 | 105 | //bilinear intrepolation 106 | auto output = FeatureMap<1, rows, cols>(); 107 | for (int i = 0; i < rows; ++i) 108 | { 109 | for (int j = 0; j < cols; ++j) 110 | { 111 | float desired_i = i - map_i.at(i, j); 112 | float desired_j = j - map_j.at(i, j); 113 | 114 | int int_i = (int)desired_i; 115 | int int_j = (int)desired_j; 116 | 117 | float frac_i = desired_i - int_i; 118 | float frac_j = desired_j - int_j; 119 | 120 | //get rectangle weights 121 | float w1 = (1.0 - frac_i) * (1.0 - frac_j); 122 | float w2 = (1.0 - frac_i) * frac_j; 123 | float w3 = frac_i * (1 - frac_j); 124 | float w4 = frac_i * frac_j; 125 | 126 | //check validity 127 | float v1 = (int_i > 0 && int_i < rows && int_j > 0 && int_j < cols) ? input.at(int_i, int_j) : -1; 128 | float v2 = (int_i > 0 && int_i < rows && int_j + 1 > 0 && int_j + 1 < cols) ? input.at(int_i, int_j + 1) : -1; 129 | float v3 = (int_i + 1 > 0 && int_i + 1 < rows && int_j > 0 && int_j < cols) ? 
input.at(int_i + 1, int_j) : -1; 130 | float v4 = (int_i + 1 > 0 && int_i + 1 < rows && int_j + 1 > 0 && int_j + 1 < cols) ? input.at(int_i + 1, int_j + 1) : -1; 131 | 132 | output[0].at(i, j) = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4; 133 | } 134 | } 135 | 136 | return output; 137 | } 138 | 139 | template 140 | FeatureMap<1, rows, cols> make_fm(Matrix2D input) 141 | { 142 | FeatureMap<1, rows, cols> out{}; 143 | for (int i = 0; i < rows; ++i) 144 | for (int j = 0; j < cols; ++j) 145 | out[0].at(i, j) = input.at(i, j); 146 | return out; 147 | } 148 | 149 | template void print_matrix(Matrix2D input, int width = 3) 150 | { 151 | std::cout << std::setfill(' '); 152 | for (int i = 0; i < r; ++i) 153 | { 154 | for (int j = 0; j < c; ++j) 155 | std::cout << std::setprecision(width - 2) << std::setw(width) << input.at(i, j); 156 | std::cout << std::endl; 157 | } 158 | std::cout << std::setfill('\0'); 159 | } 160 | 161 | #define DEFAULT -1 162 | 163 | //setup the network architecture 164 | //typedef NeuralNet, 165 | // BatchNormalizationLayer<1, 1, 29, 29, MTNN_FUNC_LINEAR>, 166 | // PerceptronFullConnectivityLayer<2, 1, 29, 29, 1, 100, 1, MTNN_FUNC_LOGISTIC, true>, 167 | // BatchNormalizationLayer<2, 1, 100, 1, MTNN_FUNC_LINEAR>, 168 | // PerceptronFullConnectivityLayer<3, 1, 100, 1, 1, 100, 1, MTNN_FUNC_LOGISTIC, true>, 169 | // BatchNormalizationLayer<3, 1, 100, 1, MTNN_FUNC_LINEAR>, 170 | // PerceptronFullConnectivityLayer<4, 1, 100, 1, 1, 100, 1, MTNN_FUNC_LOGISTIC, true>, 171 | // BatchNormalizationLayer<4, 1, 100, 1, MTNN_FUNC_LINEAR>, 172 | // PerceptronFullConnectivityLayer<5, 1, 100, 1, 1, 10, 1, MTNN_FUNC_LOGISTIC, true>, 173 | // OutputLayer<1, 1, 10, 1>> Net; 174 | 175 | //typedef NeuralNet, 176 | // PerceptronFullConnectivityLayer<2, 1, 29, 29, 1, 100, 1, MTNN_FUNC_LOGISTIC, true>, 177 | // PerceptronFullConnectivityLayer<3, 1, 100, 1, 1, 100, 1, MTNN_FUNC_LOGISTIC, true>, 178 | // PerceptronFullConnectivityLayer<4, 1, 100, 1, 1, 100, 1, MTNN_FUNC_LOGISTIC, 
true>, 179 | // PerceptronFullConnectivityLayer<5, 1, 100, 1, 1, 10, 1, MTNN_FUNC_LOGISTIC, true>, 180 | // OutputLayer<1, 1, 10, 1>> Net; 181 | 182 | //standard, boring 183 | typedef NeuralNet, 184 | ConvolutionLayer<2, 1, 29, 29, 5, 2, 6, MTNN_FUNC_TANHLECUN, true, false>, 185 | ConvolutionLayer<3, 6, 13, 13, 5, 2, 50, MTNN_FUNC_TANHLECUN, true, false>, 186 | PerceptronFullConnectivityLayer<4, 50, 5, 5, 1, 100, 1, MTNN_FUNC_TANHLECUN, true>, 187 | PerceptronFullConnectivityLayer<5, 1, 100, 1, 1, 10, 1, MTNN_FUNC_TANHLECUN, true>, 188 | OutputLayer<6, 1, 10, 1>> Net; 189 | 190 | 191 | typedef FeatureMap<1, 29, 29> NetInput; 192 | typedef FeatureMap<1, 10, 1> NetOutput; 193 | 194 | bool training = false; 195 | 196 | int main() 197 | { 198 | //get string path 199 | auto net_file_path = CSTRING("data//net.nn"); 200 | using net_path_type = decltype(net_file_path); 201 | 202 | auto pretrain_file_path = CSTRING("data//mnist.nn"); 203 | using pretrain_path_type = decltype(pretrain_file_path); 204 | 205 | Net::learning_rate = .001f; 206 | Net::use_batch_learning = true; 207 | Net::optimization_method = MTNN_OPT_ADAM; 208 | Net::loss_function = MTNN_LOSS_L2; 209 | NeuralNetAnalyzer::sample_size = 100; 210 | 211 | //timing variables 212 | float t = 0.0f; 213 | float e_t = 0.0f; 214 | float p_e_t = 0.0f; 215 | 216 | float mse = 1.0f; 217 | 218 | /*auto errors = NeuralNetAnalyzer::mean_gradient_error(); 219 | std::cout << errors.first << ',' << errors.second << std::endl;*/ 220 | 221 | if (training) 222 | normal_line("Training a new network"); 223 | else 224 | normal_line("Testing prior network"); 225 | 226 | //generate gaussian kernel for distortions 227 | if (training) 228 | { 229 | float sigma = 8; 230 | const int n = 5; 231 | auto gaussian = Matrix2D(); 232 | for (int i = 0; i < gaussian.rows(); ++i) 233 | for (int j = 0; j < gaussian.cols(); ++j) 234 | gaussian.at(i, j) = exp(-((i - n / 2) * (i - n / 2) + (j - n / 2) * (j - n / 2)) / (2 * sigma * sigma)) / (sigma * sigma 
* 2 * 3.1415926f); 235 | 236 | normal_line("Loading MNIST Database..."); 237 | 238 | //load in images 239 | std::vector> images(60000); 240 | std::vector labels(10); 241 | ImageReader trainImgs("data//train-images.idx3-ubyte"); 242 | trainImgs.defaultval = DEFAULT; 243 | LabelReader trainLbls("data//train-labels.idx1-ubyte"); 244 | trainLbls.defaultval = DEFAULT; 245 | for (int i = 0; i < 60000; ++i) 246 | { 247 | int label = 0; 248 | for (int j = 0; j < trainLbls.current.rows(); ++j) 249 | if (trainLbls.current.at(j, 0) > 0) 250 | label = j; 251 | labels[label] = make_fm<10, 1>(trainLbls.current.clone()); 252 | images[i] = std::make_pair(trainImgs.current.clone(), label); 253 | 254 | trainImgs.next(); 255 | trainLbls.next(); 256 | } 257 | 258 | normal_line("Starting Training"); 259 | for (int e = 0; e < 50; ++e) 260 | { 261 | //shuffle images 262 | //std::random_shuffle(images.begin(), images.end()); 263 | 264 | /*for (int it = 0; it < 60000; ++it) 265 | { 266 | auto distorted = distort<29, 29, 5>(images[it].first, gaussian, .5, .15, 15); 267 | auto& label = labels[images[it].second]; 268 | 269 | Net::set_input(distorted); 270 | Net::set_labels(label); 271 | 272 | float error = 10; 273 | 274 | error = Net::train(); 275 | if (it == 0) 276 | indented_line("First error = " + std::to_string(error)); 277 | 278 | NeuralNetAnalyzer::add_point(error); 279 | 280 | if ((it + 1) % 50 == 0) 281 | { 282 | Net::apply_gradient(); 283 | } 284 | 285 | if ((it + 1) % 5000 == 0) 286 | { 287 | mse = NeuralNetAnalyzer::mean_error(); 288 | indented_line("MSE = " + std::to_string(mse)); 289 | } 290 | }*/ 291 | 292 | //discrim on first 500 293 | int correct = 0; 294 | std::vector totals(10); 295 | 296 | for (int i = 0; i < 500; ++i) 297 | { 298 | Net::set_input(make_fm<29, 29>(images[i].first)); 299 | auto& test = Net::discriminate()[0]; 300 | auto& label = labels[images[i].second][0]; 301 | int max_i = 0; 302 | int max_j = 0; 303 | 304 | float max = test.at(0, 0); 305 | float max2 = 
label.at(0, 0); 306 | for (int j = 1; j < test.rows(); ++j) 307 | { 308 | //normal_line(std::to_string(test.at(j, 0))); 309 | //normal_line(std::to_string(label.at(j, 0))); 310 | if (test.at(j, 0) > max) 311 | { 312 | max = test.at(j, 0); 313 | max_i = j; 314 | } 315 | if (label.at(j, 0) > max2) 316 | { 317 | max2 = label.at(j, 0); 318 | max_j = j; 319 | } 320 | } 321 | 322 | ++totals[max_i]; 323 | if (max_i == max_j) 324 | ++correct; 325 | } 326 | normal_line("On running random trial of 500 got " + std::to_string(correct) + " correct. "); 327 | std::string out = ""; 328 | for (int j = 0; j < totals.size(); ++j) 329 | out += std::to_string(j) + ": " + std::to_string(totals[j] / 500.0f) + " "; 330 | indented_line("Distribution: " + out); 331 | 332 | for (int batches = 0; batches < 60000 / 50; ++batches) 333 | { 334 | auto batch_images = FeatureMapVector<1, 29, 29>(50); 335 | auto batch_labels = FeatureMapVector<1, 10, 1>(50); 336 | for (int i = 0; i < 50; ++i) 337 | { 338 | batch_images[i] = distort<29, 29, 5>(images[batches * 50 + i].first, gaussian, .5, .15, 15); 339 | batch_labels[i] = labels[images[batches * 50 + i].second]; 340 | } 341 | 342 | float error = Net::train_batch(batch_images, batch_labels); 343 | 344 | if (batches == 0) 345 | indented_line("First error = " + std::to_string(error)); 346 | 347 | NeuralNetAnalyzer::add_point(error); 348 | 349 | if ((batches + 1) * 50 % 5000 == 0) 350 | { 351 | mse = NeuralNetAnalyzer::mean_error(); 352 | indented_line("MSE = " + std::to_string(mse)); 353 | } 354 | } 355 | 356 | if (Net::learning_rate > .00005f && (e + 1) % 2 == 0) 357 | Net::learning_rate *= 0.794183335f; 358 | //if (e == 10) 359 | // net.learning_rate = .001f; 360 | 361 | t = clock() - t; 362 | p_e_t += t; 363 | normal_line("(training) Epoch " + std::to_string(e) + " was completed in " + std::to_string(t / CLOCKS_PER_SEC) + " seconds"); 364 | Net::save_data(); 365 | NeuralNetAnalyzer::save_mean_error("data//mse.dat"); 366 | t = clock(); 367 | } 368 | 
std::vector training_set_images(60000); 369 | for (int i = 0; i < training_set_images.size(); ++i) 370 | training_set_images[i] = make_fm<29, 29>(images[i].first); 371 | Net::calculate_population_statistics(training_set_images); 372 | 373 | normal_line("Training was completed in " + std::to_string(p_e_t / CLOCKS_PER_SEC) + " seconds."); 374 | t = clock(); 375 | } 376 | 377 | if (!training) 378 | Net::load_data(); 379 | else 380 | Net::load_data(); 381 | 382 | normal_line("Starting Testing"); 383 | 384 | ImageReader testImgs("data//t10k-images.idx3-ubyte"); 385 | testImgs.defaultval = DEFAULT; 386 | LabelReader testLbls("data//t10k-labels.idx1-ubyte"); 387 | testLbls.defaultval = DEFAULT; 388 | int correct = 0; 389 | 390 | std::vector totals(10); 391 | 392 | for (int i = 0; i < 9999; ++i) 393 | { 394 | testImgs.next(); 395 | testLbls.next(); 396 | 397 | Net::set_input(make_fm<29, 29>(testImgs.current)); 398 | auto& test = Net::discriminate()[0]; 399 | int max_i = 0; 400 | float max = test.at(0, 0); 401 | for (int j = 1; j < 10; ++j) 402 | { 403 | if (test.at(j, 0) > max) 404 | { 405 | max = test.at(j, 0); 406 | max_i = j; 407 | } 408 | } 409 | 410 | ++totals[max_i]; 411 | for (int j = 0; j < testLbls.current.rows(); ++j) 412 | if (testLbls.current.at(j, 0) == 1 && j == max_i) 413 | ++correct; 414 | 415 | if (i % 500 == 0 && i != 0) 416 | { 417 | normal_line("After " + std::to_string(i) + " tests, " + std::to_string(100.0f * correct / i) + "% were correct"); 418 | std::string out = ""; 419 | for (int j = 0; j < totals.size(); ++j) 420 | out += std::to_string(j) + ": " + std::to_string(totals[j] / (1.0f * i)) + " "; 421 | normal_line("Distribution: " + out); 422 | } 423 | } 424 | 425 | normal_line("Press any key to exit"); 426 | getchar(); 427 | return 0; 428 | } 429 | -------------------------------------------------------------------------------- /MTNN/example/Makefile: -------------------------------------------------------------------------------- 1 | build: 
main.cpp 2 | g++ -std=gnu++14 -o example.out main.cpp -I../include 3 | -------------------------------------------------------------------------------- /MTNN/example/example.nn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liammcinroy/MetaTemplateNeuralNet/2e6d1a666d47fb4daa511bc4d1b70b686e9ed3bd/MTNN/example/example.nn -------------------------------------------------------------------------------- /MTNN/example/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "imatrix.h" 4 | #include "ilayer.h" 5 | #include "neuralnet.h" 6 | #include "neuralnetanalyzer.h" 7 | 8 | #define TRAINING true 9 | 10 | #define BATCH_SIZE 100 11 | #define BATCH_FUNCTIONS true //use to compare using batch functions vs automatic batch learning 12 | //Note that using the automatic batch learnin does not make sense with batch norm and is ill defined 13 | 14 | #define INPUT_TRANSFORM(x) x //((x - 4.5f) / 2.872f) //test bn 15 | #define OUTPUT_TRANSFORM(x) x 16 | #define OUTPUT_INV_TRANSFORM(x) x 17 | 18 | //setup the structure of the network 19 | typedef NeuralNet< 20 | InputLayer<1, 1, 1, 1>, //the indexes allow the classes to be static 21 | //BatchNormalizationLayer<1, 1, 1, 1, MTNN_FUNC_LINEAR>, //could use to normalize inputs 22 | PerceptronFullConnectivityLayer<2, 1, 1, 1, 1, 1, 1, MTNN_FUNC_LINEAR, false>, //can disable biases 23 | //ConvolutionLayer<3, 1, 1, 1, 1, 1, 2, MTNN_FUNC_LINEAR, true, false>, //can disable padding 24 | //BatchNormalizationLayer<3, 2, 1, 1, MTNN_FUNC_TANH>, //if want to use tanh for conv layer with bn, use linear on conv then logistic for bn 25 | //MaxpoolLayer<4, 2, 1, 1, 1, 1>, 26 | //PerceptronFullConnectivityLayer<5, 2, 1, 1, 1, 1, 1, MTNN_FUNC_RELU, true>, 27 | //PerceptronFullConnectivityLayer<6, 1, 1, 1, 1, 1, 1, MTNN_FUNC_LINEAR, true>, //Because of different indexes, then this and layer 1 won't share data 28 | 
OutputLayer<7, 1, 1, 1>> Net; 29 | 30 | 31 | template<> FeatureMap<1, 1, 1> PerceptronFullConnectivityLayer<2, 1, 1, 1, 1, 1, 1, MTNN_FUNC_LINEAR, false>::weights_global = { .1f };//custom weight initialization 32 | 33 | int main(int argc, char** argv) 34 | { 35 | //Have to define input/output filename before template because templates don't take string literals and needs linking 36 | auto path = CSTRING("example.cnn"); 37 | 38 | Net::loss_function = MTNN_LOSS_L2; 39 | Net::optimization_method = MTNN_OPT_BACKPROP; 40 | Net::learning_rate = .001f; 41 | Net::use_batch_learning = true; 42 | Net::weight_decay_factor = .0001f; 43 | Net::use_l2_weight_decay = false; 44 | Net::include_bias_decay = false; 45 | 46 | Net n2{};//example of creating parallel net 47 | 48 | //Choose sample size to estimate error 49 | if (BATCH_FUNCTIONS) 50 | NeuralNetAnalyzer::sample_size = 1; 51 | else 52 | NeuralNetAnalyzer::sample_size = BATCH_SIZE; 53 | 54 | //basic input/output 55 | auto inputs = FeatureMapVector<1, 1, 1>(BATCH_SIZE); 56 | auto labels = FeatureMapVector<1, 1, 1>(BATCH_SIZE); 57 | 58 | //get gradient error, won't mean_global much because online training (not using batch funcs) will kill any BN network (BN won't even pass new info on) 59 | Net::train(); 60 | std::pair errors = NeuralNetAnalyzer::mean_gradient_error(); 61 | std::cout << "Approximate gradient errors: " << errors.first << ',' << errors.second << std::endl; 62 | 63 | //testing parallel nets 64 | n2.discriminate_thread(); 65 | n2.train_thread(); 66 | 67 | if (!TRAINING) 68 | Net::load_data(); 69 | 70 | float error = INFINITY; 71 | for (int batch = 1; error > .01f && TRAINING; ++batch) 72 | { 73 | for (int i = 0; i < BATCH_SIZE; ++i) 74 | { 75 | //since we are using minibatch normalization and NOT keeping a running total of the statistics, 76 | //then we must run each sample from the minibatch through the network to collect the data 77 | inputs[i][0].at(0, 0) = INPUT_TRANSFORM(i % 10); 78 | labels[i][0].at(0, 0) = 
OUTPUT_TRANSFORM(i % 10); 79 | 80 | 81 | if (!BATCH_FUNCTIONS) //if using automated, just train on every input 82 | { 83 | Net::set_input(inputs[i]); 84 | Net::set_labels(labels[i]); 85 | NeuralNetAnalyzer::add_point(Net::train()); 86 | } 87 | } 88 | 89 | if (BATCH_FUNCTIONS) 90 | NeuralNetAnalyzer::add_point(Net::train_batch(inputs, labels)); //if using bn/batch functions, pass batch inputs 91 | 92 | else 93 | Net::apply_gradient(); //apply gradient if using automated 94 | 95 | if (Net::learning_rate > .00001f) 96 | Net::learning_rate *= .9; 97 | error = NeuralNetAnalyzer::mean_error(); 98 | std::cout << "After " << batch << " batches, network has expected error of " << error << std::endl; 99 | 100 | std::cout << "Net value with input (minibatch statistics) of 1: " << OUTPUT_INV_TRANSFORM(Net::template get_batch_activations()[1][0].at(0, 0)) << std::endl; 101 | 102 | //test actual network (difference in values is due to changed weights_global, etc.) 103 | Net::set_input(FeatureMap<1, 1, 1>{ INPUT_TRANSFORM(1.0f) }); 104 | 105 | Net::discriminate(); 106 | std::cout << "Net value with input (using population averages) of 1: " << OUTPUT_INV_TRANSFORM(Net::template get_batch_activations()[0][0].at(0, 0)) << std::endl; 107 | } 108 | Net::save_data(); //save to path 109 | 110 | //test actual network 111 | Net::set_input(FeatureMap<1, 1, 1>{ INPUT_TRANSFORM(1.0f) }); 112 | Net::discriminate(); 113 | std::cout << "Net value with input of 1 (after training set statistics): " << OUTPUT_INV_TRANSFORM(Net::template get_batch_activations()[0][0].at(0, 0)) << std::endl; 114 | 115 | //test loading data 116 | Net::load_data(); 117 | 118 | Net::discriminate(inputs); 119 | std::cout << "Net value with input of 1 (after load): " << OUTPUT_INV_TRANSFORM(Net::template get_batch_activations()[0][0].at(0, 0)) << std::endl; 120 | 121 | std::cout << "\n\nPress any key to exit" << std::endl; 122 | getchar(); 123 | return 0; 124 | } 125 | 
-------------------------------------------------------------------------------- /MTNN/include/imatrix.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | //basic abstract class - use for pointers to unknown sizes of Matrix2D (ie IMatrix* m; ... m->at(i, j) ... ) 11 | template class IMatrix 12 | { 13 | public: 14 | IMatrix() = default; 15 | 16 | virtual ~IMatrix() = default; //virtual: this is a polymorphic base, so destroying a derived matrix through an IMatrix* must run the derived destructor 17 | 18 | virtual T& at(const size_t& i, const size_t& j) = 0; 19 | 20 | virtual const T& at(const size_t& i, const size_t& j) const = 0; 21 | }; 22 | 23 | //template class - used for references and ensures passing correct size matrices before runtime 24 | template class Matrix2D : public IMatrix 25 | { 26 | public: 27 | 28 | //default constructor 29 | Matrix2D() 30 | { 31 | data = std::vector(r * c); 32 | for (size_t i = 0; i < r * c; ++i) 33 | data[i] = T(); 34 | } 35 | 36 | //construct with all elements equal to same value (usually 0 or 1) 37 | Matrix2D(T val) 38 | { 39 | data = std::vector(r * c); 40 | for (size_t i = 0; i < r * c; ++i) 41 | data[i] = val; 42 | } 43 | 44 | //construct with all elements drawn randomly from uniform distribution (defined by params) 45 | Matrix2D(const T& min, const T& max) 46 | { 47 | data = std::vector(r * c); 48 | T diff = max - min; 49 | for (size_t i = 0; i < data.size(); ++i) 50 | data[i] = (diff * rand()) / RAND_MAX + min; 51 | } 52 | 53 | //deep copy 54 | Matrix2D(const Matrix2D& ref) 55 | { 56 | data = std::vector(r * c); 57 | for (size_t i = 0; i < data.size(); ++i) 58 | data[i] = ref.data[i]; 59 | } 60 | 61 | //construct from particular example (doesn't work well with brace-initialization, hence commented out) 62 | /*Matrix2D(std::initializer_list> arr) 63 | { 64 | data = std::vector(r * c);; 65 | typename std::initializer_list>::iterator it = arr.begin(); 66 | for (size_t i = 0; i < r; ++i) 67 | { 68 | typename 
std::initializer_list::iterator it2 = it->begin(); 69 | for (size_t j = 0; j < c; ++j) 70 | { 71 | data[(c * i) + j] = *it2; 72 | ++it2; 73 | } 74 | ++it; 75 | } 76 | } 77 | 78 | //for vectors 79 | Matrix2D(std::initializer_list arr) 80 | { 81 | data = std::vector(r * c); 82 | typename std::initializer_list::iterator it = arr.begin(); 83 | for (size_t i = 0; i < r; ++i) 84 | { 85 | data[(c * i)] = *it; 86 | ++it; 87 | } 88 | }*/ 89 | 90 | //vector cleans itself up 91 | ~Matrix2D() = default; 92 | 93 | //get element (row i, column j; storage is row-major, no bounds checking) 94 | T& at(const size_t& i, const size_t& j) override 95 | { 96 | return data[(c * i) + j]; 97 | } 98 | 99 | //get element (read-only overload) 100 | const T& at(const size_t& i, const size_t& j) const override 101 | { 102 | return data[(c * i) + j]; 103 | } 104 | 105 | //deep copy - deprecated? (const so that read-only matrices can be cloned too) 106 | Matrix2D clone() const 107 | { 108 | Matrix2D out = Matrix2D(); 109 | 110 | for (size_t i = 0; i < r; ++i) 111 | for (size_t j = 0; j < c; ++j) 112 | out.at(i, j) = this->at(i, j); 113 | return out; 114 | } 115 | 116 | //element-wise product; result is stored in the current instance, other is only read 117 | void elem_multiply(const Matrix2D& other) 118 | { 119 | for (size_t i = 0; i < r; ++i) 120 | for (size_t j = 0; j < c; ++j) 121 | this->at(i, j) *= other.at(i, j); 122 | } 123 | 124 | //element-wise quotient; result is stored in the current instance, other is only read 125 | void elem_divide(const Matrix2D& other) 126 | { 127 | for (size_t i = 0; i < r; ++i) 128 | for (size_t j = 0; j < c; ++j) 129 | this->at(i, j) /= other.at(i, j); 130 | } 131 | 132 | //returns current rows (constexpr so no memory access!) 133 | static constexpr size_t rows() 134 | { 135 | return r; 136 | } 137 | 138 | //returns current cols (constexpr so no memory access!) 
139 | static constexpr size_t cols() 140 | { 141 | return c; 142 | } 143 | 144 | //data, stored in vector so data is in heap, not stack 145 | std::vector data; 146 | }; 147 | 148 | //Basically just a vector of Matrix2D<>s 149 | template class FeatureMap 150 | { 151 | public: 152 | 153 | //default constructor 154 | FeatureMap() 155 | { 156 | for (size_t k = 0; k < f; ++k) 157 | maps.push_back(Matrix2D()); 158 | } 159 | 160 | //set all to same value 161 | FeatureMap(T val) 162 | { 163 | for (size_t k = 0; k < f; ++k) 164 | maps.push_back(Matrix2D(val)); 165 | } 166 | 167 | //draw from uniform distribution (defined by params) 168 | FeatureMap(T max, T min) 169 | { 170 | for (size_t k = 0; k < f; ++k) 171 | maps.push_back(Matrix2D(max, min)); 172 | } 173 | 174 | //deep copy 175 | FeatureMap(const FeatureMap& ref) 176 | { 177 | for (size_t k = 0; k < f; ++k) 178 | maps.push_back(ref[k]); 179 | } 180 | 181 | /* 182 | //from another, doesn't work well with brace initializers 183 | FeatureMap(std::initializer_list> arr) 184 | { 185 | typename std::initializer_list>::iterator it = arr.begin(); 186 | for (size_t k = 0; k < f && it != arr.end(); ++k) 187 | { 188 | maps.push_back(Matrix2D(*it)); 189 | ++it; 190 | } 191 | }*/ 192 | 193 | //vector takes care of itself 194 | ~FeatureMap() = default; 195 | 196 | //access the data 197 | Matrix2D& operator[](const size_t& feat) 198 | { 199 | return maps[feat]; 200 | } 201 | 202 | //access the data 203 | const Matrix2D& operator[](const size_t& feat) const 204 | { 205 | return maps[feat]; 206 | } 207 | 208 | //access the data 209 | Matrix2D& at(const size_t& feat) 210 | { 211 | return maps[feat]; 212 | } 213 | 214 | //returns current number of maps (constexpr so no memory access!) 215 | static constexpr size_t size() 216 | { 217 | return f; 218 | } 219 | 220 | //returns current rows (constexpr so no memory access!) 
221 | static constexpr size_t rows() 222 | { 223 | return r; 224 | } 225 | 226 | //returns current cols (constexpr so no memory access!) 227 | static constexpr size_t cols() 228 | { 229 | return c; 230 | } 231 | 232 | //Can be useful in templates 233 | using type = Matrix2D; 234 | 235 | private: 236 | 237 | //vector so it's on heap 238 | std::vector> maps; 239 | }; 240 | 241 | //basic matrix multiplication 242 | template Matrix2D operator*(const Matrix2D& lhs, const Matrix2D & rhs) 243 | { 244 | Matrix2D result{}; 245 | for (size_t i = 0; i < rows1; ++i) 246 | { 247 | for (size_t j = 0; j < cols2; ++j) 248 | { 249 | T sum{}; 250 | for (size_t i2 = 0; i2 < rows2; ++i2) 251 | sum += lhs.at(i, i2) * rhs.at(i2, j); 252 | result.at(i, j) = sum; 253 | } 254 | } 255 | return result; 256 | } 257 | 258 | //Adds two matricies, stores in the first 259 | template void add(Matrix2D& first, const Matrix2D& second) 260 | { 261 | for (size_t i = 0; i < rows; ++i) 262 | for (size_t j = 0; j < cols; ++j) 263 | first.at(i, j) += second.at(i, j); 264 | } 265 | 266 | //Adds two matricies, but first second is multiplied by mult, stores in the first 267 | template void add(Matrix2D& first, const Matrix2D& second, const T& mult) 268 | { 269 | for (size_t i = 0; i < rows; ++i) 270 | for (size_t j = 0; j < cols; ++j) 271 | first.at(i, j) += second.at(i, j) * mult; 272 | } 273 | -------------------------------------------------------------------------------- /MTNN/include/neuralnet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "imatrix.h" 9 | #include "ilayer.h" 10 | 11 | //default, MSE 12 | #define MTNN_LOSS_L2 0 13 | //assumes prior layer is softmax 14 | #define MTNN_LOSS_LOGLIKELIHOOD 1 15 | //undefined for error, instead sets output to labels during training 16 | #define MTNN_LOSS_CUSTOMTARGETS 2 17 | 18 | //vanilla, add in momentum or hessian if desired 19 | 
#define MTNN_OPT_BACKPROP 0 20 | //can't use with momentum or hessian 21 | #define MTNN_OPT_ADAM 1 22 | //can't use with momentum or hessian 23 | #define MTNN_OPT_ADAGRAD 2 24 | 25 | ////HELPER FUNCTIONS 26 | ////Network class definitions begin at line 171 27 | 28 | //for C++11 29 | template using enable_if_t = typename std::enable_if::type; 30 | template using remove_reference_t = typename std::remove_reference::type; 31 | 32 | //TEMPLATE FOR LOOP, if using for<...> then have to add a 0 if using MSVC. Sorry 33 | 34 | //incremental for loop, pass type func with initializer taking args... 35 | template class func, typename... Args> struct for_loop_inc_impl 36 | { 37 | template 38 | for_loop_inc_impl(Args... args, enable_if_t<(i2 < UPPER), for_loop_inc_impl>* = 0) 39 | { 40 | func{args...}; 41 | #if !defined(_MSC_VER) && !defined(clang) 42 | auto next = for_loop_inc_impl{args...}; 43 | #else 44 | auto next = for_loop_inc_impl(args..., 0); 45 | #endif 46 | } 47 | 48 | template 49 | for_loop_inc_impl(Args... args, enable_if_t>* = 0) 50 | { 51 | func{args...}; 52 | } 53 | }; 54 | 55 | //decremental for loop, pass type func with initializer taking args... 56 | template class func, typename... Args> struct for_loop_dec_impl 57 | { 58 | template 59 | for_loop_dec_impl(Args... args, enable_if_t<(i2 > LOWER), for_loop_dec_impl>* = 0) 60 | { 61 | func{args...}; 62 | #if !defined(_MSC_VER) 63 | auto next = for_loop_dec_impl{args...}; 64 | #else 65 | auto next = for_loop_dec_impl(args..., 0); 66 | #endif 67 | } 68 | 69 | template 70 | for_loop_dec_impl(Args... args, enable_if_t>* = 0) 71 | { 72 | func{args...}; 73 | } 74 | }; 75 | 76 | //for loop, pass type func with initializer taking args... 77 | template class func, typename... Args> struct for_loop 78 | { 79 | template 80 | for_loop(Args... 
args, enable_if_t<(START2 < FINISH), for_loop>* = 0) 81 | { 82 | #if !defined(_MSC_VER) 83 | for_loop_inc_impl{args...}; 84 | #else 85 | for_loop_inc_impl(args..., 0); 86 | #endif 87 | } 88 | 89 | template 90 | for_loop(Args... args, enable_if_t<(START2 > FINISH), for_loop>* = 0) 91 | { 92 | #if !defined(_MSC_VER) 93 | for_loop_dec_impl{args...}; 94 | #else 95 | for_loop_dec_impl(args..., 0); 96 | #endif 97 | } 98 | 99 | template 100 | for_loop(Args... args, enable_if_t<(START2 == FINISH), for_loop>* = 0) 101 | { 102 | func{args...}; 103 | } 104 | }; 105 | 106 | //RECURSIVE PACK GET 107 | 108 | template struct get_type_impl 109 | { 110 | using type = typename get_type_impl::type; 111 | }; 112 | 113 | template 114 | struct get_type_impl<0, T0, Ts...> 115 | { 116 | using type = T0; 117 | }; 118 | 119 | template using get_type = typename get_type_impl::type; 120 | 121 | //RECURSIVE RBM INDEX GET 122 | 123 | template struct get_rbm_idx_impl 124 | { 125 | static constexpr size_t idx = (get_type::activation == MTNN_FUNC_RBM) ? n : get_rbm_idx_impl::idx; 126 | }; 127 | 128 | template struct get_rbm_idx_impl<0, Ts...> 129 | { 130 | static constexpr size_t idx = 0; 131 | }; 132 | 133 | template using get_rbm_idx = get_rbm_idx_impl; 134 | 135 | //CONSTEXPR STRING 136 | 137 | template class func, size_t... indices> struct do_foreach_range 138 | { 139 | using type = typename do_foreach_range::type; 140 | }; 141 | 142 | template class func, size_t... 
indices> struct do_foreach_range<0, func, indices...> 143 | { 144 | using type = typename func::type; 145 | }; 146 | 147 | template struct str 148 | { 149 | static constexpr const char string[sizeof...(cs)+1] = { cs..., '\0' }; 150 | }; 151 | 152 | template constexpr const char str::string[]; 153 | 154 | template struct builder//str_type is static class with string literal 155 | { 156 | template struct do_foreach//will be func 157 | { 158 | //want to fetch the char of each index 159 | using type = str; 160 | }; 161 | }; 162 | 163 | #define CSTRING(string_literal) []{ \ 164 | struct const_str { const char* chars = string_literal; }; \ 165 | return do_foreach_range::do_foreach>::type{}; }() 166 | 167 | //The class for a neural network. Put in types of *Layer as layers... 168 | //The static class is considered the "global" network. Creating an instance of this class creates a thread net (with separate weights_global & gradient_globals_global) 169 | template 170 | class NeuralNet 171 | { 172 | private: 173 | 174 | ////LAYER LOOP BODIES 175 | 176 | //save a weight file 177 | template struct save_data_t 178 | { 179 | save_data_t() 180 | { 181 | #if defined(_MSC_VER) 182 | fopen_s(&fp, file_name::string, "w+b"); 183 | loop_up_layers(0); 184 | #else 185 | fp = fopen(file_name::string, "w+b"); 186 | loop_up_layers(); 187 | #endif 188 | fclose(fp); 189 | } 190 | private: 191 | static FILE* fp; 192 | 193 | //save a layer 194 | template struct save_data_impl 195 | { 196 | public: 197 | void write_float(const float& f, FILE* file) 198 | { 199 | fwrite(&f, sizeof(float), 1, file); 200 | } 201 | save_data_impl() 202 | { 203 | using layer = get_layer; 204 | 205 | if (layer::type == MTNN_LAYER_BATCHNORMALIZATION) 206 | { 207 | using t = decltype(layer::activations_population_mean_global); 208 | for (size_t d = 0; d < t::size(); ++d) 209 | { 210 | for (size_t i = 0; i < t::rows(); ++i) 211 | { 212 | for (size_t j = 0; j < t::cols(); ++j) 213 | { 214 | 
write_float(layer::activations_population_mean_global[d].at(i, j), fp); 215 | write_float(layer::activations_population_variance_global[d].at(i, j), fp); 216 | } 217 | } 218 | } 219 | } 220 | 221 | //begin weights_global values 222 | { 223 | using t = decltype(layer::weights_global); 224 | for (size_t d = 0; d < t::size(); ++d) 225 | for (size_t i = 0; i < t::rows(); ++i) 226 | for (size_t j = 0; j < t::cols(); ++j) 227 | write_float(layer::weights_global[d].at(i, j), fp); 228 | } 229 | 230 | //begin biases_global values 231 | { 232 | using t = decltype(layer::biases_global); 233 | for (size_t f_0 = 0; f_0 < t::size(); ++f_0) 234 | for (size_t i_0 = 0; i_0 < t::rows(); ++i_0) 235 | for (size_t j_0 = 0; j_0 < t::cols(); ++j_0) 236 | write_float(layer::biases_global[f_0].at(i_0, j_0), fp);//bias values 237 | } 238 | 239 | //begin gen biases_global values 240 | { 241 | using t = decltype(layer::generative_biases_global); 242 | for (size_t f = 0; f < t::size(); ++f) 243 | for (size_t i = 0; i < t::rows(); ++i) 244 | for (size_t j = 0; j < t::cols(); ++j) 245 | write_float(layer::generative_biases_global[f].at(i, j), fp);//gen bias values 246 | } 247 | } 248 | }; 249 | }; 250 | 251 | //load a weight file 252 | template struct load_data_t 253 | { 254 | load_data_t() 255 | { 256 | #if defined(_MSC_VER) 257 | fopen_s(&fp, file_name::string, "r+b"); 258 | loop_up_layers(0); 259 | #else 260 | fp = fopen(file_name::string, "r+b"); 261 | loop_up_layers(); 262 | #endif 263 | fclose(fp); 264 | } 265 | private: 266 | static FILE* fp; 267 | 268 | //load a layer 269 | template struct load_data_impl 270 | { 271 | public: 272 | void read_float(float& out_float, FILE* file) 273 | { 274 | fread(&out_float, sizeof(float), 1, file); 275 | } 276 | load_data_impl() 277 | { 278 | using layer = get_layer; 279 | 280 | if (layer::type == MTNN_LAYER_BATCHNORMALIZATION) 281 | { 282 | using t = decltype(layer::activations_population_mean_global); 283 | for (size_t d = 0; d < t::size(); ++d) 284 | 
{ 285 | for (size_t i = 0; i < t::rows(); ++i) 286 | { 287 | for (size_t j = 0; j < t::cols(); ++j) 288 | { 289 | read_float(layer::activations_population_mean_global[d].at(i, j), fp); 290 | read_float(layer::activations_population_variance_global[d].at(i, j), fp); 291 | } 292 | } 293 | } 294 | } 295 | 296 | //begin weights_global values 297 | { 298 | using t = decltype(layer::weights_global); 299 | for (size_t d = 0; d < t::size(); ++d) 300 | for (size_t i = 0; i < t::rows(); ++i) 301 | for (size_t j = 0; j < t::cols(); ++j) 302 | read_float(layer::weights_global[d].at(i, j), fp); 303 | } 304 | 305 | //begin biases_global values 306 | { 307 | using t = decltype(layer::biases_global); 308 | for (size_t f_0 = 0; f_0 < t::size(); ++f_0) 309 | for (size_t i_0 = 0; i_0 < t::rows(); ++i_0) 310 | for (size_t j_0 = 0; j_0 < t::cols(); ++j_0) 311 | read_float(layer::biases_global[f_0].at(i_0, j_0), fp); 312 | } 313 | 314 | //begin gen biases_global values 315 | { 316 | using t = decltype(layer::generative_biases_global); 317 | for (size_t f = 0; f < t::size(); ++f) 318 | for (size_t i = 0; i < t::rows(); ++i) 319 | for (size_t j = 0; j < t::cols(); ++j) 320 | read_float(layer::generative_biases_global[f].at(i, j), fp); 321 | } 322 | } 323 | }; 324 | }; 325 | 326 | //reset a particular data_global type (usually only gradient_globals_global) 327 | template struct reset_impl 328 | { 329 | reset_impl() 330 | { 331 | using layer = get_layer; 332 | if (target == MTNN_DATA_FEATURE_MAP) 333 | { 334 | using t = decltype(layer::feature_maps_global); 335 | for (size_t f = 0; f < t::size(); ++f) 336 | for (size_t i = 0; i < t::rows(); ++i) 337 | for (size_t j = 0; j < t::cols(); ++j) 338 | layer::feature_maps_global[f].at(i, j) = 0.0f; 339 | //reset batch data_global 340 | for (size_t in = 0; in < get_batch_activations().size(); ++in) 341 | for (size_t f = 0; f < t::size(); ++f) 342 | for (size_t i = 0; i < t::rows(); ++i) 343 | for (size_t j = 0; j < t::cols(); ++j) 344 | 
get_batch_activations()[in][f].at(i, j) = 0; 345 | for (size_t in = 0; in < get_batch_out_derivs().size(); ++in) 346 | for (size_t f = 0; f < t::size(); ++f) 347 | for (size_t i = 0; i < t::rows(); ++i) 348 | for (size_t j = 0; j < t::cols(); ++j) 349 | get_batch_out_derivs()[in][f].at(i, j) = 0; 350 | } 351 | if (target == MTNN_DATA_WEIGHT_GRAD) 352 | { 353 | using t = decltype(layer::weights_gradient_global); 354 | for (size_t d = 0; d < t::size(); ++d) 355 | for (size_t i = 0; i < t::rows(); ++i) 356 | for (size_t j = 0; j < t::cols(); ++j) 357 | layer::weights_gradient_global[d].at(i, j) = 0.0f; 358 | } 359 | if (target == MTNN_DATA_BIAS_GRAD) 360 | { 361 | using t = decltype(layer::biases_gradient_global); 362 | for (size_t f_0 = 0; f_0 < t::size(); ++f_0) 363 | for (size_t i_0 = 0; i_0 < t::rows(); ++i_0) 364 | for (size_t j_0 = 0; j_0 < t::cols(); ++j_0) 365 | layer::biases_gradient_global[f_0].at(i_0, j_0) = 0.0f; 366 | } 367 | if (target == MTNN_DATA_WEIGHT_MOMENT) 368 | { 369 | using t = decltype(layer::weights_momentum); 370 | for (size_t d = 0; d < t::size(); ++d) 371 | for (size_t i = 0; i < t::rows(); ++i) 372 | for (size_t j = 0; j < t::cols(); ++j) 373 | layer::weights_momentum[d].at(i, j) = 0.0f; 374 | } 375 | if (target == MTNN_DATA_BIAS_MOMENT) //zero the bias momentum buffer itself, not the bias gradient 376 | { 377 | using t = decltype(layer::biases_momentum); 378 | for (size_t f_0 = 0; f_0 < t::size(); ++f_0) 379 | for (size_t i_0 = 0; i_0 < t::rows(); ++i_0) 380 | for (size_t j_0 = 0; j_0 < t::cols(); ++j_0) 381 | layer::biases_momentum[f_0].at(i_0, j_0) = 0.0f; 382 | } 383 | if (target == MTNN_DATA_WEIGHT_AUXDATA) 384 | { 385 | using t = decltype(layer::weights_aux_data_global); 386 | for (size_t d = 0; d < t::size(); ++d) 387 | for (size_t i = 0; i < t::rows(); ++i) 388 | for (size_t j = 0; j < t::cols(); ++j) 389 | layer::weights_aux_data_global[d].at(i, j) = 0.0f; 390 | } 391 | if (target == MTNN_DATA_BIAS_AUXDATA) 392 | { 393 | using t = decltype(layer::biases_aux_data_global); 394 | 
for (size_t f_0 = 0; f_0 < t::size(); ++f_0) 395 | for (size_t i_0 = 0; i_0 < t::rows(); ++i_0) 396 | for (size_t j_0 = 0; j_0 < t::cols(); ++j_0) 397 | layer::biases_aux_data_global[f_0].at(i_0, j_0) = 0.0f; 398 | } 399 | } 400 | }; 401 | 402 | //deallocate a data_global type (not really necessary...) 403 | template struct delete_impl 404 | { 405 | delete_impl() 406 | { 407 | using layer = get_layer; 408 | if (target == MTNN_DATA_FEATURE_MAP) 409 | { 410 | using t = decltype(layer::feature_maps_global); 411 | layer::feature_maps_global.~FeatureMap(); 412 | } 413 | if (target == MTNN_DATA_WEIGHT_MOMENT) 414 | { 415 | using t = decltype(layer::weights_momentum); 416 | layer::weights_momentum.~FeatureMap(); 417 | } 418 | if (target == MTNN_DATA_BIAS_MOMENT) 419 | { 420 | using t = decltype(layer::biases_momentum); 421 | layer::biases_momentum.~FeatureMap(); 422 | } 423 | if (target == MTNN_DATA_WEIGHT_AUXDATA) 424 | { 425 | using t = decltype(layer::weights_aux_data_global); 426 | layer::weights_aux_data_global.~FeatureMap(); 427 | } 428 | if (target == MTNN_DATA_BIAS_AUXDATA) 429 | { 430 | using t = decltype(layer::biases_aux_data_global); 431 | layer::biases_aux_data_global.~FeatureMap(); 432 | } 433 | } 434 | }; 435 | 436 | //feed forwards a layer (training or not) NOT BATCH 437 | template struct feed_forwards_impl 438 | { 439 | feed_forwards_impl() 440 | { 441 | using layer = get_layer; 442 | 443 | if (use_dropout && l != 0 && layer::type != MTNN_LAYER_SOFTMAX) 444 | dropout(); 445 | layer::feed_forwards(get_batch_activations()[0], get_batch_activations()[0]); 446 | } 447 | }; 448 | 449 | //feed forwards a batch layer 450 | template struct feed_forwards_batch_impl 451 | { 452 | feed_forwards_batch_impl() 453 | { 454 | if (use_dropout && training && l != 0 && get_layer::type != MTNN_LAYER_SOFTMAX) 455 | dropout();//todo vec also training bool 456 | get_layer::feed_forwards(get_batch_activations(), get_batch_activations()); 457 | } 458 | }; 459 | 460 | //feed 
backwards a layer NOT BATCH 461 | template struct feed_backwards_impl 462 | { 463 | feed_backwards_impl() 464 | { 465 | using layer = get_layer; 466 | layer::feed_backwards(get_batch_activations()[0], get_batch_activations()[0]); 467 | if (sample) 468 | layer::stochastic_sample(layer::feature_maps_global); 469 | } 470 | }; 471 | 472 | //feed backwards a batch of layers 473 | template struct feed_backwards_batch_impl 474 | { 475 | feed_backwards_batch_impl() 476 | { 477 | using layer = get_layer; 478 | layer::feed_backwards(get_batch_activations(), get_batch_activations()); 479 | if (sample) 480 | layer::stochastic_sample(layer::feature_maps_global);//todo vec 481 | } 482 | }; 483 | 484 | //backprop a layer NOT BATCH 485 | template struct back_prop_impl 486 | { 487 | back_prop_impl() 488 | { 489 | get_layer::back_prop(get_layer::activation, get_layer::feature_maps_global, get_batch_activations()[0], get_layer::feature_maps_global, !use_batch_learning && optimization_method == MTNN_OPT_BACKPROP, learning_rate, use_momentum && !use_batch_learning, momentum_term, use_l2_weight_decay, include_bias_decay, weight_decay_factor); 490 | } 491 | }; 492 | 493 | //backprop a batch of layers 494 | template struct back_prop_batch_impl 495 | { 496 | back_prop_batch_impl() 497 | { 498 | get_layer::back_prop(get_layer::activation, get_batch_out_derivs(), get_batch_activations(), get_batch_out_derivs(), !use_batch_learning && optimization_method == MTNN_OPT_BACKPROP, learning_rate, use_momentum && !use_batch_learning, momentum_term, use_l2_weight_decay, include_bias_decay, weight_decay_factor); 499 | } 500 | }; 501 | 502 | //get population statistics for an entire training batch (post training) 503 | template struct feed_forwards_pop_stats_impl 504 | { 505 | feed_forwards_pop_stats_impl() 506 | { 507 | using layer = get_layer; 508 | 509 | //calculate statistics for batch normalization layer 510 | auto& inputs = get_batch_activations(); 511 | auto& outputs = get_batch_activations(); 
512 | if (layer::type == MTNN_LAYER_BATCHNORMALIZATION) 513 | { 514 | using t = decltype(layer::feature_maps_global); 515 | for (size_t f = 0; f < t::size(); ++f) 516 | { 517 | for (size_t i = 0; i < t::rows(); ++i) 518 | { 519 | for (size_t j = 0; j < t::cols(); ++j) 520 | { 521 | float sumx = 0.0f; 522 | float sumxsqr = 0.0f; 523 | size_t n_in = outputs.size(); 524 | //compute statistics 525 | for (size_t in = 0; in < n_in; ++in) 526 | { 527 | float x = inputs[in][f].at(i, j); 528 | sumx += x; 529 | sumxsqr += x * x; 530 | } 531 | 532 | //store stats 533 | float mean_global = sumx / n_in; 534 | layer::activations_population_mean_global[f].at(i, j) = mean_global; 535 | layer::activations_population_variance_global[f].at(i, j) = sumxsqr / n_in - mean_global * mean_global; 536 | } 537 | } 538 | } 539 | } 540 | 541 | //can't feed forward batch because batch norm will use sample statistics 542 | for (size_t in = 0; in < inputs.size(); ++in) 543 | layer::feed_forwards(inputs[in], outputs[in]); 544 | } 545 | }; 546 | 547 | //add L2 weight decay to gradient_global (weights always, biases only when include_bias_decay is set) 548 | template struct add_weight_decay_impl 549 | { 550 | add_weight_decay_impl() 551 | { 552 | //weights are always decayed: add d/dw (weight_decay_factor * w^2) = 2 * weight_decay_factor * w 553 | { 554 | using t = decltype(get_layer::weights_gradient_global); 555 | for (size_t d = 0; d < t::size(); ++d) 556 | for (size_t i = 0; i < t::rows(); ++i) 557 | for (size_t j = 0; j < t::cols(); ++j) 558 | get_layer::weights_gradient_global[d].at(i, j) += 2 * weight_decay_factor * get_layer::weights_global[d].at(i, j); 559 | } 560 | 561 | if (include_bias_decay) //biases are decayed in addition to the weights, not instead of them 562 | { 563 | using t = decltype(get_layer::biases_gradient_global); 564 | for (size_t f_0 = 0; f_0 < t::size(); ++f_0) 565 | for (size_t i_0 = 0; i_0 < t::rows(); ++i_0) 566 | for (size_t j_0 = 0; j_0 < t::cols(); ++j_0) 567 | get_layer::biases_gradient_global[f_0].at(i_0, j_0) += 2 * weight_decay_factor * get_layer::biases_global[f_0].at(i_0, j_0); 568 | } 569 | } 570 | }; 571 | 572 | //apply the gradient_global, and reset the gradient_global if 
specified 573 | template struct apply_grad_impl 574 | { 575 | apply_grad_impl() 576 | { 577 | using layer = get_layer; 578 | using weights_t = decltype(layer::weights_global); 579 | using biases_t = decltype(layer::biases_global); 580 | 581 | if (optimization_method == MTNN_OPT_ADAM && layer::type != MTNN_LAYER_BATCHNORMALIZATION) 582 | { 583 | //update weights_global 584 | for (size_t d = 0; d < weights_t::size(); ++d) 585 | { 586 | for (size_t i = 0; i < weights_t::rows(); ++i) 587 | { 588 | for (size_t j = 0; j < weights_t::cols(); ++j) 589 | { 590 | float g = layer::weights_gradient_global[d].at(i, j); 591 | layer::weights_momentum[d].at(i, j) = beta1 * layer::weights_momentum[d].at(i, j) + (1.0f - beta1) * g; 592 | layer::weights_aux_data_global[d].at(i, j) = beta2 * layer::weights_aux_data_global[d].at(i, j) + (1.0f - beta2) * g * g; 593 | layer::weights_global[d].at(i, j) += -learning_rate * (float)sqrt(1.0f - pow(beta2, t_adam)) / (1.0f - (float)pow(beta1, t_adam)) * layer::weights_momentum[d].at(i, j) / ((float)sqrt(layer::weights_aux_data_global[d].at(i, j)) + 1e-7f); 594 | if (erase) 595 | layer::weights_gradient_global[d].at(i, j) = 0; 596 | } 597 | } 598 | } 599 | 600 | //update biases_global 601 | for (size_t f_0 = 0; f_0 < biases_t::size(); ++f_0) 602 | { 603 | for (size_t i_0 = 0; i_0 < biases_t::rows(); ++i_0) 604 | { 605 | for (size_t j_0 = 0; j_0 < biases_t::cols(); ++j_0) 606 | { 607 | float g = layer::biases_gradient_global[f_0].at(i_0, j_0); 608 | layer::biases_momentum[f_0].at(i_0, j_0) = beta1 * layer::biases_momentum[f_0].at(i_0, j_0) + (1 - beta1) * g; 609 | layer::biases_aux_data_global[f_0].at(i_0, j_0) = beta2 * layer::biases_aux_data_global[f_0].at(i_0, j_0) + (1 - beta2) * g * g; 610 | layer::biases_global[f_0].at(i_0, j_0) += -learning_rate * (float)sqrt(1 - pow(beta2, t_adam)) / (float)(1 - pow(beta1, t_adam)) * layer::biases_momentum[f_0].at(i_0, j_0) / (float)(sqrt(layer::biases_aux_data_global[f_0].at(i_0, j_0)) + 1e-7f); 611 | 
if (erase) 612 | layer::biases_gradient_global[f_0].at(i_0, j_0) = 0; 613 | } 614 | } 615 | } 616 | } 617 | 618 | else if (optimization_method == MTNN_OPT_ADAGRAD && layer::type != MTNN_LAYER_BATCHNORMALIZATION) 619 | { 620 | //update weights_global 621 | for (size_t d = 0; d < weights_t::size(); ++d) 622 | { 623 | for (size_t i = 0; i < weights_t::rows(); ++i) 624 | { 625 | for (size_t j = 0; j < weights_t::cols(); ++j) 626 | { 627 | float g = layer::weights_gradient_global[d].at(i, j); 628 | layer::weights_global[d].at(i, j) += -learning_rate / sqrt(layer::weights_aux_data_global[d].at(i, j) + minimum_divisor) * g; 629 | layer::weights_aux_data_global[d].at(i, j) += g * g; 630 | if (erase) 631 | layer::weights_gradient_global[d].at(i, j) = 0; 632 | } 633 | } 634 | } 635 | 636 | //update biases_global 637 | for (size_t f_0 = 0; f_0 < biases_t::size(); ++f_0) 638 | { 639 | for (size_t i_0 = 0; i_0 < biases_t::rows(); ++i_0) 640 | { 641 | for (size_t j_0 = 0; j_0 < biases_t::cols(); ++j_0) 642 | { 643 | float g = layer::biases_gradient_global[f_0].at(i_0, j_0); 644 | layer::biases_global[f_0].at(i_0, j_0) += -learning_rate / sqrt(layer::biases_aux_data_global[f_0].at(i_0, j_0) + minimum_divisor) * g; 645 | layer::biases_aux_data_global[f_0].at(i_0, j_0) += g * g; 646 | if (erase) 647 | layer::biases_gradient_global[f_0].at(i_0, j_0) = 0; 648 | } 649 | } 650 | } 651 | } 652 | 653 | else if (use_momentum) 654 | { 655 | //update weights_global 656 | for (size_t d = 0; d < weights_t::size(); ++d) 657 | { 658 | for (size_t i = 0; i < weights_t::rows(); ++i) 659 | { 660 | for (size_t j = 0; j < weights_t::cols(); ++j) 661 | { 662 | layer::weights_global[d].at(i, j) += -learning_rate * layer::weights_gradient_global[d].at(i, j) + momentum_term * layer::weights_momentum[d].at(i, j); 663 | layer::weights_momentum[d].at(i, j) = momentum_term * layer::weights_momentum[d].at(i, j) + -learning_rate * layer::weights_gradient_global[d].at(i, j); 664 | if (erase) 665 | 
layer::weights_gradient_global[d].at(i, j) = 0; 666 | } 667 | } 668 | } 669 | 670 | //update biases_global 671 | for (size_t f_0 = 0; f_0 < biases_t::size(); ++f_0) 672 | { 673 | for (size_t i_0 = 0; i_0 < biases_t::rows(); ++i_0) 674 | { 675 | for (size_t j_0 = 0; j_0 < biases_t::cols(); ++j_0) 676 | { 677 | layer::biases_global[f_0].at(i_0, j_0) += -learning_rate * layer::biases_gradient_global[f_0].at(i_0, j_0) + momentum_term * layer::biases_momentum[f_0].at(i_0, j_0); 678 | layer::biases_momentum[f_0].at(i_0, j_0) = momentum_term * layer::biases_momentum[f_0].at(i_0, j_0) + -learning_rate * layer::biases_gradient_global[f_0].at(i_0, j_0); 679 | if (erase) 680 | layer::biases_gradient_global[f_0].at(i_0, j_0) = 0; 681 | } 682 | } 683 | } 684 | } 685 | 686 | else 687 | { 688 | //update weights_global 689 | for (size_t d = 0; d < weights_t::size(); ++d) 690 | { 691 | for (size_t i = 0; i < weights_t::rows(); ++i) 692 | { 693 | for (size_t j = 0; j < weights_t::cols(); ++j) 694 | { 695 | layer::weights_global[d].at(i, j) += -learning_rate * layer::weights_gradient_global[d].at(i, j); 696 | if (erase) 697 | layer::weights_gradient_global[d].at(i, j) = 0; 698 | } 699 | } 700 | } 701 | 702 | //update biases_global 703 | for (size_t f_0 = 0; f_0 < biases_t::size(); ++f_0) 704 | { 705 | for (size_t i_0 = 0; i_0 < biases_t::rows(); ++i_0) 706 | { 707 | for (size_t j_0 = 0; j_0 < biases_t::cols(); ++j_0) 708 | { 709 | layer::biases_global[f_0].at(i_0, j_0) += -learning_rate * layer::biases_gradient_global[f_0].at(i_0, j_0); 710 | if (erase) 711 | layer::biases_gradient_global[f_0].at(i_0, j_0) = 0; 712 | } 713 | } 714 | } 715 | } 716 | } 717 | }; 718 | 719 | //change size of batch_activations vector 720 | template struct modify_batch_activations_vector_impl 721 | { 722 | modify_batch_activations_vector_impl() 723 | { 724 | if (add) 725 | get_batch_activations().push_back(typename remove_reference_t())>::value_type{ 0 }); 726 | else 727 | 
get_batch_activations().pop_back(); 728 | } 729 | }; 730 | 731 | //change size of batch_out_derivs vector 732 | template struct modify_batch_out_derivs_vector_impl 733 | { 734 | modify_batch_out_derivs_vector_impl() 735 | { 736 | if (add) 737 | get_batch_out_derivs().push_back(typename remove_reference_t())>::value_type{ 0 }); 738 | else 739 | get_batch_out_derivs().pop_back(); 740 | } 741 | }; 742 | 743 | ////Nonstatic thread versions 744 | 745 | //reset global gradients to locals TODO hogwild 746 | template struct update_global_params_impl 747 | { 748 | update_global_params_impl(NeuralNet& net) 749 | { 750 | using global_layer = get_layer; 751 | auto& local_layer = std::get(net.thread_layers); 752 | //update locals 753 | { 754 | using t = typename global_layer::weights_type; 755 | for (size_t d = 0; d < t::size(); ++d) 756 | { 757 | for (size_t i = 0; i < t::rows(); ++i) 758 | { 759 | 760 | for (size_t j = 0; j < t::cols(); ++j) 761 | { 762 | global_layer::weights_gradient_global[d].at(i, j) = local_layer.weights_gradient_local[d].at(i, j); 763 | local_layer.weights_gradient_local[d].at(i, j) = 0; 764 | } 765 | } 766 | } 767 | } 768 | { 769 | using t = typename global_layer::biases_type; 770 | for (size_t f_0 = 0; f_0 < t::size(); ++f_0) 771 | { 772 | for (size_t i_0 = 0; i_0 < t::rows(); ++i_0) 773 | { 774 | for (size_t j_0 = 0; j_0 < t::cols(); ++j_0) 775 | { 776 | global_layer::biases_gradient_global[f_0].at(i_0, j_0) = local_layer.biases_gradient_local[f_0].at(i_0, j_0); 777 | local_layer.biases_gradient_local[f_0].at(i_0, j_0) = 0; 778 | } 779 | } 780 | } 781 | } 782 | } 783 | }; 784 | 785 | //reset thread weights and biases to global values 786 | template struct update_thread_impl 787 | { 788 | update_thread_impl(NeuralNet& net) 789 | { 790 | using global_layer = get_layer; 791 | auto& local_layer = std::get(net.thread_layers); 792 | //update locals 793 | { 794 | using t = typename global_layer::weights_type; 795 | for (size_t d = 0; d < t::size(); ++d) 796 
for (size_t i = 0; i < t::rows(); ++i)
                        for (size_t j = 0; j < t::cols(); ++j)
                            local_layer.weights_local[d].at(i, j) = global_layer::weights_global[d].at(i, j);
            }
            //biases: same element-wise copy from global to local
            {
                using t = typename global_layer::biases_type;
                for (size_t f_0 = 0; f_0 < t::size(); ++f_0)
                    for (size_t i_0 = 0; i_0 < t::rows(); ++i_0)
                        for (size_t j_0 = 0; j_0 < t::cols(); ++j_0)
                            local_layer.biases_local[f_0].at(i_0, j_0) = global_layer::biases_global[f_0].at(i_0, j_0);
            }
        }
    };

    //reset thread activations and derivatives within an instance of a NeuralNet
    // Writes 0 into every element of every batch entry of both the thread's
    // activations and its out_derivs for this layer.
    // NOTE(review): the outer bound is the activations vector's size, and the same
    // index is used on the out_derivs vector -- this assumes out_derivs is at least
    // as long; confirm callers keep the two in step.
    template struct reset_thread_impl
    {
        reset_thread_impl(NeuralNet& net)
        {
            using layer = get_layer;
            //reset batch data_global
            // dimensions come from the layer's static feature_maps_global type
            using t = decltype(layer::feature_maps_global);
            for (size_t in = 0; in < net.get_thread_batch_activations().size(); ++in)
            {
                for (size_t f = 0; f < t::size(); ++f)
                {
                    for (size_t i = 0; i < t::rows(); ++i)
                    {
                        for (size_t j = 0; j < t::cols(); ++j)
                        {
                            net.get_thread_batch_activations()[in][f].at(i, j) = 0;
                            net.get_thread_batch_out_derivs()[in][f].at(i, j) = 0;
                        }
                    }
                }
            }
        }
    };

    //feed forwards using an instance's parameters/activations NOT BATCH
    // Calls feed_forwards_local on the thread-local layer using element [0] of the
    // thread's batch activations.
    // NOTE(review): both arguments read identically in this copy; the index template
    // arguments appear to have been stripped -- confirm against upstream.
    template struct feed_forwards_thread_impl
    {
        feed_forwards_thread_impl(NeuralNet& net)
        {
            // `layer` is only referenced by the commented-out dropout code below
            using layer = get_layer;

            //if (use_dropout && l != 0 && layer::type != MTNN_LAYER_SOFTMAX)
            //    dropout(); TODO

            std::get(net.thread_layers).feed_forwards_local(net.get_thread_batch_activations()[0], net.get_thread_batch_activations()[0]);
        }
    };

    //feed forwards using an instance's parameters/activations BATCH
    // Same as the non-batch version but passes the whole batch vectors.
    template struct feed_forwards_batch_thread_impl
    {
        feed_forwards_batch_thread_impl(NeuralNet& net)
        {
            //if (use_dropout && training &&l != 0 && get_layer::type != MTNN_LAYER_SOFTMAX)
            //    dropout();//todo vec also training bool
            std::get(net.thread_layers).feed_forwards_local(net.get_thread_batch_activations(), net.get_thread_batch_activations());
        }
    };

    //feed backwards using an instance's parameters/activations NOT BATCH
    // Runs feed_backwards_local on element [0]; when `sample` is true it then calls
    // the layer's static stochastic_sample on element [0] of the activations.
    template struct feed_backwards_thread_impl
    {
        feed_backwards_thread_impl(NeuralNet& net)
        {
            using layer = get_layer;
            std::get(net.thread_layers).feed_backwards_local(net.get_thread_batch_activations()[0], net.get_thread_batch_activations()[0]); //TODO: not generative biases_global
            if (sample)
                layer::stochastic_sample(net.get_thread_batch_activations()[0]);
        }
    };

    //feed backwards using an instance's parameters/activations BATCH
    // Batch form: one feed_backwards_local call on the vectors, then (if `sample`)
    // stochastic_sample is applied to each batch entry in turn.
    template struct feed_backwards_batch_thread_impl
    {
        feed_backwards_batch_thread_impl(NeuralNet& net)
        {
            using layer = get_layer;
            std::get(net.thread_layers).feed_backwards_local(net.get_thread_batch_activations(), net.get_thread_batch_activations()); //TODO: not generative biases_global
            if (sample)
                for (size_t i = 0; i < net.get_thread_batch_activations().size(); ++i)
                    layer::stochastic_sample(net.get_thread_batch_activations()[i]);
        }
    };

    //backprop using an instance's parameters/activations NOT BATCH
    // Forwards element [0] of the thread's out_derivs/activations to back_prop_local
    // together with the static hyperparameters. The 5th and 7th arguments are only
    // true when use_batch_learning is false.
    // NOTE(review): presumably those flags mean "apply update now" and "use momentum";
    // confirm against back_prop_local's signature.
    template struct back_prop_thread_impl
    {
        back_prop_thread_impl(NeuralNet& net)
        {
            std::get(net.thread_layers).back_prop_local(get_layer::activation, net.get_thread_batch_out_derivs()[0], net.get_thread_batch_activations()[0], net.get_thread_batch_out_derivs()[0], !use_batch_learning && optimization_method == MTNN_OPT_BACKPROP, learning_rate, use_momentum && !use_batch_learning, momentum_term, use_l2_weight_decay, include_bias_decay, weight_decay_factor);
        }
    };

    //backprop using an instance's parameters/activations BATCH
    template struct
back_prop_batch_thread_impl
    {
        // Batch form of the thread back-prop step: passes the whole out_derivs and
        // activations vectors (no [0] indexing) with the same hyperparameter arguments.
        back_prop_batch_thread_impl(NeuralNet& net)
        {
            std::get(net.thread_layers).back_prop_local(get_layer::activation, net.get_thread_batch_out_derivs(), net.get_thread_batch_activations(), net.get_thread_batch_out_derivs(), !use_batch_learning && optimization_method == MTNN_OPT_BACKPROP, learning_rate, use_momentum && !use_batch_learning, momentum_term, use_l2_weight_decay, include_bias_decay, weight_decay_factor);
        }
    };

    //change size of thread_batch_activations vector
    // Appends one `{ 0 }`-initialised element when `add` is true, otherwise removes
    // the last element (no emptiness check is made here).
    template struct modify_thread_batch_activations_vector_impl
    {
        modify_thread_batch_activations_vector_impl(NeuralNet& net)
        {
            if (add)
                net.get_thread_batch_activations().push_back(typename remove_reference_t())>::value_type{ 0 });
            else
                net.get_thread_batch_activations().pop_back();
        }
    };

    //change size of thread_batch_out_derivs vector
    // Same grow/shrink-by-one behaviour, applied to the thread's out_derivs vector.
    template struct modify_thread_batch_out_derivs_vector_impl
    {
        modify_thread_batch_out_derivs_vector_impl(NeuralNet& net)
        {
            if (add)
                net.get_thread_batch_out_derivs().push_back(typename remove_reference_t())>::value_type{ 0 });
            else
                net.get_thread_batch_out_derivs().pop_back();
        }
    };

public:

    ////Architecture constexprs
    //the total number of layers
    static constexpr size_t num_layers = sizeof...(layers);
    //usually the index of the output layer
    // (num_layers - 1; size_t arithmetic, so this wraps if the pack is empty)
    static constexpr size_t last_layer_index = num_layers - 1;

    ////Loop bodies
    // Alias templates naming the *_impl structs so they can be passed as loop bodies.
    // NOTE(review): the template parameter/argument lists are missing in this copy, so
    // aliases that share an *_impl target look identical here -- restore from upstream.

    template using save_net_data_global = save_data_t;
    template using load_net_data_global = load_data_t;

    template using reset_layer_feature_maps_global = reset_impl;
    template using reset_layer_weights_gradient_global = reset_impl;
    template using reset_layer_biases_gradient_global = reset_impl;
    template using reset_layer_weights_momentum = reset_impl;
    // Loop-body aliases (continued). NOTE(review): template parameter/argument lists
    // are missing in this copy of the file, so aliases sharing an *_impl target look
    // identical here; confirm the distinguishing arguments against upstream.
    template using reset_layer_biases_momentum = reset_impl;
    template using reset_layer_weights_aux_data_global = reset_impl;
    template using reset_layer_biases_aux_data_global = reset_impl;

    template using delete_layer_feature_maps_global = delete_impl;
    template using delete_layer_weights_momentum = delete_impl;
    template using delete_layer_biases_momentum = delete_impl;
    template using delete_layer_weights_aux_data_global = delete_impl;
    template using delete_layer_biases_aux_data_global = delete_impl;

    template using feed_forwards_layer = feed_forwards_impl;
    template using feed_forwards_training_layer = feed_forwards_impl;

    template using feed_forwards_batch_layer = feed_forwards_batch_impl;
    template using feed_forwards_batch_training_layer = feed_forwards_batch_impl;

    template using feed_forwards_population_statistics_layer = feed_forwards_pop_stats_impl;

    template using feed_backwards_layer_nosample = feed_backwards_impl;
    template using feed_backwards_layer_sample = feed_backwards_impl;

    template using feed_backwards_batch_layer_nosample = feed_backwards_batch_impl;
    template using feed_backwards_batch_layer_sample = feed_backwards_batch_impl;

    template using back_prop_layer = back_prop_impl;

    template using back_prop_batch_layer = back_prop_batch_impl;

    template using add_weight_decay_layer = add_weight_decay_impl;

    template using apply_gradient_layer = apply_grad_impl;
    template using apply_gradient_noclear_layer = apply_grad_impl;

    template using add_batch_activations = modify_batch_activations_vector_impl;
    template using remove_batch_activations = modify_batch_activations_vector_impl;

    template using add_batch_out_derivs = modify_batch_out_derivs_vector_impl;
    template using remove_batch_out_derivs = modify_batch_out_derivs_vector_impl;

    //nonstatic versions
    // These name the *_thread_impl structs, whose constructors take a NeuralNet&.

    template using update_global_params = update_global_params_impl;
    template using update_thread = update_thread_impl;

    template using reset_thread_feature_maps_global = reset_thread_impl;

    template using feed_forwards_thread = feed_forwards_thread_impl;
    template using feed_forwards_training_thread = feed_forwards_thread_impl;

    template using feed_forwards_batch_thread = feed_forwards_batch_thread_impl;
    template using feed_forwards_batch_training_thread = feed_forwards_batch_thread_impl;

    template using feed_backwards_thread_nosample = feed_backwards_thread_impl;
    template using feed_backwards_thread_sample = feed_backwards_thread_impl;

    template using feed_backwards_batch_thread_nosample = feed_backwards_batch_thread_impl;
    template using feed_backwards_batch_thread_sample = feed_backwards_batch_thread_impl;

    template using back_prop_thread = back_prop_thread_impl;

    template using back_prop_batch_thread = back_prop_batch_thread_impl;

    template using add_thread_batch_activations = modify_thread_batch_activations_vector_impl;
    template using remove_thread_batch_activations = modify_thread_batch_activations_vector_impl;

    template using add_thread_batch_out_derivs = modify_thread_batch_out_derivs_vector_impl;
    template using remove_thread_batch_out_derivs = modify_thread_batch_out_derivs_vector_impl;

    //incremental loop
    // runs loop_body over for_loop<0, last_layer_index - 1, 1, ...>, i.e. it stops one
    // short of last_layer_index
    template class loop_body, typename... Args> using loop_up_layers = for_loop<0, last_layer_index - 1, 1, loop_body, Args...>;
    //decremental loop
    // NOTE(review): the for_loop bounds are not visible in this copy -- restore from upstream
    template class loop_body, typename... Args> using loop_down_layers = for_loop;
    template class loop_body, typename...
Args> using loop_all_layers = for_loop<0, last_layer_index, 1, loop_body, Args...>;
    // (loop_all_layers uses last_layer_index itself as its upper bound, unlike loop_up_layers)

    //fetch a layer with a constexpr
    template using get_layer = get_type;
    //fetch a layer activation vector with a constexpr
    // returns a reference into the static batch_activations tuple
    template static typename get_layer::feature_maps_vector_type& get_batch_activations()
    {
        return std::get(batch_activations);
    }
    //fetch out derivs vector
    // returns a reference into the static batch_out_derivs tuple
    template static typename get_layer::feature_maps_vector_type& get_batch_out_derivs()
    {
        return std::get(batch_out_derivs);
    }

    //non static
    //fetch a layer activation vector with a constexpr for a given thread
    // returns a reference into this instance's thread_batch_activations tuple
    template typename get_layer::feature_maps_vector_type& get_thread_batch_activations()
    {
        return std::get(thread_batch_activations);
    }
    //fetch out derivs vector for a given thread
    // returns a reference into this instance's thread_batch_out_derivs tuple
    template typename get_layer::feature_maps_vector_type& get_thread_batch_out_derivs()
    {
        return std::get(thread_batch_out_derivs);
    }

    ////Hyperparameters
    // All of these are static members, so they are shared by every instance.

    static size_t loss_function;
    static size_t optimization_method;
    static bool use_dropout;
    static bool use_batch_learning;
    //Cannot be used with Adam
    static bool use_momentum;
    static bool use_l2_weight_decay;
    static bool include_bias_decay;

    //learning rate (should be positive)
    static float learning_rate;
    //only set if using adagrad
    static float minimum_divisor;
    //only set if using momentum
    static float momentum_term;
    //only set if using dropout. This proportion of neurons will be "dropped"
    static float dropout_probability;
    //must be set if using Adam
    static float beta1;
    //must be set if using Adam
    static float beta2;
    //must be set if using L2 weight decay
    static float weight_decay_factor;

    // current input and labels (written by set_input / set_labels)
    static typename get_type<0, layers...>::feature_maps_type input;
    static typename get_type::feature_maps_type labels;

    //used for adam
    static size_t t_adam;

    static constexpr size_t last_rbm_index = get_rbm_idx::idx;

    //per-layer activation vectors, always needed
    static std::tuple batch_activations;

    //only for batches and batch norm
    static std::tuple batch_out_derivs;

    //NONSTATIC MEMBERS: Used for parallel

    //need for parallel
    std::tuple thread_layers;

    //need for parallel batches, can't use feature maps at all
    std::tuple thread_batch_activations;

    //need for parallel batches, can't use feature maps at all
    std::tuple thread_batch_out_derivs;

    ////Static Functions: General use and non parallel use

    //save learned net
    template static void save_data();

    //load previously learned net
    template static void load_data();

    //set input (for discrimination)
    // note: takes new_input by value, unlike set_labels which takes a reference
    static void set_input(typename get_type<0, layers...>::feature_maps_type new_input);

    //set labels for batch
    static void set_labels(typename get_type::feature_maps_type& new_labels);

    //feed forwards
    static typename get_type::feature_maps_type& discriminate(typename get_type<0, layers...>::feature_maps_type& new_input = input);

    //feed forwards for a whole batch
    static typename get_type::feature_maps_vector_type& discriminate(typename get_type<0, layers...>::feature_maps_vector_type& batch_input);

    //feed backwards, returns a copy of the first layer (must be deallocated)
    static typename
get_type<0, layers...>::feature_maps_type generate(typename get_type::feature_maps_type& input, size_t iterations, bool use_sampling);

    //wake-sleep algorithm, only trains target layer with assumption that layers up to it have been trained
    static void pretrain(size_t markov_iterations);

    //backpropagate with selected method, returns error by loss function
    static float train(bool already_fed = false, typename get_type<0, layers...>::feature_maps_type& new_input = NeuralNet::input, typename get_type::feature_maps_type& lbl = labels);

    //backprop for a batch with selected method, returns mean_global error by loss function
    static float train_batch(typename get_type<0, layers...>::feature_maps_vector_type& batch_input, typename get_type::feature_maps_vector_type& batch_labels, bool already_fed = false, bool apply = true);

    //compute the population statistics for BN networks
    static void calculate_population_statistics(typename get_type<0, layers...>::feature_maps_vector_type& batch_input);

    //reset and apply gradient_global
    static void apply_gradient(bool clear_gradient_globals_global = true);

    //get current error according to loss function
    static float global_error(typename get_type::feature_maps_type& output = get_batch_activations()[0], typename get_type::feature_maps_type& lbls = labels);

    //get error for an entire batch according to loss function
    static float global_error(typename get_type::feature_maps_vector_type& batch_outputs, typename get_type::feature_maps_vector_type& batch_labels);

private:

    //apply dropout with dropout probability on a layer (done in feed forwards)
    template static void dropout();

    //get the deriv of the loss wrt the output
    static typename get_type::feature_maps_type error_signals(typename get_type::feature_maps_type& output = get_batch_activations()[0], typename get_type::feature_maps_type& lbls = labels);

    //get the deriv of the loss wrt the output for a batch
    static typename get_type::feature_maps_vector_type error_signals(typename get_type::feature_maps_vector_type& batch_outputs, typename get_type::feature_maps_vector_type& batch_labels);

public:

    //// NON-STATIC PARALLEL FUNCTIONS

    //instantiate a subnet
    // Builds a default-constructed copy of every layer and gives each per-layer
    // activation / out_derivs vector an initial size of 1.
    NeuralNet()
    {
        thread_layers = std::make_tuple(layers{}...);
        thread_batch_activations = std::make_tuple(typename layers::feature_maps_vector_type(1)...);
        thread_batch_out_derivs = std::make_tuple(typename layers::feature_maps_vector_type(1)...);
    }

    //deallocates itself
    ~NeuralNet() = default;

    //discriminate using an instance's params
    typename get_type::feature_maps_type& discriminate_thread(typename get_type<0, layers...>::feature_maps_type& new_input = input);

    //discriminate using an instance's params batch
    typename get_type::feature_maps_vector_type& discriminate_thread(typename get_type<0, layers...>::feature_maps_vector_type& batch_input);

    //feed backwards, returns a copy of the first layer (must be deallocated)
    typename get_type<0, layers...>::feature_maps_type generate_thread(typename get_type::feature_maps_type& input, size_t iterations, bool use_sampling); //todo: add par

    //wake-sleep algorithm, only trains target layer with assumption that layers up to it have been trained
    void pretrain_thread(size_t markov_iterations); //todo: add par

    //backpropagate with selected method, returns error by loss function
    float train_thread(bool already_fed = false, typename get_type<0, layers...>::feature_maps_type& new_input = NeuralNet::input, typename get_type::feature_maps_type& lbl = labels);

    //backprop for a batch with selected method, returns mean_global error by loss function
    float train_batch_thread(typename get_type<0, layers...>::feature_maps_vector_type& batch_input, typename get_type::feature_maps_vector_type& batch_labels, bool already_fed = false);

    //update the global gradients TODO use hogwild? separate class
    void update_global_gradients();

    //update local weights from the global values
    void update_thread_weights();
};

//Hyperparameter definitions (out-of-class definitions of the static members, with their default values)
// NOTE(review): the `template<...>` headers and `NeuralNet<...>` argument lists are
// missing in this copy of the file -- restore from upstream before compiling.

template size_t NeuralNet::loss_function = MTNN_LOSS_L2;
template size_t NeuralNet::optimization_method = MTNN_OPT_BACKPROP;
template bool NeuralNet::use_dropout = false;
template bool NeuralNet::use_batch_learning = false;
template bool NeuralNet::use_momentum = false;
template bool NeuralNet::use_l2_weight_decay = false;
template bool NeuralNet::include_bias_decay = false;
template float NeuralNet::learning_rate = .001f;
template float NeuralNet::minimum_divisor = .1f;
template float NeuralNet::momentum_term = .8f;
template float NeuralNet::dropout_probability = .5f;
template float NeuralNet::beta1 = .9f;
template float NeuralNet::beta2 = .99f;
template float NeuralNet::weight_decay_factor = .001f;
template size_t NeuralNet::t_adam = 0;
template template FILE* NeuralNet::save_data_t::fp = {};
template template FILE* NeuralNet::load_data_t::fp = {};
template typename get_type<0, layers...>::feature_maps_type NeuralNet::input = {};
template typename get_type::feature_maps_type NeuralNet::labels = {};
template std::tuple NeuralNet::batch_activations = {}; //init with one, will add more if necessary for batch
template std::tuple NeuralNet::batch_out_derivs = {}; //init with zero, will add more if necessary for batch

////DEFINITIONS

//save learned net: instantiates the save_net_data_global loop body
template
template
inline void NeuralNet::
save_data()
{
    save_net_data_global();
}

//load previously learned net: instantiates the load_net_data_global loop body
template
template
inline void NeuralNet::
load_data()
{
    load_net_data_global();
}

//Copies new_input element-wise into the static `input` and into entry [0] of
//layer 0's batch activations.
//NOTE(review): the loop-body template arguments of the loop_all_layers calls are
//missing in this copy; the MSVC branch differs only by the extra literal 0 argument.
template
inline void NeuralNet::
set_input(typename get_type<0, layers...>::feature_maps_type new_input)
{
#if !defined(_MSC_VER)
    //first call is guarded by "no batch entry yet"; second call is unconditional
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers();
    loop_all_layers();
#else
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers(0);
    loop_all_layers(0);
#endif

    for (size_t f = 0; f < input.size(); ++f)
    {
        for (size_t i = 0; i < input.rows(); ++i)
        {
            for (size_t j = 0; j < input.cols(); ++j)
            {
                input[f].at(i, j) = new_input[f].at(i, j);
                get_batch_activations<0>()[0][f].at(i, j) = input[f].at(i, j);
            }
        }
    }
}

//Copies new_labels element-wise into the static `labels`.
template
inline void NeuralNet::
set_labels(typename get_type::feature_maps_type& new_labels)
{
    for (size_t f = 0; f < labels.size(); ++f)
        for (size_t i = 0; i < labels.rows(); ++i)
            for (size_t j = 0; j < labels.cols(); ++j)
                labels[f].at(i, j) = new_labels[f].at(i, j);
}

//Single-sample forward pass on the static (global) state: copies new_input into
//entry [0] of layer 0's batch activations, runs loop_up_layers, and returns a
//reference to entry [0] of a batch-activation vector.
template
inline typename get_type::feature_maps_type& NeuralNet::
discriminate(typename get_type<0, layers...>::feature_maps_type& new_input)
{
#if !defined(_MSC_VER)
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers();
    loop_all_layers();
#else
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers(0);
    loop_all_layers(0);
#endif

    //set input
    for (size_t f = 0; f < get_layer<0>::feature_maps_global.size(); ++f)
        for (size_t i = 0; i < get_layer<0>::feature_maps_global.rows(); ++i)
            for (size_t j = 0; j < get_layer<0>::feature_maps_global.cols(); ++j)
get_batch_activations<0>()[0][f].at(i, j) = new_input[f].at(i, j);
#if !defined(_MSC_VER)
    loop_up_layers();
#else
    loop_up_layers(0);
#endif
    return get_batch_activations()[0];
}

//Single-sample forward pass using this instance's thread-local state: runs a
//loop_all_layers pass, copies new_input into entry [0] of layer 0's thread
//activations, runs loop_up_layers, and returns a reference to entry [0] of a
//thread activation vector.
//NOTE(review): the loop-body names in the loop_* calls are missing in this copy.
template
inline typename get_type::feature_maps_type& NeuralNet::
discriminate_thread(typename get_type<0, layers...>::feature_maps_type& new_input)
{
#if !defined(_MSC_VER)
    loop_all_layers&>(*this);
#else
    loop_all_layers&>(*this, 0);
#endif

    //set input
    for (size_t f = 0; f < get_layer<0>::feature_maps_global.size(); ++f)
        for (size_t i = 0; i < get_layer<0>::feature_maps_global.rows(); ++i)
            for (size_t j = 0; j < get_layer<0>::feature_maps_global.cols(); ++j)
                get_thread_batch_activations<0>()[0][f].at(i, j) = new_input[f].at(i, j);
#if !defined(_MSC_VER)
    loop_up_layers&>(*this);
#else
    loop_up_layers&>(*this, 0);
#endif

    return get_thread_batch_activations()[0];
}

//Batch forward pass on the static (global) state. Grows or shrinks the per-layer
//batch vectors one step at a time until layer 0's vector has batch_inputs.size()
//entries, runs a loop_all_layers pass, feeds batch_inputs through layer 0 into
//layer 1's activations, then runs layers 1..last_layer_index - 1.
//NOTE(review): the remaining layers use feed_forwards_batch_training_layer even
//though this is the discriminate path -- confirm the training variant is intended.
template
inline typename get_type::feature_maps_vector_type& NeuralNet::
discriminate(typename get_type<0, layers...>::feature_maps_vector_type& batch_inputs)
{
    //adjust batch data_global sizes
#if !defined(_MSC_VER)
    while (get_batch_activations<0>().size() != batch_inputs.size()) //fix sizes
    {
        if (get_batch_activations<0>().size() > batch_inputs.size())
            loop_all_layers();
        else
            loop_all_layers();
    }

    //reset batch activations
    loop_all_layers();

    get_layer<0>::feed_forwards(batch_inputs, get_batch_activations<1>());
    for_loop<1, last_layer_index - 1, 1, feed_forwards_batch_training_layer>();
#else
    while (get_batch_activations<0>().size() != batch_inputs.size()) //fix sizes
    {
        if (get_batch_activations<0>().size() > batch_inputs.size())
            loop_all_layers(0);
        else
            loop_all_layers(0);
    }

    //reset batch activations
    loop_all_layers(0);

    get_layer<0>::feed_forwards(batch_inputs, get_batch_activations<1>());
    for_loop<1, last_layer_index - 1, 1, feed_forwards_batch_training_layer>(0);
#endif

    return get_batch_activations();
}

//Batch forward pass using this instance's thread-local state. Resizes the
//thread batch vectors one step at a time until layer 0's vector has
//batch_inputs.size() entries, runs a further loop_all_layers pass, feeds
//batch_inputs through layer 0 into layer 1's thread activations, then runs
//loop_up_layers.
//NOTE(review): the two preprocessor branches differ in more than the extra 0
//argument: the non-MSVC branch calls the static get_layer<0>::feed_forwards while
//the MSVC branch calls feed_forwards_local on the thread-local layer 0 -- confirm
//which is intended and make them agree.
//NOTE(review): loop_up_layers is declared to start at index 0 while the input was
//written to layer 1's activations -- confirm layer 1 is not overwritten.
template
inline typename get_type::feature_maps_vector_type& NeuralNet::
discriminate_thread(typename get_type<0, layers...>::feature_maps_vector_type& batch_inputs)
{
#if !defined(_MSC_VER)
    //adjust and reset batch activations
    while (get_thread_batch_activations<0>().size() != batch_inputs.size()) //fix sizes
    {
        if (get_thread_batch_activations<0>().size() > batch_inputs.size())
            loop_all_layers&>(*this);
        else
            loop_all_layers&>(*this);
    }
    loop_all_layers&>(*this);

    get_layer<0>::feed_forwards(batch_inputs, get_thread_batch_activations<1>());
    loop_up_layers&>(*this);
#else
    //adjust and reset batch activations
    while (get_thread_batch_activations<0>().size() != batch_inputs.size()) //fix sizes
    {
        if (get_thread_batch_activations<0>().size() > batch_inputs.size())
            loop_all_layers&>(*this, 0);
        else
            loop_all_layers&>(*this, 0);
    }
    loop_all_layers&>(*this, 0);

    std::get<0, layers...>(thread_layers).feed_forwards_local(batch_inputs, get_thread_batch_activations<1>());
    loop_up_layers&>(*this, 0);
#endif

    return get_thread_batch_activations();
}

//Feeds `input` backwards through the net on the static (global) state and returns
//a by-value copy of layer 0's activations. Note the parameter `input` shadows
//the static member of the same name inside this function.
template
inline typename get_type<0, layers...>::feature_maps_type NeuralNet::
generate(typename get_type::feature_maps_type& input, size_t iterations, bool use_sampling)
{
#if !defined(_MSC_VER)
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers();
    loop_all_layers();
#else
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers(0);
    loop_all_layers(0);
#endif

    //reset all but output (or inputs?)
#if !defined(_MSC_VER)
    loop_all_layers();
#else
    loop_all_layers(0);
#endif
    //push the supplied feature maps backwards through a layer
    //NOTE(review): the layer index and the for_loop bounds below are missing in this copy
    get_layer::feed_backwards(input);

#if !defined(_MSC_VER)
    for_loop();
#else
    for_loop(0);
#endif
    using rbm_layer = get_layer;

    //gibbs sample
    // Each iteration: optionally sample rbm_layer's own feature maps, feed forwards,
    // sample a layer's feature maps, then feed backwards again. The first sampling
    // step is the only one gated on use_sampling.
    rbm_layer::feed_backwards(get_layer::feature_maps_global);
    for (size_t i = 0; i < iterations; ++i)
    {
        if (use_sampling)
            rbm_layer::stochastic_sample(rbm_layer::feature_maps_global);
        rbm_layer::feed_forwards(get_layer::feature_maps_global);
        get_layer::stochastic_sample(get_layer::feature_maps_global);
        rbm_layer::feed_backwards(get_layer::feature_maps_global);
    }

    //finish the downward pass; use_sampling selects between two for_loop instantiations
#if !defined(_MSC_VER)
    if (use_sampling)
        for_loop();
    else
        for_loop();
#else
    if (use_sampling)
        for_loop(0);
    else
        for_loop(0);
#endif

    //return a clone of each feature map in entry [0] of layer 0's batch activations
    typename get_type<0, layers...>::feature_maps_type output = {};
    for (size_t f = 0; f < output.size(); ++f)
        output[f] = get_batch_activations<0>()[0][f].clone();
    return output;
}

//Feeds the static `input` forwards on the static (global) state, then calls
//wake_sleep on the target layer if its type is convolution or fully connected
//perceptron.
template
inline void NeuralNet::
pretrain(size_t markov_iterations)
{
#if !defined(_MSC_VER)
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers();
    loop_all_layers();
#else
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers(0);
    loop_all_layers(0);
#endif

    //reset input
#if !defined(_MSC_VER)
    loop_all_layers();
#else
    loop_all_layers(0);
#endif
    //copy the static `input` into entry [0] of layer 0's batch activations
    for (size_t f = 0; f < get_layer<0>::feature_maps_global.size(); ++f)
        for (size_t i = 0; i <
get_layer<0>::feature_maps_global.rows(); ++i)
            for (size_t j = 0; j < get_layer<0>::feature_maps_global.cols(); ++j)
                get_batch_activations<0>()[0][f].at(i, j) = input[f].at(i, j);

#if !defined(_MSC_VER)
    loop_up_layers();
#else
    loop_up_layers(0);
#endif

    using target_layer = get_layer; //todo add in target layer
    //wake_sleep is only invoked for these two layer types; other types are a no-op here
    if (target_layer::type == MTNN_LAYER_CONVOLUTION || target_layer::type == MTNN_LAYER_PERCEPTRONFULLCONNECTIVITY)
        target_layer::wake_sleep(learning_rate, use_dropout, markov_iterations);
}

//Single-sample training step on the static (global) state.
//Unless already_fed, feeds new_input forwards and records the loss via
//global_error; then computes the output error signals and back-propagates.
//Returns the recorded loss.
//NOTE(review): when already_fed is true, `error` is never assigned and 0.0f is
//returned, whereas train_thread always computes the error -- confirm this is intended.
template
inline float NeuralNet::
train(bool already_fed, typename get_type<0, layers...>::feature_maps_type& new_input, typename get_type::feature_maps_type& lbl)
{
#if !defined(_MSC_VER)
    //runs only when layer 0 has no batch entry yet
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers();
#else
    if (get_batch_activations<0>().size() == 0)
        loop_all_layers(0);
#endif

    float error = 0.0f;
    if (!already_fed)
    {
#if !defined(_MSC_VER)
        loop_up_layers(); //resets batch too
#else
        loop_up_layers(0); //resets batch too
#endif
        //set input: layer 0 writes new_input into entry [0] of its batch activations
        get_layer<0>::feed_forwards(new_input, get_batch_activations<0>()[0]);

#if !defined(_MSC_VER)
        loop_up_layers();
#else
        loop_up_layers(0);
#endif

        error = global_error(get_batch_activations()[0], lbl);
    }

    //get error signals for output
    auto errors = error_signals(get_batch_activations()[0], lbl);

    //back_prop for each layer (need to get activation derivatives for output first)
    get_layer::back_prop(get_layer::activation, errors,
        get_batch_activations()[0], get_layer::feature_maps_global,
        !use_batch_learning && optimization_method == MTNN_OPT_BACKPROP, learning_rate,
        use_momentum && !use_batch_learning, momentum_term,
        use_l2_weight_decay,
include_bias_decay, weight_decay_factor);
    //back-propagate through the remaining layers
    //NOTE(review): the for_loop bounds and body are missing in this copy
#if !defined(_MSC_VER)
    for_loop();
#else
    for_loop(0);
#endif

    if (!use_batch_learning && optimization_method != MTNN_OPT_BACKPROP) //online is applied directly in backprop otherwise
        apply_gradient();

    return error;
}

//Single-sample training step using this instance's thread-local state.
//Unless already_fed, runs a loop_all_layers pass and feeds new_input forwards;
//then always computes the loss, derives the output error signals and
//back-propagates through the thread-local layers. No gradient is applied here
//(the apply_gradient call at the end is commented out).
template
inline float NeuralNet::
train_thread(bool already_fed, typename get_type<0, layers...>::feature_maps_type& new_input, typename get_type::feature_maps_type& lbl)
{
    float error = 0.0f;

    if (!already_fed)
    {
#if !defined(_MSC_VER)
        loop_all_layers&>(*this);
#else
        loop_all_layers&>(*this, 0);
#endif
        //set input
        //NOTE(review): this calls the static layer-0 feed_forwards rather than a
        //thread-local feed_forwards_local -- confirm that is intended
        get_layer<0>::feed_forwards(new_input, get_thread_batch_activations<0>()[0]);

#if !defined(_MSC_VER)
        loop_up_layers&>(*this);
#else
        loop_up_layers&>(*this, 0);
#endif
    }
    //unlike train(), the error is computed even when already_fed is true
    error = global_error(get_thread_batch_activations()[0], lbl);

    //get error signals for output
    auto errors = error_signals(get_thread_batch_activations()[0], lbl);

    //back_prop for each layer (need to get activation derivatives for output first)
    //all boolean switches and the final argument are passed as false here, unlike
    //train(), which forwards the static hyperparameters
    std::get(thread_layers).back_prop_local(get_layer::activation, errors,
        get_thread_batch_activations()[0], get_thread_batch_out_derivs()[0],
        false, learning_rate, false, momentum_term, false, false, false);
#if !defined(_MSC_VER)
    for_loop&>(*this);
#else
    for_loop&>(*this, 0);
#endif

    //if (!use_batch_learning && optimization_method != MTNN_OPT_BACKPROP)
    //    apply_gradient(); parallel so don't?

    return error;
}

//Batch training step on the static (global) state.
//Temporarily forces use_batch_learning to true and restores it before returning.
//Unless already_fed, resizes the batch vectors, runs a loop_all_layers pass and
//feeds the batch forwards. It then computes the total loss, back-propagates,
//optionally applies the gradient (`apply`), and returns
//total_error / batch_inputs.size().
//NOTE(review): the vectors are sized from batch_labels.size() but the mean divides
//by batch_inputs.size() -- this assumes both are equal and non-zero; confirm
//callers guarantee that.
template
inline float NeuralNet::
train_batch(typename get_type<0, layers...>::feature_maps_vector_type& batch_inputs, typename get_type::feature_maps_vector_type& batch_labels, bool already_fed, bool apply)
{
    bool temp_batch = use_batch_learning;
    use_batch_learning = true;

#if !defined(_MSC_VER)
    //adjust batch data_global sizes
    if (!already_fed)
    {
        //each pass changes every layer's activation vector by one entry
        while (get_batch_activations<0>().size() != batch_labels.size()) //fix sizes
        {
            if (get_batch_activations<0>().size() > batch_labels.size())
                loop_all_layers();
            else
                loop_all_layers();
        }
        //same one-entry-at-a-time resizing for the out_derivs vectors
        while (get_batch_out_derivs<0>().size() != batch_labels.size()) //fix sizes
        {
            if (get_batch_out_derivs<0>().size() > batch_labels.size())
                loop_all_layers();
            else
                loop_all_layers();
        }

        //reset batch activations
        loop_all_layers();

        get_layer<0>::feed_forwards(batch_inputs, get_batch_activations<0>());
        loop_up_layers();
    }
#else
    //adjust batch data_global sizes
    //(MSVC branch: same steps, each loop helper receives an extra literal 0)
    if (!already_fed)
    {
        while (get_batch_activations<0>().size() != batch_labels.size()) //fix sizes
        {
            if (get_batch_activations<0>().size() > batch_labels.size())
                loop_all_layers(0);
            else
                loop_all_layers(0);
        }
        while (get_batch_out_derivs<0>().size() != batch_labels.size()) //fix sizes
        {
            if (get_batch_out_derivs<0>().size() > batch_labels.size())
                loop_all_layers(0);
            else
                loop_all_layers(0);
        }

        //reset batch activations
        loop_all_layers(0);

        get_layer<0>::feed_forwards(batch_inputs, get_batch_activations<0>());
        loop_up_layers(0);
    }
#endif

    float total_error = global_error(get_batch_activations(), batch_labels);

    //get error signals for output
    auto errors = error_signals(get_batch_activations(), batch_labels);

    //back_prop for each layer (need to get activation derivatives for output first)
    //the 5th argument is passed as a literal true and the 7th as a literal false here
    get_layer::back_prop(get_layer::activation, errors,
        get_batch_activations(), get_batch_out_derivs(),
        true, learning_rate, false, momentum_term,
        use_l2_weight_decay, include_bias_decay, weight_decay_factor);
#if !defined(_MSC_VER)
    for_loop();
#else
    for_loop(0);
#endif

    if (apply)
        apply_gradient();
    use_batch_learning = temp_batch;
    return total_error / batch_inputs.size();
}

//Batch training step using this instance's thread-local state.
//NOTE(review): this writes the static use_batch_learning flag from a per-instance
//method; if several instances run concurrently that write is shared -- confirm
//how callers synchronise.
template
inline float NeuralNet::
train_batch_thread(typename get_type<0, layers...>::feature_maps_vector_type& batch_inputs, typename get_type::feature_maps_vector_type& batch_labels, bool already_fed)
{
    bool temp_batch = use_batch_learning;
    use_batch_learning = true;

#if !defined(_MSC_VER)
    if (!already_fed)
    {
        //adjust batch data_global sizes
        while (get_thread_batch_activations<0>().size() != batch_labels.size()) //fix sizes
        {
            if (get_thread_batch_activations<0>().size() > batch_labels.size())
                loop_all_layers&>(*this);
            else
                loop_all_layers&>(*this);
        }
        while (get_thread_batch_out_derivs<0>().size() != batch_labels.size()) //fix sizes
        {
            if (get_thread_batch_out_derivs<0>().size() > batch_labels.size())
                loop_all_layers&>(*this);
            else
                loop_all_layers&>(*this);
        }

        //reset batch activations
        loop_all_layers&>(*this);

        //layer 0 here is the thread-local copy (feed_forwards_local), writing into
        //layer 0's own thread activations
        std::get<0, layers...>(thread_layers).feed_forwards_local(batch_inputs, get_thread_batch_activations<0>());
        loop_up_layers&>(*this);
    }
#else
    if (!already_fed)
    {
        //adjust batch data_global sizes
        while (get_thread_batch_activations<0>().size() !=
batch_labels.size()) //fix sizes 1697 | { 1698 | if (get_thread_batch_activations<0>().size() > batch_labels.size()) 1699 | loop_all_layers&>(*this, 0); 1700 | else 1701 | loop_all_layers&>(*this, 0); 1702 | } 1703 | while (get_thread_batch_out_derivs<0>().size() != batch_labels.size()) //fix sizes 1704 | { 1705 | if (get_thread_batch_out_derivs<0>().size() > batch_labels.size()) 1706 | loop_all_layers&>(*this, 0); 1707 | else 1708 | loop_all_layers&>(*this, 0); 1709 | } 1710 | 1711 | //reset batch activations 1712 | loop_all_layers&>(*this, 0); 1713 | 1714 | std::get<0, layers...>(thread_layers).feed_forwards_local(batch_inputs, get_thread_batch_activations<0>()); 1715 | loop_up_layers&>(*this, 0); 1716 | } 1717 | #endif 1718 | 1719 | float total_error = global_error(get_thread_batch_activations(), batch_labels); 1720 | 1721 | //get error signals for output 1722 | auto errors = error_signals(get_thread_batch_activations(), batch_labels); 1723 | 1724 | //back_prop for each layer (need to get activation derivatives for output first 1725 | std::get(thread_layers).back_prop_local(get_layer::activation, errors, 1726 | get_thread_batch_activations(), get_thread_batch_out_derivs(), 1727 | true, learning_rate, false, momentum_term, use_l2_weight_decay, include_bias_decay, weight_decay_factor); 1728 | #if !defined(_MSC_VER) 1729 | for_loop&>(*this); 1730 | #else 1731 | for_loop&>(*this, 0); 1732 | #endif 1733 | 1734 | //apply_gradient(); don't apply gradient_global if parallel 1735 | use_batch_learning = temp_batch; 1736 | return total_error / batch_inputs.size(); 1737 | } 1738 | 1739 | template 1740 | inline void NeuralNet:: 1741 | calculate_population_statistics(typename get_type<0, layers...>::feature_maps_vector_type& batch_inputs) 1742 | { 1743 | //put in inputs 1744 | get_layer<0>::feed_forwards(batch_inputs, get_batch_activations<1>()); 1745 | #if !defined(_MSC_VER) 1746 | for_loop<1, last_layer_index - 1, 1, feed_forwards_population_statistics_layer>(); 1747 | 
#else 1748 | for_loop<1, last_layer_index - 1, 1, feed_forwards_population_statistics_layer>(0); 1749 | #endif 1750 | } 1751 | 1752 | template 1753 | inline void NeuralNet:: 1754 | apply_gradient(bool clear_gradient_globals_global) 1755 | { 1756 | #if !defined(_MSC_VER) 1757 | if (use_l2_weight_decay && use_batch_learning) 1758 | loop_up_layers(); 1759 | #else 1760 | if (use_l2_weight_decay && use_batch_learning) 1761 | loop_up_layers(0); 1762 | #endif 1763 | 1764 | if (optimization_method == MTNN_OPT_ADAM) 1765 | ++t_adam; 1766 | 1767 | #if !defined(_MSC_VER) 1768 | if (clear_gradient_globals_global) 1769 | loop_up_layers(); 1770 | else 1771 | loop_up_layers(); 1772 | #else 1773 | if (clear_gradient_globals_global) 1774 | loop_up_layers(0); 1775 | else 1776 | loop_up_layers(0); 1777 | #endif 1778 | } 1779 | 1780 | template 1781 | inline void NeuralNet::update_global_gradients() 1782 | { 1783 | #if !defined(_MSC_VER) 1784 | loop_up_layers&>(*this); 1785 | #else 1786 | loop_up_layers&>(*this, 0); 1787 | #endif 1788 | } 1789 | 1790 | template 1791 | inline void NeuralNet::update_thread_weights() 1792 | { 1793 | #if !defined(_MSC_VER) 1794 | loop_up_layers&>(*this); 1795 | #else 1796 | loop_up_layers&>(*this, 0); 1797 | #endif 1798 | } 1799 | //loss of one output against its target lbls: L2 is sum((output - lbls)^2) / 2, log likelihood is -sum(lbls * log(output)), custom targets is 0 1800 | template 1801 | inline float NeuralNet:: 1802 | global_error(typename get_type::feature_maps_type& output, typename get_type::feature_maps_type& lbls) 1803 | { 1804 | float sum = 0.0f; 1805 | //iterate over (and read from) the lbls argument, not the labels member, so the target passed by the caller is the one scored 1806 | if (loss_function == MTNN_LOSS_L2) 1807 | { 1808 | for (size_t f = 0; f < lbls.size(); ++f) 1809 | for (size_t i = 0; i < lbls[f].rows(); ++i) 1810 | for (size_t j = 0; j < lbls[f].cols(); ++j) 1811 | sum += pow(output[f].at(i, j) - lbls[f].at(i, j), 2); 1812 | return sum / 2; 1813 | } 1814 | else if (loss_function == MTNN_LOSS_LOGLIKELIHOOD) 1815 | { 1816 | sum = 0.0f; 1817 | for (size_t f = 0; f < lbls.size(); ++f) 1818 | for (size_t i = 0; i < lbls[f].rows(); ++i) 1819 | for (size_t j = 0; j < lbls[f].cols(); 
++j) 1820 | sum += -1 * (lbls[f].at(i, j) * log(output[f].at(i, j))); 1821 | return sum; 1822 | } 1823 | //custom targets carry no loss; an unrecognised loss_function yields INFINITY (as the batch overload does) instead of falling off the end 1824 | return loss_function == MTNN_LOSS_CUSTOMTARGETS ? 0.0f : INFINITY; 1825 | } 1826 | //summed (not averaged) loss over a whole batch, using the same per-sample formulas as the single-sample overload 1827 | template 1828 | inline float NeuralNet:: 1829 | global_error(typename get_type::feature_maps_vector_type& batch_outputs, typename get_type::feature_maps_vector_type& batch_labels) 1830 | { 1831 | if (loss_function == MTNN_LOSS_CUSTOMTARGETS) 1832 | return 0; 1833 | float sum = 0.0f; 1834 | for (size_t in = 0; in < batch_outputs.size(); ++in) 1835 | { 1836 | if (loss_function == MTNN_LOSS_L2) 1837 | for (size_t f = 0; f < batch_labels[in].size(); ++f) 1838 | for (size_t i = 0; i < batch_labels[in][f].rows(); ++i) 1839 | for (size_t j = 0; j < batch_labels[in][f].cols(); ++j) 1840 | sum += pow(batch_outputs[in][f].at(i, j) - batch_labels[in][f].at(i, j), 2); 1841 | else if (loss_function == MTNN_LOSS_LOGLIKELIHOOD) 1842 | for (size_t f = 0; f < batch_labels[in].size(); ++f) 1843 | for (size_t i = 0; i < batch_labels[in][f].rows(); ++i) 1844 | for (size_t j = 0; j < batch_labels[in][f].cols(); ++j) 1845 | sum += -1 * (batch_labels[in][f].at(i, j) * log(batch_outputs[in][f].at(i, j))); 1846 | } 1847 | if (loss_function == MTNN_LOSS_L2) 1848 | return sum / 2; 1849 | else if (loss_function == MTNN_LOSS_LOGLIKELIHOOD) 1850 | return sum; 1851 | else 1852 | return INFINITY; 1853 | } 1854 | 1855 | template 1856 | template 1857 | inline void NeuralNet:: 1858 | dropout() 1859 | { 1860 | using layer = get_layer; 1861 | for (size_t f = 0; f < layer::feature_maps_global.size(); ++f) 1862 | for (size_t i = 0; i < layer::feature_maps_global.rows(); ++i) 1863 | for (size_t j = 0; j < layer::feature_maps_global.cols(); ++j) 1864 | if ((1.0f * rand()) / RAND_MAX <= dropout_probability) 1865 | get_batch_activations()[0][f].at(i, j) = 0; 1866 | } 1867 | 1868 | template 1869 | inline typename get_type::feature_maps_type NeuralNet:: 1870 | error_signals(typename get_type::feature_maps_type& output, typename 
get_type::feature_maps_type& lbls) 1871 | { 1872 | auto out = typename get_type::feature_maps_type{ 0 }; 1873 | if (loss_function == MTNN_LOSS_L2) 1874 | for (size_t f = 0; f < lbls.size(); ++f) 1875 | for (size_t i = 0; i < lbls.rows(); ++i) 1876 | for (size_t j = 0; j < lbls.cols(); ++j) 1877 | out[f].at(i, j) = output[f].at(i, j) - lbls[f].at(i, j); 1878 | else if (loss_function == MTNN_LOSS_LOGLIKELIHOOD) //assumes next layer is softmax? 1879 | { 1880 | for (size_t f = 0; f < lbls.size(); ++f) 1881 | for (size_t i = 0; i < lbls.rows(); ++i) 1882 | for (size_t j = 0; j < lbls.cols(); ++j) 1883 | out[f].at(i, j) = lbls[f].at(i, j); 1884 | } 1885 | else if (loss_function == MTNN_LOSS_CUSTOMTARGETS) 1886 | for (size_t f = 0; f < lbls.size(); ++f) 1887 | for (size_t i = 0; i < lbls.rows(); ++i) 1888 | for (size_t j = 0; j < lbls.cols(); ++j) 1889 | out[f].at(i, j) = lbls[f].at(i, j); 1890 | return out; 1891 | } 1892 | 1893 | template 1894 | inline typename get_type::feature_maps_vector_type NeuralNet:: 1895 | error_signals(typename get_type::feature_maps_vector_type& batch_outputs, typename get_type::feature_maps_vector_type& batch_labels) 1896 | { 1897 | auto out = typename get_layer::feature_maps_vector_type(batch_outputs.size()); 1898 | for (size_t in = 0; in < batch_outputs.size(); ++in) 1899 | out[in] = error_signals(batch_outputs[in], batch_labels[in]); 1900 | return out; 1901 | } 1902 | -------------------------------------------------------------------------------- /MTNN/include/neuralnetanalyzer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "imatrix.h" 7 | #include "ilayer.h" 8 | #include "neuralnet.h" 9 | 10 | template class NeuralNetAnalyzer 11 | { 12 | private: 13 | static float total_grad_error; 14 | static float original_net_error; 15 | 16 | static int n; 17 | 18 | static bool proportional; 19 | 20 | template struct add_grad_error_impl 21 | { 22 | 
public: 23 | add_grad_error_impl() 24 | { 25 | using layer = typename net::template get_layer; 26 | 27 | if (!biases_global) 28 | { 29 | using t = decltype(layer::weights_gradient_global); 30 | for (size_t d = 0; d < t::size(); ++d) 31 | { 32 | for (size_t i = 0; i < t::rows(); ++i) 33 | { 34 | for (size_t j = 0; j < t::cols(); ++j) 35 | { 36 | ++n; 37 | 38 | //decrement, get new error 39 | layer::weights_global[d].at(i, j) -= .001f; 40 | net::discriminate(); 41 | float adj_error = net::global_error(); 42 | 43 | //approximate (finite differences) 44 | float appr_grad = -(adj_error - original_net_error) / .001f; 45 | float grad = layer::weights_gradient_global[d].at(i, j); 46 | //add to total 47 | if (!proportional) 48 | total_grad_error += abs(layer::weights_gradient_global[d].at(i, j) - appr_grad); 49 | else 50 | total_grad_error += abs((layer::weights_gradient_global[d].at(i, j) - appr_grad) / layer::weights_gradient_global[d].at(i, j)); 51 | 52 | //reset 53 | layer::weights_global[d].at(i, j) += .001f; 54 | } 55 | } 56 | } 57 | } 58 | 59 | else 60 | { 61 | using t = decltype(layer::biases_gradient_global); 62 | for (size_t d = 0; d < t::size(); ++d) 63 | { 64 | for (size_t i = 0; i < t::rows(); ++i) 65 | { 66 | for (size_t j = 0; j < t::cols(); ++j) 67 | { 68 | ++n; 69 | 70 | layer::biases_global[d].at(i, j) -= .001f; 71 | net::discriminate(); 72 | 73 | float adj_error = net::global_error(); 74 | float appr_grad = -(adj_error - original_net_error) / .001f; 75 | 76 | if (!proportional) 77 | total_grad_error += abs(layer::biases_gradient_global[d].at(i, j) - appr_grad); 78 | else 79 | total_grad_error += abs((layer::biases_gradient_global[d].at(i, j) - appr_grad) / layer::biases_gradient_global[d].at(i, j)); 80 | 81 | layer::biases_global[d].at(i, j) += .001f; 82 | } 83 | } 84 | } 85 | } 86 | } 87 | }; 88 | 89 | template struct add_hess_error_impl 90 | { 91 | public: 92 | add_hess_error_impl() 93 | { 94 | using layer = typename net::template get_layer; 95 | 96 | 
if (!biases_global) 97 | { 98 | using t = decltype(layer::weights_hessian); 99 | for (size_t d = 0; d < t::size(); ++d) 100 | { 101 | for (size_t i = 0; i < t::rows(); ++i) 102 | { 103 | for (size_t j = 0; j < t::cols(); ++j) 104 | { 105 | ++n; 106 | 107 | //decrement, get new error 108 | layer::weights_global[d].at(i, j) -= .001f; 109 | net::discriminate(); 110 | float h_minus = net::global_error(); 111 | 112 | //reincrement, get new error 113 | layer::weights_global[d].at(i, j) += .002f; 114 | net::discriminate(); 115 | float h = net::global_error(); 116 | 117 | //approximate (finite differences) 118 | float appr_grad = (h - 2 * original_net_error + h_minus) / (.001f * .001f); 119 | 120 | //add to total 121 | if (!proportional) 122 | total_grad_error += abs(layer::weights_hessian[d].at(i, j) - appr_grad); 123 | else 124 | total_grad_error += abs((layer::weights_hessian[d].at(i, j) - appr_grad) / layer::weights_hessian[d].at(i, j)); 125 | 126 | //reset 127 | layer::weights_global[d].at(i, j) -= .001f; 128 | } 129 | } 130 | } 131 | } 132 | 133 | else 134 | { 135 | using t = decltype(layer::biases_hessian); 136 | for (size_t d = 0; d < t::size(); ++d) 137 | { 138 | for (size_t i = 0; i < t::rows(); ++i) 139 | { 140 | for (size_t j = 0; j < t::cols(); ++j) 141 | { 142 | ++n; 143 | 144 | //decrement, get new error 145 | layer::biases_global[d].at(i, j) -= .001f; 146 | net::discriminate(); 147 | float h_minus = net::global_error(); 148 | 149 | //reincrement, get new error 150 | layer::biases_global[d].at(i, j) += .002f; 151 | net::discriminate(); 152 | float h = net::global_error(); 153 | 154 | //approximate (finite differences) 155 | float appr_grad = (h - 2 * original_net_error + h_minus) / (.001f * .001f); 156 | 157 | //add to total 158 | if (!proportional) 159 | total_grad_error += abs(layer::biases_hessian[d].at(i, j) - appr_grad); 160 | else 161 | total_grad_error += abs((layer::biases_hessian[d].at(i, j) - appr_grad) / layer::biases_hessian[d].at(i, j)); 162 | 
163 | //reset 164 | layer::biases_global[d].at(i, j) -= .001f; 165 | } 166 | } 167 | } 168 | } 169 | } 170 | }; 171 | 172 | template using add_grad_error_w = add_grad_error_impl; 173 | template using add_grad_error_b = add_grad_error_impl; 174 | 175 | template using add_hess_error_w = add_hess_error_impl; 176 | template using add_hess_error_b = add_hess_error_impl; 177 | 178 | public: 179 | //find mean gradient_global error from numerical approximation MAKE SURE INPUTS ARE NOT 0 180 | static std::pair mean_gradient_error() 181 | { 182 | net::discriminate(); 183 | 184 | proportional = false; 185 | total_grad_error = 0.0f; 186 | n = 0; 187 | original_net_error = net::global_error(); 188 | 189 | #if defined(_MSC_VER) || defined(__clang__) 190 | net::template loop_all_layers(0); 191 | #else 192 | typename net::template loop_all_layers(); 193 | #endif 194 | std::pair errors{}; 195 | errors.first = total_grad_error / n; 196 | 197 | total_grad_error = 0.0f; 198 | n = 0; 199 | 200 | #if defined(_MSC_VER) || defined(__clang__) 201 | net::template loop_all_layers(0); 202 | #else 203 | typename net::template loop_all_layers(); 204 | #endif 205 | 206 | errors.second = total_grad_error / n; 207 | 208 | return errors; 209 | } 210 | 211 | //find mean hessian error from numerical approximation WARNING NOT NECESSARILY ACCURATE 212 | static std::pair mean_hessian_error() 213 | { 214 | net::discriminate(); 215 | 216 | proportional = false; 217 | total_grad_error = 0.0f; 218 | n = 0; 219 | original_net_error = net::global_error(); 220 | 221 | #if defined(_MSC_VER) || defined(__clang__) 222 | net::template loop_all_layers(0); 223 | #else 224 | typename net::template loop_all_layers(); 225 | #endif 226 | 227 | std::pair errors{}; 228 | errors.first = total_grad_error / n; 229 | 230 | total_grad_error = 0.0f; 231 | n = 0; 232 | 233 | #if defined(_MSC_VER) || defined(__clang__) 234 | net::template loop_all_layers(0); 235 | #else 236 | typename net::template loop_all_layers(); 237 | #endif 
238 | 239 | errors.second = total_grad_error / n; 240 | 241 | return errors; 242 | } 243 | 244 | //find mean proportional gradient_global error from numerical approximation MAKE SURE INPUTS ARE NOT 0 245 | static std::pair proportional_gradient_error() 246 | { 247 | net::discriminate(); 248 | 249 | proportional = true; 250 | total_grad_error = 0.0f; 251 | n = 0; 252 | original_net_error = net::global_error(); 253 | 254 | #if defined(_MSC_VER) || defined(__clang__) 255 | net::template loop_all_layers(0); 256 | #else 257 | typename net::template loop_all_layers(); 258 | #endif 259 | 260 | std::pair errors{}; 261 | errors.first = total_grad_error / n; 262 | 263 | total_grad_error = 0.0f; 264 | n = 0; 265 | 266 | #if defined(_MSC_VER) || defined(__clang__) 267 | net::template loop_all_layers(0); 268 | #else 269 | typename net::template loop_all_layers(); 270 | #endif 271 | 272 | errors.second = total_grad_error / n; 273 | 274 | return errors; 275 | } 276 | 277 | //find mean proportional hessian error from numerical approximation WARNING NOT NECESSARILY ACCURATE 278 | static std::pair proportional_hessian_error() 279 | { 280 | net::discriminate(); 281 | 282 | proportional = true; 283 | total_grad_error = 0.0f; 284 | n = 0; 285 | original_net_error = net::global_error(); 286 | 287 | #if defined(_MSC_VER) || defined(__clang__) 288 | net::template loop_all_layers(0); 289 | #else 290 | typename net::template loop_all_layers(); 291 | #endif 292 | 293 | std::pair errors{}; 294 | errors.first = total_grad_error / n; 295 | 296 | total_grad_error = 0.0f; 297 | n = 0; 298 | 299 | #if defined(_MSC_VER) || defined(__clang__) 300 | net::template loop_all_layers(0); 301 | #else 302 | typename net::template loop_all_layers(); 303 | #endif 304 | 305 | errors.second = total_grad_error / n; 306 | 307 | return errors; 308 | } 309 | 310 | //update sample 311 | static void add_point(float value) 312 | { 313 | if (sample.size() == sample_size) 314 | sample.erase(sample.begin()); 315 | 
sample.push_back(value); 316 | } 317 | 318 | //calculate the expected error 319 | static float mean_error() 320 | { 321 | float sum = 0.0f; 322 | for (size_t i = 0; i < sample.size(); ++i) 323 | sum += sample[i]; 324 | errors.push_back(sum / sample.size()); 325 | return sum / sample.size(); 326 | } 327 | 328 | //save error data 329 | static void save_mean_error(std::string path) 330 | { 331 | std::ofstream file{ path }; 332 | for (size_t i = 0; i < errors.size(); ++i) 333 | file << errors[i] << ','; 334 | file.flush(); 335 | } 336 | 337 | static int sample_size; 338 | 339 | static std::vector sample; 340 | static std::vector errors; 341 | }; 342 | template std::vector NeuralNetAnalyzer::sample = {}; 343 | template std::vector NeuralNetAnalyzer::errors = {}; 344 | template float NeuralNetAnalyzer::total_grad_error = 0.0f; 345 | template float NeuralNetAnalyzer::original_net_error = 0.0f; 346 | template bool NeuralNetAnalyzer::proportional = false; 347 | template int NeuralNetAnalyzer::n = 0; 348 | template int NeuralNetAnalyzer::sample_size = 0; 349 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MetaTemplateNeuralNet 2 | ========================== 3 | 4 | An API for neural networks implemented in C++ with template meta-programming. Perhaps the first of its kind. 5 | 6 | ## Include 7 | ========================== 8 | 9 | All the necessary components are in header files (yay templates!) So just add what you need from the include folder. 10 | 11 | 12 | ## What a Neural Network is 13 | ========================== 14 | 15 | Our brains work by a large web of connected neurons, or simple binary states. These neurons are connected by synapses, which have a strength associated with them. When a neuron fires, it's signal is sent through all of it's connecting synapses to other neurons to determine their value. 
When we learn, our brain adjusts the strengths of the associated synapses to limit the number of activated neurons. 16 | 17 | A neural network is a machine learning algorithm based on the brain. Within a network, there are layers. Each of these layers has a number of neurons, which take on floating point values, and weights, symbolic of synapses, attached to the neurons in the next layer. These networks then run in a way similar to our brains: given an input, all neurons are fed forward to the next layer by summing the value of the neurons times the weights connecting two neurons. Commonly, a bias is an addition to the network which is used as a simple shift to neurons. The bias is added to the sum of the weights times the neurons to produce the output of the neuron, which is then commonly run through a continuous activation function, such as a sigmoid, to bound the value of the neuron as well as give the network a differentiable property. 18 | 19 | Weights can be connected between neurons in different ways. Most common are full connectivity layers and shared weight layers. Full connectivity layers have weights going from every input neuron to every output neuron, so every neuron in the layer is connected to every neuron in the layer above. Shared weights are a way of forming similar connections between different neurons by a common weight pattern. A common implementation of this is convolutional layers. 20 | 21 | Convolutional layers make use of mathematical convolution, an operation used to produce feature maps, or highlights from an image. Convolution is formally defined as the sum of all values in the domains of two functions which are multiplied by one another. In real life cases, this is commonly discrete, and is most easily understood in images. 
Image convolution involves iterating a mask over an image to produce an output, where the output pixel values are equivalent to the sum of the mask multiplied by neighboring pixels in the input when anchored at the center of the mask. This operation draws features from the image, such as edges or curves, and is associated with the way our visual cortex processes imagery. 22 | 23 | Networks learn through different algorithms, although the two implemented here are the up-down or wake-sleep algorithm and vanilla backpropagation. Backpropagation is an algorithm which computes the derivatives of the error with respect to the weights, and adjusts the weights in order to find a minimum in the error function. This is a way of approximating the actual error signal of every neuron, so a small step size is often used to prevent divergence. The wake-sleep or up-down algorithm trains the network without knowledge of data in an encoder-decoder format. The layers in the network are fed forward, backwards, and forwards again, before a difference is calculated to adjust the weights. 24 | 25 | ## How this API is implemented 26 | ================================= 27 | 28 | This API is based on template meta-programming to optimize efficiency. Therefore, much of this API is based on the assumption that a network architecture will be defined at compile time rather than runtime. This has caused most of the class to become static; therefore you may want to `typedef NeuralNet<...> Net;` in your source file for clarity. More details on accessing a `NeuralNet` can be found in its section. 29 | 30 | Note that because this is a template-based approach, almost all errors will be nondescript compiler errors. Generally they occur because a particular layer does not "connect" to the next. 
31 | 32 | ## Documentation 33 | =============================== 34 | 35 | ### Macros 36 | =============================== 37 | 38 | These macros are used to signify layer types, optimization methods, loss functions, and activation functions. They are prefixed with `MTNN_FUNC_*` for activation functions, `MTNN_LAYER_*` for layers, `MTNN_OPT_*` for optimization methods, and `MTNN_LOSS_*` for loss functions. Their names should explain their use. The available layers can be found below. 39 | 40 | Available activation functions are linear (y = x), sigmoid (y = 1/(1 + exp(-x))), bipolar sigmoid (y = 2/(1 + exp(-x)) - 1), tanh (y = tanh(x)), and rectified linear (y = max(0, x)). 41 | 42 | Available loss functions are quadratic, cross entropy, log likelihood, and custom targets. 43 | 44 | Available optimization methods are vanilla backprop (with momentum, l2 weight decay, etc. as desired), Adam, and Adagrad. 45 | 46 | 47 | ### `Matrix2D` 48 | =============================== 49 | 50 | This class is a simple matrix implementation, with some extra methods that can be used in situations outside of this neural network. 51 | 52 | | Member/Method | Type | Details | 53 | |---------|------|---------| 54 | | `data` | `std::vector(rows * cols)` | holds the matrix's data in column-major format | 55 | | `at(size_t i, size_t j)` | `T` | returns the value of the matrix at i, j | 56 | | `clone()` | `Matrix2D` | creates a deep copy of the matrix | 57 | | `rows()` | `static constexpr size_t` | returns the number of rows | 58 | | `cols()` | `static constexpr size_t` | returns the number of cols | 59 | 60 | This table contains methods used only in the source code of the network 61 | 62 | Can be initialized with initializer lists, so brace initializers may create some problems. 63 | 64 | ### `FeatureMap` 65 | =============================== 66 | 67 | This class is a slightly more advanced wrapper of just a `std::vector(f)`, with basic initialization functions. 
68 | 69 | Can be initialized with initializer lists, so brace initializers may create some problems. 70 | 71 | ### Layer 72 | =============================== 73 | 74 | There is no `Layer` class, but all of the "`*Layer`" classes are implemented similarly. Note that only members that are used will be instantiated, as this API uses implicit static initialization. 75 | 76 | Use the `index` parameter to create different instances if using the same type of layer multiple times (e.g. if using an `InputLayer` taking 1 input on multiple networks, add a distinct `index` to prevent them from modifying each other's data). 77 | 78 | | Member/Method | Type | Details | 79 | |--------|------|----------| 80 | | `feed_forwards(feature_maps_type& input, out_feature_maps_type& output, ...)` | `void` | Feeds the layer forward | 81 | | `feed_backwards(feature_maps_type& output, out_feature_maps_type& input, ...)` | `void` | Feeds the layer backwards using generative biases (if bool is enabled) | 82 | | `back_prop(...)` | `void` | Performs vanilla backpropagation with the specified activation method | 83 | | `feed_forwards(feature_maps_vector_type& inputs, out_feature_maps_vector_type& outputs, ...)` | `void` | Feeds the layer forward (overloaded for batches) | 84 | | `feed_backwards(feature_maps_vector_type& outputs, out_feature_maps_vector_type& inputs, ...)` | `void` | Feeds the layer backwards using generative or recognition weights (overloaded for batches) | 85 | | `back_prop(...)` | `void` | Performs vanilla backpropagation with the specified activation method (overloaded for batches) | 86 | | `wake_sleep(...)` | `void` | Performs the wake-sleep (up-down) algorithm with the specified activation method | 87 | | `feature_maps` | `FeatureMap<>` | Holds current activations | 88 | | `weights` | `FeatureMap<>` | Holds the weights | 89 | | `biases` | `FeatureMap<>` | Holds the biases (if used) | 90 | | `generative_biases` | `FeatureMap<>` | Holds the generative biases (if used) | 91 | | 
`weights_momentum` | `FeatureMap<>` | Holds the weights' momentum | 92 | | `biases_momentum` | `FeatureMap<>` | Holds the biases' momentum | 93 | | `weights_aux_data` | `FeatureMap<>` | Holds the weights' aux_data (used for optimization methods) | 94 | | `biases_aux_data` | `FeatureMap<>` | Holds the biases' aux_data (used for optimization methods) | 95 | | `feature_maps_type` | `type` | the type | 96 | | `out_feature_maps_type` | `type` | the type | 97 | | `weights_type` | `type` | the type | 98 | | `biases_type` | `type` | the type | 99 | | `generative_biases_type` | `type` | the type | 100 | | `feature_maps_vector_type` | `type` | the type | 101 | | `out_feature_maps_vector_type` | `type` | the type | 102 | | `weights_vector_type` | `type` | the type | 103 | | `biases_vector_type` | `type` | the type | 104 | | `generative_biases_vector_type` | `type` | the type | 105 | 106 | ### `PerceptronFullConnectivityLayer` 107 | =============================== 108 | 109 | Basic fully connected perceptron layer. 110 | 111 | ### `ConvolutionLayer` 112 | =============================== 113 | 114 | Basic convolutional layer; masks or kernels must be square (but not odd!). 115 | 116 | With padding, the output is the same size as the input. Otherwise the output is reduced. 117 | 118 | ### `LSTMLayer` 119 | =============================== 120 | 121 | Basic LSTM layer (uses tanh activation). STILL IN DEVELOPMENT, WON'T WORK WITH THREADS. 122 | 123 | `max_t_store` states how many time steps to perform bptt on. 124 | 125 | ### `MaxpoolLayer` 126 | =================================== 127 | 128 | Basic maxpooling layer. Maxpool is performed on each feature map independently. 129 | 130 | 131 | ### `SoftMaxLayer` 132 | ===================================== 133 | 134 | Basic softmax layer. This will compute derivatives for any cost function, not just log-likelihood. Softmax is performed on each feature map independently. 
135 | 136 | ### `BatchNormalizationLayer` 137 | ===================================== 138 | 139 | Basic batch normalization layer. Gamma and beta are in `weights` and `biases`. 140 | 141 | If this layer is used, then batch learning and the respective batch overloads must be used. 142 | 143 | ### `InputLayer` 144 | ===================================== 145 | 146 | Basic input layer just to signify the beginning of the network. Required. 147 | 148 | ### `OutputLayer` 149 | ===================================== 150 | 151 | Basic output layer just to signify the end of the network. Required. 152 | 153 | ### `NeuralNet` 154 | =============================== 155 | 156 | This is the class that encapsulates all of the rest. It has all required methods. Support for more loss functions and optimization methods will be added later. If you want to train the network in parallel (or keep different sets of weights for a target network, or different architecture, etc.) then create a new instance of the class. Each new instance has its own weights and gradients and is thread safe (if you use `*_thread(.)` functions). The static class is the master net and retains its own weights. 157 | 158 | | Member/Method | Type | Details | 159 | |--------|------|----------| 160 | | `learning_rate` | `float` | The learning term of the network. Default value is 0.01 | 161 | | `momentum_term` | `float` | The momentum term (proportion of learning rate when applied to momentum) of the network. Between 0 and 1. Default value is 0 | 162 | | `dropout_probability` | `float` | The probability that a given neuron will be "dropped". Default value is .5 | 163 | | `loss_function` | `size_t` | The loss function to be used. Default mean square | 164 | | `optimization_method` | `size_t` | Optimization method to be used. 
Default backprop | 165 | | `use_batch_learning` | `bool` | Whether you will apply gradient manually with minibatches | 166 | | `use_dropout` | `bool` | Whether to train the network with dropout | 167 | | `use_momentum` | `bool` | Whether to train the network with momentum. Cannot be used with Adam or Adagrad | 168 | | `labels` | `FeatureMap<>` | The current labels | 169 | | `input` | `FeatureMap<>` | The current input | 170 | | `setup()` | `void` | Initializes the network to learn. Must call if learning. Must set the hyperparameters before calling | 171 | | `apply_gradient()` | `void` | Updates weights | 172 | | `save_data()` | `void` | Saves the data. Check the example to see how to supply the filename | 173 | | `load_data()` | `void` | Loads the data (Must have initialized network and filled layers first!!!) | 174 | | `set_input(FeatureMap<> input)` | `void` | Sets the current input | 175 | | `set_labels(FeatureMap<> labels)` | `void` | Sets the current labels | 176 | | `discriminate()` | `void` | Feeds the network forward with current input, can be specified | 177 | | `discriminate(FeatureMapVector<> inputs)` | `void` | Feeds the network forward with the batch inputs | 178 | | `generate(FeatureMap<> input, size_t sampling_iterations, bool use_sampling)` | `FeatureMap<>` | Generates an output for an RBM network. `use_sampling` means sample for each layer after the Markov iterations on the final RBM layer | 179 | | `pretrain()` | `void` | Pretrains the network using the wake-sleep algorithm. Assumes every layer up to the last RBM layer has been trained. | 180 | | `train()` | `float` | Trains the network using specified optimization method. `already_fed` means that the network has already been discriminated and the algorithm does not need to get the hidden layer activations. | 181 | | `train_batch(FeatureMapVector<> batch_inputs, FeatureMapVector<> batch_labels)` | `float` | Trains the network using specified optimization method and batch learning. 
`already_fed` means that the network has already been discriminated and the algorithm does not need to get the hidden layer activations. MUST BE USED IF USING BATCH NORMALIZATION | 182 | | `discriminate_thread()` | `void` | Feeds the network forward with current input and the current instance's (or thread's) weights, can be specified. | 183 | | `discriminate_thread(FeatureMapVector<> inputs)` | `void` | Feeds the network forward with the batch inputs and the current instance's (or thread's) weights. | 184 | | `train_thread()` | `float` | Trains the network using the specified optimization method with the current instance's (or thread's) weights. `already_fed` means that the network has already been discriminated and the algorithm does not need to get the hidden layer activations. | 185 | | `train_batch_thread(FeatureMapVector<> batch_inputs, FeatureMapVector<> batch_labels)` | `float` | Trains the network using the specified optimization method and batch learning with the current instance's (or thread's) weights. `already_fed` means that the network has already been discriminated and the algorithm does not need to get the hidden layer activations. MUST BE USED IF USING BATCH NORMALIZATION | 186 | | `calculate_population_statistics(FeatureMapVector<> batch_inputs)` | `void` | Calculates the population statistics for BN networks. Run this after all training, using the full training data. | 187 | | `template get_layer<size_t l>` | `type` | Returns the lth layer's type | 188 | | `template loop_up_layers<template<size_t l> class loop_body, typename... Args>` | `type` | Initialize one of these to perform a function specified from the initialization of a `loop_body` type on each layer with initialization arguments of type `Args...` | 189 | | `template loop_down_layers<template<size_t l> class loop_body, typename... 
Args>` | `type` | Initialize one of these to perform a function specified from the initialization of a `loop_body` type on each layer with initialization arguments of type `Args...` | 190 | 191 | ### `NeuralNetAnalyzer` 192 | 193 | This is a singleton static class. This class helps with network analysis, such as the expected error, and finite difference backprop checking. 194 | 195 | | Member/Method | Type | Details | 196 | |--------|------|----------| 197 | | `sample_size` | `static size_t` | The sample size used to calculate the expected error | 198 | | `mean_gradient_error()` | `static std::pair<float, float>` | Uses finite differences for backprop checking, returns the mean difference as an ordered pair (weights, biases) | 199 | | `proportional_gradient_error()` | `static std::pair<float, float>` | Uses finite differences for backprop checking, returns the proportional difference as an ordered pair (weights, biases) | 200 | | `add_point(float value)` | `static void` | Adds a point for the running calculation of the expected error | 201 | | `mean_error()` | `static float` | Returns the running estimate of expected error | 202 | | `save_error(std::string path)` | `static void` | Saves all calculated expected errors | 203 | 204 | 205 | # Usage 206 | =============================== 207 | 208 | For an example of creating and using a network, see main.cpp in the `example` folder. 209 | 210 | There is also an example with the MNIST Database in the `example/MNIST` folder. The provided .nn file has ~1% error. --------------------------------------------------------------------------------