├── img ├── boxed.png └── learning.mp4 ├── samples ├── test1.jpg └── tensor-convo-20230122-final-gelu.state ├── .gitignore ├── basic-autograd.cc ├── vizi.hh ├── textsupport.hh ├── model.hh ├── LICENSE ├── los3.cc ├── try-convo.cc ├── tensormodtest.cc ├── imagine.cc ├── .github └── workflows │ └── cmake.yml ├── mnistreader.hh ├── mnistposter.cc ├── grutest.cc ├── textsupport.cc ├── convo-alphabet.hh ├── misc.hh ├── mnistreader.cc ├── cnn1.hh ├── 37learn.cc ├── vizi.cc ├── gru-layer.hh ├── ext └── sqlitewriter │ ├── sqlwriter.hh │ └── sqlwriter.cc ├── threeorseven.cc ├── CMakeLists.txt ├── cnn-alphabet.hh ├── testfvector.cc ├── fvector.hh ├── tensor-relu.cc ├── alternate ├── tensor.cc ├── modular-convo.cc ├── first-relu.cc ├── modular-linear-convo.cc ├── workertest.cc ├── modular-threaded-convo.cc ├── worker-convo-avx-threaded.cc ├── worker-convo.cc └── worker-convo-avx.cc ├── tensor-convo.cc ├── trackedfuncs.hh ├── tensor-gru.cc ├── ui.cc ├── gru.hh ├── los.cc ├── los2.cc ├── testrunner.cc ├── hello-dl.cc ├── layers.hh └── tensor-layers.hh /img/boxed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/hello-dl/main/img/boxed.png -------------------------------------------------------------------------------- /img/learning.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/hello-dl/main/img/learning.mp4 -------------------------------------------------------------------------------- /samples/test1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/hello-dl/main/samples/test1.jpg -------------------------------------------------------------------------------- /samples/tensor-convo-20230122-final-gelu.state: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/hello-dl/main/samples/tensor-convo-20230122-final-gelu.state -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | hello-dl 3 | # Prerequisites 4 | *.d 5 | 6 | # Compiled Object files 7 | *.slo 8 | *.lo 9 | *.o 10 | *.obj 11 | 12 | # Precompiled Headers 13 | *.gch 14 | *.pch 15 | 16 | # Compiled Dynamic libraries 17 | *.so 18 | *.dylib 19 | *.dll 20 | 21 | # Fortran module files 22 | *.mod 23 | *.smod 24 | 25 | # Compiled Static libraries 26 | *.lai 27 | *.la 28 | *.a 29 | *.lib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | -------------------------------------------------------------------------------- /basic-autograd.cc: -------------------------------------------------------------------------------- 1 | #include "tensor2.hh" 2 | #include 3 | using namespace std; 4 | 5 | int main() 6 | { 7 | #if 0 8 | Tensor x(2.0f); 9 | Tensor z(0.0f); 10 | Tensor a(1.0f); 11 | Tensor y = x * (z + a); 12 | y(0,0); 13 | y.backward(); 14 | 15 | #else 16 | Tensor x(2.0f); 17 | Tensor z(0.0f); 18 | Tensor y = Tensor(3.0f)*x*x*x + Tensor(4.0f)*x + Tensor(1.0f) + x*z; 19 | y(0,0); 20 | y.backward(); 21 | cout << "y = "<< y << endl; // 3*8 + 4*2 + 1 = 33 22 | 23 | 24 | 25 | cout << "dy/dx = " << x.getGrad() << endl; // 9*x^2 + 4 = 40 26 | cout << "dy/dz = " << z.getGrad() << endl; // 2 27 | #endif 28 | } 29 | -------------------------------------------------------------------------------- /vizi.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "tensor2.hh" 5 | #include "ext/stb/stb_truetype.h" 6 | 7 | template 8 | void printImgTensor(const T& img) 9 | { 10 | for(unsigned int y=0; y < img.getRows(); ++y) { 11 | for(unsigned int x=0; x < img.getCols(); ++x) { 12 | float val = img(y,x); 13 | if(val > 0.5) 14 | std::cout<<'X'; 15 | else if(val > 0.25) 16 | std::cout<<'*'; 17 | else if(val > 0.125) 18 | std::cout<<'.'; 19 | else 20 | std::cout<<' '; 21 | } 22 | std::cout<<'\n'; 23 | } 24 | std::cout<<"\n"; 25 | } 26 | 27 | struct FontWriter 28 | { 29 | FontWriter(); 30 | void writeChar(char ch, int s, int c, int r, std::function f); 31 | stbtt_fontinfo d_font; 32 | std::vector d_ttf_buffer; 33 | }; 34 | 35 | void saveTensor(const Tensor& t, const std::string& fname, int size, bool monochrome=false); 36 | -------------------------------------------------------------------------------- /textsupport.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | std::vector textChopper(const char* fname, size_t siz, int mult=1); 8 | 9 | class BiMapper 10 | { 11 | public: 12 | explicit BiMapper(const char* fname, int lim=-1); 13 | int c2i(char c) const 14 | { 15 | auto iter = d_c2i.find(c); 16 | if(iter == d_c2i.end()) { 17 | // std::cout<<("Attempting to find unknown character with value '"+std::to_string((int)c)+"'")<second; 21 | } 22 | char i2c(int i) const 23 | { 24 | auto iter = d_i2c.find(i); 25 | if(iter == d_i2c.end()) { 26 | // std::cout<<("Attempting to find unknown integer "+std::to_string(i))<second; 30 | } 31 | 32 | private: 33 | std::unordered_map d_c2i; 34 | std::unordered_map d_i2c; 35 | }; 36 | 37 | 38 | -------------------------------------------------------------------------------- /model.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "layers.hh" 3 | #include 4 | 5 | struct ModelState 6 | { 7 | std::vector d_members; 8 | void save(std::ostream& out) const 9 | { 10 | for(const auto& mem : d_members) 11 | mem->save(out); 12 | } 13 | void save(std::string& out) const 14 | { 15 | std::ostringstream os; 16 | for(const auto& mem : d_members) 17 | mem->save(os); 18 | 19 | out=os.str(); 20 | } 21 | 22 | void load(std::istream& in) 23 | { 24 | for(auto& mem : d_members) 25 | mem->load(in); 26 | } 27 | 28 | void load(std::string& in) 29 | { 30 | std::istringstream is(in); 31 | load(is); 32 | } 33 | 34 | void learn(float lr) 35 | { 36 | for(auto& mem : d_members) 37 | mem->learn(lr); 38 | } 39 | 40 | void zeroGrad() 41 | { 42 | for(auto& mem : d_members) 43 | mem->zeroGrad(); 44 | } 45 | 46 | uint32_t size() 47 | { 48 | size_t ret = 0; 49 | for(auto& mem : d_members) 50 | ret += mem->size(); 51 | return ret; 52 | } 53 | 54 | }; 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 bert hubert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /los3.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | #include 3 | #include 4 | #include "tensor2.hh" 5 | #include "misc.hh" 6 | 7 | #include 8 | using namespace std; 9 | 10 | int main(int argc, char**argv) 11 | { 12 | DTime dt; 13 | dt.start(); 14 | float t=0; 15 | for(unsigned int n=0; n < 10000/64; ++n) { 16 | Eigen::Tensor input(64,28, 28); 17 | Eigen::Tensor kernel(3, 3); 18 | Eigen::Tensor output(64, 26, 26); 19 | input.setRandom(); 20 | kernel.setRandom(); 21 | 22 | Eigen::array dims({1, 2}); // Specify second and third dimension for convolution. 23 | output = input.convolve(kernel, dims); 24 | t+=output(0,0,0); 25 | } 26 | cout<<"t: "< 2 | #include 3 | #include "mnistreader.hh" 4 | #include "misc.hh" 5 | #include 6 | #include 7 | #include "tensor-layers.hh" 8 | #include "vizi.hh" 9 | 10 | using namespace std; 11 | 12 | int main(int argc, char** argv) 13 | { 14 | if(argc < 2) { 15 | cerr<<"Syntax: try-convo index"< convo; 30 | auto& f1 = convo.d_filters[0]; 31 | f1(0,0) = -1; f1(0,1) = -1; f1(0,2)=1; 32 | f1(1,0) = -1; f1(0,1) = -1; f1(1,2)=1; 33 | f1(2,0) = 1; f1(2,1) = 1; f1(2,2)=1; 34 | 35 | convo.d_bias[0](0,0) = 0; 36 | 37 | Tensor out = convo.forward(img)[0].makeMax2d(2); 38 | out(0,0); 39 | cout<<"out:\n"< 4 | #include 5 | #include "misc.hh" 6 | #include 7 | #include 8 | #include 9 | #include "tensor-layers.hh" 10 | using namespace std; 11 | 12 | 13 | TEST_CASE("tensor model load save test") { 14 | struct TestState : ModelState 15 | { 16 | Conv2d d_lc1; // -> 26*26 -> max2d -> 13*13 17 | Linear d_fc1; 18 | TestState() 19 | { 20 | d_members = {{&d_lc1, "lc1"}, {&d_fc1, "fc1"}}; 21 | } 22 | }; 23 | 24 | TestState ts; 25 | ts.randomize(); 26 | 27 | saveModelState(ts, "tensormodetest-test.state"); 28 | 29 | TestState ts2; 30 | ts2.randomize(); // just to confuse things 31 | loadModelState(ts2, "tensormodetest-test.state"); 32 | 33 | CHECK(ts.d_lc1.d_filters[16](2,2) == ts2.d_lc1.d_filters[16](2,2)); 34 | CHECK(ts.d_fc1.d_weights(30,32) == ts2.d_fc1.d_weights(30,32)); 35 | } 36 | 37 | 38 | TEST_CASE("max2d array") { 39 | std::array, 3> in; 40 | for(unsigned int n=0; n < in.size(); ++n) { 41 | in[n]=Tensor(4,4); 42 | in[n].iota(n); 43 | } 44 | auto res = Max2dfw(in, 2); 45 | // 0 1 2 3 46 | // 4 5 6 7 47 | // 8 9 10 11 48 | // 12 13 14 15 49 | 50 | CHECK(res[0](0,0) == 5); 51 | CHECK(res[0](0,1) == 7); 52 | 53 | CHECK(res[1](0,0) == 6); 54 | CHECK(res[1](0,1) == 8); 55 | 56 | CHECK(res[2](1,0) == 15); 57 | CHECK(res[2](1,1) == 17); 58 | } 59 | 60 | -------------------------------------------------------------------------------- /imagine.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "tensor-layers.hh" 6 | #include "convo-alphabet.hh" 7 | #include "vizi.hh" 8 | #include "mnistreader.hh" 9 | using namespace std; 10 | 11 | 12 | int main(int argc, char** argv) 13 | { 14 | if(argc < 3) { 15 | cout<<"Syntax: imagine fromletter toletter modelname"<d_val = m.img.d_imp -> d_val.unaryExpr([](float v) { return fabs(v); }); 39 | */ 40 | 41 | m.img.normalize(0.172575, 0.25); 42 | 43 | auto specscore = m.scores.makeSlice(tolabel, 0, 1, 1); 44 | auto topo = specscore.getTopo(); 45 | for(unsigned int tries = 0 ; tries < 10000; ++tries) { 46 | cout<d_val += grad; 51 | 52 | if(!(tries %4)) 53 | saveTensor(m.img, "imagine-"+to_string(tries)+".png", 252, true); 54 | specscore.zerograd(topo); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/cmake.yml: -------------------------------------------------------------------------------- 1 | name: CMake 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 11 | BUILD_TYPE: Release 12 | 13 | 14 | jobs: 15 | build: 16 | # The CMake configure and build commands are platform agnostic and should work equally 17 | # well on Windows or Mac. You can convert this to a matrix build if you need 18 | # cross-platform coverage. 19 | # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - name: Install libraries 26 | run: sudo apt-get install libeigen3-dev libz-dev libsqlite3-dev 27 | 28 | - name: Configure CMake 29 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 30 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 31 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} 32 | 33 | - name: Build 34 | # Build your program with the given configuration 35 | run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} 36 | 37 | - name: Test 38 | working-directory: ${{github.workspace}}/build 39 | # Execute tests defined by the CMake configuration. 40 | # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail 41 | run: ctest -C ${{env.BUILD_TYPE}} 42 | 43 | -------------------------------------------------------------------------------- /mnistreader.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | //#include "array.hh" 6 | //#include "fvector.hh" 7 | 8 | #include "tensor2.hh" 9 | class MNISTReader 10 | { 11 | public: 12 | MNISTReader(const std::string& images, const std::string& labels); 13 | unsigned int num() const 14 | { 15 | return d_num; 16 | } 17 | std::vector getImage(int n) const; 18 | const std::vector& getImageFloat(int n) const 19 | { 20 | if(auto iter = d_converted.find(n); iter != d_converted.end()) 21 | return iter->second; 22 | else 23 | throw std::runtime_error("Could not find image "+std::to_string(n)); 24 | } 25 | 26 | template 27 | void pushImage(int n, Tensor& dest) const 28 | { 29 | assert(dest.d_imp && dest.d_imp->d_mode == TMode::Parameter); 30 | const auto& src = getImageFloat(n); 31 | for(int row=0 ; row < 28; ++row) 32 | for(int col=0 ; col < 28; ++col) 33 | dest(row, col) = src.at(row+28*col); 34 | } 35 | /* 36 | template 37 | void pushImage(int n, NNArray& dest, int idx) const 38 | { 39 | const auto& src = getImageFloat(n); 40 | for(int row=0 ; row < 28; ++row) 41 | for(int col=0 ; col < 28; ++col) { 42 | if(!dest(row,col).impl) // XXX FUGLY 43 | dest(row, col) = 0; 44 | dest(row, col).impl->d_val.v[idx] = src.at(row+28*col); 45 | } 46 | } 47 | */ 48 | 49 | char getLabel(int n) const; 50 | private: 51 | std::vector d_images; 52 | std::vector d_labels; 53 | unsigned int d_rows, d_cols, d_stride, d_num; 54 | std::unordered_map> d_converted; 55 | }; 56 | -------------------------------------------------------------------------------- /mnistposter.cc: -------------------------------------------------------------------------------- 1 | 2 | #define STB_IMAGE_WRITE_IMPLEMENTATION 3 | #include "ext/stb/stb_image_write.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "mnistreader.hh" 10 | #include 11 | #include "misc.hh" 12 | 13 | using namespace std; 14 | 15 | int main(int argc, char **argv) 16 | { 17 | int filt=-1; 18 | if(argc == 2) 19 | filt= 1 + argv[1][0] - 'a'; 20 | 21 | feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW ); 22 | 23 | MNISTReader mn("gzip/emnist-letters-train-images-idx3-ubyte.gz", "gzip/emnist-letters-train-labels-idx1-ubyte.gz"); 24 | //MNISTReader mn("gzip/emnist-letters-test-images-idx3-ubyte.gz", "gzip/emnist-letters-test-labels-idx1-ubyte.gz"); 25 | 26 | cout<<"Have "< out; 30 | out.resize(imgcols*imgrows); 31 | auto pix = [&out, &imgrows, &imgcols](int col, int row) -> uint8_t& 32 | { 33 | return out[col + row*imgcols]; 34 | }; 35 | 36 | int count=0; 37 | Batcher batcher(mn.num()); 38 | for(;;) { 39 | auto b = batcher.getBatch(1); 40 | if(b.empty()) 41 | break; 42 | int n=b[0]; 43 | if(filt >=0 && mn.getLabel(n) != filt) 44 | continue; 45 | 46 | Tensor img(28,28); 47 | mn.pushImage(n, img); 48 | 49 | int x = 30 * (count % (imgcols/30 - 1)); // this many per row 50 | int y = 30 * (count / (imgcols/30 - 1)); 51 | count++; 52 | 53 | if(x+30 >= imgcols || y+30 >= imgrows) 54 | break; 55 | 56 | for(unsigned int r=0; r < img.getRows(); ++r) 57 | for(unsigned int c=0; c < img.getCols(); ++c) 58 | pix(x+c, y+r) = 255 - img(r,c)*255; 59 | } 60 | stbi_write_png("poster.png", imgcols, imgrows, 1, &out[0], imgcols); 61 | } 62 | -------------------------------------------------------------------------------- /grutest.cc: -------------------------------------------------------------------------------- 1 | #include "ext/doctest.h" 2 | #include "tracked.hh" 3 | #include "gru.hh" 4 | #include 5 | 6 | using namespace std; 7 | 8 | // a sequence needs to be dragged through 9 | #if 0 10 | TEST_CASE("single GRU") { 11 | GRULayer gm; 12 | // cout<<"gm.size(): "< fc; 14 | // cout<<"fc.size(): "< in; 17 | NNArray expected; 18 | 19 | std::string input = "hellothisisbert"; 20 | std::string output; 21 | TrackedFloat totloss=0.0; 22 | for(size_t pos = 0 ; pos < input.size() - 1; ++pos) { 23 | in.zero(); in(input.at(pos)-'a', 0) = 1.0; 24 | 25 | expected.zero(); expected(0, input.at(pos+1)-'a') = 1.0; 26 | auto res1 = fc.forward(gm.forward(in)); 27 | auto score = res1.logSoftMax(); 28 | auto loss = TrackedNumber(0.0) - (expected*score)(0,0); 29 | totloss = totloss + loss; 30 | output.append(1, 'a' + score.maxValueIndexOfColumn(0)); 31 | // cout<<"score: "< gm1, gm2, gm3; 41 | // cout<<"gm.size(): "< fc1, fc2, fc3; 43 | 44 | NNArray in1, in2, in3; 45 | in1.zero(); in1(3,0) = 1.0; 46 | in2.zero(); in2(1,0) = 1.0; 47 | in3.zero(); in3(4,0) = 1.0; 48 | auto res1 = gm1.forward(in1); 49 | gm2.d_prevh = res1; 50 | 51 | auto res2 = gm2.forward(in2); 52 | gm3.d_prevh = res2; 53 | 54 | auto res3 = gm3.forward(in3); 55 | 56 | cout<<"GRU: "< 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | vector textChopper(const char* fname, size_t siz, int mult) 13 | { 14 | ifstream ifs(fname); 15 | 16 | vector buffer(1024000); 17 | string total; 18 | while(!ifs.eof()) { 19 | ifs.read(&buffer[0], buffer.size()); 20 | total.append(&buffer[0], &buffer[ifs.gcount()]); 21 | } 22 | buffer.clear(); 23 | unsigned int pieces = mult*total.size()/siz; 24 | vector ret; 25 | ret.reserve(pieces); 26 | 27 | std::random_device rd; //Will be used to obtain a seed for the random number engine 28 | std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() 29 | std::uniform_int_distribution<> distrib(0, total.size() - siz -1); 30 | 31 | for(unsigned int n = 0 ; n < pieces; ++n) { 32 | ret.push_back(total.substr(distrib(gen), siz)); 33 | for(auto& c : *ret.rbegin()) 34 | if(c=='\n' || c=='\t') c=' '; 35 | 36 | } 37 | 38 | return ret; 39 | } 40 | 41 | 42 | BiMapper::BiMapper(const char* fname, int lim) 43 | { 44 | ifstream ifs(fname); 45 | std::array a; 46 | std::unordered_map popcount; 47 | while(!ifs.eof()) { 48 | ifs.read((char*)&a[0], a.size()); 49 | for(const auto& c : a) { 50 | //cout<> revcount; 56 | for(const auto& p : popcount) 57 | revcount.push_back(p); 58 | sort(revcount.begin(), revcount.end(), [](const auto& a, const auto& b) { 59 | return b.second < a.second; 60 | }); 61 | 62 | if(lim >= 0 && revcount.size() > (unsigned int)lim) 63 | revcount.resize(lim); 64 | 65 | for(unsigned int n=0; n < revcount.size(); ++n) { 66 | d_c2i[revcount[n].first] = n; 67 | d_i2c[n]=revcount[n].first; 68 | // cout<<(char)revcount[n].first <<" -> "< img{28,28}; 5 | Tensor scores{26, 1}; 6 | Tensor expected{1,26}; 7 | Tensor modelloss{1,1}; 8 | Tensor weightsloss{1,1}; 9 | Tensor loss{1,1}; 10 | 11 | struct State : public ModelState 12 | { 13 | // r_in c k c_i c_out 14 | Conv2d c1; // -> 26*26 -> max2d -> 13*13 15 | Conv2d c2; // -> -> 11*11 -> max2d -> 6*6 //padding 16 | Conv2d c3; // -> 4*4 -> max2d -> 2*2 17 | // flattened to 512 (128*2*2) 18 | // IN OUT 19 | Linear fc1; 20 | Linear fc2; 21 | Linear fc3; 22 | 23 | State() 24 | { 25 | this->d_members = {{&c1, "c1"}, {&c2, "c2"}, {&c3, "c3"}, {&fc1, "fc1"}, {&fc2, "fc2"}, {&fc3, "fc3"}}; 26 | } 27 | }; 28 | 29 | void init(State& s, bool production=false) 30 | { 31 | using ActFunc = GeluFunc; 32 | 33 | img.zero(); 34 | img.d_imp->d_nograd=true; 35 | 36 | auto step1 = s.c1.forward(img); // -> 26x26, 32 layers 37 | auto step2 = Max2dfw(step1, 2); // -> 13x13 38 | auto step3 = s.c2.forward(step2); // -> 11x11, 64 layers 39 | auto step4 = Max2dfw(step3, 2); // -> 6x6 (padding) 40 | auto step5 = s.c3.forward(step4); // -> 4x4, 128 layers 41 | auto step6 = Max2dfw(step5, 2); // -> 2x2 42 | auto flat = makeFlatten(step6); // -> 512x1 43 | auto output = s.fc1.forward(flat); // -> 64 44 | auto output1 = production ? output : output.makeDropout(0.5); 45 | auto output2 = makeFunction(output1); 46 | auto output3 = makeFunction(s.fc2.forward(output2)); // -> 128 47 | // auto output4 = makeFunction(s.fc3.forward(output3)); // -> 26 48 | auto output4 = s.fc3.forward(output3); // -> 26 49 | scores = makeLogSoftMax(output4); 50 | modelloss = -(expected*scores); 51 | 52 | Tensor fact(1,1); 53 | fact(0,0) = 0.02; 54 | weightsloss = fact*(s.c1.SquaredWeightsSum() + s.c2.SquaredWeightsSum() + s.c3.SquaredWeightsSum() + 55 | s.fc1.SquaredWeightsSum() + s.fc1.SquaredWeightsSum() + s.fc1.SquaredWeightsSum()); 56 | 57 | loss = modelloss; // + weightsloss; 58 | } 59 | }; 60 | -------------------------------------------------------------------------------- /misc.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | struct HyperParameters 14 | { 15 | float lr; 16 | float momentum; 17 | int batchMult; 18 | unsigned int getBatchSize() 19 | { 20 | return 8*batchMult; 21 | } 22 | }; 23 | 24 | struct TrainingProgress 25 | { 26 | int batchno=0; 27 | float lastTook=0; 28 | std::vector losses; 29 | std::vector corrects; 30 | std::atomic trained=0; 31 | }; 32 | 33 | extern struct TrainingProgress g_progress; 34 | extern std::shared_ptr g_hyper; 35 | int graphicsThread(); 36 | 37 | class Batcher 38 | { 39 | public: 40 | explicit Batcher(int n, std::optional rng=std::optional()) 41 | { 42 | for(int i=0; i < n ; ++i) 43 | d_store.push_back(i); 44 | 45 | randomize(rng); 46 | } 47 | 48 | explicit Batcher(const std::vector& in) 49 | { 50 | for(const auto& i : in) 51 | d_store.push_back(i); 52 | randomize(); 53 | } 54 | 55 | auto getBatch(int n) 56 | { 57 | std::deque ret; 58 | for(int i = 0 ; !d_store.empty() && i < n; ++i) { 59 | ret.push_back(d_store.front()); 60 | d_store.pop_front(); 61 | } 62 | return ret; 63 | } 64 | 65 | auto getBatchLocked(int n) 66 | { 67 | std::deque ret; 68 | std::lock_guard l(d_mut); 69 | for(int i = 0 ; !d_store.empty() && i < n; ++i) { 70 | ret.push_back(d_store.front()); 71 | d_store.pop_front(); 72 | } 73 | return ret; 74 | } 75 | 76 | private: 77 | std::deque d_store; 78 | std::mutex d_mut; 79 | void randomize(std::optional rnd = std::optional()) 80 | { 81 | if(rnd) { 82 | std::shuffle(d_store.begin(), d_store.end(), *rnd); 83 | } 84 | else { 85 | std::random_device rd; 86 | std::mt19937 g(rd()); 87 | std::shuffle(d_store.begin(), d_store.end(), g); 88 | } 89 | } 90 | 91 | }; 92 | 93 | 94 | struct DTime 95 | { 96 | void start() 97 | { 98 | d_start = std::chrono::steady_clock::now(); 99 | } 100 | uint32_t lapUsec() 101 | { 102 | auto usec = std::chrono::duration_cast(std::chrono::steady_clock::now()- d_start).count(); 103 | start(); 104 | return usec; 105 | } 106 | 107 | std::chrono::time_point d_start; 108 | }; 109 | -------------------------------------------------------------------------------- /mnistreader.cc: -------------------------------------------------------------------------------- 1 | #include "mnistreader.hh" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "zlib.h" 7 | #include 8 | 9 | using namespace std; 10 | 11 | static auto safeOpen(const std::string& fname) 12 | { 13 | gzFile fp = gzopen(fname.c_str(), "rb"); 14 | if(!fp) 15 | throw runtime_error("Error opening file "+fname+": "+strerror(errno)); 16 | return fp; 17 | } 18 | 19 | MNISTReader::MNISTReader(const std::string& images, const std::string& labels) 20 | { 21 | struct idx1header 22 | { 23 | uint32_t magic; 24 | uint32_t num; 25 | } __attribute__((packed)); 26 | 27 | struct idx3header 28 | { 29 | uint32_t magic; 30 | uint32_t num; 31 | uint32_t rows; 32 | uint32_t cols; 33 | } __attribute__((packed)); 34 | 35 | auto imgfp = safeOpen(images); 36 | auto labelsfp = safeOpen(labels); 37 | 38 | idx1header i1h; 39 | idx3header i3h; 40 | if(gzfread(&i1h, sizeof(idx1header), 1, labelsfp) != 1) 41 | throw std::runtime_error("Label file too short"); 42 | if(gzfread(&i3h, sizeof(idx3header), 1, imgfp) != 1) 43 | throw std::runtime_error("Images file too short"); 44 | 45 | i1h.magic = htonl(i1h.magic); 46 | i1h.num = htonl(i1h.num); 47 | 48 | i3h.magic = htonl(i3h.magic); 49 | i3h.num = htonl(i3h.num); 50 | i3h.rows = htonl(i3h.rows); 51 | i3h.cols = htonl(i3h.cols); 52 | 53 | d_rows = i3h.rows; 54 | d_cols = i3h.cols; 55 | d_stride = d_rows * d_cols; 56 | d_num = i3h.num; 57 | if(i1h.magic != 2049) 58 | throw runtime_error("Magic value of labels file wrong "+to_string(i1h.magic)); 59 | if(i3h.magic != 2051) 60 | throw runtime_error("Magic value of images file wrong "+to_string(i3h.magic)); 61 | 62 | if(i3h.num != i1h.num) 63 | throw runtime_error("Mismatch between number of labels and number of images"); 64 | 65 | d_images.resize(i3h.num*i3h.cols*i3h.rows); 66 | if(gzfread((char*)&d_images[0], i3h.cols*i3h.rows, i3h.num, imgfp) != i3h.num) 67 | throw runtime_error("Could not read all "+to_string(i3h.num)+" images"); 68 | 69 | d_labels.resize(i3h.num); 70 | if(gzfread((char*)&d_labels[0], 1, i3h.num, labelsfp) != i3h.num) 71 | throw runtime_error("Could not read all "+to_string(i3h.num)+" labels"); 72 | 73 | gzclose(imgfp); 74 | gzclose(labelsfp); 75 | 76 | vector tmp(28*28); 77 | for(unsigned int n=0 ; n < d_num; ++n) { 78 | unsigned int pos = n * d_stride; 79 | for(unsigned int i=0; i < d_stride; ++i) { 80 | tmp.at(i) = d_images.at(pos+i)/256.0; 81 | } 82 | d_converted[n]=tmp; 83 | } 84 | } 85 | 86 | vector MNISTReader::getImage(int n) const 87 | { 88 | unsigned int pos = n*d_rows*d_cols; 89 | vector ret(&d_images.at(pos), &d_images.at(pos + d_rows*d_cols)); 90 | return ret; 91 | } 92 | 93 | char MNISTReader::getLabel(int n) const 94 | { 95 | return d_labels.at(n); 96 | } 97 | 98 | -------------------------------------------------------------------------------- /cnn1.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "tracked.hh" 3 | #include "layers.hh" 4 | 5 | /* 6 | Gratefully copied from 'mnist.cpp' in the PyTorch example repository 7 | https://github.com/pytorch/examples/blob/main/cpp/mnist/mnist.cpp 8 | 9 | This model takes MNIST 28*28 input and: 10 | 11 | * normalizes to "0.1307, 03081", torch::data::transforms::Normalize<>(0.1307, 0.3081) 12 | 13 | * applies a 5*5 kernel convolution `conv1`, configured to emit 10 layers, 24*24 14 | * does max_pool2d on these, which takes non-overlapping 2*2 rectangles 15 | and emits max value per rectangle. Delivers 12*12 values for each layer 16 | * ReLu 17 | * does another 5x5 convolution `conv2` on the 10 layers, turning them into 20 layers of 8*8 18 | * randomly *zeroes* half of the 20 layers `conv2_drop` - no state, Bernoulli 19 | STILL MISSING! 20 | * another max_pool2d, 4*4*20 layers 21 | * ReLu 22 | * flatten to 320 values 23 | * linear combination 320x50 (fc1) 24 | * ReLU 25 | * zero out half of values randomly during training (STILL MISSING) 26 | * another linear combination, 50x10 (fc2) 27 | * log_softmax on the 10 values 28 | * the 10 outputs are probabilities per digit 29 | * highest probability is chosen 30 | */ 31 | 32 | 33 | struct CNNModel { 34 | NNArray img; 35 | 36 | int label; 37 | NNArray scores; 38 | NNArray expected; 39 | 40 | TrackedFloat loss; 41 | 42 | struct State 43 | { 44 | // R C K IN OUTLAYERS 45 | Conv2d c1; // -> 24*24 -> max2d -> 12*12 46 | Conv2d c2; // -> 8*8 -> max2d -> 4*4 47 | 48 | // IN OUT 49 | Linear fc1; 50 | Linear fc2; 51 | 52 | void learn(float lr) 53 | { 54 | c1.learn(lr); 55 | c2.learn(lr); 56 | fc1.learn(lr); 57 | fc2.learn(lr); 58 | } 59 | 60 | void save(std::ostream& out) const 61 | { 62 | c1.save(out); c2.save(out); fc1.save(out); fc2.save(out); 63 | } 64 | void load(std::istream& in) 65 | { 66 | c1.load(in); c2.load(in); fc1.load(in); fc2.load(in); 67 | } 68 | }; 69 | 70 | void init(State& s) 71 | { 72 | img.zero(); 73 | 74 | auto step1 = s.c1.forward(img); 75 | 76 | std::array, 10> step2; 77 | unsigned ctr=0; 78 | for(auto& p : step2) 79 | p = step1[ctr++].Max2d<2>().applyFunc(ReluFunc()); 80 | 81 | std::array, 20> step3 = s.c2.forward(step2); 82 | std::array, 20> step4; 83 | 84 | ctr=0; 85 | for(auto& p : step4) { 86 | p = step3[ctr++].Max2d<2>().applyFunc(ReluFunc()); 87 | } 88 | 89 | NNArray flat = flatten(step4); 90 | auto output = s.fc1.forward(flat); 91 | auto output2 = output.applyFunc(ReluFunc()); 92 | auto output3 = s.fc2.forward(output2); 93 | 94 | scores = output3.logSoftMax(); 95 | expected.zero(); 96 | loss = TrackedFloat(0) - (expected*scores)(0,0); 97 | } 98 | }; 99 | -------------------------------------------------------------------------------- /37learn.cc: -------------------------------------------------------------------------------- 1 | #include "mnistreader.hh" 2 | #include "vizi.hh" 3 | #include 4 | #include "ext/sqlitewriter/sqlwriter.hh" 5 | #include 6 | 7 | using namespace std; 8 | 9 | float doTest(const MNISTReader& mntest, const Tensor& weights, float bias, SQLiteWriter* sqw=0) 10 | { 11 | unsigned int corrects=0, wrongs=0; 12 | 13 | for(unsigned int n = 0 ; n < mntest.num(); ++n) { 14 | int label = mntest.getLabel(n); 15 | if(label != 3 && label != 7) 16 | continue; 17 | Tensor img(28,28); 18 | mntest.pushImage(n, img); 19 | 20 | float score = (img.dot(weights).sum()(0,0)) + bias; // the calculation 21 | 22 | int predict = score > 0 ? 7 : 3; // the verdict 23 | 24 | if(sqw) 25 | sqw->addValue({{"label", label}, {"res", score}, {"verdict", predict}}); 26 | 27 | 28 | if(predict == label) { 29 | corrects++; 30 | } 31 | else { 32 | wrongs++; 33 | } 34 | } 35 | float perc = 100.0*corrects/(corrects+wrongs); 36 | cout << perc << "% correct" << endl; 37 | return perc; 38 | } 39 | 40 | int main() 41 | { 42 | MNISTReader mn("gzip/emnist-digits-train-images-idx3-ubyte.gz", "gzip/emnist-digits-train-labels-idx1-ubyte.gz"); 43 | MNISTReader mntest("gzip/emnist-digits-test-images-idx3-ubyte.gz", "gzip/emnist-digits-test-labels-idx1-ubyte.gz"); 44 | 45 | cout << "Have "< 98.0) 68 | break; 69 | saveTensor(weights, "weights-"+to_string(count)+".png", 252); 70 | } 71 | 72 | Tensor img(28,28); 73 | mn.pushImage(n, img); 74 | float res = (img.dot(weights).sum()(0,0)) + bias; // the calculation 75 | if(count == 25001) { 76 | auto prod = img.dot(weights); 77 | saveTensor(img, "random-image.png", 252, true); 78 | saveTensor(prod, "random-prod.png", 252); 79 | cout<<"res for first image: " << res << '\n'; 80 | } 81 | int verdict = res > 0 ? 7 : 3; 82 | 83 | if(label == 7) { 84 | if(res < 2.0) { 85 | weights.raw() = weights.raw() + img.raw() * lr.raw(); 86 | bias += 0.01; 87 | } 88 | } else { 89 | if(res > -2.0) { 90 | weights.raw() = weights.raw() - img.raw() * lr.raw(); 91 | bias -= 0.01; 92 | } 93 | } 94 | 95 | 96 | ++count; 97 | } 98 | saveTensor(weights, "weights-final.png", 252); 99 | doTest(mntest, weights, bias, &sqw); 100 | cout<<"Bias: "< 11 | 12 | using namespace std; 13 | 14 | FontWriter::FontWriter() 15 | { 16 | std::ifstream in("/usr/share/fonts/truetype/msttcorefonts/Times_New_Roman_Bold.ttf", std::ios::binary); 17 | d_ttf_buffer.assign(std::istreambuf_iterator(in), 18 | std::istreambuf_iterator()); 19 | 20 | stbtt_InitFont(&d_font, (const unsigned char*)&d_ttf_buffer[0], stbtt_GetFontOffsetForIndex((const unsigned char*)&d_ttf_buffer[0],0)); 21 | } 22 | 23 | void FontWriter::writeChar(char ch, int s, int c, int r, std::function f) 24 | { 25 | int w,h,i,j; 26 | unsigned char *bitmap = stbtt_GetCodepointBitmap(&d_font, 0,stbtt_ScaleForPixelHeight(&d_font, s), ch, &w, &h, 0,0); 27 | c -= w/2; // center 28 | for (j=0; j < h; ++j) { 29 | for (i=0; i < w; ++i) { 30 | f(c + i, r + j, 255-bitmap[j*w+i], 255-bitmap[j*w+i], 255-bitmap[j*w+i]); 31 | } 32 | } 33 | } 34 | 35 | 36 | void saveTensor(const Tensor& t, const std::string& fname, int size, bool monochrome) 37 | { 38 | vector out; 39 | out.resize(size*size*3); 40 | struct Pixel { 41 | uint8_t r, g, b; 42 | }; 43 | static_assert(sizeof(Pixel)==3); 44 | 45 | auto pix = [&out, &size](int col, int row) -> Pixel& 46 | { 47 | return *(Pixel*)&out[3 *(col + row*size)]; 48 | }; 49 | 50 | float lemin, lemax; 51 | lemin = lemax = t(0,0); 52 | 53 | for(unsigned int row = 0 ; row < t.getRows(); ++row) { 54 | for(unsigned int col = 0 ; col < t.getCols(); ++col) { 55 | float v = t(row, col); 56 | if(v > lemax) 57 | lemax = v; 58 | if(v < lemin) 59 | lemin = v; 60 | } 61 | } 62 | 63 | unsigned int hboxsize = size/t.getCols(); 64 | unsigned int vboxsize = size/t.getRows(); 65 | 66 | auto box = [&pix, &out](int col, int row, int w, int h, uint8_t cr, uint8_t cg, uint8_t cb) { 67 | for(int c = col ; c < col + w; ++c) 68 | for(int r = row ; r < row + h; ++r) 69 | pix(c, r) = {cr,cg,cb}; 70 | }; 71 | 72 | for(unsigned int row = 0 ; row < t.getRows(); ++row) { 73 | for(unsigned int col = 0 ; col < t.getCols(); ++col) { 74 | float v = t(row, col); 75 | 76 | if(monochrome) { 77 | uint8_t color = 255.0*(v - lemin)/(lemax-lemin); 78 | box(col * hboxsize, row * vboxsize, hboxsize, vboxsize, color, color, color); 79 | } 80 | else { 81 | if(v > 0) // red 82 | box(col * hboxsize, row * vboxsize, hboxsize, vboxsize, 255 * v/lemax, 0, 0); 83 | else if(v < 0) // blue 84 | box(col * hboxsize, row * vboxsize, hboxsize, vboxsize, 0, 0, 255 * v/lemin); 85 | } 86 | } 87 | } 88 | 89 | 90 | stbi_write_png(fname.c_str(), size, size, 3, &out[0], 3*size); 91 | } 92 | -------------------------------------------------------------------------------- /gru-layer.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "tensor-layers.hh" 3 | // hidden state=> dense linear => output x 4 | 5 | // x is input 6 | // h_t, h_{t-1} = hidden state 7 | 8 | // gate_{reset} = \sigma(W_{input_{reset}} \cdot x_t + W_{hidden_{reset}} \cdot h_{t-1}) 9 | 10 | // W_input_reset - ^^ normal matrix products 11 | // W_input_hidden 12 | 13 | // pytorch: 14 | 15 | // r_t = σ(W_{ir} x_t + b_{ir}+W_{hr}h_{t−1} +b_{hr}) // reset gate 16 | // z_t = σ(W_{iz} x_t + b_{iz} +W_{hz}h_{t−1} + b_{hz}) // update 17 | // n_t = tanh(W_{in}x_t+b_{in}+ r_t*(W_{hn} h_{t−1} + b_{hn})) // "new" - * is dotproduct 18 | // h_t=(1−z_t)*n_t+z_t*h_{t−1)} // new h 19 | 20 | // the hidden state is also the output, which needs linear combination to turn into input size again 21 | // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html 22 | 23 | template 24 | struct GRULayer : TensorLayer 25 | { 26 | Tensor d_w_ir{HIDDEN, IN}; // reset 27 | Tensor d_w_iz{HIDDEN, IN}; // update 28 | Tensor d_w_in{HIDDEN, IN}; // new 29 | 30 | Tensor d_w_hr{HIDDEN, HIDDEN}; // hidden reset 31 | Tensor d_w_hz{HIDDEN, HIDDEN}; // hidden update 32 | Tensor d_w_hn{HIDDEN, HIDDEN}; // hidden "new" 33 | 34 | Tensor d_origprevh{HIDDEN, 1}; 35 | Tensor d_prevh{HIDDEN, 1}; 36 | 37 | GRULayer() 38 | { 39 | this->d_params={ 40 | {&d_w_ir, "w_ir"}, {&d_w_iz, "w_iz"}, {&d_w_in, "w_in"}, 41 | {&d_w_hr, "w_hr"}, {&d_w_hz, "w_hz"}, {&d_w_hn, "w_hn"}}; 42 | randomize(); 43 | Tensor one(HIDDEN, HIDDEN); 44 | one.identity(1.0); 45 | d_prevh = one*d_origprevh; 46 | } 47 | 48 | // https://blog.floydhub.com/gru-with-pytorch/ 49 | // https://towardsdatascience.com/gate-recurrent-units-explained-using-matrices-part-1-3c781469fc18 50 | // these appear to be slightly different 51 | auto forward(const Tensor& xt) 52 | { 53 | auto r_t = makeFunction(d_w_ir * xt + d_w_hr * d_prevh); // reset gate 54 | auto z_t = makeFunction(d_w_iz * xt + d_w_hz * d_prevh); 55 | // z_t dimensions: rows from d_w_iz, columns from xt -> HIDDEN,IN 56 | auto n_t = makeFunction(d_w_in * xt + r_t.dot(d_w_hn *d_prevh)); 57 | 58 | Tensor one(HIDDEN, 1); // XXX this is a SUPER wart 59 | // the problem is we have no support for 1 - Tensor() kind of operations, so we need to make an appropriately sized 'one' 60 | for(unsigned int r=0 ; r < one.getRows(); ++r) 61 | for(unsigned int c=0 ; c < one.getCols(); ++c) 62 | one(r,c)=1; 63 | 64 | auto h_t = (one - z_t).dot(n_t) + z_t.dot(d_prevh); 65 | d_prevh = h_t; // "this is where the magic happens" 66 | return h_t; 67 | } 68 | 69 | void randomize() // "Xavier initialization" http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf 70 | { 71 | d_w_ir.randomize(1.0/sqrt(HIDDEN)); 72 | d_w_iz.randomize(1.0/sqrt(HIDDEN)); 73 | d_w_in.randomize(1.0/sqrt(HIDDEN)); 74 | 75 | d_w_hr.randomize(1.0/sqrt(HIDDEN)); 76 | d_w_hz.randomize(1.0/sqrt(HIDDEN)); 77 | d_w_hn.randomize(1.0/sqrt(HIDDEN)); 78 | d_prevh.zero(); 79 | } 80 | }; 81 | -------------------------------------------------------------------------------- /ext/sqlitewriter/sqlwriter.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | struct sqlite3; 11 | struct sqlite3_stmt; 12 | 13 | class MiniSQLite 14 | { 15 | public: 16 | MiniSQLite(std::string_view fname); 17 | ~MiniSQLite(); 18 | std::vector> getSchema(const std::string& table); 19 | void addColumn(const std::string& table, std::string_view name, std::string_view type); 20 | std::vector> exec(std::string_view query); 21 | void prepare(const std::string& table, std::string_view str); 22 | void bindPrep(const std::string& table, int idx, bool value); 23 | void bindPrep(const std::string& table, int idx, int value); 24 | void bindPrep(const std::string& table, int idx, uint32_t value); 25 | void bindPrep(const std::string& table, int idx, long value); 26 | void bindPrep(const std::string& table, int idx, unsigned long value); 27 | void bindPrep(const std::string& table, int idx, long long value); 28 | void bindPrep(const std::string& table, int idx, unsigned long long value); 29 | void bindPrep(const std::string& table, int idx, double value); 30 | void bindPrep(const std::string& table, int idx, const std::string& value); 31 | void execPrep(const std::string& table); 32 | void begin(); 33 | void commit(); 34 | void cycle(); 35 | bool isPrepared(const std::string& table) const 36 | { 37 | if(auto iter = d_stmts.find(table); iter == d_stmts.end()) 38 | return false; 39 | else 40 | return iter->second != nullptr; 41 | } 42 | 43 | private: 44 | sqlite3* d_sqlite; 45 | std::unordered_map d_stmts; 46 | std::vector> d_rows; // for exec() 47 | static int helperFunc(void* ptr, int cols, char** colvals, char** colnames); 48 | bool d_intransaction{false}; 49 | bool haveTable(const std::string& table); 50 | }; 51 | 52 | class SQLiteWriter 53 | { 54 | 55 | public: 56 | explicit SQLiteWriter(std::string_view fname) : d_db(fname) 57 | { 58 | // for(const auto& c : d_columns) 59 | // cout < var_t; 66 | void addValue(const std::initializer_list>& values, const std::string& table="data"); 67 | void addValue(const std::vector>& values, const std::string& table="data"); 68 | 69 | template 70 | void addValueGeneric(const std::string& table, const T& values); 71 | ~SQLiteWriter() 72 | { 73 | // std::cerr<<"Destructor called"<>> d_columns; 85 | std::unordered_map> d_lastsig; 86 | bool haveColumn(const std::string& table, std::string_view name); 87 | 88 | }; 89 | -------------------------------------------------------------------------------- /threeorseven.cc: -------------------------------------------------------------------------------- 1 | #include "mnistreader.hh" 2 | #include "vizi.hh" 3 | #include 4 | #include "ext/sqlitewriter/sqlwriter.hh" 5 | #include 6 | 7 | using namespace std; 8 | 9 | int main() 10 | { 11 | MNISTReader mn("gzip/emnist-digits-train-images-idx3-ubyte.gz", "gzip/emnist-digits-train-labels-idx1-ubyte.gz"); 12 | MNISTReader mntest("gzip/emnist-digits-test-images-idx3-ubyte.gz", "gzip/emnist-digits-test-labels-idx1-ubyte.gz"); 13 | 14 | cout << "Have "< 0 ? 7 : 3; // the verdict 83 | 84 | if(predict == label) { 85 | if(haveseven < 5 && label==7) { 86 | saveTensor(img, "seven-"+to_string(haveseven)+".png", 252, true); 87 | Tensor prod = img.dot(delta); 88 | saveTensor(prod, "prod7-"+to_string(haveseven)+".png", 252); 89 | haveseven++; 90 | } 91 | if(havethree < 5 && label==3) { 92 | saveTensor(img, "three-"+to_string(havethree)+".png", 252, true); 93 | Tensor prod = img.dot(delta); 94 | saveTensor(prod, "prod3-"+to_string(havethree)+".png", 252); 95 | havethree++; 96 | } 97 | 98 | corrects++; 99 | } 100 | else { 101 | saveTensor(img, "wrong-"+to_string(label)+"-"+to_string(wrongs)+".png", 252); 102 | wrongs++; 103 | } 104 | } 105 | cout << 100.0*corrects/(corrects+wrongs) << "% correct" << endl; 106 | } 107 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | set(CMAKE_CXX_FLAGS "-Wall -Wextra -O3 -ggdb -march=native -msse2 -mavx -mavx2 -mfma -fno-omit-frame-pointer -ffast-math -Iext/imgui -Iext/imgui/backends -Iext/implot-master -I/usr/include/eigen3 -I/usr/include/eigen3/unsupported/ -Wno-missing-field-initializers") 3 | 4 | project(hello-dl VERSION 1.0 5 | DESCRIPTION "Hello deep learning" 6 | LANGUAGES CXX) 7 | 8 | 9 | #add_compile_options(-fsanitize=address) 10 | #add_link_options(-fsanitize=address) 11 | 12 | set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard to use") 13 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 14 | set(CMAKE_CXX_EXTENSIONS ON) 15 | 16 | set(CMAKE_THREAD_PREFER_PTHREAD TRUE) 17 | set(THREADS_PREFER_PTHREAD_FLAG TRUE) 18 | find_package(Threads REQUIRED) 19 | find_package (Eigen3 3.3 REQUIRED NO_MODULE) 20 | 21 | add_library(common OBJECT mnistreader.cc textsupport.cc ) 22 | #add_library(gui OBJECT ui.cc ext/imgui/imgui.cpp ext/imgui/imgui_demo.cpp ext/imgui/imgui_draw.cpp ext/imgui/imgui_tables.cpp ext/imgui/imgui_widgets.cpp ext/imgui/backends/imgui_impl_glfw.cpp ext/imgui/backends/imgui_impl_opengl3.cpp ext/implot-master/implot.cpp ext/implot-master/implot_items.cpp ext/implot-master/implot_demo.cpp) 23 | 24 | 25 | 26 | add_executable(tensor-relu tensor-relu.cc ext/sqlitewriter/sqlwriter.cc) 27 | target_link_libraries(tensor-relu z common Eigen3::Eigen sqlite3 Threads::Threads) 28 | 29 | add_executable(tensor-convo tensor-convo.cc ext/sqlitewriter/sqlwriter.cc ) 30 | target_link_libraries(tensor-convo z common Eigen3::Eigen sqlite3 Threads::Threads) 31 | 32 | add_executable(tensor-convo-par tensor-convo-par.cc ext/sqlitewriter/sqlwriter.cc ) 33 | target_link_libraries(tensor-convo-par z common Eigen3::Eigen sqlite3 Threads::Threads) 34 | 35 | 36 | add_executable(img-ocr img-ocr.cc vizi.cc ) 37 | target_link_libraries(img-ocr z common Eigen3::Eigen Threads::Threads) 38 | 39 | add_executable(threeorseven threeorseven.cc vizi.cc ext/sqlitewriter/sqlwriter.cc) 40 | target_link_libraries(threeorseven z common Eigen3::Eigen sqlite3 Threads::Threads) 41 | 42 | add_executable(37learn 37learn.cc vizi.cc ext/sqlitewriter/sqlwriter.cc) 43 | target_link_libraries(37learn z common Eigen3::Eigen sqlite3 Threads::Threads) 44 | 45 | add_executable(basic-autograd basic-autograd.cc) 46 | target_link_libraries(basic-autograd z common Eigen3::Eigen Threads::Threads ) 47 | 48 | add_executable(imagine imagine.cc vizi.cc) 49 | target_link_libraries(imagine z common Eigen3::Eigen Threads::Threads ) 50 | 51 | 52 | add_executable(tensor-gru tensor-gru.cc ) 53 | target_link_libraries(tensor-gru z common Eigen3::Eigen Threads::Threads ) 54 | 55 | add_executable(testrunner testrunner.cc arraytests.cc testfvector.cc grutest.cc tensor2test.cc tensormodtest.cc) 56 | 57 | target_link_libraries(testrunner z common Eigen3::Eigen) 58 | 59 | enable_testing() 60 | add_test(testname testrunner) 61 | 62 | 63 | 64 | #add_executable(first-convo first-convo.cc ) 65 | #target_link_libraries(first-convo z common) 66 | 67 | #add_executable(modular-convo modular-convo.cc ) 68 | #target_link_libraries(modular-convo z common) 69 | 70 | #add_executable(modular-linear-convo modular-linear-convo.cc ) 71 | #target_link_libraries(modular-linear-convo z common) 72 | 73 | 74 | #add_executable(worker-convo worker-convo.cc ) 75 | #target_link_libraries(worker-convo z common) 76 | 77 | #add_executable(worker-convo-avx worker-convo-avx.cc) 78 | #target_link_libraries(worker-convo-avx z common gui GL glfw) 79 | 80 | #add_executable(worker-convo-avx-threaded worker-convo-avx-threaded.cc ) 81 | #target_link_libraries(worker-convo-avx-threaded z common) 82 | 83 | 84 | #add_executable(modular-threaded-convo modular-threaded-convo.cc ) 85 | #target_link_libraries(modular-threaded-convo z common) 86 | 87 | 88 | add_executable(try-convo try-convo.cc vizi.cc) 89 | target_link_libraries(try-convo z common Threads::Threads) 90 | 91 | 92 | #add_executable(los los.cc ) 93 | #target_link_libraries(los z common) 94 | 95 | add_executable(mnistposter mnistposter.cc ) 96 | target_link_libraries(mnistposter z common) 97 | 98 | #add_executable(los2 los2.cc ) 99 | #target_link_libraries(los2 z common) 100 | 101 | #add_executable(los3 los3.cc ) 102 | #target_link_libraries(los3 z common) 103 | 104 | #add_executable(tensor tensor.cc) 105 | #target_link_libraries(tensor z common) 106 | 107 | #add_executable(first-relu first-relu.cc ) 108 | #target_link_libraries(first-relu z common) 109 | 110 | 111 | -------------------------------------------------------------------------------- /cnn-alphabet.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "model.hh" 3 | #include 4 | /* 5 | Model from https://data-flair.training/blogs/handwritten-character-recognition-neural-network/ 6 | */ 7 | 8 | 9 | template 10 | struct CNNAlphabetModel { 11 | NNArray img; 12 | 13 | NNArray scores; 14 | NNArray expected; 15 | 16 | TrackedNumber loss; 17 | 18 | struct State : ModelState 19 | { 20 | // R C K IN OUTLAYERS 21 | Conv2d c1; // -> 26*26 -> max2d -> 13*13 22 | Conv2d c2; // -> -> 11*11 -> max2d -> 6*6 //padding 23 | Conv2d c3; // -> 4*4 -> max2d -> 2*2 24 | // flattened to 512 25 | // IN OUT 26 | Linear fc1; 27 | Linear fc2; 28 | Linear fc3; 29 | 30 | // this has GRAVE issues with copying 31 | State() 32 | { // this has GRAVE issues with copying 33 | d_members={&c1, &c2, &c3, &fc1, &fc2, &fc3}; 34 | } 35 | // this has GRAVE issues with copying 36 | State(const State& rhs) 37 | { 38 | c1=rhs.c1; 39 | c2=rhs.c2; 40 | c3=rhs.c3; 41 | fc1=rhs.fc1; 42 | fc2=rhs.fc2; 43 | fc3=rhs.fc3; 44 | d_members={&c1, &c2, &c3, &fc1, &fc2, &fc3}; 45 | } 46 | 47 | State& operator=(const State& rhs) = delete; 48 | 49 | void reset() 50 | { 51 | for(auto& m : d_members) 52 | m->reset(); 53 | } 54 | 55 | void addGrad(const State& rhs) 56 | { 57 | c1.addGrad(rhs.c1); 58 | c2.addGrad(rhs.c2); 59 | c3.addGrad(rhs.c3); 60 | 61 | fc1.addGrad(rhs.fc1); 62 | fc2.addGrad(rhs.fc2); 63 | fc3.addGrad(rhs.fc3); 64 | } 65 | 66 | void setGrad(const State& rhs, float divisor) 67 | { 68 | c1.setGrad(rhs.c1, divisor); 69 | c2.setGrad(rhs.c2, divisor); 70 | c3.setGrad(rhs.c3, divisor); 71 | 72 | fc1.setGrad(rhs.fc1, divisor); 73 | fc2.setGrad(rhs.fc2, divisor); 74 | fc3.setGrad(rhs.fc3, divisor); 75 | } 76 | 77 | void momentum(const State& rhs, float mom, float dampening=0) 78 | { 79 | c1.momGrad(rhs.c1, mom, dampening); 80 | c2.momGrad(rhs.c2, mom, dampening); 81 | c3.momGrad(rhs.c3, mom, dampening); 82 | 83 | fc1.momGrad(rhs.fc1, mom, dampening); 84 | fc2.momGrad(rhs.fc2, mom, dampening); 85 | fc3.momGrad(rhs.fc3, mom, dampening); 86 | } 87 | 88 | 89 | template 90 | void makeProj(const W& w) 91 | { 92 | fc1.makeProj(w); 93 | fc2.makeProj(w); 94 | fc3.makeProj(w); 95 | c1.makeProj(w); 96 | c2.makeProj(w); 97 | } 98 | 99 | template 100 | void projForward(W& w) const 101 | { 102 | fc1.projForward(w); 103 | fc2.projForward(w); 104 | fc3.projForward(w); 105 | c1.projForward(w); 106 | c2.projForward(w); 107 | } 108 | template 109 | void projBackGrad(const W& w) 110 | { 111 | fc1.projBackGrad(w); 112 | fc2.projBackGrad(w); 113 | fc3.projBackGrad(w); 114 | c1.projBackGrad(w); 115 | c2.projBackGrad(w); 116 | } 117 | 118 | }; 119 | 120 | void init(State& s) 121 | { 122 | img.zero(); 123 | img.setVariable(); 124 | 125 | auto step1 = s.c1.forward(img); 126 | 127 | std::array, 32> step2; 128 | unsigned ctr=0; 129 | for(auto& p : step2) 130 | p = step1[ctr++]. template Max2d<2>().applyFunc(ReluFunc()); 131 | 132 | std::array, 64> step3 = s.c2.forward(step2); 133 | std::array, 64> step4; 134 | 135 | ctr=0; 136 | for(auto& p : step4) { 137 | p = step3[ctr++]. template Max2d<2>().applyFunc(ReluFunc()); 138 | } 139 | 140 | std::array, 128> step5 = s.c3.forward(step4); 141 | std::array, 128> step6; 142 | 143 | ctr=0; 144 | for(auto& p : step6) { 145 | p = step5[ctr++]. template Max2d<2>().applyFunc(ReluFunc()); 146 | } 147 | 148 | NNArray flat = flatten(step6); 149 | auto output = s.fc1.forward(flat); 150 | auto output2 = output.applyFunc(ReluFunc()); 151 | auto output3 = s.fc2.forward(output2).applyFunc(ReluFunc()); 152 | auto output4 = s.fc3.forward(output3).applyFunc(ReluFunc()); 153 | 154 | scores = output4.logSoftMax(); 155 | expected.zero(); 156 | loss = TrackedNumber(0.0) - (expected*scores)(0,0); 157 | } 158 | 159 | }; 160 | -------------------------------------------------------------------------------- /testfvector.cc: -------------------------------------------------------------------------------- 1 | #include "ext/doctest.h" 2 | #include "array.hh" 3 | #include 4 | #include "fvector.hh" 5 | #include "tracked.hh" 6 | using namespace std; 7 | 8 | TEST_CASE("basic fvect4 test") { 9 | fvector<4> val=1234; 10 | CHECK(val.v[0] == 1234); 11 | CHECK(val.v[1] == 1234); 12 | CHECK(val.v[2] == 1234); 13 | CHECK(val.v[3] == 1234); 14 | 15 | CHECK(val.sum() == 4*1234); 16 | 17 | val.v[0]++; 18 | val *= 2; 19 | 20 | CHECK(val.v[0] == 2*1235); 21 | 22 | } 23 | 24 | 25 | TEST_CASE("basic fvect8 test") { 26 | fvector<8> val=1234; 27 | CHECK(val.v[0] == 1234); 28 | CHECK(val.v[1] == 1234); 29 | CHECK(val.v[2] == 1234); 30 | CHECK(val.v[3] == 1234); 31 | 32 | CHECK(val.sum() == 8*1234); 33 | 34 | val.v[0]++; 35 | val *= 2; 36 | 37 | CHECK(val.v[0] == 2*1235); 38 | 39 | } 40 | 41 | TEST_CASE("dev fvect8 test") { 42 | fvector<8> val({1.0, 2.0, 3.0, 4., 5., 6., 7., 8.}); 43 | auto inv = 1.0/val; 44 | 45 | CHECK(inv.v[0]==doctest::Approx(1.0)); 46 | CHECK(inv.v[4]==doctest::Approx(0.2)); 47 | } 48 | 49 | TEST_CASE("fvect8 cmp test") { 50 | fvector<8> x({1,2,3,4,5,6,7,8}); 51 | fvector<8> y({5,4,3,2,1,0,0,0}); 52 | fvector<8> r = x x({1,2,3,4,5,6,7,8}); 67 | fvector<8> y({5,4,3,2,1,0,0,0}); 68 | fvector<8> sum1=0, sum2=0; 69 | fvector<8> res = x> TrackedVec; 87 | TEST_CASE("tracked fvect8 test") { 88 | /* 89 | TrackedVec a = fvector<8>({-3,-2,-1,0,1,2,3,4}); 90 | TrackedVec b = makeFunc(a, ReluFunc()) + a; 91 | 92 | fvector<8> res = b.getVal(); 93 | cout<<"res: "<({-3,-2,-1, 0,1,2,3,4}); 101 | cout< tmp({ 2, -9,-4, 3,2,1,4,7}); 103 | TrackedVec b = fvector<8>(tmp); 104 | 105 | // -3,-2,-1,0,1,2,3,4 106 | //b: 2 -9 -4 3 2 1 4 7 107 | //res: 2 -2 -1 3 2 2 4 7 108 | 109 | 110 | // cout<< "a: "<({3,2,1, 1,1,2,3,4}); 145 | auto res = makeFunc(a, LogFunc()); 146 | cout<<"m: "< a({1,2,3}); 154 | CHECK(a.v[0]==1); 155 | CHECK(a.v[2]==3); 156 | CHECK(a.v[3]==0); 157 | a=1.0; 158 | a=fvector<8>({0}); 159 | CHECK(a.v[5]==0.0); 160 | } 161 | 162 | 163 | TEST_CASE("tanh test") { 164 | TrackedVec x(1.0); 165 | TrackedVec y(1.0); 166 | TrackedVec res = makeFunc(x-y, TanhFunc()); 167 | 168 | CHECK(res.getVal().v[5] == doctest::Approx(0.0)); 169 | res.backward(); 170 | CHECK(x.getGrad().v[4] == doctest::Approx(1.0)); 171 | CHECK(y.getGrad().v[3] == doctest::Approx(-1.0)); 172 | 173 | res.zeroGrad(); 174 | x = 2; 175 | CHECK(res.getVal().v[7] == doctest::Approx(tanhf(1.0))); 176 | res.backward(); 177 | CHECK(x.getGrad().v[0] == doctest::Approx(1 - tanhf(1.0)*tanhf(1.0))); 178 | CHECK(y.getGrad().v[1] == -x.getGrad().v[6]); 179 | 180 | } 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /fvector.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | template 7 | struct fvector 8 | { 9 | float v __attribute__((vector_size (4*W))); 10 | fvector() 11 | {} 12 | fvector(float v0) 13 | { 14 | for(unsigned int n=0 ; n < W; ++n) 15 | v[n] = v0; 16 | } 17 | 18 | // be VERY careful, fvector<8>{0.0} will call this 19 | fvector(const std::initializer_list& in) 20 | { 21 | unsigned char ctr=0; 22 | for(const auto& val: in) 23 | v[ctr++] = val; 24 | for(; ctr < W; ++ctr) 25 | v[ctr] = 0.0; 26 | } 27 | 28 | fvector& operator=(float val) 29 | { 30 | *this = fvector(val); 31 | return *this; 32 | } 33 | 34 | fvector operator+(const fvector& rhs) 35 | { 36 | fvector ret = *this; 37 | ret.v += rhs.v; 38 | return ret; 39 | } 40 | fvector operator-(const fvector& rhs) const 41 | { 42 | fvector ret = *this; 43 | ret.v -= rhs.v; 44 | return ret; 45 | } 46 | fvector operator-() const 47 | { 48 | fvector ret = *this; 49 | ret.v = -ret.v; 50 | return ret; 51 | } 52 | 53 | fvector operator!() const 54 | { 55 | fvector ret = *this; 56 | for(unsigned int n = 0 ; n < W; ++n) 57 | ret.v[n] = !v[n]; 58 | 59 | return ret; 60 | } 61 | 62 | fvector operator*(const float& rhs) const 63 | { 64 | fvector ret = *this; 65 | ret.v *= rhs; 66 | return ret; 67 | } 68 | 69 | fvector operator*(const fvector& rhs) const 70 | { 71 | fvector ret = *this; 72 | ret.v *= rhs.v; 73 | return ret; 74 | } 75 | fvector operator/(const fvector& rhs) const 76 | { 77 | fvector ret = *this; 78 | ret.v /= rhs.v; 79 | return ret; 80 | } 81 | fvector& operator/=(const fvector& rhs) 82 | { 83 | v /= rhs.v; 84 | return *this; 85 | } 86 | fvector& operator*=(float rhs) 87 | { 88 | v *= rhs; 89 | return *this; 90 | } 91 | 92 | fvector& operator*=(const fvector& rhs) 93 | { 94 | v *= rhs.v; 95 | return *this; 96 | } 97 | fvector& operator+=(const fvector& rhs) 98 | { 99 | v += rhs.v; 100 | return *this; 101 | } 102 | fvector& operator+=(float rhs) 103 | { 104 | v += rhs; 105 | return *this; 106 | } 107 | 108 | fvector operator<(const fvector& rhs) const 109 | { 110 | fvector ret; 111 | // you'd think you'd be able to do this: 112 | // ret.v = (v < rhs.v); 113 | // but the output is Weird - if the comparison is valid, the output is 0 (!) 114 | // and if invalid, it is NaN (!!) https://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html 115 | 116 | for(unsigned int i = 0 ; i < W; ++i) 117 | ret.v[i] = v[i] < rhs.v[i]; 118 | 119 | return ret; 120 | } 121 | 122 | bool operator==(const fvector& rhs) const 123 | { 124 | for(unsigned int n=0; n < W; ++n) 125 | if(v[n] != rhs.v[n]) 126 | return false; 127 | return true; 128 | } 129 | 130 | float sum() const 131 | { 132 | float ret = 0; 133 | for(unsigned int n=0; n < W; ++n) 134 | ret += v[n]; 135 | return ret; 136 | } 137 | }; 138 | 139 | template 140 | auto operator*(float v, const fvector& rhs) 141 | { 142 | return rhs * v; 143 | } 144 | 145 | template 146 | auto operator/(float v, const fvector& rhs) 147 | { 148 | fvector ret = v; 149 | return ret/rhs; 150 | } 151 | 152 | template 153 | auto operator+(float v, const fvector& rhs) 154 | { 155 | fvector ret = v; 156 | return ret+rhs; 157 | } 158 | 159 | 160 | template 161 | auto exp(const fvector& v) 162 | { 163 | fvector ret; 164 | for(size_t i = 0; i < W ; ++i) { 165 | ret.v[i] = expf(v.v[i]); 166 | } 167 | return ret; 168 | } 169 | 170 | template 171 | auto log(const fvector& v) 172 | { 173 | fvector ret; 174 | for(size_t i = 0; i < W ; ++i) { 175 | ret.v[i] = logf(v.v[i]); 176 | } 177 | return ret; 178 | } 179 | 180 | template 181 | auto tanh(const fvector& v) 182 | { 183 | fvector ret; 184 | for(size_t i = 0; i < W ; ++i) { 185 | ret.v[i] = tanhf(v.v[i]); 186 | } 187 | return ret; 188 | } 189 | 190 | 191 | template 192 | std::ostream& operator<<(std::ostream& os, const fvector& o) 193 | { 194 | for(unsigned int n=0 ; n < W; ++n) 195 | os << o.v[n] <<" "; 196 | return os; 197 | } 198 | /* 199 | template 200 | void setZero(fvector& o) 201 | { 202 | o=0.0; 203 | } 204 | */ 205 | 206 | template 207 | auto maxFunc(const fvector& lhs, const fvector& rhs) 208 | { 209 | fvector ret; 210 | for(unsigned int i = 0 ; i < W; ++i) 211 | ret.v[i] = std::max(lhs.v[i], rhs.v[i]); 212 | return ret; 213 | } 214 | 215 | -------------------------------------------------------------------------------- /tensor-relu.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "tensor2.hh" 7 | #include "mnistreader.hh" 8 | #include "misc.hh" 9 | #include 10 | #include "tensor-layers.hh" 11 | #include "ext/sqlitewriter/sqlwriter.hh" 12 | #include "vizi.hh" 13 | using namespace std; 14 | 15 | struct ReluDigitModel { 16 | Tensor img{28,28}; 17 | Tensor scores{10, 1}; 18 | Tensor expected{1,10}; 19 | Tensor loss{1,1}; 20 | struct State : public ModelState 21 | { // IN OUT 22 | Linear lc1; 23 | Linear lc2; 24 | Linear lc3; 25 | 26 | State() 27 | { 28 | this->d_members = {{&lc1, "lc1"}, {&lc2, "lc2"}, {&lc3, "lc3"}}; 29 | } 30 | }; 31 | 32 | void init(State& s) 33 | { 34 | auto output = s.lc1.forward(makeFlatten({img})); 35 | auto output2 = makeFunction(output); 36 | auto output3 = s.lc2.forward(output2); 37 | auto output4 = makeFunction(output3); 38 | auto output5 = s.lc3.forward(output4); 39 | scores = makeLogSoftMax(output5); 40 | loss = -(expected*scores); 41 | } 42 | }; 43 | 44 | template 45 | void testModel(M& m, const MNISTReader& mn, SQLiteWriter& sqw, int batchno, bool full=false) 46 | { 47 | Batcher b(mn.num()); 48 | auto batch = b.getBatch(full ? mn.num() : 128); 49 | float totalLoss=0; 50 | int corrects=0, wrongs=0; 51 | auto topo = m.loss.getTopo(); 52 | 53 | Tensor confusion(10, 10); 54 | 55 | for(const auto& idx : batch) { 56 | m.loss.zerograd(topo); 57 | mn.pushImage(idx, m.img); 58 | int label = mn.getLabel(idx); 59 | m.expected.zero(); 60 | m.expected(0, label) = 1; 61 | 62 | totalLoss += m.loss(0,0); // turns it into a float 63 | 64 | int predicted = m.scores.maxValueIndexOfColumn(0); 65 | 66 | if(corrects + wrongs == 0) { 67 | printImgTensor(m.img); 68 | cout<<"predicted: "<<(int)predicted<<", actual: "< 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "array.hh" 7 | #include "tracked.hh" 8 | #include "mnistreader.hh" 9 | #include "misc.hh" 10 | 11 | using namespace std; 12 | 13 | ofstream g_tree; //("tree.part"); 14 | 15 | struct State { 16 | NNArray img; 17 | NNArray flat; 18 | NNArray output; 19 | int label; 20 | TrackedFloat score; 21 | TrackedFloat expected; 22 | TrackedFloat loss; 23 | void init(NNArray& weights, NNArray& bias) 24 | { 25 | img.zero(); 26 | flat = img.flatViewRow(); 27 | output = weights * flat + bias; 28 | score = makeFunc(output(0,0), SigmoidFunc()); 29 | expected = 0; 30 | loss = (expected - score) * (expected - score); 31 | } 32 | }; 33 | 34 | 35 | template 36 | void scoreModel(W& weights, B& bias, const MNISTReader& mntest) 37 | { 38 | unsigned int corrects=0, wrongs=0; 39 | int threes=0, sevens=0, threepreds=0, sevenpreds=0; 40 | M s; 41 | s.init(weights, bias); 42 | auto topo = s.score.getTopo(); 43 | for(unsigned int i = 0 ; i < mntest.num() - 1; ++i){ 44 | int label = mntest.getLabel(i); 45 | if(label==3) 46 | threes++; 47 | else if(label == 7) 48 | sevens++; 49 | else continue; 50 | 51 | mntest.pushImage(i, s.img); 52 | 53 | int verdict = s.score.getVal() < 0.5 ? 3 : 7; 54 | // cout<<"label "<<(int)label<<" score "< threeseven; 85 | for(unsigned int i = 0 ; i < mn.num(); ++i) { 86 | int label = mn.getLabel(i); 87 | if(label==3 || label == 7) 88 | threeseven.push_back(i); 89 | } 90 | cout<<"Have "< weights; 93 | NNArray bias; 94 | weights.randomize(); 95 | bias.randomize(); 96 | 97 | 98 | cout<<"Configuring network.. "; 99 | cout.flush(); 100 | 101 | vector states; 102 | for(int n=0; n <128; ++n) { 103 | State s; 104 | s.init(weights, bias); 105 | states.push_back(s); 106 | } 107 | TrackedFloat totalLoss=0; 108 | for(auto& s : states) 109 | totalLoss = totalLoss + s.loss; 110 | 111 | cout<<"done: "<< TrackedNumberImp::getCount() <<" nodes"<(weights, bias, mntest); 117 | 118 | auto batch = batcher.getBatch(states.size()); 119 | if(batch.empty()) 120 | break; 121 | 122 | for(size_t i = 0; i < batch.size(); ++i) { 123 | State& s = states[i]; 124 | auto idx = batch[i]; 125 | 126 | mn.pushImage(idx, s.img); 127 | s.label = mn.getLabel(idx); 128 | 129 | if(s.label == 7) 130 | s.expected = 1; 131 | if(s.label == 3) 132 | s.expected = 0; 133 | } 134 | 135 | cout<<"Average loss: "; 136 | cout.flush(); 137 | cout<d_val > 0.5 ? 7 : 3; // get the precalculated number 142 | if(predicted == s.label) 143 | corrects++; 144 | else wrongs++; 145 | if(predicted == 7) 146 | sevenpreds++; 147 | else if(predicted ==3) 148 | threepreds++; 149 | } 150 | cout<<"Percent batch correct: "<<100.0*corrects/(corrects+wrongs)<<" threepreds "< 11 | 12 | #include 13 | #include "cnn1.hh" 14 | #include "cnn-alphabet.hh" 15 | 16 | using TheModel = CNNAlphabetModel; 17 | 18 | using namespace std; 19 | 20 | ofstream g_tree; //("tree.part"); 21 | 22 | template 23 | void scoreModel(S& s, const MNISTReader& mntest, int batchno) 24 | { 25 | unsigned int corrects=0, wrongs=0; 26 | static ofstream vcsv("validation.csv"); 27 | static bool notfirst; 28 | if(!notfirst) { 29 | vcsv<<"batchno,corperc,avgloss\n"; 30 | notfirst=true; 31 | } 32 | 33 | M model; 34 | model.init(s); 35 | 36 | auto topo = model.loss.getTopo(); 37 | Batcher batcher(mntest.num()); 38 | auto batch = batcher.getBatch(100); 39 | double totalLoss=0; 40 | for(auto i : batch) { 41 | cout<<"."; cout.flush(); 42 | int label = mntest.getLabel(i) - 1; 43 | 44 | mntest.pushImage(i, model.img); 45 | model.expected.zero(); 46 | model.expected(0,label) = 1; // "one hot vector" 47 | 48 | int verdict = model.scores.maxValueIndexOfColumn(0); 49 | 50 | if(verdict == label) { 51 | corrects++; 52 | } 53 | else { 54 | wrongs++; 55 | } 56 | totalLoss += model.loss.getVal(); 57 | model.loss.zeroGrad(topo); 58 | } 59 | cout<<"\n"; 60 | double perc = corrects*100.0/(corrects+wrongs); 61 | double avgLoss = totalLoss/batch.size(); 62 | cout< 1) { 80 | cout<<"Loading model state from "< models; 88 | for(int n=0; n < 22; ++n) { 89 | TheModel rm; 90 | rm.init(s); 91 | models.push_back(rm); 92 | cout<<"."; 93 | cout.flush(); 94 | } 95 | 96 | cout<::getCount()<<" instances"<(s, mntest, batchno); 117 | 118 | auto batch = batcher.getBatch(models.size()); 119 | if(batch.size() != models.size()) 120 | break; 121 | vector labels(batch.size()); 122 | for(size_t i = 0; i < batch.size(); ++i) { 123 | TheModel& m = models.at(i); 124 | 125 | auto idx = batch.at(i); 126 | mn.pushImage(idx, m.img); 127 | labels[i] = mn.getLabel(idx) -1; 128 | 129 | m.expected.zero(); 130 | m.expected(0,labels[i]) = 1; // "one hot vector" 131 | } 132 | 133 | cout<<"Average loss: "; 134 | cout.flush(); 135 | cout<(s, mntest, batchno); 174 | } 175 | -------------------------------------------------------------------------------- /tensor-convo.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "tensor2.hh" 7 | #include "mnistreader.hh" 8 | #include "misc.hh" 9 | #include "vizi.hh" 10 | #include 11 | #include "tensor-layers.hh" 12 | #include "convo-alphabet.hh" 13 | #include 14 | 15 | using namespace std; 16 | 17 | template 18 | void testModel(SQLiteWriter& sqw, S& s, const MNISTReader& mn, unsigned int startID, int batchno) 19 | { 20 | M m; 21 | m.init(s, true); // production 22 | 23 | Batcher b(mn.num()); 24 | auto batch = b.getBatch(128); 25 | float totalLoss=0; 26 | int corrects=0, wrongs=0; 27 | 28 | auto topo = m.loss.getTopo(); 29 | DTime dt; 30 | dt.start(); 31 | for(const auto& idx : batch) { 32 | m.loss.zerograd(topo); 33 | mn.pushImage(idx, m.img); 34 | // normalize 35 | m.img.normalize(0.172575, 0.25); 36 | 37 | int label = mn.getLabel(idx) - 1; 38 | m.expected.zero(); 39 | m.expected(0, label) = 1; 40 | 41 | totalLoss += m.modelloss(0,0); // turns it into a float 42 | 43 | int predicted = m.scores.maxValueIndexOfColumn(0); 44 | 45 | if(corrects + wrongs == 0) { 46 | printImgTensor(m.img); 47 | cout<<"predicted: "<<(char)(predicted+'a')<<", actual: "<<(char)('a'+label)<<", loss: "<(sqw, s, mntest, startID, batchno); 105 | saveModelState(s, "tensor-convo.state"); 106 | } 107 | if(batchno < 32 || !(tries%32)) { 108 | s.emit(sqw, startID, batchno, batch.size()); 109 | } 110 | dt.start(); 111 | 112 | batchno++; 113 | 114 | float totalLoss = 0, totalWeightsLoss=0; 115 | unsigned int corrects=0, wrongs=0; 116 | 117 | m.loss.zeroAccumGrads(topo); 118 | 119 | for(const auto& idx : batch) { 120 | mn.pushImage(idx, m.img); 121 | // normalize 122 | m.img.normalize(0.172575, 0.25); 123 | 124 | int label = mn.getLabel(idx) -1; // they count from 1 over at NIST! 125 | m.expected.oneHotColumn(label); 126 | 127 | totalLoss += m.modelloss(0,0); // turns it into a float 128 | totalWeightsLoss += m.weightsloss(0,0); 129 | int predicted = m.scores.maxValueIndexOfColumn(0); 130 | 131 | if(corrects + wrongs == 0) { 132 | cout<<"predicted: "<<(char)(predicted+'a')<<", actual: "<<(char)(label+'a')<<", loss: "< 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "array.hh" 7 | #include "tracked.hh" 8 | #include "mnistreader.hh" 9 | #include "misc.hh" 10 | #include 11 | 12 | using namespace std; 13 | 14 | ofstream g_tree;//("tree.part"); 15 | 16 | struct ReluModel { 17 | NNArray img; 18 | 19 | int label; 20 | NNArray scores; 21 | NNArray expected; 22 | 23 | TrackedFloat loss; 24 | 25 | struct State 26 | { 27 | // out in 28 | NNArray w1; 29 | NNArray b1; 30 | 31 | NNArray w2; 32 | NNArray b2; 33 | 34 | NNArray w3; 35 | NNArray b3; 36 | 37 | }; 38 | 39 | void init(State& s) 40 | { 41 | img.zero(); 42 | auto flat = img.flatViewRow(); 43 | 44 | auto output = s.w1 * flat + s.b1; 45 | 46 | auto output2 = output.applyFunc(ReluFunc()); 47 | 48 | auto output3 = s.w2 * output2 + s.b2; 49 | auto output4 = output3.applyFunc(ReluFunc()); 50 | 51 | auto output5 = s.w3 * output4 + s.b3; 52 | 53 | scores = output5.logSoftMax(); 54 | expected.zero(); 55 | loss = TrackedFloat(0)-(expected*scores)(0,0); 56 | } 57 | }; 58 | 59 | template 60 | void scoreModel(S& s, const MNISTReader& mntest) 61 | { 62 | unsigned int corrects=0, wrongs=0; 63 | 64 | M model; 65 | model.init(s); 66 | 67 | unsigned int limit = mntest.num() - 1; 68 | limit=100; 69 | for(unsigned int i = 0 ; i < limit; ++i){ 70 | cout<<"."; 71 | cout.flush(); 72 | int label = mntest.getLabel(i); 73 | 74 | mntest.pushImage(i, model.img); 75 | 76 | int verdict = model.scores.maxValueIndexOfColumn(0); 77 | // cout<<"label "<<(int)label<<" result(0) "< models; 118 | for(int n=0; n <256; ++n) { 119 | ReluModel rm; 120 | rm.init(s); 121 | models.push_back(rm); 122 | cout<<"."; 123 | cout.flush(); 124 | } 125 | 126 | cout<::getCount()<<" instances"<(s, mntest); 144 | 145 | 146 | auto batch = batcher.getBatch(models.size()); 147 | if(batch.empty()) 148 | break; 149 | for(size_t i = 0; i < batch.size(); ++i) { 150 | ReluModel& m = models.at(i); 151 | 152 | auto idx = batch.at(i); 153 | 154 | mn.pushImage(idx, m.img); 155 | 156 | m.label = mn.getLabel(idx); 157 | 158 | m.expected.zero(); 159 | m.expected(0,m.label) = 1; 160 | } 161 | 162 | cout<<"Average loss: "; 163 | cout.flush(); 164 | cout<(s, mntest); 202 | } 203 | 204 | -------------------------------------------------------------------------------- /trackedfuncs.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "fvector.hh" 3 | 4 | struct FuncStruct 5 | { 6 | typedef float (*floatfunc_t)(const float& f); 7 | typedef fvector<4>(*fvect4func_t)(const fvector<4>& f); 8 | typedef fvector<8>(*fvect8func_t)(const fvector<8>& f); 9 | floatfunc_t func; 10 | floatfunc_t deriv; 11 | fvect4func_t func4; 12 | fvect4func_t deriv4; 13 | fvect8func_t func8; 14 | fvect8func_t deriv8; 15 | std::string name; 16 | 17 | template 18 | using retfunc = T(*)(const T&); 19 | }; 20 | 21 | template // primary template 22 | FuncStruct::retfunc getFunc(const FuncStruct&); 23 | 24 | template<> 25 | inline 26 | FuncStruct::retfunc getFunc(const FuncStruct& fs) 27 | { 28 | return fs.func; 29 | } 30 | 31 | template<> 32 | inline 33 | FuncStruct::retfunc> getFunc(const FuncStruct& fs) 34 | { 35 | return fs.func4; 36 | } 37 | 38 | template<> inline 39 | FuncStruct::retfunc> getFunc(const FuncStruct& fs) 40 | { 41 | return fs.func8; 42 | } 43 | 44 | template // primary template 45 | FuncStruct::retfunc getDeriv(const FuncStruct&); 46 | 47 | template<> inline 48 | FuncStruct::retfunc getDeriv(const FuncStruct& fs) 49 | { 50 | return fs.deriv; 51 | } 52 | 53 | template<> inline 54 | FuncStruct::retfunc> getDeriv(const FuncStruct& fs) 55 | { 56 | return fs.deriv4; 57 | } 58 | 59 | 60 | template<> inline 61 | FuncStruct::retfunc> getDeriv(const FuncStruct& fs) 62 | { 63 | return fs.deriv8; 64 | } 65 | 66 | inline FuncStruct MakeExpFunc() 67 | { 68 | FuncStruct ret; 69 | ret.func = [](const float& f) -> float { return expf(f);}; 70 | ret.deriv = [](const float& f) -> float { return expf(f);}; 71 | ret.func4 = [](const fvector<4>& f) { return exp(f);}; 72 | ret.deriv4 = [](const fvector<4>& f) -> fvector<4> { return exp(f);}; 73 | ret.func8 = [](const fvector<8>& f) { return exp(f);}; 74 | ret.deriv8 = [](const fvector<8>& f) -> fvector<8> { return exp(f);}; 75 | ret.name="exp"; 76 | return ret; 77 | } 78 | 79 | inline FuncStruct MakeSquareFunc() 80 | { 81 | FuncStruct ret; 82 | ret.func = [](const float& f) -> float { return f*f;}; 83 | ret.deriv = [](const float& f) -> float { return 2*f;}; 84 | ret.func4 = [](const fvector<4>& f) { return f*f;}; 85 | ret.deriv4 = [](const fvector<4>& f) -> fvector<4> { return 2*f;}; 86 | ret.func8 = [](const fvector<8>& f) { return f*f;}; 87 | ret.deriv8 = [](const fvector<8>& f) -> fvector<8> { return 2*f;}; 88 | ret.name="square"; 89 | return ret; 90 | } 91 | 92 | inline FuncStruct MakeReluFunc() 93 | { 94 | FuncStruct ret; 95 | ret.func = [](const float& f) -> float { return std::max(0.0F, f); }; 96 | ret.deriv = [](const float& f) -> float { return f < 0.0 ? 0.0F : 1.0F; }; 97 | ret.func4 = [](const fvector<4>& in) { 98 | fvector<4> cmp = 0; 99 | return (cmp < in) * in; 100 | }; 101 | ret.deriv4 = [](const fvector<4>& in) { 102 | fvector<4> cmp = 0; 103 | return (cmp < in) * 1.0; 104 | }; 105 | ret.func8 = [](const fvector<8>& in) { 106 | fvector<8> cmp = 0; 107 | return (cmp < in) * in; 108 | }; 109 | ret.deriv8 = [](const fvector<8>& in) { 110 | fvector<8> cmp = 0; 111 | return (cmp < in) * 1.0; 112 | }; 113 | ret.name="relu"; 114 | return ret; 115 | } 116 | 117 | 118 | inline FuncStruct MakeTanhFunc() 119 | { 120 | FuncStruct ret; 121 | ret.func = [](const float& in) { return tanhf(in); }; 122 | ret.deriv = [](const float& in) { float t = tanh(in); return 1-t*t; }; 123 | 124 | ret.func8 = [](const fvector<8>& in) { return tanh(in); }; 125 | ret.deriv8 =[](const fvector<8>& in) { fvector<8> t = tanh(in); return fvector<8>(1.0) - t*t; }; 126 | ret.name = "tanh"; 127 | return ret; 128 | }; 129 | 130 | 131 | inline FuncStruct MakeSigmoidFunc() 132 | { 133 | FuncStruct ret; 134 | ret.func8 = [](const fvector<8>& in) 135 | { 136 | return 1.0F / (fvector<8>(1.0F) + exp(-in)); 137 | }; 138 | ret.deriv8 = [](const fvector<8>& in) 139 | { 140 | fvector<8> sigma = 1.0F / (fvector<8>(1.0F) + exp(-in)); 141 | return sigma * (fvector<8>(1.0F) - sigma); 142 | }; 143 | 144 | ret.func = [](const float& in) 145 | { 146 | return 1.0F / (1.0F + expf(-in)); 147 | }; 148 | ret.deriv = [](const float& in) 149 | { 150 | float sigma = 1.0F / (1.0F + expf(-in)); 151 | return sigma * (1.0F - sigma); 152 | }; 153 | ret.name="sigmoid"; 154 | return ret; 155 | }; 156 | 157 | 158 | inline FuncStruct MakeLogFunc() 159 | { 160 | struct FuncStruct ret; 161 | 162 | ret.func = [](const float& in) 163 | { 164 | if (in == 0.0F) 165 | return -80.0F; 166 | return logf(in); 167 | }; 168 | ret.deriv = [](const float& in) 169 | { 170 | if (in == 0.0F) 171 | return 80.0F; 172 | return 1.0F / in; 173 | }; 174 | 175 | ret.func8 = [](const fvector<8>& in) 176 | { 177 | return log(in); 178 | }; 179 | ret.deriv8 = [](const fvector<8>& in) 180 | { 181 | return 1.0/in; 182 | }; 183 | 184 | ret.name = "log"; 185 | return ret; 186 | }; 187 | 188 | static std::vector g_fss({ 189 | MakeSigmoidFunc(), // 0 190 | MakeReluFunc(), // 1 191 | MakeExpFunc(), // 2 192 | MakeLogFunc(), // 3 193 | MakeTanhFunc(), // 4 194 | MakeSquareFunc()});// 5 195 | 196 | inline uint8_t SigmoidFunc(){ return 0; } 197 | inline uint8_t ReluFunc(){ return 1; } 198 | inline uint8_t ExpFunc(){ return 2; } 199 | inline uint8_t LogFunc(){ return 3; } 200 | inline uint8_t TanhFunc(){ return 4; } 201 | inline uint8_t SquareFunc(){ return 5; } 202 | 203 | 204 | -------------------------------------------------------------------------------- /alternate/modular-linear-convo.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "array.hh" 7 | #include "tracked.hh" 8 | #include "mnistreader.hh" 9 | #include "misc.hh" 10 | #include 11 | 12 | #include 13 | #include "cnn1.hh" 14 | #include "cnn-alphabet.hh" 15 | #include "fvector.hh" 16 | 17 | using TheModel = CNNAlphabetModel>; 18 | 19 | using namespace std; 20 | 21 | ofstream g_tree; //("tree.part"); 22 | 23 | template 24 | void scoreModel(S& s, const MNISTReader& mntest, int batchno) 25 | { 26 | unsigned int corrects=0, wrongs=0; 27 | static ofstream vcsv("validation2.csv"); 28 | static bool notfirst; 29 | if(!notfirst) { 30 | vcsv<<"batchno,corperc,avgloss\n"; 31 | notfirst=true; 32 | } 33 | 34 | M mod; 35 | mod.init(s); 36 | 37 | auto topo = mod.loss.getTopo(); 38 | Batcher batcher(mntest.num()); 39 | auto batch = batcher.getBatch(80); 40 | 41 | fvector<8> totLoss=0.0; 42 | 43 | while(!batch.empty()) { 44 | mod.expected.zero(); 45 | fvector<8> label = 0; 46 | for(int n = 0; n < 8 ; ++n) { 47 | auto idx = batch.front(); 48 | batch.pop_front(); 49 | 50 | mntest.pushImage(idx, mod.img, n); 51 | label.v[n] = mntest.getLabel(idx) - 1; // a == 1.. 52 | 53 | mod.expected(0, label.v[n]).impl->d_val.v[n] = 1; // "one hot vector" 54 | } 55 | 56 | totLoss += mod.loss.getVal(); 57 | 58 | for(int n = 0; n < 8; ++n) { 59 | int verdict = mod.scores.getUnparallel(n).maxValueIndexOfColumn(0); 60 | if(verdict == label.v[n]) 61 | corrects++; 62 | else 63 | wrongs++; 64 | } 65 | mod.loss.zeroGrad(topo); 66 | } 67 | double avgLoss = totLoss.sum()/80.0; 68 | double perc = 100.0*corrects/(corrects+wrongs); 69 | cout< 1) { 87 | cout<<"Loading model state from "<::getCount()<<" instances, model has "<(s, mntest, batchno); 111 | DTime dt; 112 | dt.start(); 113 | auto batch = batcher.getBatch(batchsize); 114 | if(batch.size() != batchsize) 115 | break; 116 | 117 | fvector<8> totLoss=0.0; 118 | int corrects=0, wrongs=0; 119 | 120 | TheModel::State gather; 121 | gather.zeroGrad(); 122 | 123 | while(!batch.empty()) { 124 | mod.expected.zero(); 125 | fvector<8> label=0; 126 | for(int n = 0; n < 8 ; ++n) { 127 | auto idx = batch.front(); 128 | batch.pop_front(); 129 | 130 | mn.pushImage(idx, mod.img, n); 131 | label.v[n] = mn.getLabel(idx) - 1; // a == 1.. 132 | 133 | mod.expected(0, label.v[n]).impl->d_val.v[n] = 1; // "one hot vector" 134 | } 135 | 136 | totLoss += mod.loss.getVal(); 137 | mod.loss.backward(topo); 138 | 139 | gather.addGrad(s); 140 | 141 | for(int n = 0; n < 8; ++n) { 142 | int verdict = mod.scores.getUnparallel(n).maxValueIndexOfColumn(0); 143 | 144 | if(corrects + wrongs == 0) { 145 | cout<<"Predicted: '"<< (char)('a'+verdict)<<"', actual: '"<< (char)('a'+label.v[n]) <<"': "; 146 | if(verdict == label.v[n]) 147 | cout<<"We got it right!"< 8 | struct GRUModel 9 | { 10 | struct State : ModelState 11 | { 12 | // IN HIDDEN 13 | GRULayer gm1; 14 | GRULayer gm2; 15 | GRULayer gm3; 16 | Linear fc; 17 | 18 | State() 19 | { 20 | this->d_members = {{&gm1, "gm1"}, {&gm2, "gm2"}, {&gm3, "gm3"}, {&fc, "fc"}}; 21 | } 22 | 23 | }; 24 | vector> invec; // SYMBOLS 1 25 | vector> expvec; // 1 SYMBOLS 26 | vector> scorevec;// SYMBOLS 1 27 | 28 | Tensor totloss; 29 | 30 | void unroll(State& s, unsigned int choplen) 31 | { 32 | cout<<"Unrolling the GRU"; 33 | totloss= Tensor(1, 1); 34 | totloss(0,0) = 0.0; 35 | Tensor choplent(1,1); 36 | choplent(0,0) = choplen; 37 | 38 | for(size_t i = 0 ; i < choplen; ++i) { 39 | cout<<"."; cout.flush(); 40 | Tensor in(SYMBOLS, 1); 41 | Tensor expected(1,SYMBOLS); 42 | in.zero(); 43 | expected.zero(); 44 | 45 | invec.push_back(in); 46 | expvec.push_back(expected); 47 | auto res1 = s.fc.forward(s.gm3.forward(s.gm2.forward(s.gm1.forward(in)))); 48 | auto score = makeLogSoftMax(res1); 49 | scorevec.push_back(score); 50 | auto loss = -(expected*score); 51 | totloss = totloss + loss; 52 | } 53 | totloss = totloss/choplent; // otherwise the gradient is too high 54 | cout<<"\n"; 55 | } 56 | }; 57 | 58 | int sampleWithTemperature(const Tensor::EigenMatrix& in, float t [[maybe_unused]] = 0) 59 | { 60 | std::random_device rd; 61 | std::mt19937 gen(rd()); 62 | vector ref; 63 | for(int r = 0; r< in.rows();++r) 64 | for(int c = 0; c< in.cols();++c) 65 | ref.push_back(exp(3.0*in(r,c))); 66 | 67 | std::discrete_distribution<> d(ref.begin(), ref.end()); 68 | int pick = d(gen); 69 | 70 | return pick; 71 | } 72 | 73 | int main(int argc, char **argv) 74 | { 75 | constexpr int tokens = 95; 76 | BiMapper bm("corpus.txt",tokens); 77 | constexpr int choplen= 75; 78 | vector sentences=textChopper("corpus.txt", choplen, 10); 79 | cout<<"Got "< grum; 82 | GRUModel::State s; 83 | 84 | vector charset; 85 | for(int n=0; n < tokens;++n) { 86 | cout< 2) { 96 | int len = 1000; 97 | grum.unroll(s, len); 98 | cout<<"Loading state from "<d_val); 115 | // res = grum.scorevec[n].maxValueIndexOfColumn(0); 116 | cout << bm.i2c(res); 117 | } 118 | cout<1) { 126 | cout<<"Loading state from "< 6 | #include 7 | #define GL_SILENCE_DEPRECATION 8 | #if defined(IMGUI_IMPL_OPENGL_ES2) 9 | #include 10 | #endif 11 | #include // Will drag system OpenGL headers 12 | #include 13 | #include "tracked.hh" 14 | using namespace std; 15 | 16 | 17 | 18 | 19 | static void glfw_error_callback(int error, const char* description) 20 | { 21 | fprintf(stderr, "Glfw Error %d: %s\n", error, description); 22 | } 23 | 24 | int graphicsThread() 25 | { 26 | glfwSetErrorCallback(glfw_error_callback); 27 | if (!glfwInit()) 28 | return 1; 29 | 30 | // Decide GL+GLSL versions 31 | #if defined(IMGUI_IMPL_OPENGL_ES2) 32 | // GL ES 2.0 + GLSL 100 33 | const char* glsl_version = "#version 100"; 34 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2); 35 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0); 36 | glfwWindowHint(GLFW_CLIENT_API, GLFW_OPENGL_ES_API); 37 | #elif defined(__APPLE__) 38 | // GL 3.2 + GLSL 150 39 | const char* glsl_version = "#version 150"; 40 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); 41 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 2); 42 | glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // 3.2+ only 43 | glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // Required on Mac 44 | #else 45 | // GL 3.0 + GLSL 130 46 | const char* glsl_version = "#version 130"; 47 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); 48 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0); 49 | //glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // 3.2+ only 50 | //glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // 3.0+ only 51 | #endif 52 | 53 | // Create window with graphics context 54 | GLFWwindow* window = glfwCreateWindow(1280, 720, "Hello Deep Learning", NULL, NULL); 55 | if (window == NULL) 56 | return 1; 57 | glfwMakeContextCurrent(window); 58 | glfwSwapInterval(1); // Enable vsync 59 | 60 | // Setup Dear ImGui context 61 | IMGUI_CHECKVERSION(); 62 | ImGui::CreateContext(); 63 | ImPlot::CreateContext(); 64 | ImGuiIO& io = ImGui::GetIO(); (void)io; 65 | //io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard; // Enable Keyboard Controls 66 | //io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad; // Enable Gamepad Controls 67 | 68 | // Setup Dear ImGui style 69 | ImGui::StyleColorsDark(); 70 | // ImGui::StyleColorsLight(); 71 | 72 | // Setup Platform/Renderer backends 73 | ImGui_ImplGlfw_InitForOpenGL(window, true); 74 | ImGui_ImplOpenGL3_Init(glsl_version); 75 | 76 | io.Fonts->AddFontFromFileTTF("/usr/share/fonts/truetype/msttcorefonts/Comic_Sans_MS.ttf", 18.0f); 77 | 78 | // Our state 79 | ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f); 80 | 81 | shared_ptr hp; 82 | // Main loop 83 | while (!glfwWindowShouldClose(window)) 84 | { 85 | auto hp = *g_hyper; 86 | glfwPollEvents(); 87 | 88 | // Start the Dear ImGui frame 89 | ImGui_ImplOpenGL3_NewFrame(); 90 | ImGui_ImplGlfw_NewFrame(); 91 | ImGui::NewFrame(); 92 | 93 | // ImPlot::ShowDemoWindow(); 94 | { 95 | 96 | ImGui::Begin("Hello, deep learning!"); // Create a window called "Hello, world!" and append into it. 97 | 98 | ImGui::SliderInt("Batch multiple", &hp.batchMult, 1, 32); 99 | ImGui::SliderFloat("Learning rate", &hp.lr, 0.0f, 0.2f); 100 | ImGui::SliderFloat("Momentum", &hp.momentum, 0.0f, 1.0f); 101 | 102 | 103 | if (ImGui::Button("Button")) // Buttons return true when clicked (most widgets return true when edited/activated) 104 | std::cout<<"Clicked!"<::getCount()); 107 | ImGui::Text("Trained %d characters", g_progress.trained.load()); 108 | ImGui::Text("Last batch of %d characters took %.1f seconds", hp.getBatchSize(), g_progress.lastTook); 109 | 110 | if (g_progress.losses.size()>0 && ImPlot::BeginPlot("Loss plot")) { 111 | // ImPlot::SetupAxes(NULL,NULL,ImPlotAxisFlags_AutoFit|ImPlotAxisFlags_RangeFit, ImPlotAxisFlags_AutoFit|ImPlotAxisFlags_RangeFit); 112 | ImPlot::SetupAxes("Batch", "Loss"); 113 | ImPlot::SetupAxisLimits(ImAxis_Y1, 2, 7); 114 | ImPlot::SetupAxis(ImAxis_Y2, "Correct",ImPlotAxisFlags_AuxDefault); 115 | ImPlot::SetupAxisLimits(ImAxis_Y2, 0, 100); 116 | ImPlot::SetAxes(ImAxis_X1, ImAxis_Y1); 117 | ImPlot::PlotLine("Loss", &g_progress.losses[0], g_progress.losses.size()); 118 | ImPlot::SetAxes(ImAxis_X1, ImAxis_Y2); 119 | ImPlot::PlotLine("Corrects", &g_progress.corrects[0], g_progress.corrects.size()); 120 | ImPlot::EndPlot(); 121 | } 122 | 123 | ImGui::End(); 124 | } 125 | 126 | 127 | // Rendering 128 | ImGui::Render(); 129 | int display_w, display_h; 130 | glfwGetFramebufferSize(window, &display_w, &display_h); 131 | glViewport(0, 0, display_w, display_h); 132 | glClearColor(clear_color.x * clear_color.w, clear_color.y * clear_color.w, clear_color.z * clear_color.w, clear_color.w); 133 | glClear(GL_COLOR_BUFFER_BIT); 134 | ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); 135 | 136 | glfwSwapBuffers(window); 137 | auto newhyper = make_shared(hp); 138 | g_hyper.swap(newhyper); 139 | } 140 | 141 | // Cleanup 142 | ImGui_ImplOpenGL3_Shutdown(); 143 | ImGui_ImplGlfw_Shutdown(); 144 | ImPlot::DestroyContext(); 145 | ImGui::DestroyContext(); 146 | 147 | 148 | glfwDestroyWindow(window); 149 | glfwTerminate(); 150 | return 0; 151 | } 152 | -------------------------------------------------------------------------------- /alternate/workertest.cc: -------------------------------------------------------------------------------- 1 | #include "tracked.hh" 2 | #include "array.hh" 3 | #include "ext/doctest.h" 4 | 5 | using namespace std; 6 | 7 | TEST_CASE("basic worker test") { 8 | TrackedFloat a(2.0), b(3.0), c(1.0); 9 | auto d = a * b + c; 10 | a.setVariable(); 11 | float res = d.getVal(); 12 | cout<<"res is: "<(topo); 17 | for(const auto& g : w.dyns) { 18 | cout<<"Setting variable ["<d_val<<"\n"; 19 | w.work[g.first].ourval = g.second->d_val; 20 | } 21 | 22 | 23 | int num=0; 24 | for(const auto& item : w.work) { 25 | cout<< "["<d_val = 1.0; 31 | d.backward(); 32 | d.zeroGrad(); 33 | 34 | cout<<"New value: "<(topo); 57 | float wresult = w.getResult(); 58 | cout<<"Worker result: "< img, weights; 74 | img.randomize(); 75 | weights.randomize(); 76 | weights.needsGrad(); 77 | 78 | auto c = weights * img; 79 | auto sumval = c.sum(); 80 | float res = sumval.getVal(); 81 | sumval.backward(); 82 | img.zeroGrad(); // backward populates grads, when it shouldn't 83 | cout<<"res is: "<(topo); 89 | CHECK(w.dyns.size() == 8); 90 | cout<<"worker res: "< zero; 107 | zero.setZero(); 108 | auto imggrad = img.getGrad(); 109 | CHECK(imggrad == zero); 110 | 111 | cout<<"sizeof: "<< sizeof(w.work[0]) < a; 118 | NNArray b; 119 | a.randomize(); 120 | b.randomize(); 121 | TrackedFloat res = (a * b).sum(); 122 | float nresult = res.getVal(); 123 | 124 | auto topo = res.getTopo(); 125 | auto w=res.getWork(topo); 126 | 127 | auto proj = makeProj(a, w); 128 | 129 | float wresult = w.getResult(); 130 | CHECK(nresult == wresult); 131 | 132 | a(0,0) = a(0,0).getVal() + 1; 133 | res.zeroGrad(); // flushes cache 134 | 135 | projForward(proj, a, w); 136 | cout<<"nresult: "< a; 143 | NNArray b; 144 | a.randomize(); 145 | b.randomize(); 146 | TrackedFloat res = (a * b).sum(); 147 | float nresult = res.getVal(); 148 | 149 | auto topo = res.getTopo(); 150 | auto w=res.getWork>(topo); 151 | 152 | auto proj = makeProj(a, w); 153 | 154 | auto wresult = w.getResult(); 155 | CHECK(nresult == wresult.v[0]); 156 | CHECK(nresult == wresult.v[1]); 157 | CHECK(nresult == wresult.v[2]); 158 | CHECK(nresult == wresult.v[7]); 159 | 160 | a(0,0) = a(0,0).getVal() + 1; 161 | res.zeroGrad(); // flushes cache 162 | 163 | projForward(proj, a, w); 164 | cout<<"nresult: "< a; 173 | NNArray b; 174 | a.zero(); 175 | b.zero(); 176 | a.setVariable(); 177 | b.setVariable(); 178 | 179 | auto c = a+b; 180 | auto res = c(0,0); 181 | float nresult = res.getVal(); 182 | 183 | CHECK(nresult == 0.0); 184 | 185 | auto topo = res.getTopo(); 186 | 187 | auto w = res.getWork(topo); 188 | auto aproj = makeProj(a, w); 189 | auto bproj = makeProj(b, w); 190 | 191 | auto w8=res.getWork>(topo); 192 | 193 | auto wresult = w8.getResult(); 194 | CHECK(wresult.v[0] == 0 ); 195 | CHECK(wresult.v[1] == 0 ); 196 | CHECK(wresult.v[2] == 0 ); 197 | CHECK(wresult.v[3] == 0 ); 198 | 199 | 200 | NNArray, 1, 5> a8; 201 | a8(0,0) = 0; 202 | a8(0,0).impl->d_val.v[0]=1; 203 | a8(0,0).impl->d_val.v[1]=2; 204 | a8(0,0).impl->d_val.v[2]=4; 205 | 206 | 207 | NNArray, 1, 5> b8; 208 | b8(0,0) = 0; 209 | b8(0,0).impl->d_val.v[0]=4; 210 | b8(0,0).impl->d_val.v[1]=5; 211 | b8(0,0).impl->d_val.v[2]=6; 212 | 213 | 214 | res.zeroGrad(); // flushes cache 215 | 216 | projForward(aproj, a8, w8); 217 | projForward(bproj, b8, w8); 218 | 219 | wresult = w8.getResult(); 220 | CHECK(wresult.v[0] == 5); 221 | CHECK(wresult.v[1] == 7); 222 | CHECK(wresult.v[2] == 10); 223 | 224 | 225 | } 226 | -------------------------------------------------------------------------------- /gru.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "layers.hh" 3 | 4 | 5 | // hidden state=> dense linear => output x 6 | 7 | // x is input 8 | // h_t, h_{t-1} = hidden state 9 | 10 | // gate_{reset} = \sigma(W_{input_{reset}} \cdot x_t + W_{hidden_{reset}} \cdot h_{t-1}) 11 | 12 | // W_input_reset - ^^ normal matrix products 13 | // W_input_hidden 14 | 15 | // pytorch: 16 | 17 | // r_t = σ(W_{ir} x_t + b_{ir}+W_{hr}h_{t−1} +b_{hr}) // reset gate 18 | // z_t = σ(W_{iz} x_t + b_{iz} +W_{hz}h_{t−1} + b_{hz}) // update 19 | // n_t = tanh(W_{in}x_t+b_{in}+ r_t*(W_{hn} h_{t−1} + b_{hn})) // "new" - * is dotproduct 20 | // h_t=(1−z_t)*n_t+z_t*h_{t−1)} // new h 21 | 22 | // the hidden state is also the output, which needs linear combination to turn into input size again 23 | // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html 24 | template