├── img ├── boxed.png └── learning.mp4 ├── samples ├── test1.jpg └── tensor-convo-20230122-final-gelu.state ├── .gitignore ├── basic-autograd.cc ├── vizi.hh ├── textsupport.hh ├── model.hh ├── LICENSE ├── los3.cc ├── try-convo.cc ├── tensormodtest.cc ├── imagine.cc ├── .github └── workflows │ └── cmake.yml ├── mnistreader.hh ├── mnistposter.cc ├── grutest.cc ├── textsupport.cc ├── convo-alphabet.hh ├── misc.hh ├── mnistreader.cc ├── cnn1.hh ├── 37learn.cc ├── vizi.cc ├── gru-layer.hh ├── ext └── sqlitewriter │ ├── sqlwriter.hh │ └── sqlwriter.cc ├── threeorseven.cc ├── CMakeLists.txt ├── cnn-alphabet.hh ├── testfvector.cc ├── fvector.hh ├── tensor-relu.cc ├── alternate ├── tensor.cc ├── modular-convo.cc ├── first-relu.cc ├── modular-linear-convo.cc ├── workertest.cc ├── modular-threaded-convo.cc ├── worker-convo-avx-threaded.cc ├── worker-convo.cc └── worker-convo-avx.cc ├── tensor-convo.cc ├── trackedfuncs.hh ├── tensor-gru.cc ├── ui.cc ├── gru.hh ├── los.cc ├── los2.cc ├── testrunner.cc ├── hello-dl.cc ├── layers.hh └── tensor-layers.hh /img/boxed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/hello-dl/main/img/boxed.png -------------------------------------------------------------------------------- /img/learning.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/hello-dl/main/img/learning.mp4 -------------------------------------------------------------------------------- /samples/test1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/berthubert/hello-dl/main/samples/test1.jpg -------------------------------------------------------------------------------- /samples/tensor-convo-20230122-final-gelu.state: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/berthubert/hello-dl/main/samples/tensor-convo-20230122-final-gelu.state -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | hello-dl 3 | # Prerequisites 4 | *.d 5 | 6 | # Compiled Object files 7 | *.slo 8 | *.lo 9 | *.o 10 | *.obj 11 | 12 | # Precompiled Headers 13 | *.gch 14 | *.pch 15 | 16 | # Compiled Dynamic libraries 17 | *.so 18 | *.dylib 19 | *.dll 20 | 21 | # Fortran module files 22 | *.mod 23 | *.smod 24 | 25 | # Compiled Static libraries 26 | *.lai 27 | *.la 28 | *.a 29 | *.lib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | -------------------------------------------------------------------------------- /basic-autograd.cc: -------------------------------------------------------------------------------- 1 | #include "tensor2.hh" 2 | #include 3 | using namespace std; 4 | 5 | int main() 6 | { 7 | #if 0 8 | Tensor x(2.0f); 9 | Tensor z(0.0f); 10 | Tensor a(1.0f); 11 | Tensor y = x * (z + a); 12 | y(0,0); 13 | y.backward(); 14 | 15 | #else 16 | Tensor x(2.0f); 17 | Tensor z(0.0f); 18 | Tensor y = Tensor(3.0f)*x*x*x + Tensor(4.0f)*x + Tensor(1.0f) + x*z; 19 | y(0,0); 20 | y.backward(); 21 | cout << "y = "<< y << endl; // 3*8 + 4*2 + 1 = 33 22 | 23 | 24 | 25 | cout << "dy/dx = " << x.getGrad() << endl; // 9*x^2 + 4 = 40 26 | cout << "dy/dz = " << z.getGrad() << endl; // 2 27 | #endif 28 | } 29 | -------------------------------------------------------------------------------- /vizi.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "tensor2.hh" 5 | #include "ext/stb/stb_truetype.h" 6 | 7 | template 8 | void printImgTensor(const T& img) 9 | { 10 | for(unsigned int y=0; y < img.getRows(); ++y) { 11 | for(unsigned int x=0; x < img.getCols(); ++x) { 12 | float val = img(y,x); 13 | if(val > 0.5) 14 | 
std::cout<<'X'; 15 | else if(val > 0.25) 16 | std::cout<<'*'; 17 | else if(val > 0.125) 18 | std::cout<<'.'; 19 | else 20 | std::cout<<' '; 21 | } 22 | std::cout<<'\n'; 23 | } 24 | std::cout<<"\n"; 25 | } 26 | 27 | struct FontWriter 28 | { 29 | FontWriter(); 30 | void writeChar(char ch, int s, int c, int r, std::function f); 31 | stbtt_fontinfo d_font; 32 | std::vector d_ttf_buffer; 33 | }; 34 | 35 | void saveTensor(const Tensor& t, const std::string& fname, int size, bool monochrome=false); 36 | -------------------------------------------------------------------------------- /textsupport.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | std::vector textChopper(const char* fname, size_t siz, int mult=1); 8 | 9 | class BiMapper 10 | { 11 | public: 12 | explicit BiMapper(const char* fname, int lim=-1); 13 | int c2i(char c) const 14 | { 15 | auto iter = d_c2i.find(c); 16 | if(iter == d_c2i.end()) { 17 | // std::cout<<("Attempting to find unknown character with value '"+std::to_string((int)c)+"'")<second; 21 | } 22 | char i2c(int i) const 23 | { 24 | auto iter = d_i2c.find(i); 25 | if(iter == d_i2c.end()) { 26 | // std::cout<<("Attempting to find unknown integer "+std::to_string(i))<second; 30 | } 31 | 32 | private: 33 | std::unordered_map d_c2i; 34 | std::unordered_map d_i2c; 35 | }; 36 | 37 | 38 | -------------------------------------------------------------------------------- /model.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "layers.hh" 3 | #include 4 | 5 | struct ModelState 6 | { 7 | std::vector d_members; 8 | void save(std::ostream& out) const 9 | { 10 | for(const auto& mem : d_members) 11 | mem->save(out); 12 | } 13 | void save(std::string& out) const 14 | { 15 | std::ostringstream os; 16 | for(const auto& mem : d_members) 17 | mem->save(os); 18 | 19 | out=os.str(); 20 | } 
21 | 22 | void load(std::istream& in) 23 | { 24 | for(auto& mem : d_members) 25 | mem->load(in); 26 | } 27 | 28 | void load(std::string& in) 29 | { 30 | std::istringstream is(in); 31 | load(is); 32 | } 33 | 34 | void learn(float lr) 35 | { 36 | for(auto& mem : d_members) 37 | mem->learn(lr); 38 | } 39 | 40 | void zeroGrad() 41 | { 42 | for(auto& mem : d_members) 43 | mem->zeroGrad(); 44 | } 45 | 46 | uint32_t size() 47 | { 48 | size_t ret = 0; 49 | for(auto& mem : d_members) 50 | ret += mem->size(); 51 | return ret; 52 | } 53 | 54 | }; 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 bert hubert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /los3.cc: -------------------------------------------------------------------------------- 1 | #define EIGEN_USE_THREADS 2 | #include 3 | #include 4 | #include "tensor2.hh" 5 | #include "misc.hh" 6 | 7 | #include 8 | using namespace std; 9 | 10 | int main(int argc, char**argv) 11 | { 12 | DTime dt; 13 | dt.start(); 14 | float t=0; 15 | for(unsigned int n=0; n < 10000/64; ++n) { 16 | Eigen::Tensor input(64,28, 28); 17 | Eigen::Tensor kernel(3, 3); 18 | Eigen::Tensor output(64, 26, 26); 19 | input.setRandom(); 20 | kernel.setRandom(); 21 | 22 | Eigen::array dims({1, 2}); // Specify second and third dimension for convolution. 23 | output = input.convolve(kernel, dims); 24 | t+=output(0,0,0); 25 | } 26 | cout<<"t: "< 2 | #include 3 | #include "mnistreader.hh" 4 | #include "misc.hh" 5 | #include 6 | #include 7 | #include "tensor-layers.hh" 8 | #include "vizi.hh" 9 | 10 | using namespace std; 11 | 12 | int main(int argc, char** argv) 13 | { 14 | if(argc < 2) { 15 | cerr<<"Syntax: try-convo index"< convo; 30 | auto& f1 = convo.d_filters[0]; 31 | f1(0,0) = -1; f1(0,1) = -1; f1(0,2)=1; 32 | f1(1,0) = -1; f1(0,1) = -1; f1(1,2)=1; 33 | f1(2,0) = 1; f1(2,1) = 1; f1(2,2)=1; 34 | 35 | convo.d_bias[0](0,0) = 0; 36 | 37 | Tensor out = convo.forward(img)[0].makeMax2d(2); 38 | out(0,0); 39 | cout<<"out:\n"< 4 | #include 5 | #include "misc.hh" 6 | #include 7 | #include 8 | #include 9 | #include "tensor-layers.hh" 10 | using namespace std; 11 | 12 | 13 | TEST_CASE("tensor model load save test") { 14 | struct TestState : ModelState 15 | { 16 | Conv2d d_lc1; // -> 26*26 -> max2d -> 13*13 17 | Linear d_fc1; 18 | TestState() 19 | { 20 | d_members = {{&d_lc1, "lc1"}, {&d_fc1, "fc1"}}; 21 | } 22 | }; 23 | 24 | TestState ts; 25 | ts.randomize(); 26 | 27 | saveModelState(ts, "tensormodetest-test.state"); 28 | 29 | TestState ts2; 30 | ts2.randomize(); // just to confuse things 31 | loadModelState(ts2, 
"tensormodetest-test.state"); 32 | 33 | CHECK(ts.d_lc1.d_filters[16](2,2) == ts2.d_lc1.d_filters[16](2,2)); 34 | CHECK(ts.d_fc1.d_weights(30,32) == ts2.d_fc1.d_weights(30,32)); 35 | } 36 | 37 | 38 | TEST_CASE("max2d array") { 39 | std::array, 3> in; 40 | for(unsigned int n=0; n < in.size(); ++n) { 41 | in[n]=Tensor(4,4); 42 | in[n].iota(n); 43 | } 44 | auto res = Max2dfw(in, 2); 45 | // 0 1 2 3 46 | // 4 5 6 7 47 | // 8 9 10 11 48 | // 12 13 14 15 49 | 50 | CHECK(res[0](0,0) == 5); 51 | CHECK(res[0](0,1) == 7); 52 | 53 | CHECK(res[1](0,0) == 6); 54 | CHECK(res[1](0,1) == 8); 55 | 56 | CHECK(res[2](1,0) == 15); 57 | CHECK(res[2](1,1) == 17); 58 | } 59 | 60 | -------------------------------------------------------------------------------- /imagine.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "tensor-layers.hh" 6 | #include "convo-alphabet.hh" 7 | #include "vizi.hh" 8 | #include "mnistreader.hh" 9 | using namespace std; 10 | 11 | 12 | int main(int argc, char** argv) 13 | { 14 | if(argc < 3) { 15 | cout<<"Syntax: imagine fromletter toletter modelname"<d_val = m.img.d_imp -> d_val.unaryExpr([](float v) { return fabs(v); }); 39 | */ 40 | 41 | m.img.normalize(0.172575, 0.25); 42 | 43 | auto specscore = m.scores.makeSlice(tolabel, 0, 1, 1); 44 | auto topo = specscore.getTopo(); 45 | for(unsigned int tries = 0 ; tries < 10000; ++tries) { 46 | cout<d_val += grad; 51 | 52 | if(!(tries %4)) 53 | saveTensor(m.img, "imagine-"+to_string(tries)+".png", 252, true); 54 | specscore.zerograd(topo); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/cmake.yml: -------------------------------------------------------------------------------- 1 | name: CMake 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | # Customize the CMake build type here (Release, Debug, 
RelWithDebInfo, etc.) 11 | BUILD_TYPE: Release 12 | 13 | 14 | jobs: 15 | build: 16 | # The CMake configure and build commands are platform agnostic and should work equally 17 | # well on Windows or Mac. You can convert this to a matrix build if you need 18 | # cross-platform coverage. 19 | # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - name: Install libraries 26 | run: sudo apt-get install libeigen3-dev libz-dev libsqlite3-dev 27 | 28 | - name: Configure CMake 29 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 30 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 31 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} 32 | 33 | - name: Build 34 | # Build your program with the given configuration 35 | run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} 36 | 37 | - name: Test 38 | working-directory: ${{github.workspace}}/build 39 | # Execute tests defined by the CMake configuration. 
40 | # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail 41 | run: ctest -C ${{env.BUILD_TYPE}} 42 | 43 | -------------------------------------------------------------------------------- /mnistreader.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | //#include "array.hh" 6 | //#include "fvector.hh" 7 | 8 | #include "tensor2.hh" 9 | class MNISTReader 10 | { 11 | public: 12 | MNISTReader(const std::string& images, const std::string& labels); 13 | unsigned int num() const 14 | { 15 | return d_num; 16 | } 17 | std::vector getImage(int n) const; 18 | const std::vector& getImageFloat(int n) const 19 | { 20 | if(auto iter = d_converted.find(n); iter != d_converted.end()) 21 | return iter->second; 22 | else 23 | throw std::runtime_error("Could not find image "+std::to_string(n)); 24 | } 25 | 26 | template 27 | void pushImage(int n, Tensor& dest) const 28 | { 29 | assert(dest.d_imp && dest.d_imp->d_mode == TMode::Parameter); 30 | const auto& src = getImageFloat(n); 31 | for(int row=0 ; row < 28; ++row) 32 | for(int col=0 ; col < 28; ++col) 33 | dest(row, col) = src.at(row+28*col); 34 | } 35 | /* 36 | template 37 | void pushImage(int n, NNArray& dest, int idx) const 38 | { 39 | const auto& src = getImageFloat(n); 40 | for(int row=0 ; row < 28; ++row) 41 | for(int col=0 ; col < 28; ++col) { 42 | if(!dest(row,col).impl) // XXX FUGLY 43 | dest(row, col) = 0; 44 | dest(row, col).impl->d_val.v[idx] = src.at(row+28*col); 45 | } 46 | } 47 | */ 48 | 49 | char getLabel(int n) const; 50 | private: 51 | std::vector d_images; 52 | std::vector d_labels; 53 | unsigned int d_rows, d_cols, d_stride, d_num; 54 | std::unordered_map> d_converted; 55 | }; 56 | -------------------------------------------------------------------------------- /mnistposter.cc: -------------------------------------------------------------------------------- 1 | 2 | #define 
STB_IMAGE_WRITE_IMPLEMENTATION 3 | #include "ext/stb/stb_image_write.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "mnistreader.hh" 10 | #include 11 | #include "misc.hh" 12 | 13 | using namespace std; 14 | 15 | int main(int argc, char **argv) 16 | { 17 | int filt=-1; 18 | if(argc == 2) 19 | filt= 1 + argv[1][0] - 'a'; 20 | 21 | feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW ); 22 | 23 | MNISTReader mn("gzip/emnist-letters-train-images-idx3-ubyte.gz", "gzip/emnist-letters-train-labels-idx1-ubyte.gz"); 24 | //MNISTReader mn("gzip/emnist-letters-test-images-idx3-ubyte.gz", "gzip/emnist-letters-test-labels-idx1-ubyte.gz"); 25 | 26 | cout<<"Have "< out; 30 | out.resize(imgcols*imgrows); 31 | auto pix = [&out, &imgrows, &imgcols](int col, int row) -> uint8_t& 32 | { 33 | return out[col + row*imgcols]; 34 | }; 35 | 36 | int count=0; 37 | Batcher batcher(mn.num()); 38 | for(;;) { 39 | auto b = batcher.getBatch(1); 40 | if(b.empty()) 41 | break; 42 | int n=b[0]; 43 | if(filt >=0 && mn.getLabel(n) != filt) 44 | continue; 45 | 46 | Tensor img(28,28); 47 | mn.pushImage(n, img); 48 | 49 | int x = 30 * (count % (imgcols/30 - 1)); // this many per row 50 | int y = 30 * (count / (imgcols/30 - 1)); 51 | count++; 52 | 53 | if(x+30 >= imgcols || y+30 >= imgrows) 54 | break; 55 | 56 | for(unsigned int r=0; r < img.getRows(); ++r) 57 | for(unsigned int c=0; c < img.getCols(); ++c) 58 | pix(x+c, y+r) = 255 - img(r,c)*255; 59 | } 60 | stbi_write_png("poster.png", imgcols, imgrows, 1, &out[0], imgcols); 61 | } 62 | -------------------------------------------------------------------------------- /grutest.cc: -------------------------------------------------------------------------------- 1 | #include "ext/doctest.h" 2 | #include "tracked.hh" 3 | #include "gru.hh" 4 | #include 5 | 6 | using namespace std; 7 | 8 | // a sequence needs to be dragged through 9 | #if 0 10 | TEST_CASE("single GRU") { 11 | GRULayer gm; 12 | // cout<<"gm.size(): "< fc; 14 | // 
// Chop the text in `fname` into `mult * (filesize/siz)` randomly positioned
// pieces of exactly `siz` characters each; newlines and tabs inside a piece
// are replaced with spaces. Pieces are drawn uniformly and may overlap.
//
// Throws std::runtime_error if the file cannot be opened (previously an
// unreadable stream made the !eof() read loop spin forever).
// Returns an empty vector for files shorter than siz+1 characters
// (previously this produced an invalid uniform_int_distribution range: UB).
std::vector<std::string> textChopper(const char* fname, size_t siz, int mult)
{
  std::ifstream ifs(fname);
  if(!ifs)
    throw std::runtime_error("textChopper: could not open '" + std::string(fname) + "'");

  // Slurp the whole file; checking read()/gcount() (instead of !eof())
  // terminates on any stream failure, not only on a clean end-of-file.
  std::string total;
  std::vector<char> buffer(1024000);
  while(ifs.read(buffer.data(), buffer.size()) || ifs.gcount() > 0)
    total.append(buffer.data(), static_cast<size_t>(ifs.gcount()));

  std::vector<std::string> ret;
  if(siz == 0 || total.size() < siz + 1)  // too small to chop safely
    return ret;

  unsigned int pieces = mult * total.size() / siz;
  ret.reserve(pieces);

  std::random_device rd;  // seed source for the RNG
  std::mt19937 gen(rd()); // Mersenne Twister engine seeded with rd()
  std::uniform_int_distribution<size_t> distrib(0, total.size() - siz - 1);

  for(unsigned int n = 0; n < pieces; ++n) {
    ret.push_back(total.substr(distrib(gen), siz));
    for(auto& c : ret.back())
      if(c == '\n' || c == '\t')
        c = ' ';
  }

  return ret;
}
43 | { 44 | ifstream ifs(fname); 45 | std::array a; 46 | std::unordered_map popcount; 47 | while(!ifs.eof()) { 48 | ifs.read((char*)&a[0], a.size()); 49 | for(const auto& c : a) { 50 | //cout<> revcount; 56 | for(const auto& p : popcount) 57 | revcount.push_back(p); 58 | sort(revcount.begin(), revcount.end(), [](const auto& a, const auto& b) { 59 | return b.second < a.second; 60 | }); 61 | 62 | if(lim >= 0 && revcount.size() > (unsigned int)lim) 63 | revcount.resize(lim); 64 | 65 | for(unsigned int n=0; n < revcount.size(); ++n) { 66 | d_c2i[revcount[n].first] = n; 67 | d_i2c[n]=revcount[n].first; 68 | // cout<<(char)revcount[n].first <<" -> "< img{28,28}; 5 | Tensor scores{26, 1}; 6 | Tensor expected{1,26}; 7 | Tensor modelloss{1,1}; 8 | Tensor weightsloss{1,1}; 9 | Tensor loss{1,1}; 10 | 11 | struct State : public ModelState 12 | { 13 | // r_in c k c_i c_out 14 | Conv2d c1; // -> 26*26 -> max2d -> 13*13 15 | Conv2d c2; // -> -> 11*11 -> max2d -> 6*6 //padding 16 | Conv2d c3; // -> 4*4 -> max2d -> 2*2 17 | // flattened to 512 (128*2*2) 18 | // IN OUT 19 | Linear fc1; 20 | Linear fc2; 21 | Linear fc3; 22 | 23 | State() 24 | { 25 | this->d_members = {{&c1, "c1"}, {&c2, "c2"}, {&c3, "c3"}, {&fc1, "fc1"}, {&fc2, "fc2"}, {&fc3, "fc3"}}; 26 | } 27 | }; 28 | 29 | void init(State& s, bool production=false) 30 | { 31 | using ActFunc = GeluFunc; 32 | 33 | img.zero(); 34 | img.d_imp->d_nograd=true; 35 | 36 | auto step1 = s.c1.forward(img); // -> 26x26, 32 layers 37 | auto step2 = Max2dfw(step1, 2); // -> 13x13 38 | auto step3 = s.c2.forward(step2); // -> 11x11, 64 layers 39 | auto step4 = Max2dfw(step3, 2); // -> 6x6 (padding) 40 | auto step5 = s.c3.forward(step4); // -> 4x4, 128 layers 41 | auto step6 = Max2dfw(step5, 2); // -> 2x2 42 | auto flat = makeFlatten(step6); // -> 512x1 43 | auto output = s.fc1.forward(flat); // -> 64 44 | auto output1 = production ? 
output : output.makeDropout(0.5); 45 | auto output2 = makeFunction(output1); 46 | auto output3 = makeFunction(s.fc2.forward(output2)); // -> 128 47 | // auto output4 = makeFunction(s.fc3.forward(output3)); // -> 26 48 | auto output4 = s.fc3.forward(output3); // -> 26 49 | scores = makeLogSoftMax(output4); 50 | modelloss = -(expected*scores); 51 | 52 | Tensor fact(1,1); 53 | fact(0,0) = 0.02; 54 | weightsloss = fact*(s.c1.SquaredWeightsSum() + s.c2.SquaredWeightsSum() + s.c3.SquaredWeightsSum() + 55 | s.fc1.SquaredWeightsSum() + s.fc1.SquaredWeightsSum() + s.fc1.SquaredWeightsSum()); 56 | 57 | loss = modelloss; // + weightsloss; 58 | } 59 | }; 60 | -------------------------------------------------------------------------------- /misc.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | struct HyperParameters 14 | { 15 | float lr; 16 | float momentum; 17 | int batchMult; 18 | unsigned int getBatchSize() 19 | { 20 | return 8*batchMult; 21 | } 22 | }; 23 | 24 | struct TrainingProgress 25 | { 26 | int batchno=0; 27 | float lastTook=0; 28 | std::vector losses; 29 | std::vector corrects; 30 | std::atomic trained=0; 31 | }; 32 | 33 | extern struct TrainingProgress g_progress; 34 | extern std::shared_ptr g_hyper; 35 | int graphicsThread(); 36 | 37 | class Batcher 38 | { 39 | public: 40 | explicit Batcher(int n, std::optional rng=std::optional()) 41 | { 42 | for(int i=0; i < n ; ++i) 43 | d_store.push_back(i); 44 | 45 | randomize(rng); 46 | } 47 | 48 | explicit Batcher(const std::vector& in) 49 | { 50 | for(const auto& i : in) 51 | d_store.push_back(i); 52 | randomize(); 53 | } 54 | 55 | auto getBatch(int n) 56 | { 57 | std::deque ret; 58 | for(int i = 0 ; !d_store.empty() && i < n; ++i) { 59 | ret.push_back(d_store.front()); 60 | d_store.pop_front(); 61 | } 62 | return ret; 63 | } 64 | 
// Simple stopwatch: start() marks a reference instant, lapUsec() reports the
// microseconds elapsed since that instant and restarts the clock.
// Uses steady_clock so wall-clock adjustments cannot skew the measurement.
struct DTime
{
  // Record "now" as the new reference instant.
  void start()
  {
    d_start = std::chrono::steady_clock::now();
  }

  // Microseconds since the previous start()/lapUsec() call; also resets the
  // reference instant so consecutive calls measure consecutive laps.
  uint32_t lapUsec()
  {
    const auto elapsed = std::chrono::steady_clock::now() - d_start;
    const auto usec = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
    start();
    return usec;
  }

  std::chrono::steady_clock::time_point d_start;
};
sizeof(idx1header), 1, labelsfp) != 1) 41 | throw std::runtime_error("Label file too short"); 42 | if(gzfread(&i3h, sizeof(idx3header), 1, imgfp) != 1) 43 | throw std::runtime_error("Images file too short"); 44 | 45 | i1h.magic = htonl(i1h.magic); 46 | i1h.num = htonl(i1h.num); 47 | 48 | i3h.magic = htonl(i3h.magic); 49 | i3h.num = htonl(i3h.num); 50 | i3h.rows = htonl(i3h.rows); 51 | i3h.cols = htonl(i3h.cols); 52 | 53 | d_rows = i3h.rows; 54 | d_cols = i3h.cols; 55 | d_stride = d_rows * d_cols; 56 | d_num = i3h.num; 57 | if(i1h.magic != 2049) 58 | throw runtime_error("Magic value of labels file wrong "+to_string(i1h.magic)); 59 | if(i3h.magic != 2051) 60 | throw runtime_error("Magic value of images file wrong "+to_string(i3h.magic)); 61 | 62 | if(i3h.num != i1h.num) 63 | throw runtime_error("Mismatch between number of labels and number of images"); 64 | 65 | d_images.resize(i3h.num*i3h.cols*i3h.rows); 66 | if(gzfread((char*)&d_images[0], i3h.cols*i3h.rows, i3h.num, imgfp) != i3h.num) 67 | throw runtime_error("Could not read all "+to_string(i3h.num)+" images"); 68 | 69 | d_labels.resize(i3h.num); 70 | if(gzfread((char*)&d_labels[0], 1, i3h.num, labelsfp) != i3h.num) 71 | throw runtime_error("Could not read all "+to_string(i3h.num)+" labels"); 72 | 73 | gzclose(imgfp); 74 | gzclose(labelsfp); 75 | 76 | vector tmp(28*28); 77 | for(unsigned int n=0 ; n < d_num; ++n) { 78 | unsigned int pos = n * d_stride; 79 | for(unsigned int i=0; i < d_stride; ++i) { 80 | tmp.at(i) = d_images.at(pos+i)/256.0; 81 | } 82 | d_converted[n]=tmp; 83 | } 84 | } 85 | 86 | vector MNISTReader::getImage(int n) const 87 | { 88 | unsigned int pos = n*d_rows*d_cols; 89 | vector ret(&d_images.at(pos), &d_images.at(pos + d_rows*d_cols)); 90 | return ret; 91 | } 92 | 93 | char MNISTReader::getLabel(int n) const 94 | { 95 | return d_labels.at(n); 96 | } 97 | 98 | -------------------------------------------------------------------------------- /cnn1.hh: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "tracked.hh" 3 | #include "layers.hh" 4 | 5 | /* 6 | Gratefully copied from 'mnist.cpp' in the PyTorch example repository 7 | https://github.com/pytorch/examples/blob/main/cpp/mnist/mnist.cpp 8 | 9 | This model takes MNIST 28*28 input and: 10 | 11 | * normalizes to "0.1307, 03081", torch::data::transforms::Normalize<>(0.1307, 0.3081) 12 | 13 | * applies a 5*5 kernel convolution `conv1`, configured to emit 10 layers, 24*24 14 | * does max_pool2d on these, which takes non-overlapping 2*2 rectangles 15 | and emits max value per rectangle. Delivers 12*12 values for each layer 16 | * ReLu 17 | * does another 5x5 convolution `conv2` on the 10 layers, turning them into 20 layers of 8*8 18 | * randomly *zeroes* half of the 20 layers `conv2_drop` - no state, Bernoulli 19 | STILL MISSING! 20 | * another max_pool2d, 4*4*20 layers 21 | * ReLu 22 | * flatten to 320 values 23 | * linear combination 320x50 (fc1) 24 | * ReLU 25 | * zero out half of values randomly during training (STILL MISSING) 26 | * another linear combination, 50x10 (fc2) 27 | * log_softmax on the 10 values 28 | * the 10 outputs are probabilities per digit 29 | * highest probability is chosen 30 | */ 31 | 32 | 33 | struct CNNModel { 34 | NNArray img; 35 | 36 | int label; 37 | NNArray scores; 38 | NNArray expected; 39 | 40 | TrackedFloat loss; 41 | 42 | struct State 43 | { 44 | // R C K IN OUTLAYERS 45 | Conv2d c1; // -> 24*24 -> max2d -> 12*12 46 | Conv2d c2; // -> 8*8 -> max2d -> 4*4 47 | 48 | // IN OUT 49 | Linear fc1; 50 | Linear fc2; 51 | 52 | void learn(float lr) 53 | { 54 | c1.learn(lr); 55 | c2.learn(lr); 56 | fc1.learn(lr); 57 | fc2.learn(lr); 58 | } 59 | 60 | void save(std::ostream& out) const 61 | { 62 | c1.save(out); c2.save(out); fc1.save(out); fc2.save(out); 63 | } 64 | void load(std::istream& in) 65 | { 66 | c1.load(in); c2.load(in); fc1.load(in); fc2.load(in); 67 | } 68 | }; 69 | 70 | 
void init(State& s) 71 | { 72 | img.zero(); 73 | 74 | auto step1 = s.c1.forward(img); 75 | 76 | std::array, 10> step2; 77 | unsigned ctr=0; 78 | for(auto& p : step2) 79 | p = step1[ctr++].Max2d<2>().applyFunc(ReluFunc()); 80 | 81 | std::array, 20> step3 = s.c2.forward(step2); 82 | std::array, 20> step4; 83 | 84 | ctr=0; 85 | for(auto& p : step4) { 86 | p = step3[ctr++].Max2d<2>().applyFunc(ReluFunc()); 87 | } 88 | 89 | NNArray flat = flatten(step4); 90 | auto output = s.fc1.forward(flat); 91 | auto output2 = output.applyFunc(ReluFunc()); 92 | auto output3 = s.fc2.forward(output2); 93 | 94 | scores = output3.logSoftMax(); 95 | expected.zero(); 96 | loss = TrackedFloat(0) - (expected*scores)(0,0); 97 | } 98 | }; 99 | -------------------------------------------------------------------------------- /37learn.cc: -------------------------------------------------------------------------------- 1 | #include "mnistreader.hh" 2 | #include "vizi.hh" 3 | #include 4 | #include "ext/sqlitewriter/sqlwriter.hh" 5 | #include 6 | 7 | using namespace std; 8 | 9 | float doTest(const MNISTReader& mntest, const Tensor& weights, float bias, SQLiteWriter* sqw=0) 10 | { 11 | unsigned int corrects=0, wrongs=0; 12 | 13 | for(unsigned int n = 0 ; n < mntest.num(); ++n) { 14 | int label = mntest.getLabel(n); 15 | if(label != 3 && label != 7) 16 | continue; 17 | Tensor img(28,28); 18 | mntest.pushImage(n, img); 19 | 20 | float score = (img.dot(weights).sum()(0,0)) + bias; // the calculation 21 | 22 | int predict = score > 0 ? 
7 : 3; // the verdict 23 | 24 | if(sqw) 25 | sqw->addValue({{"label", label}, {"res", score}, {"verdict", predict}}); 26 | 27 | 28 | if(predict == label) { 29 | corrects++; 30 | } 31 | else { 32 | wrongs++; 33 | } 34 | } 35 | float perc = 100.0*corrects/(corrects+wrongs); 36 | cout << perc << "% correct" << endl; 37 | return perc; 38 | } 39 | 40 | int main() 41 | { 42 | MNISTReader mn("gzip/emnist-digits-train-images-idx3-ubyte.gz", "gzip/emnist-digits-train-labels-idx1-ubyte.gz"); 43 | MNISTReader mntest("gzip/emnist-digits-test-images-idx3-ubyte.gz", "gzip/emnist-digits-test-labels-idx1-ubyte.gz"); 44 | 45 | cout << "Have "< 98.0) 68 | break; 69 | saveTensor(weights, "weights-"+to_string(count)+".png", 252); 70 | } 71 | 72 | Tensor img(28,28); 73 | mn.pushImage(n, img); 74 | float res = (img.dot(weights).sum()(0,0)) + bias; // the calculation 75 | if(count == 25001) { 76 | auto prod = img.dot(weights); 77 | saveTensor(img, "random-image.png", 252, true); 78 | saveTensor(prod, "random-prod.png", 252); 79 | cout<<"res for first image: " << res << '\n'; 80 | } 81 | int verdict = res > 0 ? 
7 : 3; 82 | 83 | if(label == 7) { 84 | if(res < 2.0) { 85 | weights.raw() = weights.raw() + img.raw() * lr.raw(); 86 | bias += 0.01; 87 | } 88 | } else { 89 | if(res > -2.0) { 90 | weights.raw() = weights.raw() - img.raw() * lr.raw(); 91 | bias -= 0.01; 92 | } 93 | } 94 | 95 | 96 | ++count; 97 | } 98 | saveTensor(weights, "weights-final.png", 252); 99 | doTest(mntest, weights, bias, &sqw); 100 | cout<<"Bias: "< 11 | 12 | using namespace std; 13 | 14 | FontWriter::FontWriter() 15 | { 16 | std::ifstream in("/usr/share/fonts/truetype/msttcorefonts/Times_New_Roman_Bold.ttf", std::ios::binary); 17 | d_ttf_buffer.assign(std::istreambuf_iterator(in), 18 | std::istreambuf_iterator()); 19 | 20 | stbtt_InitFont(&d_font, (const unsigned char*)&d_ttf_buffer[0], stbtt_GetFontOffsetForIndex((const unsigned char*)&d_ttf_buffer[0],0)); 21 | } 22 | 23 | void FontWriter::writeChar(char ch, int s, int c, int r, std::function f) 24 | { 25 | int w,h,i,j; 26 | unsigned char *bitmap = stbtt_GetCodepointBitmap(&d_font, 0,stbtt_ScaleForPixelHeight(&d_font, s), ch, &w, &h, 0,0); 27 | c -= w/2; // center 28 | for (j=0; j < h; ++j) { 29 | for (i=0; i < w; ++i) { 30 | f(c + i, r + j, 255-bitmap[j*w+i], 255-bitmap[j*w+i], 255-bitmap[j*w+i]); 31 | } 32 | } 33 | } 34 | 35 | 36 | void saveTensor(const Tensor& t, const std::string& fname, int size, bool monochrome) 37 | { 38 | vector out; 39 | out.resize(size*size*3); 40 | struct Pixel { 41 | uint8_t r, g, b; 42 | }; 43 | static_assert(sizeof(Pixel)==3); 44 | 45 | auto pix = [&out, &size](int col, int row) -> Pixel& 46 | { 47 | return *(Pixel*)&out[3 *(col + row*size)]; 48 | }; 49 | 50 | float lemin, lemax; 51 | lemin = lemax = t(0,0); 52 | 53 | for(unsigned int row = 0 ; row < t.getRows(); ++row) { 54 | for(unsigned int col = 0 ; col < t.getCols(); ++col) { 55 | float v = t(row, col); 56 | if(v > lemax) 57 | lemax = v; 58 | if(v < lemin) 59 | lemin = v; 60 | } 61 | } 62 | 63 | unsigned int hboxsize = size/t.getCols(); 64 | unsigned int vboxsize = 
size/t.getRows(); 65 | 66 | auto box = [&pix, &out](int col, int row, int w, int h, uint8_t cr, uint8_t cg, uint8_t cb) { 67 | for(int c = col ; c < col + w; ++c) 68 | for(int r = row ; r < row + h; ++r) 69 | pix(c, r) = {cr,cg,cb}; 70 | }; 71 | 72 | for(unsigned int row = 0 ; row < t.getRows(); ++row) { 73 | for(unsigned int col = 0 ; col < t.getCols(); ++col) { 74 | float v = t(row, col); 75 | 76 | if(monochrome) { 77 | uint8_t color = 255.0*(v - lemin)/(lemax-lemin); 78 | box(col * hboxsize, row * vboxsize, hboxsize, vboxsize, color, color, color); 79 | } 80 | else { 81 | if(v > 0) // red 82 | box(col * hboxsize, row * vboxsize, hboxsize, vboxsize, 255 * v/lemax, 0, 0); 83 | else if(v < 0) // blue 84 | box(col * hboxsize, row * vboxsize, hboxsize, vboxsize, 0, 0, 255 * v/lemin); 85 | } 86 | } 87 | } 88 | 89 | 90 | stbi_write_png(fname.c_str(), size, size, 3, &out[0], 3*size); 91 | } 92 | -------------------------------------------------------------------------------- /gru-layer.hh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "tensor-layers.hh" 3 | // hidden state=> dense linear => output x 4 | 5 | // x is input 6 | // h_t, h_{t-1} = hidden state 7 | 8 | // gate_{reset} = \sigma(W_{input_{reset}} \cdot x_t + W_{hidden_{reset}} \cdot h_{t-1}) 9 | 10 | // W_input_reset - ^^ normal matrix products 11 | // W_input_hidden 12 | 13 | // pytorch: 14 | 15 | // r_t​ = σ(W_{ir} ​x_t + b_{ir}​+W_{hr}​h_{t−1}​ +b_{hr}​) // reset gate 16 | // z_t​ = σ(W_{iz} ​x_t ​+ b_{iz} ​+W_{hz}​h_{t−1}​ + b_{hz}​) // update 17 | // n_t​ = tanh(W_{in}​x_t​+b_{in}​+ r_t​*(W_{hn} ​h_{t−1}​ + b_{hn}​)) // "new" - * is dotproduct 18 | // h_t​=(1−z_t​)*n_t​+z_t​*h_{t−1)​} // new h 19 | 20 | // the hidden state is also the output, which needs linear combination to turn into input size again 21 | // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html 22 | 23 | template