├── samples ├── hls │ └── mnist │ │ └── simple │ │ ├── directives.tcl │ │ ├── .gitignore │ │ ├── src │ │ ├── mnist_sample.h │ │ └── mnist_sample.cpp │ │ ├── testbench │ │ └── tb_mnist.cpp │ │ ├── README.md │ │ └── Makefile ├── verilog │ ├── mnist │ │ ├── tb_mnist_lut_cnn │ │ │ ├── xsim │ │ │ │ ├── xsim_run_all.tcl │ │ │ │ ├── xvlog_cmd.txt │ │ │ │ ├── run_xsim.sh │ │ │ │ └── run_xsim.bat │ │ │ ├── veritak │ │ │ │ └── mnist_lut_cnn.vtakprj │ │ │ ├── iverilog │ │ │ │ ├── run_iverilog.sh │ │ │ │ └── iverilog_cmd.txt │ │ │ └── verilator │ │ │ │ ├── clang-cmakeinit.cmake │ │ │ │ ├── verilator_cmd.txt │ │ │ │ ├── Makefile │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── tb_verilator.cpp │ │ ├── tb_mnist_lut_simple │ │ │ ├── xsim │ │ │ │ ├── xsim_run_all.tcl │ │ │ │ ├── xvlog_cmd.txt │ │ │ │ ├── run_xsim.sh │ │ │ │ └── run_xsim.bat │ │ │ ├── iverilog │ │ │ │ ├── iverilog_cmd.txt │ │ │ │ └── run_iverilog.sh │ │ │ ├── veritak │ │ │ │ └── tb_mnist_lut_simple.vtakprj │ │ │ └── verilator │ │ │ │ ├── clang-cmakeinit.cmake │ │ │ │ ├── verilator_cmd.txt │ │ │ │ ├── Makefile │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── tb_verilator.cpp │ │ ├── tb_mnist_semantic_segmentation │ │ │ ├── xsim │ │ │ │ ├── xsim_run_all.tcl │ │ │ │ ├── xvlog_cmd.txt │ │ │ │ ├── run_xsim.sh │ │ │ │ └── run_xsim.bat │ │ │ ├── verilator │ │ │ │ ├── clang-cmakeinit.cmake │ │ │ │ ├── verilator_cmd.txt │ │ │ │ ├── Makefile │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── tb_verilator.cpp │ │ │ ├── veritak │ │ │ │ └── tb_mnist_semantic_segmentation.vtakprj │ │ │ └── iverilog │ │ │ │ ├── run_iverilog.sh │ │ │ │ └── iverilog_cmd.txt │ │ ├── tb_mnist_segmentation_and_classification │ │ │ ├── xsim │ │ │ │ ├── xsim_run_all.tcl │ │ │ │ ├── xvlog_cmd.txt │ │ │ │ ├── run_xsim.sh │ │ │ │ └── run_xsim.bat │ │ │ ├── verilator │ │ │ │ ├── clang-cmakeinit.cmake │ │ │ │ ├── verilator_cmd.txt │ │ │ │ ├── Makefile │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── tb_verilator.cpp │ │ │ ├── iverilog │ │ │ │ ├── run_iverilog.sh │ │ │ │ └── iverilog_cmd.txt │ │ │ 
└── veritak │ │ │ │ └── tb_mnist_segmentation_and_classification.vtakprj │ │ ├── common │ │ │ └── bb_lut.v │ │ └── README.md │ └── cifar10 │ │ └── README.md └── cpp │ ├── cifar10 │ ├── Cifar10DenseCnn.cpp │ ├── Cifar10DenseSimple.cpp │ ├── Cifar10MicroMlpLutCnn.cpp │ ├── Cifar10StochasticLutCnn.cpp │ ├── Cifar10DifferentiableLutCnn.cpp │ ├── Cifar10DifferentiableLutSimple.cpp │ ├── sample_cifar10.vcxproj.filters │ └── sample_cifar10.vcxproj.user │ ├── diabetes │ ├── readme.txt │ ├── diabets_data.py │ ├── LoadDiabetes.h │ ├── main.cpp │ ├── sample_diabetes.vcxproj.filters │ ├── Makefile │ └── DiabetesRegressionDenseAffine.cpp │ └── mnist │ ├── get_nmist.bat │ ├── get_nmist.sh │ ├── sample_mnist.vcxproj.filters │ ├── sample_mnist.sln │ └── MnistLoadNet.cpp ├── tests ├── hls │ └── mnist │ │ └── mnist_simple │ │ ├── directives.tcl │ │ ├── .gitignore │ │ ├── src │ │ ├── mnist_simple.h │ │ └── mnist_simple.cpp │ │ ├── testbench │ │ └── tb_mnist_simple.cpp │ │ └── Makefile ├── .gitattributes ├── cuda │ ├── MicroMlp_Test.cpp │ ├── StochasticLut6_Test.cpp │ ├── Makefile │ └── main.cpp ├── gtest │ ├── TensorTest.cpp │ ├── BinaryScalingTest.cpp │ ├── MetricsCategoricalAccuracyTest.cpp │ ├── cudaMatrixColwiseSumTest.cpp │ ├── cudaMatrixRowwiseSetVectorTest.cpp │ ├── ConvBitToRealTest.cpp │ ├── ShuffleTest.cpp │ ├── VariablesTest.cpp │ ├── DenseAffineQuantizeTest.cpp │ ├── DifferentiableLutTest.cpp │ ├── BitEncodeTest.cpp │ ├── Makefile │ ├── ConcatenateTest.cpp │ ├── DepthwiseDenseAffineTest.cpp │ └── cudaMatrixColwiseMeanVarTest.cpp ├── cpp │ ├── cifar10 │ │ ├── Cifar10DenseCnn.cpp │ │ ├── Cifar10DenseSimple.cpp │ │ ├── Cifar10MicroMlpLutCnn.cpp │ │ ├── Cifar10StochasticLutCnn.cpp │ │ ├── Cifar10DifferentiableLutCnn.cpp │ │ └── Cifar10DifferentiableLutSimple.cpp │ ├── diabetes │ │ ├── readme.txt │ │ ├── diabets_data.py │ │ ├── LoadDiabetes.h │ │ ├── main.cpp │ │ ├── Makefile │ │ └── DiabetesRegressionDenseAffine.cpp │ ├── mnist │ │ ├── get_nmist.bat │ │ ├── get_nmist.sh │ │ 
├── test_mnist.sln │ │ └── MnistLoadNet.cpp │ └── xor │ │ ├── main.cpp │ │ ├── StochasticLut6.cpp │ │ └── XorMicroMlp.cpp └── svhn │ └── download_svhn.sh ├── include └── bb │ ├── ObjectLoader.h │ ├── Manager.h │ ├── ModelLoader.h │ ├── ValueGenerator.h │ ├── StochasticLutModel.h │ ├── Optimizer.h │ ├── MetricsFunction.h │ ├── Version.h │ ├── LossFunction.h │ ├── CudaUtility.h │ ├── PnmImage.h │ ├── LoadXor.h │ ├── Assert.h │ ├── Activation.h │ └── Filter2d.h ├── documents ├── images │ ├── micro_mlp.png │ ├── GoogleColab.jpg │ ├── dense_affine.png │ ├── performance.png │ ├── block_diagram.png │ ├── fpga_resource.png │ ├── sparse_affine.png │ ├── stochastic_and.png │ ├── stochastic_lut.png │ ├── LutNet_node_model.png │ ├── autoencoder_mnist.png │ ├── binary_modulation.png │ ├── fpga_environment.jpg │ ├── modulation_model.png │ ├── stochastic_lut2.png │ ├── LutNet_design_flow.png │ ├── LutNet_layer_model.png │ ├── autoencoder_cifar10.png │ ├── differentiable_lut.png │ ├── differentiable_lut_app.png │ ├── differentiable-lut_model.png │ ├── LutNet_lut_equivalent_model.png │ ├── binary_modulation_wide_test.png │ └── difference_other_networks.png └── sphinx │ ├── source │ ├── python_module_storage.rst │ ├── introduction.rst │ ├── python_module_system.rst │ ├── python_module_object.rst │ ├── _static │ │ └── css │ │ │ └── my_theme.css │ ├── quick_start_verilog.rst │ ├── python_module_models_misc.rst │ ├── python_module_verilog.rst │ ├── python_module_models_base.rst │ ├── python_module_container.rst │ ├── python_module_optimizer.rst │ ├── python_api.rst │ ├── index.rst │ ├── python_module_models_binary.rst │ ├── python_module_models_filter.rst │ ├── python_module_models_activation.rst │ ├── python_module_metrics.rst │ ├── introduction_features.rst │ ├── locale │ │ ├── en │ │ │ └── LC_MESSAGES │ │ │ │ ├── index.po │ │ │ │ └── sample_rtl.po │ │ └── ja │ │ │ └── LC_MESSAGES │ │ │ ├── index.po │ │ │ └── sample_rtl.po │ ├── python_module_losses.rst │ ├── 
python_module_models_operation.rst │ ├── introduction_binary_modulation.rst │ ├── introduction_case_study.rst │ ├── informations.rst │ └── quick_start_cpp.rst │ ├── Makefile │ └── make.bat ├── python ├── copy_src.bat ├── build.sh ├── copy_src.sh ├── build.bat ├── projects │ ├── discrete │ │ ├── core.vcxproj.user │ │ └── Makefile │ └── thrust │ │ ├── core.vcxproj.user │ │ ├── PyBinaryBrainThrust.sln │ │ └── Makefile ├── clean.bat ├── clean.sh ├── uninstall.sh ├── requirements.txt ├── upload_pypi.bat ├── check_install.py ├── binarybrain │ ├── __init__.py │ ├── variables.py │ ├── Makefile │ ├── src │ │ └── core_bbcu.cu │ ├── metrics.py │ └── hls.py └── upload_pypi.sh ├── .gitmodules ├── cuda ├── bbcu.vcxproj.user ├── Manager.cu ├── bbcu.sln ├── MatrixRowwiseSetVector.cu ├── Makefile ├── MatrixColwiseSum.cu ├── OptimizerAdam.cu └── ConvBitToReal.cu ├── .readthedocs.yaml ├── .gitattributes ├── setup.py └── license.txt /samples/hls/mnist/simple/directives.tcl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/hls/mnist/mnist_simple/directives.tcl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/hls/mnist/mnist_simple/.gitignore: -------------------------------------------------------------------------------- 1 | mnist_simple/ -------------------------------------------------------------------------------- /tests/.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb filter=nbstripout 2 | *.ipynb diff=ipynb 3 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/xsim/xsim_run_all.tcl: -------------------------------------------------------------------------------- 1 | run all 2 | quit 3 | 
-------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/xsim/xsim_run_all.tcl: -------------------------------------------------------------------------------- 1 | run all 2 | quit 3 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/xsim/xsim_run_all.tcl: -------------------------------------------------------------------------------- 1 | run all 2 | quit 3 | -------------------------------------------------------------------------------- /include/bb/ObjectLoader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/include/bb/ObjectLoader.h -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/xsim/xsim_run_all.tcl: -------------------------------------------------------------------------------- 1 | run all 2 | quit 3 | -------------------------------------------------------------------------------- /tests/cuda/MicroMlp_Test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cuda/MicroMlp_Test.cpp -------------------------------------------------------------------------------- /tests/gtest/TensorTest.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/gtest/TensorTest.cpp -------------------------------------------------------------------------------- /documents/images/micro_mlp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/micro_mlp.png -------------------------------------------------------------------------------- 
/samples/hls/mnist/simple/.gitignore: -------------------------------------------------------------------------------- 1 | mnist_sample/ 2 | 3 | MnistDifferentiableLutHls.h 4 | mnist_test_data.h 5 | -------------------------------------------------------------------------------- /documents/images/GoogleColab.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/GoogleColab.jpg -------------------------------------------------------------------------------- /documents/images/dense_affine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/dense_affine.png -------------------------------------------------------------------------------- /documents/images/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/performance.png -------------------------------------------------------------------------------- /tests/gtest/BinaryScalingTest.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/gtest/BinaryScalingTest.cpp -------------------------------------------------------------------------------- /documents/images/block_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/block_diagram.png -------------------------------------------------------------------------------- /documents/images/fpga_resource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/fpga_resource.png 
-------------------------------------------------------------------------------- /documents/images/sparse_affine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/sparse_affine.png -------------------------------------------------------------------------------- /documents/images/stochastic_and.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/stochastic_and.png -------------------------------------------------------------------------------- /documents/images/stochastic_lut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/stochastic_lut.png -------------------------------------------------------------------------------- /tests/cuda/StochasticLut6_Test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cuda/StochasticLut6_Test.cpp -------------------------------------------------------------------------------- /documents/images/LutNet_node_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/LutNet_node_model.png -------------------------------------------------------------------------------- /documents/images/autoencoder_mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/autoencoder_mnist.png -------------------------------------------------------------------------------- /documents/images/binary_modulation.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/binary_modulation.png -------------------------------------------------------------------------------- /documents/images/fpga_environment.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/fpga_environment.jpg -------------------------------------------------------------------------------- /documents/images/modulation_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/modulation_model.png -------------------------------------------------------------------------------- /documents/images/stochastic_lut2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/stochastic_lut2.png -------------------------------------------------------------------------------- /python/copy_src.bat: -------------------------------------------------------------------------------- 1 | 2 | xcopy /Y /I /E ..\include binarybrain\include 3 | xcopy /Y /I ..\cuda binarybrain\cuda 4 | 5 | -------------------------------------------------------------------------------- /tests/cpp/cifar10/Cifar10DenseCnn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10DenseCnn.cpp -------------------------------------------------------------------------------- /documents/images/LutNet_design_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/LutNet_design_flow.png -------------------------------------------------------------------------------- /documents/images/LutNet_layer_model.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/LutNet_layer_model.png -------------------------------------------------------------------------------- /documents/images/autoencoder_cifar10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/autoencoder_cifar10.png -------------------------------------------------------------------------------- /documents/images/differentiable_lut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/differentiable_lut.png -------------------------------------------------------------------------------- /samples/cpp/cifar10/Cifar10DenseCnn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10DenseCnn.cpp -------------------------------------------------------------------------------- /samples/verilog/cifar10/README.md: -------------------------------------------------------------------------------- 1 | CIFAR-10 の Verilog 出力先ディレクトリです。 2 | 3 | 現時点で シミュレーション実行のサンプル提供はありませんので、MNISTを参考に各自で挑戦お願いいたします。 4 | -------------------------------------------------------------------------------- /tests/cpp/cifar10/Cifar10DenseSimple.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10DenseSimple.cpp -------------------------------------------------------------------------------- /documents/images/differentiable_lut_app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/differentiable_lut_app.png 
-------------------------------------------------------------------------------- /samples/cpp/cifar10/Cifar10DenseSimple.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10DenseSimple.cpp -------------------------------------------------------------------------------- /tests/cpp/cifar10/Cifar10MicroMlpLutCnn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10MicroMlpLutCnn.cpp -------------------------------------------------------------------------------- /documents/images/differentiable-lut_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/differentiable-lut_model.png -------------------------------------------------------------------------------- /samples/cpp/cifar10/Cifar10MicroMlpLutCnn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10MicroMlpLutCnn.cpp -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/iverilog/iverilog_cmd.txt: -------------------------------------------------------------------------------- 1 | ../tb_mnist_lut_simple.v 2 | ../MnistLutSimple.v 3 | -y ../../common 4 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/xsim/xvlog_cmd.txt: -------------------------------------------------------------------------------- 1 | ../tb_mnist_lut_simple.v 2 | ../MnistLutSimple.v 3 | ../../common/bb_lut.v 4 | -------------------------------------------------------------------------------- /tests/cpp/cifar10/Cifar10StochasticLutCnn.cpp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10StochasticLutCnn.cpp -------------------------------------------------------------------------------- /documents/images/LutNet_lut_equivalent_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/LutNet_lut_equivalent_model.png -------------------------------------------------------------------------------- /documents/images/binary_modulation_wide_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/binary_modulation_wide_test.png -------------------------------------------------------------------------------- /documents/images/difference_other_networks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/difference_other_networks.png -------------------------------------------------------------------------------- /python/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./clean.sh 4 | source ./copy_src.sh 5 | python3 setup.py build 6 | python3 setup.py develop --user 7 | -------------------------------------------------------------------------------- /samples/cpp/cifar10/Cifar10StochasticLutCnn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10StochasticLutCnn.cpp -------------------------------------------------------------------------------- /samples/cpp/diabetes/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | データの準備 3 | 4 | sudo apt install 
python3-pip 5 | sudo pip3 install scikit-learn 6 | python3 diabets_data.py 7 | 8 | -------------------------------------------------------------------------------- /tests/cpp/cifar10/Cifar10DifferentiableLutCnn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10DifferentiableLutCnn.cpp -------------------------------------------------------------------------------- /tests/cpp/diabetes/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | データの準備 3 | 4 | sudo apt install python3-pip 5 | sudo pip3 install scikit-learn 6 | python3 diabets_data.py 7 | 8 | -------------------------------------------------------------------------------- /samples/cpp/cifar10/Cifar10DifferentiableLutCnn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10DifferentiableLutCnn.cpp -------------------------------------------------------------------------------- /tests/cpp/cifar10/Cifar10DifferentiableLutSimple.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10DifferentiableLutSimple.cpp -------------------------------------------------------------------------------- /samples/cpp/cifar10/Cifar10DifferentiableLutSimple.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10DifferentiableLutSimple.cpp -------------------------------------------------------------------------------- /tests/hls/mnist/mnist_simple/src/mnist_simple.h: -------------------------------------------------------------------------------- 1 | 2 | #define AP_INT_MAX_W 32768 3 | #include "ap_int.h" 4 | 5 | ap_uint<10> 
mnist_simple(ap_uint<28*28> in_data); 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "cereal"] 2 | path = cereal 3 | url = https://github.com/USCiLab/cereal 4 | [submodule "jelly"] 5 | path = jelly 6 | url = https://github.com/ryuz/jelly 7 | -------------------------------------------------------------------------------- /python/copy_src.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -fr binarybrain/include 4 | rm -fr binarybrain/cuda 5 | 6 | cp -r ../include binarybrain/include 7 | cp -r ../cuda binarybrain/cuda 8 | 9 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/veritak/mnist_lut_cnn.vtakprj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/verilog/mnist/tb_mnist_lut_cnn/veritak/mnist_lut_cnn.vtakprj -------------------------------------------------------------------------------- /cuda/bbcu.vcxproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /python/build.bat: -------------------------------------------------------------------------------- 1 | call clean.bat 2 | call copy_src.bat 3 | python setup.py build 4 | python setup.py develop 5 | 6 | python check_install.py 7 | if %errorlevel% neq 0 ( 8 | exit /b 9 | ) 10 | -------------------------------------------------------------------------------- /tests/cuda/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: 3 | nvcc -I../../include ../../cuda/MicroMlp.cu main.cpp MicroMlp_Test.cpp -o cuda_test 4 | 5 | run: 6 | ./cuda_test 7 | 8 | clean: 9 | rm 
./cuda_test 10 | -------------------------------------------------------------------------------- /samples/hls/mnist/simple/src/mnist_sample.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | void mnist_sample( 5 | const ap_uint<1> in[28*28], 6 | ap_uint<4> out[] 7 | ); 8 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/veritak/tb_mnist_lut_simple.vtakprj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/verilog/mnist/tb_mnist_lut_simple/veritak/tb_mnist_lut_simple.vtakprj -------------------------------------------------------------------------------- /python/projects/discrete/core.vcxproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /python/projects/thrust/core.vcxproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/iverilog/run_iverilog.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -eu 2 | 3 | TOP_MODULE=tb_mnist_lut_cnn 4 | 5 | iverilog -o $TOP_MODULE.vvp -s $TOP_MODULE -c iverilog_cmd.txt -DIVERILOG 6 | vvp $TOP_MODULE.vvp 7 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/verilator/clang-cmakeinit.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER "/usr/bin/clang" CACHE string "clang compiler" FORCE) 2 | set(CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE string "clang++ compiler" FORCE) 3 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/iverilog/run_iverilog.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -eu 2 | 3 | TOP_MODULE=tb_mnist_lut_simple 4 | 5 | iverilog -o $TOP_MODULE.vvp -s $TOP_MODULE -c iverilog_cmd.txt -DIVERILOG 6 | vvp $TOP_MODULE.vvp 7 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/verilator/clang-cmakeinit.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER "/usr/bin/clang" CACHE string "clang compiler" FORCE) 2 | set(CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE string "clang++ compiler" FORCE) 3 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/clang-cmakeinit.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER "/usr/bin/clang" CACHE string "clang compiler" FORCE) 2 | set(CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE string "clang++ compiler" FORCE) 3 | -------------------------------------------------------------------------------- /python/clean.bat: -------------------------------------------------------------------------------- 1 | rd /s /q build 2 | rd /s /q 
dist 3 | rd /s /q binarybrain.egg-info 4 | rd /s /q binarybrain\__pycache__ 5 | rd /s /q binarybrain\cuda 6 | rd /s /q binarybrain\include 7 | del binarybrain\*.pyd 8 | 9 | call copy_src.bat 10 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/veritak/tb_mnist_semantic_segmentation.vtakprj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/verilog/mnist/tb_mnist_semantic_segmentation/veritak/tb_mnist_semantic_segmentation.vtakprj -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_storage.rst: -------------------------------------------------------------------------------- 1 | 保存/復帰(Serialize) 2 | ============================= 3 | 4 | storage モジュール 5 | ---------------------------- 6 | 7 | .. automodule:: binarybrain.storage 8 | :members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/clang-cmakeinit.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER "/usr/bin/clang" CACHE string "clang compiler" FORCE) 2 | set(CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE string "clang++ compiler" FORCE) 3 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/iverilog/run_iverilog.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -eu 2 | 3 | TOP_MODULE=tb_mnist_lut_semantic_segmentation 4 | 5 | iverilog -o $TOP_MODULE.vvp -s $TOP_MODULE -c iverilog_cmd.txt -DIVERILOG 6 | vvp $TOP_MODULE.vvp 7 | -------------------------------------------------------------------------------- /documents/sphinx/source/introduction.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | はじめに 3 | ============== 4 | 5 | .. toctree:: 6 | 7 | introduction_features 8 | introduction_case_study 9 | introduction_lut_network 10 | introduction_binary_modulation 11 | 12 | 13 | -------------------------------------------------------------------------------- /python/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -fr build 4 | rm -fr dist 5 | rm -fr binarybrain.egg-info 6 | rm -fr tmp 7 | 8 | rm -fr binarybrain/include 9 | rm -fr binarybrain/cuda 10 | 11 | rm -f binarybrain/src/*.o 12 | rm -f binarybrain/src/*.so 13 | 14 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.9" 7 | 8 | python: 9 | install: 10 | - requirements: python/requirements.txt 11 | 12 | sphinx: 13 | configuration: documents/sphinx/source/conf.py 14 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_system.rst: -------------------------------------------------------------------------------- 1 | システム/GPU関連(System/GPU) 2 | ================================= 3 | 4 | その他システム制御関連のAPIです 5 | 6 | .. automodule:: binarybrain.system 7 | :members: 8 | :show-inheritance: 9 | 10 | .. 
:undoc-members: 11 | 12 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/iverilog/run_iverilog.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -eu 2 | 3 | TOP_MODULE=tb_mnist_lut_segmentation_and_classification 4 | 5 | iverilog -o $TOP_MODULE.vvp -s $TOP_MODULE -c iverilog_cmd.txt -DIVERILOG 6 | vvp $TOP_MODULE.vvp 7 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_object.rst: -------------------------------------------------------------------------------- 1 | 基本クラス 2 | ============================= 3 | 4 | Object クラス 5 | ---------------------------- 6 | 7 | .. autoclass:: binarybrain.object.Object 8 | :members: 9 | :show-inheritance: 10 | :member-order: bysource 11 | 12 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/veritak/tb_mnist_segmentation_and_classification.vtakprj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/verilog/mnist/tb_mnist_segmentation_and_classification/veritak/tb_mnist_segmentation_and_classification.vtakprj -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/iverilog/iverilog_cmd.txt: -------------------------------------------------------------------------------- 1 | ../tb_mnist_lut_cnn.v 2 | ../MnistLutCnn.v 3 | -y .. 
4 | -y ../../common 5 | -y ../../../../../jelly/rtl/library 6 | -y ../../../../../jelly/rtl/image 7 | -y ../../../../../jelly/rtl/video 8 | -y ../../../../../jelly/rtl/model 9 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/verilator/verilator_cmd.txt: -------------------------------------------------------------------------------- 1 | #--trace-fst --trace-params --trace-structs --trace-underscore 2 | #--public 3 | #--threads 2 4 | 5 | -Wno-WIDTH 6 | -Wno-UNSIGNED 7 | -Wno-PINMISSING 8 | -Wno-UNOPTFLAT 9 | #-Wno-UNOPTTHREADS 10 | -Wno-LITENDIAN 11 | 12 | -y .. 13 | -y ../../common 14 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/iverilog/iverilog_cmd.txt: -------------------------------------------------------------------------------- 1 | ../tb_mnist_lut_semantic_segmentation.v 2 | ../MnistSemanticSegmentation.v 3 | -y .. 
4 | -y ../../common 5 | -y ../../../../../jelly/rtl/library 6 | -y ../../../../../jelly/rtl/image 7 | -y ../../../../../jelly/rtl/video 8 | -y ../../../../../jelly/rtl/model 9 | -------------------------------------------------------------------------------- /python/uninstall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python3 setup.py install --record files.txt 4 | cat files.txt | xargs rm -rf 5 | rm files.txt 6 | 7 | python3 setup.py install --user --record files.txt 8 | cat files.txt | xargs rm -rf 9 | rm files.txt 10 | 11 | rm -fr /home/ryuji/.local/lib/python3.6/site-packages/binarybrain-0.0.2-py3.6-linux-x86_64.egg -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | 3 | *.md text 4 | *.txt text 5 | 6 | *.sh text eol=lf 7 | 8 | *.rst text eol=lf 9 | *.po text eol=lf 10 | 11 | *.jpg -text 12 | *.png -text 13 | 14 | *.sln text eol=crlf 15 | *.vcproj text eol=crlf 16 | *.vcproj.filters text eol=crlf 17 | *.vcproj.user text eol=crlf 18 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/iverilog/iverilog_cmd.txt: -------------------------------------------------------------------------------- 1 | ../tb_mnist_lut_segmentation_and_classification.v 2 | ../MnistSegmentationAndClassification.v 3 | -y .. 
4 | -y ../../common 5 | -y ../../../../../jelly/rtl/library 6 | -y ../../../../../jelly/rtl/image 7 | -y ../../../../../jelly/rtl/video 8 | -y ../../../../../jelly/rtl/model 9 | -------------------------------------------------------------------------------- /documents/sphinx/source/_static/css/my_theme.css: -------------------------------------------------------------------------------- 1 | @import url("theme.css"); 2 | 3 | .wy-nav-content { 4 | max-width: none; 5 | } 6 | 7 | h1,h2,h3,h4,h5,h6 { 8 | border-bottom: 1px solid #ccc; 9 | } 10 | 11 | .wy-table-responsive table td, .wy-table-responsive table th { 12 | white-space: normal; 13 | } 14 | 15 | colgroup { 16 | display: none; 17 | } 18 | -------------------------------------------------------------------------------- /documents/sphinx/source/quick_start_verilog.rst: -------------------------------------------------------------------------------- 1 | =========================== 2 | クイックスタート(Verilog) 3 | =========================== 4 | 5 | 6 | RTL Simulation の試し方 7 | ============================ 8 | 9 | C++, Pythonともに Verilog RTL のソースファイルの出力が可能です。 10 | 出力したRTLの試し方は 11 | 12 | https://github.com/ryuz/BinaryBrain/blob/ver4_release/samples/verilog/mnist/README.md 13 | 14 | のなどをご参照ください。 15 | 16 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/xsim/xvlog_cmd.txt: -------------------------------------------------------------------------------- 1 | -sv 2 | --sourcelibdir .. 
3 | --sourcelibdir ../../common 4 | --sourcelibdir ../../../../../jelly/rtl/library 5 | --sourcelibdir ../../../../../jelly/rtl/image 6 | --sourcelibdir ../../../../../jelly/rtl/video 7 | --sourcelibdir ../../../../../jelly/rtl/model 8 | --sourcelibext .v 9 | --sourcelibext .sv 10 | 11 | ../tb_mnist_lut_cnn.v 12 | ../MnistLutCnn.v 13 | 14 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | wheel 2 | numpy 3 | tqdm 4 | pybind11 5 | twine 6 | Sphinx 7 | sphinx-autobuild 8 | sphinx-markdown-tables 9 | sphinx-rtd-theme 10 | sphinxcontrib-actdiag 11 | sphinxcontrib-applehelp 12 | sphinxcontrib-blockdiag 13 | sphinxcontrib-devhelp 14 | sphinxcontrib-htmlhelp 15 | sphinxcontrib-jsmath 16 | sphinxcontrib-nwdiag 17 | sphinxcontrib-qthelp 18 | sphinxcontrib-seqdiag 19 | sphinxcontrib-serializinghtml 20 | -------------------------------------------------------------------------------- /tests/svhn/download_svhn.sh: -------------------------------------------------------------------------------- 1 | #/bin/sh 2 | 3 | curl -O http://ufldl.stanford.edu/housenumbers/train.tar.gz 4 | curl -O http://ufldl.stanford.edu/housenumbers/test.tar.gz 5 | curl -O http://ufldl.stanford.edu/housenumbers/extra.tar.gz 6 | curl -O http://ufldl.stanford.edu/housenumbers/train_32x32.mat 7 | curl -O http://ufldl.stanford.edu/housenumbers/test_32x32.mat 8 | curl -O http://ufldl.stanford.edu/housenumbers/extra_32x32.mat 9 | 10 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/xsim/run_xsim.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -eu 2 | 3 | rm -fr xsim.dir 4 | rm -fr .Xil 5 | rm -f webtalk*.jou 6 | rm -f webtalk*.log 7 | rm -f xvlog*.log 8 | rm -f xvlog*.pb 9 | rm -f xelab*.log 10 | rm -f xelab*.pb 11 | rm -f xsim*.jou 12 | rm -f xsim*.log 13 | 14 | TOP_MODULE=tb_mnist_lut_cnn 15 | 16 | xvlog -f xvlog_cmd.txt 17 | xelab -debug wave $TOP_MODULE -s $TOP_MODULE 18 | xsim $TOP_MODULE -t xsim_run_all.tcl 19 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/xsim/run_xsim.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -eu 2 | 3 | rm -fr xsim.dir 4 | rm -fr .Xil 5 | rm -f webtalk*.jou 6 | rm -f webtalk*.log 7 | rm -f xvlog*.log 8 | rm -f xvlog*.pb 9 | rm -f xelab*.log 10 | rm -f xelab*.pb 11 | rm -f xsim*.jou 12 | rm -f xsim*.log 13 | 14 | TOP_MODULE=tb_mnist_lut_simple 15 | 16 | xvlog -f xvlog_cmd.txt 17 | xelab -debug wave $TOP_MODULE -s $TOP_MODULE 18 | xsim $TOP_MODULE -t xsim_run_all.tcl 19 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/xsim/xvlog_cmd.txt: -------------------------------------------------------------------------------- 1 | -sv 2 | --sourcelibdir .. 3 | --sourcelibdir ../../common 4 | --sourcelibdir ../../../../../jelly/rtl/library 5 | --sourcelibdir ../../../../../jelly/rtl/image 6 | --sourcelibdir ../../../../../jelly/rtl/video 7 | --sourcelibdir ../../../../../jelly/rtl/model 8 | --sourcelibext .v 9 | --sourcelibext .sv 10 | 11 | ../tb_mnist_lut_semantic_segmentation.v 12 | ../MnistSemanticSegmentation.v 13 | 14 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/xsim/run_xsim.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -eu 2 | 3 | rm -fr xsim.dir 4 | rm -fr .Xil 5 | rm -f webtalk*.jou 6 | rm -f webtalk*.log 7 | rm -f xvlog*.log 8 | rm -f xvlog*.pb 9 | rm -f xelab*.log 10 | rm -f xelab*.pb 11 | rm -f xsim*.jou 12 | rm -f xsim*.log 13 | 14 | TOP_MODULE=tb_mnist_lut_semantic_segmentation 15 | 16 | xvlog -f xvlog_cmd.txt 17 | xelab -debug wave $TOP_MODULE -s $TOP_MODULE 18 | xsim $TOP_MODULE -t xsim_run_all.tcl 19 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/xsim/xvlog_cmd.txt: -------------------------------------------------------------------------------- 1 | -sv 2 | --sourcelibdir .. 3 | --sourcelibdir ../../common 4 | --sourcelibdir ../../../../../jelly/rtl/library 5 | --sourcelibdir ../../../../../jelly/rtl/image 6 | --sourcelibdir ../../../../../jelly/rtl/video 7 | --sourcelibdir ../../../../../jelly/rtl/model 8 | --sourcelibext .v 9 | --sourcelibext .sv 10 | 11 | ../tb_mnist_lut_segmentation_and_classification.v 12 | ../MnistSegmentationAndClassification.v 13 | 14 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/xsim/run_xsim.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash -eu 2 | 3 | rm -fr xsim.dir 4 | rm -fr .Xil 5 | rm -f webtalk*.jou 6 | rm -f webtalk*.log 7 | rm -f xvlog*.log 8 | rm -f xvlog*.pb 9 | rm -f xelab*.log 10 | rm -f xelab*.pb 11 | rm -f xsim*.jou 12 | rm -f xsim*.log 13 | 14 | TOP_MODULE=tb_mnist_lut_segmentation_and_classification 15 | 16 | xvlog -f xvlog_cmd.txt 17 | xelab -debug wave $TOP_MODULE -s $TOP_MODULE 18 | xsim $TOP_MODULE -t xsim_run_all.tcl 19 | -------------------------------------------------------------------------------- /tests/cpp/diabetes/diabets_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | from sklearn.datasets import load_diabetes 4 | 5 | diabetes = load_diabetes() 6 | 7 | with open('diabetes_data.txt', mode='w') as f: 8 | for dd in diabetes['data']: 9 | for d in dd: 10 | f.write(str(d)) 11 | f.write(' ') 12 | f.write('\n') 13 | 14 | with open('diabetes_target.txt', mode='w') as f: 15 | for d in diabetes['target']: 16 | f.write(str(d)) 17 | f.write('\n') 18 | 19 | -------------------------------------------------------------------------------- /samples/cpp/diabetes/diabets_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | from sklearn.datasets import load_diabetes 4 | 5 | diabetes = load_diabetes() 6 | 7 | with open('diabetes_data.txt', mode='w') as f: 8 | for dd in diabetes['data']: 9 | for d in dd: 10 | f.write(str(d)) 11 | f.write(' ') 12 | f.write('\n') 13 | 14 | with open('diabetes_target.txt', mode='w') as f: 15 | for d in diabetes['target']: 16 | f.write(str(d)) 17 | f.write('\n') 18 | 19 | -------------------------------------------------------------------------------- /tests/hls/mnist/mnist_simple/src/mnist_simple.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "mnist_simple.h" 3 | #include "MnistDifferentiableLutSimpleHls.h" 4 | 5 | 6 | ap_uint<10> mnist_simple(ap_uint<28*28> 
in_data) 7 | { 8 | #pragma HLS pipeline II=1 9 | 10 | auto data1 = mnist_layer1(in_data); 11 | auto data2 = mnist_layer2(data1); 12 | auto data3 = mnist_layer3(data2); 13 | auto data4 = mnist_layer4(data3); 14 | auto data5 = mnist_layer5(data4); 15 | auto data6 = mnist_layer6(data5); 16 | return data6; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/verilator/verilator_cmd.txt: -------------------------------------------------------------------------------- 1 | #--trace-fst --trace-params --trace-structs --trace-underscore 2 | #--public 3 | #--threads 2 4 | 5 | -Wno-WIDTH 6 | -Wno-UNSIGNED 7 | -Wno-PINMISSING 8 | -Wno-UNOPTFLAT 9 | #-Wno-UNOPTTHREADS 10 | -Wno-LITENDIAN 11 | 12 | -y .. 13 | -y ../../common 14 | -y ../../../../../jelly/rtl/library/ 15 | -y ../../../../../jelly/rtl/bus 16 | -y ../../../../../jelly/rtl/image 17 | -y ../../../../../jelly/rtl/video 18 | -y ../../../../../jelly/rtl/math 19 | -y ../../../../../jelly/rtl/model 20 | -------------------------------------------------------------------------------- /python/upload_pypi.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem call set_vc.bat 4 | 5 | python -V 6 | pause 7 | 8 | echo git switch ver4_release & git pull 9 | pause 10 | git switch ver4_release 11 | git pull 12 | 13 | 14 | echo build 15 | pause 16 | 17 | call clean.bat 18 | call copy_src.bat 19 | 20 | python setup.py build 21 | 22 | python setup.py sdist 23 | python setup.py bdist_wheel 24 | 25 | 26 | echo upload TestPyPI 27 | pause 28 | twine upload --repository testpypi dist/* 29 | 30 | 31 | echo upload py37 32 | pause 33 | twine upload --repository pypi dist/* 34 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/verilator_cmd.txt: 
-------------------------------------------------------------------------------- 1 | #--trace-fst --trace-params --trace-structs --trace-underscore 2 | #--public 3 | #--threads 2 4 | 5 | -Wno-WIDTH 6 | -Wno-UNSIGNED 7 | -Wno-PINMISSING 8 | -Wno-UNOPTFLAT 9 | #-Wno-UNOPTTHREADS 10 | -Wno-LITENDIAN 11 | 12 | -y .. 13 | -y ../../common 14 | -y ../../../../../jelly/rtl/library/ 15 | -y ../../../../../jelly/rtl/bus 16 | -y ../../../../../jelly/rtl/image 17 | -y ../../../../../jelly/rtl/video 18 | -y ../../../../../jelly/rtl/math 19 | -y ../../../../../jelly/rtl/model 20 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/verilator_cmd.txt: -------------------------------------------------------------------------------- 1 | #--trace-fst --trace-params --trace-structs --trace-underscore 2 | #--public 3 | #--threads 2 4 | 5 | -Wno-WIDTH 6 | -Wno-UNSIGNED 7 | -Wno-PINMISSING 8 | -Wno-UNOPTFLAT 9 | #-Wno-UNOPTTHREADS 10 | -Wno-LITENDIAN 11 | 12 | -y .. 13 | -y ../../common 14 | -y ../../../../../jelly/rtl/library/ 15 | -y ../../../../../jelly/rtl/bus 16 | -y ../../../../../jelly/rtl/image 17 | -y ../../../../../jelly/rtl/video 18 | -y ../../../../../jelly/rtl/math 19 | -y ../../../../../jelly/rtl/model 20 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_models_misc.rst: -------------------------------------------------------------------------------- 1 | 補助モデル 2 | ====================================== 3 | 4 | 5 | models モジュールのその他のモデルです。 6 | 7 | 8 | BatchNormalization クラス 9 | ---------------------------- 10 | 11 | .. autoclass:: binarybrain.models.BatchNormalization 12 | :members: 13 | :show-inheritance: 14 | 15 | 16 | Dropout クラス 17 | ---------------------------- 18 | 19 | .. 
autoclass:: binarybrain.models.Dropout 20 | :members: 21 | :show-inheritance: 22 | 23 | 24 | Shuffle クラス 25 | ---------------------------- 26 | 27 | .. autoclass:: binarybrain.models.Shuffle 28 | :members: 29 | :show-inheritance: 30 | 31 | 32 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/xsim/run_xsim.bat: -------------------------------------------------------------------------------- 1 | 2 | 3 | rmdir /s /q xsim.dir 4 | rmdir /s /q .Xil 5 | del webtalk*.jou 6 | del webtalk*.log 7 | del xvlog*.log 8 | del xvlog*.pb 9 | del xelab*.log 10 | del xelab*.pb 11 | del xsim*.jou 12 | del xsim*.log 13 | 14 | @if "%1"=="" goto BUILD 15 | @if %1==clean goto END 16 | 17 | :BUILD 18 | 19 | set TOP_MODULE=tb_mnist_lut_cnn 20 | 21 | call xvlog -f xvlog_cmd.txt 22 | @if ERRORLEVEL 1 GOTO END 23 | 24 | call xelab -debug wave %TOP_MODULE% -s %TOP_MODULE% 25 | @if ERRORLEVEL 1 GOTO END 26 | 27 | call xsim %TOP_MODULE% -t xsim_run_all.tcl 28 | @if ERRORLEVEL 1 GOTO END 29 | 30 | :END 31 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/xsim/run_xsim.bat: -------------------------------------------------------------------------------- 1 | 2 | 3 | rmdir /s /q xsim.dir 4 | rmdir /s /q .Xil 5 | del webtalk*.jou 6 | del webtalk*.log 7 | del xvlog*.log 8 | del xvlog*.pb 9 | del xelab*.log 10 | del xelab*.pb 11 | del xsim*.jou 12 | del xsim*.log 13 | 14 | @if "%1"=="" goto BUILD 15 | @if %1==clean goto END 16 | 17 | :BUILD 18 | 19 | set TOP_MODULE=tb_mnist_lut_simple 20 | 21 | call xvlog -f xvlog_cmd.txt 22 | @if ERRORLEVEL 1 GOTO END 23 | 24 | call xelab -debug wave %TOP_MODULE% -s %TOP_MODULE% 25 | @if ERRORLEVEL 1 GOTO END 26 | 27 | call xsim %TOP_MODULE% -t xsim_run_all.tcl 28 | @if ERRORLEVEL 1 GOTO END 29 | 30 | :END 31 | -------------------------------------------------------------------------------- 
/samples/verilog/mnist/tb_mnist_semantic_segmentation/xsim/run_xsim.bat: -------------------------------------------------------------------------------- 1 | 2 | 3 | rmdir /s /q xsim.dir 4 | rmdir /s /q .Xil 5 | del webtalk*.jou 6 | del webtalk*.log 7 | del xvlog*.log 8 | del xvlog*.pb 9 | del xelab*.log 10 | del xelab*.pb 11 | del xsim*.jou 12 | del xsim*.log 13 | 14 | @if "%1"=="" goto BUILD 15 | @if %1==clean goto END 16 | 17 | :BUILD 18 | 19 | set TOP_MODULE=tb_mnist_lut_semantic_segmentation 20 | 21 | call xvlog -f xvlog_cmd.txt 22 | @if ERRORLEVEL 1 GOTO END 23 | 24 | call xelab -debug wave %TOP_MODULE% -s %TOP_MODULE% 25 | @if ERRORLEVEL 1 GOTO END 26 | 27 | call xsim %TOP_MODULE% -t xsim_run_all.tcl 28 | @if ERRORLEVEL 1 GOTO END 29 | 30 | :END 31 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_verilog.rst: -------------------------------------------------------------------------------- 1 | RTL(Verilog/HLS)変換 2 | ========================== 3 | 4 | 学習が完了したネットは結果パラメータに基づいて、ユーザ側で自由に実装可能ですが、 5 | BinaryBrainでも若干のサポート関数を備えています。 6 | 7 | .. automodule:: binarybrain.verilog 8 | :members: 9 | :show-inheritance: 10 | 11 | .. automodule:: binarybrain.hls 12 | :members: 13 | :show-inheritance: 14 | 15 | .. 16 | :undoc-members: 17 | dump_verilog_lut_layers 関数 18 | ----------------------------------- 19 | .. automethod:: binarybrain.verilog.dump_verilog_lut_layers 20 | dump_verilog_lut_cnv_layers 関数 21 | ----------------------------------- 22 | .. 
automethod:: binarybrain.verilog.dump_verilog_lut_cnv_layers 23 | 24 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/xsim/run_xsim.bat: -------------------------------------------------------------------------------- 1 | 2 | 3 | rmdir /s /q xsim.dir 4 | rmdir /s /q .Xil 5 | del webtalk*.jou 6 | del webtalk*.log 7 | del xvlog*.log 8 | del xvlog*.pb 9 | del xelab*.log 10 | del xelab*.pb 11 | del xsim*.jou 12 | del xsim*.log 13 | 14 | @if "%1"=="" goto BUILD 15 | @if %1==clean goto END 16 | 17 | :BUILD 18 | 19 | set TOP_MODULE=tb_mnist_lut_segmentation_and_classification 20 | 21 | call xvlog -f xvlog_cmd.txt 22 | @if ERRORLEVEL 1 GOTO END 23 | 24 | call xelab -debug wave %TOP_MODULE% -s %TOP_MODULE% 25 | @if ERRORLEVEL 1 GOTO END 26 | 27 | call xsim %TOP_MODULE% -t xsim_run_all.tcl 28 | @if ERRORLEVEL 1 GOTO END 29 | 30 | :END 31 | -------------------------------------------------------------------------------- /include/bb/Manager.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | 5 | #ifdef BB_WITH_CUDA 6 | #include "bbcu/bbcu.h" 7 | #endif 8 | 9 | 10 | namespace bb { 11 | 12 | class Manager 13 | { 14 | public: 15 | 16 | #ifdef BB_WITH_CUDA 17 | static inline bool IsDeviceAvailable(void) 18 | { 19 | return !bbcu_IsHostOnly(); 20 | } 21 | 22 | static inline void SetHostOnly(bool hostOnly) 23 | { 24 | bbcu_SetHostOnly(hostOnly); 25 | } 26 | #else 27 | static bool IsDeviceAvailable(void) 28 | { 29 | return false; 30 | } 31 | 32 | static void SetHostOnly(bool hostOnly) 33 | { 34 | } 35 | #endif 36 | }; 37 | 38 | 39 | } 40 | 41 | 42 | // end of file 43 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_models_base.rst: -------------------------------------------------------------------------------- 1 | 基本モデル (Base models) 2 | 
====================================== 3 | 4 | models モジュールには、ネットワークを構成するための各種演算モデルがあります。 5 | 6 | 7 | Model クラス 8 | ---------------------------- 9 | 10 | .. autoclass:: binarybrain.models.Model 11 | :members: 12 | :show-inheritance: 13 | :member-order: bysource 14 | 15 | 16 | Sequential クラス 17 | ---------------------------- 18 | 19 | .. autoclass:: binarybrain.models.Sequential 20 | :members: set_model_list, get_model_list, append 21 | :show-inheritance: 22 | 23 | 24 | Switcher クラス 25 | ---------------------------- 26 | 27 | .. autoclass:: binarybrain.models.Switcher 28 | :members: 29 | :show-inheritance: 30 | 31 | -------------------------------------------------------------------------------- /python/check_install.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | print(sys.version) 6 | 7 | import platform 8 | print("Python : {}".format(platform.python_version())) 9 | 10 | import binarybrain as bb 11 | print("BinaryBrain : {}".format(bb.get_version_string())) 12 | print("CUDA version : {}".format(bb.get_cuda_driver_version_string())) 13 | 14 | device_available = bb.is_device_available() 15 | print("GPU available : {}".format(device_available)) 16 | if device_available: 17 | device_count = bb.get_device_count() 18 | print("GPU count : {}".format(device_count)) 19 | for i in range(device_count): 20 | print("GPU[{}] : {}".format(i, bb.get_device_name(i))) 21 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_container.rst: -------------------------------------------------------------------------------- 1 | データ格納 2 | ================================== 3 | 4 | 5 | DType クラス(Enum定義) 6 | ---------------------------- 7 | 8 | .. 
autoclass:: binarybrain.dtype.DType 9 | :members: 10 | :undoc-members: 11 | :show-inheritance: 12 | 13 | 14 | Tensor クラス 15 | ---------------------------- 16 | 17 | .. autoclass:: binarybrain.tensor.Tensor 18 | :members: 19 | :show-inheritance: 20 | 21 | 22 | FrameBuffer クラス 23 | ---------------------------- 24 | 25 | .. autoclass:: binarybrain.frame_buffer.FrameBuffer 26 | :members: 27 | :show-inheritance: 28 | 29 | Variables クラス 30 | ---------------------------- 31 | 32 | .. autoclass:: binarybrain.variables.Variables 33 | :members: 34 | :show-inheritance: 35 | 36 | -------------------------------------------------------------------------------- /documents/sphinx/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile livehtml 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | 21 | livehtml: 22 | sphinx-autobuild -b html $(SPHINXOPTS) $(SOURCEDIR) $(BUILDDIR)/html 23 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_optimizer.rst: -------------------------------------------------------------------------------- 1 | 最適化 (optimizer) 2 | ============================ 3 | 4 | 5 | Optimizer クラス 6 | ---------------------------- 7 | 8 | .. 
autoclass:: binarybrain.optimizer.Optimizer 9 | :members: 10 | :show-inheritance: 11 | 12 | 13 | OptimizerSgd クラス 14 | ---------------------------- 15 | 16 | .. autoclass:: binarybrain.optimizer.OptimizerSgd 17 | :members: 18 | :show-inheritance: 19 | 20 | 21 | OptimizerAdaGrad クラス 22 | ---------------------------- 23 | 24 | .. autoclass:: binarybrain.optimizer.OptimizerAdaGrad 25 | :members: 26 | :show-inheritance: 27 | 28 | 29 | OptimizerAdam クラス 30 | ---------------------------- 31 | 32 | .. autoclass:: binarybrain.optimizer.OptimizerAdam 33 | :members: 34 | :show-inheritance: 35 | 36 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_api.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | Python API 3 | ===================== 4 | 5 | 概要 6 | ----------- 7 | 8 | Python版モジュールは binarybarin パッケージを import することで利用可能です。 9 | 10 | 11 | 12 | binarybarin パッケージ 13 | --------------------------- 14 | 15 | binarybarin には以下のモジュールが含まれています。 16 | 17 | .. 
toctree:: 18 | 19 | python_module_object 20 | python_module_container 21 | python_module_models_base 22 | python_module_models_binary 23 | python_module_models_operation 24 | python_module_models_filter 25 | python_module_models_activation 26 | python_module_models_misc 27 | python_module_optimizer 28 | python_module_losses 29 | python_module_metrics 30 | python_module_storage 31 | python_module_verilog 32 | python_module_system 33 | 34 | -------------------------------------------------------------------------------- /samples/cpp/mnist/get_nmist.bat: -------------------------------------------------------------------------------- 1 | if not exist train-images-idx3-ubyte.gz ( 2 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 3 | ) 4 | if not exist train-labels-idx1-ubyte.gz ( 5 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 6 | ) 7 | if not exist t10k-images-idx3-ubyte.gz ( 8 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 9 | ) 10 | if not exist t10k-labels-idx1-ubyte.gz ( 11 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 12 | ) 13 | 14 | gzip -c -d train-images-idx3-ubyte.gz > train-images-idx3-ubyte 15 | gzip -c -d train-labels-idx1-ubyte.gz > train-labels-idx1-ubyte 16 | gzip -c -d t10k-images-idx3-ubyte.gz > t10k-images-idx3-ubyte 17 | gzip -c -d t10k-labels-idx1-ubyte.gz > t10k-labels-idx1-ubyte 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | import sys 5 | import subprocess 6 | import shutil 7 | 8 | # change directory 9 | src_path = os.path.dirname(os.path.abspath(sys.argv[0])) 10 | os.chdir(os.path.join(src_path, 'python')) 11 | 12 | # file copy 13 | shutil.rmtree('binarybrain/include', ignore_errors=True) 14 | shutil.rmtree('binarybrain/cuda', ignore_errors=True) 15 | shutil.copytree('../include', 
'binarybrain/include') 16 | shutil.copytree('../cuda', 'binarybrain/cuda') 17 | 18 | python_cmd = 'python3' 19 | try: 20 | subprocess.check_call('python3 -V', shell=True) 21 | except subprocess.CalledProcessError as e: 22 | python_cmd = 'python' 23 | 24 | 25 | # run setup.py 26 | args = sys.argv.copy() 27 | args.pop(0) 28 | subprocess.call([python_cmd, 'setup.py'] + args) 29 | -------------------------------------------------------------------------------- /tests/cpp/mnist/get_nmist.bat: -------------------------------------------------------------------------------- 1 | if not exist train-images-idx3-ubyte.gz ( 2 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 3 | ) 4 | if not exist train-labels-idx1-ubyte.gz ( 5 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 6 | ) 7 | if not exist t10k-images-idx3-ubyte.gz ( 8 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 9 | ) 10 | if not exist t10k-labels-idx1-ubyte.gz ( 11 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 12 | ) 13 | 14 | gzip -c -d train-images-idx3-ubyte.gz > train-images-idx3-ubyte 15 | gzip -c -d train-labels-idx1-ubyte.gz > train-labels-idx1-ubyte 16 | gzip -c -d t10k-images-idx3-ubyte.gz > t10k-images-idx3-ubyte 17 | gzip -c -d t10k-labels-idx1-ubyte.gz > t10k-labels-idx1-ubyte 18 | -------------------------------------------------------------------------------- /tests/cpp/xor/main.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // BinaryBrain -- binary network evaluation platform 3 | // MNIST sample 4 | // 5 | // Copyright (C) 2018 by Ryuji Fuchikami 6 | // -------------------------------------------------------------------------- 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | void XorMicroMlp(int epoch_size, bool binary_mode); 18 | void 
StochasticLut6(int epoch_size, bool binary_mode); 19 | 20 | 21 | // メイン関数 22 | int main() 23 | { 24 | omp_set_num_threads(1); 25 | 26 | // XorMicroMlp(65536, true); 27 | StochasticLut6(65536, true); 28 | 29 | return 0; 30 | } 31 | 32 | -------------------------------------------------------------------------------- /documents/sphinx/source/index.rst: -------------------------------------------------------------------------------- 1 | .. BinaryBrain documentation master file, created by 2 | sphinx-quickstart on Mon Sep 16 08:36:13 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to BinaryBrain's documentation! 7 | ============================================= 8 | 9 | 本書は `BinaryBrain Ver4`: https://github.com/ryuz/BinaryBrain/tree/ver4_release のドキュメントです。 10 | 11 | 12 | .. toctree:: 13 | :maxdepth: 4 14 | :caption: Contents: 15 | 16 | introduction 17 | quick_start_cpp 18 | quick_start_python 19 | quick_start_verilog 20 | cpp_api 21 | python_api 22 | informations 23 | 24 | 25 | Indices and tables 26 | ================== 27 | 28 | * :ref:`genindex` 29 | * :ref:`modindex` 30 | * :ref:`search` 31 | -------------------------------------------------------------------------------- /tests/cpp/mnist/get_nmist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -e train-images-idx3-ubyte.gz ]; then 4 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 5 | fi 6 | 7 | if [ ! -e train-labels-idx1-ubyte.gz ]; then 8 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 9 | fi 10 | 11 | if [ ! -e t10k-images-idx3-ubyte.gz ]; then 12 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 13 | fi 14 | 15 | if [ ! 
-e t10k-labels-idx1-ubyte.gz ]; then 16 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 17 | fi 18 | 19 | gzip -c -d train-images-idx3-ubyte.gz > train-images-idx3-ubyte 20 | gzip -c -d train-labels-idx1-ubyte.gz > train-labels-idx1-ubyte 21 | gzip -c -d t10k-images-idx3-ubyte.gz > t10k-images-idx3-ubyte 22 | gzip -c -d t10k-labels-idx1-ubyte.gz > t10k-labels-idx1-ubyte 23 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_models_binary.rst: -------------------------------------------------------------------------------- 1 | バイナリ変調モデル (Binary modulation) 2 | ====================================== 3 | 4 | models モジュールのうち、バイナリネットを構成する変調にかかわるモデルです。 5 | 6 | 7 | RealToBinary class 8 | ---------------------------- 9 | 10 | .. autoclass:: binarybrain.models.RealToBinary 11 | :members: 12 | :show-inheritance: 13 | 14 | 15 | BinaryToReal class 16 | ---------------------------- 17 | 18 | .. autoclass:: binarybrain.models.BinaryToReal 19 | :members: 20 | :show-inheritance: 21 | 22 | 23 | BitEncode class 24 | ---------------------------- 25 | 26 | .. autoclass:: binarybrain.models.BitEncode 27 | :members: 28 | :show-inheritance: 29 | 30 | 31 | Reduce class 32 | ---------------------------- 33 | 34 | .. autoclass:: binarybrain.models.Reduce 35 | :members: 36 | :show-inheritance: 37 | 38 | -------------------------------------------------------------------------------- /samples/cpp/mnist/get_nmist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -e train-images-idx3-ubyte.gz ]; then 4 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 5 | fi 6 | 7 | if [ ! -e train-labels-idx1-ubyte.gz ]; then 8 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 9 | fi 10 | 11 | if [ ! 
-e t10k-images-idx3-ubyte.gz ]; then 12 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 13 | fi 14 | 15 | if [ ! -e t10k-labels-idx1-ubyte.gz ]; then 16 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 17 | fi 18 | 19 | gzip -c -d train-images-idx3-ubyte.gz > train-images-idx3-ubyte 20 | gzip -c -d train-labels-idx1-ubyte.gz > train-labels-idx1-ubyte 21 | gzip -c -d t10k-images-idx3-ubyte.gz > t10k-images-idx3-ubyte 22 | gzip -c -d t10k-labels-idx1-ubyte.gz > t10k-labels-idx1-ubyte 23 | -------------------------------------------------------------------------------- /include/bb/ModelLoader.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2021 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | #include 13 | 14 | #include "bb/Model.h" 15 | #include "bb/ObjectLoader.h" 16 | 17 | 18 | namespace bb 19 | { 20 | 21 | 22 | inline std::shared_ptr Model_LoadFromFile(std::string filename) 23 | { 24 | return std::dynamic_pointer_cast(Object_LoadFromFile(filename)); 25 | } 26 | 27 | 28 | } 29 | 30 | 31 | // end of file 32 | -------------------------------------------------------------------------------- /python/binarybrain/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | import sys 5 | if os.name == 'nt' and sys.version_info.major >= 3 and sys.version_info.minor >= 8: 6 | os.add_dll_directory(os.path.join(os.environ['CUDA_PATH'], 'bin')) 7 | 8 | from binarybrain.system import * 9 | 10 | from binarybrain.dtype import * 11 | 12 | from binarybrain.object import * 13 | 14 | from binarybrain.tensor import * 15 | from 
binarybrain.frame_buffer import * 16 | from binarybrain.variables import * 17 | 18 | from binarybrain.models import * 19 | 20 | from binarybrain.losses import * 21 | from binarybrain.metrics import * 22 | from binarybrain.optimizer import * 23 | 24 | from binarybrain.storage import * 25 | from binarybrain.verilog import * 26 | from binarybrain.hls import * 27 | 28 | -------------------------------------------------------------------------------- /include/bb/ValueGenerator.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018-2019 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | 13 | #include "bb/Object.h" 14 | 15 | 16 | namespace bb { 17 | 18 | template 19 | class ValueGenerator : public Object 20 | { 21 | public: 22 | virtual std::string GetValueGeneratorName(void) const = 0; 23 | 24 | virtual ~ValueGenerator(){} 25 | virtual void Reset(void) = 0; 26 | virtual T GetValue(void) = 0; 27 | }; 28 | 29 | 30 | } 31 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_models_filter.rst: -------------------------------------------------------------------------------- 1 | 畳み込み/プーリング(Convolution and Pooling) 2 | ================================================ 3 | 4 | 5 | models モジュールの、畳み込みやプーリングなどのフィルタ演算を行うモデルです。 6 | 7 | 8 | Convolution2d クラス 9 | ---------------------------- 10 | 11 | .. autoclass:: binarybrain.models.Convolution2d 12 | :members: 13 | :show-inheritance: 14 | 15 | MaxPooling クラス 16 | ---------------------------- 17 | 18 | .. 
autoclass:: binarybrain.models.MaxPooling 19 | :members: 20 | :show-inheritance: 21 | 22 | 23 | StochasticMaxPooling クラス 24 | ---------------------------- 25 | 26 | .. autoclass:: binarybrain.models.StochasticMaxPooling 27 | :members: 28 | :show-inheritance: 29 | 30 | 31 | UpSampling クラス 32 | ---------------------------- 33 | 34 | .. autoclass:: binarybrain.models.UpSampling 35 | :members: 36 | :show-inheritance: 37 | 38 | -------------------------------------------------------------------------------- /include/bb/StochasticLutModel.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | 11 | #pragma once 12 | 13 | 14 | #include "bb/SparseModel.h" 15 | 16 | 17 | namespace bb { 18 | 19 | 20 | // 確率的LUT関連の基底クラス 21 | class StochasticLutModel : public SparseModel 22 | { 23 | public: 24 | virtual Tensor &W(void) = 0; 25 | virtual Tensor const &W(void) const = 0; 26 | 27 | virtual Tensor &dW(void) = 0; 28 | virtual Tensor const &dW(void) const = 0; 29 | }; 30 | 31 | 32 | } 33 | -------------------------------------------------------------------------------- /samples/hls/mnist/simple/testbench/tb_mnist.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "mnist_sample.h" 3 | #include "mnist_test_data.h" 4 | 5 | int main() 6 | { 7 | std::cout << "start testbench" << std::endl; 8 | 9 | int n = 0; 10 | int ok = 0; 11 | for ( int i = 0; i < 20; ++i ) { 12 | ap_uint<1> in[28*28]; 13 | for ( int y = 0; y < 28; ++y ) { 14 | for ( int x = 0; x < 28; ++x ) { 15 | in[y*28+x] = test_images[i][y][x]; 16 | } 17 | } 18 | 19 | ap_uint<4> out[0]; 20 | mnist_sample(in, out); 
21 | 22 | n++; 23 | if ( out[0] == test_labels[i] ) { 24 | ok++; 25 | } 26 | 27 | std::cout << "out[" << i << "]=" << (int)out[0] << " exp:"<< (int)test_labels[i] << " " << (out[0] == test_labels[i] ? "ok" : "miss") << std::endl; 28 | } 29 | std::cout << "accuracy = " << ok << "/" << n << std::endl; 30 | 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_models_activation.rst: -------------------------------------------------------------------------------- 1 | 活性化(Activation) 2 | ====================================== 3 | 4 | 5 | models モジュールの 活性化層(Activation層()を作るためのモデルです。 6 | 7 | 8 | Binarize クラス 9 | ---------------------------- 10 | 11 | .. autoclass:: binarybrain.models.Binarize 12 | :members: 13 | :show-inheritance: 14 | 15 | 16 | Sigmoid クラス 17 | ---------------------------- 18 | 19 | .. autoclass:: binarybrain.models.Sigmoid 20 | :members: 21 | :show-inheritance: 22 | 23 | 24 | ReLU クラス 25 | ---------------------------- 26 | 27 | .. autoclass:: binarybrain.models.ReLU 28 | :members: 29 | :show-inheritance: 30 | 31 | HardTanh クラス 32 | ---------------------------- 33 | 34 | .. autoclass:: binarybrain.models.HardTanh 35 | :members: 36 | :show-inheritance: 37 | 38 | 39 | Softmax クラス 40 | ---------------------------- 41 | 42 | .. autoclass:: binarybrain.models.Softmax 43 | :members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_metrics.rst: -------------------------------------------------------------------------------- 1 | 評価関数(Metrics functions) 2 | ==================================== 3 | 4 | 5 | Metrics クラス 6 | ---------------------------- 7 | 8 | .. autoclass:: binarybrain.metrics.Metrics 9 | :members: 10 | :show-inheritance: 11 | 12 | 13 | 14 | MetricsMeanSquaredError クラス 15 | ------------------------------------- 16 | 17 | .. 
autoclass:: binarybrain.metrics.MetricsMeanSquaredError 18 | :members: 19 | :show-inheritance: 20 | 21 | 22 | MetricsCategoricalAccuracy クラス 23 | ------------------------------------ 24 | 25 | .. autoclass:: binarybrain.metrics.MetricsCategoricalAccuracy 26 | :members: 27 | :show-inheritance: 28 | 29 | 30 | MetricsBinaryCategoricalAccuracy クラス 31 | ---------------------------------------------- 32 | 33 | .. autoclass:: binarybrain.metrics.MetricsBinaryCategoricalAccuracy 34 | :members: 35 | :show-inheritance: 36 | 37 | 38 | .. 39 | .. automodule:: binarybrain.metrics 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/verilator/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | # cmake のフラグ 4 | CMAKE_FLAGS = 5 | 6 | 7 | # clang があれば使う 8 | ifeq (,$(shell which clang)) 9 | WITH_CLANG ?= No 10 | else 11 | WITH_CLANG ?= Yes 12 | endif 13 | 14 | # ninja があれば使う 15 | ifeq (,$(shell which ninja)) 16 | WITH_NINJA ?= No 17 | else 18 | WITH_NINJA ?= Yes 19 | endif 20 | 21 | ifeq ($(WITH_CLANG),Yes) 22 | CMAKE_FLAGS += -C ../clang-cmakeinit.cmake 23 | endif 24 | 25 | ifeq ($(WITH_NINJA),Yes) 26 | CMAKE_FLAGS += -GNinja 27 | endif 28 | 29 | 30 | 31 | all: build run 32 | 33 | .PHONY : build 34 | build: 35 | mkdir -p build && cd build && cmake $(CMAKE_FLAGS) .. 
36 | cmake --build build -j 37 | 38 | .PHONY : clean 39 | clean: 40 | rm -rf build 41 | 42 | .PHONY : mostlyclean 43 | mostlyclean: clean 44 | rm -f img_*.png angle_*.png 45 | rm -f *.fst *.vcd 46 | rm -f coverage.dat 47 | rm -fr annotated 48 | 49 | 50 | .PHONY : run 51 | run: 52 | build/tb_verilator 53 | 54 | .PHONY : coverage 55 | coverage: 56 | verilator_coverage --annotate annotated coverage.dat 57 | 58 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/verilator/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | # cmake のフラグ 4 | CMAKE_FLAGS = 5 | 6 | 7 | # clang があれば使う 8 | ifeq (,$(shell which clang)) 9 | WITH_CLANG ?= No 10 | else 11 | WITH_CLANG ?= Yes 12 | endif 13 | 14 | # ninja があれば使う 15 | ifeq (,$(shell which ninja)) 16 | WITH_NINJA ?= No 17 | else 18 | WITH_NINJA ?= Yes 19 | endif 20 | 21 | ifeq ($(WITH_CLANG),Yes) 22 | CMAKE_FLAGS += -C ../clang-cmakeinit.cmake 23 | endif 24 | 25 | ifeq ($(WITH_NINJA),Yes) 26 | CMAKE_FLAGS += -GNinja 27 | endif 28 | 29 | 30 | 31 | all: build run 32 | 33 | .PHONY : build 34 | build: 35 | mkdir -p build && cd build && cmake $(CMAKE_FLAGS) .. 
36 | cmake --build build -j 37 | 38 | .PHONY : clean 39 | clean: 40 | rm -rf build 41 | 42 | .PHONY : mostlyclean 43 | mostlyclean: clean 44 | rm -f img_*.png angle_*.png 45 | rm -f *.fst *.vcd 46 | rm -f coverage.dat 47 | rm -fr annotated 48 | 49 | 50 | .PHONY : run 51 | run: 52 | build/tb_verilator 53 | 54 | .PHONY : coverage 55 | coverage: 56 | verilator_coverage --annotate annotated coverage.dat 57 | 58 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | # cmake のフラグ 4 | CMAKE_FLAGS = 5 | 6 | 7 | # clang があれば使う 8 | ifeq (,$(shell which clang)) 9 | WITH_CLANG ?= No 10 | else 11 | WITH_CLANG ?= Yes 12 | endif 13 | 14 | # ninja があれば使う 15 | ifeq (,$(shell which ninja)) 16 | WITH_NINJA ?= No 17 | else 18 | WITH_NINJA ?= Yes 19 | endif 20 | 21 | ifeq ($(WITH_CLANG),Yes) 22 | CMAKE_FLAGS += -C ../clang-cmakeinit.cmake 23 | endif 24 | 25 | ifeq ($(WITH_NINJA),Yes) 26 | CMAKE_FLAGS += -GNinja 27 | endif 28 | 29 | 30 | 31 | all: build run 32 | 33 | .PHONY : build 34 | build: 35 | mkdir -p build && cd build && cmake $(CMAKE_FLAGS) .. 
36 | cmake --build build -j 37 | 38 | .PHONY : clean 39 | clean: 40 | rm -rf build 41 | 42 | .PHONY : mostlyclean 43 | mostlyclean: clean 44 | rm -f img_*.png angle_*.png 45 | rm -f *.fst *.vcd 46 | rm -f coverage.dat 47 | rm -fr annotated 48 | 49 | 50 | .PHONY : run 51 | run: 52 | build/tb_verilator 53 | 54 | .PHONY : coverage 55 | coverage: 56 | verilator_coverage --annotate annotated coverage.dat 57 | 58 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright 2018 by Ryuji Fuchikami 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /samples/cpp/diabetes/LoadDiabetes.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #pragma once 5 | 6 | 7 | #include "bb/DataType.h" 8 | 9 | 10 | template 11 | bb::TrainData LoadDiabetes(int num_train=400) 12 | { 13 | const int n = 442; 14 | 15 | std::ifstream ifs_x("diabetes_data.txt"); 16 | std::ifstream ifs_t("diabetes_target.txt"); 17 | 18 | bb::TrainData td; 19 | td.x_shape = bb::indices_t({ 10 }); 20 | td.t_shape = bb::indices_t({ 1 }); 21 | 22 | for (int i = 0; i < num_train; ++i) { 23 | std::vector train(10); 24 | std::vector target(1); 25 | for (int j = 0; j < 10; ++j) { 26 | ifs_x >> train[j]; 27 | } 28 | ifs_t >> target[0]; 29 | 30 | td.x_train.push_back(train); 31 | td.t_train.push_back(target); 32 | } 33 | 34 | for (int i = 0; i < n - num_train; ++i) { 35 | std::vector train(10); 36 | std::vector target(1); 37 | for (int j = 0; j < 10; ++j) { 38 | ifs_x >> train[j]; 39 | } 40 | ifs_t >> target[0]; 41 | 42 | td.x_test.push_back(train); 43 | td.t_test.push_back(target); 44 | } 45 | 46 | return td; 47 | } 48 | 49 | 50 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | # cmake のフラグ 4 | CMAKE_FLAGS = 5 | 6 | 7 | # clang があれば使う 8 | ifeq (,$(shell which clang)) 9 | WITH_CLANG ?= No 10 | else 11 | WITH_CLANG ?= Yes 12 | endif 13 | 14 | # ninja があれば使う 15 | ifeq (,$(shell which ninja)) 16 | WITH_NINJA ?= No 17 | else 18 | WITH_NINJA ?= Yes 19 | endif 20 | 21 | ifeq ($(WITH_CLANG),Yes) 22 | CMAKE_FLAGS += -C ../clang-cmakeinit.cmake 23 | endif 24 | 25 | ifeq ($(WITH_NINJA),Yes) 26 | CMAKE_FLAGS += -GNinja 27 | endif 28 | 29 | 30 | 31 | all: build run 32 | 33 | .PHONY : build 34 | build: 35 | mkdir -p build && cd build && cmake 
$(CMAKE_FLAGS) .. 36 | cmake --build build -j 37 | 38 | .PHONY : clean 39 | clean: 40 | rm -rf build 41 | 42 | .PHONY : mostlyclean 43 | mostlyclean: clean 44 | rm -f img_*.png angle_*.png 45 | rm -f *.fst *.vcd 46 | rm -f coverage.dat 47 | rm -fr annotated 48 | 49 | 50 | .PHONY : run 51 | run: 52 | build/tb_verilator 53 | 54 | .PHONY : coverage 55 | coverage: 56 | verilator_coverage --annotate annotated coverage.dat 57 | 58 | -------------------------------------------------------------------------------- /tests/cpp/diabetes/LoadDiabetes.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #pragma once 5 | 6 | 7 | #include "bb/DataType.h" 8 | 9 | 10 | template 11 | bb::TrainData LoadDiabetes(int num_train=400) 12 | { 13 | const int n = 442; 14 | 15 | std::ifstream ifs_x("diabetes_data.txt"); 16 | std::ifstream ifs_t("diabetes_target.txt"); 17 | 18 | bb::TrainData td; 19 | td.x_shape = bb::indices_t({ 10 }); 20 | td.t_shape = bb::indices_t({ 1 }); 21 | 22 | for (int i = 0; i < num_train; ++i) { 23 | std::vector train(10); 24 | std::vector target(1); 25 | for (int j = 0; j < 10; ++j) { 26 | ifs_x >> train[j]; 27 | } 28 | ifs_t >> target[0]; 29 | 30 | td.x_train.push_back(train); 31 | td.t_train.push_back(target); 32 | } 33 | 34 | for (int i = 0; i < n - num_train; ++i) { 35 | std::vector train(10); 36 | std::vector target(1); 37 | for (int j = 0; j < 10; ++j) { 38 | ifs_x >> train[j]; 39 | } 40 | ifs_t >> target[0]; 41 | 42 | td.x_test.push_back(train); 43 | td.t_test.push_back(target); 44 | } 45 | 46 | return td; 47 | } 48 | 49 | 50 | -------------------------------------------------------------------------------- /documents/sphinx/source/introduction_features.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | 概要 3 | ================= 4 | 5 | 6 | 7 | 特徴 8 | ======= 9 | 10 | BinaryBrain は主に当サイトが研究中の LUT(Look-Up Table)-Networkを実験することを目的に作成した 11 | 
ディープラーニング用のプラットフォームです。 12 | 13 | LUT-Networkの評価を目的に作成しておりますが、それ以外の用途にも利用可能です。 14 | 15 | 以下の特徴があります 16 | 17 | - ニューラルネットのFPGA化をメインターゲットにしている 18 | - バイナリネットであるも関わらず変調技術によりAutoencodeや回帰分析が可能 19 | - 独自のDifferentiable-LUTモデルにより、LUTの性能を最大限引き出したが学習できる 20 | - 量子化&疎行列のネットワークでパフォーマンスの良い学習が出来る環境を目指している 21 | - C++で記述されている 22 | - GPU(CUDA)に対応している 23 | - 高速でマニアックな自作レイヤーが作りやすい 24 | - Pythonからの利用も可能 25 | 26 | 27 | 基本的な使い方 28 | ================= 29 | 30 | 基本的には C++ や Python で、ネットワークを記述し、学習を行った後に 31 | その結果を verilog などに埋め込んで、FPGA化することを目的に作成しています。 32 | 33 | C++用のCPU版に関してはヘッダオンリーライブラリとなっているため、include 以下にある 34 | ヘッダファイルをインクルードするだけでご利用いただけます。 35 | 36 | GPUを使う場合は、ヘッダ読み込みの際に BB_WITH_CUDA マクロを定義した上で、cuda 以下にある 37 | ライブラリをビルドした上でリンクする必要があります。 38 | 39 | また、BB_WITH_CEREAL マクロを定義すると、途中経過の保存形式に json が利用可能となります。 40 | 41 | Python版を使う場合は、一旦ビルドに成功すれば import するだけで利用可能です。 42 | 43 | 使い方はsamplesなどを参考にしてください。 44 | -------------------------------------------------------------------------------- /python/upload_pypi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git pull 4 | 5 | ./clean.sh 6 | ./copy_src.sh 7 | 8 | python3 setup.py build 9 | python3 setup.py sdist 10 | 11 | while true;do 12 | echo -n "upload testPyPI?(yes/no):" 13 | read answer 14 | case $answer in 15 | yes) 16 | echo "upload testpypi" 17 | # python3 setup.py sdist upload -r testpypi 18 | twine upload -r testpypi dist/* 19 | break 20 | ;; 21 | no) 22 | echo "don't upload" 23 | break 24 | ;; 25 | *) 26 | ;; 27 | esac 28 | done 29 | 30 | 31 | while true;do 32 | echo -n "upload PyPI?(yes/no):" 33 | read answer 34 | case $answer in 35 | yes) 36 | echo "upload pypi" 37 | # python3 setup.py sdist upload -r pypi 38 | twine upload -r pypi dist/* 39 | break 40 | ;; 41 | no) 42 | echo "don't upload" 43 | break 44 | ;; 45 | *) 46 | ;; 47 | esac 48 | done 49 | 50 | -------------------------------------------------------------------------------- 
/tests/gtest/MetricsCategoricalAccuracyTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "gtest/gtest.h" 4 | #include "bb/MetricsCategoricalAccuracy.h" 5 | 6 | 7 | 8 | TEST(MetricsCategoricalAccuracyTest, testMetricsCategoricalAccuracyTest) 9 | { 10 | bb::FrameBuffer y_buf(2, {3}, BB_TYPE_FP32); 11 | bb::FrameBuffer t_buf(2, {3}, BB_TYPE_FP32); 12 | 13 | y_buf.SetFP32(0, 0, 0.2f); 14 | y_buf.SetFP32(0, 1, 0.4f); 15 | y_buf.SetFP32(0, 2, 0.1f); 16 | 17 | y_buf.SetFP32(1, 0, 0.9f); 18 | y_buf.SetFP32(1, 1, 0.1f); 19 | y_buf.SetFP32(1, 2, 0.5f); 20 | 21 | t_buf.SetFP32(0, 0, 0.0f); 22 | t_buf.SetFP32(0, 1, 1.0f); 23 | t_buf.SetFP32(0, 2, 0.0f); 24 | 25 | t_buf.SetFP32(1, 0, 0.0f); 26 | t_buf.SetFP32(1, 1, 0.0f); 27 | t_buf.SetFP32(1, 2, 1.0f); 28 | 29 | auto accFunc = bb::MetricsCategoricalAccuracy<>::Create(); 30 | accFunc->CalculateMetrics(y_buf, t_buf); 31 | 32 | auto acc = accFunc->GetMetrics(); 33 | EXPECT_DOUBLE_EQ(0.5, acc); 34 | // std::cout << "acc : " << acc << std::endl; 35 | } 36 | 37 | 38 | -------------------------------------------------------------------------------- /samples/hls/mnist/simple/README.md: -------------------------------------------------------------------------------- 1 | # MNIST HLS サンプル 2 | 3 | ## 事前準備 4 | 5 | 事前に学習を行ってソースコードを作成する必要があります。 6 | 7 | samples/python/mnist/MnistDifferentiableLutHls.ipynb 8 | 9 | を Jupyter などで実行してください。 10 | 11 | ネットとして MnistDifferentiableLutHls.h と、テストベンチ用のデータとして mnist_test_data.h が生成されれば OK です。 12 | 13 | また、このサンプルは本リポジトリの submodule である jelly を利用しますので、git clone 時に取得していない場合には 14 | 15 | ``` 16 | git submodule update --init --recursive 17 | ``` 18 | 19 | などのコマンドで取得ください。 20 | 21 | また Xilinx の Vitis などのツールが必要ですので、それらがインストールされており、事前設定されているものとします。 22 | 23 | 例えば Linux なら 24 | 25 | ``` 26 | source /tools/Xilinx/Vitis/2021.2/settings64.sh 27 | ``` 28 | 29 | などの実行で事前準備されます(OSやバージョンにより微妙に異なります)。 30 | 31 | 32 | ## 使い方 33 | 34 | ### Cシミュレーション 
35 | 36 | 下記のように打つと動きます。 37 | 38 | ``` 39 | make csim 40 | ``` 41 | 42 | ### 合成 43 | 44 | 下記のように打つと動きます。 45 | 46 | ``` 47 | make 48 | ``` 49 | 50 | Vivado にインポートするための zip ファイルが出来上がります。 51 | 52 | 53 | ### コシミュレーション 54 | 55 | 下記のように打つと動きます。 56 | 57 | ``` 58 | make cosim 59 | ``` 60 | 61 | デフォルトで波形確認のための GUI を起動するオプションにしております。 62 | 必要に応じて Makefile を編集ください。 63 | 64 | -------------------------------------------------------------------------------- /samples/cpp/diabetes/main.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // BinaryBrain -- binary network evaluation platform 3 | // diabetes regression sample 4 | // 5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami 6 | // -------------------------------------------------------------------------- 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include "bb/Manager.h" 13 | 14 | void DiabetesAffineRegression(int epoch_size, size_t mini_batch_size); 15 | void DiabetesRegressionMicroMlpLut(int epoch_size, size_t mini_batch_size, size_t mux_size); 16 | void DiabetesRegressionStochasticLut6(int epoch_size, size_t mini_batch_size); 17 | 18 | 19 | // メイン関数 20 | int main() 21 | { 22 | omp_set_num_threads(4); 23 | 24 | // 普通のDenseAffineでの回帰 25 | DiabetesAffineRegression(64, 16); 26 | 27 | // μMLPによるバイナリネットでの回帰 28 | DiabetesRegressionMicroMlpLut(64, 16, 255); 29 | 30 | // 確率的LUTによる回帰と、バイナリネットでの再生 31 | DiabetesRegressionStochasticLut6(64, 16); 32 | 33 | return 0; 34 | } 35 | 36 | 37 | // end of file 38 | -------------------------------------------------------------------------------- /tests/cpp/diabetes/main.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // BinaryBrain -- binary network evaluation platform 3 | // diabetes regression sample 4 | // 5 | // Copyright (C) 2018-2019 by Ryuji 
Fuchikami 6 | // -------------------------------------------------------------------------- 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include "bb/Manager.h" 13 | 14 | void DiabetesAffineRegression(int epoch_size, size_t mini_batch_size); 15 | void DiabetesRegressionMicroMlpLut(int epoch_size, size_t mini_batch_size, size_t mux_size); 16 | void DiabetesRegressionStochasticLut6(int epoch_size, size_t mini_batch_size); 17 | 18 | 19 | // メイン関数 20 | int main() 21 | { 22 | omp_set_num_threads(4); 23 | 24 | // 普通のDenseAffineでの回帰 25 | DiabetesAffineRegression(64, 16); 26 | 27 | // μMLPによるバイナリネットでの回帰 28 | DiabetesRegressionMicroMlpLut(64, 16, 255); 29 | 30 | // 確率的LUTによる回帰と、バイナリネットでの再生 31 | DiabetesRegressionStochasticLut6(64, 16); 32 | 33 | return 0; 34 | } 35 | 36 | 37 | // end of file 38 | -------------------------------------------------------------------------------- /include/bb/Optimizer.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | 13 | #include "bb/Object.h" 14 | #include "bb/Variables.h" 15 | 16 | 17 | namespace bb { 18 | 19 | class Optimizer : public Object 20 | { 21 | public: 22 | virtual ~Optimizer() {} 23 | 24 | public: 25 | virtual std::string GetOptimizerName(void) const = 0; 26 | 27 | virtual void SetVariables(Variables params, Variables grads) = 0; 28 | virtual void ZeroGrad(void) = 0; 29 | virtual void Step(void) = 0; 30 | 31 | virtual void Update(void) 32 | { 33 | this->Step(); 34 | this->ZeroGrad(); 35 | } 36 | 37 | virtual void SetLearningRate(double learning_rate) = 0; 38 | }; 39 | 40 | 41 | } 42 | 
-------------------------------------------------------------------------------- /documents/sphinx/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | 8 | if "%SPHINXBUILD%" == "" ( 9 | set SPHINXBUILD=sphinx-build 10 | ) 11 | set SOURCEDIR=source 12 | set BUILDDIR=build 13 | 14 | if "%1" == "" goto help 15 | 16 | if "%1" == "livehtml" ( 17 | start http://127.0.0.1:8000 18 | sphinx-autobuild -b html %SOURCEDIR% %BUILDDIR%/html 19 | if errorlevel 1 exit /b 1 20 | goto end 21 | ) 22 | 23 | %SPHINXBUILD% >NUL 2>NUL 24 | if errorlevel 9009 ( 25 | echo. 26 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 27 | echo.installed, then set the SPHINXBUILD environment variable to point 28 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 29 | echo.may add the Sphinx directory to PATH. 30 | echo. 31 | echo.If you don't have Sphinx installed, grab it from 32 | echo.http://sphinx-doc.org/ 33 | exit /b 1 34 | ) 35 | 36 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 37 | goto end 38 | 39 | :help 40 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 41 | 42 | :end 43 | popd 44 | -------------------------------------------------------------------------------- /include/bb/MetricsFunction.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | 15 | #include "bb/Object.h" 16 | #include "bb/FrameBuffer.h" 17 | 18 | 19 | namespace bb { 20 | 21 | 22 | class 
MetricsFunction : public Object 23 | { 24 | 25 | public: 26 | virtual ~MetricsFunction() {} 27 | 28 | virtual std::string GetMetricsFunctionName(void) const = 0; 29 | 30 | virtual std::string GetMetricsString(void) { return "accuracy"; } 31 | 32 | virtual void Clear(void) = 0; 33 | virtual double GetMetrics(void) const = 0; 34 | virtual void CalculateMetrics(FrameBuffer y, FrameBuffer t) = 0; 35 | }; 36 | 37 | 38 | } 39 | 40 | -------------------------------------------------------------------------------- /cuda/Manager.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_runtime.h" 5 | #include "device_launch_parameters.h" 6 | 7 | #include "bbcu/bbcu.h" 8 | #include "bbcu/bbcu_util.h" 9 | 10 | 11 | 12 | static bool bbcu_HostOnly = false; 13 | 14 | 15 | BBCU_DLL_EXPORT int bbcu_GetDeviceCount(void) 16 | { 17 | int dev_count = 0; 18 | auto status = cudaGetDeviceCount(&dev_count); 19 | if (status != cudaSuccess) { 20 | dev_count = 0; 21 | } 22 | return dev_count; 23 | } 24 | 25 | BBCU_DLL_EXPORT int bbcu_GetDevice(void) 26 | { 27 | int device; 28 | BB_CUDA_SAFE_CALL(cudaGetDevice(&device)); 29 | return device; 30 | } 31 | 32 | BBCU_DLL_EXPORT void bbcu_SetDevice(int device) 33 | { 34 | BB_CUDA_SAFE_CALL(cudaSetDevice(device)); 35 | } 36 | 37 | 38 | 39 | 40 | BBCU_DLL_EXPORT void bbcu_SetHostOnly(bool hostOnly) 41 | { 42 | bbcu_HostOnly = hostOnly; 43 | } 44 | 45 | 46 | BBCU_DLL_EXPORT bool bbcu_IsHostOnly(void) 47 | { 48 | return bbcu_HostOnly; 49 | } 50 | 51 | 52 | BBCU_DLL_EXPORT bool bbcu_IsDeviceAvailable(void) 53 | { 54 | return !bbcu_HostOnly; 55 | } 56 | 57 | 58 | // end of file 59 | -------------------------------------------------------------------------------- /cuda/bbcu.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 
17.7.34003.232 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bbcu", "bbcu.vcxproj", "{FEADE517-59B9-4551-AD9D-D181A1442EA7}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.ActiveCfg = Debug|x64 15 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.Build.0 = Debug|x64 16 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.ActiveCfg = Release|x64 17 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {7B3BD921-1A3E-4E24-870E-8D07D71B0F91} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/verilator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.16) 3 | 4 | project(tb_verilator) 5 | 6 | 7 | find_package(verilator 4.2 REQUIRED HINTS $ENV{VERILATOR_ROOT} ${VERILATOR_ROOT}) 8 | if (NOT verilator_FOUND) 9 | message(FATAL_ERROR "Verilator was not found. Either install it, or set the VERILATOR_ROOT environment variable") 10 | endif() 11 | 12 | find_package(OpenCV REQUIRED) 13 | if (NOT OpenCV_FOUND) 14 | message(FATAL_ERROR "OpenCV was not found. 
Either install it") 15 | endif() 16 | 17 | 18 | add_executable(tb_verilator ./tb_verilator.cpp) 19 | 20 | target_include_directories(tb_verilator PUBLIC "../../../../../jelly/include") 21 | 22 | target_include_directories(tb_verilator PUBLIC ${OpenCV_INCLUDE_DIRS}) 23 | target_link_libraries(tb_verilator PUBLIC ${OpenCV_LIBS}) 24 | 25 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17") 26 | add_compile_definitions(WITH_OPENCV2) 27 | target_link_libraries(tb_verilator PUBLIC "-pthread") 28 | 29 | verilate(tb_verilator 30 | # COVERAGE 31 | # TRACE 32 | INCLUDE_DIRS "." 33 | VERILATOR_ARGS -f verilator_cmd.txt -Os 34 | SOURCES ./tb_verilator.sv ../MnistLutCnn.v) 35 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/verilator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.16) 3 | 4 | project(tb_verilator) 5 | 6 | 7 | find_package(verilator 4.2 REQUIRED HINTS $ENV{VERILATOR_ROOT} ${VERILATOR_ROOT}) 8 | if (NOT verilator_FOUND) 9 | message(FATAL_ERROR "Verilator was not found. Either install it, or set the VERILATOR_ROOT environment variable") 10 | endif() 11 | 12 | find_package(OpenCV REQUIRED) 13 | if (NOT OpenCV_FOUND) 14 | message(FATAL_ERROR "OpenCV was not found. Either install it") 15 | endif() 16 | 17 | 18 | add_executable(tb_verilator ./tb_verilator.cpp) 19 | 20 | target_include_directories(tb_verilator PUBLIC "../../../../../jelly/include") 21 | 22 | target_include_directories(tb_verilator PUBLIC ${OpenCV_INCLUDE_DIRS}) 23 | target_link_libraries(tb_verilator PUBLIC ${OpenCV_LIBS}) 24 | 25 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17") 26 | #add_compile_definitions(WITH_OPENCV2) 27 | target_link_libraries(tb_verilator PUBLIC "-pthread") 28 | 29 | verilate(tb_verilator 30 | # COVERAGE 31 | # TRACE 32 | INCLUDE_DIRS "." 
33 | VERILATOR_ARGS -f verilator_cmd.txt 34 | SOURCES ./tb_verilator.sv ../MnistLutSimple.v) 35 | -------------------------------------------------------------------------------- /python/projects/thrust/PyBinaryBrainThrust.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.7.34003.232 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "core", "core.vcxproj", "{ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}.Debug|x64.ActiveCfg = Debug|x64 15 | {ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}.Debug|x64.Build.0 = Debug|x64 16 | {ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}.Release|x64.ActiveCfg = Release|x64 17 | {ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {2C10458D-2E46-4D6D-AB03-166526A12BF9} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.16) 3 | 4 | project(tb_verilator) 5 | 6 | 7 | find_package(verilator 4.2 REQUIRED HINTS $ENV{VERILATOR_ROOT} ${VERILATOR_ROOT}) 8 | if (NOT verilator_FOUND) 9 | message(FATAL_ERROR "Verilator was not found. 
Either install it, or set the VERILATOR_ROOT environment variable") 10 | endif() 11 | 12 | find_package(OpenCV REQUIRED) 13 | if (NOT OpenCV_FOUND) 14 | message(FATAL_ERROR "OpenCV was not found. Either install it") 15 | endif() 16 | 17 | 18 | add_executable(tb_verilator ./tb_verilator.cpp) 19 | 20 | target_include_directories(tb_verilator PUBLIC "../../../../../jelly/include") 21 | 22 | target_include_directories(tb_verilator PUBLIC ${OpenCV_INCLUDE_DIRS}) 23 | target_link_libraries(tb_verilator PUBLIC ${OpenCV_LIBS}) 24 | 25 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17") 26 | # add_compile_definitions(WITH_OPENCV2) 27 | target_link_libraries(tb_verilator PUBLIC "-pthread") 28 | 29 | verilate(tb_verilator 30 | # COVERAGE 31 | # TRACE 32 | INCLUDE_DIRS "." 33 | VERILATOR_ARGS -f verilator_cmd.txt -Os 34 | SOURCES ./tb_verilator.sv ../MnistSemanticSegmentation.v) 35 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.16) 3 | 4 | project(tb_verilator) 5 | 6 | 7 | find_package(verilator 4.2 REQUIRED HINTS $ENV{VERILATOR_ROOT} ${VERILATOR_ROOT}) 8 | if (NOT verilator_FOUND) 9 | message(FATAL_ERROR "Verilator was not found. Either install it, or set the VERILATOR_ROOT environment variable") 10 | endif() 11 | 12 | find_package(OpenCV REQUIRED) 13 | if (NOT OpenCV_FOUND) 14 | message(FATAL_ERROR "OpenCV was not found. 
Either install it") 15 | endif() 16 | 17 | 18 | add_executable(tb_verilator ./tb_verilator.cpp) 19 | 20 | target_include_directories(tb_verilator PUBLIC "../../../../../jelly/include") 21 | 22 | target_include_directories(tb_verilator PUBLIC ${OpenCV_INCLUDE_DIRS}) 23 | target_link_libraries(tb_verilator PUBLIC ${OpenCV_LIBS}) 24 | 25 | set(CMAKE_CXX_FLAGS "-O3 -std=c++17") 26 | # add_compile_definitions(WITH_OPENCV2) 27 | target_link_libraries(tb_verilator PUBLIC "-pthread") 28 | 29 | verilate(tb_verilator 30 | # COVERAGE 31 | # TRACE 32 | INCLUDE_DIRS "." 33 | VERILATOR_ARGS -f verilator_cmd.txt -Os 34 | SOURCES ./tb_verilator.sv ../MnistSegmentationAndClassification.v) 35 | -------------------------------------------------------------------------------- /documents/sphinx/source/locale/en/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2019, Ryuji Fuchikami 3 | # This file is distributed under the same license as the BinaryBrain 4 | # package. 5 | # FIRST AUTHOR , 2019. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryBrain \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2019-09-22 08:35+0900\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language-Team: LANGUAGE \n" 16 | "MIME-Version: 1.0\n" 17 | "Content-Type: text/plain; charset=utf-8\n" 18 | "Content-Transfer-Encoding: 8bit\n" 19 | "Generated-By: Babel 2.7.0\n" 20 | 21 | #: ../../source/index.rst:7 22 | msgid "Welcome to BinaryBrain's documentation!" 
23 | msgstr "" 24 | 25 | #: ../../source/index.rst:9 26 | msgid "Contents:" 27 | msgstr "" 28 | 29 | #: ../../source/index.rst:19 30 | msgid "Indices and tables" 31 | msgstr "" 32 | 33 | #: ../../source/index.rst:21 34 | msgid ":ref:`genindex`" 35 | msgstr "" 36 | 37 | #: ../../source/index.rst:22 38 | msgid ":ref:`modindex`" 39 | msgstr "" 40 | 41 | #: ../../source/index.rst:23 42 | msgid ":ref:`search`" 43 | msgstr "" 44 | 45 | -------------------------------------------------------------------------------- /documents/sphinx/source/locale/ja/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2019, Ryuji Fuchikami 3 | # This file is distributed under the same license as the BinaryBrain 4 | # package. 5 | # FIRST AUTHOR , 2019. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryBrain \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2019-09-22 08:35+0900\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language-Team: LANGUAGE \n" 16 | "MIME-Version: 1.0\n" 17 | "Content-Type: text/plain; charset=utf-8\n" 18 | "Content-Transfer-Encoding: 8bit\n" 19 | "Generated-By: Babel 2.7.0\n" 20 | 21 | #: ../../source/index.rst:7 22 | msgid "Welcome to BinaryBrain's documentation!" 
23 | msgstr "" 24 | 25 | #: ../../source/index.rst:9 26 | msgid "Contents:" 27 | msgstr "" 28 | 29 | #: ../../source/index.rst:19 30 | msgid "Indices and tables" 31 | msgstr "" 32 | 33 | #: ../../source/index.rst:21 34 | msgid ":ref:`genindex`" 35 | msgstr "" 36 | 37 | #: ../../source/index.rst:22 38 | msgid ":ref:`modindex`" 39 | msgstr "" 40 | 41 | #: ../../source/index.rst:23 42 | msgid ":ref:`search`" 43 | msgstr "" 44 | 45 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_losses.rst: -------------------------------------------------------------------------------- 1 | 損失関数(Loss functions) 2 | ================================== 3 | 4 | LossFunction クラス 5 | -------------------------------------- 6 | 7 | .. autoclass:: binarybrain.losses.LossFunction 8 | :members: 9 | :show-inheritance: 10 | 11 | 12 | LossMeanSquaredError クラス 13 | -------------------------------------- 14 | 15 | .. autoclass:: binarybrain.losses.LossMeanSquaredError 16 | :members: 17 | :show-inheritance: 18 | 19 | 20 | LossCrossEntropy クラス 21 | -------------------------------------- 22 | 23 | .. autoclass:: binarybrain.losses.LossCrossEntropy 24 | :members: 25 | :show-inheritance: 26 | 27 | LossBinaryCrossEntropy クラス 28 | -------------------------------------- 29 | 30 | .. autoclass:: binarybrain.losses.LossBinaryCrossEntropy 31 | :members: 32 | :show-inheritance: 33 | 34 | LossSoftmaxCrossEntropy クラス 35 | -------------------------------------- 36 | 37 | .. autoclass:: binarybrain.losses.LossSoftmaxCrossEntropy 38 | :members: 39 | :show-inheritance: 40 | 41 | 42 | LossSigmoidCrossEntropy クラス 43 | -------------------------------------- 44 | 45 | .. 
autoclass:: binarybrain.losses.LossSigmoidCrossEntropy 46 | :members: 47 | :show-inheritance: 48 | 49 | -------------------------------------------------------------------------------- /documents/sphinx/source/locale/ja/LC_MESSAGES/sample_rtl.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2019, Ryuji Fuchikami 3 | # This file is distributed under the same license as the BinaryBrain 4 | # package. 5 | # FIRST AUTHOR , 2019. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryBrain 3.9\n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2019-09-29 19:33+0900\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language-Team: LANGUAGE \n" 16 | "MIME-Version: 1.0\n" 17 | "Content-Type: text/plain; charset=utf-8\n" 18 | "Content-Transfer-Encoding: 8bit\n" 19 | "Generated-By: Babel 2.7.0\n" 20 | 21 | #: ../../source/sample_rtl.rst:3 22 | msgid "RTLの試し方" 23 | msgstr "" 24 | 25 | #: ../../source/sample_rtl.rst:7 26 | msgid "sampleの動かし方" 27 | msgstr "" 28 | 29 | #: ../../source/sample_rtl.rst:9 30 | msgid "C++, Pythonともに Verilog RTL のソースファイルの出力が可能です。 出力したRTLの試し方は" 31 | msgstr "" 32 | 33 | #: ../../source/sample_rtl.rst:12 34 | msgid "https://github.com/ryuz/BinaryBrain/tree/master/samples/mnist/verilog" 35 | msgstr "" 36 | 37 | #: ../../source/sample_rtl.rst:15 38 | msgid "の readme.txt を参照ください。" 39 | msgstr "" 40 | 41 | -------------------------------------------------------------------------------- /documents/sphinx/source/locale/en/LC_MESSAGES/sample_rtl.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2019, Ryuji Fuchikami 3 | # This file is distributed under the same license as the BinaryBrain 4 | # package. 5 | # FIRST AUTHOR , 2019. 
6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryBrain 3.9\n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2019-09-29 19:33+0900\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language-Team: LANGUAGE \n" 16 | "MIME-Version: 1.0\n" 17 | "Content-Type: text/plain; charset=utf-8\n" 18 | "Content-Transfer-Encoding: 8bit\n" 19 | "Generated-By: Babel 2.7.0\n" 20 | 21 | #: ../../source/sample_rtl.rst:3 22 | msgid "RTLの試し方" 23 | msgstr "Evaluation for FPGA" 24 | 25 | #: ../../source/sample_rtl.rst:7 26 | msgid "sampleの動かし方" 27 | msgstr "sample program" 28 | 29 | #: ../../source/sample_rtl.rst:9 30 | msgid "C++, Pythonともに Verilog RTL のソースファイルの出力が可能です。 出力したRTLの試し方は" 31 | msgstr "" 32 | 33 | #: ../../source/sample_rtl.rst:12 34 | msgid "https://github.com/ryuz/BinaryBrain/tree/master/samples/mnist/verilog" 35 | msgstr "" 36 | 37 | #: ../../source/sample_rtl.rst:15 38 | msgid "の readme.txt を参照ください。" 39 | msgstr "" 40 | 41 | -------------------------------------------------------------------------------- /python/binarybrain/variables.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import binarybrain as bb 4 | import binarybrain.core as core 5 | import numpy as np 6 | from typing import List 7 | 8 | 9 | class Variables(): 10 | """Variables class 11 | 12 | 学習の為の Optimizer と実際の学習ターゲットの変数の橋渡しに利用されるクラス。 13 | 内部的には各モデル内の重みや勾配を保有する Tensor をまとめて保持している。 14 | """ 15 | 16 | def __init__(self): 17 | self.variables = core.Variables() 18 | 19 | @staticmethod 20 | def from_core(variables): 21 | new_variables = Variables() 22 | new_variables.variables = variables 23 | return new_variables 24 | 25 | def get_core(self): 26 | return self.variables 27 | 28 | def append(self, variables): 29 | """ 変数を追加 30 | 31 | Args: 32 | variables (Variables) : 追加する変数 33 | """ 34 | self.variables.push_back(variables.get_core()) 35 | 36 | def 
get_size(self): 37 | return self.variables.get_size() 38 | 39 | def at(self, item): 40 | return self.variables.at(item) 41 | 42 | def __len__(self): 43 | return self.variables.get_size() 44 | 45 | def __getitem__(self, item): 46 | return self.variables.at(item) 47 | 48 | -------------------------------------------------------------------------------- /tests/hls/mnist/mnist_simple/testbench/tb_mnist_simple.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "mnist_simple.h" 5 | 6 | int main() 7 | { 8 | // データファイル読み込み 9 | std::ifstream is("../../../../testbench/mnist_hls_test.txt"); 10 | if( !is ) { std::cout << "open error : mnist_hls_test.txt" << std::endl; return 1; } 11 | 12 | int n = 0; 13 | int ok = 0; 14 | for ( int i = 0; i < 64; ++i ) { 15 | // データ読み込み 16 | ap_uint<28*28> in_data; 17 | int label; 18 | is >> label; 19 | for ( int j = 0; j < 28*28; ++j ) { 20 | int val; 21 | is >> val; 22 | in_data[j] = val; 23 | } 24 | 25 | // テスト 26 | auto out_data = mnist_simple(in_data); 27 | 28 | // 確認 29 | ap_uint<10> exp_data = (1 << label); 30 | if ( out_data == exp_data ) { 31 | std::cout << "[OK] "; 32 | ok++; 33 | } else { 34 | std::cout << "[miss] "; 35 | } 36 | std::cout << "label: " << std::dec << label << " out: 0x" << std::hex << (int)out_data << std::endl; 37 | n++; 38 | } 39 | std::cout << "total : " << std::dec << ok << "/" << n << std::endl; 40 | 41 | // とりあえず1/3整合していればOKとする 42 | assert(ok >= n/3); 43 | 44 | return 0; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /samples/hls/mnist/simple/src/mnist_sample.cpp: -------------------------------------------------------------------------------- 1 | #include "mnist_sample.h" 2 | #include "MnistDifferentiableLutHls.h" 3 | 4 | 5 | void MnistDepthwiseAffine_layer(int y[10], const ap_uint<10*DWA_DEPTH> x) 6 | { 7 | for ( int i = 0; i < 10; ++i ) { 8 | #pragma HLS unroll 9 | int sum = 
(int)b_tbl[i]; 10 | for ( int j = 0; j < DWA_DEPTH; ++j ) { 11 | #pragma HLS unroll 12 | sum += (int)x[i*DWA_DEPTH + j] * (int)W_tbl[i][j]; 13 | } 14 | y[i] = sum; 15 | } 16 | } 17 | 18 | 19 | // kernel 20 | void mnist_sample( 21 | const ap_uint<1> in[28*28], 22 | ap_uint<4> out[1] 23 | ) 24 | { 25 | // input 26 | ap_uint<28*28> x0; 27 | for ( int i = 0; i < 28*28; ++i ) { 28 | x0[i] = in[i]; 29 | } 30 | auto x1 = MnistLut_layer1(x0); 31 | auto x2 = MnistLut_layer2(x1); 32 | auto x3 = MnistLut_layer3(x2); 33 | 34 | // Depthwise Affine 35 | int y[10]; 36 | MnistDepthwiseAffine_layer(y, x3); 37 | 38 | // argmax 39 | int max_val = -32768; 40 | ap_uint<4> max_idx = 0; 41 | for ( int i = 0; i < 10; ++i ) { 42 | if ( y[i] > max_val ) { 43 | max_val = y[i]; 44 | max_idx = i; 45 | } 46 | } 47 | 48 | // output 49 | out[0] = max_idx; 50 | } 51 | 52 | -------------------------------------------------------------------------------- /include/bb/Version.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018-2024 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | #include 13 | 14 | 15 | namespace bb 16 | { 17 | 18 | #define BB_MAJOR_VERSION 4 19 | #define BB_MINOR_VERSION 3 20 | #define BB_REVISION_NUMBER 2 21 | 22 | #define BB_VERSION (std::to_string(BB_MAJOR_VERSION) + "." + std::to_string(BB_MINOR_VERSION) + "." 
+ std::to_string(BB_REVISION_NUMBER)) 23 | 24 | 25 | // バージョン取得 26 | inline void GetVersion(int *major_version, int *minor_version=nullptr, int *revision_number=nullptr) 27 | { 28 | if ( major_version != nullptr ) { *major_version = BB_MAJOR_VERSION; } 29 | if ( minor_version != nullptr ) { *minor_version = BB_MINOR_VERSION; } 30 | if ( revision_number != nullptr ) { *revision_number = BB_REVISION_NUMBER; } 31 | } 32 | 33 | // バージョン文字列取得 34 | inline std::string GetVersionString(void) 35 | { 36 | return BB_VERSION; 37 | } 38 | 39 | 40 | } 41 | 42 | 43 | // end of file 44 | -------------------------------------------------------------------------------- /samples/cpp/diabetes/sample_diabetes.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | ソース ファイル 20 | 21 | 22 | ソース ファイル 23 | 24 | 25 | ソース ファイル 26 | 27 | 28 | ソース ファイル 29 | 30 | 31 | -------------------------------------------------------------------------------- /documents/sphinx/source/python_module_models_operation.rst: -------------------------------------------------------------------------------- 1 | 演算モデル (Operation models) 2 | ====================================== 3 | 4 | 5 | models モジュールには、ネットワークを構成するための各種演算モデルがあります。 6 | 7 | 8 | DifferentiableLut クラス 9 | ---------------------------- 10 | 11 | .. autoclass:: binarybrain.models.DifferentiableLut 12 | :members: 13 | :show-inheritance: 14 | 15 | 16 | AverageLut クラス 17 | ---------------------------- 18 | 19 | .. autoclass:: binarybrain.models.AverageLut 20 | :members: 21 | :show-inheritance: 22 | 23 | BinaryLut クラス 24 | ---------------------------- 25 | 26 | .. 
autoclass:: binarybrain.models.BinaryLut 27 | :members: 28 | :show-inheritance: 29 | 30 | 31 | DenseAffine クラス 32 | ---------------------------- 33 | 34 | .. autoclass:: binarybrain.models.DenseAffine 35 | :members: 36 | :show-inheritance: 37 | 38 | DenseAffineQuantize クラス 39 | ----------------------------- 40 | 41 | .. autoclass:: binarybrain.models.DenseAffineQuantize 42 | :members: 43 | :show-inheritance: 44 | 45 | 46 | 47 | DepthwiseDenseAffine クラス 48 | ------------------------------ 49 | 50 | .. autoclass:: binarybrain.models.DepthwiseDenseAffine 51 | :members: 52 | :show-inheritance: 53 | 54 | 55 | DepthwiseDenseAffineQuantize クラス 56 | -------------------------------------- 57 | 58 | .. autoclass:: binarybrain.models.DepthwiseDenseAffineQuantize 59 | :members: 60 | :show-inheritance: 61 | 62 | -------------------------------------------------------------------------------- /include/bb/LossFunction.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | 13 | #include 14 | 15 | 16 | #include "bb/Object.h" 17 | #include "bb/FrameBuffer.h" 18 | 19 | 20 | namespace bb { 21 | 22 | 23 | class LossFunction : public Object 24 | { 25 | public: 26 | virtual std::string GetLossFunctionName(void) const = 0; 27 | 28 | /** 29 | * @brief 積算していた損失をクリア 30 | * @detail 積算していた損失をクリアする 31 | */ 32 | virtual void Clear(void) = 0; 33 | 34 | /** 35 | * @brief 損失取得 36 | * @detail 損失取得 37 | * 損失の取得にGPUからのメモリコピーが発生する可能性があるので 38 | * CalculateLoss とは別メソッドにする 39 | * @return 積算していた損失を返す 40 | */ 41 | virtual double GetLoss(void) const = 0; 42 | 43 | /** 44 | * @brief 損失計算 45 | * @detail 損失を計算する 46 | * 
@param y 結果の入力 47 | * @param t 期待値 48 | * @return backwardする誤差勾配を返す 49 | */ 50 | virtual FrameBuffer CalculateLoss(FrameBuffer y_buf, FrameBuffer t_buf, index_t mini_batch_size) = 0; 51 | }; 52 | 53 | 54 | } 55 | 56 | -------------------------------------------------------------------------------- /samples/verilog/mnist/common/bb_lut.v: -------------------------------------------------------------------------------- 1 | // LUT 2 | 3 | 4 | `timescale 1ns / 1ps 5 | `default_nettype none 6 | 7 | 8 | 9 | module bb_lut 10 | #( 11 | parameter DEVICE = "RTL", 12 | parameter N = 6, 13 | parameter [(1< 2 | #include 3 | #include 4 | #include 5 | 6 | #include "bbcu/bbcu.h" 7 | #include "bbcu/bbcu_util.h" 8 | 9 | int Test_MicroMlp_Forward(void); 10 | int Test_MicroMlp_Backward(void); 11 | 12 | int Test_StochasticLut6_Forward(void); 13 | int Test_StochasticLut6_Backward(void); 14 | 15 | void bbcu_ShufleTest(void); 16 | 17 | int main() 18 | { 19 | bbcu_ShufleTest(); 20 | getchar(); 21 | return 0; 22 | 23 | void* ptr0 = bbcu_LocalHeap_Malloc(2*1024); 24 | void* ptr1 = bbcu_LocalHeap_Malloc(1*1024); 25 | void* ptr2 = bbcu_LocalHeap_Malloc(3*1024); 26 | 27 | bbcu_LocalHeap_Free(ptr0); 28 | bbcu_LocalHeap_Free(ptr2); 29 | 30 | void* ptr00 = bbcu_LocalHeap_Malloc(2*1024); 31 | void* ptr02 = bbcu_LocalHeap_Malloc(3*1024); 32 | 33 | bbcu_LocalHeap_Free(ptr00); 34 | bbcu_LocalHeap_Free(ptr1); 35 | bbcu_LocalHeap_Free(ptr02); 36 | 37 | #if 0 38 | std::cout << "---- Test_MicroMlp_Forward ----" << std::endl; 39 | Test_MicroMlp_Forward(); 40 | 41 | std::cout << "---- Test_MicroMlp_Backward ----" << std::endl; 42 | Test_MicroMlp_Backward(); 43 | #endif 44 | 45 | #if 1 46 | std::cout << "---- Test_StochasticLut6_Forward ----" << std::endl; 47 | Test_StochasticLut6_Forward(); 48 | 49 | // std::cout << "---- Test_StochasticLut6_Backward ----" << std::endl; 50 | // Test_StochasticLut6_Backward(); 51 | #endif 52 | 53 | return 0; 54 | } 55 | 56 | 57 | 
-------------------------------------------------------------------------------- /python/binarybrain/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | PYTHON_PATH = /usr/include/python3.6m 4 | CEREAL_PATH = ../cereal-1.2.2 5 | 6 | TARGET = core$(shell python3-config --extension-suffix) 7 | 8 | CUARCH = -gencode=arch=compute_35,code=sm_35 \ 9 | -gencode=arch=compute_50,code=sm_50 \ 10 | -gencode=arch=compute_60,code=sm_60 \ 11 | -gencode=arch=compute_61,code=sm_61 \ 12 | -gencode=arch=compute_75,code=sm_75 13 | 14 | CFLAGS = -DBB_ASSERT_EXCEPTION=1 -DBB_WITH_CEREAL=1 -DBB_WITH_CUDA=1 \ 15 | -I$(CEREAL_PATH)/include -Iinclude -Ibinarybrain/cuda \ 16 | -Xcompiler -pthread -Xcompiler -mavx2 -Xcompiler -mfma \ 17 | -Xcompiler -fopenmp -Xcompiler -std=c++14 -Xcompiler -fPIC \ 18 | $(CUARCH) \ 19 | $(shell python3 -m pybind11 --includes) 20 | 21 | CUFLAGS = -DBB_ASSERT_EXCEPTION=1 -DBB_WITH_CEREAL=1 -DBB_WITH_CUDA=1 \ 22 | -Iinclude -Icuda \ 23 | -I$(CEREAL_PATH) \ 24 | $(CUARCH) \ 25 | -std=c++11 -Xcompiler -fPIC \ 26 | $(shell python3 -m pybind11 --includes) 27 | 28 | LDFLAG = -Xcompiler -pthread -Xcompiler -fopenmp -lstdc++ -lm -lcublas 29 | 30 | 31 | .PHONY: all 32 | all: $(TARGET) 33 | 34 | .PHONY: clean 35 | clean: 36 | rm -f $(TARGET) core_bbcu.o core_main.o 37 | 38 | $(TARGET): core_bbcu.o core_main.o 39 | nvcc -shared core_bbcu.o core_main.o $(LDFLAG) -o $(TARGET) 40 | 41 | core_main.o: src/core_main.cpp 42 | nvcc $(CFLAGS) -c src/core_main.cpp -o core_main.o 43 | 44 | core_bbcu.o: src/core_bbcu.cu 45 | nvcc $(CUFLAGS) -c src/core_bbcu.cu -o core_bbcu.o 46 | 47 | -------------------------------------------------------------------------------- /include/bb/CudaUtility.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 
2018-2019 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | 13 | #ifdef BB_WITH_CUDA 14 | #include "cuda_runtime.h" 15 | #include "bbcu/bbcu_util.h" 16 | #endif 17 | 18 | #include "bb/DataType.h" 19 | #include "bb/Utility.h" 20 | 21 | 22 | namespace bb { 23 | 24 | 25 | #ifdef BB_WITH_CUDA 26 | 27 | class CudaDevicePush 28 | { 29 | protected: 30 | int m_old_device; 31 | int m_device; 32 | 33 | public: 34 | CudaDevicePush(int device) 35 | { 36 | m_device = device; 37 | if ( m_device >= 0 ) { 38 | BB_CUDA_SAFE_CALL(cudaGetDevice(&m_old_device)); 39 | if ( m_old_device != m_device ) { 40 | BB_CUDA_SAFE_CALL(cudaSetDevice(m_device)); 41 | } 42 | } 43 | } 44 | 45 | ~CudaDevicePush() 46 | { 47 | if ( m_device >= 0 && (m_old_device != m_device) ) { 48 | BB_CUDA_SAFE_CALL(cudaSetDevice(m_old_device)); 49 | } 50 | } 51 | }; 52 | 53 | #else 54 | 55 | class CudaDevicePush 56 | { 57 | public: 58 | CudaDevicePush(int device) {} 59 | ~CudaDevicePush() {} 60 | }; 61 | 62 | #endif 63 | 64 | 65 | } 66 | 67 | // end of file 68 | -------------------------------------------------------------------------------- /tests/gtest/cudaMatrixColwiseSumTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cuda_runtime.h" 8 | #include "device_launch_parameters.h" 9 | 10 | #include "gtest/gtest.h" 11 | 12 | #if BB_WITH_CUDA 13 | 14 | #include "bb/FrameBuffer.h" 15 | #include "bbcu/bbcu.h" 16 | 17 | 18 | TEST(cudaMatrixColwiseSumTest, test_cudaMatrixColwiseSum) 19 | { 20 | int const node_size = 2; 21 | int const frame_size = 3; 22 | 23 | bb::FrameBuffer x_buf(frame_size, {node_size}, BB_TYPE_FP32); 24 | bb::Tensor y_buf({node_size}, BB_TYPE_FP32); 25 | 26 | { 27 | x_buf.SetFP32(0, 0, 1); 28 | x_buf.SetFP32(1, 0, 2); 29 | 
x_buf.SetFP32(2, 0, 3); 30 | 31 | x_buf.SetFP32(0, 1, 4); 32 | x_buf.SetFP32(1, 1, 5); 33 | x_buf.SetFP32(2, 1, 6); 34 | } 35 | 36 | y_buf = 0; 37 | 38 | { 39 | auto x_ptr = x_buf.LockDeviceMemoryConst(); 40 | auto y_ptr = y_buf.LockDeviceMemory(); 41 | bbcu_fp32_MatrixColwiseSum 42 | ( 43 | (float const *)x_ptr.GetAddr(), 44 | (float *)y_ptr.GetAddr(), 45 | (int )x_buf.GetNodeSize(), 46 | (int )x_buf.GetFrameSize(), 47 | (int )(x_buf.GetFrameStride() / sizeof(float)) 48 | ); 49 | } 50 | 51 | { 52 | auto y_ptr = y_buf.LockConst(); 53 | 54 | EXPECT_FLOAT_EQ(1+2+3, y_ptr(0)); 55 | EXPECT_FLOAT_EQ(4+5+6, y_ptr(1)); 56 | } 57 | } 58 | 59 | 60 | #endif 61 | 62 | -------------------------------------------------------------------------------- /samples/hls/mnist/simple/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # setting 3 | export HLS_TARGET = mnist_sample 4 | export HLS_SOLUTION = solution_1 5 | export DEVICE_PART = xczu3eg-sbva484-1-i 6 | export CLOCK_PERIOD = 4 7 | 8 | export CSIM_OPTIONS ?= 9 | export COSIM_OPTIONS ?= -trace_level all -wave_debug 10 | 11 | 12 | # directories 13 | JELLY_DIR = ../../../../jelly 14 | SOURCE_DIR = src 15 | TESTBENCH_DIR = testbench 16 | TARGET_DIR = $(HLS_TARGET)/$(HLS_SOLUTION) 17 | CSIM_DIR = $(TARGET_DIR)/csim 18 | SYN_DIR = $(TARGET_DIR)/syn 19 | COSIM_DIR = $(TARGET_DIR)/sim 20 | IMPL_DIR = $(TARGET_DIR)/impl 21 | EXPORT_ZIP = $(IMPL_DIR)/export.zip 22 | 23 | # flags 24 | export SOURCE_FLAGS = -I$(SOURCE_DIR) 25 | export TESTBENCH_FLAGS = -I$(SOURCE_DIR) -I$(TESTBENCH_DIR) 26 | 27 | # source 28 | SOURCES = $(SOURCE_DIR)/mnist_sample.cpp 29 | export SOURCES 30 | 31 | # testbanch 32 | TESTBENCHS = $(TESTBENCH_DIR)/tb_mnist.cpp 33 | export TESTBENCHS 34 | 35 | # rules 36 | .PHONY: all 37 | all: $(EXPORT_ZIP) 38 | 39 | $(TARGET_DIR): 40 | -rm -rf $(HLS_TARGET) 41 | vitis_hls $(JELLY_DIR)/scripts/hls_create_project.tcl 42 | 43 | $(EXPORT_ZIP): $(TARGET_DIR) $(SOURCES) 44 | vitis_hls 
$(JELLY_DIR)/scripts/hls_csynth.tcl 45 | 46 | .PHONY: clean 47 | clean: 48 | -rm -rf $(HLS_TARGET) 49 | -rm *.log 50 | 51 | 52 | .PHONY: create 53 | create: $(TARGET_DIR) 54 | 55 | .PHONY: csynth 56 | csynth: $(EXPORT_ZIP) 57 | 58 | .PHONY: csim 59 | csim: $(TARGET_DIR) 60 | vitis_hls $(JELLY_DIR)/scripts/hls_csim.tcl 61 | 62 | .PHONY: cosim 63 | cosim: $(TARGET_DIR) $(EXPORT_ZIP) 64 | vitis_hls $(JELLY_DIR)/scripts/hls_cosim.tcl 65 | -------------------------------------------------------------------------------- /tests/gtest/cudaMatrixRowwiseSetVectorTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cuda_runtime.h" 8 | #include "device_launch_parameters.h" 9 | 10 | #include "gtest/gtest.h" 11 | 12 | #if BB_WITH_CUDA 13 | 14 | #include "bb/FrameBuffer.h" 15 | #include "bbcu/bbcu.h" 16 | 17 | TEST(cudacudaMatrixRowwiseSetVectorTest, test_cudaMatrixRowwiseSetVector) 18 | { 19 | int const node_size = 513; 20 | int const frame_size = 1021; 21 | 22 | bb::Tensor x_buf(BB_TYPE_FP32, node_size); 23 | bb::FrameBuffer y_buf(BB_TYPE_FP32, frame_size, node_size); 24 | 25 | { 26 | auto x_ptr = x_buf.Lock(); 27 | for (int node = 0; node < node_size; ++node) { 28 | x_ptr(node) = node + 1; 29 | } 30 | } 31 | 32 | { 33 | auto x_ptr = x_buf.LockDeviceMemoryConst(); 34 | auto y_ptr = y_buf.LockDeviceMemory(true); 35 | bbcu_fp32_MatrixRowwiseSetVector 36 | ( 37 | (float const *)x_ptr.GetAddr(), 38 | (float *)y_ptr.GetAddr(), 39 | (int )y_buf.GetNodeSize(), 40 | (int )y_buf.GetFrameSize(), 41 | (int )(y_buf.GetFrameStride() / sizeof(float)) 42 | ); 43 | } 44 | 45 | { 46 | for (int node = 0; node < node_size; ++node) { 47 | for (int frame = 0; frame < frame_size; ++frame) { 48 | EXPECT_FLOAT_EQ((float)(node+1), y_buf.GetFP32(frame, node)); 49 | } 50 | } 51 | } 52 | } 53 | 54 | 55 | #endif 56 | 57 | 
-------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_cnn/verilator/tb_verilator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "Vtb_verilator.h" 5 | #include "jelly/simulator/Manager.h" 6 | #include "jelly/simulator/ClockNode.h" 7 | #include "jelly/simulator/ResetNode.h" 8 | #include "jelly/simulator/VerilatorNode.h" 9 | #include "jelly/simulator/Axi4sImageLoadNode.h" 10 | #include "jelly/simulator/Axi4sImageDumpNode.h" 11 | 12 | 13 | namespace jsim = jelly::simulator; 14 | 15 | 16 | #if VM_TRACE 17 | #include 18 | #include 19 | #endif 20 | 21 | 22 | int main(int argc, char** argv) 23 | { 24 | auto contextp = std::make_shared(); 25 | contextp->debug(0); 26 | contextp->randReset(2); 27 | contextp->commandArgs(argc, argv); 28 | 29 | const auto top = std::make_shared(contextp.get(), "top"); 30 | 31 | 32 | jsim::trace_ptr_t tfp = nullptr; 33 | #if VM_TRACE 34 | contextp->traceEverOn(true); 35 | 36 | tfp = std::make_shared(); 37 | top->trace(tfp.get(), 100); 38 | tfp->open("tb_verilator" TRACE_EXT); 39 | #endif 40 | 41 | auto mng = jsim::Manager::Create(); 42 | 43 | mng->AddNode(jsim::ClockNode_Create(&top->clk, 5.0/2)); 44 | mng->AddNode(jsim::ResetNode_Create(&top->reset, 100)); 45 | mng->AddNode(jsim::VerilatorNode_Create(top, tfp)); 46 | 47 | mng->Run(10000000); 48 | // mng->Run(); 49 | 50 | #if VM_TRACE 51 | tfp->close(); 52 | #endif 53 | 54 | #if VM_COVERAGE 55 | contextp->coveragep()->write("coverage.dat"); 56 | #endif 57 | 58 | return 0; 59 | } 60 | 61 | 62 | // end of file 63 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_lut_simple/verilator/tb_verilator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "Vtb_verilator.h" 5 | #include 
"jelly/simulator/Manager.h" 6 | #include "jelly/simulator/ClockNode.h" 7 | #include "jelly/simulator/ResetNode.h" 8 | #include "jelly/simulator/VerilatorNode.h" 9 | #include "jelly/simulator/Axi4sImageLoadNode.h" 10 | #include "jelly/simulator/Axi4sImageDumpNode.h" 11 | 12 | 13 | namespace jsim = jelly::simulator; 14 | 15 | 16 | #if VM_TRACE 17 | #include 18 | #include 19 | #endif 20 | 21 | 22 | int main(int argc, char** argv) 23 | { 24 | auto contextp = std::make_shared(); 25 | contextp->debug(0); 26 | contextp->randReset(2); 27 | contextp->commandArgs(argc, argv); 28 | 29 | const auto top = std::make_shared(contextp.get(), "top"); 30 | 31 | 32 | jsim::trace_ptr_t tfp = nullptr; 33 | #if VM_TRACE 34 | contextp->traceEverOn(true); 35 | 36 | tfp = std::make_shared(); 37 | top->trace(tfp.get(), 100); 38 | tfp->open("tb_verilator" TRACE_EXT); 39 | #endif 40 | 41 | auto mng = jsim::Manager::Create(); 42 | 43 | mng->AddNode(jsim::ClockNode_Create(&top->clk, 5.0/2)); 44 | mng->AddNode(jsim::ResetNode_Create(&top->reset, 100)); 45 | mng->AddNode(jsim::VerilatorNode_Create(top, tfp)); 46 | 47 | mng->Run(10000000); 48 | // mng->Run(); 49 | 50 | #if VM_TRACE 51 | tfp->close(); 52 | #endif 53 | 54 | #if VM_COVERAGE 55 | contextp->coveragep()->write("coverage.dat"); 56 | #endif 57 | 58 | return 0; 59 | } 60 | 61 | 62 | // end of file 63 | -------------------------------------------------------------------------------- /documents/sphinx/source/introduction_binary_modulation.rst: -------------------------------------------------------------------------------- 1 | ---------------------------------------------- 2 | バイナリ変調 3 | ---------------------------------------------- 4 | 5 | 概要 6 | ------- 7 | 8 | 本章ではバイナリLUT-Networkに限らず、広くバイナリネットワークに適用可能な技術として、バイナリ変調の適用について述べます。 9 | バイナリ変調とフルバイナリネットワークの組み合わせは、本サイトの提唱する技術の1つであり、入出力のに多値データが 10 | 要求される場合にバイナリネットワークを適用するための手法です。 11 | 12 | 従来のバイナリネットワーク 13 | --------------------------- 14 | 15 | 
従来のバイナリネットワークでは、多値画像の認識などを行うために、入力側のいくつかの層をバイナライズせずに 16 | 多値入力とすることで多値データを扱っていました。 17 | この方法は一定の効果はあるものの、入力層では乗算器を必要とする為リソースが大きく増加する上に、 18 | 出力はバイナリであり、クラスタ分類ぐらいにしか応用できないという課題がありました。 19 | 20 | 21 | バイナリ変調 22 | ------------------- 23 | 24 | 信号処理の世界にはバイナリ変調という技術があります。 25 | 例えばデジタルオーディオなどの分野では 1bit ADC やD級アンプの技術は非常に重要です。 26 | こでは信号をオーバーサンプリングにより、高い周波数の 1bit のデータに量子化することで、 27 | 信号処理自体はバイナリで扱うにもかかわらず、入出力データには例えば16bit以上の高品質の 28 | 信号を得る技術です。 29 | 30 | もっとも簡単な方法はアナログ値を乱数閾値でバイナリ化することです。結果は元の 31 | アナログ値に応じた確率で1と0が生成されますので、扱いたい値がそのまま 32 | Stochastic演算の対象となります。 33 | しかしながら確率的な振る舞いはデータ数が充分多い時に顕在化してきますので 34 | 信号オーバーサンプリングは重要な技法となってきます。 35 | 36 | BinaryBrain では同様の変調を元データに施してデータを水増しすることで、 37 | 非常に小さな回路の認識率を上げたり、Autoencoderや回帰分析などの多値出力を 38 | 必要とする分野への適用可能性を広げました。 39 | 40 | 下記は、通常の Dense CNN の ReLU を Binarizer に置き換え、入力もバイナリ化して 41 | フルバイナリネットワーク化したものを用いて、バイナリ変調の効果を実験した結果です。 42 | 43 | .. image:: ../../images/binary_modulation.png 44 | :scale: 100% 45 | 46 | 47 | binary_x1 が1倍のオーバーサンプル、すなわち何もせずに単純にフルバイナリ化した場合ですが、 48 | FP32での結果に比べて大きく認識率が落ち込みます。 49 | そして、binary_x3、binary_x7, binary_x15, binary_x31 が、それぞれ3倍、7倍、15倍、31倍 50 | のオーバーサンプリングでのバイナリ変調を行ったものですが、ある程度の回復を見せている 51 | 事がうかがえます。 52 | 53 | 同じ回路に、より高いフレームレートで、変調したデータを通すだけなので、スループットは 54 | 低下しますが、ネットワークを構成する回路自体のリソースは一切変化することなく、認識率だけが 55 | 向上しているのが特徴です。 56 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/tb_verilator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // #include 4 | #include "Vtb_verilator.h" 5 | #include "jelly/simulator/Manager.h" 6 | #include "jelly/simulator/ClockNode.h" 7 | #include "jelly/simulator/ResetNode.h" 8 | #include "jelly/simulator/VerilatorNode.h" 9 | #include "jelly/simulator/Axi4sImageLoadNode.h" 10 | #include "jelly/simulator/Axi4sImageDumpNode.h" 11 | 12 | 13 | namespace jsim = jelly::simulator; 14 | 15 | 16 | #if VM_TRACE 17 | #include 18 | 
#include 19 | #endif 20 | 21 | 22 | int main(int argc, char** argv) 23 | { 24 | auto contextp = std::make_shared(); 25 | contextp->debug(0); 26 | contextp->randReset(2); 27 | contextp->commandArgs(argc, argv); 28 | 29 | const auto top = std::make_shared(contextp.get(), "top"); 30 | 31 | 32 | jsim::trace_ptr_t tfp = nullptr; 33 | #if VM_TRACE 34 | contextp->traceEverOn(true); 35 | 36 | tfp = std::make_shared(); 37 | top->trace(tfp.get(), 100); 38 | tfp->open("tb_verilator" TRACE_EXT); 39 | #endif 40 | 41 | auto mng = jsim::Manager::Create(); 42 | 43 | mng->AddNode(jsim::ClockNode_Create(&top->clk, 5.0/2)); 44 | mng->AddNode(jsim::ResetNode_Create(&top->reset, 100)); 45 | mng->AddNode(jsim::VerilatorNode_Create(top, tfp)); 46 | 47 | mng->Run(10000000); 48 | // mng->Run(); 49 | 50 | #if VM_TRACE 51 | tfp->close(); 52 | #endif 53 | 54 | #if VM_COVERAGE 55 | contextp->coveragep()->write("coverage.dat"); 56 | #endif 57 | 58 | return 0; 59 | } 60 | 61 | 62 | // end of file 63 | -------------------------------------------------------------------------------- /samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/tb_verilator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // #include 4 | #include "Vtb_verilator.h" 5 | #include "jelly/simulator/Manager.h" 6 | #include "jelly/simulator/ClockNode.h" 7 | #include "jelly/simulator/ResetNode.h" 8 | #include "jelly/simulator/VerilatorNode.h" 9 | #include "jelly/simulator/Axi4sImageLoadNode.h" 10 | #include "jelly/simulator/Axi4sImageDumpNode.h" 11 | 12 | 13 | namespace jsim = jelly::simulator; 14 | 15 | 16 | #if VM_TRACE 17 | #include 18 | #include 19 | #endif 20 | 21 | 22 | int main(int argc, char** argv) 23 | { 24 | auto contextp = std::make_shared(); 25 | contextp->debug(0); 26 | contextp->randReset(2); 27 | contextp->commandArgs(argc, argv); 28 | 29 | const auto top = std::make_shared(contextp.get(), "top"); 30 | 31 | 32 | jsim::trace_ptr_t 
tfp = nullptr; 33 | #if VM_TRACE 34 | contextp->traceEverOn(true); 35 | 36 | tfp = std::make_shared(); 37 | top->trace(tfp.get(), 100); 38 | tfp->open("tb_verilator" TRACE_EXT); 39 | #endif 40 | 41 | auto mng = jsim::Manager::Create(); 42 | 43 | mng->AddNode(jsim::ClockNode_Create(&top->clk, 5.0/2)); 44 | mng->AddNode(jsim::ResetNode_Create(&top->reset, 100)); 45 | mng->AddNode(jsim::VerilatorNode_Create(top, tfp)); 46 | 47 | mng->Run(10000000); 48 | // mng->Run(); 49 | 50 | #if VM_TRACE 51 | tfp->close(); 52 | #endif 53 | 54 | #if VM_COVERAGE 55 | contextp->coveragep()->write("coverage.dat"); 56 | #endif 57 | 58 | return 0; 59 | } 60 | 61 | 62 | // end of file 63 | -------------------------------------------------------------------------------- /tests/hls/mnist/mnist_simple/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # setting 3 | export HLS_TARGET = mnist_simple 4 | export HLS_SOLUTION = solution_1 5 | export DEVICE_PART = xczu3eg-sbva484-1-i 6 | export CLOCK_PERIOD = 4 7 | 8 | export CSIM_OPTIONS ?= 9 | export COSIM_OPTIONS ?= -trace_level all -wave_debug 10 | 11 | 12 | # Ubuntu の場合下記が必要? 
13 | # export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu 14 | 15 | 16 | # directories 17 | JELLY_DIR = ../../../../jelly 18 | SOURCE_DIR = src 19 | TESTBENCH_DIR = testbench 20 | TARGET_DIR = $(HLS_TARGET)/$(HLS_SOLUTION) 21 | CSIM_DIR = $(TARGET_DIR)/csim 22 | SYN_DIR = $(TARGET_DIR)/syn 23 | COSIM_DIR = $(TARGET_DIR)/sim 24 | IMPL_DIR = $(TARGET_DIR)/impl 25 | EXPORT_ZIP = $(IMPL_DIR)/export.zip 26 | 27 | # flags 28 | export SOURCE_FLAGS = -I$(SOURCE_DIR) 29 | export TESTBENCH_FLAGS = -I$(SOURCE_DIR) -I$(TESTBENCH_DIR) 30 | 31 | # source 32 | SOURCES = $(SOURCE_DIR)/mnist_simple.cpp 33 | export SOURCES 34 | 35 | # testbanch 36 | TESTBENCHS = $(TESTBENCH_DIR)/tb_mnist_simple.cpp 37 | export TESTBENCHS 38 | 39 | # rules 40 | .PHONY: all 41 | all: $(EXPORT_ZIP) 42 | 43 | $(TARGET_DIR): 44 | -rm -rf $(HLS_TARGET) 45 | vitis_hls $(JELLY_DIR)/scripts/hls_create_project.tcl 46 | 47 | $(EXPORT_ZIP): $(TARGET_DIR) $(SOURCES) 48 | vitis_hls $(JELLY_DIR)/scripts/hls_csynth.tcl 49 | 50 | .PHONY: clean 51 | clean: 52 | -rm -rf $(HLS_TARGET) 53 | -rm *.log 54 | 55 | 56 | .PHONY: create 57 | create: $(TARGET_DIR) 58 | 59 | .PHONY: csynth 60 | csynth: $(EXPORT_ZIP) 61 | 62 | .PHONY: csim 63 | csim: $(TARGET_DIR) 64 | vitis_hls $(JELLY_DIR)/scripts/hls_csim.tcl 65 | 66 | .PHONY: cosim 67 | cosim: $(TARGET_DIR) $(EXPORT_ZIP) 68 | vitis_hls $(JELLY_DIR)/scripts/hls_cosim.tcl 69 | -------------------------------------------------------------------------------- /tests/gtest/ConvBitToRealTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "gtest/gtest.h" 5 | 6 | #include "bbcu/bbcu.h" 7 | #include "bb/FrameBuffer.h" 8 | 9 | 10 | 11 | #ifdef BB_WITH_CUDA 12 | 13 | TEST(ConvBitToRealTest, testConvBitToRealTest) 14 | { 15 | int frame_size = 1234; 16 | int node_size = 3456; 17 | 18 | bb::FrameBuffer buf_bit (frame_size, {node_size}, BB_TYPE_BIT); 19 | bb::FrameBuffer buf_fp32(frame_size, {node_size}, 
BB_TYPE_FP32); 20 | 21 | std::mt19937_64 mt(1); 22 | std::uniform_int_distribution dist(0, 1); 23 | for (int frame = 0; frame < frame_size; ++frame ) { 24 | for (int node = 0; node < node_size; ++node ) { 25 | buf_bit.SetBit(frame, node, dist(mt) == 1); 26 | } 27 | } 28 | 29 | { 30 | auto x_ptr = buf_bit.LockDeviceMemoryConst(); 31 | auto y_ptr = buf_fp32.LockDeviceMemory(true); 32 | 33 | bbcu_ConvBitToReal( 34 | (int const *)x_ptr.GetAddr(), 35 | (float *)y_ptr.GetAddr(), 36 | 0.0f, 37 | 1.0f, 38 | (int)node_size, 39 | (int)frame_size, 40 | (int)(buf_bit.GetFrameStride() / sizeof(int)), 41 | (int)(buf_fp32.GetFrameStride() / sizeof(float)) 42 | ); 43 | } 44 | 45 | for (int frame = 0; frame < frame_size; ++frame ) { 46 | for (int node = 0; node < node_size; ++node ) { 47 | bool x = buf_bit.GetBit(frame, node); 48 | float y = buf_fp32.GetFP32(frame, node); 49 | EXPECT_EQ(x ? 1.0f : 0.0f, y); 50 | } 51 | } 52 | } 53 | 54 | 55 | #endif 56 | 57 | 58 | // end of file 59 | 60 | -------------------------------------------------------------------------------- /include/bb/PnmImage.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "bb/DataType.h" 17 | #include "bb/FrameBuffer.h" 18 | 19 | 20 | namespace bb { 21 | 22 | inline void WritePgm(std::string fname, bb::FrameBuffer buf, int width, int height, int frame = 0) 23 | { 24 | std::ofstream ofs(fname); 25 | ofs << "P2\n"; 26 | ofs << width << " " << height << "\n"; 27 | ofs << "255\n"; 28 | for ( int i = 0; i < width*height; ++i ) { 29 | auto v = buf.GetFP32(frame, i); 30 | v 
= std::max(v, 0.0f); 31 | v = std::min(v, 1.0f); 32 | ofs << (int)(v * 255.0f) << "\n"; 33 | } 34 | } 35 | 36 | inline void WritePpm(std::string fname, bb::FrameBuffer buf, int width, int height, int frame = 0) 37 | { 38 | std::ofstream ofs(fname); 39 | ofs << "P3\n"; 40 | ofs << width << " " << height << "\n"; 41 | ofs << "255\n"; 42 | for ( int i = 0; i < width*height; ++i ) { 43 | for ( int c = 0; c < 3; ++c ) { 44 | auto v = buf.GetFP32(frame, width*height*c + i); 45 | v = std::max(v, 0.0f); 46 | v = std::min(v, 1.0f); 47 | ofs << (int)(v * 255.0f) << "\n"; 48 | } 49 | ofs << "\n"; 50 | } 51 | } 52 | 53 | } 54 | 55 | 56 | // end of file 57 | -------------------------------------------------------------------------------- /python/binarybrain/src/core_bbcu.cu: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018-2019 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #ifndef BB_PYBIND11 11 | #define BB_PYBIND11 12 | #endif 13 | 14 | #ifndef BB_OBJECT_LOADER 15 | #define BB_OBJECT_LOADER 16 | #endif 17 | 18 | 19 | #include "Manager.cu" 20 | #include "LocalHeap.cu" 21 | #include "FrameBufferCopy.cu" 22 | #include "ConvBitToReal.cu" 23 | #include "Vector.cu" 24 | #include "MatrixColwiseSum.cu" 25 | #include "MatrixColwiseMeanVar.cu" 26 | #include "MatrixRowwiseSetVector.cu" 27 | #include "MicroMlp.cu" 28 | #include "AverageLut.cu" 29 | #include "MaxLut.cu" 30 | #include "BinaryLut6.cu" 31 | #include "DifferentiableLut.cu" 32 | #include "StochasticLut.cu" 33 | #include "StochasticMaxPooling.cu" 34 | #include "StochasticBatchNormalization.cu" 35 | #include "ShuffleModulation.cu" 36 | #include "Shuffle.cu" 37 | #include "RealToBinary.cu" 38 | #include 
"BinaryToReal.cu" 39 | #include "BitEncode.cu" 40 | #include "BitError.cu" 41 | #include "Im2Col.cu" 42 | #include "Col2Im.cu" 43 | #include "MaxPooling.cu" 44 | #include "UpSampling.cu" 45 | #include "BatchNormalization.cu" 46 | #include "ReLU.cu" 47 | #include "Sigmoid.cu" 48 | #include "Binarize.cu" 49 | #include "HardTanh.cu" 50 | #include "OptimizerAdam.cu" 51 | #include "LossSoftmaxCrossEntropy.cu" 52 | #include "LossMeanSquaredError.cu" 53 | #include "MetricsCategoricalAccuracy.cu" 54 | #include "Utility.cu" 55 | 56 | 57 | // end of file 58 | -------------------------------------------------------------------------------- /cuda/MatrixRowwiseSetVector.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_runtime.h" 5 | #include "device_launch_parameters.h" 6 | 7 | #include "bbcu/bbcu.h" 8 | #include "bbcu/bbcu_util.h" 9 | 10 | 11 | 12 | // kernel 13 | __global__ void kernel_fp32_MatrixRowwiseSetVector( 14 | const float* x_vec, 15 | float* y_mat, 16 | int node_size, 17 | int frame_size, 18 | int frame_stride 19 | ) 20 | { 21 | // 初期化 22 | int frame_base = threadIdx.x; 23 | int frame_step = blockDim.x; 24 | int node = blockIdx.y * blockDim.y + threadIdx.y; 25 | 26 | if (node >= node_size) { 27 | return; 28 | } 29 | 30 | // 読み込み 31 | float x = x_vec[node]; 32 | 33 | float *y_ptr = &y_mat[node * frame_stride]; 34 | for ( int frame = frame_base; frame < frame_size; frame += frame_step ) { 35 | y_ptr[frame] = x; 36 | } 37 | } 38 | 39 | 40 | int bbcu_fp32_MatrixRowwiseSetVector 41 | ( 42 | const float* dev_x_vec, 43 | float* dev_y_mat, 44 | int node_size, 45 | int frame_size, 46 | int frame_stride, 47 | cudaStream_t streamId 48 | ) 49 | { 50 | BBCU_DEBUG_ASSERT(bbcu_IsDeviceAvailable()); 51 | 52 | dim3 block(32, 32); 53 | dim3 grid(1, (node_size+block.y-1)/block.y); 54 | 55 | kernel_fp32_MatrixRowwiseSetVector<<>>( 56 | dev_x_vec, 57 | dev_y_mat, 58 | node_size, 59 | frame_size, 60 | 
frame_stride); 61 | BB_CUDA_CHECK_LAST_ERROR(); 62 | 63 | return 0; 64 | } 65 | 66 | 67 | -------------------------------------------------------------------------------- /tests/gtest/ShuffleTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "gtest/gtest.h" 4 | 5 | #include "bb/Shuffle.h" 6 | 7 | 8 | 9 | TEST(SuffleTest, testSuffle_test0) 10 | { 11 | auto shuffle = bb::Shuffle::Create(3); 12 | 13 | bb::FrameBuffer x(2, {6}, BB_TYPE_FP32); 14 | shuffle->SetInputShape(x.GetShape()); 15 | 16 | x.SetFP32(0, 0, 11); 17 | x.SetFP32(0, 1, 12); 18 | x.SetFP32(0, 2, 13); 19 | x.SetFP32(0, 3, 14); 20 | x.SetFP32(0, 4, 15); 21 | x.SetFP32(0, 5, 16); 22 | x.SetFP32(1, 0, 21); 23 | x.SetFP32(1, 1, 22); 24 | x.SetFP32(1, 2, 23); 25 | x.SetFP32(1, 3, 24); 26 | x.SetFP32(1, 4, 25); 27 | x.SetFP32(1, 5, 26); 28 | 29 | auto y = shuffle->Forward(x); 30 | 31 | EXPECT_EQ(11, y.GetFP32(0, 0)); 32 | EXPECT_EQ(13, y.GetFP32(0, 1)); 33 | EXPECT_EQ(15, y.GetFP32(0, 2)); 34 | EXPECT_EQ(12, y.GetFP32(0, 3)); 35 | EXPECT_EQ(14, y.GetFP32(0, 4)); 36 | EXPECT_EQ(16, y.GetFP32(0, 5)); 37 | EXPECT_EQ(21, y.GetFP32(1, 0)); 38 | EXPECT_EQ(23, y.GetFP32(1, 1)); 39 | EXPECT_EQ(25, y.GetFP32(1, 2)); 40 | EXPECT_EQ(22, y.GetFP32(1, 3)); 41 | EXPECT_EQ(24, y.GetFP32(1, 4)); 42 | EXPECT_EQ(26, y.GetFP32(1, 5)); 43 | 44 | // backward 45 | auto dx = shuffle->Backward(y); 46 | 47 | EXPECT_EQ(11, dx.GetFP32(0, 0)); 48 | EXPECT_EQ(12, dx.GetFP32(0, 1)); 49 | EXPECT_EQ(13, dx.GetFP32(0, 2)); 50 | EXPECT_EQ(14, dx.GetFP32(0, 3)); 51 | EXPECT_EQ(15, dx.GetFP32(0, 4)); 52 | EXPECT_EQ(16, dx.GetFP32(0, 5)); 53 | EXPECT_EQ(21, dx.GetFP32(1, 0)); 54 | EXPECT_EQ(22, dx.GetFP32(1, 1)); 55 | EXPECT_EQ(23, dx.GetFP32(1, 2)); 56 | EXPECT_EQ(24, dx.GetFP32(1, 3)); 57 | EXPECT_EQ(25, dx.GetFP32(1, 4)); 58 | EXPECT_EQ(26, dx.GetFP32(1, 5)); 59 | } 60 | 61 | 62 | -------------------------------------------------------------------------------- 
/tests/cpp/diabetes/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # target 3 | TARGET = sample-diabetes 4 | SUB_TARGETS = 5 | 6 | # run option 7 | RUN_OPTION = All 8 | 9 | # default flag 10 | DEBUG ?= No 11 | WITH_CUDA ?= Yes 12 | WITH_CEREAL ?= Yes 13 | 14 | BBCU_PATH = ../../../cuda 15 | BBCU_LIB = $(BBCU_PATH)/libbbcu.a 16 | 17 | CEREAL_PATH = ../../../cereal 18 | 19 | ifeq ($(WITH_CUDA),Yes) 20 | else 21 | CC = g++ 22 | #CC ?= clang++ 23 | endif 24 | 25 | CFLAGS = -O2 -mavx2 -mfma -fopenmp -std=c++14 26 | CINCS = -I../../../include 27 | CDEFS = 28 | 29 | SRCS = main.cpp 30 | SRCS += DiabetesRegressionDenseAffine.cpp 31 | SRCS += DiabetesRegressionMicroMlpLut.cpp 32 | SRCS += DiabetesRegressionStochasticLut6.cpp 33 | 34 | OBJS = $(addsuffix .o, $(basename $(SRCS))) 35 | 36 | LIBS = 37 | 38 | ifeq ($(WITH_CEREAL),Yes) 39 | CDEFS += -DBB_WITH_CEREAL 40 | CINCS += -I$(CEREAL_PATH)/include 41 | endif 42 | 43 | ifeq ($(WITH_CUDA),Yes) 44 | CC = nvcc 45 | CDEFS += -DBB_WITH_CUDA 46 | CFLAGS := -Xcompiler '$(CFLAGS)' -lcublas 47 | LIBS += $(BBCU_LIB) 48 | SUB_TARGET += bbcu_build 49 | endif 50 | 51 | .SUFFIXES: .c .o 52 | 53 | .PHONY: all 54 | all: $(SUB_TARGET) $(TARGET) 55 | 56 | .PHONY: clean 57 | clean: 58 | rm -f $(TARGET) *.o 59 | 60 | .PHONY: run 61 | run: $(TARGET) $(DATA_FILES) 62 | ./$(TARGET) $(RUN_OPTION) 63 | 64 | .PHONY: bbcu_build 65 | bbcu_build: 66 | make -C $(BBCU_PATH) 67 | 68 | $(TARGET): $(OBJS) $(LIBS) 69 | $(CC) -o $(TARGET) $(CFLAGS) $(CINCS) $(CDEFS) $(OBJS) $(LIBS) 70 | 71 | .cpp.o: 72 | $(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $< 73 | 74 | depend: $(SRCS) 75 | $(CC) -M $(CFLAGS) $(CINCS) $(CDEFS) $^ > $@ 76 | 77 | include depend 78 | 79 | 80 | # data 81 | DATA_FILES = diabetes_data.txt 82 | DATA_FILES += diabetes_target.txt 83 | 84 | .PHONY: dl_data 85 | dl_data: 86 | python3 diabets_data.py 87 | 88 | -------------------------------------------------------------------------------- 
/samples/cpp/diabetes/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # target 3 | TARGET = sample-diabetes 4 | SUB_TARGETS = 5 | 6 | # run option 7 | RUN_OPTION = All 8 | 9 | # default flag 10 | DEBUG ?= No 11 | WITH_CUDA ?= Yes 12 | WITH_CEREAL ?= Yes 13 | 14 | BBCU_PATH = ../../../cuda 15 | BBCU_LIB = $(BBCU_PATH)/libbbcu.a 16 | 17 | CEREAL_PATH = ../../../cereal 18 | 19 | ifeq ($(WITH_CUDA),Yes) 20 | else 21 | CC = g++ 22 | #CC ?= clang++ 23 | endif 24 | 25 | CFLAGS = -O2 -mavx2 -mfma -fopenmp -std=c++14 26 | CINCS = -I../../../include 27 | CDEFS = 28 | 29 | SRCS = main.cpp 30 | SRCS += DiabetesRegressionDenseAffine.cpp 31 | SRCS += DiabetesRegressionMicroMlpLut.cpp 32 | SRCS += DiabetesRegressionStochasticLut6.cpp 33 | 34 | OBJS = $(addsuffix .o, $(basename $(SRCS))) 35 | 36 | LIBS = 37 | 38 | ifeq ($(WITH_CEREAL),Yes) 39 | CDEFS += -DBB_WITH_CEREAL 40 | CINCS += -I$(CEREAL_PATH)/include 41 | endif 42 | 43 | ifeq ($(WITH_CUDA),Yes) 44 | CC = nvcc 45 | CDEFS += -DBB_WITH_CUDA 46 | CFLAGS := -Xcompiler '$(CFLAGS)' -lcublas 47 | LIBS += $(BBCU_LIB) 48 | SUB_TARGET += bbcu_build 49 | endif 50 | 51 | .SUFFIXES: .c .o 52 | 53 | .PHONY: all 54 | all: $(SUB_TARGET) $(TARGET) 55 | 56 | .PHONY: clean 57 | clean: 58 | rm -f $(TARGET) *.o 59 | 60 | .PHONY: run 61 | run: $(TARGET) $(DATA_FILES) 62 | ./$(TARGET) $(RUN_OPTION) 63 | 64 | .PHONY: bbcu_build 65 | bbcu_build: 66 | make -C $(BBCU_PATH) 67 | 68 | $(TARGET): $(OBJS) $(LIBS) 69 | $(CC) -o $(TARGET) $(CFLAGS) $(CINCS) $(CDEFS) $(OBJS) $(LIBS) 70 | 71 | .cpp.o: 72 | $(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $< 73 | 74 | depend: $(SRCS) 75 | $(CC) -M $(CFLAGS) $(CINCS) $(CDEFS) $^ > $@ 76 | 77 | include depend 78 | 79 | 80 | # data 81 | DATA_FILES = diabetes_data.txt 82 | DATA_FILES += diabetes_target.txt 83 | 84 | .PHONY: dl_data 85 | dl_data: 86 | python3 diabets_data.py 87 | 88 | -------------------------------------------------------------------------------- 
/tests/gtest/VariablesTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "gtest/gtest.h" 5 | 6 | #include "bb/Variables.h" 7 | 8 | 9 | TEST(VariablesTest, VariablesTest_Test) 10 | { 11 | auto t0 = std::make_shared(bb::indices_t({2, 3, 4}), BB_TYPE_FP32); 12 | auto t1 = std::make_shared(bb::indices_t({3, 2, 6}), BB_TYPE_FP64); 13 | auto t2 = std::make_shared(bb::indices_t({4, 1, 7}), BB_TYPE_INT32); 14 | 15 | bb::Variables var1; 16 | var1.PushBack(t0); 17 | var1.PushBack(t1); 18 | var1.PushBack(t2); 19 | 20 | bb::Variables var2(var1.GetTypes(), var1.GetShapes()); 21 | bb::Variables var3(var1.GetTypes(), var1.GetShapes()); 22 | 23 | var1 = 1; 24 | var2 = 2; 25 | var3 = 0; 26 | 27 | var2 += var1; 28 | 29 | { 30 | auto ptr2_0 = var2[0].Lock(); 31 | auto ptr2_1 = var2[1].Lock(); 32 | auto ptr2_2 = var2[2].Lock(); 33 | EXPECT_EQ(3.0f, ptr2_0[0]); 34 | EXPECT_EQ(3.0f, ptr2_0[1]); 35 | EXPECT_EQ(3.0f, ptr2_0[2]); 36 | EXPECT_EQ(3.0, ptr2_1[0]); 37 | EXPECT_EQ(3.0, ptr2_1[1]); 38 | EXPECT_EQ(3.0, ptr2_1[2]); 39 | EXPECT_EQ(3, ptr2_2[0]); 40 | EXPECT_EQ(3, ptr2_2[1]); 41 | EXPECT_EQ(3, ptr2_2[2]); 42 | } 43 | 44 | var2 += 11; 45 | 46 | var3 = var1 + var2; 47 | var3 = var1 + 1; 48 | var3 = 2 + var1 + 1; 49 | 50 | var3 -= var1; 51 | var3 -= 5; 52 | var3 = var1 - var2; 53 | var3 = var1 - 1; 54 | var3 = 2 - var1; 55 | 56 | var3 *= var1; 57 | var3 *= 5; 58 | var3 = var1 * var2; 59 | var3 = var1 * 1; 60 | var3 = 2 * var1; 61 | 62 | var3 /= var1; 63 | var3 /= 5; 64 | var3 = var1 / var2; 65 | var3 = var1 / 1; 66 | var3 = 2 / var1; 67 | } 68 | 69 | -------------------------------------------------------------------------------- /tests/gtest/DenseAffineQuantizeTest.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include "gtest/gtest.h" 6 | #include "bb/DenseAffineQuantize.h" 7 | 8 | 9 | 10 | 11 | 
TEST(DenseAffineQuantizeTest, test1) 12 | { 13 | auto affine = bb::DenseAffineQuantize<>::Create(3); 14 | 15 | #if 0 16 | affine->SetInputShape({2}); 17 | 18 | // forward 19 | bb::FrameBuffer x_buf(1, {2}, BB_TYPE_FP32); 20 | 21 | x_buf.SetFP32(0, 0, 1); 22 | x_buf.SetFP32(0, 1, 2); 23 | EXPECT_EQ(1, x_buf.GetFP32(0, 0)); 24 | EXPECT_EQ(2, x_buf.GetFP32(0, 1)); 25 | 26 | { 27 | auto W = affine->lock_W(); 28 | auto b = affine->lock_b(); 29 | W(0, 0) = 1; 30 | W(0, 1) = 2; 31 | W(1, 0) = 10; 32 | W(1, 1) = 20; 33 | W(2, 0) = 100; 34 | W(2, 1) = 200; 35 | b(0) = 1000; 36 | b(1) = 2000; 37 | b(2) = 3000; 38 | } 39 | 40 | auto y_buf = affine->Forward(x_buf); 41 | 42 | EXPECT_EQ(1 * 1 + 2 * 2 + 1000, y_buf.GetFP32(0, 0)); 43 | EXPECT_EQ(1 * 10 + 2 * 20 + 2000, y_buf.GetFP32(0, 1)); 44 | EXPECT_EQ(1 * 100 + 2 * 200 + 3000, y_buf.GetFP32(0, 2)); 45 | 46 | 47 | // backward 48 | 49 | bb::FrameBuffer dy_buf(1, {3}, BB_TYPE_FP32); 50 | 51 | dy_buf.SetFP32(0, 0, 998); 52 | dy_buf.SetFP32(0, 1, 2042); 53 | dy_buf.SetFP32(0, 2, 3491); 54 | 55 | auto dx_buf = affine->Backward(dy_buf); 56 | 57 | EXPECT_EQ(370518, dx_buf.GetFP32(0, 0)); 58 | EXPECT_EQ(741036, dx_buf.GetFP32(0, 1)); 59 | 60 | { 61 | auto dW = affine->lock_dW_const(); 62 | 63 | EXPECT_EQ(998, dW(0, 0)); 64 | EXPECT_EQ(2042, dW(1, 0)); 65 | EXPECT_EQ(3491, dW(2, 0)); 66 | EXPECT_EQ(1996, dW(0, 1)); 67 | EXPECT_EQ(4084, dW(1, 1)); 68 | EXPECT_EQ(6982, dW(2, 1)); 69 | } 70 | #endif 71 | } 72 | 73 | -------------------------------------------------------------------------------- /documents/sphinx/source/introduction_case_study.rst: -------------------------------------------------------------------------------- 1 | ---------------------------------------------- 2 | 事例紹介 3 | ---------------------------------------------- 4 | 5 | リアルタイム認識 6 | ------------------ 7 | 8 | 実装事例 9 | ^^^^^^^^^^^^^^^^^^^ 10 | フルバイナリネットワークで、遅延数ミリ秒(1000fps)での画像認識の例です。 11 | 12 | .. 
image:: ../../images/fpga_environment.jpg 13 | :scale: 100% 14 | 15 | 下記のようなブロック図となっています。 16 | 17 | .. image:: ../../images/block_diagram.png 18 | :scale: 100% 19 | 20 | 21 | FPGAリソース 22 | ^^^^^^^^^^^^^^^^ 23 | 24 | いくつかの認識について実験したものを以下に示します。 25 | 26 | .. image:: ../../images/fpga_resource.png 27 | :scale: 100% 28 | 29 | 下記はカメラやOLEDなどの制御回路も含んだものもありますが、例えば MNIST の Simple DNN 30 | であればニューラルネット部分はわずか 1460個のLUTのみで88%の認識が可能です。 31 | これは、今手に入るXILINXのもっとも小さなFPGAでも十分収まるサイズです。 32 | 33 | これは 1024-360-60-10 の4層構造のネットワークであり、例えば200MHzで動かした場合、 34 | 4サイクル(=20ナノ秒)で認識が完了します。そのため極めてリアルタイム性の高い用途への応用も可能です。 35 | 36 | もしカメラなどの入力に制約がなく、28x28の画像を毎サイクル供給可能であれば、 37 | コア自体は 200Mfpsで動作可能となります。 38 | これは1つの対象に対して条件を変えながら非常に多くの認識を行える帯域ですので、 39 | 1回の認識率は低くても、結果を二次加工することで実用的な認識率を目指すようなことも可能な帯域です。 40 | 41 | 42 | Autoencoder 43 | ------------------ 44 | 45 | 通常のバイナリネットワークは出力もバイナリであるため、例えばAutoencoderのような 46 | 多値出力が必要な用途には応用が難しいという課題があります。 47 | (入力に関しては最初の数層を多値で扱う手はあります) 48 | 49 | BinaryBrainでは、バイナリ変調を用いることで、入力から出力まで全層がバイナリである 50 | Fully binary neural network で多値データを扱う方法を提供しています。 51 | 52 | MNIST 53 | ^^^^^^^^^^^^^^^^ 54 | 55 | MNISTでの Autoencoder の実験結果です。 56 | 57 | .. image:: ../../images/autoencoder_mnist.png 58 | :scale: 100% 59 | 60 | MNIST画像自体が2値に近いのですが、輪郭付近でやや滑らかさが出ています。 61 | 62 | 63 | CIFAR-10 64 | ^^^^^^^^^^^^^^^^ 65 | 66 | 同様にCIFAR-10のデータセットで扱ったものです。 67 | 68 | .. 
image:: ../../images/autoencoder_cifar10.png 69 | :scale: 100% 70 | 71 | ぼやけた感じは否めませんが、多値出力に対してある程度のことができているのは確認できます。 72 | 73 | もともとがCIFAR-10のデータセット自体が Autoencoder のような学習を目的としたデータセットではないので、 74 | 多値の従来ネットワークでもかなりボケた画像しか作れない部分はあるので、まずは実験的な結果と言えます。 75 | 76 | -------------------------------------------------------------------------------- /tests/gtest/DifferentiableLutTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "gtest/gtest.h" 6 | 7 | #include "bb/DifferentiableLutN.h" 8 | 9 | 10 | TEST(DifferentiableLutTest, test_001) 11 | { 12 | auto lut0 = bb::DifferentiableLutN<6, float>::Create(16); 13 | bb::FrameBuffer x_buf(1, {16}, BB_TYPE_FP32); 14 | 15 | lut0->SetInputShape(x_buf.GetShape()); 16 | 17 | for (int i = 0; i < 16; ++i) { 18 | x_buf.SetFP32(0, 0, (float)i/10); 19 | } 20 | auto y_buf = lut0->Forward(x_buf); 21 | 22 | bb::FrameBuffer dy_buf(1, {16}, BB_TYPE_FP32); 23 | for (int i = 0; i < 16; ++i) { 24 | dy_buf.SetFP32(0, 0, (float)i/100); 25 | } 26 | 27 | lut0->Backward(dy_buf); 28 | 29 | { 30 | std::ofstream ofs("DifferentiableLutTest001.bb_net", std::ios::binary); 31 | lut0->DumpObject(ofs); 32 | } 33 | 34 | auto lut1 = bb::DifferentiableLutN<6, float>::Create(16); 35 | { 36 | std::ifstream ifs("DifferentiableLutTest001.bb_net", std::ios::binary); 37 | lut1->LoadObject(ifs); 38 | } 39 | 40 | EXPECT_EQ(lut0->GetGamma(), lut1->GetGamma()); 41 | EXPECT_EQ(lut0->GetBeta(), lut1->GetBeta()); 42 | EXPECT_EQ(lut0->GetOutputShape(), lut1->GetOutputShape()); 43 | EXPECT_EQ(lut0->GetInputShape(), lut1->GetInputShape()); 44 | for ( bb::index_t out_node = 0; out_node < lut0->GetOutputNodeSize(); ++out_node ) { 45 | EXPECT_EQ(lut0->GetNodeConnectionSize(out_node), lut1->GetNodeConnectionSize(out_node)); 46 | for ( bb::index_t in_index = 0; in_index < lut0->GetNodeConnectionSize(out_node); ++in_index ) { 47 | EXPECT_EQ(lut0->GetNodeConnectionIndex(out_node, in_index), 
lut1->GetNodeConnectionIndex(out_node, in_index)); 48 | } 49 | } 50 | 51 | EXPECT_EQ(lut0->EqualityCheck(*lut1), true); 52 | 53 | } 54 | 55 | 56 | -------------------------------------------------------------------------------- /documents/sphinx/source/informations.rst: -------------------------------------------------------------------------------- 1 | 2 | ================= 3 | 開発情報 4 | ================= 5 | 6 | 7 | githubについて 8 | ============================ 9 | 10 | 現在 version4 は下記の branch で管理しています 11 | 12 | ver4_develop 13 | 開発用ブランチです。ビルド不能な状態になることもあります。 14 | 最新のコードにアクセスしたい場合はここをご覧ください。 15 | 16 | ver4_release 17 | リリース作成用ブランチです。 18 | 19 | master 20 | リリースブランチで確認したものを反映。 21 | 22 | tag は リリースのタイミングでバージョン番号のタグを打つようにしております。 23 | また、開発都合で ver4_build0001 のような形式でリリースと無関係にビルドタグを打つ場合があります。 24 | 25 | まだ、開発初期で仕様が安定していませんので、再現性の確保などが必要な際はタグを活用ください。 26 | 27 | 28 | 29 | 30 | 作者情報 31 | ============================ 32 | 33 | 渕上 竜司(Ryuji Fuchikami) 34 | 35 | - github : https://github.com/ryuz 36 | - blog : http://ryuz.txt-nifty.com 37 | - twitter : https://twitter.com/ryuz88 38 | - facebook : https://www.facebook.com/ryuji.fuchikami 39 | - web-site : https://rtc-lab.com/ 40 | - e-mail : ryuji.fuchikami@nifty.com 41 | 42 | 43 | 参考にさせて頂いた情報 44 | ============================ 45 | 46 | - | バイナリニューラルネットとハードウェアの関係 47 | | https://www.slideshare.net/kentotajiri/ss-77136469 48 | 49 | - | BinaryConnect: Training Deep Neural Networks with binary weights during propagations 50 | | https://arxiv.org/pdf/1511.00363.pdf 51 | 52 | - | Binarized Neural Networks 53 | | https://arxiv.org/abs/1602.02505 54 | 55 | - | Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1 56 | | https://arxiv.org/abs/1602.02830 57 | 58 | - | XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks 59 | | https://arxiv.org/abs/1603.05279 60 | 61 | - | Xilinx UltraScale Architecture Configurable Logic Block User Guide 62 | | 
https://japan.xilinx.com/support/documentation/user_guides/ug574-ultrascale-clb.pdf 63 | 64 | 65 | 66 | 参考にした書籍 67 | ============================ 68 | 69 | - | ゼロから作るDeep Learning ―Pythonで学ぶディープラーニングの理論と実装 70 | | https://www.oreilly.co.jp/books/9784873117584/ 71 | 72 | -------------------------------------------------------------------------------- /samples/cpp/cifar10/sample_cifar10.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | ソース ファイル 20 | 21 | 22 | ソース ファイル 23 | 24 | 25 | ソース ファイル 26 | 27 | 28 | ソース ファイル 29 | 30 | 31 | ソース ファイル 32 | 33 | 34 | ソース ファイル 35 | 36 | 37 | ソース ファイル 38 | 39 | 40 | ソース ファイル 41 | 42 | 43 | ソース ファイル 44 | 45 | 46 | -------------------------------------------------------------------------------- /documents/sphinx/source/quick_start_cpp.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | クイックスタート(C++) 3 | ============================== 4 | 5 | 6 | まずはじめに付属のMNISTサンプルを動かすまでを紹介します。 7 | 8 | AXV2以降の命令が使えるCPUと、Windows7以降もしくは Linuxの環境を想定しております。 9 | CUDAにも対応していまが、nvccが利用可能な環境でビルドする必要があります。 10 | 11 | CUDAについてはNVIDIAのページを参考に事前にインストールください。 12 | https://developer.nvidia.com/cuda-downloads 13 | 14 | なお make 時に make WITH_CUDA=No と指定することで、GPUを使わないCPU版もビルド可能です。 15 | 16 | 17 | Windows 18 | ----------- 19 | 1. install VisualStudio 2019 + CUDA 11.3 20 | 2. git clone --recursive -b ver4_release https://github.com/ryuz/BinaryBrain.git 21 | 3. download MNIST from http://yann.lecun.com/exdb/mnist/ 22 | 4. decompress MNIST for "\samples\cpp\mnist" 23 | 5. 
open VC++ solution "samples\cpp\mnist\sample_mnist.sln" 24 | 6. build "x64 Release" 25 | 7. run 26 | 27 | Linux(Ubuntu 20.04) 28 | ---------------------- 29 | 30 | 1. install tools 31 | ^^^^^^^^^^^^^^^^^ 32 | 33 | :: 34 | 35 | % sudo apt update 36 | % sudo apt upgrade 37 | % sudo apt install git 38 | % sudo apt install make 39 | % sudo apt install g++ 40 | % wget https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run 41 | % sudo sh cuda_11.3.1_465.19.01_linux.run 42 | 43 | 2. build and run 44 | ^^^^^^^^^^^^^^^^^ 45 | 46 | :: 47 | 48 | % git clone --recursive -b ver4_release https://github.com/ryuz/BinaryBrain.git 49 | % cd BinaryBrain/samples/cpp/mnist 50 | % make 51 | % make dl_data 52 | % ./sample-mnist All 53 | 54 | 55 | ここで単に 56 | 57 | :: 58 | 59 | % ./sample-mnist 60 | 61 | と打ち込むと、使い方が表示されます。 62 | 63 | 64 | Google Colaboratory 65 | --------------------------- 66 | 67 | nvcc が利用可能な Google Colaboratory でも動作可能なようです。 68 | 以下あくまで参考ですが、ランタイムのタイプをGPUに設定した上で、下記のような操作で、ビルドして動作させることができます。 69 | 70 | :: 71 | 72 | !git clone --recursive -b ver4_release https://github.com/ryuz/BinaryBrain.git 73 | %cd BinaryBrain/samples/cpp/mnist 74 | !make all 75 | !make run 76 | 77 | -------------------------------------------------------------------------------- /include/bb/LoadXor.h: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | #pragma once 11 | 12 | 13 | #include 14 | #include 15 | 16 | #include "bb/DataType.h" 17 | 18 | 19 | namespace bb { 20 | 21 | 22 | template 23 | class LoadXor 24 | { 25 | public: 26 | static TrainData Load(int bit_size, int mul=1) 27 | { 
// --------------------------------------------------------------------------
//  Binary Brain  -- binary neural net framework
//
//                                Copyright (C) 2018-2019 by Ryuji Fuchikami
//                                https://github.com/ryuz
//                                ryuji.fuchikami@nifty.com
// --------------------------------------------------------------------------


#pragma once

// NOTE(review): the system include targets were lost in extraction; the set
// below covers every name used in this header — confirm against the original.
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <stdexcept>


namespace bb {


// BB_ASSERT_ACTION(text) is the policy invoked when an assertion fails.
// Users may pre-define it before including this header to install a custom
// handler; otherwise one of the built-in policies is selected:
//   BB_ASSERT_GETCHAR   : print the message, wait for the enter key, exit(1)
//   BB_ASSERT_EXCEPTION : print the message, then throw std::runtime_error
//   BB_ASSERT_LOOP      : print the message, then spin forever
//   (default)           : print the message, then exit(1)
#ifndef BB_ASSERT_ACTION

#if defined(BB_ASSERT_GETCHAR)
#define BB_ASSERT_ACTION(text)  do { std::cout << "\n" << text << std::endl; printf("\nplease press enter key to exit.\n"); (void)getchar(); exit(1); } while(0)
#elif defined(BB_ASSERT_EXCEPTION)
#define BB_ASSERT_ACTION(text)  do { std::cout << "\n" << text << std::endl; throw std::runtime_error(text); } while(0)
#elif defined(BB_ASSERT_LOOP)
#define BB_ASSERT_ACTION(text)  do { std::cout << "\n" << text << std::endl; for (;;); } while(0)
#else
#define BB_ASSERT_ACTION(text)  do { std::cout << "\n" << text << std::endl; exit(1); } while(0)
#endif

#endif


// assert that is checked in every build configuration
#define BB_ASSERT(v) \
    do { \
        if(!(v)) { \
            BB_ASSERT_ACTION("BB_ASSERT(" #v ") at " __FILE__ " line " + std::to_string(__LINE__) ); \
        } \
    } while(0)

// assert that is only checked in debug builds (_DEBUG defined)
// bugfix: the failure message previously said "BB_ASSERT(...)", making a
// debug-only assertion indistinguishable from an always-on one in logs.
#ifdef _DEBUG
#define BB_DEBUG_ASSERT(v) \
    do { \
        if(!(v)) { \
            BB_ASSERT_ACTION("BB_DEBUG_ASSERT(" #v ") at " __FILE__ " line " + std::to_string(__LINE__) ); \
        } \
    } while(0)
#else
#define BB_DEBUG_ASSERT(v)  do{}while(0)
#endif


}


// end of file
StochasticBatchNormalization.cu 36 | SRCS += AverageLut.cu 37 | SRCS += ShuffleModulation.cu 38 | SRCS += RealToBinary.cu 39 | SRCS += BinaryToReal.cu 40 | SRCS += Im2Col.cu 41 | SRCS += Col2Im.cu 42 | SRCS += MaxPooling.cu 43 | SRCS += StochasticMaxPooling.cu 44 | SRCS += UpSampling.cu 45 | SRCS += BatchNormalization.cu 46 | SRCS += ReLU.cu 47 | SRCS += Sigmoid.cu 48 | SRCS += Binarize.cu 49 | SRCS += HardTanh.cu 50 | SRCS += OptimizerAdam.cu 51 | SRCS += LossSoftmaxCrossEntropy.cu 52 | SRCS += LossMeanSquaredError.cu 53 | SRCS += MetricsCategoricalAccuracy.cu 54 | SRCS += Utility.cu 55 | 56 | HDRS = ../include/bbcu/bbcu.h 57 | HDRS += ../include/bbcu/bbcu_util.h 58 | HDRS += Common.cuh 59 | 60 | OBJS = $(addsuffix .o, $(basename $(SRCS))) 61 | 62 | .SUFFIXES: .cu .o 63 | 64 | .PHONY: all 65 | all: $(TARGET) 66 | 67 | .PHONY: clean 68 | clean: 69 | $(RM) -f $(TARGET) $(OBJS) depend 70 | 71 | $(TARGET): $(OBJS) 72 | $(AR) $(ARFLAGS) rcs $(TARGET) $(OBJS) 73 | 74 | .cu.o: 75 | $(NVCC) -c $(CUFLAGS) $< -o $@ 76 | 77 | $(OBJS): $(HDRS) 78 | 79 | depend: $(SRCS) 80 | $(NVCC) -M $(CUFLAGS) $^ > $@ 81 | 82 | include depend 83 | -------------------------------------------------------------------------------- /tests/gtest/BitEncodeTest.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include "gtest/gtest.h" 6 | #include "bb/BitEncode.h" 7 | 8 | 9 | 10 | TEST(BitEncodeTest, testBitEncode_test0) 11 | { 12 | auto bitenc = bb::BitEncode::Create(4); 13 | 14 | bb::FrameBuffer x(2, {3}, BB_TYPE_FP32); 15 | bitenc->SetInputShape(x.GetShape()); 16 | 17 | x.SetFP32(0, 0, 0x55 / 255.0f); 18 | x.SetFP32(0, 1, 0x00 / 255.0f); 19 | x.SetFP32(0, 2, 0xff / 255.0f); 20 | x.SetFP32(1, 0, 0x11 / 255.0f); 21 | x.SetFP32(1, 1, 0x22 / 255.0f); 22 | x.SetFP32(1, 2, 0xaa / 255.0f); 23 | 24 | auto y = bitenc->Forward(x); 25 | EXPECT_EQ(bb::DataType::type, y.GetType()); 26 | EXPECT_EQ(12, y.GetNodeSize()); 27 | 28 | 
EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*0+0)); 29 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*1+0)); 30 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*2+0)); 31 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*3+0)); 32 | 33 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*0+1)); 34 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*1+1)); 35 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*2+1)); 36 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*3+1)); 37 | 38 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*3+2)); 39 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*3+2)); 40 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*3+2)); 41 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*3+2)); 42 | 43 | 44 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(1, 3*0+0)); 45 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*1+0)); 46 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*2+0)); 47 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*3+0)); 48 | 49 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*0+1)); 50 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(1, 3*1+1)); 51 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*2+1)); 52 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*3+1)); 53 | 54 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*0+2)); 55 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(1, 3*1+2)); 56 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*2+2)); 57 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(1, 3*3+2)); 58 | } 59 | 60 | 61 | -------------------------------------------------------------------------------- /cuda/MatrixColwiseSum.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_runtime.h" 5 | #include "device_launch_parameters.h" 6 | 7 | #include "bbcu/bbcu.h" 8 | #include "bbcu/bbcu_util.h" 9 | 10 | 11 | #define THREAD_X_UNIT 512 12 | 13 | // kernel 14 | __global__ void kernel_fp32_MatrixColwiseSum( 15 | const float* x_mat, 16 | float* y_vec, 17 | int frame_size, 18 | int frame_stride) 19 | { 20 | __shared__ float buf[THREAD_X_UNIT]; 21 | 22 | // 初期化 23 | int node = blockIdx.x; 24 | int frame_base = threadIdx.x; 25 | int frame_step = blockDim.x; 26 | 27 | // 
# ------------------------------------------------------------------
#  Makefile for the BinaryBrain googletest suite.
#  Builds ./gtest from the unit-test sources; when WITH_CUDA=Yes it
#  compiles with nvcc and links the CUDA kernel library libbbcu.a.
# ------------------------------------------------------------------

# target
TARGET = gtest

# default flag
DEBUG       ?= No
WITH_CUDA   ?= Yes
WITH_CEREAL ?= Yes

# CUDA kernel library built from ../../cuda
BBCU_PATH = ../../cuda
BBCU_LIB  = $(BBCU_PATH)/libbbcu.a

CEREAL_PATH = ../../cereal

# without CUDA the host compiler is used directly
ifeq ($(WITH_CUDA),Yes)
else
CC = g++
#CC ?= clang++
endif

#CFLAGS = -O2 -mavx2 -mfma -fopenmp -std=c++14
# debug-friendly build: no optimization, symbols on (AVX2/FMA still required)
CFLAGS = -g -O0 -mavx2 -mfma -std=c++14
CINCS  = -I../../include -I../../eigen
CDEFS  =
CLIBS  = -lgtest_main -lgtest -lpthread

# unit-test translation units
SRCS += BatchNormalizationTest.cpp
SRCS += BinarizeTest.cpp
SRCS += BinaryLutTest.cpp
SRCS += BinaryToRealTest.cpp
SRCS += ConvolutionCol2ImTest.cpp
SRCS += ConvolutionIm2ColTest.cpp
SRCS += DenseAffineTest.cpp
SRCS += FrameBufferTest.cpp
SRCS += LossSoftmaxCrossEntropyTest.cpp
SRCS += LoweringConvolutionTest.cpp
SRCS += MaxPoolingTest.cpp
# SRCS += MemoryTest.cpp
SRCS += MicroMlpAffineTest.cpp
SRCS += OptimizerAdamTest.cpp
SRCS += ReLUTest.cpp
SRCS += RealToBinaryTest.cpp
SRCS += SigmoidTest.cpp
SRCS += TensorTest.cpp
SRCS += VariablesTest.cpp

OBJS = $(addsuffix .o, $(basename $(SRCS)))

LIBS =

ifeq ($(WITH_CEREAL),Yes)
CDEFS += -DBB_WITH_CEREAL
CINCS += -I$(CEREAL_PATH)/include
endif

# with CUDA, nvcc drives the build; host flags are forwarded via -Xcompiler
ifeq ($(WITH_CUDA),Yes)
CC      = nvcc
CDEFS  += -DBB_WITH_CUDA
CFLAGS := -Xcompiler '$(CFLAGS)'
LIBS   += $(BBCU_LIB)
SUB_TARGET += bbcu_build
endif

.SUFFIXES: .c .o

.PHONY: all
all: $(SUB_TARGET) $(TARGET)

.PHONY: clean
clean:
	rm -f $(TARGET) *.o

# run the tests; MNIST data files must already be present in this directory
# NOTE(review): RUN_OPTION is never assigned in this Makefile — presumably
# passed on the make command line; confirm intended usage.
.PHONY: run
run: $(TARGET) train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
	./$(TARGET) $(RUN_OPTION)

# build the CUDA kernel library first
.PHONY: bbcu_build
bbcu_build:
	make -C $(BBCU_PATH)

$(TARGET): $(OBJS) $(BBCU_LIB)
	$(CC) -o $(TARGET) $(CFLAGS) $(CINCS) $(CDEFS) $(OBJS) $(BBCU_LIB) $(CLIBS)

.cpp.o:
	$(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $<
| - CNN系(tb_mnist_lut_cnn) 18 | - MnistLutCnn.v (Verilogソースコード) 19 | - mnist_test_160x120.ppm (シミュレーション用入力画像) 20 | - mnist_test_640x480.ppm (シミュレーション用入力画像) 21 | 22 | - SegmentationAndClassification (tb_mnist_segmentation_and_classification ディレクトリ) 23 | - MnistSegmentationAndClassification.v (Verilogソースコード) 24 | - mnist_test_160x120.ppm (シミュレーション用入力画像) 25 | - mnist_test_640x480.ppm (シミュレーション用入力画像) 26 | 27 | 28 | なお、学習方式が異なるサンプルでも、同じファイルに上書きしますので 29 | どのサンプルを試すかよく確認の上に利用ください。 30 | 31 | 32 | ## シミュレーション実施 33 | 34 | シミュレーションツールには verilator、xsim(Xilinx)、veritak、iverilog の4種のスクリプトを用意しています。 35 | 36 | ただし iverilog は本システムのシミュレーションではかなり遅いようですのでお勧めしません。 37 | 38 | 39 | ### verilator の場合 40 | 41 | verilator のツールにパスが通った状態で、 verilator ディレクトリで 42 | 43 | ``` 44 | make 45 | ``` 46 | 47 | を実行ください。 48 | 49 | ``` 50 | make clean 51 | ``` 52 | 53 | で、クリーンナップ出来ます。 54 | 55 | 56 | ### xsim の場合 57 | 58 | Xilinxのツールにパスが通った状態で、xsim ディレクトリで以下のいずれかを実行ください。 59 | 60 | - run_xsim.bat (Windowsの場合) 61 | - run_xsim.sh (Linuxの場合) 62 | 63 | 64 | ### iverilog の場合 65 | 66 | ツールにパスが通った状態で、iverilog ディレクトリで以下のいずれかを実行ください。 67 | 68 | - run_iverilog.sh (Linuxのみ) 69 | 70 | ### Veritak-Win の場合 71 | 72 | vertak ディレクトリにあるプロジェクトファイルを開いて実行ください。 73 | 74 | 75 | 76 | ## 結果確認 77 | 78 | ### Simple版 79 | 80 | Simple版の場合は完了すると、認識率がコンソールに出力されます。 81 | 82 | また vcd ファイルも出力されますので gtkwave などのツールで波形を見ることもできます。 83 | 84 | ### CNN版 85 | 86 | シミュレーションがうまくいくと MaxPooling 層で縮小された後の画像サイズで認識結果で色付けしたものが 87 | 88 | col_0.ppm 89 | 90 | に出力されます。数字付近で目的の色が出ていれば正解です(黒:0, 茶:1 赤:2, 橙:3, 黄:4, 緑:5, 青:6, 紫:7, 灰:8, 白:9 )。 91 | 92 | pgmやppm などの [PNMファイル](https://en.wikipedia.org/wiki/Netpbm)を見るには IrfanView, gimp, MassiGra(+Susieプラグイン) などがおすすめです。 93 | 94 | 95 | -------------------------------------------------------------------------------- /python/projects/discrete/Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Environment 3 | PYTHON = python3 4 | BB_PATH = ../../.. 
5 | PACKAGE_PATH = ../../binarybrain 6 | SRC_PATH = $(PACKAGE_PATH)/src 7 | 8 | EXT_SUFFIX = $(shell $(PYTHON)-config --extension-suffix) 9 | 10 | 11 | # target 12 | TARGET_NAME = core 13 | TARGET = $(PACKAGE_PATH)/$(TARGET_NAME)$(EXT_SUFFIX) 14 | SUB_TARGETS = 15 | 16 | 17 | # run option 18 | RUN_OPTION = All 19 | 20 | # default flag 21 | DEBUG ?= No 22 | WITH_CUDA ?= Yes 23 | WITH_CEREAL ?= Yes 24 | 25 | BBCU_PATH = $(BB_PATH)/cuda 26 | BBCU_LIB = $(BBCU_PATH)/libbbcu.a 27 | 28 | CEREAL_PATH = $(BB_PATH)/cereal 29 | 30 | ifeq ($(WITH_CUDA),Yes) 31 | else 32 | #CC = x86_64-linux-gnu-gcc 33 | CC = g++ 34 | #CC ?= clang++ 35 | endif 36 | 37 | # -pthread 38 | CFLAGS = -mavx2 -mfma -fopenmp -std=c++14 -fPIC 39 | CINCS = -I$(BB_PATH)/include $(shell $(PYTHON) -m pybind11 --includes) 40 | CDEFS = 41 | 42 | SRCS = $(SRC_PATH)/core_main.cpp 43 | OBJS = $(addsuffix .o, $(basename $(SRCS))) 44 | 45 | LIBS = -lstdc++ -lm 46 | # -shared-libgcc 47 | 48 | ifeq ($(DEBUG),Yes) 49 | CFLAGS += -O0 -g 50 | else 51 | CFLAGS += -O3 52 | endif 53 | 54 | ifeq ($(WITH_CEREAL),Yes) 55 | CDEFS += -DBB_WITH_CEREAL 56 | CINCS += -I$(CEREAL_PATH)/include 57 | endif 58 | 59 | ifeq ($(WITH_CUDA),Yes) 60 | CC = nvcc 61 | CFLAGS := -shared -Xcompiler '$(CFLAGS)' -lcublas 62 | CDEFS += -DBB_WITH_CUDA 63 | LIBS += $(BBCU_LIB) 64 | SUB_TARGET += bbcu_build 65 | else 66 | CFLAGS := -shared $(CFLAGS) 67 | endif 68 | 69 | .SUFFIXES: .c .o 70 | 71 | .PHONY: all 72 | all: $(SUB_TARGET) $(TARGET) 73 | 74 | .PHONY: clean 75 | clean: 76 | rm -f $(TARGET) $(OBJS) 77 | 78 | .PHONY: mostlyclean 79 | mostlyclean: clean 80 | make -C $(BBCU_PATH) clean 81 | 82 | .PHONY: bbcu_build 83 | bbcu_build: 84 | make -C $(BBCU_PATH) 85 | 86 | $(TARGET): $(OBJS) 87 | $(CC) -o $(TARGET) $(CFLAGS) $(OBJS) $(LIBS) 88 | 89 | .cpp.o: 90 | $(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $< -o $@ 91 | 92 | depend: $(SRCS) 93 | $(CC) -M $(CFLAGS) $(CINCS) $(CDEFS) $^ > $@ 94 | 95 | include depend 96 | 97 | 
# ------------------------------------------------------------------
#  Makefile for the BinaryBrain python "thrust" core extension module.
#  Produces $(PACKAGE_PATH)/core<EXT_SUFFIX> as a shared library,
#  optionally compiled with nvcc / CUDA support.
# ------------------------------------------------------------------

# target name
TARGET_NAME = core

# tools
PYTHON = python3
CC     = g++
#CC     = clang++
NVCC   = nvcc
AR     = g++
RM     = rm
DEPEND = g++

# path
BB_PATH      = ../../..
PACKAGE_PATH = ../../binarybrain
SRC_PATH     = $(PACKAGE_PATH)/src
BBCU_PATH    = $(BB_PATH)/cuda
CEREAL_PATH  = $(BB_PATH)/cereal

# target (python extension suffix, e.g. .cpython-38-x86_64-linux-gnu.so)
EXT_SUFFIX = $(shell $(PYTHON)-config --extension-suffix)
TARGET     = $(PACKAGE_PATH)/$(TARGET_NAME)$(EXT_SUFFIX)

# control flag
DEBUG       ?= No
WITH_CUDA   ?= Yes
WITH_CEREAL ?= Yes

# flags
CFLAGS  = -pthread -mavx2 -mfma -fopenmp -std=c++14 -fPIC
CUFLAGS = -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_75,code=sm_75
ARFLAGS = -pthread -fopenmp -fPIC
CINCS   = -I$(BB_PATH)/cuda -I$(BB_PATH)/include $(shell $(PYTHON) -m pybind11 --includes)
CDEFS   =

# sources
SRCS = $(SRC_PATH)/core_main.cpp

# libraries
LIBS = -lstdc++ -lm


# debug
ifeq ($(DEBUG),Yes)
CFLAGS += -g -O0 -D_DEBUG
else
CFLAGS += -g -O3
endif

# CEREAL
ifeq ($(WITH_CEREAL),Yes)
CDEFS += -DBB_WITH_CEREAL
CINCS += -I$(CEREAL_PATH)/include
endif

# CUDA: nvcc drives both compile and link; host flags go through -Xcompiler
ifeq ($(WITH_CUDA),Yes)
SRCS    += $(SRC_PATH)/core_bbcu.cu
CC       = $(NVCC)
AR       = $(NVCC)
LIBS    += -lcublas
CFLAGS  := $(CUFLAGS) -Xcompiler '$(CFLAGS)'
ARFLAGS := -Xcompiler '$(ARFLAGS)'
CDEFS   += -DBB_WITH_CUDA
endif

# objects
OBJS = $(addsuffix .o, $(basename $(SRCS)))


.SUFFIXES: .c .cu .o

.PHONY: all
all: $(TARGET)

.PHONY: clean
clean:
	$(RM) -f $(TARGET) $(OBJS)

$(TARGET): $(OBJS)
	$(AR) -shared $(ARFLAGS) -o $(TARGET) $(OBJS) $(LIBS)

.cpp.o:
	$(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $< -o $@

# bugfix: this rule referenced $(CUDAFLAGS), which is never defined anywhere
# in this Makefile — the defined variable is CUFLAGS, so the -gencode
# architecture flags were silently dropped when compiling .cu files.
.cu.o:
	$(NVCC) $(CUFLAGS) $(CINCS) $(CDEFS) -std=c++11 -Xcompiler -fPIC -c $< -o $@

depend: $(SRCS)
	$(DEPEND) -M $(CFLAGS) $(CINCS) $(CDEFS) $^ > $@

include depend

runner_create.lossFunc = bb::LossMeanSquaredError::Create(); 45 | runner_create.metricsFunc = bb::MetricsMeanSquaredError::Create(); 46 | // runner_create.optimizer = bb::OptimizerSgd::Create(0.0001f); 47 | runner_create.optimizer = bb::OptimizerAdam::Create(); 48 | runner_create.write_serial = false; 49 | runner_create.file_read = false; 50 | runner_create.file_write = true; 51 | runner_create.print_progress = false; 52 | runner_create.initial_evaluation = false; 53 | auto runner = bb::Runner::Create(runner_create); 54 | 55 | runner->Fitting(td, epoch_size, mini_batch_size); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /tests/cpp/diabetes/DiabetesRegressionDenseAffine.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // BinaryBrain -- binary network evaluation platform 3 | // diabetes regression sample 4 | // 5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami 6 | // -------------------------------------------------------------------------- 7 | 8 | 9 | #include 10 | 11 | #include "bb/Sequential.h" 12 | #include "bb/DenseAffine.h" 13 | #include "bb/ReLU.h" 14 | #include "bb/Sigmoid.h" 15 | #include "bb/MetricsMeanSquaredError.h" 16 | #include "bb/LossMeanSquaredError.h" 17 | #include "bb/OptimizerAdam.h" 18 | #include "bb/OptimizerSgd.h" 19 | #include "bb/Runner.h" 20 | #include "LoadDiabetes.h" 21 | 22 | 23 | void DiabetesAffineRegression(int epoch_size, size_t mini_batch_size) 24 | { 25 | // load diabetes data 26 | auto td = LoadDiabetes<>(); 27 | bb::TrainDataNormalize(td); 28 | 29 | auto net = bb::Sequential::Create(); 30 | net->Add(bb::DenseAffine<>::Create(512)); 31 | net->Add(bb::Sigmoid<>::Create()); 32 | net->Add(bb::DenseAffine<>::Create(256)); 33 | net->Add(bb::Sigmoid<>::Create()); 34 | net->Add(bb::DenseAffine<>::Create(1)); 35 | // net->Add(bb::Sigmoid<>::Create()); 36 | 
// --------------------------------------------------------------------------
//  BinaryBrain  -- binary network evaluation platform
//   XOR sample
//   (the original header said "MNIST sample" — a copy-paste leftover;
//    this file trains a 6-input XOR/parity function)
//
//                                     Copyright (C) 2018 by Ryuji Fuchikami
// --------------------------------------------------------------------------


// NOTE(review): the system include targets were lost in extraction and
// reconstructed here — confirm against the original file.
#include <iostream>
#include <fstream>
#include <numeric>
#include <random>
#include <chrono>

#include "bb/RealToBinary.h"
#include "bb/BinaryToReal.h"
#include "bb/StochasticLutN.h"
#include "bb/BatchNormalization.h"
#include "bb/Sigmoid.h"
#include "bb/ReLU.h"
#include "bb/LossMeanSquaredError.h"
#include "bb/MetricsBinaryAccuracy.h"
#include "bb/OptimizerAdam.h"
#include "bb/LoadXor.h"
#include "bb/Utility.h"
#include "bb/Sequential.h"
#include "bb/Runner.h"


// Train a 6-input XOR (parity) function with a single StochasticLutN<6> layer.
//   epoch_size  : number of training epochs
//   binary_mode : when true, switch the network to binary activation mode
// (the original comment said "MNIST CNN with LUT networks" — copy-paste leftover)
void StochasticLut6(int epoch_size, bool binary_mode)
{
    // load data: all 2^6 = 64 parity patterns, replicated 256x for training
    auto td = bb::LoadXor<>::Load(6, 256);

    /*
    // debug dump of the training patterns
    for (int i = 0; i < 64; ++i) {
        std::cout << td.t_train[i][0] << " : ";
        for (int j = 0; j < 6; ++j) {
            std::cout << td.x_train[i][j] << " ";
        }
        std::cout << std::endl;
    }
    */

    // single stochastic-LUT layer mapping 6 inputs to the 1-node target shape
    auto net = bb::Sequential::Create();
    net->Add(bb::StochasticLutN<6>::Create(td.t_shape));
    net->SetInputShape(td.x_shape);

    if ( binary_mode ) {
        net->SendCommand("binary true");
        std::cout << "binary mode" << std::endl;
    }

    // NOTE(review): <float> template arguments were stripped by extraction
    // and restored here per the framework's convention — confirm.
    bb::Runner<float>::create_t runner_create;
    runner_create.name               = "StochasticLut6";
    runner_create.net                = net;
    runner_create.lossFunc           = bb::LossMeanSquaredError<float>::Create();
    runner_create.metricsFunc        = bb::MetricsBinaryAccuracy<float>::Create();
    runner_create.optimizer          = bb::OptimizerAdam<float>::Create();
    runner_create.print_progress     = true;
    runner_create.file_write         = true;
    runner_create.initial_evaluation = false;
    auto runner = bb::Runner<float>::Create(runner_create);

    // mini-batch of 64 = one full pass over the 2^6 input patterns
    runner->Fitting(td, epoch_size, (1 << 6));
}

// --------------------------------------------------------------------------
//  BinaryBrain  -- binary network evaluation platform
//   XOR sample
//   (the original header said "MNIST sample" — a copy-paste leftover;
//    this file trains a 6-input XOR/parity function)
//
//                                     Copyright (C) 2018 by Ryuji Fuchikami
// --------------------------------------------------------------------------


// NOTE(review): the system include targets were lost in extraction and
// reconstructed here — confirm against the original file.
#include <iostream>
#include <fstream>
#include <numeric>
#include <random>
#include <chrono>

#include "bb/RealToBinary.h"
#include "bb/BinaryToReal.h"
#include "bb/MicroMlpAffine.h"
#include "bb/StochasticLutN.h"
#include "bb/BatchNormalization.h"
#include "bb/Sigmoid.h"
#include "bb/ReLU.h"
#include "bb/LossMeanSquaredError.h"
#include "bb/MetricsBinaryAccuracy.h"
#include "bb/OptimizerAdam.h"
#include "bb/LoadXor.h"
#include "bb/Utility.h"
#include "bb/Sequential.h"
#include "bb/Runner.h"


// Train a 6-input XOR (parity) function with a MicroMlpAffine<6,16> layer
// followed by batch normalization and a sigmoid activation.
//   epoch_size  : number of training epochs
//   binary_mode : when true, switch the network to binary activation mode
// (the original comment said "MNIST CNN with LUT networks" — copy-paste leftover)
void XorMicroMlp(int epoch_size, bool binary_mode)
{
    // load data: all 2^6 = 64 parity patterns, replicated 256x for training
    auto td = bb::LoadXor<>::Load(6, 256);

    /*
    // debug dump of the training patterns
    for (int i = 0; i < 64; ++i) {
        std::cout << td.t_train[i][0] << " : ";
        for (int j = 0; j < 6; ++j) {
            std::cout << td.x_train[i][j] << " ";
        }
        std::cout << std::endl;
    }
    */

    // NOTE(review): <float> template arguments were stripped by extraction
    // and restored here per the framework's convention — confirm.
    auto net = bb::Sequential::Create();
    net->Add(bb::MicroMlpAffine<6, 16, float>::Create(td.t_shape));
    net->Add(bb::BatchNormalization<float>::Create());
    net->Add(bb::Sigmoid<float>::Create());
    net->SetInputShape(td.x_shape);

    if ( binary_mode ) {
        net->SendCommand("binary true");
        std::cout << "binary mode" << std::endl;
    }

    bb::Runner<float>::create_t runner_create;
    runner_create.name               = "XorMicroMlp";
    runner_create.net                = net;
    runner_create.lossFunc           = bb::LossMeanSquaredError<float>::Create();
    runner_create.metricsFunc        = bb::MetricsBinaryAccuracy<float>::Create();
    runner_create.optimizer          = bb::OptimizerAdam<float>::Create();
    runner_create.print_progress     = true;
    runner_create.file_write         = true;
    runner_create.initial_evaluation = false;
    auto runner = bb::Runner<float>::Create(runner_create);

    // mini-batch of 64 = one full pass over the 2^6 input patterns
    runner->Fitting(td, epoch_size, (1 << 6));
}

EXPECT_EQ(y_bufs[0].GetNodeSize(), node0_size+node1_size); 52 | 53 | auto dx_bufs = concat->BackwardMulti(y_bufs); 54 | 55 | 56 | for (int frame = 0; frame < frame_size; ++frame ) { 57 | for (int node = 0; node < node0_size; ++node ) { 58 | EXPECT_EQ(x0_buf.GetFP32(frame, node), dx_bufs[0].GetFP32(frame, node)); 59 | } 60 | for (int node = 0; node < node1_size; ++node ) { 61 | EXPECT_EQ(x1_buf.GetFP32(frame, node), dx_bufs[1].GetFP32(frame, node)); 62 | } 63 | } 64 | 65 | #if 0 66 | std::cout << "x0_buf" << std::endl; 67 | PrintFrameBuf(x0_buf); 68 | std::cout << "x1_buf" << std::endl; 69 | PrintFrameBuf(x1_buf); 70 | std::cout << "y_bufs" << std::endl; 71 | PrintFrameBuf(y_bufs[0]); 72 | #endif 73 | } 74 | 75 | 76 | #endif 77 | 78 | 79 | // end of file 80 | 81 | -------------------------------------------------------------------------------- /tests/cpp/mnist/test_mnist.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.7.34003.232 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_mnist", "test_mnist.vcxproj", "{4D95C63D-3452-42FF-97AC-7E51A32DBCAD}" 7 | ProjectSection(ProjectDependencies) = postProject 8 | {FEADE517-59B9-4551-AD9D-D181A1442EA7} = {FEADE517-59B9-4551-AD9D-D181A1442EA7} 9 | EndProjectSection 10 | EndProject 11 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bbcu", "..\..\..\cuda\bbcu.vcxproj", "{FEADE517-59B9-4551-AD9D-D181A1442EA7}" 12 | EndProject 13 | Global 14 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 15 | DbgCpu|x64 = DbgCpu|x64 16 | Debug|x64 = Debug|x64 17 | RelCpu|x64 = RelCpu|x64 18 | Release|x64 = Release|x64 19 | EndGlobalSection 20 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 21 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.DbgCpu|x64.ActiveCfg = DbgCpu|x64 22 | 
{4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.DbgCpu|x64.Build.0 = DbgCpu|x64 23 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Debug|x64.ActiveCfg = Debug|x64 24 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Debug|x64.Build.0 = Debug|x64 25 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.RelCpu|x64.ActiveCfg = RelCpu|x64 26 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.RelCpu|x64.Build.0 = RelCpu|x64 27 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Release|x64.ActiveCfg = Release|x64 28 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Release|x64.Build.0 = Release|x64 29 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.DbgCpu|x64.ActiveCfg = DbgCpu|x64 30 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.DbgCpu|x64.Build.0 = DbgCpu|x64 31 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.ActiveCfg = Debug|x64 32 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.Build.0 = Debug|x64 33 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.RelCpu|x64.ActiveCfg = RelCpu|x64 34 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.RelCpu|x64.Build.0 = RelCpu|x64 35 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.ActiveCfg = Release|x64 36 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.Build.0 = Release|x64 37 | EndGlobalSection 38 | GlobalSection(SolutionProperties) = preSolution 39 | HideSolutionNode = FALSE 40 | EndGlobalSection 41 | GlobalSection(ExtensibilityGlobals) = postSolution 42 | SolutionGuid = {6AC5BC58-661C-438A-AA75-EE403E3EDE93} 43 | EndGlobalSection 44 | EndGlobal 45 | -------------------------------------------------------------------------------- /samples/cpp/mnist/sample_mnist.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.7.34003.232 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sample_mnist", "sample_mnist.vcxproj", "{4D95C63D-3452-42FF-97AC-7E51A32DBCAD}" 7 | ProjectSection(ProjectDependencies) = 
postProject 8 | {FEADE517-59B9-4551-AD9D-D181A1442EA7} = {FEADE517-59B9-4551-AD9D-D181A1442EA7} 9 | EndProjectSection 10 | EndProject 11 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bbcu", "..\..\..\cuda\bbcu.vcxproj", "{FEADE517-59B9-4551-AD9D-D181A1442EA7}" 12 | EndProject 13 | Global 14 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 15 | DbgCpu|x64 = DbgCpu|x64 16 | Debug|x64 = Debug|x64 17 | RelCpu|x64 = RelCpu|x64 18 | Release|x64 = Release|x64 19 | EndGlobalSection 20 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 21 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.DbgCpu|x64.ActiveCfg = DbgCpu|x64 22 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.DbgCpu|x64.Build.0 = DbgCpu|x64 23 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Debug|x64.ActiveCfg = Debug|x64 24 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Debug|x64.Build.0 = Debug|x64 25 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.RelCpu|x64.ActiveCfg = RelCpu|x64 26 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.RelCpu|x64.Build.0 = RelCpu|x64 27 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Release|x64.ActiveCfg = Release|x64 28 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Release|x64.Build.0 = Release|x64 29 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.DbgCpu|x64.ActiveCfg = DbgCpu|x64 30 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.DbgCpu|x64.Build.0 = DbgCpu|x64 31 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.ActiveCfg = Debug|x64 32 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.Build.0 = Debug|x64 33 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.RelCpu|x64.ActiveCfg = RelCpu|x64 34 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.RelCpu|x64.Build.0 = RelCpu|x64 35 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.ActiveCfg = Release|x64 36 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.Build.0 = Release|x64 37 | EndGlobalSection 38 | GlobalSection(SolutionProperties) = preSolution 39 | HideSolutionNode = FALSE 40 | EndGlobalSection 41 | GlobalSection(ExtensibilityGlobals) = postSolution 42 | SolutionGuid 
= {6AC5BC58-661C-438A-AA75-EE403E3EDE93} 43 | EndGlobalSection 44 | EndGlobal 45 | -------------------------------------------------------------------------------- /samples/cpp/cifar10/sample_cifar10.vcxproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | $(ProjectDir) 5 | WindowsLocalDebugger 6 | 7 | 8 | $(ProjectDir) 9 | WindowsLocalDebugger 10 | 11 | 12 | $(ProjectDir) 13 | WindowsLocalDebugger 14 | All -num_threads 4 -print_device 15 | 16 | 17 | $(ProjectDir) 18 | WindowsLocalDebugger 19 | All -num_threads 4 -print_device 20 | 21 | 22 | $(ProjectDir) 23 | WindowsLocalDebugger 24 | 25 | 26 | $(ProjectDir) 27 | WindowsLocalDebugger 28 | All -num_threads 4 -print_device 29 | 30 | 31 | $(ProjectDir) 32 | WindowsLocalDebugger 33 | All -num_threads 4 -print_device 34 | 35 | 36 | $(ProjectDir) 37 | WindowsLocalDebugger 38 | 39 | -------------------------------------------------------------------------------- /samples/cpp/mnist/MnistLoadNet.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // BinaryBrain -- binary network evaluation platform 3 | // MNIST sample 4 | // 5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami 6 | // -------------------------------------------------------------------------- 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include "bb/Sequential.h" 13 | #include "bb/DenseAffine.h" 14 | #include "bb/BatchNormalization.h" 15 | #include "bb/ReLU.h" 16 | #include "bb/Convolution2d.h" 17 | #include "bb/MaxPooling.h" 18 | #include "bb/BinaryModulation.h" 19 | #include "bb/OptimizerAdam.h" 20 | #include "bb/LossSoftmaxCrossEntropy.h" 21 | #include "bb/MetricsCategoricalAccuracy.h" 22 | #include "bb/Runner.h" 23 | #include "bb/LoadMnist.h" 24 | #include "bb/ModelLoader.h" 25 | 26 | 27 | void MnistLoadNet(int epoch_size, int mini_batch_size, std::string filename) 28 | { 29 | // load 
MNIST data 30 | #ifdef _DEBUG 31 | auto td = bb::LoadMnist<>::Load(512, 128); 32 | std::cout << "!!! debug mode !!!" << std::endl; 33 | #else 34 | auto td = bb::LoadMnist<>::Load(); 35 | #endif 36 | 37 | // ネット読み込み 38 | auto net = bb::Model_LoadFromFile(filename); 39 | if (!net) { 40 | std::cerr << "file read error : " << filename << std::endl; 41 | return; 42 | } 43 | 44 | // set input shape 45 | // net->SetInputShape(td.x_shape); 46 | 47 | // print model information 48 | net->PrintInfo(); 49 | 50 | std::cout << "-----------------------------------" << std::endl; 51 | std::cout << "epoch_size : " << epoch_size << std::endl; 52 | std::cout << "mini_batch_size : " << mini_batch_size << std::endl; 53 | std::cout << "-----------------------------------" << std::endl; 54 | 55 | // run fitting 56 | bb::Runner::create_t runner_create; 57 | runner_create.name = net->GetName(); 58 | runner_create.net = net; 59 | runner_create.lossFunc = bb::LossSoftmaxCrossEntropy::Create(); 60 | runner_create.metricsFunc = bb::MetricsCategoricalAccuracy::Create(); 61 | runner_create.optimizer = bb::OptimizerAdam::Create(); 62 | runner_create.print_progress = true; // 途中結果を表示 63 | runner_create.initial_evaluation = true; // ファイルを読んだ場合は最初に評価しておく 64 | auto runner = bb::Runner::Create(runner_create); 65 | runner->Fitting(td, epoch_size, mini_batch_size); 66 | } 67 | 68 | 69 | // end of file 70 | -------------------------------------------------------------------------------- /tests/cpp/mnist/MnistLoadNet.cpp: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------- 2 | // BinaryBrain -- binary network evaluation platform 3 | // MNIST sample 4 | // 5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami 6 | // -------------------------------------------------------------------------- 7 | 8 | 9 | #include 10 | #include 11 | 12 | #include "bb/Sequential.h" 13 | #include "bb/DenseAffine.h" 14 | 
#include "bb/BatchNormalization.h"
#include "bb/ReLU.h"
#include "bb/Convolution2d.h"
#include "bb/MaxPooling.h"
#include "bb/BinaryModulation.h"
#include "bb/OptimizerAdam.h"
#include "bb/LossSoftmaxCrossEntropy.h"
#include "bb/MetricsCategoricalAccuracy.h"
#include "bb/Runner.h"
#include "bb/LoadMnist.h"
#include "bb/ModelLoader.h"


/**
 * @brief  Load a serialized network from file and train/evaluate it on MNIST.
 * @param  epoch_size       number of epochs to run
 * @param  mini_batch_size  mini-batch size passed to the runner
 * @param  filename         path of the serialized model file
 */
void MnistLoadNet(int epoch_size, int mini_batch_size, std::string filename)
{
  // load MNIST data (small subset in debug builds to keep iteration fast)
#ifdef _DEBUG
    auto td = bb::LoadMnist<>::Load(512, 128);
    std::cout << "!!! debug mode !!!" << std::endl;
#else
    auto td = bb::LoadMnist<>::Load();
#endif

    // load the network from file; abort on failure
    auto net = bb::Model_LoadFromFile(filename);
    if (!net) {
        std::cerr << "file read error : " << filename << std::endl;
        return;
    }

    // set input shape
//  net->SetInputShape(td.x_shape);

    // print model information
    net->PrintInfo();

    std::cout << "-----------------------------------" << std::endl;
    std::cout << "epoch_size : " << epoch_size << std::endl;
    std::cout << "mini_batch_size : " << mini_batch_size << std::endl;
    std::cout << "-----------------------------------" << std::endl;

    // run fitting
    bb::Runner<float>::create_t runner_create;
    runner_create.name = net->GetName();
    runner_create.net = net;
    runner_create.lossFunc = bb::LossSoftmaxCrossEntropy<float>::Create();
    runner_create.metricsFunc = bb::MetricsCategoricalAccuracy<float>::Create();
    runner_create.optimizer = bb::OptimizerAdam<float>::Create();
    runner_create.print_progress = true;        // print intermediate results
    runner_create.initial_evaluation = true;    // evaluate first, since the net came from a trained file
    auto runner = bb::Runner<float>::Create(runner_create);
    runner->Fitting(td, epoch_size, mini_batch_size);
}


// end of file
// --------------------------------------------------------------------------
//  Binary Brain  -- binary neural net framework
//
//                                Copyright (C) 2018 by Ryuji Fuchikami
//                                https://github.com/ryuz
//                                ryuji.fuchikami@nifty.com
// --------------------------------------------------------------------------



#pragma once

#include <cstdint>
#include <vector>

#include "bb/Manager.h"
#include "bb/Model.h"


namespace bb {


// Activation : common base class for activation models.
// It stores a single shape because an activation's output shape always
// equals its input shape; derived classes implement the actual function.
class Activation : public Model
{
    using _super = Model;

protected:
    indices_t m_shape;  //< shape shared by input and output

public:
    /**
     * @brief  Set the input shape.
     * @detail Stores the shape; afterwards GetOutputShape() returns the same value.
     *         NOTE(review): the original comment said internal state is
     *         re-initialized even when the same shape is passed again, but the
     *         code below returns early in that case — confirm which is intended.
     * @param  shape  shape of the nodes making up one frame
     * @return the output shape (identical to the input shape)
     */
    indices_t SetInputShape(indices_t shape) override
    {
        // already set to this shape: nothing to do
        if ( shape == this->GetInputShape() ) {
            return this->GetOutputShape();
        }

        m_shape = shape;
        return m_shape;
    }

    /**
     * @brief  Get the input shape.
     * @return the input shape
     */
    indices_t GetInputShape(void) const override
    {
        return m_shape;
    }

    /**
     * @brief  Get the output shape (same as the input shape).
     * @return the output shape
     */
    indices_t GetOutputShape(void) const override
    {
        return m_shape;
    }


protected:

    // Serialize: version tag first, then parent-class data, then members.
    void DumpObjectData(std::ostream &os) const override
    {
        // version
        std::int64_t ver = 1;
        bb::SaveValue(os, ver);

        // parent class
        _super::DumpObjectData(os);

        // members
        bb::SaveValue(os, m_shape);
    }

    // Deserialize in exactly the order written by DumpObjectData().
    void LoadObjectData(std::istream &is) override
    {
        // version
        std::int64_t ver;
        bb::LoadValue(is, ver);

        BB_ASSERT(ver == 1);

        // parent class
        _super::LoadObjectData(is);

        // members
        bb::LoadValue(is, m_shape);
    }

};


};
#include <iostream>
#include <algorithm>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include "bbcu/bbcu.h"
#include "bbcu/bbcu_util.h"



// Fused Adam update over a set of parameter tensors.
// blockIdx.y selects the tensor; threadIdx.x strides over its elements.
//   m <- m + (1-beta1)*(g - m)         (== beta1*m + (1-beta1)*g)
//   v <- v + (1-beta2)*(g*g - v)
//   p <- p - lr_t * m / (sqrt(v) + eps)
__global__ void kernal_fp32_OptimizerAdam(
            int   const     *size_table,
            float * const   *params_buf_table,
            float * const   *grads_buf_table,
            float * const   *m_buf_table,
            float * const   *v_buf_table,
            float           lr_t,
            float           neg_beta1,
            float           neg_beta2
        )
{
    int id      = threadIdx.x;
    int id_step = blockDim.x;
    int index   = blockDim.y * blockIdx.y + threadIdx.y;    // which tensor

    int size = size_table[index];

    float *params_buf = params_buf_table[index];
    float *grads_buf  = grads_buf_table[index];
    float *m_buf      = m_buf_table[index];
    float *v_buf      = v_buf_table[index];

    for ( int n = id; n < size; n += id_step ) {
        float param = params_buf[n];
        float grad  = grads_buf[n];
        float m     = m_buf[n];
        float v     = v_buf[n];

        m += neg_beta1 * (grad - m);
        v += neg_beta2 * (grad * grad - v);
        // sqrtf + float epsilon keep the whole expression in fp32;
        // the previous "sqrt(v) + 1e-7" promoted the update to fp64
        // because 1e-7 is a double literal.
        param -= lr_t * m / (sqrtf(v) + 1e-7f);

        m_buf[n] = m;
        v_buf[n] = v;
        params_buf[n] = param;
//      grads_buf[n] = 0;
    }
}


/**
 * @brief  Launch the fused fp32 Adam update.
 * @param  size                 number of parameter tensors (tables below have this many entries)
 * @param  dev_size_table       per-tensor element counts (device memory)
 * @param  dev_params_buf_table per-tensor parameter pointers (device memory)
 * @param  dev_grads_buf_table  per-tensor gradient pointers (device memory)
 * @param  dev_m_buf_table      per-tensor first-moment pointers (device memory)
 * @param  dev_v_buf_table      per-tensor second-moment pointers (device memory)
 * @param  lr_t                 bias-corrected learning rate for this step
 * @param  beta1                Adam beta1 (passed to the kernel as 1-beta1)
 * @param  beta2                Adam beta2 (passed to the kernel as 1-beta2)
 * @param  streamId             CUDA stream to launch on
 * @return 0 on success
 */
BBCU_DLL_EXPORT int bbcu_fp32_OptimizerAdam
        (
            int             size,
            int   const     *dev_size_table,
            float * const   *dev_params_buf_table,
            float * const   *dev_grads_buf_table,
            float * const   *dev_m_buf_table,
            float * const   *dev_v_buf_table,
            float           lr_t,
            float           beta1,
            float           beta2,
            cudaStream_t    streamId
        )
{
    BBCU_DEBUG_ASSERT(bbcu_IsDeviceAvailable());

    dim3 grid(1, size);     // one block row per parameter tensor
    dim3 block(192, 1);

    kernal_fp32_OptimizerAdam<<<grid, block, 0, streamId>>>(
            dev_size_table,
            dev_params_buf_table,
            dev_grads_buf_table,
            dev_m_buf_table,
            dev_v_buf_table,
            lr_t,
            (1.0f - beta1),
            (1.0f - beta2)
        );
    BB_CUDA_CHECK_LAST_ERROR();

    return 0;
}

// end of file
import io


def make_lut_func_name(name, node):
    """Return the HLS function name for a single LUT node ("<name>_lut_<node>")."""
    return "%s_lut_%d"%(name, node)


def dump_hls_lut_node(f, name, lut, node):
    ''' Write the HLS C++ function implementing one LUT node.

    Args:
        f (StreamIO) : output stream
        name (str): base function name
        lut (Model): LUT layer being converted
        node (int): index of the node to emit
    '''
    fan_in     = lut.get_node_connection_size(node)
    table_size = lut.get_lut_table_size(node)

    # prototype: one 1-bit argument per input connection
    f.write("\ninline ap_uint<1> %s(\n"%(make_lut_func_name(name, node)))
    for i in range(fan_in):
        f.write(" ap_uint<1> in_data%d"%(i))
        f.write(",\n" if i < fan_in-1 else ")\n")

    # body: pack the inputs into an index and look the result up in a ROM table
    f.write("{\n")
    f.write(" #pragma HLS inline\n\n")
    f.write(" ap_uint<%d> index;\n"%(fan_in))
    for i in range(fan_in):
        f.write(" index[%d] = in_data%d;\n"%(i, i))
    f.write(" \n")
    f.write(" const ap_uint<1> table[%d] = {"%(table_size))
    for i in range(table_size):
        f.write("%d,"%(lut.get_lut_table(node, i)))
    f.write("};\n")
    f.write(" #pragma HLS bind_storage variable=table type=ROM_1P impl=LUTRAM\n")
    f.write(" return table[index];\n")
    f.write("}\n\n")


def dump_hls_lut_layer(f, name, lut):
    ''' dump HLS source of LUT layer

    Args:
        f (StreamIO) : output stream
        name (str): function name
        lut (Model): network to convert
    '''
    in_size  = lut.get_input_node_size()
    out_size = lut.get_output_node_size()

    # one helper function per output node
    for node in range(out_size):
        dump_hls_lut_node(f, name, lut, node)

    # top-level function wiring the node helpers together
    f.write("\n")
    f.write("inline ap_uint<%d> %s(ap_uint<%d> in_data)\n"%(out_size, name, in_size))
    f.write("{\n")
    f.write(" ap_uint<%d> out_data;\n"%(out_size))
    for node in range(out_size):
        f.write(" out_data[%d] = %s("%(node, make_lut_func_name(name, node)))
        fan_in = lut.get_node_connection_size(node)
        for i in range(fan_in):
            f.write("in_data[%d]"%(lut.get_node_connection_index(node, i)))
            f.write("," if i < fan_in-1 else ");\n")
    f.write(" return out_data;\n")
    f.write("}\n\n")


def make_hls_lut_layer(name, lut):
    ''' make HLS source of LUT layer

    Args:
        name (str): function name
        lut (Model): network to convert

    Returns:
        HLS source code (str)
    '''
    buf = io.StringIO()
    try:
        dump_hls_lut_layer(buf, name, lut)
        return buf.getvalue()
    finally:
        buf.close()
#include "device_launch_parameters.h"

#include "bbcu/bbcu.h"
#include "bbcu/bbcu_util.h"



// Unpack a bit-packed frame buffer into real values.
// Frames are stored 32 per int word; bit i of a word is frame (unit*32 + i).
// Each thread converts one (frame, node) element to value0 / value1.
template<typename T>
__global__ void kernal_ConvBitToReal(
            int const   *x_buf,
            T           *y_buf,
            T           value0,
            T           value1,
            int         node_size,
            int         frame_size,
            int         x_frame_stride,
            int         y_frame_stride
        )
{
    int frame = blockDim.x * blockIdx.x + threadIdx.x;
    int node  = blockDim.y * blockIdx.y + threadIdx.y;

    int          bit      = (threadIdx.x & 0x1f);   // bit position in the word (launcher uses blockDim.x == 32)
    unsigned int bit_mask = (1u << bit);            // 1u: "1 << 31" on signed int would overflow (UB)
    int          unit     = (frame >> 5);           // packed-word index within the frame axis

    if ( frame < frame_size && node < node_size ) {
        int x = x_buf[node * x_frame_stride + unit];
        T   y = (x & bit_mask) ? value1 : value0;
        y_buf[node * y_frame_stride + frame] = y;
    }
}


/**
 * @brief  Launch bit-to-real conversion of a packed frame buffer.
 * @param  dev_x_buf       packed input (device memory, 32 frames per int word)
 * @param  dev_y_buf       real-valued output (device memory)
 * @param  value0          output value for a 0 bit
 * @param  value1          output value for a 1 bit
 * @param  node_size       number of nodes
 * @param  frame_size      number of frames
 * @param  x_frame_stride  input stride between nodes, in int words
 * @param  y_frame_stride  output stride between nodes, in elements
 * @param  streamId        CUDA stream to launch on
 * @return 0 on success
 */
template<typename T>
BBCU_DLL_EXPORT int bbcu_ConvBitToReal
        (
            int const       *dev_x_buf,
            T               *dev_y_buf,
            T               value0,
            T               value1,
            int             node_size,
            int             frame_size,
            int             x_frame_stride,
            int             y_frame_stride,
            cudaStream_t    streamId
        )
{
    BBCU_DEBUG_ASSERT(bbcu_IsDeviceAvailable());

    dim3 block(32, 32);     // blockDim.x == 32 matches the 32-bit packing assumed by the kernel
    dim3 grid((frame_size + 31) / 32, (node_size + 31) / 32);

    kernal_ConvBitToReal<T><<<grid, block, 0, streamId>>>
        (
            dev_x_buf,
            dev_y_buf,
            value0,
            value1,
            node_size,
            frame_size,
            x_frame_stride,
            y_frame_stride
        );
    BB_CUDA_CHECK_LAST_ERROR();

    return 0;
}


// explicit instantiation for fp32
template BBCU_DLL_EXPORT int bbcu_ConvBitToReal<float>
        (
            int const       *dev_x_buf,
            float           *dev_y_buf,
            float           value0,
            float           value1,
            int             node_size,
            int             frame_size,
            int             x_frame_stride,
            int             y_frame_stride,
            cudaStream_t    streamId
        );


// end of file
-------------------------------------------------------------------------- 2 | // Binary Brain -- binary neural net framework 3 | // 4 | // Copyright (C) 2018 by Ryuji Fuchikami 5 | // https://github.com/ryuz 6 | // ryuji.fuchikami@nifty.com 7 | // -------------------------------------------------------------------------- 8 | 9 | 10 | 11 | #pragma once 12 | 13 | 14 | #include "bb/Model.h" 15 | 16 | 17 | namespace bb { 18 | 19 | 20 | // border_mode 21 | #define BB_BORDER_CONSTANT 0 22 | #define BB_BORDER_REFLECT 1 23 | #define BB_BORDER_REFLECT_101 2 24 | #define BB_BORDER_REPLICATE 3 25 | #define BB_BORDER_WRAP 4 26 | 27 | 28 | // 二次元フィルタの基本クラス 29 | class Filter2d : public Model 30 | { 31 | protected: 32 | /** 33 | * @brief モデルの情報を表示 34 | * @detail モデルの情報を表示する 35 | * @param os 出力ストリーム 36 | * @param indent インデント文字列 37 | */ 38 | void PrintInfoText(std::ostream& os, std::string indent, int columns, int nest, int depth) const override 39 | { 40 | os << indent << " filter size : (" << GetFilterHeight() << ", " << GetFilterWidth() << ")" << std::endl; 41 | Model::PrintInfoText(os, indent, columns, nest, depth); 42 | } 43 | 44 | public: 45 | virtual index_t GetFilterHeight(void) const = 0; 46 | virtual index_t GetFilterWidth(void) const = 0; 47 | 48 | virtual std::shared_ptr< Model > GetSubLayer(void) const 49 | { 50 | return nullptr; 51 | } 52 | 53 | index_t GetInputChannels(void) const 54 | { 55 | auto shape = this->GetInputShape(); 56 | BB_ASSERT(shape.size() == 3); 57 | return shape[0]; 58 | } 59 | 60 | index_t GetInputHeight(void) const 61 | { 62 | auto shape = this->GetInputShape(); 63 | BB_ASSERT(shape.size() == 3); 64 | return shape[1]; 65 | } 66 | 67 | index_t GetInputWidth(void) const 68 | { 69 | auto shape = this->GetInputShape(); 70 | BB_ASSERT(shape.size() == 3); 71 | return shape[2]; 72 | } 73 | 74 | index_t GetOutputChannels(void) const 75 | { 76 | auto shape = this->GetOutputShape(); 77 | BB_ASSERT(shape.size() == 3); 78 | return shape[0]; 79 | } 80 | 81 | 
index_t GetOutputHeight(void) const 82 | { 83 | auto shape = this->GetOutputShape(); 84 | BB_ASSERT(shape.size() == 3); 85 | return shape[1]; 86 | } 87 | 88 | index_t GetOutputWidth(void) const 89 | { 90 | auto shape = this->GetOutputShape(); 91 | BB_ASSERT(shape.size() == 3); 92 | return shape[2]; 93 | } 94 | }; 95 | 96 | 97 | } -------------------------------------------------------------------------------- /tests/gtest/cudaMatrixColwiseMeanVarTest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cuda_runtime.h" 8 | #include "device_launch_parameters.h" 9 | 10 | #include "gtest/gtest.h" 11 | 12 | #include "bb/FrameBuffer.h" 13 | #include "bbcu/bbcu.h" 14 | 15 | 16 | #if BB_WITH_CUDA 17 | 18 | 19 | static double calc_mean(std::valarray const &varray) 20 | { 21 | return varray.sum() / varray.size(); 22 | } 23 | 24 | static double calc_var(std::valarray const &varray) 25 | { 26 | double mean = calc_mean(varray); 27 | return ((varray*varray).sum() - mean * mean*varray.size()) / varray.size(); 28 | } 29 | 30 | 31 | TEST(cudaMatrixColwiseMeanVarTest, test_MatrixColwiseMeanVar) 32 | { 33 | const int n = 1027; 34 | 35 | std::mt19937_64 mt(1); 36 | std::normal_distribution dist0(1.0, 2.0); 37 | std::normal_distribution dist1(-1.0, 3.0); 38 | std::normal_distribution dist2(2.0, 4.0); 39 | 40 | std::valarray arr0(n); 41 | std::valarray arr1(n); 42 | std::valarray arr2(n); 43 | for (int i = 0; i < n; ++i) { 44 | arr0[i] = dist0(mt); 45 | arr1[i] = dist1(mt); 46 | arr2[i] = dist2(mt); 47 | } 48 | 49 | bb::FrameBuffer x_buf(n, {3}, BB_TYPE_FP32); 50 | for (int i = 0; i < n; ++i) { 51 | x_buf.SetFP32(i, 0, (float)arr0[i]); 52 | x_buf.SetFP32(i, 1, (float)arr1[i]); 53 | x_buf.SetFP32(i, 2, (float)arr2[i]); 54 | } 55 | 56 | bb::Tensor m_buf({3}, BB_TYPE_FP32); 57 | bb::Tensor v_buf({3}, BB_TYPE_FP32); 58 | { 59 | auto x_ptr = x_buf.LockDeviceMemoryConst(); 60 | 
auto m_ptr = m_buf.LockDeviceMemory(); 61 | auto v_ptr = v_buf.LockDeviceMemory(); 62 | bbcu_fp32_MatrixColwiseMeanVar 63 | ( 64 | (const float *)x_ptr.GetAddr(), 65 | (float *)m_ptr.GetAddr(), 66 | (float *)v_ptr.GetAddr(), 67 | (int )3, 68 | (int )n, 69 | (int )x_buf.GetFrameStride() / sizeof(float) 70 | ); 71 | } 72 | 73 | { 74 | auto m_ptr = m_buf.LockConst(); 75 | auto v_ptr = v_buf.LockConst(); 76 | 77 | EXPECT_FLOAT_EQ((float)calc_mean(arr0), m_ptr[0]); 78 | EXPECT_FLOAT_EQ((float)calc_mean(arr1), m_ptr[1]); 79 | EXPECT_FLOAT_EQ((float)calc_mean(arr2), m_ptr[2]); 80 | EXPECT_FLOAT_EQ((float)calc_var(arr0), v_ptr[0]); 81 | EXPECT_FLOAT_EQ((float)calc_var(arr1), v_ptr[1]); 82 | EXPECT_FLOAT_EQ((float)calc_var(arr2), v_ptr[2]); 83 | } 84 | } 85 | 86 | 87 | #endif 88 | 89 | --------------------------------------------------------------------------------