├── samples
├── hls
│ └── mnist
│ │ └── simple
│ │ ├── directives.tcl
│ │ ├── .gitignore
│ │ ├── src
│ │ ├── mnist_sample.h
│ │ └── mnist_sample.cpp
│ │ ├── testbench
│ │ └── tb_mnist.cpp
│ │ ├── README.md
│ │ └── Makefile
├── verilog
│ ├── mnist
│ │ ├── tb_mnist_lut_cnn
│ │ │ ├── xsim
│ │ │ │ ├── xsim_run_all.tcl
│ │ │ │ ├── xvlog_cmd.txt
│ │ │ │ ├── run_xsim.sh
│ │ │ │ └── run_xsim.bat
│ │ │ ├── veritak
│ │ │ │ └── mnist_lut_cnn.vtakprj
│ │ │ ├── iverilog
│ │ │ │ ├── run_iverilog.sh
│ │ │ │ └── iverilog_cmd.txt
│ │ │ └── verilator
│ │ │ │ ├── clang-cmakeinit.cmake
│ │ │ │ ├── verilator_cmd.txt
│ │ │ │ ├── Makefile
│ │ │ │ ├── CMakeLists.txt
│ │ │ │ └── tb_verilator.cpp
│ │ ├── tb_mnist_lut_simple
│ │ │ ├── xsim
│ │ │ │ ├── xsim_run_all.tcl
│ │ │ │ ├── xvlog_cmd.txt
│ │ │ │ ├── run_xsim.sh
│ │ │ │ └── run_xsim.bat
│ │ │ ├── iverilog
│ │ │ │ ├── iverilog_cmd.txt
│ │ │ │ └── run_iverilog.sh
│ │ │ ├── veritak
│ │ │ │ └── tb_mnist_lut_simple.vtakprj
│ │ │ └── verilator
│ │ │ │ ├── clang-cmakeinit.cmake
│ │ │ │ ├── verilator_cmd.txt
│ │ │ │ ├── Makefile
│ │ │ │ ├── CMakeLists.txt
│ │ │ │ └── tb_verilator.cpp
│ │ ├── tb_mnist_semantic_segmentation
│ │ │ ├── xsim
│ │ │ │ ├── xsim_run_all.tcl
│ │ │ │ ├── xvlog_cmd.txt
│ │ │ │ ├── run_xsim.sh
│ │ │ │ └── run_xsim.bat
│ │ │ ├── verilator
│ │ │ │ ├── clang-cmakeinit.cmake
│ │ │ │ ├── verilator_cmd.txt
│ │ │ │ ├── Makefile
│ │ │ │ ├── CMakeLists.txt
│ │ │ │ └── tb_verilator.cpp
│ │ │ ├── veritak
│ │ │ │ └── tb_mnist_semantic_segmentation.vtakprj
│ │ │ └── iverilog
│ │ │ │ ├── run_iverilog.sh
│ │ │ │ └── iverilog_cmd.txt
│ │ ├── tb_mnist_segmentation_and_classification
│ │ │ ├── xsim
│ │ │ │ ├── xsim_run_all.tcl
│ │ │ │ ├── xvlog_cmd.txt
│ │ │ │ ├── run_xsim.sh
│ │ │ │ └── run_xsim.bat
│ │ │ ├── verilator
│ │ │ │ ├── clang-cmakeinit.cmake
│ │ │ │ ├── verilator_cmd.txt
│ │ │ │ ├── Makefile
│ │ │ │ ├── CMakeLists.txt
│ │ │ │ └── tb_verilator.cpp
│ │ │ ├── iverilog
│ │ │ │ ├── run_iverilog.sh
│ │ │ │ └── iverilog_cmd.txt
│ │ │ └── veritak
│ │ │ │ └── tb_mnist_segmentation_and_classification.vtakprj
│ │ ├── common
│ │ │ └── bb_lut.v
│ │ └── README.md
│ └── cifar10
│ │ └── README.md
└── cpp
│ ├── cifar10
│ ├── Cifar10DenseCnn.cpp
│ ├── Cifar10DenseSimple.cpp
│ ├── Cifar10MicroMlpLutCnn.cpp
│ ├── Cifar10StochasticLutCnn.cpp
│ ├── Cifar10DifferentiableLutCnn.cpp
│ ├── Cifar10DifferentiableLutSimple.cpp
│ ├── sample_cifar10.vcxproj.filters
│ └── sample_cifar10.vcxproj.user
│ ├── diabetes
│ ├── readme.txt
│ ├── diabets_data.py
│ ├── LoadDiabetes.h
│ ├── main.cpp
│ ├── sample_diabetes.vcxproj.filters
│ ├── Makefile
│ └── DiabetesRegressionDenseAffine.cpp
│ └── mnist
│ ├── get_nmist.bat
│ ├── get_nmist.sh
│ ├── sample_mnist.vcxproj.filters
│ ├── sample_mnist.sln
│ └── MnistLoadNet.cpp
├── tests
├── hls
│ └── mnist
│ │ └── mnist_simple
│ │ ├── directives.tcl
│ │ ├── .gitignore
│ │ ├── src
│ │ ├── mnist_simple.h
│ │ └── mnist_simple.cpp
│ │ ├── testbench
│ │ └── tb_mnist_simple.cpp
│ │ └── Makefile
├── .gitattributes
├── cuda
│ ├── MicroMlp_Test.cpp
│ ├── StochasticLut6_Test.cpp
│ ├── Makefile
│ └── main.cpp
├── gtest
│ ├── TensorTest.cpp
│ ├── BinaryScalingTest.cpp
│ ├── MetricsCategoricalAccuracyTest.cpp
│ ├── cudaMatrixColwiseSumTest.cpp
│ ├── cudaMatrixRowwiseSetVectorTest.cpp
│ ├── ConvBitToRealTest.cpp
│ ├── ShuffleTest.cpp
│ ├── VariablesTest.cpp
│ ├── DenseAffineQuantizeTest.cpp
│ ├── DifferentiableLutTest.cpp
│ ├── BitEncodeTest.cpp
│ ├── Makefile
│ ├── ConcatenateTest.cpp
│ ├── DepthwiseDenseAffineTest.cpp
│ └── cudaMatrixColwiseMeanVarTest.cpp
├── cpp
│ ├── cifar10
│ │ ├── Cifar10DenseCnn.cpp
│ │ ├── Cifar10DenseSimple.cpp
│ │ ├── Cifar10MicroMlpLutCnn.cpp
│ │ ├── Cifar10StochasticLutCnn.cpp
│ │ ├── Cifar10DifferentiableLutCnn.cpp
│ │ └── Cifar10DifferentiableLutSimple.cpp
│ ├── diabetes
│ │ ├── readme.txt
│ │ ├── diabets_data.py
│ │ ├── LoadDiabetes.h
│ │ ├── main.cpp
│ │ ├── Makefile
│ │ └── DiabetesRegressionDenseAffine.cpp
│ ├── mnist
│ │ ├── get_nmist.bat
│ │ ├── get_nmist.sh
│ │ ├── test_mnist.sln
│ │ └── MnistLoadNet.cpp
│ └── xor
│ │ ├── main.cpp
│ │ ├── StochasticLut6.cpp
│ │ └── XorMicroMlp.cpp
└── svhn
│ └── download_svhn.sh
├── include
└── bb
│ ├── ObjectLoader.h
│ ├── Manager.h
│ ├── ModelLoader.h
│ ├── ValueGenerator.h
│ ├── StochasticLutModel.h
│ ├── Optimizer.h
│ ├── MetricsFunction.h
│ ├── Version.h
│ ├── LossFunction.h
│ ├── CudaUtility.h
│ ├── PnmImage.h
│ ├── LoadXor.h
│ ├── Assert.h
│ ├── Activation.h
│ └── Filter2d.h
├── documents
├── images
│ ├── micro_mlp.png
│ ├── GoogleColab.jpg
│ ├── dense_affine.png
│ ├── performance.png
│ ├── block_diagram.png
│ ├── fpga_resource.png
│ ├── sparse_affine.png
│ ├── stochastic_and.png
│ ├── stochastic_lut.png
│ ├── LutNet_node_model.png
│ ├── autoencoder_mnist.png
│ ├── binary_modulation.png
│ ├── fpga_environment.jpg
│ ├── modulation_model.png
│ ├── stochastic_lut2.png
│ ├── LutNet_design_flow.png
│ ├── LutNet_layer_model.png
│ ├── autoencoder_cifar10.png
│ ├── differentiable_lut.png
│ ├── differentiable_lut_app.png
│ ├── differentiable-lut_model.png
│ ├── LutNet_lut_equivalent_model.png
│ ├── binary_modulation_wide_test.png
│ └── difference_other_networks.png
└── sphinx
│ ├── source
│ ├── python_module_storage.rst
│ ├── introduction.rst
│ ├── python_module_system.rst
│ ├── python_module_object.rst
│ ├── _static
│ │ └── css
│ │ │ └── my_theme.css
│ ├── quick_start_verilog.rst
│ ├── python_module_models_misc.rst
│ ├── python_module_verilog.rst
│ ├── python_module_models_base.rst
│ ├── python_module_container.rst
│ ├── python_module_optimizer.rst
│ ├── python_api.rst
│ ├── index.rst
│ ├── python_module_models_binary.rst
│ ├── python_module_models_filter.rst
│ ├── python_module_models_activation.rst
│ ├── python_module_metrics.rst
│ ├── introduction_features.rst
│ ├── locale
│ │ ├── en
│ │ │ └── LC_MESSAGES
│ │ │ │ ├── index.po
│ │ │ │ └── sample_rtl.po
│ │ └── ja
│ │ │ └── LC_MESSAGES
│ │ │ ├── index.po
│ │ │ └── sample_rtl.po
│ ├── python_module_losses.rst
│ ├── python_module_models_operation.rst
│ ├── introduction_binary_modulation.rst
│ ├── introduction_case_study.rst
│ ├── informations.rst
│ └── quick_start_cpp.rst
│ ├── Makefile
│ └── make.bat
├── python
├── copy_src.bat
├── build.sh
├── copy_src.sh
├── build.bat
├── projects
│ ├── discrete
│ │ ├── core.vcxproj.user
│ │ └── Makefile
│ └── thrust
│ │ ├── core.vcxproj.user
│ │ ├── PyBinaryBrainThrust.sln
│ │ └── Makefile
├── clean.bat
├── clean.sh
├── uninstall.sh
├── requirements.txt
├── upload_pypi.bat
├── check_install.py
├── binarybrain
│ ├── __init__.py
│ ├── variables.py
│ ├── Makefile
│ ├── src
│ │ └── core_bbcu.cu
│ ├── metrics.py
│ └── hls.py
└── upload_pypi.sh
├── .gitmodules
├── cuda
├── bbcu.vcxproj.user
├── Manager.cu
├── bbcu.sln
├── MatrixRowwiseSetVector.cu
├── Makefile
├── MatrixColwiseSum.cu
├── OptimizerAdam.cu
└── ConvBitToReal.cu
├── .readthedocs.yaml
├── .gitattributes
├── setup.py
└── license.txt
/samples/hls/mnist/simple/directives.tcl:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/hls/mnist/mnist_simple/directives.tcl:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/hls/mnist/mnist_simple/.gitignore:
--------------------------------------------------------------------------------
1 | mnist_simple/
--------------------------------------------------------------------------------
/tests/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb filter=nbstripout
2 | *.ipynb diff=ipynb
3 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/xsim/xsim_run_all.tcl:
--------------------------------------------------------------------------------
1 | run all
2 | quit
3 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/xsim/xsim_run_all.tcl:
--------------------------------------------------------------------------------
1 | run all
2 | quit
3 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/xsim/xsim_run_all.tcl:
--------------------------------------------------------------------------------
1 | run all
2 | quit
3 |
--------------------------------------------------------------------------------
/include/bb/ObjectLoader.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/include/bb/ObjectLoader.h
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/xsim/xsim_run_all.tcl:
--------------------------------------------------------------------------------
1 | run all
2 | quit
3 |
--------------------------------------------------------------------------------
/tests/cuda/MicroMlp_Test.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cuda/MicroMlp_Test.cpp
--------------------------------------------------------------------------------
/tests/gtest/TensorTest.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/gtest/TensorTest.cpp
--------------------------------------------------------------------------------
/documents/images/micro_mlp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/micro_mlp.png
--------------------------------------------------------------------------------
/samples/hls/mnist/simple/.gitignore:
--------------------------------------------------------------------------------
1 | mnist_sample/
2 |
3 | MnistDifferentiableLutHls.h
4 | mnist_test_data.h
5 |
--------------------------------------------------------------------------------
/documents/images/GoogleColab.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/GoogleColab.jpg
--------------------------------------------------------------------------------
/documents/images/dense_affine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/dense_affine.png
--------------------------------------------------------------------------------
/documents/images/performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/performance.png
--------------------------------------------------------------------------------
/tests/gtest/BinaryScalingTest.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/gtest/BinaryScalingTest.cpp
--------------------------------------------------------------------------------
/documents/images/block_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/block_diagram.png
--------------------------------------------------------------------------------
/documents/images/fpga_resource.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/fpga_resource.png
--------------------------------------------------------------------------------
/documents/images/sparse_affine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/sparse_affine.png
--------------------------------------------------------------------------------
/documents/images/stochastic_and.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/stochastic_and.png
--------------------------------------------------------------------------------
/documents/images/stochastic_lut.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/stochastic_lut.png
--------------------------------------------------------------------------------
/tests/cuda/StochasticLut6_Test.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cuda/StochasticLut6_Test.cpp
--------------------------------------------------------------------------------
/documents/images/LutNet_node_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/LutNet_node_model.png
--------------------------------------------------------------------------------
/documents/images/autoencoder_mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/autoencoder_mnist.png
--------------------------------------------------------------------------------
/documents/images/binary_modulation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/binary_modulation.png
--------------------------------------------------------------------------------
/documents/images/fpga_environment.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/fpga_environment.jpg
--------------------------------------------------------------------------------
/documents/images/modulation_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/modulation_model.png
--------------------------------------------------------------------------------
/documents/images/stochastic_lut2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/stochastic_lut2.png
--------------------------------------------------------------------------------
/python/copy_src.bat:
--------------------------------------------------------------------------------
1 |
2 | xcopy /Y /I /E ..\include binarybrain\include
3 | xcopy /Y /I ..\cuda binarybrain\cuda
4 |
5 |
--------------------------------------------------------------------------------
/tests/cpp/cifar10/Cifar10DenseCnn.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10DenseCnn.cpp
--------------------------------------------------------------------------------
/documents/images/LutNet_design_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/LutNet_design_flow.png
--------------------------------------------------------------------------------
/documents/images/LutNet_layer_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/LutNet_layer_model.png
--------------------------------------------------------------------------------
/documents/images/autoencoder_cifar10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/autoencoder_cifar10.png
--------------------------------------------------------------------------------
/documents/images/differentiable_lut.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/differentiable_lut.png
--------------------------------------------------------------------------------
/samples/cpp/cifar10/Cifar10DenseCnn.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10DenseCnn.cpp
--------------------------------------------------------------------------------
/samples/verilog/cifar10/README.md:
--------------------------------------------------------------------------------
1 | CIFAR-10 の Verilog 出力先ディレクトリです。
2 |
3 | 現時点で シミュレーション実行のサンプル提供はありませんので、MNISTを参考に各自で挑戦お願いいたします。
4 |
--------------------------------------------------------------------------------
/tests/cpp/cifar10/Cifar10DenseSimple.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10DenseSimple.cpp
--------------------------------------------------------------------------------
/documents/images/differentiable_lut_app.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/differentiable_lut_app.png
--------------------------------------------------------------------------------
/samples/cpp/cifar10/Cifar10DenseSimple.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10DenseSimple.cpp
--------------------------------------------------------------------------------
/tests/cpp/cifar10/Cifar10MicroMlpLutCnn.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10MicroMlpLutCnn.cpp
--------------------------------------------------------------------------------
/documents/images/differentiable-lut_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/differentiable-lut_model.png
--------------------------------------------------------------------------------
/samples/cpp/cifar10/Cifar10MicroMlpLutCnn.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10MicroMlpLutCnn.cpp
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/iverilog/iverilog_cmd.txt:
--------------------------------------------------------------------------------
1 | ../tb_mnist_lut_simple.v
2 | ../MnistLutSimple.v
3 | -y ../../common
4 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/xsim/xvlog_cmd.txt:
--------------------------------------------------------------------------------
1 | ../tb_mnist_lut_simple.v
2 | ../MnistLutSimple.v
3 | ../../common/bb_lut.v
4 |
--------------------------------------------------------------------------------
/tests/cpp/cifar10/Cifar10StochasticLutCnn.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10StochasticLutCnn.cpp
--------------------------------------------------------------------------------
/documents/images/LutNet_lut_equivalent_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/LutNet_lut_equivalent_model.png
--------------------------------------------------------------------------------
/documents/images/binary_modulation_wide_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/binary_modulation_wide_test.png
--------------------------------------------------------------------------------
/documents/images/difference_other_networks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/documents/images/difference_other_networks.png
--------------------------------------------------------------------------------
/python/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source ./clean.sh
4 | source ./copy_src.sh
5 | python3 setup.py build
6 | python3 setup.py develop --user
7 |
--------------------------------------------------------------------------------
/samples/cpp/cifar10/Cifar10StochasticLutCnn.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10StochasticLutCnn.cpp
--------------------------------------------------------------------------------
/samples/cpp/diabetes/readme.txt:
--------------------------------------------------------------------------------
1 |
2 | データの準備
3 |
4 | sudo apt install python3-pip
5 | sudo pip3 install scikit-learn
6 | python3 diabets_data.py
7 |
8 |
--------------------------------------------------------------------------------
/tests/cpp/cifar10/Cifar10DifferentiableLutCnn.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10DifferentiableLutCnn.cpp
--------------------------------------------------------------------------------
/tests/cpp/diabetes/readme.txt:
--------------------------------------------------------------------------------
1 |
2 | データの準備
3 |
4 | sudo apt install python3-pip
5 | sudo pip3 install scikit-learn
6 | python3 diabets_data.py
7 |
8 |
--------------------------------------------------------------------------------
/samples/cpp/cifar10/Cifar10DifferentiableLutCnn.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10DifferentiableLutCnn.cpp
--------------------------------------------------------------------------------
/tests/cpp/cifar10/Cifar10DifferentiableLutSimple.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/tests/cpp/cifar10/Cifar10DifferentiableLutSimple.cpp
--------------------------------------------------------------------------------
/samples/cpp/cifar10/Cifar10DifferentiableLutSimple.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/cpp/cifar10/Cifar10DifferentiableLutSimple.cpp
--------------------------------------------------------------------------------
/tests/hls/mnist/mnist_simple/src/mnist_simple.h:
--------------------------------------------------------------------------------
1 |
2 | #define AP_INT_MAX_W 32768
3 | #include "ap_int.h"
4 |
5 | ap_uint<10> mnist_simple(ap_uint<28*28> in_data);
6 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "cereal"]
2 | path = cereal
3 | url = https://github.com/USCiLab/cereal
4 | [submodule "jelly"]
5 | path = jelly
6 | url = https://github.com/ryuz/jelly
7 |
--------------------------------------------------------------------------------
/python/copy_src.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | rm -fr binarybrain/include
4 | rm -fr binarybrain/cuda
5 |
6 | cp -r ../include binarybrain/include
7 | cp -r ../cuda binarybrain/cuda
8 |
9 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/veritak/mnist_lut_cnn.vtakprj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/verilog/mnist/tb_mnist_lut_cnn/veritak/mnist_lut_cnn.vtakprj
--------------------------------------------------------------------------------
/cuda/bbcu.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/python/build.bat:
--------------------------------------------------------------------------------
1 | call clean.bat
2 | call copy_src.bat
3 | python setup.py build
4 | python setup.py develop
5 |
6 | python check_install.py
7 | if %errorlevel% neq 0 (
8 | exit /b
9 | )
10 |
--------------------------------------------------------------------------------
/tests/cuda/Makefile:
--------------------------------------------------------------------------------
1 |
2 | all:
3 | nvcc -I../../include ../../cuda/MicroMlp.cu main.cpp MicroMlp_Test.cpp -o cuda_test
4 |
5 | run:
6 | ./cuda_test
7 |
8 | clean:
9 | rm ./cuda_test
10 |
--------------------------------------------------------------------------------
/samples/hls/mnist/simple/src/mnist_sample.h:
--------------------------------------------------------------------------------
1 |
2 | #include "ap_int.h"
3 |
4 | void mnist_sample(
5 | const ap_uint<1> in[28*28],
6 | ap_uint<4> out[]
7 | );
8 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/veritak/tb_mnist_lut_simple.vtakprj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/verilog/mnist/tb_mnist_lut_simple/veritak/tb_mnist_lut_simple.vtakprj
--------------------------------------------------------------------------------
/python/projects/discrete/core.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/python/projects/thrust/core.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/iverilog/run_iverilog.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash -eu
2 |
3 | TOP_MODULE=tb_mnist_lut_cnn
4 |
5 | iverilog -o $TOP_MODULE.vvp -s $TOP_MODULE -c iverilog_cmd.txt -DIVERILOG
6 | vvp $TOP_MODULE.vvp
7 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/verilator/clang-cmakeinit.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER "/usr/bin/clang" CACHE string "clang compiler" FORCE)
2 | set(CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE string "clang++ compiler" FORCE)
3 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/iverilog/run_iverilog.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash -eu
2 |
3 | TOP_MODULE=tb_mnist_lut_simple
4 |
5 | iverilog -o $TOP_MODULE.vvp -s $TOP_MODULE -c iverilog_cmd.txt -DIVERILOG
6 | vvp $TOP_MODULE.vvp
7 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/verilator/clang-cmakeinit.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER "/usr/bin/clang" CACHE string "clang compiler" FORCE)
2 | set(CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE string "clang++ compiler" FORCE)
3 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/clang-cmakeinit.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER "/usr/bin/clang" CACHE string "clang compiler" FORCE)
2 | set(CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE string "clang++ compiler" FORCE)
3 |
--------------------------------------------------------------------------------
/python/clean.bat:
--------------------------------------------------------------------------------
1 | rd /s /q build
2 | rd /s /q dist
3 | rd /s /q binarybrain.egg-info
4 | rd /s /q binarybrain\__pycache__
5 | rd /s /q binarybrain\cuda
6 | rd /s /q binarybrain\include
7 | del binarybrain\*.pyd
8 |
9 | call copy_src.bat
10 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/veritak/tb_mnist_semantic_segmentation.vtakprj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/verilog/mnist/tb_mnist_semantic_segmentation/veritak/tb_mnist_semantic_segmentation.vtakprj
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_storage.rst:
--------------------------------------------------------------------------------
1 | 保存/復帰(Serialize)
2 | =============================
3 |
4 | storage モジュール
5 | ----------------------------
6 |
7 | .. automodule:: binarybrain.storage
8 | :members:
9 | :show-inheritance:
10 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/clang-cmakeinit.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER "/usr/bin/clang" CACHE string "clang compiler" FORCE)
2 | set(CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE string "clang++ compiler" FORCE)
3 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/iverilog/run_iverilog.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash -eu
2 |
3 | TOP_MODULE=tb_mnist_lut_semantic_segmentation
4 |
5 | iverilog -o $TOP_MODULE.vvp -s $TOP_MODULE -c iverilog_cmd.txt -DIVERILOG
6 | vvp $TOP_MODULE.vvp
7 |
--------------------------------------------------------------------------------
/documents/sphinx/source/introduction.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | はじめに
3 | ==============
4 |
5 | .. toctree::
6 |
7 | introduction_features
8 | introduction_case_study
9 | introduction_lut_network
10 | introduction_binary_modulation
11 |
12 |
13 |
--------------------------------------------------------------------------------
/python/clean.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | rm -fr build
4 | rm -fr dist
5 | rm -fr binarybrain.egg-info
6 | rm -fr tmp
7 |
8 | rm -fr binarybrain/include
9 | rm -fr binarybrain/cuda
10 |
11 | rm -f binarybrain/src/*.o
12 | rm -f binarybrain/src/*.so
13 |
14 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-20.04
5 | tools:
6 | python: "3.9"
7 |
8 | python:
9 | install:
10 | - requirements: python/requirements.txt
11 |
12 | sphinx:
13 | configuration: documents/sphinx/source/conf.py
14 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_system.rst:
--------------------------------------------------------------------------------
1 | システム/GPU関連(System/GPU)
2 | =================================
3 |
4 | その他システム制御関連のAPIです
5 |
6 | .. automodule:: binarybrain.system
7 | :members:
8 | :show-inheritance:
9 |
10 | .. :undoc-members:
11 |
12 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/iverilog/run_iverilog.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash -eu
2 |
3 | TOP_MODULE=tb_mnist_lut_segmentation_and_classification
4 |
5 | iverilog -o $TOP_MODULE.vvp -s $TOP_MODULE -c iverilog_cmd.txt -DIVERILOG
6 | vvp $TOP_MODULE.vvp
7 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_object.rst:
--------------------------------------------------------------------------------
1 | 基本クラス
2 | =============================
3 |
4 | Object クラス
5 | ----------------------------
6 |
7 | .. autoclass:: binarybrain.object.Object
8 | :members:
9 | :show-inheritance:
10 | :member-order: bysource
11 |
12 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/veritak/tb_mnist_segmentation_and_classification.vtakprj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ryuz/BinaryBrain/HEAD/samples/verilog/mnist/tb_mnist_segmentation_and_classification/veritak/tb_mnist_segmentation_and_classification.vtakprj
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/iverilog/iverilog_cmd.txt:
--------------------------------------------------------------------------------
1 | ../tb_mnist_lut_cnn.v
2 | ../MnistLutCnn.v
3 | -y ..
4 | -y ../../common
5 | -y ../../../../../jelly/rtl/library
6 | -y ../../../../../jelly/rtl/image
7 | -y ../../../../../jelly/rtl/video
8 | -y ../../../../../jelly/rtl/model
9 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/verilator/verilator_cmd.txt:
--------------------------------------------------------------------------------
1 | #--trace-fst --trace-params --trace-structs --trace-underscore
2 | #--public
3 | #--threads 2
4 |
5 | -Wno-WIDTH
6 | -Wno-UNSIGNED
7 | -Wno-PINMISSING
8 | -Wno-UNOPTFLAT
9 | #-Wno-UNOPTTHREADS
10 | -Wno-LITENDIAN
11 |
12 | -y ..
13 | -y ../../common
14 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/iverilog/iverilog_cmd.txt:
--------------------------------------------------------------------------------
1 | ../tb_mnist_lut_semantic_segmentation.v
2 | ../MnistSemanticSegmentation.v
3 | -y ..
4 | -y ../../common
5 | -y ../../../../../jelly/rtl/library
6 | -y ../../../../../jelly/rtl/image
7 | -y ../../../../../jelly/rtl/video
8 | -y ../../../../../jelly/rtl/model
9 |
--------------------------------------------------------------------------------
/python/uninstall.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | python3 setup.py install --record files.txt
4 | cat files.txt | xargs rm -rf
5 | rm files.txt
6 |
7 | python3 setup.py install --user --record files.txt
8 | cat files.txt | xargs rm -rf
9 | rm files.txt
10 |
11 | rm -fr /home/ryuji/.local/lib/python3.6/site-packages/binarybrain-0.0.2-py3.6-linux-x86_64.egg
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 |
3 | *.md text
4 | *.txt text
5 |
6 | *.sh text eol=lf
7 |
8 | *.rst text eol=lf
9 | *.po text eol=lf
10 |
11 | *.jpg -text
12 | *.png -text
13 |
14 | *.sln text eol=crlf
15 | *.vcproj text eol=crlf
16 | *.vcproj.filters text eol=crlf
17 | *.vcproj.user text eol=crlf
18 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/iverilog/iverilog_cmd.txt:
--------------------------------------------------------------------------------
1 | ../tb_mnist_lut_segmentation_and_classification.v
2 | ../MnistSegmentationAndClassification.v
3 | -y ..
4 | -y ../../common
5 | -y ../../../../../jelly/rtl/library
6 | -y ../../../../../jelly/rtl/image
7 | -y ../../../../../jelly/rtl/video
8 | -y ../../../../../jelly/rtl/model
9 |
--------------------------------------------------------------------------------
/documents/sphinx/source/_static/css/my_theme.css:
--------------------------------------------------------------------------------
1 | @import url("theme.css");
2 |
3 | .wy-nav-content {
4 | max-width: none;
5 | }
6 |
7 | h1,h2,h3,h4,h5,h6 {
8 | border-bottom: 1px solid #ccc;
9 | }
10 |
11 | .wy-table-responsive table td, .wy-table-responsive table th {
12 | white-space: normal;
13 | }
14 |
15 | colgroup {
16 | display: none;
17 | }
18 |
--------------------------------------------------------------------------------
/documents/sphinx/source/quick_start_verilog.rst:
--------------------------------------------------------------------------------
1 | ===========================
2 | クイックスタート(Verilog)
3 | ===========================
4 |
5 |
6 | RTL Simulation の試し方
7 | ============================
8 |
9 | C++, Pythonともに Verilog RTL のソースファイルの出力が可能です。
10 | 出力したRTLの試し方は
11 |
12 | https://github.com/ryuz/BinaryBrain/blob/ver4_release/samples/verilog/mnist/README.md
13 |
14 | などをご参照ください。
15 |
16 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/xsim/xvlog_cmd.txt:
--------------------------------------------------------------------------------
1 | -sv
2 | --sourcelibdir ..
3 | --sourcelibdir ../../common
4 | --sourcelibdir ../../../../../jelly/rtl/library
5 | --sourcelibdir ../../../../../jelly/rtl/image
6 | --sourcelibdir ../../../../../jelly/rtl/video
7 | --sourcelibdir ../../../../../jelly/rtl/model
8 | --sourcelibext .v
9 | --sourcelibext .sv
10 |
11 | ../tb_mnist_lut_cnn.v
12 | ../MnistLutCnn.v
13 |
14 |
--------------------------------------------------------------------------------
/python/requirements.txt:
--------------------------------------------------------------------------------
1 | wheel
2 | numpy
3 | tqdm
4 | pybind11
5 | twine
6 | Sphinx
7 | sphinx-autobuild
8 | sphinx-markdown-tables
9 | sphinx-rtd-theme
10 | sphinxcontrib-actdiag
11 | sphinxcontrib-applehelp
12 | sphinxcontrib-blockdiag
13 | sphinxcontrib-devhelp
14 | sphinxcontrib-htmlhelp
15 | sphinxcontrib-jsmath
16 | sphinxcontrib-nwdiag
17 | sphinxcontrib-qthelp
18 | sphinxcontrib-seqdiag
19 | sphinxcontrib-serializinghtml
20 |
--------------------------------------------------------------------------------
/tests/svhn/download_svhn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | curl -O http://ufldl.stanford.edu/housenumbers/train.tar.gz
4 | curl -O http://ufldl.stanford.edu/housenumbers/test.tar.gz
5 | curl -O http://ufldl.stanford.edu/housenumbers/extra.tar.gz
6 | curl -O http://ufldl.stanford.edu/housenumbers/train_32x32.mat
7 | curl -O http://ufldl.stanford.edu/housenumbers/test_32x32.mat
8 | curl -O http://ufldl.stanford.edu/housenumbers/extra_32x32.mat
9 |
10 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/xsim/run_xsim.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash -eu
2 |
3 | rm -fr xsim.dir
4 | rm -fr .Xil
5 | rm -f webtalk*.jou
6 | rm -f webtalk*.log
7 | rm -f xvlog*.log
8 | rm -f xvlog*.pb
9 | rm -f xelab*.log
10 | rm -f xelab*.pb
11 | rm -f xsim*.jou
12 | rm -f xsim*.log
13 |
14 | TOP_MODULE=tb_mnist_lut_cnn
15 |
16 | xvlog -f xvlog_cmd.txt
17 | xelab -debug wave $TOP_MODULE -s $TOP_MODULE
18 | xsim $TOP_MODULE -t xsim_run_all.tcl
19 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/xsim/run_xsim.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash -eu
2 |
3 | rm -fr xsim.dir
4 | rm -fr .Xil
5 | rm -f webtalk*.jou
6 | rm -f webtalk*.log
7 | rm -f xvlog*.log
8 | rm -f xvlog*.pb
9 | rm -f xelab*.log
10 | rm -f xelab*.pb
11 | rm -f xsim*.jou
12 | rm -f xsim*.log
13 |
14 | TOP_MODULE=tb_mnist_lut_simple
15 |
16 | xvlog -f xvlog_cmd.txt
17 | xelab -debug wave $TOP_MODULE -s $TOP_MODULE
18 | xsim $TOP_MODULE -t xsim_run_all.tcl
19 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/xsim/xvlog_cmd.txt:
--------------------------------------------------------------------------------
1 | -sv
2 | --sourcelibdir ..
3 | --sourcelibdir ../../common
4 | --sourcelibdir ../../../../../jelly/rtl/library
5 | --sourcelibdir ../../../../../jelly/rtl/image
6 | --sourcelibdir ../../../../../jelly/rtl/video
7 | --sourcelibdir ../../../../../jelly/rtl/model
8 | --sourcelibext .v
9 | --sourcelibext .sv
10 |
11 | ../tb_mnist_lut_semantic_segmentation.v
12 | ../MnistSemanticSegmentation.v
13 |
14 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/xsim/run_xsim.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash -eu
2 |
3 | rm -fr xsim.dir
4 | rm -fr .Xil
5 | rm -f webtalk*.jou
6 | rm -f webtalk*.log
7 | rm -f xvlog*.log
8 | rm -f xvlog*.pb
9 | rm -f xelab*.log
10 | rm -f xelab*.pb
11 | rm -f xsim*.jou
12 | rm -f xsim*.log
13 |
14 | TOP_MODULE=tb_mnist_lut_semantic_segmentation
15 |
16 | xvlog -f xvlog_cmd.txt
17 | xelab -debug wave $TOP_MODULE -s $TOP_MODULE
18 | xsim $TOP_MODULE -t xsim_run_all.tcl
19 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/xsim/xvlog_cmd.txt:
--------------------------------------------------------------------------------
1 | -sv
2 | --sourcelibdir ..
3 | --sourcelibdir ../../common
4 | --sourcelibdir ../../../../../jelly/rtl/library
5 | --sourcelibdir ../../../../../jelly/rtl/image
6 | --sourcelibdir ../../../../../jelly/rtl/video
7 | --sourcelibdir ../../../../../jelly/rtl/model
8 | --sourcelibext .v
9 | --sourcelibext .sv
10 |
11 | ../tb_mnist_lut_segmentation_and_classification.v
12 | ../MnistSegmentationAndClassification.v
13 |
14 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/xsim/run_xsim.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash -eu
2 |
3 | rm -fr xsim.dir
4 | rm -fr .Xil
5 | rm -f webtalk*.jou
6 | rm -f webtalk*.log
7 | rm -f xvlog*.log
8 | rm -f xvlog*.pb
9 | rm -f xelab*.log
10 | rm -f xelab*.pb
11 | rm -f xsim*.jou
12 | rm -f xsim*.log
13 |
14 | TOP_MODULE=tb_mnist_lut_segmentation_and_classification
15 |
16 | xvlog -f xvlog_cmd.txt
17 | xelab -debug wave $TOP_MODULE -s $TOP_MODULE
18 | xsim $TOP_MODULE -t xsim_run_all.tcl
19 |
--------------------------------------------------------------------------------
/tests/cpp/diabetes/diabets_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | from sklearn.datasets import load_diabetes
4 |
5 | diabetes = load_diabetes()
6 |
7 | with open('diabetes_data.txt', mode='w') as f:
8 | for dd in diabetes['data']:
9 | for d in dd:
10 | f.write(str(d))
11 | f.write(' ')
12 | f.write('\n')
13 |
14 | with open('diabetes_target.txt', mode='w') as f:
15 | for d in diabetes['target']:
16 | f.write(str(d))
17 | f.write('\n')
18 |
19 |
--------------------------------------------------------------------------------
/samples/cpp/diabetes/diabets_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | from sklearn.datasets import load_diabetes
4 |
5 | diabetes = load_diabetes()
6 |
7 | with open('diabetes_data.txt', mode='w') as f:
8 | for dd in diabetes['data']:
9 | for d in dd:
10 | f.write(str(d))
11 | f.write(' ')
12 | f.write('\n')
13 |
14 | with open('diabetes_target.txt', mode='w') as f:
15 | for d in diabetes['target']:
16 | f.write(str(d))
17 | f.write('\n')
18 |
19 |
--------------------------------------------------------------------------------
/tests/hls/mnist/mnist_simple/src/mnist_simple.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "mnist_simple.h"
3 | #include "MnistDifferentiableLutSimpleHls.h"
4 |
5 |
6 | ap_uint<10> mnist_simple(ap_uint<28*28> in_data)
7 | {
8 | #pragma HLS pipeline II=1
9 |
10 | auto data1 = mnist_layer1(in_data);
11 | auto data2 = mnist_layer2(data1);
12 | auto data3 = mnist_layer3(data2);
13 | auto data4 = mnist_layer4(data3);
14 | auto data5 = mnist_layer5(data4);
15 | auto data6 = mnist_layer6(data5);
16 | return data6;
17 | }
18 |
19 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/verilator/verilator_cmd.txt:
--------------------------------------------------------------------------------
1 | #--trace-fst --trace-params --trace-structs --trace-underscore
2 | #--public
3 | #--threads 2
4 |
5 | -Wno-WIDTH
6 | -Wno-UNSIGNED
7 | -Wno-PINMISSING
8 | -Wno-UNOPTFLAT
9 | #-Wno-UNOPTTHREADS
10 | -Wno-LITENDIAN
11 |
12 | -y ..
13 | -y ../../common
14 | -y ../../../../../jelly/rtl/library/
15 | -y ../../../../../jelly/rtl/bus
16 | -y ../../../../../jelly/rtl/image
17 | -y ../../../../../jelly/rtl/video
18 | -y ../../../../../jelly/rtl/math
19 | -y ../../../../../jelly/rtl/model
20 |
--------------------------------------------------------------------------------
/python/upload_pypi.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 |
3 | rem call set_vc.bat
4 |
5 | python -V
6 | pause
7 |
8 | echo git switch ver4_release ^& git pull
9 | pause
10 | git switch ver4_release
11 | git pull
12 |
13 |
14 | echo build
15 | pause
16 |
17 | call clean.bat
18 | call copy_src.bat
19 |
20 | python setup.py build
21 |
22 | python setup.py sdist
23 | python setup.py bdist_wheel
24 |
25 |
26 | echo upload TestPyPI
27 | pause
28 | twine upload --repository testpypi dist/*
29 |
30 |
31 | echo upload PyPI
32 | pause
33 | twine upload --repository pypi dist/*
34 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/verilator_cmd.txt:
--------------------------------------------------------------------------------
1 | #--trace-fst --trace-params --trace-structs --trace-underscore
2 | #--public
3 | #--threads 2
4 |
5 | -Wno-WIDTH
6 | -Wno-UNSIGNED
7 | -Wno-PINMISSING
8 | -Wno-UNOPTFLAT
9 | #-Wno-UNOPTTHREADS
10 | -Wno-LITENDIAN
11 |
12 | -y ..
13 | -y ../../common
14 | -y ../../../../../jelly/rtl/library/
15 | -y ../../../../../jelly/rtl/bus
16 | -y ../../../../../jelly/rtl/image
17 | -y ../../../../../jelly/rtl/video
18 | -y ../../../../../jelly/rtl/math
19 | -y ../../../../../jelly/rtl/model
20 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/verilator_cmd.txt:
--------------------------------------------------------------------------------
1 | #--trace-fst --trace-params --trace-structs --trace-underscore
2 | #--public
3 | #--threads 2
4 |
5 | -Wno-WIDTH
6 | -Wno-UNSIGNED
7 | -Wno-PINMISSING
8 | -Wno-UNOPTFLAT
9 | #-Wno-UNOPTTHREADS
10 | -Wno-LITENDIAN
11 |
12 | -y ..
13 | -y ../../common
14 | -y ../../../../../jelly/rtl/library/
15 | -y ../../../../../jelly/rtl/bus
16 | -y ../../../../../jelly/rtl/image
17 | -y ../../../../../jelly/rtl/video
18 | -y ../../../../../jelly/rtl/math
19 | -y ../../../../../jelly/rtl/model
20 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_models_misc.rst:
--------------------------------------------------------------------------------
1 | 補助モデル
2 | ======================================
3 |
4 |
5 | models モジュールのその他のモデルです。
6 |
7 |
8 | BatchNormalization クラス
9 | ----------------------------
10 |
11 | .. autoclass:: binarybrain.models.BatchNormalization
12 | :members:
13 | :show-inheritance:
14 |
15 |
16 | Dropout クラス
17 | ----------------------------
18 |
19 | .. autoclass:: binarybrain.models.Dropout
20 | :members:
21 | :show-inheritance:
22 |
23 |
24 | Shuffle クラス
25 | ----------------------------
26 |
27 | .. autoclass:: binarybrain.models.Shuffle
28 | :members:
29 | :show-inheritance:
30 |
31 |
32 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/xsim/run_xsim.bat:
--------------------------------------------------------------------------------
1 |
2 |
3 | rmdir /s /q xsim.dir
4 | rmdir /s /q .Xil
5 | del webtalk*.jou
6 | del webtalk*.log
7 | del xvlog*.log
8 | del xvlog*.pb
9 | del xelab*.log
10 | del xelab*.pb
11 | del xsim*.jou
12 | del xsim*.log
13 |
14 | @if "%1"=="" goto BUILD
15 | @if %1==clean goto END
16 |
17 | :BUILD
18 |
19 | set TOP_MODULE=tb_mnist_lut_cnn
20 |
21 | call xvlog -f xvlog_cmd.txt
22 | @if ERRORLEVEL 1 GOTO END
23 |
24 | call xelab -debug wave %TOP_MODULE% -s %TOP_MODULE%
25 | @if ERRORLEVEL 1 GOTO END
26 |
27 | call xsim %TOP_MODULE% -t xsim_run_all.tcl
28 | @if ERRORLEVEL 1 GOTO END
29 |
30 | :END
31 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/xsim/run_xsim.bat:
--------------------------------------------------------------------------------
1 |
2 |
3 | rmdir /s /q xsim.dir
4 | rmdir /s /q .Xil
5 | del webtalk*.jou
6 | del webtalk*.log
7 | del xvlog*.log
8 | del xvlog*.pb
9 | del xelab*.log
10 | del xelab*.pb
11 | del xsim*.jou
12 | del xsim*.log
13 |
14 | @if "%1"=="" goto BUILD
15 | @if %1==clean goto END
16 |
17 | :BUILD
18 |
19 | set TOP_MODULE=tb_mnist_lut_simple
20 |
21 | call xvlog -f xvlog_cmd.txt
22 | @if ERRORLEVEL 1 GOTO END
23 |
24 | call xelab -debug wave %TOP_MODULE% -s %TOP_MODULE%
25 | @if ERRORLEVEL 1 GOTO END
26 |
27 | call xsim %TOP_MODULE% -t xsim_run_all.tcl
28 | @if ERRORLEVEL 1 GOTO END
29 |
30 | :END
31 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/xsim/run_xsim.bat:
--------------------------------------------------------------------------------
1 |
2 |
3 | rmdir /s /q xsim.dir
4 | rmdir /s /q .Xil
5 | del webtalk*.jou
6 | del webtalk*.log
7 | del xvlog*.log
8 | del xvlog*.pb
9 | del xelab*.log
10 | del xelab*.pb
11 | del xsim*.jou
12 | del xsim*.log
13 |
14 | @if "%1"=="" goto BUILD
15 | @if %1==clean goto END
16 |
17 | :BUILD
18 |
19 | set TOP_MODULE=tb_mnist_lut_semantic_segmentation
20 |
21 | call xvlog -f xvlog_cmd.txt
22 | @if ERRORLEVEL 1 GOTO END
23 |
24 | call xelab -debug wave %TOP_MODULE% -s %TOP_MODULE%
25 | @if ERRORLEVEL 1 GOTO END
26 |
27 | call xsim %TOP_MODULE% -t xsim_run_all.tcl
28 | @if ERRORLEVEL 1 GOTO END
29 |
30 | :END
31 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_verilog.rst:
--------------------------------------------------------------------------------
1 | RTL(Verilog/HLS)変換
2 | ==========================
3 |
4 | 学習が完了したネットは結果パラメータに基づいて、ユーザ側で自由に実装可能ですが、
5 | BinaryBrainでも若干のサポート関数を備えています。
6 |
7 | .. automodule:: binarybrain.verilog
8 | :members:
9 | :show-inheritance:
10 |
11 | .. automodule:: binarybrain.hls
12 | :members:
13 | :show-inheritance:
14 |
15 | ..
16 | :undoc-members:
17 | dump_verilog_lut_layers 関数
18 | -----------------------------------
19 | .. automethod:: binarybrain.verilog.dump_verilog_lut_layers
20 | dump_verilog_lut_cnv_layers 関数
21 | -----------------------------------
22 | .. automethod:: binarybrain.verilog.dump_verilog_lut_cnv_layers
23 |
24 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/xsim/run_xsim.bat:
--------------------------------------------------------------------------------
1 |
2 |
3 | rmdir /s /q xsim.dir
4 | rmdir /s /q .Xil
5 | del webtalk*.jou
6 | del webtalk*.log
7 | del xvlog*.log
8 | del xvlog*.pb
9 | del xelab*.log
10 | del xelab*.pb
11 | del xsim*.jou
12 | del xsim*.log
13 |
14 | @if "%1"=="" goto BUILD
15 | @if %1==clean goto END
16 |
17 | :BUILD
18 |
19 | set TOP_MODULE=tb_mnist_lut_segmentation_and_classification
20 |
21 | call xvlog -f xvlog_cmd.txt
22 | @if ERRORLEVEL 1 GOTO END
23 |
24 | call xelab -debug wave %TOP_MODULE% -s %TOP_MODULE%
25 | @if ERRORLEVEL 1 GOTO END
26 |
27 | call xsim %TOP_MODULE% -t xsim_run_all.tcl
28 | @if ERRORLEVEL 1 GOTO END
29 |
30 | :END
31 |
--------------------------------------------------------------------------------
/include/bb/Manager.h:
--------------------------------------------------------------------------------
1 |
2 | #pragma once
3 |
4 |
5 | #ifdef BB_WITH_CUDA
6 | #include "bbcu/bbcu.h"
7 | #endif
8 |
9 |
10 | namespace bb {
11 |
12 | class Manager
13 | {
14 | public:
15 |
16 | #ifdef BB_WITH_CUDA
17 | static inline bool IsDeviceAvailable(void)
18 | {
19 | return !bbcu_IsHostOnly();
20 | }
21 |
22 | static inline void SetHostOnly(bool hostOnly)
23 | {
24 | bbcu_SetHostOnly(hostOnly);
25 | }
26 | #else
27 | static bool IsDeviceAvailable(void)
28 | {
29 | return false;
30 | }
31 |
32 | static void SetHostOnly(bool hostOnly)
33 | {
34 | }
35 | #endif
36 | };
37 |
38 |
39 | }
40 |
41 |
42 | // end of file
43 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_models_base.rst:
--------------------------------------------------------------------------------
1 | 基本モデル (Base models)
2 | ======================================
3 |
4 | models モジュールには、ネットワークを構成するための各種演算モデルがあります。
5 |
6 |
7 | Model クラス
8 | ----------------------------
9 |
10 | .. autoclass:: binarybrain.models.Model
11 | :members:
12 | :show-inheritance:
13 | :member-order: bysource
14 |
15 |
16 | Sequential クラス
17 | ----------------------------
18 |
19 | .. autoclass:: binarybrain.models.Sequential
20 | :members: set_model_list, get_model_list, append
21 | :show-inheritance:
22 |
23 |
24 | Switcher クラス
25 | ----------------------------
26 |
27 | .. autoclass:: binarybrain.models.Switcher
28 | :members:
29 | :show-inheritance:
30 |
31 |
--------------------------------------------------------------------------------
/python/check_install.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import sys
5 | print(sys.version)
6 |
7 | import platform
8 | print("Python : {}".format(platform.python_version()))
9 |
10 | import binarybrain as bb
11 | print("BinaryBrain : {}".format(bb.get_version_string()))
12 | print("CUDA version : {}".format(bb.get_cuda_driver_version_string()))
13 |
14 | device_available = bb.is_device_available()
15 | print("GPU available : {}".format(device_available))
16 | if device_available:
17 | device_count = bb.get_device_count()
18 | print("GPU count : {}".format(device_count))
19 | for i in range(device_count):
20 | print("GPU[{}] : {}".format(i, bb.get_device_name(i)))
21 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_container.rst:
--------------------------------------------------------------------------------
1 | データ格納
2 | ==================================
3 |
4 |
5 | DType クラス(Enum定義)
6 | ----------------------------
7 |
8 | .. autoclass:: binarybrain.dtype.DType
9 | :members:
10 | :undoc-members:
11 | :show-inheritance:
12 |
13 |
14 | Tensor クラス
15 | ----------------------------
16 |
17 | .. autoclass:: binarybrain.tensor.Tensor
18 | :members:
19 | :show-inheritance:
20 |
21 |
22 | FrameBuffer クラス
23 | ----------------------------
24 |
25 | .. autoclass:: binarybrain.frame_buffer.FrameBuffer
26 | :members:
27 | :show-inheritance:
28 |
29 | Variables クラス
30 | ----------------------------
31 |
32 | .. autoclass:: binarybrain.variables.Variables
33 | :members:
34 | :show-inheritance:
35 |
36 |
--------------------------------------------------------------------------------
/documents/sphinx/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = source
8 | BUILDDIR = build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile livehtml
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
20 |
21 | livehtml:
22 | sphinx-autobuild -b html $(SPHINXOPTS) $(SOURCEDIR) $(BUILDDIR)/html
23 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_optimizer.rst:
--------------------------------------------------------------------------------
1 | 最適化 (optimizer)
2 | ============================
3 |
4 |
5 | Optimizer クラス
6 | ----------------------------
7 |
8 | .. autoclass:: binarybrain.optimizer.Optimizer
9 | :members:
10 | :show-inheritance:
11 |
12 |
13 | OptimizerSgd クラス
14 | ----------------------------
15 |
16 | .. autoclass:: binarybrain.optimizer.OptimizerSgd
17 | :members:
18 | :show-inheritance:
19 |
20 |
21 | OptimizerAdaGrad クラス
22 | ----------------------------
23 |
24 | .. autoclass:: binarybrain.optimizer.OptimizerAdaGrad
25 | :members:
26 | :show-inheritance:
27 |
28 |
29 | OptimizerAdam クラス
30 | ----------------------------
31 |
32 | .. autoclass:: binarybrain.optimizer.OptimizerAdam
33 | :members:
34 | :show-inheritance:
35 |
36 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_api.rst:
--------------------------------------------------------------------------------
1 | =====================
2 | Python API
3 | =====================
4 |
5 | 概要
6 | -----------
7 |
8 | Python版モジュールは binarybrain パッケージを import することで利用可能です。
9 |
10 |
11 |
12 | binarybrain パッケージ
13 | ---------------------------
14 |
15 | binarybrain には以下のモジュールが含まれています。
16 |
17 | .. toctree::
18 |
19 | python_module_object
20 | python_module_container
21 | python_module_models_base
22 | python_module_models_binary
23 | python_module_models_operation
24 | python_module_models_filter
25 | python_module_models_activation
26 | python_module_models_misc
27 | python_module_optimizer
28 | python_module_losses
29 | python_module_metrics
30 | python_module_storage
31 | python_module_verilog
32 | python_module_system
33 |
34 |
--------------------------------------------------------------------------------
/samples/cpp/mnist/get_nmist.bat:
--------------------------------------------------------------------------------
1 | if not exist train-images-idx3-ubyte.gz (
2 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
3 | )
4 | if not exist train-labels-idx1-ubyte.gz (
5 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
6 | )
7 | if not exist t10k-images-idx3-ubyte.gz (
8 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
9 | )
10 | if not exist t10k-labels-idx1-ubyte.gz (
11 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
12 | )
13 |
14 | gzip -c -d train-images-idx3-ubyte.gz > train-images-idx3-ubyte
15 | gzip -c -d train-labels-idx1-ubyte.gz > train-labels-idx1-ubyte
16 | gzip -c -d t10k-images-idx3-ubyte.gz > t10k-images-idx3-ubyte
17 | gzip -c -d t10k-labels-idx1-ubyte.gz > t10k-labels-idx1-ubyte
18 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import os
4 | import sys
5 | import subprocess
6 | import shutil
7 |
8 | # change directory
9 | src_path = os.path.dirname(os.path.abspath(sys.argv[0]))
10 | os.chdir(os.path.join(src_path, 'python'))
11 |
12 | # file copy
13 | shutil.rmtree('binarybrain/include', ignore_errors=True)
14 | shutil.rmtree('binarybrain/cuda', ignore_errors=True)
15 | shutil.copytree('../include', 'binarybrain/include')
16 | shutil.copytree('../cuda', 'binarybrain/cuda')
17 |
18 | python_cmd = 'python3'
19 | try:
20 | subprocess.check_call('python3 -V', shell=True)
21 | except subprocess.CalledProcessError as e:
22 | python_cmd = 'python'
23 |
24 |
25 | # run setup.py
26 | args = sys.argv.copy()
27 | args.pop(0)
28 | subprocess.call([python_cmd, 'setup.py'] + args)
29 |
--------------------------------------------------------------------------------
/tests/cpp/mnist/get_nmist.bat:
--------------------------------------------------------------------------------
1 | if not exist train-images-idx3-ubyte.gz (
2 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
3 | )
4 | if not exist train-labels-idx1-ubyte.gz (
5 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
6 | )
7 | if not exist t10k-images-idx3-ubyte.gz (
8 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
9 | )
10 | if not exist t10k-labels-idx1-ubyte.gz (
11 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
12 | )
13 |
14 | gzip -c -d train-images-idx3-ubyte.gz > train-images-idx3-ubyte
15 | gzip -c -d train-labels-idx1-ubyte.gz > train-labels-idx1-ubyte
16 | gzip -c -d t10k-images-idx3-ubyte.gz > t10k-images-idx3-ubyte
17 | gzip -c -d t10k-labels-idx1-ubyte.gz > t10k-labels-idx1-ubyte
18 |
--------------------------------------------------------------------------------
/tests/cpp/xor/main.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // MNIST sample
4 | //
5 | // Copyright (C) 2018 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 | #include
9 |
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 |
17 | void XorMicroMlp(int epoch_size, bool binary_mode);
18 | void StochasticLut6(int epoch_size, bool binary_mode);
19 |
20 |
21 | // メイン関数
22 | int main()
23 | {
24 | omp_set_num_threads(1);
25 |
26 | // XorMicroMlp(65536, true);
27 | StochasticLut6(65536, true);
28 |
29 | return 0;
30 | }
31 |
32 |
--------------------------------------------------------------------------------
/documents/sphinx/source/index.rst:
--------------------------------------------------------------------------------
1 | .. BinaryBrain documentation master file, created by
2 | sphinx-quickstart on Mon Sep 16 08:36:13 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to BinaryBrain's documentation!
7 | =============================================
8 |
9 | 本書は `BinaryBrain Ver4 <https://github.com/ryuz/BinaryBrain/tree/ver4_release>`_ のドキュメントです。
10 |
11 |
12 | .. toctree::
13 | :maxdepth: 4
14 | :caption: Contents:
15 |
16 | introduction
17 | quick_start_cpp
18 | quick_start_python
19 | quick_start_verilog
20 | cpp_api
21 | python_api
22 | informations
23 |
24 |
25 | Indices and tables
26 | ==================
27 |
28 | * :ref:`genindex`
29 | * :ref:`modindex`
30 | * :ref:`search`
31 |
--------------------------------------------------------------------------------
/tests/cpp/mnist/get_nmist.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Fetch the four MNIST archives (skipping any that are already present),
4 | # then decompress each archive next to its .gz file.
5 | files="train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte"
6 | 
7 | for f in $files; do
8 |     if [ ! -e $f.gz ]; then
9 |         wget http://yann.lecun.com/exdb/mnist/$f.gz
10 |     fi
11 | done
12 | 
13 | for f in $files; do
14 |     gzip -c -d $f.gz > $f
15 | done
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_models_binary.rst:
--------------------------------------------------------------------------------
1 | バイナリ変調モデル (Binary modulation)
2 | ======================================
3 |
4 | models モジュールのうち、バイナリネットを構成する変調にかかわるモデルです。
5 |
6 |
7 | RealToBinary class
8 | ----------------------------
9 |
10 | .. autoclass:: binarybrain.models.RealToBinary
11 | :members:
12 | :show-inheritance:
13 |
14 |
15 | BinaryToReal class
16 | ----------------------------
17 |
18 | .. autoclass:: binarybrain.models.BinaryToReal
19 | :members:
20 | :show-inheritance:
21 |
22 |
23 | BitEncode class
24 | ----------------------------
25 |
26 | .. autoclass:: binarybrain.models.BitEncode
27 | :members:
28 | :show-inheritance:
29 |
30 |
31 | Reduce class
32 | ----------------------------
33 |
34 | .. autoclass:: binarybrain.models.Reduce
35 | :members:
36 | :show-inheritance:
37 |
38 |
--------------------------------------------------------------------------------
/samples/cpp/mnist/get_nmist.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Fetch the four MNIST archives (skipping any that are already present),
4 | # then decompress each archive next to its .gz file.
5 | files="train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte"
6 | 
7 | for f in $files; do
8 |     if [ ! -e $f.gz ]; then
9 |         wget http://yann.lecun.com/exdb/mnist/$f.gz
10 |     fi
11 | done
12 | 
13 | for f in $files; do
14 |     gzip -c -d $f.gz > $f
15 | done
--------------------------------------------------------------------------------
/include/bb/ModelLoader.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2021 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 | #include
13 |
14 | #include "bb/Model.h"
15 | #include "bb/ObjectLoader.h"
16 |
17 |
18 | namespace bb
19 | {
20 |
21 |
22 | inline std::shared_ptr<Model> Model_LoadFromFile(std::string filename)  // load a serialized object and downcast to Model (nullptr if it is not a Model)
23 | {
24 |     return std::dynamic_pointer_cast<Model>(Object_LoadFromFile(filename));
25 | }
26 |
27 |
28 | }
29 |
30 |
31 | // end of file
32 |
--------------------------------------------------------------------------------
/python/binarybrain/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import os
4 | import sys
5 | if os.name == 'nt' and sys.version_info.major >= 3 and sys.version_info.minor >= 8:
6 | os.add_dll_directory(os.path.join(os.environ['CUDA_PATH'], 'bin'))
7 |
8 | from binarybrain.system import *
9 |
10 | from binarybrain.dtype import *
11 |
12 | from binarybrain.object import *
13 |
14 | from binarybrain.tensor import *
15 | from binarybrain.frame_buffer import *
16 | from binarybrain.variables import *
17 |
18 | from binarybrain.models import *
19 |
20 | from binarybrain.losses import *
21 | from binarybrain.metrics import *
22 | from binarybrain.optimizer import *
23 |
24 | from binarybrain.storage import *
25 | from binarybrain.verilog import *
26 | from binarybrain.hls import *
27 |
28 |
--------------------------------------------------------------------------------
/include/bb/ValueGenerator.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 |
13 | #include "bb/Object.h"
14 |
15 |
16 | namespace bb {
17 |
18 | template <typename T>   // abstract source of successive values of type T
19 | class ValueGenerator : public Object
20 | {
21 | public:
22 |     virtual std::string GetValueGeneratorName(void) const = 0;  // name of the concrete generator
23 | 
24 |     virtual ~ValueGenerator(){}
25 |     virtual void Reset(void) = 0;       // reset generator state
26 |     virtual T GetValue(void) = 0;       // produce the next value
27 | };
28 |
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_models_filter.rst:
--------------------------------------------------------------------------------
1 | 畳み込み/プーリング(Convolution and Pooling)
2 | ================================================
3 |
4 |
5 | models モジュールの、畳み込みやプーリングなどのフィルタ演算を行うモデルです。
6 |
7 |
8 | Convolution2d クラス
9 | ----------------------------
10 |
11 | .. autoclass:: binarybrain.models.Convolution2d
12 | :members:
13 | :show-inheritance:
14 |
15 | MaxPooling クラス
16 | ----------------------------
17 |
18 | .. autoclass:: binarybrain.models.MaxPooling
19 | :members:
20 | :show-inheritance:
21 |
22 |
23 | StochasticMaxPooling クラス
24 | ----------------------------
25 |
26 | .. autoclass:: binarybrain.models.StochasticMaxPooling
27 | :members:
28 | :show-inheritance:
29 |
30 |
31 | UpSampling クラス
32 | ----------------------------
33 |
34 | .. autoclass:: binarybrain.models.UpSampling
35 | :members:
36 | :show-inheritance:
37 |
38 |
--------------------------------------------------------------------------------
/include/bb/StochasticLutModel.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 |
11 | #pragma once
12 |
13 |
14 | #include "bb/SparseModel.h"
15 |
16 |
17 | namespace bb {
18 |
19 |
20 | // Base class for stochastic-LUT related models.
21 | class StochasticLutModel : public SparseModel
22 | {
23 | public:
24 |     virtual Tensor &W(void) = 0;                // LUT table weights
25 |     virtual Tensor const &W(void) const = 0;
26 | 
27 |     virtual Tensor &dW(void) = 0;               // presumably the gradient of W -- confirm against concrete subclasses
28 |     virtual Tensor const &dW(void) const = 0;
29 | };
30 |
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/samples/hls/mnist/simple/testbench/tb_mnist.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "mnist_sample.h"
3 | #include "mnist_test_data.h"
4 |
5 | int main()
6 | {
7 |     std::cout << "start testbench" << std::endl;
8 | 
9 |     int n = 0;      // samples evaluated
10 |     int ok = 0;     // correct classifications
11 |     for ( int i = 0; i < 20; ++i ) {
12 |         ap_uint<1> in[28*28];   // flattened 28x28 binary input image
13 |         for ( int y = 0; y < 28; ++y ) {
14 |             for ( int x = 0; x < 28; ++x ) {
15 |                 in[y*28+x] = test_images[i][y][x];
16 |             }
17 |         }
18 | 
19 |         ap_uint<4> out[1];  // FIX: was out[0] -- a zero-sized array, yet out[0] is written/read below
20 |         mnist_sample(in, out);
21 | 
22 |         n++;
23 |         if ( out[0] == test_labels[i] ) {
24 |             ok++;
25 |         }
26 | 
27 |         std::cout << "out[" << i << "]=" << (int)out[0] << " exp:"<< (int)test_labels[i] << " " << (out[0] == test_labels[i] ? "ok" : "miss") << std::endl;
28 |     }
29 |     std::cout << "accuracy = " << ok << "/" << n << std::endl;
30 | 
31 |     return 0;
32 | }
33 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_models_activation.rst:
--------------------------------------------------------------------------------
1 | 活性化(Activation)
2 | ======================================
3 |
4 |
5 | models モジュールの 活性化層(Activation層)を作るためのモデルです。
6 |
7 |
8 | Binarize クラス
9 | ----------------------------
10 |
11 | .. autoclass:: binarybrain.models.Binarize
12 | :members:
13 | :show-inheritance:
14 |
15 |
16 | Sigmoid クラス
17 | ----------------------------
18 |
19 | .. autoclass:: binarybrain.models.Sigmoid
20 | :members:
21 | :show-inheritance:
22 |
23 |
24 | ReLU クラス
25 | ----------------------------
26 |
27 | .. autoclass:: binarybrain.models.ReLU
28 | :members:
29 | :show-inheritance:
30 |
31 | HardTanh クラス
32 | ----------------------------
33 |
34 | .. autoclass:: binarybrain.models.HardTanh
35 | :members:
36 | :show-inheritance:
37 |
38 |
39 | Softmax クラス
40 | ----------------------------
41 |
42 | .. autoclass:: binarybrain.models.Softmax
43 | :members:
44 | :show-inheritance:
45 |
46 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_metrics.rst:
--------------------------------------------------------------------------------
1 | 評価関数(Metrics functions)
2 | ====================================
3 |
4 |
5 | Metrics クラス
6 | ----------------------------
7 |
8 | .. autoclass:: binarybrain.metrics.Metrics
9 | :members:
10 | :show-inheritance:
11 |
12 |
13 |
14 | MetricsMeanSquaredError クラス
15 | -------------------------------------
16 |
17 | .. autoclass:: binarybrain.metrics.MetricsMeanSquaredError
18 | :members:
19 | :show-inheritance:
20 |
21 |
22 | MetricsCategoricalAccuracy クラス
23 | ------------------------------------
24 |
25 | .. autoclass:: binarybrain.metrics.MetricsCategoricalAccuracy
26 | :members:
27 | :show-inheritance:
28 |
29 |
30 | MetricsBinaryCategoricalAccuracy クラス
31 | ----------------------------------------------
32 |
33 | .. autoclass:: binarybrain.metrics.MetricsBinaryCategoricalAccuracy
34 | :members:
35 | :show-inheritance:
36 |
37 |
38 | ..
39 | .. automodule:: binarybrain.metrics
40 | :members:
41 | :undoc-members:
42 | :show-inheritance:
43 |
44 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/verilator/Makefile:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Flags passed to cmake
4 | CMAKE_FLAGS =
5 | 
6 | 
7 | # Use clang if it is available
8 | ifeq (,$(shell which clang))
9 | WITH_CLANG ?= No
10 | else
11 | WITH_CLANG ?= Yes
12 | endif
13 | 
14 | # Use ninja if it is available
15 | ifeq (,$(shell which ninja))
16 | WITH_NINJA ?= No
17 | else
18 | WITH_NINJA ?= Yes
19 | endif
20 | 
21 | ifeq ($(WITH_CLANG),Yes)
22 | CMAKE_FLAGS += -C ../clang-cmakeinit.cmake
23 | endif
24 | 
25 | ifeq ($(WITH_NINJA),Yes)
26 | CMAKE_FLAGS += -GNinja
27 | endif
28 |
29 |
30 |
31 | all: build run
32 |
33 | .PHONY : build
34 | build:
35 | mkdir -p build && cd build && cmake $(CMAKE_FLAGS) ..
36 | cmake --build build -j
37 |
38 | .PHONY : clean
39 | clean:
40 | rm -rf build
41 |
42 | .PHONY : mostlyclean
43 | mostlyclean: clean
44 | rm -f img_*.png angle_*.png
45 | rm -f *.fst *.vcd
46 | rm -f coverage.dat
47 | rm -fr annotated
48 |
49 |
50 | .PHONY : run
51 | run:
52 | build/tb_verilator
53 |
54 | .PHONY : coverage
55 | coverage:
56 | verilator_coverage --annotate annotated coverage.dat
57 |
58 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/verilator/Makefile:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Flags passed to cmake
4 | CMAKE_FLAGS =
5 | 
6 | 
7 | # Use clang if it is available
8 | ifeq (,$(shell which clang))
9 | WITH_CLANG ?= No
10 | else
11 | WITH_CLANG ?= Yes
12 | endif
13 | 
14 | # Use ninja if it is available
15 | ifeq (,$(shell which ninja))
16 | WITH_NINJA ?= No
17 | else
18 | WITH_NINJA ?= Yes
19 | endif
20 | 
21 | ifeq ($(WITH_CLANG),Yes)
22 | CMAKE_FLAGS += -C ../clang-cmakeinit.cmake
23 | endif
24 | 
25 | ifeq ($(WITH_NINJA),Yes)
26 | CMAKE_FLAGS += -GNinja
27 | endif
28 |
29 |
30 |
31 | all: build run
32 |
33 | .PHONY : build
34 | build:
35 | mkdir -p build && cd build && cmake $(CMAKE_FLAGS) ..
36 | cmake --build build -j
37 |
38 | .PHONY : clean
39 | clean:
40 | rm -rf build
41 |
42 | .PHONY : mostlyclean
43 | mostlyclean: clean
44 | rm -f img_*.png angle_*.png
45 | rm -f *.fst *.vcd
46 | rm -f coverage.dat
47 | rm -fr annotated
48 |
49 |
50 | .PHONY : run
51 | run:
52 | build/tb_verilator
53 |
54 | .PHONY : coverage
55 | coverage:
56 | verilator_coverage --annotate annotated coverage.dat
57 |
58 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/Makefile:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Flags passed to cmake
4 | CMAKE_FLAGS =
5 | 
6 | 
7 | # Use clang if it is available
8 | ifeq (,$(shell which clang))
9 | WITH_CLANG ?= No
10 | else
11 | WITH_CLANG ?= Yes
12 | endif
13 | 
14 | # Use ninja if it is available
15 | ifeq (,$(shell which ninja))
16 | WITH_NINJA ?= No
17 | else
18 | WITH_NINJA ?= Yes
19 | endif
20 | 
21 | ifeq ($(WITH_CLANG),Yes)
22 | CMAKE_FLAGS += -C ../clang-cmakeinit.cmake
23 | endif
24 | 
25 | ifeq ($(WITH_NINJA),Yes)
26 | CMAKE_FLAGS += -GNinja
27 | endif
28 |
29 |
30 |
31 | all: build run
32 |
33 | .PHONY : build
34 | build:
35 | mkdir -p build && cd build && cmake $(CMAKE_FLAGS) ..
36 | cmake --build build -j
37 |
38 | .PHONY : clean
39 | clean:
40 | rm -rf build
41 |
42 | .PHONY : mostlyclean
43 | mostlyclean: clean
44 | rm -f img_*.png angle_*.png
45 | rm -f *.fst *.vcd
46 | rm -f coverage.dat
47 | rm -fr annotated
48 |
49 |
50 | .PHONY : run
51 | run:
52 | build/tb_verilator
53 |
54 | .PHONY : coverage
55 | coverage:
56 | verilator_coverage --annotate annotated coverage.dat
57 |
58 |
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright 2018 by Ryuji Fuchikami
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 |
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/samples/cpp/diabetes/LoadDiabetes.h:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | #pragma once
5 |
6 |
7 | #include "bb/DataType.h"
8 |
9 |
10 | template <typename T>
11 | bb::TrainData<T> LoadDiabetes(int num_train=400)   // load the diabetes regression dataset; first num_train samples train, rest test
12 | {
13 |     const int n = 442;  // total number of samples in the dataset files
14 | 
15 |     std::ifstream ifs_x("diabetes_data.txt");
16 |     std::ifstream ifs_t("diabetes_target.txt");
17 | 
18 |     bb::TrainData<T> td;
19 |     td.x_shape = bb::indices_t({ 10 });     // 10 input features per sample
20 |     td.t_shape = bb::indices_t({ 1 });      // single regression target
21 | 
22 |     for (int i = 0; i < num_train; ++i) {           // training split
23 |         std::vector<T> train(10);
24 |         std::vector<T> target(1);
25 |         for (int j = 0; j < 10; ++j) {
26 |             ifs_x >> train[j];
27 |         }
28 |         ifs_t >> target[0];
29 | 
30 |         td.x_train.push_back(train);
31 |         td.t_train.push_back(target);
32 |     }
33 | 
34 |     for (int i = 0; i < n - num_train; ++i) {       // remaining samples -> test split
35 |         std::vector<T> train(10);
36 |         std::vector<T> target(1);
37 |         for (int j = 0; j < 10; ++j) {
38 |             ifs_x >> train[j];
39 |         }
40 |         ifs_t >> target[0];
41 | 
42 |         td.x_test.push_back(train);
43 |         td.t_test.push_back(target);
44 |     }
45 | 
46 |     return td;
47 | }
48 |
49 |
50 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/Makefile:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Flags passed to cmake
4 | CMAKE_FLAGS =
5 | 
6 | 
7 | # Use clang if it is available
8 | ifeq (,$(shell which clang))
9 | WITH_CLANG ?= No
10 | else
11 | WITH_CLANG ?= Yes
12 | endif
13 | 
14 | # Use ninja if it is available
15 | ifeq (,$(shell which ninja))
16 | WITH_NINJA ?= No
17 | else
18 | WITH_NINJA ?= Yes
19 | endif
20 | 
21 | ifeq ($(WITH_CLANG),Yes)
22 | CMAKE_FLAGS += -C ../clang-cmakeinit.cmake
23 | endif
24 | 
25 | ifeq ($(WITH_NINJA),Yes)
26 | CMAKE_FLAGS += -GNinja
27 | endif
28 |
29 |
30 |
31 | all: build run
32 |
33 | .PHONY : build
34 | build:
35 | mkdir -p build && cd build && cmake $(CMAKE_FLAGS) ..
36 | cmake --build build -j
37 |
38 | .PHONY : clean
39 | clean:
40 | rm -rf build
41 |
42 | .PHONY : mostlyclean
43 | mostlyclean: clean
44 | rm -f img_*.png angle_*.png
45 | rm -f *.fst *.vcd
46 | rm -f coverage.dat
47 | rm -fr annotated
48 |
49 |
50 | .PHONY : run
51 | run:
52 | build/tb_verilator
53 |
54 | .PHONY : coverage
55 | coverage:
56 | verilator_coverage --annotate annotated coverage.dat
57 |
58 |
--------------------------------------------------------------------------------
/tests/cpp/diabetes/LoadDiabetes.h:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | #pragma once
5 |
6 |
7 | #include "bb/DataType.h"
8 |
9 |
10 | template <typename T>
11 | bb::TrainData<T> LoadDiabetes(int num_train=400)   // load the diabetes regression dataset; first num_train samples train, rest test
12 | {
13 |     const int n = 442;  // total number of samples in the dataset files
14 | 
15 |     std::ifstream ifs_x("diabetes_data.txt");
16 |     std::ifstream ifs_t("diabetes_target.txt");
17 | 
18 |     bb::TrainData<T> td;
19 |     td.x_shape = bb::indices_t({ 10 });     // 10 input features per sample
20 |     td.t_shape = bb::indices_t({ 1 });      // single regression target
21 | 
22 |     for (int i = 0; i < num_train; ++i) {           // training split
23 |         std::vector<T> train(10);
24 |         std::vector<T> target(1);
25 |         for (int j = 0; j < 10; ++j) {
26 |             ifs_x >> train[j];
27 |         }
28 |         ifs_t >> target[0];
29 | 
30 |         td.x_train.push_back(train);
31 |         td.t_train.push_back(target);
32 |     }
33 | 
34 |     for (int i = 0; i < n - num_train; ++i) {       // remaining samples -> test split
35 |         std::vector<T> train(10);
36 |         std::vector<T> target(1);
37 |         for (int j = 0; j < 10; ++j) {
38 |             ifs_x >> train[j];
39 |         }
40 |         ifs_t >> target[0];
41 | 
42 |         td.x_test.push_back(train);
43 |         td.t_test.push_back(target);
44 |     }
45 | 
46 |     return td;
47 | }
48 |
49 |
50 |
--------------------------------------------------------------------------------
/documents/sphinx/source/introduction_features.rst:
--------------------------------------------------------------------------------
1 | =================
2 | 概要
3 | =================
4 |
5 |
6 |
7 | 特徴
8 | =======
9 |
10 | BinaryBrain は主に当サイトが研究中の LUT(Look-Up Table)-Networkを実験することを目的に作成した
11 | ディープラーニング用のプラットフォームです。
12 |
13 | LUT-Networkの評価を目的に作成しておりますが、それ以外の用途にも利用可能です。
14 |
15 | 以下の特徴があります
16 |
17 | - ニューラルネットのFPGA化をメインターゲットにしている
18 | - バイナリネットであるにも関わらず変調技術によりAutoencodeや回帰分析が可能
19 | - 独自のDifferentiable-LUTモデルにより、LUTの性能を最大限引き出した学習ができる
20 | - 量子化&疎行列のネットワークでパフォーマンスの良い学習が出来る環境を目指している
21 | - C++で記述されている
22 | - GPU(CUDA)に対応している
23 | - 高速でマニアックな自作レイヤーが作りやすい
24 | - Pythonからの利用も可能
25 |
26 |
27 | 基本的な使い方
28 | =================
29 |
30 | 基本的には C++ や Python で、ネットワークを記述し、学習を行った後に
31 | その結果を verilog などに埋め込んで、FPGA化することを目的に作成しています。
32 |
33 | C++用のCPU版に関してはヘッダオンリーライブラリとなっているため、include 以下にある
34 | ヘッダファイルをインクルードするだけでご利用いただけます。
35 |
36 | GPUを使う場合は、ヘッダ読み込みの際に BB_WITH_CUDA マクロを定義した上で、cuda 以下にある
37 | ライブラリをビルドした上でリンクする必要があります。
38 |
39 | また、BB_WITH_CEREAL マクロを定義すると、途中経過の保存形式に json が利用可能となります。
40 |
41 | Python版を使う場合は、一旦ビルドに成功すれば import するだけで利用可能です。
42 |
43 | 使い方はsamplesなどを参考にしてください。
44 |
--------------------------------------------------------------------------------
/python/upload_pypi.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Build source distributions and interactively upload them to testPyPI and PyPI.
4 | 
5 | git pull
6 | 
7 | ./clean.sh
8 | ./copy_src.sh
9 | 
10 | python3 setup.py build
11 | python3 setup.py sdist
12 | 
13 | # Prompt "upload $1?(yes/no):"; on "yes" echo "upload $3" and upload dist/* to
14 | # twine repository $2; on "no" skip; anything else re-asks.
15 | upload_prompt () {
16 |     while true; do
17 |         echo -n "upload $1?(yes/no):"
18 |         read answer
19 |         case $answer in
20 |             yes)
21 |                 echo "upload $3"
22 |                 twine upload -r $2 dist/*
23 |                 break
24 |                 ;;
25 |             no)
26 |                 echo "don't upload"
27 |                 break
28 |                 ;;
29 |             *)
30 |                 ;;
31 |         esac
32 |     done
33 | }
34 | 
35 | upload_prompt "testPyPI" testpypi testpypi
36 | upload_prompt "PyPI" pypi pypi
37 | 
--------------------------------------------------------------------------------
/tests/gtest/MetricsCategoricalAccuracyTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "gtest/gtest.h"
4 | #include "bb/MetricsCategoricalAccuracy.h"
5 |
6 |
7 |
8 | TEST(MetricsCategoricalAccuracyTest, testMetricsCategoricalAccuracyTest)
9 | {
10 |     bb::FrameBuffer y_buf(2, {3}, BB_TYPE_FP32);    // predictions: 2 frames x 3 classes
11 |     bb::FrameBuffer t_buf(2, {3}, BB_TYPE_FP32);    // one-hot targets, same shape
12 | 
13 |     y_buf.SetFP32(0, 0, 0.2f);
14 |     y_buf.SetFP32(0, 1, 0.4f);  // frame 0: largest score at class 1
15 |     y_buf.SetFP32(0, 2, 0.1f);
16 | 
17 |     y_buf.SetFP32(1, 0, 0.9f);  // frame 1: largest score at class 0
18 |     y_buf.SetFP32(1, 1, 0.1f);
19 |     y_buf.SetFP32(1, 2, 0.5f);
20 | 
21 |     t_buf.SetFP32(0, 0, 0.0f);
22 |     t_buf.SetFP32(0, 1, 1.0f);  // frame 0 target: class 1 (matches the prediction)
23 |     t_buf.SetFP32(0, 2, 0.0f);
24 | 
25 |     t_buf.SetFP32(1, 0, 0.0f);
26 |     t_buf.SetFP32(1, 1, 0.0f);
27 |     t_buf.SetFP32(1, 2, 1.0f);  // frame 1 target: class 2 (prediction was class 0 -> miss)
28 | 
29 |     auto accFunc = bb::MetricsCategoricalAccuracy<>::Create();
30 |     accFunc->CalculateMetrics(y_buf, t_buf);
31 | 
32 |     auto acc = accFunc->GetMetrics();
33 |     EXPECT_DOUBLE_EQ(0.5, acc);     // 1 of 2 frames classified correctly
34 | //  std::cout << "acc : " << acc << std::endl;
35 | }
36 |
37 |
38 |
--------------------------------------------------------------------------------
/samples/hls/mnist/simple/README.md:
--------------------------------------------------------------------------------
1 | # MNIST HLS サンプル
2 |
3 | ## 事前準備
4 |
5 | 事前に学習を行ってソースコードを作成する必要があります。
6 |
7 | samples/python/mnist/MnistDifferentiableLutHls.ipynb
8 |
9 | を Jupyter などで実行してください。
10 |
11 | ネットとして MnistDifferentiableLutHls.h と、テストベンチ用のデータとして mnist_test_data.h が生成されれば OK です。
12 |
13 | また、このサンプルは本リポジトリの submodule である jelly を利用しますので、git clone 時に取得していない場合には
14 |
15 | ```
16 | git submodule update --init --recursive
17 | ```
18 |
19 | などのコマンドで取得ください。
20 |
21 | また Xilinx の Vitis などのツールが必要ですので、それらがインストールされており、事前設定されているものとします。
22 |
23 | 例えば Linux なら
24 |
25 | ```
26 | source /tools/Xilinx/Vitis/2021.2/settings64.sh
27 | ```
28 |
29 | などの実行で事前準備されます(OSやバージョンにより微妙に異なります)。
30 |
31 |
32 | ## 使い方
33 |
34 | ### Cシミュレーション
35 |
36 | 下記のように打つと動きます。
37 |
38 | ```
39 | make csim
40 | ```
41 |
42 | ### 合成
43 |
44 | 下記のように打つと動きます。
45 |
46 | ```
47 | make
48 | ```
49 |
50 | Vivado にインポートするための zip ファイルが出来上がります。
51 |
52 |
53 | ### コシミュレーション
54 |
55 | 下記のように打つと動きます。
56 |
57 | ```
58 | make cosim
59 | ```
60 |
61 | デフォルトで波形確認のための GUI を起動するオプションにしております。
62 | 必要に応じて Makefile を編集ください。
63 |
64 |
--------------------------------------------------------------------------------
/samples/cpp/diabetes/main.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // diabetes regression sample
4 | //
5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 |
9 | #include
10 | #include
11 |
12 | #include "bb/Manager.h"
13 |
14 | void DiabetesAffineRegression(int epoch_size, size_t mini_batch_size);
15 | void DiabetesRegressionMicroMlpLut(int epoch_size, size_t mini_batch_size, size_t mux_size);
16 | void DiabetesRegressionStochasticLut6(int epoch_size, size_t mini_batch_size);
17 |
18 |
19 | // Entry point: run the three diabetes regression experiments.
20 | int main()
21 | {
22 |     omp_set_num_threads(4);
23 | 
24 |     // Regression with a plain DenseAffine network
25 |     DiabetesAffineRegression(64, 16);
26 | 
27 |     // Regression with a binary network built from micro-MLP LUTs
28 |     DiabetesRegressionMicroMlpLut(64, 16, 255);
29 | 
30 |     // Regression with stochastic LUTs, then replayed on a binary net
31 |     DiabetesRegressionStochasticLut6(64, 16);
32 | 
33 |     return 0;
34 | }
35 |
36 |
37 | // end of file
38 |
--------------------------------------------------------------------------------
/tests/cpp/diabetes/main.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // diabetes regression sample
4 | //
5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 |
9 | #include
10 | #include
11 |
12 | #include "bb/Manager.h"
13 |
14 | void DiabetesAffineRegression(int epoch_size, size_t mini_batch_size);
15 | void DiabetesRegressionMicroMlpLut(int epoch_size, size_t mini_batch_size, size_t mux_size);
16 | void DiabetesRegressionStochasticLut6(int epoch_size, size_t mini_batch_size);
17 |
18 |
19 | // Entry point: run the three diabetes regression experiments.
20 | int main()
21 | {
22 |     omp_set_num_threads(4);
23 | 
24 |     // Regression with a plain DenseAffine network
25 |     DiabetesAffineRegression(64, 16);
26 | 
27 |     // Regression with a binary network built from micro-MLP LUTs
28 |     DiabetesRegressionMicroMlpLut(64, 16, 255);
29 | 
30 |     // Regression with stochastic LUTs, then replayed on a binary net
31 |     DiabetesRegressionStochasticLut6(64, 16);
32 | 
33 |     return 0;
34 | }
35 |
36 |
37 | // end of file
38 |
--------------------------------------------------------------------------------
/include/bb/Optimizer.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 |
13 | #include "bb/Object.h"
14 | #include "bb/Variables.h"
15 |
16 |
17 | namespace bb {
18 |
19 | class Optimizer : public Object     // abstract base for parameter optimizers
20 | {
21 | public:
22 |     virtual ~Optimizer() {}
23 | 
24 | public:
25 |     virtual std::string GetOptimizerName(void) const = 0;   // name of the concrete optimizer
26 | 
27 |     virtual void SetVariables(Variables params, Variables grads) = 0;   // bind parameters and their matching gradients
28 |     virtual void ZeroGrad(void) = 0;    // zero the bound gradients
29 |     virtual void Step(void) = 0;        // perform one optimization step (semantics defined by subclasses)
30 | 
31 |     virtual void Update(void)           // convenience: one Step() followed by ZeroGrad()
32 |     {
33 |         this->Step();
34 |         this->ZeroGrad();
35 |     }
36 | 
37 |     virtual void SetLearningRate(double learning_rate) = 0;
38 | };
39 |
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/documents/sphinx/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 |
8 | if "%SPHINXBUILD%" == "" (
9 | set SPHINXBUILD=sphinx-build
10 | )
11 | set SOURCEDIR=source
12 | set BUILDDIR=build
13 |
14 | if "%1" == "" goto help
15 |
16 | if "%1" == "livehtml" (
17 | start http://127.0.0.1:8000
18 | sphinx-autobuild -b html %SOURCEDIR% %BUILDDIR%/html
19 | if errorlevel 1 exit /b 1
20 | goto end
21 | )
22 |
23 | %SPHINXBUILD% >NUL 2>NUL
24 | if errorlevel 9009 (
25 | echo.
26 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
27 | echo.installed, then set the SPHINXBUILD environment variable to point
28 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
29 | echo.may add the Sphinx directory to PATH.
30 | echo.
31 | echo.If you don't have Sphinx installed, grab it from
32 | echo.http://sphinx-doc.org/
33 | exit /b 1
34 | )
35 |
36 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
37 | goto end
38 |
39 | :help
40 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
41 |
42 | :end
43 | popd
44 |
--------------------------------------------------------------------------------
/include/bb/MetricsFunction.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 | #include
13 | #include
14 |
15 | #include "bb/Object.h"
16 | #include "bb/FrameBuffer.h"
17 |
18 |
19 | namespace bb {
20 |
21 |
22 | class MetricsFunction : public Object   // abstract base for evaluation metrics
23 | {
24 | 
25 | public:
26 |     virtual ~MetricsFunction() {}
27 | 
28 |     virtual std::string GetMetricsFunctionName(void) const = 0;     // name of the concrete metric
29 | 
30 |     virtual std::string GetMetricsString(void) { return "accuracy"; }   // display label; defaults to "accuracy"
31 | 
32 |     virtual void Clear(void) = 0;       // reset accumulated state
33 |     virtual double GetMetrics(void) const = 0;      // current metric value
34 |     virtual void CalculateMetrics(FrameBuffer y, FrameBuffer t) = 0;    // accumulate over predictions y and targets t
35 | };
36 |
37 |
38 | }
39 |
40 |
--------------------------------------------------------------------------------
/cuda/Manager.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | #include "cuda_runtime.h"
5 | #include "device_launch_parameters.h"
6 |
7 | #include "bbcu/bbcu.h"
8 | #include "bbcu/bbcu_util.h"
9 |
10 |
11 |
12 | static bool bbcu_HostOnly = false;  // when true, bbcu_IsDeviceAvailable() reports false (host-only mode)
13 |
14 |
15 | BBCU_DLL_EXPORT int bbcu_GetDeviceCount(void)   // number of CUDA devices; 0 when none or when the query fails
16 | {
17 |     int dev_count = 0;
18 |     auto status = cudaGetDeviceCount(&dev_count);
19 |     if (status != cudaSuccess) {
20 |         dev_count = 0;  // swallow the error and report "no devices"
21 |     }
22 |     return dev_count;
23 | }
24 |
25 | BBCU_DLL_EXPORT int bbcu_GetDevice(void)    // index of the currently selected CUDA device
26 | {
27 |     int device;
28 |     BB_CUDA_SAFE_CALL(cudaGetDevice(&device));  // error handling delegated to BB_CUDA_SAFE_CALL
29 |     return device;
30 | }
31 |
32 | BBCU_DLL_EXPORT void bbcu_SetDevice(int device)     // select the active CUDA device
33 | {
34 |     BB_CUDA_SAFE_CALL(cudaSetDevice(device));   // error handling delegated to BB_CUDA_SAFE_CALL
35 | }
36 |
37 |
38 |
39 |
40 | BBCU_DLL_EXPORT void bbcu_SetHostOnly(bool hostOnly)    // toggle host-only mode (see bbcu_IsDeviceAvailable)
41 | {
42 |     bbcu_HostOnly = hostOnly;
43 | }
44 |
45 |
46 | BBCU_DLL_EXPORT bool bbcu_IsHostOnly(void)  // current host-only flag
47 | {
48 |     return bbcu_HostOnly;
49 | }
50 |
51 |
52 | BBCU_DLL_EXPORT bool bbcu_IsDeviceAvailable(void)   // true unless host-only mode was set; does not probe actual hardware
53 | {
54 |     return !bbcu_HostOnly;
55 | }
56 |
57 |
58 | // end of file
59 |
--------------------------------------------------------------------------------
/cuda/bbcu.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.7.34003.232
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bbcu", "bbcu.vcxproj", "{FEADE517-59B9-4551-AD9D-D181A1442EA7}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Release|x64 = Release|x64
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.ActiveCfg = Debug|x64
15 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.Build.0 = Debug|x64
16 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.ActiveCfg = Release|x64
17 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.Build.0 = Release|x64
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {7B3BD921-1A3E-4E24-870E-8D07D71B0F91}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/verilator/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
cmake_minimum_required(VERSION 3.16)

project(tb_verilator)


# find_package(... REQUIRED) already raises a fatal error when the package is
# missing, so no explicit "NOT found" guard is needed.
find_package(verilator 4.2 REQUIRED HINTS $ENV{VERILATOR_ROOT} ${VERILATOR_ROOT})
find_package(OpenCV REQUIRED)


add_executable(tb_verilator ./tb_verilator.cpp)

target_include_directories(tb_verilator PUBLIC "../../../../../jelly/include")

target_include_directories(tb_verilator PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(tb_verilator PUBLIC ${OpenCV_LIBS})

# Target-scoped settings: setting the global CMAKE_CXX_FLAGS after the target
# was created clobbered any user-provided flags; scope the options instead.
target_compile_features(tb_verilator PUBLIC cxx_std_17)
target_compile_options(tb_verilator PRIVATE -O3)
target_compile_definitions(tb_verilator PUBLIC WITH_OPENCV2)
target_link_libraries(tb_verilator PUBLIC "-pthread")

verilate(tb_verilator
#   COVERAGE
#   TRACE
    INCLUDE_DIRS "."
    VERILATOR_ARGS -f verilator_cmd.txt -Os
    SOURCES ./tb_verilator.sv ../MnistLutCnn.v)
35 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/verilator/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
cmake_minimum_required(VERSION 3.16)

project(tb_verilator)


# find_package(... REQUIRED) already raises a fatal error when the package is
# missing, so no explicit "NOT found" guard is needed.
find_package(verilator 4.2 REQUIRED HINTS $ENV{VERILATOR_ROOT} ${VERILATOR_ROOT})
find_package(OpenCV REQUIRED)


add_executable(tb_verilator ./tb_verilator.cpp)

target_include_directories(tb_verilator PUBLIC "../../../../../jelly/include")

target_include_directories(tb_verilator PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(tb_verilator PUBLIC ${OpenCV_LIBS})

# Target-scoped settings: setting the global CMAKE_CXX_FLAGS after the target
# was created clobbered any user-provided flags; scope the options instead.
target_compile_features(tb_verilator PUBLIC cxx_std_17)
target_compile_options(tb_verilator PRIVATE -O3)
# target_compile_definitions(tb_verilator PUBLIC WITH_OPENCV2)  # disabled in this testbench
target_link_libraries(tb_verilator PUBLIC "-pthread")

verilate(tb_verilator
#   COVERAGE
#   TRACE
    INCLUDE_DIRS "."
    VERILATOR_ARGS -f verilator_cmd.txt
    SOURCES ./tb_verilator.sv ../MnistLutSimple.v)
35 |
--------------------------------------------------------------------------------
/python/projects/thrust/PyBinaryBrainThrust.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.7.34003.232
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "core", "core.vcxproj", "{ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Release|x64 = Release|x64
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}.Debug|x64.ActiveCfg = Debug|x64
15 | {ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}.Debug|x64.Build.0 = Debug|x64
16 | {ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}.Release|x64.ActiveCfg = Release|x64
17 | {ACAFEE7F-E3FF-431C-AC93-E6591C0E358D}.Release|x64.Build.0 = Release|x64
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {2C10458D-2E46-4D6D-AB03-166526A12BF9}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
cmake_minimum_required(VERSION 3.16)

project(tb_verilator)


# find_package(... REQUIRED) already raises a fatal error when the package is
# missing, so no explicit "NOT found" guard is needed.
find_package(verilator 4.2 REQUIRED HINTS $ENV{VERILATOR_ROOT} ${VERILATOR_ROOT})
find_package(OpenCV REQUIRED)


add_executable(tb_verilator ./tb_verilator.cpp)

target_include_directories(tb_verilator PUBLIC "../../../../../jelly/include")

target_include_directories(tb_verilator PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(tb_verilator PUBLIC ${OpenCV_LIBS})

# Target-scoped settings: setting the global CMAKE_CXX_FLAGS after the target
# was created clobbered any user-provided flags; scope the options instead.
target_compile_features(tb_verilator PUBLIC cxx_std_17)
target_compile_options(tb_verilator PRIVATE -O3)
# target_compile_definitions(tb_verilator PUBLIC WITH_OPENCV2)  # disabled in this testbench
target_link_libraries(tb_verilator PUBLIC "-pthread")

verilate(tb_verilator
#   COVERAGE
#   TRACE
    INCLUDE_DIRS "."
    VERILATOR_ARGS -f verilator_cmd.txt -Os
    SOURCES ./tb_verilator.sv ../MnistSemanticSegmentation.v)
35 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
cmake_minimum_required(VERSION 3.16)

project(tb_verilator)


# find_package(... REQUIRED) already raises a fatal error when the package is
# missing, so no explicit "NOT found" guard is needed.
find_package(verilator 4.2 REQUIRED HINTS $ENV{VERILATOR_ROOT} ${VERILATOR_ROOT})
find_package(OpenCV REQUIRED)


add_executable(tb_verilator ./tb_verilator.cpp)

target_include_directories(tb_verilator PUBLIC "../../../../../jelly/include")

target_include_directories(tb_verilator PUBLIC ${OpenCV_INCLUDE_DIRS})
target_link_libraries(tb_verilator PUBLIC ${OpenCV_LIBS})

# Target-scoped settings: setting the global CMAKE_CXX_FLAGS after the target
# was created clobbered any user-provided flags; scope the options instead.
target_compile_features(tb_verilator PUBLIC cxx_std_17)
target_compile_options(tb_verilator PRIVATE -O3)
# target_compile_definitions(tb_verilator PUBLIC WITH_OPENCV2)  # disabled in this testbench
target_link_libraries(tb_verilator PUBLIC "-pthread")

verilate(tb_verilator
#   COVERAGE
#   TRACE
    INCLUDE_DIRS "."
    VERILATOR_ARGS -f verilator_cmd.txt -Os
    SOURCES ./tb_verilator.sv ../MnistSegmentationAndClassification.v)
35 |
--------------------------------------------------------------------------------
/documents/sphinx/source/locale/en/LC_MESSAGES/index.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2019, Ryuji Fuchikami
3 | # This file is distributed under the same license as the BinaryBrain
4 | # package.
5 | # FIRST AUTHOR , 2019.
6 | #
7 | #, fuzzy
8 | msgid ""
9 | msgstr ""
10 | "Project-Id-Version: BinaryBrain \n"
11 | "Report-Msgid-Bugs-To: \n"
12 | "POT-Creation-Date: 2019-09-22 08:35+0900\n"
13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
14 | "Last-Translator: FULL NAME \n"
15 | "Language-Team: LANGUAGE \n"
16 | "MIME-Version: 1.0\n"
17 | "Content-Type: text/plain; charset=utf-8\n"
18 | "Content-Transfer-Encoding: 8bit\n"
19 | "Generated-By: Babel 2.7.0\n"
20 |
21 | #: ../../source/index.rst:7
22 | msgid "Welcome to BinaryBrain's documentation!"
23 | msgstr ""
24 |
25 | #: ../../source/index.rst:9
26 | msgid "Contents:"
27 | msgstr ""
28 |
29 | #: ../../source/index.rst:19
30 | msgid "Indices and tables"
31 | msgstr ""
32 |
33 | #: ../../source/index.rst:21
34 | msgid ":ref:`genindex`"
35 | msgstr ""
36 |
37 | #: ../../source/index.rst:22
38 | msgid ":ref:`modindex`"
39 | msgstr ""
40 |
41 | #: ../../source/index.rst:23
42 | msgid ":ref:`search`"
43 | msgstr ""
44 |
45 |
--------------------------------------------------------------------------------
/documents/sphinx/source/locale/ja/LC_MESSAGES/index.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2019, Ryuji Fuchikami
3 | # This file is distributed under the same license as the BinaryBrain
4 | # package.
5 | # FIRST AUTHOR , 2019.
6 | #
7 | #, fuzzy
8 | msgid ""
9 | msgstr ""
10 | "Project-Id-Version: BinaryBrain \n"
11 | "Report-Msgid-Bugs-To: \n"
12 | "POT-Creation-Date: 2019-09-22 08:35+0900\n"
13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
14 | "Last-Translator: FULL NAME \n"
15 | "Language-Team: LANGUAGE \n"
16 | "MIME-Version: 1.0\n"
17 | "Content-Type: text/plain; charset=utf-8\n"
18 | "Content-Transfer-Encoding: 8bit\n"
19 | "Generated-By: Babel 2.7.0\n"
20 |
21 | #: ../../source/index.rst:7
22 | msgid "Welcome to BinaryBrain's documentation!"
23 | msgstr ""
24 |
25 | #: ../../source/index.rst:9
26 | msgid "Contents:"
27 | msgstr ""
28 |
29 | #: ../../source/index.rst:19
30 | msgid "Indices and tables"
31 | msgstr ""
32 |
33 | #: ../../source/index.rst:21
34 | msgid ":ref:`genindex`"
35 | msgstr ""
36 |
37 | #: ../../source/index.rst:22
38 | msgid ":ref:`modindex`"
39 | msgstr ""
40 |
41 | #: ../../source/index.rst:23
42 | msgid ":ref:`search`"
43 | msgstr ""
44 |
45 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_losses.rst:
--------------------------------------------------------------------------------
1 | 損失関数(Loss functions)
2 | ==================================
3 |
4 | LossFunction クラス
5 | --------------------------------------
6 |
7 | .. autoclass:: binarybrain.losses.LossFunction
8 | :members:
9 | :show-inheritance:
10 |
11 |
12 | LossMeanSquaredError クラス
13 | --------------------------------------
14 |
15 | .. autoclass:: binarybrain.losses.LossMeanSquaredError
16 | :members:
17 | :show-inheritance:
18 |
19 |
20 | LossCrossEntropy クラス
21 | --------------------------------------
22 |
23 | .. autoclass:: binarybrain.losses.LossCrossEntropy
24 | :members:
25 | :show-inheritance:
26 |
27 | LossBinaryCrossEntropy クラス
28 | --------------------------------------
29 |
30 | .. autoclass:: binarybrain.losses.LossBinaryCrossEntropy
31 | :members:
32 | :show-inheritance:
33 |
34 | LossSoftmaxCrossEntropy クラス
35 | --------------------------------------
36 |
37 | .. autoclass:: binarybrain.losses.LossSoftmaxCrossEntropy
38 | :members:
39 | :show-inheritance:
40 |
41 |
42 | LossSigmoidCrossEntropy クラス
43 | --------------------------------------
44 |
45 | .. autoclass:: binarybrain.losses.LossSigmoidCrossEntropy
46 | :members:
47 | :show-inheritance:
48 |
49 |
--------------------------------------------------------------------------------
/documents/sphinx/source/locale/ja/LC_MESSAGES/sample_rtl.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2019, Ryuji Fuchikami
3 | # This file is distributed under the same license as the BinaryBrain
4 | # package.
5 | # FIRST AUTHOR , 2019.
6 | #
7 | #, fuzzy
8 | msgid ""
9 | msgstr ""
10 | "Project-Id-Version: BinaryBrain 3.9\n"
11 | "Report-Msgid-Bugs-To: \n"
12 | "POT-Creation-Date: 2019-09-29 19:33+0900\n"
13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
14 | "Last-Translator: FULL NAME \n"
15 | "Language-Team: LANGUAGE \n"
16 | "MIME-Version: 1.0\n"
17 | "Content-Type: text/plain; charset=utf-8\n"
18 | "Content-Transfer-Encoding: 8bit\n"
19 | "Generated-By: Babel 2.7.0\n"
20 |
21 | #: ../../source/sample_rtl.rst:3
22 | msgid "RTLの試し方"
23 | msgstr ""
24 |
25 | #: ../../source/sample_rtl.rst:7
26 | msgid "sampleの動かし方"
27 | msgstr ""
28 |
29 | #: ../../source/sample_rtl.rst:9
30 | msgid "C++, Pythonともに Verilog RTL のソースファイルの出力が可能です。 出力したRTLの試し方は"
31 | msgstr ""
32 |
33 | #: ../../source/sample_rtl.rst:12
34 | msgid "https://github.com/ryuz/BinaryBrain/tree/master/samples/mnist/verilog"
35 | msgstr ""
36 |
37 | #: ../../source/sample_rtl.rst:15
38 | msgid "の readme.txt を参照ください。"
39 | msgstr ""
40 |
41 |
--------------------------------------------------------------------------------
/documents/sphinx/source/locale/en/LC_MESSAGES/sample_rtl.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2019, Ryuji Fuchikami
3 | # This file is distributed under the same license as the BinaryBrain
4 | # package.
5 | # FIRST AUTHOR , 2019.
6 | #
7 | #, fuzzy
8 | msgid ""
9 | msgstr ""
10 | "Project-Id-Version: BinaryBrain 3.9\n"
11 | "Report-Msgid-Bugs-To: \n"
12 | "POT-Creation-Date: 2019-09-29 19:33+0900\n"
13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
14 | "Last-Translator: FULL NAME \n"
15 | "Language-Team: LANGUAGE \n"
16 | "MIME-Version: 1.0\n"
17 | "Content-Type: text/plain; charset=utf-8\n"
18 | "Content-Transfer-Encoding: 8bit\n"
19 | "Generated-By: Babel 2.7.0\n"
20 |
21 | #: ../../source/sample_rtl.rst:3
22 | msgid "RTLの試し方"
23 | msgstr "Evaluation for FPGA"
24 |
25 | #: ../../source/sample_rtl.rst:7
26 | msgid "sampleの動かし方"
27 | msgstr "sample program"
28 |
#: ../../source/sample_rtl.rst:9
msgid "C++, Pythonともに Verilog RTL のソースファイルの出力が可能です。 出力したRTLの試し方は"
msgstr "Both the C++ and the Python versions can export Verilog RTL source files. For instructions on trying the exported RTL, see"

#: ../../source/sample_rtl.rst:12
msgid "https://github.com/ryuz/BinaryBrain/tree/master/samples/mnist/verilog"
msgstr ""

#: ../../source/sample_rtl.rst:15
msgid "の readme.txt を参照ください。"
msgstr "and refer to the readme.txt found there."
40 |
41 |
--------------------------------------------------------------------------------
/python/binarybrain/variables.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import binarybrain as bb
4 | import binarybrain.core as core
5 | import numpy as np
6 | from typing import List
7 |
8 |
class Variables():
    """Variables class

    Bridge object between an Optimizer and the variables it actually trains.
    Internally it aggregates the Tensors (weights and gradients) held by the
    individual models into a single ``core.Variables`` container.
    """

    def __init__(self):
        self.variables = core.Variables()

    @staticmethod
    def from_core(variables):
        """Wrap an existing ``core.Variables`` object."""
        wrapped = Variables()
        wrapped.variables = variables
        return wrapped

    def get_core(self):
        """Return the underlying ``core.Variables`` container."""
        return self.variables

    def append(self, variables):
        """Append variables.

        Args:
            variables (Variables) : variables to append
        """
        self.variables.push_back(variables.get_core())

    def get_size(self):
        """Number of stored variables."""
        return self.variables.get_size()

    def at(self, item):
        """Return the variable stored at index ``item``."""
        return self.variables.at(item)

    def __len__(self):
        return self.get_size()

    def __getitem__(self, item):
        return self.at(item)
47 |
48 |
--------------------------------------------------------------------------------
/tests/hls/mnist/mnist_simple/testbench/tb_mnist_simple.cpp:
--------------------------------------------------------------------------------
1 |
#include <iostream>   // NOTE(review): header names lost in extraction; restored from std::cout/std::ifstream/assert usage
#include <fstream>
#include <cassert>
4 | #include "mnist_simple.h"
5 |
6 | int main()
7 | {
8 | // データファイル読み込み
9 | std::ifstream is("../../../../testbench/mnist_hls_test.txt");
10 | if( !is ) { std::cout << "open error : mnist_hls_test.txt" << std::endl; return 1; }
11 |
12 | int n = 0;
13 | int ok = 0;
14 | for ( int i = 0; i < 64; ++i ) {
15 | // データ読み込み
16 | ap_uint<28*28> in_data;
17 | int label;
18 | is >> label;
19 | for ( int j = 0; j < 28*28; ++j ) {
20 | int val;
21 | is >> val;
22 | in_data[j] = val;
23 | }
24 |
25 | // テスト
26 | auto out_data = mnist_simple(in_data);
27 |
28 | // 確認
29 | ap_uint<10> exp_data = (1 << label);
30 | if ( out_data == exp_data ) {
31 | std::cout << "[OK] ";
32 | ok++;
33 | } else {
34 | std::cout << "[miss] ";
35 | }
36 | std::cout << "label: " << std::dec << label << " out: 0x" << std::hex << (int)out_data << std::endl;
37 | n++;
38 | }
39 | std::cout << "total : " << std::dec << ok << "/" << n << std::endl;
40 |
41 | // とりあえず1/3整合していればOKとする
42 | assert(ok >= n/3);
43 |
44 | return 0;
45 | }
46 |
47 |
--------------------------------------------------------------------------------
/samples/hls/mnist/simple/src/mnist_sample.cpp:
--------------------------------------------------------------------------------
1 | #include "mnist_sample.h"
2 | #include "MnistDifferentiableLutHls.h"
3 |
4 |
// Final depthwise affine layer: for each of the 10 classes compute
//   y[i] = b_tbl[i] + sum_j x[i*DWA_DEPTH + j] * W_tbl[i][j]
// where x is a packed bit vector (DWA_DEPTH bits per class) and
// W_tbl / b_tbl are pre-trained integer weights (from the included header).
// Both loops are fully unrolled so the layer synthesizes combinationally.
void MnistDepthwiseAffine_layer(int y[10], const ap_uint<10*DWA_DEPTH> x)
{
    for ( int i = 0; i < 10; ++i ) {
#pragma HLS unroll
        int sum = (int)b_tbl[i];
        for ( int j = 0; j < DWA_DEPTH; ++j ) {
#pragma HLS unroll
            // x[...] is a single bit (0/1): adds W_tbl[i][j] only when set.
            sum += (int)x[i*DWA_DEPTH + j] * (int)W_tbl[i][j];
        }
        y[i] = sum;
    }
}
17 |
18 |
// kernel
// HLS top-level: classify one binarized 28x28 MNIST image.
//   in  : 784 one-bit input pixels
//   out : out[0] = predicted digit (0..9) as a 4-bit value
void mnist_sample(
    const ap_uint<1> in[28*28],
    ap_uint<4> out[1]
)
{
    // input
    // Pack the pixel array into one wide bit vector for the LUT layers.
    ap_uint<28*28> x0;
    for ( int i = 0; i < 28*28; ++i ) {
        x0[i] = in[i];
    }
    // Three generated binary-LUT network layers (see included header).
    auto x1 = MnistLut_layer1(x0);
    auto x2 = MnistLut_layer2(x1);
    auto x3 = MnistLut_layer3(x2);

    // Depthwise Affine
    int y[10];
    MnistDepthwiseAffine_layer(y, x3);

    // argmax
    // Pick the class with the largest score.
    // NOTE(review): -32768 assumes scores stay within 16-bit range -- confirm.
    int max_val = -32768;
    ap_uint<4> max_idx = 0;
    for ( int i = 0; i < 10; ++i ) {
        if ( y[i] > max_val ) {
            max_val = y[i];
            max_idx = i;
        }
    }

    // output
    out[0] = max_idx;
}
51 |
52 |
--------------------------------------------------------------------------------
/include/bb/Version.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018-2024 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
#include <string>   // NOTE(review): header name lost in extraction; restored from std::string/std::to_string usage
13 |
14 |
namespace bb
{

// Library version number components.
#define BB_MAJOR_VERSION 4
#define BB_MINOR_VERSION 3
#define BB_REVISION_NUMBER 2

// Full version string, e.g. "4.3.2".
#define BB_VERSION (std::to_string(BB_MAJOR_VERSION) + "." + std::to_string(BB_MINOR_VERSION) + "." + std::to_string(BB_REVISION_NUMBER))


// Get the version numbers (any output pointer may be nullptr to skip it).
inline void GetVersion(int *major_version, int *minor_version=nullptr, int *revision_number=nullptr)
{
    if ( major_version != nullptr ) { *major_version = BB_MAJOR_VERSION; }
    if ( minor_version != nullptr ) { *minor_version = BB_MINOR_VERSION; }
    if ( revision_number != nullptr ) { *revision_number = BB_REVISION_NUMBER; }
}

// Get the version as a "major.minor.revision" string.
inline std::string GetVersionString(void)
{
    return BB_VERSION;
}


}
41 |
42 |
43 | // end of file
44 |
--------------------------------------------------------------------------------
/samples/cpp/diabetes/sample_diabetes.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | ソース ファイル
20 |
21 |
22 | ソース ファイル
23 |
24 |
25 | ソース ファイル
26 |
27 |
28 | ソース ファイル
29 |
30 |
31 |
--------------------------------------------------------------------------------
/documents/sphinx/source/python_module_models_operation.rst:
--------------------------------------------------------------------------------
1 | 演算モデル (Operation models)
2 | ======================================
3 |
4 |
5 | models モジュールには、ネットワークを構成するための各種演算モデルがあります。
6 |
7 |
8 | DifferentiableLut クラス
9 | ----------------------------
10 |
11 | .. autoclass:: binarybrain.models.DifferentiableLut
12 | :members:
13 | :show-inheritance:
14 |
15 |
16 | AverageLut クラス
17 | ----------------------------
18 |
19 | .. autoclass:: binarybrain.models.AverageLut
20 | :members:
21 | :show-inheritance:
22 |
23 | BinaryLut クラス
24 | ----------------------------
25 |
26 | .. autoclass:: binarybrain.models.BinaryLut
27 | :members:
28 | :show-inheritance:
29 |
30 |
31 | DenseAffine クラス
32 | ----------------------------
33 |
34 | .. autoclass:: binarybrain.models.DenseAffine
35 | :members:
36 | :show-inheritance:
37 |
38 | DenseAffineQuantize クラス
39 | -----------------------------
40 |
41 | .. autoclass:: binarybrain.models.DenseAffineQuantize
42 | :members:
43 | :show-inheritance:
44 |
45 |
46 |
47 | DepthwiseDenseAffine クラス
48 | ------------------------------
49 |
50 | .. autoclass:: binarybrain.models.DepthwiseDenseAffine
51 | :members:
52 | :show-inheritance:
53 |
54 |
55 | DepthwiseDenseAffineQuantize クラス
56 | --------------------------------------
57 |
58 | .. autoclass:: binarybrain.models.DepthwiseDenseAffineQuantize
59 | :members:
60 | :show-inheritance:
61 |
62 |
--------------------------------------------------------------------------------
/include/bb/LossFunction.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 |
#include <string>   // NOTE(review): header name lost in extraction; restored from std::string usage
14 |
15 |
16 | #include "bb/Object.h"
17 | #include "bb/FrameBuffer.h"
18 |
19 |
20 | namespace bb {
21 |
22 |
// Abstract base class for loss functions.
class LossFunction : public Object
{
public:
    // Name identifying the concrete loss class.
    virtual std::string GetLossFunctionName(void) const = 0;

    /**
     * @brief  Clear the accumulated loss.
     * @detail Resets the loss accumulated by previous CalculateLoss() calls.
     */
    virtual void Clear(void) = 0;

    /**
     * @brief  Get the accumulated loss.
     * @detail Kept separate from CalculateLoss because reading the loss may
     *         require a memory copy back from the GPU.
     * @return the loss accumulated so far
     */
    virtual double GetLoss(void) const = 0;

    /**
     * @brief  Compute the loss.
     * @param  y_buf           network output
     * @param  t_buf           expected (target) values
     * @param  mini_batch_size size of the mini-batch
     * @return the error gradient to feed into backward
     */
    virtual FrameBuffer CalculateLoss(FrameBuffer y_buf, FrameBuffer t_buf, index_t mini_batch_size) = 0;
};
52 |
53 |
54 | }
55 |
56 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/common/bb_lut.v:
--------------------------------------------------------------------------------
1 | // LUT
2 |
3 |
4 | `timescale 1ns / 1ps
5 | `default_nettype none
6 |
7 |
8 |
9 | module bb_lut
10 | #(
11 | parameter DEVICE = "RTL",
12 | parameter N = 6,
13 | parameter [(1<
2 | #include
3 | #include
4 | #include
5 |
6 | #include "bbcu/bbcu.h"
7 | #include "bbcu/bbcu_util.h"
8 |
9 | int Test_MicroMlp_Forward(void);
10 | int Test_MicroMlp_Backward(void);
11 |
12 | int Test_StochasticLut6_Forward(void);
13 | int Test_StochasticLut6_Backward(void);
14 |
15 | void bbcu_ShufleTest(void);
16 |
// Ad-hoc driver for the bbcu unit tests.
int main()
{
    // Current quick check: run only the shuffle test, wait for a key, exit.
    bbcu_ShufleTest();
    getchar();
    return 0;

    // NOTE(review): everything below is unreachable because of the early
    // return above -- kept as debug scaffolding for the local-heap allocator
    // and the MicroMlp / StochasticLut6 tests.

    // Exercise the local heap: allocate, free out of order, re-allocate.
    void* ptr0 = bbcu_LocalHeap_Malloc(2*1024);
    void* ptr1 = bbcu_LocalHeap_Malloc(1*1024);
    void* ptr2 = bbcu_LocalHeap_Malloc(3*1024);

    bbcu_LocalHeap_Free(ptr0);
    bbcu_LocalHeap_Free(ptr2);

    void* ptr00 = bbcu_LocalHeap_Malloc(2*1024);
    void* ptr02 = bbcu_LocalHeap_Malloc(3*1024);

    bbcu_LocalHeap_Free(ptr00);
    bbcu_LocalHeap_Free(ptr1);
    bbcu_LocalHeap_Free(ptr02);

#if 0
    std::cout << "---- Test_MicroMlp_Forward ----" << std::endl;
    Test_MicroMlp_Forward();

    std::cout << "---- Test_MicroMlp_Backward ----" << std::endl;
    Test_MicroMlp_Backward();
#endif

#if 1
    std::cout << "---- Test_StochasticLut6_Forward ----" << std::endl;
    Test_StochasticLut6_Forward();

//  std::cout << "---- Test_StochasticLut6_Backward ----" << std::endl;
//  Test_StochasticLut6_Backward();
#endif

    return 0;
}
55 |
56 |
57 |
--------------------------------------------------------------------------------
/python/binarybrain/Makefile:
--------------------------------------------------------------------------------
1 |
2 |
3 | PYTHON_PATH = /usr/include/python3.6m
4 | CEREAL_PATH = ../cereal-1.2.2
5 |
6 | TARGET = core$(shell python3-config --extension-suffix)
7 |
8 | CUARCH = -gencode=arch=compute_35,code=sm_35 \
9 | -gencode=arch=compute_50,code=sm_50 \
10 | -gencode=arch=compute_60,code=sm_60 \
11 | -gencode=arch=compute_61,code=sm_61 \
12 | -gencode=arch=compute_75,code=sm_75
13 |
14 | CFLAGS = -DBB_ASSERT_EXCEPTION=1 -DBB_WITH_CEREAL=1 -DBB_WITH_CUDA=1 \
15 | -I$(CEREAL_PATH)/include -Iinclude -Ibinarybrain/cuda \
16 | -Xcompiler -pthread -Xcompiler -mavx2 -Xcompiler -mfma \
17 | -Xcompiler -fopenmp -Xcompiler -std=c++14 -Xcompiler -fPIC \
18 | $(CUARCH) \
19 | $(shell python3 -m pybind11 --includes)
20 |
21 | CUFLAGS = -DBB_ASSERT_EXCEPTION=1 -DBB_WITH_CEREAL=1 -DBB_WITH_CUDA=1 \
22 | -Iinclude -Icuda \
23 | -I$(CEREAL_PATH) \
24 | $(CUARCH) \
25 | -std=c++11 -Xcompiler -fPIC \
26 | $(shell python3 -m pybind11 --includes)
27 |
28 | LDFLAG = -Xcompiler -pthread -Xcompiler -fopenmp -lstdc++ -lm -lcublas
29 |
30 |
31 | .PHONY: all
32 | all: $(TARGET)
33 |
34 | .PHONY: clean
35 | clean:
36 | rm -f $(TARGET) core_bbcu.o core_main.o
37 |
38 | $(TARGET): core_bbcu.o core_main.o
39 | nvcc -shared core_bbcu.o core_main.o $(LDFLAG) -o $(TARGET)
40 |
41 | core_main.o: src/core_main.cpp
42 | nvcc $(CFLAGS) -c src/core_main.cpp -o core_main.o
43 |
44 | core_bbcu.o: src/core_bbcu.cu
45 | nvcc $(CUFLAGS) -c src/core_bbcu.cu -o core_bbcu.o
46 |
47 |
--------------------------------------------------------------------------------
/include/bb/CudaUtility.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 |
13 | #ifdef BB_WITH_CUDA
14 | #include "cuda_runtime.h"
15 | #include "bbcu/bbcu_util.h"
16 | #endif
17 |
18 | #include "bb/DataType.h"
19 | #include "bb/Utility.h"
20 |
21 |
22 | namespace bb {
23 |
24 |
25 | #ifdef BB_WITH_CUDA
26 |
// RAII guard that switches the current CUDA device for the calling host
// thread and restores the previous one on scope exit.
// Passing a negative device id makes the guard a no-op.
class CudaDevicePush
{
protected:
    int m_old_device;   // device active before the push (read only when m_device >= 0 and a switch happened)
    int m_device;       // requested device, or < 0 for "do nothing"

public:
    // Non-copyable: a copy would restore the old device a second time.
    CudaDevicePush(CudaDevicePush const &) = delete;
    CudaDevicePush& operator=(CudaDevicePush const &) = delete;

    CudaDevicePush(int device)
    {
        m_device = device;
        if ( m_device >= 0 ) {
            BB_CUDA_SAFE_CALL(cudaGetDevice(&m_old_device));
            if ( m_old_device != m_device ) {
                BB_CUDA_SAFE_CALL(cudaSetDevice(m_device));
            }
        }
    }

    ~CudaDevicePush()
    {
        // Restore only if the constructor actually switched devices.
        if ( m_device >= 0 && (m_old_device != m_device) ) {
            BB_CUDA_SAFE_CALL(cudaSetDevice(m_old_device));
        }
    }
};
52 |
53 | #else
54 |
// Host-only build: device switching is meaningless, so the guard is a no-op
// stub with the same interface as the CUDA-enabled version.
class CudaDevicePush
{
public:
    CudaDevicePush(int device) {}
    ~CudaDevicePush() {}
};
61 |
62 | #endif
63 |
64 |
65 | }
66 |
67 | // end of file
68 |
--------------------------------------------------------------------------------
/tests/gtest/cudaMatrixColwiseSumTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include "cuda_runtime.h"
8 | #include "device_launch_parameters.h"
9 |
10 | #include "gtest/gtest.h"
11 |
12 | #if BB_WITH_CUDA
13 |
14 | #include "bb/FrameBuffer.h"
15 | #include "bbcu/bbcu.h"
16 |
17 |
// Checks bbcu_fp32_MatrixColwiseSum: summing each node's values across all
// frames of a FrameBuffer into a node-sized Tensor.
TEST(cudaMatrixColwiseSumTest, test_cudaMatrixColwiseSum)
{
    // Tiny fixture: 2 nodes x 3 frames with known values.
    int const node_size = 2;
    int const frame_size = 3;

    bb::FrameBuffer x_buf(frame_size, {node_size}, BB_TYPE_FP32);
    bb::Tensor y_buf({node_size}, BB_TYPE_FP32);

    {
        // x(frame, node): node 0 holds 1,2,3 and node 1 holds 4,5,6.
        x_buf.SetFP32(0, 0, 1);
        x_buf.SetFP32(1, 0, 2);
        x_buf.SetFP32(2, 0, 3);

        x_buf.SetFP32(0, 1, 4);
        x_buf.SetFP32(1, 1, 5);
        x_buf.SetFP32(2, 1, 6);
    }

    // Clear the accumulator before the kernel writes into it.
    y_buf = 0;

    {
        // Run the CUDA kernel on the device-side copies of the buffers.
        auto x_ptr = x_buf.LockDeviceMemoryConst();
        auto y_ptr = y_buf.LockDeviceMemory();
        bbcu_fp32_MatrixColwiseSum
            (
                (float const *)x_ptr.GetAddr(),
                (float *)y_ptr.GetAddr(),
                (int )x_buf.GetNodeSize(),
                (int )x_buf.GetFrameSize(),
                (int )(x_buf.GetFrameStride() / sizeof(float))
            );
    }

    {
        // Each output element must equal the per-node sum over frames.
        auto y_ptr = y_buf.LockConst();

        EXPECT_FLOAT_EQ(1+2+3, y_ptr(0));
        EXPECT_FLOAT_EQ(4+5+6, y_ptr(1));
    }
}
58 |
59 |
60 | #endif
61 |
62 |
--------------------------------------------------------------------------------
/samples/hls/mnist/simple/Makefile:
--------------------------------------------------------------------------------
1 |
# project settings (exported for the jelly vitis_hls tcl scripts)
export HLS_TARGET = mnist_sample
export HLS_SOLUTION = solution_1
export DEVICE_PART = xczu3eg-sbva484-1-i
export CLOCK_PERIOD = 4

export CSIM_OPTIONS ?=
export COSIM_OPTIONS ?= -trace_level all -wave_debug


# directories
JELLY_DIR = ../../../../jelly
SOURCE_DIR = src
TESTBENCH_DIR = testbench
TARGET_DIR = $(HLS_TARGET)/$(HLS_SOLUTION)
CSIM_DIR = $(TARGET_DIR)/csim
SYN_DIR = $(TARGET_DIR)/syn
COSIM_DIR = $(TARGET_DIR)/sim
IMPL_DIR = $(TARGET_DIR)/impl
EXPORT_ZIP = $(IMPL_DIR)/export.zip

# include flags used by C simulation / synthesis
export SOURCE_FLAGS = -I$(SOURCE_DIR)
export TESTBENCH_FLAGS = -I$(SOURCE_DIR) -I$(TESTBENCH_DIR)

# source
SOURCES = $(SOURCE_DIR)/mnist_sample.cpp
export SOURCES

# testbench
TESTBENCHS = $(TESTBENCH_DIR)/tb_mnist.cpp
export TESTBENCHS

# rules
.PHONY: all
all: $(EXPORT_ZIP)

# create a fresh HLS project (removes any previous one first)
$(TARGET_DIR):
	-rm -rf $(HLS_TARGET)
	vitis_hls $(JELLY_DIR)/scripts/hls_create_project.tcl

# C synthesis and IP export
$(EXPORT_ZIP): $(TARGET_DIR) $(SOURCES)
	vitis_hls $(JELLY_DIR)/scripts/hls_csynth.tcl

.PHONY: clean
clean:
	-rm -rf $(HLS_TARGET)
	-rm *.log


.PHONY: create
create: $(TARGET_DIR)

.PHONY: csynth
csynth: $(EXPORT_ZIP)

# C simulation
.PHONY: csim
csim: $(TARGET_DIR)
	vitis_hls $(JELLY_DIR)/scripts/hls_csim.tcl

# C/RTL co-simulation
.PHONY: cosim
cosim: $(TARGET_DIR) $(EXPORT_ZIP)
	vitis_hls $(JELLY_DIR)/scripts/hls_cosim.tcl
65 |
--------------------------------------------------------------------------------
/tests/gtest/cudaMatrixRowwiseSetVectorTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include "cuda_runtime.h"
8 | #include "device_launch_parameters.h"
9 |
10 | #include "gtest/gtest.h"
11 |
12 | #if BB_WITH_CUDA
13 |
14 | #include "bb/FrameBuffer.h"
15 | #include "bbcu/bbcu.h"
16 |
17 | TEST(cudacudaMatrixRowwiseSetVectorTest, test_cudaMatrixRowwiseSetVector)
18 | {
19 | int const node_size = 513;
20 | int const frame_size = 1021;
21 |
22 | bb::Tensor x_buf(BB_TYPE_FP32, node_size);
23 | bb::FrameBuffer y_buf(BB_TYPE_FP32, frame_size, node_size);
24 |
25 | {
26 | auto x_ptr = x_buf.Lock();
27 | for (int node = 0; node < node_size; ++node) {
28 | x_ptr(node) = node + 1;
29 | }
30 | }
31 |
32 | {
33 | auto x_ptr = x_buf.LockDeviceMemoryConst();
34 | auto y_ptr = y_buf.LockDeviceMemory(true);
35 | bbcu_fp32_MatrixRowwiseSetVector
36 | (
37 | (float const *)x_ptr.GetAddr(),
38 | (float *)y_ptr.GetAddr(),
39 | (int )y_buf.GetNodeSize(),
40 | (int )y_buf.GetFrameSize(),
41 | (int )(y_buf.GetFrameStride() / sizeof(float))
42 | );
43 | }
44 |
45 | {
46 | for (int node = 0; node < node_size; ++node) {
47 | for (int frame = 0; frame < frame_size; ++frame) {
48 | EXPECT_FLOAT_EQ((float)(node+1), y_buf.GetFP32(frame, node));
49 | }
50 | }
51 | }
52 | }
53 |
54 |
55 | #endif
56 |
57 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_cnn/verilator/tb_verilator.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "Vtb_verilator.h"
5 | #include "jelly/simulator/Manager.h"
6 | #include "jelly/simulator/ClockNode.h"
7 | #include "jelly/simulator/ResetNode.h"
8 | #include "jelly/simulator/VerilatorNode.h"
9 | #include "jelly/simulator/Axi4sImageLoadNode.h"
10 | #include "jelly/simulator/Axi4sImageDumpNode.h"
11 |
12 |
13 | namespace jsim = jelly::simulator;
14 |
15 |
16 | #if VM_TRACE
17 | #include
18 | #include
19 | #endif
20 |
21 |
// Simulation entry point: wires the Verilator model of tb_verilator into
// the jelly simulator framework (clock + reset + model node) and runs it.
// NOTE(review): template argument lists appear stripped from this copy
// (std::make_shared()); presumably std::make_shared<VerilatedContext>()
// and std::make_shared<Vtb_verilator>() - verify against upstream.
int main(int argc, char** argv)
{
    // Verilator simulation context
    auto contextp = std::make_shared();
    contextp->debug(0);                 // no internal debug output
    contextp->randReset(2);             // randomize state at reset
    contextp->commandArgs(argc, argv);  // forward plusargs to the model

    const auto top = std::make_shared(contextp.get(), "top");


    // waveform trace (only when built with tracing enabled)
    jsim::trace_ptr_t tfp = nullptr;
#if VM_TRACE
    contextp->traceEverOn(true);

    tfp = std::make_shared();
    top->trace(tfp.get(), 100);     // trace up to 100 hierarchy levels
    tfp->open("tb_verilator" TRACE_EXT);
#endif

    auto mng = jsim::Manager::Create();

    // clock node (5.0/2 time-unit parameter - presumably half-period),
    // reset asserted for the first 100 time units, then the DUT node
    mng->AddNode(jsim::ClockNode_Create(&top->clk, 5.0/2));
    mng->AddNode(jsim::ResetNode_Create(&top->reset, 100));
    mng->AddNode(jsim::VerilatorNode_Create(top, tfp));

    // bounded run so the testbench always terminates
    mng->Run(10000000);
    // mng->Run();

#if VM_TRACE
    tfp->close();
#endif

#if VM_COVERAGE
    contextp->coveragep()->write("coverage.dat");
#endif

    return 0;
}
60 |
61 |
62 | // end of file
63 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_lut_simple/verilator/tb_verilator.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "Vtb_verilator.h"
5 | #include "jelly/simulator/Manager.h"
6 | #include "jelly/simulator/ClockNode.h"
7 | #include "jelly/simulator/ResetNode.h"
8 | #include "jelly/simulator/VerilatorNode.h"
9 | #include "jelly/simulator/Axi4sImageLoadNode.h"
10 | #include "jelly/simulator/Axi4sImageDumpNode.h"
11 |
12 |
13 | namespace jsim = jelly::simulator;
14 |
15 |
16 | #if VM_TRACE
17 | #include
18 | #include
19 | #endif
20 |
21 |
// Simulation entry point: wires the Verilator model of tb_verilator into
// the jelly simulator framework (clock + reset + model node) and runs it.
// NOTE(review): template argument lists appear stripped from this copy
// (std::make_shared()); presumably std::make_shared<VerilatedContext>()
// and std::make_shared<Vtb_verilator>() - verify against upstream.
int main(int argc, char** argv)
{
    // Verilator simulation context
    auto contextp = std::make_shared();
    contextp->debug(0);                 // no internal debug output
    contextp->randReset(2);             // randomize state at reset
    contextp->commandArgs(argc, argv);  // forward plusargs to the model

    const auto top = std::make_shared(contextp.get(), "top");


    // waveform trace (only when built with tracing enabled)
    jsim::trace_ptr_t tfp = nullptr;
#if VM_TRACE
    contextp->traceEverOn(true);

    tfp = std::make_shared();
    top->trace(tfp.get(), 100);     // trace up to 100 hierarchy levels
    tfp->open("tb_verilator" TRACE_EXT);
#endif

    auto mng = jsim::Manager::Create();

    // clock node (5.0/2 time-unit parameter - presumably half-period),
    // reset asserted for the first 100 time units, then the DUT node
    mng->AddNode(jsim::ClockNode_Create(&top->clk, 5.0/2));
    mng->AddNode(jsim::ResetNode_Create(&top->reset, 100));
    mng->AddNode(jsim::VerilatorNode_Create(top, tfp));

    // bounded run so the testbench always terminates
    mng->Run(10000000);
    // mng->Run();

#if VM_TRACE
    tfp->close();
#endif

#if VM_COVERAGE
    contextp->coveragep()->write("coverage.dat");
#endif

    return 0;
}
60 |
61 |
62 | // end of file
63 |
--------------------------------------------------------------------------------
/documents/sphinx/source/introduction_binary_modulation.rst:
--------------------------------------------------------------------------------
1 | ----------------------------------------------
2 | バイナリ変調
3 | ----------------------------------------------
4 |
5 | 概要
6 | -------
7 |
8 | 本章ではバイナリLUT-Networkに限らず、広くバイナリネットワークに適用可能な技術として、バイナリ変調の適用について述べます。
バイナリ変調とフルバイナリネットワークの組み合わせは、本サイトの提唱する技術の1つであり、入出力に多値データが
10 | 要求される場合にバイナリネットワークを適用するための手法です。
11 |
12 | 従来のバイナリネットワーク
13 | ---------------------------
14 |
15 | 従来のバイナリネットワークでは、多値画像の認識などを行うために、入力側のいくつかの層をバイナライズせずに
16 | 多値入力とすることで多値データを扱っていました。
17 | この方法は一定の効果はあるものの、入力層では乗算器を必要とする為リソースが大きく増加する上に、
18 | 出力はバイナリであり、クラスタ分類ぐらいにしか応用できないという課題がありました。
19 |
20 |
21 | バイナリ変調
22 | -------------------
23 |
24 | 信号処理の世界にはバイナリ変調という技術があります。
25 | 例えばデジタルオーディオなどの分野では 1bit ADC やD級アンプの技術は非常に重要です。
ここでは信号をオーバーサンプリングにより、高い周波数の 1bit のデータに量子化することで、
27 | 信号処理自体はバイナリで扱うにもかかわらず、入出力データには例えば16bit以上の高品質の
28 | 信号を得る技術です。
29 |
30 | もっとも簡単な方法はアナログ値を乱数閾値でバイナリ化することです。結果は元の
31 | アナログ値に応じた確率で1と0が生成されますので、扱いたい値がそのまま
32 | Stochastic演算の対象となります。
33 | しかしながら確率的な振る舞いはデータ数が充分多い時に顕在化してきますので
34 | 信号オーバーサンプリングは重要な技法となってきます。
35 |
36 | BinaryBrain では同様の変調を元データに施してデータを水増しすることで、
37 | 非常に小さな回路の認識率を上げたり、Autoencoderや回帰分析などの多値出力を
38 | 必要とする分野への適用可能性を広げました。
39 |
40 | 下記は、通常の Dense CNN の ReLU を Binarizer に置き換え、入力もバイナリ化して
41 | フルバイナリネットワーク化したものを用いて、バイナリ変調の効果を実験した結果です。
42 |
43 | .. image:: ../../images/binary_modulation.png
44 | :scale: 100%
45 |
46 |
47 | binary_x1 が1倍のオーバーサンプル、すなわち何もせずに単純にフルバイナリ化した場合ですが、
48 | FP32での結果に比べて大きく認識率が落ち込みます。
49 | そして、binary_x3、binary_x7, binary_x15, binary_x31 が、それぞれ3倍、7倍、15倍、31倍
50 | のオーバーサンプリングでのバイナリ変調を行ったものですが、ある程度の回復を見せている
51 | 事がうかがえます。
52 |
53 | 同じ回路に、より高いフレームレートで、変調したデータを通すだけなので、スループットは
54 | 低下しますが、ネットワークを構成する回路自体のリソースは一切変化することなく、認識率だけが
55 | 向上しているのが特徴です。
56 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_semantic_segmentation/verilator/tb_verilator.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | // #include
4 | #include "Vtb_verilator.h"
5 | #include "jelly/simulator/Manager.h"
6 | #include "jelly/simulator/ClockNode.h"
7 | #include "jelly/simulator/ResetNode.h"
8 | #include "jelly/simulator/VerilatorNode.h"
9 | #include "jelly/simulator/Axi4sImageLoadNode.h"
10 | #include "jelly/simulator/Axi4sImageDumpNode.h"
11 |
12 |
13 | namespace jsim = jelly::simulator;
14 |
15 |
16 | #if VM_TRACE
17 | #include
18 | #include
19 | #endif
20 |
21 |
// Simulation entry point: wires the Verilator model of tb_verilator into
// the jelly simulator framework (clock + reset + model node) and runs it.
// NOTE(review): template argument lists appear stripped from this copy
// (std::make_shared()); presumably std::make_shared<VerilatedContext>()
// and std::make_shared<Vtb_verilator>() - verify against upstream.
int main(int argc, char** argv)
{
    // Verilator simulation context
    auto contextp = std::make_shared();
    contextp->debug(0);                 // no internal debug output
    contextp->randReset(2);             // randomize state at reset
    contextp->commandArgs(argc, argv);  // forward plusargs to the model

    const auto top = std::make_shared(contextp.get(), "top");


    // waveform trace (only when built with tracing enabled)
    jsim::trace_ptr_t tfp = nullptr;
#if VM_TRACE
    contextp->traceEverOn(true);

    tfp = std::make_shared();
    top->trace(tfp.get(), 100);     // trace up to 100 hierarchy levels
    tfp->open("tb_verilator" TRACE_EXT);
#endif

    auto mng = jsim::Manager::Create();

    // clock node (5.0/2 time-unit parameter - presumably half-period),
    // reset asserted for the first 100 time units, then the DUT node
    mng->AddNode(jsim::ClockNode_Create(&top->clk, 5.0/2));
    mng->AddNode(jsim::ResetNode_Create(&top->reset, 100));
    mng->AddNode(jsim::VerilatorNode_Create(top, tfp));

    // bounded run so the testbench always terminates
    mng->Run(10000000);
    // mng->Run();

#if VM_TRACE
    tfp->close();
#endif

#if VM_COVERAGE
    contextp->coveragep()->write("coverage.dat");
#endif

    return 0;
}
60 |
61 |
62 | // end of file
63 |
--------------------------------------------------------------------------------
/samples/verilog/mnist/tb_mnist_segmentation_and_classification/verilator/tb_verilator.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | // #include
4 | #include "Vtb_verilator.h"
5 | #include "jelly/simulator/Manager.h"
6 | #include "jelly/simulator/ClockNode.h"
7 | #include "jelly/simulator/ResetNode.h"
8 | #include "jelly/simulator/VerilatorNode.h"
9 | #include "jelly/simulator/Axi4sImageLoadNode.h"
10 | #include "jelly/simulator/Axi4sImageDumpNode.h"
11 |
12 |
13 | namespace jsim = jelly::simulator;
14 |
15 |
16 | #if VM_TRACE
17 | #include
18 | #include
19 | #endif
20 |
21 |
// Simulation entry point: wires the Verilator model of tb_verilator into
// the jelly simulator framework (clock + reset + model node) and runs it.
// NOTE(review): template argument lists appear stripped from this copy
// (std::make_shared()); presumably std::make_shared<VerilatedContext>()
// and std::make_shared<Vtb_verilator>() - verify against upstream.
int main(int argc, char** argv)
{
    // Verilator simulation context
    auto contextp = std::make_shared();
    contextp->debug(0);                 // no internal debug output
    contextp->randReset(2);             // randomize state at reset
    contextp->commandArgs(argc, argv);  // forward plusargs to the model

    const auto top = std::make_shared(contextp.get(), "top");


    // waveform trace (only when built with tracing enabled)
    jsim::trace_ptr_t tfp = nullptr;
#if VM_TRACE
    contextp->traceEverOn(true);

    tfp = std::make_shared();
    top->trace(tfp.get(), 100);     // trace up to 100 hierarchy levels
    tfp->open("tb_verilator" TRACE_EXT);
#endif

    auto mng = jsim::Manager::Create();

    // clock node (5.0/2 time-unit parameter - presumably half-period),
    // reset asserted for the first 100 time units, then the DUT node
    mng->AddNode(jsim::ClockNode_Create(&top->clk, 5.0/2));
    mng->AddNode(jsim::ResetNode_Create(&top->reset, 100));
    mng->AddNode(jsim::VerilatorNode_Create(top, tfp));

    // bounded run so the testbench always terminates
    mng->Run(10000000);
    // mng->Run();

#if VM_TRACE
    tfp->close();
#endif

#if VM_COVERAGE
    contextp->coveragep()->write("coverage.dat");
#endif

    return 0;
}
60 |
61 |
62 | // end of file
63 |
--------------------------------------------------------------------------------
/tests/hls/mnist/mnist_simple/Makefile:
--------------------------------------------------------------------------------
1 |
# project settings (exported for the jelly vitis_hls tcl scripts)
export HLS_TARGET = mnist_simple
export HLS_SOLUTION = solution_1
export DEVICE_PART = xczu3eg-sbva484-1-i
export CLOCK_PERIOD = 4

export CSIM_OPTIONS ?=
export COSIM_OPTIONS ?= -trace_level all -wave_debug


# On Ubuntu the following may be required:
# export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu


# directories
JELLY_DIR = ../../../../jelly
SOURCE_DIR = src
TESTBENCH_DIR = testbench
TARGET_DIR = $(HLS_TARGET)/$(HLS_SOLUTION)
CSIM_DIR = $(TARGET_DIR)/csim
SYN_DIR = $(TARGET_DIR)/syn
COSIM_DIR = $(TARGET_DIR)/sim
IMPL_DIR = $(TARGET_DIR)/impl
EXPORT_ZIP = $(IMPL_DIR)/export.zip

# include flags used by C simulation / synthesis
export SOURCE_FLAGS = -I$(SOURCE_DIR)
export TESTBENCH_FLAGS = -I$(SOURCE_DIR) -I$(TESTBENCH_DIR)

# source
SOURCES = $(SOURCE_DIR)/mnist_simple.cpp
export SOURCES

# testbench
TESTBENCHS = $(TESTBENCH_DIR)/tb_mnist_simple.cpp
export TESTBENCHS

# rules
.PHONY: all
all: $(EXPORT_ZIP)

# create a fresh HLS project (removes any previous one first)
$(TARGET_DIR):
	-rm -rf $(HLS_TARGET)
	vitis_hls $(JELLY_DIR)/scripts/hls_create_project.tcl

# C synthesis and IP export
$(EXPORT_ZIP): $(TARGET_DIR) $(SOURCES)
	vitis_hls $(JELLY_DIR)/scripts/hls_csynth.tcl

.PHONY: clean
clean:
	-rm -rf $(HLS_TARGET)
	-rm *.log


.PHONY: create
create: $(TARGET_DIR)

.PHONY: csynth
csynth: $(EXPORT_ZIP)

# C simulation
.PHONY: csim
csim: $(TARGET_DIR)
	vitis_hls $(JELLY_DIR)/scripts/hls_csim.tcl

# C/RTL co-simulation
.PHONY: cosim
cosim: $(TARGET_DIR) $(EXPORT_ZIP)
	vitis_hls $(JELLY_DIR)/scripts/hls_cosim.tcl
69 |
--------------------------------------------------------------------------------
/tests/gtest/ConvBitToRealTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "gtest/gtest.h"
5 |
6 | #include "bbcu/bbcu.h"
7 | #include "bb/FrameBuffer.h"
8 |
9 |
10 |
11 | #ifdef BB_WITH_CUDA
12 |
13 | TEST(ConvBitToRealTest, testConvBitToRealTest)
14 | {
15 | int frame_size = 1234;
16 | int node_size = 3456;
17 |
18 | bb::FrameBuffer buf_bit (frame_size, {node_size}, BB_TYPE_BIT);
19 | bb::FrameBuffer buf_fp32(frame_size, {node_size}, BB_TYPE_FP32);
20 |
21 | std::mt19937_64 mt(1);
22 | std::uniform_int_distribution dist(0, 1);
23 | for (int frame = 0; frame < frame_size; ++frame ) {
24 | for (int node = 0; node < node_size; ++node ) {
25 | buf_bit.SetBit(frame, node, dist(mt) == 1);
26 | }
27 | }
28 |
29 | {
30 | auto x_ptr = buf_bit.LockDeviceMemoryConst();
31 | auto y_ptr = buf_fp32.LockDeviceMemory(true);
32 |
33 | bbcu_ConvBitToReal(
34 | (int const *)x_ptr.GetAddr(),
35 | (float *)y_ptr.GetAddr(),
36 | 0.0f,
37 | 1.0f,
38 | (int)node_size,
39 | (int)frame_size,
40 | (int)(buf_bit.GetFrameStride() / sizeof(int)),
41 | (int)(buf_fp32.GetFrameStride() / sizeof(float))
42 | );
43 | }
44 |
45 | for (int frame = 0; frame < frame_size; ++frame ) {
46 | for (int node = 0; node < node_size; ++node ) {
47 | bool x = buf_bit.GetBit(frame, node);
48 | float y = buf_fp32.GetFP32(frame, node);
49 | EXPECT_EQ(x ? 1.0f : 0.0f, y);
50 | }
51 | }
52 | }
53 |
54 |
55 | #endif
56 |
57 |
58 | // end of file
59 |
60 |
--------------------------------------------------------------------------------
/include/bb/PnmImage.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 | #include
13 | #include
14 | #include
15 |
16 | #include "bb/DataType.h"
17 | #include "bb/FrameBuffer.h"
18 |
19 |
20 | namespace bb {
21 |
22 | inline void WritePgm(std::string fname, bb::FrameBuffer buf, int width, int height, int frame = 0)
23 | {
24 | std::ofstream ofs(fname);
25 | ofs << "P2\n";
26 | ofs << width << " " << height << "\n";
27 | ofs << "255\n";
28 | for ( int i = 0; i < width*height; ++i ) {
29 | auto v = buf.GetFP32(frame, i);
30 | v = std::max(v, 0.0f);
31 | v = std::min(v, 1.0f);
32 | ofs << (int)(v * 255.0f) << "\n";
33 | }
34 | }
35 |
36 | inline void WritePpm(std::string fname, bb::FrameBuffer buf, int width, int height, int frame = 0)
37 | {
38 | std::ofstream ofs(fname);
39 | ofs << "P3\n";
40 | ofs << width << " " << height << "\n";
41 | ofs << "255\n";
42 | for ( int i = 0; i < width*height; ++i ) {
43 | for ( int c = 0; c < 3; ++c ) {
44 | auto v = buf.GetFP32(frame, width*height*c + i);
45 | v = std::max(v, 0.0f);
46 | v = std::min(v, 1.0f);
47 | ofs << (int)(v * 255.0f) << "\n";
48 | }
49 | ofs << "\n";
50 | }
51 | }
52 |
53 | }
54 |
55 |
56 | // end of file
57 |
--------------------------------------------------------------------------------
/python/binarybrain/src/core_bbcu.cu:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #ifndef BB_PYBIND11
11 | #define BB_PYBIND11
12 | #endif
13 |
14 | #ifndef BB_OBJECT_LOADER
15 | #define BB_OBJECT_LOADER
16 | #endif
17 |
18 |
19 | #include "Manager.cu"
20 | #include "LocalHeap.cu"
21 | #include "FrameBufferCopy.cu"
22 | #include "ConvBitToReal.cu"
23 | #include "Vector.cu"
24 | #include "MatrixColwiseSum.cu"
25 | #include "MatrixColwiseMeanVar.cu"
26 | #include "MatrixRowwiseSetVector.cu"
27 | #include "MicroMlp.cu"
28 | #include "AverageLut.cu"
29 | #include "MaxLut.cu"
30 | #include "BinaryLut6.cu"
31 | #include "DifferentiableLut.cu"
32 | #include "StochasticLut.cu"
33 | #include "StochasticMaxPooling.cu"
34 | #include "StochasticBatchNormalization.cu"
35 | #include "ShuffleModulation.cu"
36 | #include "Shuffle.cu"
37 | #include "RealToBinary.cu"
38 | #include "BinaryToReal.cu"
39 | #include "BitEncode.cu"
40 | #include "BitError.cu"
41 | #include "Im2Col.cu"
42 | #include "Col2Im.cu"
43 | #include "MaxPooling.cu"
44 | #include "UpSampling.cu"
45 | #include "BatchNormalization.cu"
46 | #include "ReLU.cu"
47 | #include "Sigmoid.cu"
48 | #include "Binarize.cu"
49 | #include "HardTanh.cu"
50 | #include "OptimizerAdam.cu"
51 | #include "LossSoftmaxCrossEntropy.cu"
52 | #include "LossMeanSquaredError.cu"
53 | #include "MetricsCategoricalAccuracy.cu"
54 | #include "Utility.cu"
55 |
56 |
57 | // end of file
58 |
--------------------------------------------------------------------------------
/cuda/MatrixRowwiseSetVector.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | #include "cuda_runtime.h"
5 | #include "device_launch_parameters.h"
6 |
7 | #include "bbcu/bbcu.h"
8 | #include "bbcu/bbcu_util.h"
9 |
10 |
11 |
// kernel: broadcast one vector element per matrix row:
//   y_mat[node * frame_stride + frame] = x_vec[node] for every frame.
// Launch layout: blockDim.x threads stride over frames (coalesced writes);
// blockIdx.y * blockDim.y + threadIdx.y selects the node (row).
__global__ void kernel_fp32_MatrixRowwiseSetVector(
            const float*    x_vec,
            float*          y_mat,
            int             node_size,
            int             frame_size,
            int             frame_stride
        )
{
    // thread/index setup
    int frame_base = threadIdx.x;
    int frame_step = blockDim.x;
    int node = blockIdx.y * blockDim.y + threadIdx.y;

    // guard: grid.y * blockDim.y may overshoot node_size
    if (node >= node_size) {
        return;
    }

    // read the value to broadcast for this row
    float x = x_vec[node];

    // grid-stride over frames; adjacent threads write adjacent elements
    float *y_ptr = &y_mat[node * frame_stride];
    for ( int frame = frame_base; frame < frame_size; frame += frame_step ) {
        y_ptr[frame] = x;
    }
}
38 |
39 |
// Host-side launcher for kernel_fp32_MatrixRowwiseSetVector.
// Sets every frame (column) of each matrix row to the corresponding
// element of dev_x_vec. All pointers must reference device memory.
// Returns 0; launch errors surface via BB_CUDA_CHECK_LAST_ERROR().
// Fix: the kernel launch was missing its execution configuration
// ("<<>>"); restored the standard <<<grid, block, 0, streamId>>> form.
int bbcu_fp32_MatrixRowwiseSetVector
        (
            const float*    dev_x_vec,
            float*          dev_y_mat,
            int             node_size,
            int             frame_size,
            int             frame_stride,
            cudaStream_t    streamId
        )
{
    BBCU_DEBUG_ASSERT(bbcu_IsDeviceAvailable());

    // 32 threads stride over frames, 32 nodes per block;
    // grid.y covers node_size with ceiling division
    dim3 block(32, 32);
    dim3 grid(1, (node_size + block.y - 1) / block.y);

    kernel_fp32_MatrixRowwiseSetVector<<<grid, block, 0, streamId>>>(
            dev_x_vec,
            dev_y_mat,
            node_size,
            frame_size,
            frame_stride);
    BB_CUDA_CHECK_LAST_ERROR();

    return 0;
}
65 |
66 |
67 |
--------------------------------------------------------------------------------
/tests/gtest/ShuffleTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "gtest/gtest.h"
4 |
5 | #include "bb/Shuffle.h"
6 |
7 |
8 |
9 | TEST(SuffleTest, testSuffle_test0)
10 | {
11 | auto shuffle = bb::Shuffle::Create(3);
12 |
13 | bb::FrameBuffer x(2, {6}, BB_TYPE_FP32);
14 | shuffle->SetInputShape(x.GetShape());
15 |
16 | x.SetFP32(0, 0, 11);
17 | x.SetFP32(0, 1, 12);
18 | x.SetFP32(0, 2, 13);
19 | x.SetFP32(0, 3, 14);
20 | x.SetFP32(0, 4, 15);
21 | x.SetFP32(0, 5, 16);
22 | x.SetFP32(1, 0, 21);
23 | x.SetFP32(1, 1, 22);
24 | x.SetFP32(1, 2, 23);
25 | x.SetFP32(1, 3, 24);
26 | x.SetFP32(1, 4, 25);
27 | x.SetFP32(1, 5, 26);
28 |
29 | auto y = shuffle->Forward(x);
30 |
31 | EXPECT_EQ(11, y.GetFP32(0, 0));
32 | EXPECT_EQ(13, y.GetFP32(0, 1));
33 | EXPECT_EQ(15, y.GetFP32(0, 2));
34 | EXPECT_EQ(12, y.GetFP32(0, 3));
35 | EXPECT_EQ(14, y.GetFP32(0, 4));
36 | EXPECT_EQ(16, y.GetFP32(0, 5));
37 | EXPECT_EQ(21, y.GetFP32(1, 0));
38 | EXPECT_EQ(23, y.GetFP32(1, 1));
39 | EXPECT_EQ(25, y.GetFP32(1, 2));
40 | EXPECT_EQ(22, y.GetFP32(1, 3));
41 | EXPECT_EQ(24, y.GetFP32(1, 4));
42 | EXPECT_EQ(26, y.GetFP32(1, 5));
43 |
44 | // backward
45 | auto dx = shuffle->Backward(y);
46 |
47 | EXPECT_EQ(11, dx.GetFP32(0, 0));
48 | EXPECT_EQ(12, dx.GetFP32(0, 1));
49 | EXPECT_EQ(13, dx.GetFP32(0, 2));
50 | EXPECT_EQ(14, dx.GetFP32(0, 3));
51 | EXPECT_EQ(15, dx.GetFP32(0, 4));
52 | EXPECT_EQ(16, dx.GetFP32(0, 5));
53 | EXPECT_EQ(21, dx.GetFP32(1, 0));
54 | EXPECT_EQ(22, dx.GetFP32(1, 1));
55 | EXPECT_EQ(23, dx.GetFP32(1, 2));
56 | EXPECT_EQ(24, dx.GetFP32(1, 3));
57 | EXPECT_EQ(25, dx.GetFP32(1, 4));
58 | EXPECT_EQ(26, dx.GetFP32(1, 5));
59 | }
60 |
61 |
62 |
--------------------------------------------------------------------------------
/tests/cpp/diabetes/Makefile:
--------------------------------------------------------------------------------
1 |
# target
TARGET     = sample-diabetes
# fix: was declared as SUB_TARGETS but used everywhere as SUB_TARGET
SUB_TARGET =

# run option
RUN_OPTION = All

# default flags
DEBUG       ?= No
WITH_CUDA   ?= Yes
WITH_CEREAL ?= Yes

BBCU_PATH = ../../../cuda
BBCU_LIB  = $(BBCU_PATH)/libbbcu.a

CEREAL_PATH = ../../../cereal

# host compiler (only used for the non-CUDA build; CUDA overrides below)
ifeq ($(WITH_CUDA),Yes)
else
CC = g++
#CC ?= clang++
endif

CFLAGS = -O2 -mavx2 -mfma -fopenmp -std=c++14
CINCS  = -I../../../include
CDEFS  =

SRCS  = main.cpp
SRCS += DiabetesRegressionDenseAffine.cpp
SRCS += DiabetesRegressionMicroMlpLut.cpp
SRCS += DiabetesRegressionStochasticLut6.cpp

OBJS = $(addsuffix .o, $(basename $(SRCS)))

LIBS =

# data files (fix: must be defined BEFORE the 'run' rule below, because
# make expands prerequisite lists immediately when a rule is read)
DATA_FILES  = diabetes_data.txt
DATA_FILES += diabetes_target.txt

ifeq ($(WITH_CEREAL),Yes)
CDEFS += -DBB_WITH_CEREAL
CINCS += -I$(CEREAL_PATH)/include
endif

ifeq ($(WITH_CUDA),Yes)
CC      = nvcc
CDEFS  += -DBB_WITH_CUDA
CFLAGS := -Xcompiler '$(CFLAGS)' -lcublas
LIBS   += $(BBCU_LIB)
SUB_TARGET += bbcu_build
endif

# fix: sources are C++, and the suffix rule below is .cpp.o (was ".c .o")
.SUFFIXES: .cpp .o

.PHONY: all
all: $(SUB_TARGET) $(TARGET)

.PHONY: clean
clean:
	rm -f $(TARGET) *.o

.PHONY: run
run: $(TARGET) $(DATA_FILES)
	./$(TARGET) $(RUN_OPTION)

.PHONY: bbcu_build
bbcu_build:
	make -C $(BBCU_PATH)

$(TARGET): $(OBJS) $(LIBS)
	$(CC) -o $(TARGET) $(CFLAGS) $(CINCS) $(CDEFS) $(OBJS) $(LIBS)

.cpp.o:
	$(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $<

depend: $(SRCS)
	$(CC) -M $(CFLAGS) $(CINCS) $(CDEFS) $^ > $@

# '-include' tolerates a missing 'depend' file on a clean checkout
-include depend


.PHONY: dl_data
dl_data:
	python3 diabets_data.py
87 |
88 |
--------------------------------------------------------------------------------
/samples/cpp/diabetes/Makefile:
--------------------------------------------------------------------------------
1 |
2 | # target
3 | TARGET = sample-diabetes
4 | SUB_TARGETS =
5 |
6 | # run option
7 | RUN_OPTION = All
8 |
9 | # default flag
10 | DEBUG ?= No
11 | WITH_CUDA ?= Yes
12 | WITH_CEREAL ?= Yes
13 |
14 | BBCU_PATH = ../../../cuda
15 | BBCU_LIB = $(BBCU_PATH)/libbbcu.a
16 |
17 | CEREAL_PATH = ../../../cereal
18 |
19 | ifeq ($(WITH_CUDA),Yes)
20 | else
21 | CC = g++
22 | #CC ?= clang++
23 | endif
24 |
25 | CFLAGS = -O2 -mavx2 -mfma -fopenmp -std=c++14
26 | CINCS = -I../../../include
27 | CDEFS =
28 |
29 | SRCS = main.cpp
30 | SRCS += DiabetesRegressionDenseAffine.cpp
31 | SRCS += DiabetesRegressionMicroMlpLut.cpp
32 | SRCS += DiabetesRegressionStochasticLut6.cpp
33 |
34 | OBJS = $(addsuffix .o, $(basename $(SRCS)))
35 |
36 | LIBS =
37 |
38 | ifeq ($(WITH_CEREAL),Yes)
39 | CDEFS += -DBB_WITH_CEREAL
40 | CINCS += -I$(CEREAL_PATH)/include
41 | endif
42 |
43 | ifeq ($(WITH_CUDA),Yes)
44 | CC = nvcc
45 | CDEFS += -DBB_WITH_CUDA
46 | CFLAGS := -Xcompiler '$(CFLAGS)' -lcublas
47 | LIBS += $(BBCU_LIB)
48 | SUB_TARGET += bbcu_build
49 | endif
50 |
51 | .SUFFIXES: .c .o
52 |
53 | .PHONY: all
54 | all: $(SUB_TARGET) $(TARGET)
55 |
56 | .PHONY: clean
57 | clean:
58 | rm -f $(TARGET) *.o
59 |
60 | .PHONY: run
61 | run: $(TARGET) $(DATA_FILES)
62 | ./$(TARGET) $(RUN_OPTION)
63 |
64 | .PHONY: bbcu_build
65 | bbcu_build:
66 | make -C $(BBCU_PATH)
67 |
68 | $(TARGET): $(OBJS) $(LIBS)
69 | $(CC) -o $(TARGET) $(CFLAGS) $(CINCS) $(CDEFS) $(OBJS) $(LIBS)
70 |
71 | .cpp.o:
72 | $(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $<
73 |
74 | depend: $(SRCS)
75 | $(CC) -M $(CFLAGS) $(CINCS) $(CDEFS) $^ > $@
76 |
77 | include depend
78 |
79 |
80 | # data
81 | DATA_FILES = diabetes_data.txt
82 | DATA_FILES += diabetes_target.txt
83 |
84 | .PHONY: dl_data
85 | dl_data:
86 | python3 diabets_data.py
87 |
88 |
--------------------------------------------------------------------------------
/tests/gtest/VariablesTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "gtest/gtest.h"
5 |
6 | #include "bb/Variables.h"
7 |
8 |
// Exercises bb::Variables arithmetic: compound and binary +, -, *, /
// with both Variables and scalar operands, over a heterogeneous
// collection of tensors (fp32, fp64, int32).
// NOTE(review): the template argument lists of std::make_shared() appear
// stripped in this copy (presumably std::make_shared<bb::Tensor>(...)) -
// verify against the upstream file.
TEST(VariablesTest, VariablesTest_Test)
{
    // three tensors with different shapes and element types
    auto t0 = std::make_shared(bb::indices_t({2, 3, 4}), BB_TYPE_FP32);
    auto t1 = std::make_shared(bb::indices_t({3, 2, 6}), BB_TYPE_FP64);
    auto t2 = std::make_shared(bb::indices_t({4, 1, 7}), BB_TYPE_INT32);

    bb::Variables var1;
    var1.PushBack(t0);
    var1.PushBack(t1);
    var1.PushBack(t2);

    // var2/var3 mirror var1's types and shapes
    bb::Variables var2(var1.GetTypes(), var1.GetShapes());
    bb::Variables var3(var1.GetTypes(), var1.GetShapes());

    // scalar broadcast-assignment
    var1 = 1;
    var2 = 2;
    var3 = 0;

    var2 += var1;

    // after 2 += 1, every element of every tensor must be 3
    {
        auto ptr2_0 = var2[0].Lock();
        auto ptr2_1 = var2[1].Lock();
        auto ptr2_2 = var2[2].Lock();
        EXPECT_EQ(3.0f, ptr2_0[0]);
        EXPECT_EQ(3.0f, ptr2_0[1]);
        EXPECT_EQ(3.0f, ptr2_0[2]);
        EXPECT_EQ(3.0, ptr2_1[0]);
        EXPECT_EQ(3.0, ptr2_1[1]);
        EXPECT_EQ(3.0, ptr2_1[2]);
        EXPECT_EQ(3, ptr2_2[0]);
        EXPECT_EQ(3, ptr2_2[1]);
        EXPECT_EQ(3, ptr2_2[2]);
    }

    var2 += 11;

    // remaining operator forms are smoke-tested (no value assertions):
    // they must compile and run without throwing

    var3 = var1 + var2;
    var3 = var1 + 1;
    var3 = 2 + var1 + 1;

    var3 -= var1;
    var3 -= 5;
    var3 = var1 - var2;
    var3 = var1 - 1;
    var3 = 2 - var1;

    var3 *= var1;
    var3 *= 5;
    var3 = var1 * var2;
    var3 = var1 * 1;
    var3 = 2 * var1;

    var3 /= var1;
    var3 /= 5;
    var3 = var1 / var2;
    var3 = var1 / 1;
    var3 = 2 / var1;
}
68 |
69 |
--------------------------------------------------------------------------------
/tests/gtest/DenseAffineQuantizeTest.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include
3 | #include
4 |
5 | #include "gtest/gtest.h"
6 | #include "bb/DenseAffineQuantize.h"
7 |
8 |
9 |
10 |
// Forward/backward value check for DenseAffineQuantize with hand-set
// weights and biases.
// NOTE(review): the entire body below is disabled with '#if 0', so this
// test currently only verifies that Create(3) succeeds without throwing.
// The expected values inside appear to predate quantization (they match
// exact affine arithmetic) - confirm before re-enabling.
TEST(DenseAffineQuantizeTest, test1)
{
    auto affine = bb::DenseAffineQuantize<>::Create(3);

#if 0
    affine->SetInputShape({2});

    // forward: 1 frame, 2 input nodes
    bb::FrameBuffer x_buf(1, {2}, BB_TYPE_FP32);

    x_buf.SetFP32(0, 0, 1);
    x_buf.SetFP32(0, 1, 2);
    EXPECT_EQ(1, x_buf.GetFP32(0, 0));
    EXPECT_EQ(2, x_buf.GetFP32(0, 1));

    // set weights W(out, in) and biases b(out) by hand
    {
        auto W = affine->lock_W();
        auto b = affine->lock_b();
        W(0, 0) = 1;
        W(0, 1) = 2;
        W(1, 0) = 10;
        W(1, 1) = 20;
        W(2, 0) = 100;
        W(2, 1) = 200;
        b(0) = 1000;
        b(1) = 2000;
        b(2) = 3000;
    }

    auto y_buf = affine->Forward(x_buf);

    // y = W x + b, computed per output node
    EXPECT_EQ(1 *   1 + 2 *   2 + 1000, y_buf.GetFP32(0, 0));
    EXPECT_EQ(1 *  10 + 2 *  20 + 2000, y_buf.GetFP32(0, 1));
    EXPECT_EQ(1 * 100 + 2 * 200 + 3000, y_buf.GetFP32(0, 2));


    // backward: dx = W^T dy, dW = dy x^T

    bb::FrameBuffer dy_buf(1, {3}, BB_TYPE_FP32);

    dy_buf.SetFP32(0, 0, 998);
    dy_buf.SetFP32(0, 1, 2042);
    dy_buf.SetFP32(0, 2, 3491);

    auto dx_buf = affine->Backward(dy_buf);

    EXPECT_EQ(370518, dx_buf.GetFP32(0, 0));
    EXPECT_EQ(741036, dx_buf.GetFP32(0, 1));

    {
        auto dW = affine->lock_dW_const();

        EXPECT_EQ(998,  dW(0, 0));
        EXPECT_EQ(2042, dW(1, 0));
        EXPECT_EQ(3491, dW(2, 0));
        EXPECT_EQ(1996, dW(0, 1));
        EXPECT_EQ(4084, dW(1, 1));
        EXPECT_EQ(6982, dW(2, 1));
    }
#endif
}
72 |
73 |
--------------------------------------------------------------------------------
/documents/sphinx/source/introduction_case_study.rst:
--------------------------------------------------------------------------------
1 | ----------------------------------------------
2 | 事例紹介
3 | ----------------------------------------------
4 |
5 | リアルタイム認識
6 | ------------------
7 |
8 | 実装事例
9 | ^^^^^^^^^^^^^^^^^^^
10 | フルバイナリネットワークで、遅延数ミリ秒(1000fps)での画像認識の例です。
11 |
12 | .. image:: ../../images/fpga_environment.jpg
13 | :scale: 100%
14 |
15 | 下記のようなブロック図となっています。
16 |
17 | .. image:: ../../images/block_diagram.png
18 | :scale: 100%
19 |
20 |
21 | FPGAリソース
22 | ^^^^^^^^^^^^^^^^
23 |
24 | いくつかの認識について実験したものを以下に示します。
25 |
26 | .. image:: ../../images/fpga_resource.png
27 | :scale: 100%
28 |
29 | 下記はカメラやOLEDなどの制御回路も含んだものもありますが、例えば MNIST の Simple DNN
30 | であればニューラルネット部分はわずか 1460個のLUTのみで88%の認識が可能です。
31 | これは、今手に入るXILINXのもっとも小さなFPGAでも十分収まるサイズです。
32 |
33 | これは 1024-360-60-10 の4層構造のネットワークであり、例えば200MHzで動かした場合、
34 | 4サイクル(=20ナノ秒)で認識が完了します。そのため極めてリアルタイム性の高い用途への応用も可能です。
35 |
36 | もしカメラなどの入力に制約がなく、28x28の画像を毎サイクル供給可能であれば、
37 | コア自体は 200Mfpsで動作可能となります。
38 | これは1つの対象に対して条件を変えながら非常に多くの認識を行える帯域ですので、
39 | 1回の認識率は低くても、結果を二次加工することで実用的な認識率を目指すようなことも可能な帯域です。
40 |
41 |
42 | Autoencoder
43 | ------------------
44 |
45 | 通常のバイナリネットワークは出力もバイナリであるため、例えばAutoencoderのような
46 | 多値出力が必要な用途には応用が難しいという課題があります。
47 | (入力に関しては最初の数層を多値で扱う手はあります)
48 |
49 | BinaryBrainでは、バイナリ変調を用いることで、入力から出力まで全層がバイナリである
50 | Fully binary neural network で多値データを扱う方法を提供しています。
51 |
52 | MNIST
53 | ^^^^^^^^^^^^^^^^
54 |
55 | MNISTでの Autoencoder の実験結果です。
56 |
57 | .. image:: ../../images/autoencoder_mnist.png
58 | :scale: 100%
59 |
60 | MNIST画像自体が2値に近いのですが、輪郭付近でやや滑らかさが出ています。
61 |
62 |
63 | CIFAR-10
64 | ^^^^^^^^^^^^^^^^
65 |
66 | 同様にCIFAR-10のデータセットで扱ったものです。
67 |
68 | .. image:: ../../images/autoencoder_cifar10.png
69 | :scale: 100%
70 |
71 | ぼやけた感じは否めませんが、多値出力に対してある程度のことができているのは確認できます。
72 |
73 | もともとがCIFAR-10のデータセット自体が Autoencoder のような学習を目的としたデータセットではないので、
74 | 多値の従来ネットワークでもかなりボケた画像しか作れない部分はあるので、まずは実験的な結果と言えます。
75 |
76 |
--------------------------------------------------------------------------------
/tests/gtest/DifferentiableLutTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "gtest/gtest.h"
6 |
7 | #include "bb/DifferentiableLutN.h"
8 |
9 |
10 | TEST(DifferentiableLutTest, test_001)
11 | {
12 | auto lut0 = bb::DifferentiableLutN<6, float>::Create(16);
13 | bb::FrameBuffer x_buf(1, {16}, BB_TYPE_FP32);
14 |
15 | lut0->SetInputShape(x_buf.GetShape());
16 |
17 | for (int i = 0; i < 16; ++i) {
18 | x_buf.SetFP32(0, 0, (float)i/10);
19 | }
20 | auto y_buf = lut0->Forward(x_buf);
21 |
22 | bb::FrameBuffer dy_buf(1, {16}, BB_TYPE_FP32);
23 | for (int i = 0; i < 16; ++i) {
24 | dy_buf.SetFP32(0, 0, (float)i/100);
25 | }
26 |
27 | lut0->Backward(dy_buf);
28 |
29 | {
30 | std::ofstream ofs("DifferentiableLutTest001.bb_net", std::ios::binary);
31 | lut0->DumpObject(ofs);
32 | }
33 |
34 | auto lut1 = bb::DifferentiableLutN<6, float>::Create(16);
35 | {
36 | std::ifstream ifs("DifferentiableLutTest001.bb_net", std::ios::binary);
37 | lut1->LoadObject(ifs);
38 | }
39 |
40 | EXPECT_EQ(lut0->GetGamma(), lut1->GetGamma());
41 | EXPECT_EQ(lut0->GetBeta(), lut1->GetBeta());
42 | EXPECT_EQ(lut0->GetOutputShape(), lut1->GetOutputShape());
43 | EXPECT_EQ(lut0->GetInputShape(), lut1->GetInputShape());
44 | for ( bb::index_t out_node = 0; out_node < lut0->GetOutputNodeSize(); ++out_node ) {
45 | EXPECT_EQ(lut0->GetNodeConnectionSize(out_node), lut1->GetNodeConnectionSize(out_node));
46 | for ( bb::index_t in_index = 0; in_index < lut0->GetNodeConnectionSize(out_node); ++in_index ) {
47 | EXPECT_EQ(lut0->GetNodeConnectionIndex(out_node, in_index), lut1->GetNodeConnectionIndex(out_node, in_index));
48 | }
49 | }
50 |
51 | EXPECT_EQ(lut0->EqualityCheck(*lut1), true);
52 |
53 | }
54 |
55 |
56 |
--------------------------------------------------------------------------------
/documents/sphinx/source/informations.rst:
--------------------------------------------------------------------------------
1 |
2 | =================
3 | 開発情報
4 | =================
5 |
6 |
7 | githubについて
8 | ============================
9 |
10 | 現在 version4 は下記の branch で管理しています
11 |
12 | ver4_develop
13 | 開発用ブランチです。ビルド不能な状態になることもあります。
14 | 最新のコードにアクセスしたい場合はここをご覧ください。
15 |
16 | ver4_release
17 | リリース作成用ブランチです。
18 |
19 | master
20 | リリースブランチで確認したものを反映。
21 |
22 | tag は リリースのタイミングでバージョン番号のタグを打つようにしております。
23 | また、開発都合で ver4_build0001 のような形式でリリースと無関係にビルドタグを打つ場合があります。
24 |
25 | まだ、開発初期で仕様が安定していませんので、再現性の確保などが必要な際はタグを活用ください。
26 |
27 |
28 |
29 |
30 | 作者情報
31 | ============================
32 |
33 | 渕上 竜司(Ryuji Fuchikami)
34 |
35 | - github : https://github.com/ryuz
36 | - blog : http://ryuz.txt-nifty.com
37 | - twitter : https://twitter.com/ryuz88
38 | - facebook : https://www.facebook.com/ryuji.fuchikami
39 | - web-site : https://rtc-lab.com/
40 | - e-mail : ryuji.fuchikami@nifty.com
41 |
42 |
43 | 参考にさせて頂いた情報
44 | ============================
45 |
46 | - | バイナリニューラルネットとハードウェアの関係
47 | | https://www.slideshare.net/kentotajiri/ss-77136469
48 |
49 | - | BinaryConnect: Training Deep Neural Networks with binary weights during propagations
50 | | https://arxiv.org/pdf/1511.00363.pdf
51 |
52 | - | Binarized Neural Networks
53 | | https://arxiv.org/abs/1602.02505
54 |
55 | - | Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1
56 | | https://arxiv.org/abs/1602.02830
57 |
58 | - | XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks
59 | | https://arxiv.org/abs/1603.05279
60 |
61 | - | Xilinx UltraScale Architecture Configurable Logic Block User Guide
62 | | https://japan.xilinx.com/support/documentation/user_guides/ug574-ultrascale-clb.pdf
63 |
64 |
65 |
66 | 参考にした書籍
67 | ============================
68 |
69 | - | ゼロから作るDeep Learning ―Pythonで学ぶディープラーニングの理論と実装
70 | | https://www.oreilly.co.jp/books/9784873117584/
71 |
72 |
--------------------------------------------------------------------------------
/samples/cpp/cifar10/sample_cifar10.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | ソース ファイル
20 |
21 |
22 | ソース ファイル
23 |
24 |
25 | ソース ファイル
26 |
27 |
28 | ソース ファイル
29 |
30 |
31 | ソース ファイル
32 |
33 |
34 | ソース ファイル
35 |
36 |
37 | ソース ファイル
38 |
39 |
40 | ソース ファイル
41 |
42 |
43 | ソース ファイル
44 |
45 |
46 |
--------------------------------------------------------------------------------
/documents/sphinx/source/quick_start_cpp.rst:
--------------------------------------------------------------------------------
1 | ==============================
2 | クイックスタート(C++)
3 | ==============================
4 |
5 |
6 | まずはじめに付属のMNISTサンプルを動かすまでを紹介します。
7 |
8 | AXV2以降の命令が使えるCPUと、Windows7以降もしくは Linuxの環境を想定しております。
CUDAにも対応していますが、nvccが利用可能な環境でビルドする必要があります。
10 |
11 | CUDAについてはNVIDIAのページを参考に事前にインストールください。
12 | https://developer.nvidia.com/cuda-downloads
13 |
14 | なお make 時に make WITH_CUDA=No と指定することで、GPUを使わないCPU版もビルド可能です。
15 |
16 |
17 | Windows
18 | -----------
19 | 1. install VisualStudio 2019 + CUDA 11.3
20 | 2. git clone --recursive -b ver4_release https://github.com/ryuz/BinaryBrain.git
21 | 3. download MNIST from http://yann.lecun.com/exdb/mnist/
22 | 4. decompress MNIST for "\samples\cpp\mnist"
23 | 5. open VC++ solution "samples\cpp\mnist\sample_mnist.sln"
24 | 6. build "x64 Release"
25 | 7. run
26 |
27 | Linux(Ubuntu 20.04)
28 | ----------------------
29 |
30 | 1. install tools
31 | ^^^^^^^^^^^^^^^^^
32 |
33 | ::
34 |
35 | % sudo apt update
36 | % sudo apt upgrade
37 | % sudo apt install git
38 | % sudo apt install make
39 | % sudo apt install g++
40 | % wget https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
41 | % sudo sh cuda_11.3.1_465.19.01_linux.run
42 |
43 | 2. build and run
44 | ^^^^^^^^^^^^^^^^^
45 |
46 | ::
47 |
48 | % git clone --recursive -b ver4_release https://github.com/ryuz/BinaryBrain.git
49 | % cd BinaryBrain/samples/cpp/mnist
50 | % make
51 | % make dl_data
52 | % ./sample-mnist All
53 |
54 |
55 | ここで単に
56 |
57 | ::
58 |
59 | % ./sample-mnist
60 |
61 | と打ち込むと、使い方が表示されます。
62 |
63 |
64 | Google Colaboratory
65 | ---------------------------
66 |
67 | nvcc が利用可能な Google Colaboratory でも動作可能なようです。
68 | 以下あくまで参考ですが、ランタイムのタイプをGPUに設定した上で、下記のような操作で、ビルドして動作させることができます。
69 |
70 | ::
71 |
72 | !git clone --recursive -b ver4_release https://github.com/ryuz/BinaryBrain.git
73 | %cd BinaryBrain/samples/cpp/mnist
74 | !make all
75 | !make run
76 |
77 |
--------------------------------------------------------------------------------
/include/bb/LoadXor.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 |
13 | #include
14 | #include
15 |
16 | #include "bb/DataType.h"
17 |
18 |
19 | namespace bb {
20 |
21 |
// Generates an N-bit XOR (parity) toy dataset.
// NOTE(review): the template parameter list was lost in extraction here;
// presumably `template <typename T = float>` — confirm against the repository.
template
class LoadXor
{
public:
    // Builds every one of the 2^bit_size input bit patterns; the target for
    // each pattern is its parity (XOR of all input bits).
    //   bit_size : number of input bits per sample
    //   mul      : how many times the training set is duplicated (assumed >= 1)
    // The test set is a copy of the un-duplicated training set.
    static TrainData Load(int bit_size, int mul=1)
    {
        TrainData td;

        int data_size = (1 << bit_size);
        td.x_train.resize(data_size);
        td.t_train.resize(data_size);
        // NOTE(review): size_t loop index compared against int data_size —
        // harmless here (both non-negative) but triggers sign-compare warnings.
        for (size_t i = 0; i < data_size; ++i) {
            td.x_train[i].resize(bit_size);
            td.t_train[i].resize(1);

            // y accumulates the parity of the bits of i while the input
            // vector is filled with the bits themselves as 0.0/1.0 values
            int y = 0;
            for (int j = 0; j < bit_size; ++j) {
                if ((i >> j) & 1) {
                    y ^= 1;
                    td.x_train[i][j] = (T)1.0;
                }
                else {
                    td.x_train[i][j] = (T)0.0;
                }
            }
            td.t_train[i][0] = (T)y;
        }

        // test set == the full (un-duplicated) pattern set
        td.x_test = td.x_train;
        td.t_test = td.t_train;

        // replicate the training set mul times by copying modulo data_size
        td.x_train.resize(data_size*mul);
        td.t_train.resize(data_size*mul);
        for (size_t i = data_size; i < data_size*mul; ++i) {
            td.x_train[i] = td.x_train[i%data_size];
            td.t_train[i] = td.t_train[i%data_size];
        }

        td.x_shape = indices_t({bit_size});
        td.t_shape = indices_t({1});

        return td;
    }
};
66 |
67 |
68 | }
69 |
70 |
--------------------------------------------------------------------------------
/include/bb/Assert.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 | #pragma once
11 |
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 |
20 |
21 | namespace bb {
22 |
23 |
// BB_ASSERT_ACTION(text) defines what happens when an assertion fails.
// It may be overridden by defining it before including this header;
// otherwise one of the following variants is selected at compile time:
//   BB_ASSERT_GETCHAR   : print message, wait for the enter key, exit(1)
//   BB_ASSERT_EXCEPTION : print message, then throw std::runtime_error
//   BB_ASSERT_LOOP      : print message, then spin forever
//   (default)           : print message, then exit(1)
#ifndef BB_ASSERT_ACTION

#if defined(BB_ASSERT_GETCHAR)
#define BB_ASSERT_ACTION(text) do { std::cout << "\n" << text << std::endl; printf("\nplease press enter key to exit.\n"); (void)getchar(); exit(1); } while(0)
#elif defined(BB_ASSERT_EXCEPTION)
#define BB_ASSERT_ACTION(text) do { std::cout << "\n" << text << std::endl; throw std::runtime_error(text); } while(0)
#elif defined(BB_ASSERT_LOOP)
#define BB_ASSERT_ACTION(text) do { std::cout << "\n" << text << std::endl; for (;;); } while(0)
#else
#define BB_ASSERT_ACTION(text) do { std::cout << "\n" << text << std::endl; exit(1); } while(0)
#endif

#endif
37 |
38 |
// BB_ASSERT(v): assertion checked in every build configuration.
// On failure it reports the failed expression together with the source
// file and line, then invokes BB_ASSERT_ACTION.
#define BB_ASSERT(v) \
do { \
if(!(v)) { \
BB_ASSERT_ACTION("BB_ASSERT(" #v ") at " __FILE__ " line " + std::to_string(__LINE__) ); \
} \
} while(0)
46 |
// BB_DEBUG_ASSERT(v): assertion compiled in only for debug builds (_DEBUG);
// expands to a no-op in release builds.
// (bug fix: the failure message previously said "BB_ASSERT(...)", hiding
//  which macro actually fired)
#ifdef _DEBUG
#define BB_DEBUG_ASSERT(v) \
do { \
if(!(v)) { \
BB_ASSERT_ACTION("BB_DEBUG_ASSERT(" #v ") at " __FILE__ " line " + std::to_string(__LINE__) ); \
} \
} while(0)
#else
#define BB_DEBUG_ASSERT(v) do{}while(0)
#endif
58 |
59 |
60 | }
61 |
62 |
63 | // end of file
64 |
--------------------------------------------------------------------------------
/cuda/Makefile:
--------------------------------------------------------------------------------

# target
TARGET = libbbcu.a

# tools
NVCC = nvcc
AR = ar
RM = rm

# flags
CUFLAGS = -I../include -std=c++11 -Xcompiler -fPIC \
		-gencode=arch=compute_52,code=sm_52 \
		-gencode=arch=compute_53,code=sm_53 \
		-gencode=arch=compute_60,code=sm_60 \
		-gencode=arch=compute_61,code=sm_61 \
		-gencode=arch=compute_62,code=sm_62 \
		-gencode=arch=compute_70,code=sm_70 \
		-gencode=arch=compute_72,code=sm_72 \
		-gencode=arch=compute_75,code=sm_75
ARFLAGS =

# sources
SRCS += Manager.cu
SRCS += LocalHeap.cu
SRCS += FrameBufferCopy.cu
SRCS += ConvBitToReal.cu
SRCS += Vector.cu
SRCS += MatrixColwiseSum.cu
SRCS += MatrixColwiseMeanVar.cu
SRCS += MatrixRowwiseSetVector.cu
SRCS += MicroMlp.cu
SRCS += BinaryLut6.cu
SRCS += DifferentiableLut.cu
SRCS += StochasticLut.cu
SRCS += StochasticBatchNormalization.cu
SRCS += AverageLut.cu
SRCS += ShuffleModulation.cu
SRCS += RealToBinary.cu
SRCS += BinaryToReal.cu
SRCS += Im2Col.cu
SRCS += Col2Im.cu
SRCS += MaxPooling.cu
SRCS += StochasticMaxPooling.cu
SRCS += UpSampling.cu
SRCS += BatchNormalization.cu
SRCS += ReLU.cu
SRCS += Sigmoid.cu
SRCS += Binarize.cu
SRCS += HardTanh.cu
SRCS += OptimizerAdam.cu
SRCS += LossSoftmaxCrossEntropy.cu
SRCS += LossMeanSquaredError.cu
SRCS += MetricsCategoricalAccuracy.cu
SRCS += Utility.cu

HDRS = ../include/bbcu/bbcu.h
HDRS += ../include/bbcu/bbcu_util.h
HDRS += Common.cuh

OBJS = $(addsuffix .o, $(basename $(SRCS)))

.SUFFIXES: .cu .o

.PHONY: all
all: $(TARGET)

.PHONY: clean
clean:
	$(RM) -f $(TARGET) $(OBJS) depend

$(TARGET): $(OBJS)
	$(AR) $(ARFLAGS) rcs $(TARGET) $(OBJS)

.cu.o:
	$(NVCC) -c $(CUFLAGS) $< -o $@

# every object depends on the public headers
$(OBJS): $(HDRS)

depend: $(SRCS)
	$(NVCC) -M $(CUFLAGS) $^ > $@

# '-include' (instead of 'include') suppresses the warning on a clean tree
# where 'depend' does not exist yet; make still regenerates it via the rule
# above and restarts automatically.
-include depend
--------------------------------------------------------------------------------
/tests/gtest/BitEncodeTest.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include
3 | #include
4 |
5 | #include "gtest/gtest.h"
6 | #include "bb/BitEncode.h"
7 |
8 |
9 |
10 | TEST(BitEncodeTest, testBitEncode_test0)
11 | {
12 | auto bitenc = bb::BitEncode::Create(4);
13 |
14 | bb::FrameBuffer x(2, {3}, BB_TYPE_FP32);
15 | bitenc->SetInputShape(x.GetShape());
16 |
17 | x.SetFP32(0, 0, 0x55 / 255.0f);
18 | x.SetFP32(0, 1, 0x00 / 255.0f);
19 | x.SetFP32(0, 2, 0xff / 255.0f);
20 | x.SetFP32(1, 0, 0x11 / 255.0f);
21 | x.SetFP32(1, 1, 0x22 / 255.0f);
22 | x.SetFP32(1, 2, 0xaa / 255.0f);
23 |
24 | auto y = bitenc->Forward(x);
25 | EXPECT_EQ(bb::DataType::type, y.GetType());
26 | EXPECT_EQ(12, y.GetNodeSize());
27 |
28 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*0+0));
29 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*1+0));
30 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*2+0));
31 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*3+0));
32 |
33 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*0+1));
34 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*1+1));
35 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*2+1));
36 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(0, 3*3+1));
37 |
38 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*3+2));
39 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*3+2));
40 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*3+2));
41 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(0, 3*3+2));
42 |
43 |
44 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(1, 3*0+0));
45 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*1+0));
46 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*2+0));
47 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*3+0));
48 |
49 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*0+1));
50 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(1, 3*1+1));
51 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*2+1));
52 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*3+1));
53 |
54 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*0+2));
55 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(1, 3*1+2));
56 | EXPECT_EQ(BB_BINARY_LO, y.GetFP32(1, 3*2+2));
57 | EXPECT_EQ(BB_BINARY_HI, y.GetFP32(1, 3*3+2));
58 | }
59 |
60 |
61 |
--------------------------------------------------------------------------------
/cuda/MatrixColwiseSum.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | #include "cuda_runtime.h"
5 | #include "device_launch_parameters.h"
6 |
7 | #include "bbcu/bbcu.h"
8 | #include "bbcu/bbcu_util.h"
9 |
10 |
11 | #define THREAD_X_UNIT 512
12 |
13 | // kernel
// Sums each row of x_mat (one "node") over the frame axis and accumulates
// the result into y_vec[node].
//
// Launch layout (see bbcu_fp32_MatrixColwiseSum): grid.x = node count,
// blockDim.x = THREAD_X_UNIT threads that stride over the frame axis.
// Preconditions: blockDim.x must be a power of two (the tree reduction
// reads buf[x + comb]); y_vec must be pre-initialized by the caller since
// the final store is "+=", not "=".
__global__ void kernel_fp32_MatrixColwiseSum(
const float* x_mat,
float* y_vec,
int frame_size,
int frame_stride)
{
__shared__ float buf[THREAD_X_UNIT];

// setup: this block handles row 'node'; threads stride over frames
int node = blockIdx.x;
int frame_base = threadIdx.x;
int frame_step = blockDim.x;

// per-thread partial sum over a strided slice of the frames
float acc = 0;
const float* x_ptr = &x_mat[frame_stride * node];
for ( int frame = frame_base; frame < frame_size; frame += frame_step ) {
acc += x_ptr[frame];
}
buf[threadIdx.x] = acc;

// make all partial sums visible before the shared-memory reduction
__syncthreads();

// pairwise tree reduction in shared memory; the loop condition is uniform
// across the block, so every thread reaches the __syncthreads() below
int x = threadIdx.x;
int comb = 1;
while ( comb < blockDim.x ) {
int next = comb * 2;
int mask = next - 1;
if ( (x & mask) == 0 ) {
buf[x] += buf[x + comb];
}
comb = next;
__syncthreads();
}

// thread 0 holds the block total; accumulate into the output
if ( threadIdx.x == 0 ) {
y_vec[node] += buf[0];
}
}
53 |
54 |
// Host-side launcher: accumulates the column-wise (per-node) sums of
// dev_x_mat into dev_y_vec on the given stream.
//   dev_x_mat    : device pointer, node_size rows of frame_stride floats
//   dev_y_vec    : device pointer, node_size floats (accumulated with +=)
//   node_size    : number of rows / output elements
//   frame_size   : number of valid frames per row
//   frame_stride : allocated row stride in elements
//   streamId     : CUDA stream to launch on
// Returns 0 on success.
// (fix: the launch configuration was garbled as "<<>>"; reconstructed as
//  <<<grid, block, 0, streamId>>> from the locals defined just above)
int bbcu_fp32_MatrixColwiseSum
(
    const float* dev_x_mat,
    float* dev_y_vec,
    int node_size,
    int frame_size,
    int frame_stride,
    cudaStream_t streamId
)
{
    BBCU_DEBUG_ASSERT(bbcu_IsDeviceAvailable());

    // one block per node; THREAD_X_UNIT threads cooperate along the frame axis
    dim3 grid(node_size);
    dim3 block(THREAD_X_UNIT);
    kernel_fp32_MatrixColwiseSum<<<grid, block, 0, streamId>>>(
        dev_x_mat,
        dev_y_vec,
        frame_size,
        frame_stride
    );
    BB_CUDA_CHECK_LAST_ERROR();

    return 0;
}
79 |
80 |
81 |
--------------------------------------------------------------------------------
/tests/gtest/Makefile:
--------------------------------------------------------------------------------
# target
TARGET = gtest

# default flag
DEBUG ?= No
WITH_CUDA ?= Yes
WITH_CEREAL ?= Yes

BBCU_PATH = ../../cuda
BBCU_LIB = $(BBCU_PATH)/libbbcu.a

CEREAL_PATH = ../../cereal

ifeq ($(WITH_CUDA),Yes)
else
CC = g++
#CC ?= clang++
endif

#CFLAGS = -O2 -mavx2 -mfma -fopenmp -std=c++14
CFLAGS = -g -O0 -mavx2 -mfma -std=c++14
CINCS = -I../../include -I../../eigen
CDEFS =
CLIBS = -lgtest_main -lgtest -lpthread

SRCS += BatchNormalizationTest.cpp
SRCS += BinarizeTest.cpp
SRCS += BinaryLutTest.cpp
SRCS += BinaryToRealTest.cpp
SRCS += ConvolutionCol2ImTest.cpp
SRCS += ConvolutionIm2ColTest.cpp
SRCS += DenseAffineTest.cpp
SRCS += FrameBufferTest.cpp
SRCS += LossSoftmaxCrossEntropyTest.cpp
SRCS += LoweringConvolutionTest.cpp
SRCS += MaxPoolingTest.cpp
# SRCS += MemoryTest.cpp
SRCS += MicroMlpAffineTest.cpp
SRCS += OptimizerAdamTest.cpp
SRCS += ReLUTest.cpp
SRCS += RealToBinaryTest.cpp
SRCS += SigmoidTest.cpp
SRCS += TensorTest.cpp
SRCS += VariablesTest.cpp

OBJS = $(addsuffix .o, $(basename $(SRCS)))

LIBS =

ifeq ($(WITH_CEREAL),Yes)
CDEFS += -DBB_WITH_CEREAL
CINCS += -I$(CEREAL_PATH)/include
endif

ifeq ($(WITH_CUDA),Yes)
CC = nvcc
CDEFS += -DBB_WITH_CUDA
CFLAGS := -Xcompiler '$(CFLAGS)'
LIBS += $(BBCU_LIB)
SUB_TARGET += bbcu_build
endif

# fix: all sources here are .cpp, and the suffix rule below is .cpp.o —
# declare the suffixes actually used (the old ".c .o" relied on make's
# built-in default suffix list to make the rule work at all)
.SUFFIXES: .cpp .o

.PHONY: all
all: $(SUB_TARGET) $(TARGET)

.PHONY: clean
clean:
	rm -f $(TARGET) *.o

# note: the MNIST data files are expected to already exist in this directory
.PHONY: run
run: $(TARGET) train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
	./$(TARGET) $(RUN_OPTION)

.PHONY: bbcu_build
bbcu_build:
	make -C $(BBCU_PATH)

$(TARGET): $(OBJS) $(BBCU_LIB)
	$(CC) -o $(TARGET) $(CFLAGS) $(CINCS) $(CDEFS) $(OBJS) $(BBCU_LIB) $(CLIBS)

.cpp.o:
	$(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $<
--------------------------------------------------------------------------------
/samples/verilog/mnist/README.md:
--------------------------------------------------------------------------------
1 | # MNIST Verilog シミュレーションサンプル
2 |
3 | ## ディレクトリの説明
4 |
5 | このディレクトリ以下が MNISTのサンプルの Verilog ソースファイルの出力先ディレクトリとなっています。
6 |
7 | シミュレーションに先立って、対応するサンプルプログラムを実行して学習済みの
8 | Verilog ソースファイルを生成しておく必要があります。
9 |
10 | Verilogシミュレーションに先立って、Python もしくは C++ で学習サンプルを実行ください。
11 |
12 |
13 | - Simple系(tb_mnist_lut_simple ディレクトリ)
14 | - MnistLutSimple.v (Verilogソースコード)
15 | - mnist_test.txt (シミュレーション用データ)
16 |
17 | - CNN系(tb_mnist_lut_cnn)
18 | - MnistLutCnn.v (Verilogソースコード)
19 | - mnist_test_160x120.ppm (シミュレーション用入力画像)
20 | - mnist_test_640x480.ppm (シミュレーション用入力画像)
21 |
22 | - SegmentationAndClassification (tb_mnist_segmentation_and_classification ディレクトリ)
23 | - MnistSegmentationAndClassification.v (Verilogソースコード)
24 | - mnist_test_160x120.ppm (シミュレーション用入力画像)
25 | - mnist_test_640x480.ppm (シミュレーション用入力画像)
26 |
27 |
28 | なお、学習方式が異なるサンプルでも、同じファイルに上書きしますので
29 | どのサンプルを試すかよく確認の上に利用ください。
30 |
31 |
32 | ## シミュレーション実施
33 |
34 | シミュレーションツールには verilator、xsim(Xilinx)、veritak、iverilog の4種のスクリプトを用意しています。
35 |
36 | ただし iverilog は本システムのシミュレーションではかなり遅いようですのでお勧めしません。
37 |
38 |
39 | ### verilator の場合
40 |
41 | verilator のツールにパスが通った状態で、 verilator ディレクトリで
42 |
43 | ```
44 | make
45 | ```
46 |
47 | を実行ください。
48 |
49 | ```
50 | make clean
51 | ```
52 |
53 | で、クリーンナップ出来ます。
54 |
55 |
56 | ### xsim の場合
57 |
58 | Xilinxのツールにパスが通った状態で、xsim ディレクトリで以下のいずれかを実行ください。
59 |
60 | - run_xsim.bat (Windowsの場合)
61 | - run_xsim.sh (Linuxの場合)
62 |
63 |
64 | ### iverilog の場合
65 |
66 | ツールにパスが通った状態で、iverilog ディレクトリで以下のいずれかを実行ください。
67 |
68 | - run_iverilog.sh (Linuxのみ)
69 |
70 | ### Veritak-Win の場合
71 |
veritak ディレクトリにあるプロジェクトファイルを開いて実行ください。
73 |
74 |
75 |
76 | ## 結果確認
77 |
78 | ### Simple版
79 |
80 | Simple版の場合は完了すると、認識率がコンソールに出力されます。
81 |
82 | また vcd ファイルも出力されますので gtkwave などのツールで波形を見ることもできます。
83 |
84 | ### CNN版
85 |
86 | シミュレーションがうまくいくと MaxPooling 層で縮小された後の画像サイズで認識結果で色付けしたものが
87 |
88 | col_0.ppm
89 |
に出力されます。数字付近で目的の色が出ていれば正解です(黒:0, 茶:1, 赤:2, 橙:3, 黄:4, 緑:5, 青:6, 紫:7, 灰:8, 白:9 )。
91 |
92 | pgmやppm などの [PNMファイル](https://en.wikipedia.org/wiki/Netpbm)を見るには IrfanView, gimp, MassiGra(+Susieプラグイン) などがおすすめです。
93 |
94 |
95 |
--------------------------------------------------------------------------------
/python/projects/discrete/Makefile:
--------------------------------------------------------------------------------

# Environment
PYTHON = python3
BB_PATH = ../../..
PACKAGE_PATH = ../../binarybrain
SRC_PATH = $(PACKAGE_PATH)/src

EXT_SUFFIX = $(shell $(PYTHON)-config --extension-suffix)


# target
TARGET_NAME = core
TARGET = $(PACKAGE_PATH)/$(TARGET_NAME)$(EXT_SUFFIX)
SUB_TARGETS =


# run option
RUN_OPTION = All

# default flag
DEBUG ?= No
WITH_CUDA ?= Yes
WITH_CEREAL ?= Yes

BBCU_PATH = $(BB_PATH)/cuda
BBCU_LIB = $(BBCU_PATH)/libbbcu.a

CEREAL_PATH = $(BB_PATH)/cereal

ifeq ($(WITH_CUDA),Yes)
else
#CC = x86_64-linux-gnu-gcc
CC = g++
#CC ?= clang++
endif

# -pthread
CFLAGS = -mavx2 -mfma -fopenmp -std=c++14 -fPIC
CINCS = -I$(BB_PATH)/include $(shell $(PYTHON) -m pybind11 --includes)
CDEFS =

SRCS = $(SRC_PATH)/core_main.cpp
OBJS = $(addsuffix .o, $(basename $(SRCS)))

LIBS = -lstdc++ -lm
# -shared-libgcc

ifeq ($(DEBUG),Yes)
CFLAGS += -O0 -g
else
CFLAGS += -O3
endif

ifeq ($(WITH_CEREAL),Yes)
CDEFS += -DBB_WITH_CEREAL
CINCS += -I$(CEREAL_PATH)/include
endif

ifeq ($(WITH_CUDA),Yes)
CC = nvcc
CFLAGS := -shared -Xcompiler '$(CFLAGS)' -lcublas
CDEFS += -DBB_WITH_CUDA
LIBS += $(BBCU_LIB)
SUB_TARGET += bbcu_build
else
CFLAGS := -shared $(CFLAGS)
endif

.SUFFIXES: .cpp .o

.PHONY: all
all: $(SUB_TARGET) $(TARGET)

.PHONY: clean
clean:
	rm -f $(TARGET) $(OBJS)

.PHONY: mostlyclean
mostlyclean: clean
	make -C $(BBCU_PATH) clean

.PHONY: bbcu_build
bbcu_build:
	make -C $(BBCU_PATH)

$(TARGET): $(OBJS)
	$(CC) -o $(TARGET) $(CFLAGS) $(OBJS) $(LIBS)

.cpp.o:
	$(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $< -o $@

depend: $(SRCS)
	$(CC) -M $(CFLAGS) $(CINCS) $(CDEFS) $^ > $@

# '-include' avoids the missing-file warning on a clean tree; make still
# regenerates 'depend' via the rule above and restarts automatically.
-include depend
--------------------------------------------------------------------------------
/python/projects/thrust/Makefile:
--------------------------------------------------------------------------------
# target name
TARGET_NAME = core

# tools
PYTHON = python3
CC = g++
#CC = clang++
NVCC = nvcc
AR = g++
RM = rm
DEPEND = g++

# path
BB_PATH = ../../..
PACKAGE_PATH = ../../binarybrain
SRC_PATH = $(PACKAGE_PATH)/src
BBCU_PATH = $(BB_PATH)/cuda
CEREAL_PATH = $(BB_PATH)/cereal

# target
EXT_SUFFIX = $(shell $(PYTHON)-config --extension-suffix)
TARGET = $(PACKAGE_PATH)/$(TARGET_NAME)$(EXT_SUFFIX)

# control flag
DEBUG ?= No
WITH_CUDA ?= Yes
WITH_CEREAL ?= Yes

# flags
CFLAGS = -pthread -mavx2 -mfma -fopenmp -std=c++14 -fPIC
CUFLAGS = -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_75,code=sm_75
ARFLAGS = -pthread -fopenmp -fPIC
CINCS = -I$(BB_PATH)/cuda -I$(BB_PATH)/include $(shell $(PYTHON) -m pybind11 --includes)
CDEFS =

# sources
SRCS = $(SRC_PATH)/core_main.cpp

# libraries
LIBS = -lstdc++ -lm


# debug
ifeq ($(DEBUG),Yes)
CFLAGS += -g -O0 -D_DEBUG
else
CFLAGS += -g -O3
endif

# CEREAL
ifeq ($(WITH_CEREAL),Yes)
CDEFS += -DBB_WITH_CEREAL
CINCS += -I$(CEREAL_PATH)/include
endif

# CUDA
ifeq ($(WITH_CUDA),Yes)
SRCS += $(SRC_PATH)/core_bbcu.cu
CC = $(NVCC)
AR = $(NVCC)
LIBS += -lcublas
CFLAGS := $(CUFLAGS) -Xcompiler '$(CFLAGS)'
ARFLAGS := -Xcompiler '$(ARFLAGS)'
CDEFS += -DBB_WITH_CUDA
endif

# objects
OBJS = $(addsuffix .o, $(basename $(SRCS)))


.SUFFIXES: .c .cu .o

.PHONY: all
all: $(TARGET)

.PHONY: clean
clean:
	$(RM) -f $(TARGET) $(OBJS)

$(TARGET): $(OBJS)
	$(AR) -shared $(ARFLAGS) -o $(TARGET) $(OBJS) $(LIBS)

.cpp.o:
	$(CC) $(CFLAGS) $(CINCS) $(CDEFS) -c $< -o $@

# fix: this rule referenced the undefined variable $(CUDAFLAGS), so the
# -gencode architecture flags defined in CUFLAGS were silently dropped
.cu.o:
	$(NVCC) $(CUFLAGS) $(CINCS) $(CDEFS) -std=c++11 -Xcompiler -fPIC -c $< -o $@

depend: $(SRCS)
	$(DEPEND) -M $(CFLAGS) $(CINCS) $(CDEFS) $^ > $@

# '-include' avoids the missing-file warning on a clean tree
-include depend
94 |
--------------------------------------------------------------------------------
/samples/cpp/diabetes/DiabetesRegressionDenseAffine.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // diabetes regression sample
4 | //
5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 |
9 | #include
10 |
11 | #include "bb/Sequential.h"
12 | #include "bb/DenseAffine.h"
13 | #include "bb/ReLU.h"
14 | #include "bb/Sigmoid.h"
15 | #include "bb/MetricsMeanSquaredError.h"
16 | #include "bb/LossMeanSquaredError.h"
17 | #include "bb/OptimizerAdam.h"
18 | #include "bb/OptimizerSgd.h"
19 | #include "bb/Runner.h"
20 | #include "LoadDiabetes.h"
21 |
22 |
23 | void DiabetesAffineRegression(int epoch_size, size_t mini_batch_size)
24 | {
25 | // load diabetes data
26 | auto td = LoadDiabetes<>();
27 | bb::TrainDataNormalize(td);
28 |
29 | auto net = bb::Sequential::Create();
30 | net->Add(bb::DenseAffine<>::Create(512));
31 | net->Add(bb::Sigmoid<>::Create());
32 | net->Add(bb::DenseAffine<>::Create(256));
33 | net->Add(bb::Sigmoid<>::Create());
34 | net->Add(bb::DenseAffine<>::Create(1));
35 | // net->Add(bb::Sigmoid<>::Create());
36 | net->SetInputShape({10});
37 |
38 | bb::FrameBuffer x(mini_batch_size, {10}, BB_TYPE_FP32);
39 | bb::FrameBuffer t(mini_batch_size, {1}, BB_TYPE_FP32);
40 |
41 | bb::Runner::create_t runner_create;
42 | runner_create.name = "DiabetesAffineRegression";
43 | runner_create.net = net;
44 | runner_create.lossFunc = bb::LossMeanSquaredError::Create();
45 | runner_create.metricsFunc = bb::MetricsMeanSquaredError::Create();
46 | // runner_create.optimizer = bb::OptimizerSgd::Create(0.0001f);
47 | runner_create.optimizer = bb::OptimizerAdam::Create();
48 | runner_create.write_serial = false;
49 | runner_create.file_read = false;
50 | runner_create.file_write = true;
51 | runner_create.print_progress = false;
52 | runner_create.initial_evaluation = false;
53 | auto runner = bb::Runner::Create(runner_create);
54 |
55 | runner->Fitting(td, epoch_size, mini_batch_size);
56 | }
57 |
58 |
--------------------------------------------------------------------------------
/tests/cpp/diabetes/DiabetesRegressionDenseAffine.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // diabetes regression sample
4 | //
5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 |
9 | #include
10 |
11 | #include "bb/Sequential.h"
12 | #include "bb/DenseAffine.h"
13 | #include "bb/ReLU.h"
14 | #include "bb/Sigmoid.h"
15 | #include "bb/MetricsMeanSquaredError.h"
16 | #include "bb/LossMeanSquaredError.h"
17 | #include "bb/OptimizerAdam.h"
18 | #include "bb/OptimizerSgd.h"
19 | #include "bb/Runner.h"
20 | #include "LoadDiabetes.h"
21 |
22 |
// Trains a small dense-affine regression network on the diabetes dataset.
//   epoch_size      : number of training epochs passed to Runner::Fitting
//   mini_batch_size : mini-batch size used for training
void DiabetesAffineRegression(int epoch_size, size_t mini_batch_size)
{
    // load diabetes data and normalize it for training
    auto td = LoadDiabetes<>();
    bb::TrainDataNormalize(td);

    // network: 10 inputs -> 512 -> 256 -> 1, sigmoid activations in between
    auto net = bb::Sequential::Create();
    net->Add(bb::DenseAffine<>::Create(512));
    net->Add(bb::Sigmoid<>::Create());
    net->Add(bb::DenseAffine<>::Create(256));
    net->Add(bb::Sigmoid<>::Create());
    net->Add(bb::DenseAffine<>::Create(1));
//  net->Add(bb::Sigmoid<>::Create());
    net->SetInputShape({10});

    // NOTE(review): x and t are allocated but never referenced below —
    // they appear to be dead code; confirm and remove.
    bb::FrameBuffer x(mini_batch_size, {10}, BB_TYPE_FP32);
    bb::FrameBuffer t(mini_batch_size, {1}, BB_TYPE_FP32);

    // training configuration: MSE loss/metric with the Adam optimizer
    bb::Runner::create_t runner_create;
    runner_create.name = "DiabetesAffineRegression";
    runner_create.net = net;
    runner_create.lossFunc = bb::LossMeanSquaredError::Create();
    runner_create.metricsFunc = bb::MetricsMeanSquaredError::Create();
//  runner_create.optimizer = bb::OptimizerSgd::Create(0.0001f);
    runner_create.optimizer = bb::OptimizerAdam::Create();
    runner_create.write_serial = false;
    runner_create.file_read = false;
    runner_create.file_write = true;
    runner_create.print_progress = false;
    runner_create.initial_evaluation = false;
    auto runner = bb::Runner::Create(runner_create);

    runner->Fitting(td, epoch_size, mini_batch_size);
}
57 |
58 |
--------------------------------------------------------------------------------
/tests/cpp/xor/StochasticLut6.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // MNIST sample
4 | //
5 | // Copyright (C) 2018 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 |
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include "bb/RealToBinary.h"
16 | #include "bb/BinaryToReal.h"
17 | #include "bb/StochasticLutN.h"
18 | #include "bb/BatchNormalization.h"
19 | #include "bb/Sigmoid.h"
20 | #include "bb/ReLU.h"
21 | #include "bb/LossMeanSquaredError.h"
22 | #include "bb/MetricsBinaryAccuracy.h"
23 | #include "bb/OptimizerAdam.h"
24 | #include "bb/LoadXor.h"
25 | #include "bb/Utility.h"
26 | #include "bb/Sequential.h"
27 | #include "bb/Runner.h"
28 |
29 |
30 | // MNIST CNN with LUT networks
31 | void StochasticLut6(int epoch_size, bool binary_mode)
32 | {
33 | // load data
34 | auto td = bb::LoadXor<>::Load(6, 256);
35 |
36 | /*
37 | for (int i = 0; i < 64; ++i) {
38 | std::cout << td.t_train[i][0] << " : ";
39 | for (int j = 0; j < 6; ++j) {
40 | std::cout << td.x_train[i][j] << " ";
41 | }
42 | std::cout << std::endl;
43 | }
44 | */
45 |
46 | auto net = bb::Sequential::Create();
47 | net->Add(bb::StochasticLutN<6>::Create(td.t_shape));
48 | net->SetInputShape(td.x_shape);
49 |
50 | if ( binary_mode ) {
51 | net->SendCommand("binary true");
52 | std::cout << "binary mode" << std::endl;
53 | }
54 |
55 | bb::Runner::create_t runner_create;
56 | runner_create.name = "StochasticLut6";
57 | runner_create.net = net;
58 | runner_create.lossFunc = bb::LossMeanSquaredError::Create();
59 | runner_create.metricsFunc = bb::MetricsBinaryAccuracy::Create();
60 | runner_create.optimizer = bb::OptimizerAdam::Create();
61 | runner_create.print_progress = true;
62 | runner_create.file_write = true;
63 | runner_create.initial_evaluation = false;
64 | auto runner = bb::Runner::Create(runner_create);
65 |
66 | runner->Fitting(td, epoch_size, (1 << 6));
67 | }
68 |
69 |
--------------------------------------------------------------------------------
/samples/cpp/mnist/sample_mnist.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | ソース ファイル
20 |
21 |
22 | ソース ファイル
23 |
24 |
25 | ソース ファイル
26 |
27 |
28 | ソース ファイル
29 |
30 |
31 | ソース ファイル
32 |
33 |
34 | ソース ファイル
35 |
36 |
37 | ソース ファイル
38 |
39 |
40 | ソース ファイル
41 |
42 |
43 | ソース ファイル
44 |
45 |
46 | ソース ファイル
47 |
48 |
49 | ソース ファイル
50 |
51 |
52 | ソース ファイル
53 |
54 |
55 | ソース ファイル
56 |
57 |
58 |
--------------------------------------------------------------------------------
/tests/cpp/xor/XorMicroMlp.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // MNIST sample
4 | //
5 | // Copyright (C) 2018 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 |
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include "bb/RealToBinary.h"
16 | #include "bb/BinaryToReal.h"
17 | #include "bb/MicroMlpAffine.h"
18 | #include "bb/StochasticLutN.h"
19 | #include "bb/BatchNormalization.h"
20 | #include "bb/Sigmoid.h"
21 | #include "bb/ReLU.h"
22 | #include "bb/LossMeanSquaredError.h"
23 | #include "bb/MetricsBinaryAccuracy.h"
24 | #include "bb/OptimizerAdam.h"
25 | #include "bb/LoadXor.h"
26 | #include "bb/Utility.h"
27 | #include "bb/Sequential.h"
28 | #include "bb/Runner.h"
29 |
30 |
// XOR learning sample using a MicroMlp network
// (NOTE(review): the original heading said "MNIST CNN with LUT networks",
//  which does not match this 6-input XOR sample — corrected here)
//
// epoch_size  : number of training epochs passed to the Runner
// binary_mode : when true, switches the net to binary operation via SendCommand
void XorMicroMlp(int epoch_size, bool binary_mode)
{
    // load data : 6-input XOR problem, 256 samples
    auto td = bb::LoadXor<>::Load(6, 256);

    /*
    // debug dump of the generated training data
    for (int i = 0; i < 64; ++i) {
        std::cout << td.t_train[i][0] << " : ";
        for (int j = 0; j < 6; ++j) {
            std::cout << td.x_train[i][j] << " ";
        }
        std::cout << std::endl;
    }
    */

    // build the network: MicroMlp affine -> batch norm -> sigmoid
    // NOTE(review): template argument lists on some Create() calls may have
    // been dropped during extraction — confirm against the original file.
    auto net = bb::Sequential::Create();
    net->Add(bb::MicroMlpAffine<6, 16, float>::Create(td.t_shape));
    net->Add(bb::BatchNormalization::Create());
    net->Add(bb::Sigmoid::Create());
    net->SetInputShape(td.x_shape);

    if ( binary_mode ) {
        net->SendCommand("binary true");
        std::cout << "binary mode" << std::endl;
    }

    // configure and run training (mini-batch size 64 = 1 << 6)
    bb::Runner::create_t runner_create;
    runner_create.name = "XorMicroMlp";
    runner_create.net = net;
    runner_create.lossFunc = bb::LossMeanSquaredError::Create();
    runner_create.metricsFunc = bb::MetricsBinaryAccuracy::Create();
    runner_create.optimizer = bb::OptimizerAdam::Create();
    runner_create.print_progress = true;
    runner_create.file_write = true;
    runner_create.initial_evaluation = false;
    auto runner = bb::Runner::Create(runner_create);

    runner->Fitting(td, epoch_size, (1 << 6));
}
71 |
72 |
--------------------------------------------------------------------------------
/tests/gtest/ConcatenateTest.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "gtest/gtest.h"
5 |
6 | #include "bbcu/bbcu.h"
7 | #include "bb/Concatenate.h"
8 |
9 |
10 |
11 | #ifdef BB_WITH_CUDA
12 |
13 |
// Dump every (frame, node) element of a FrameBuffer to stdout (debug helper).
// NOTE(review): the template parameter list on the next line appears to have
// been stripped during extraction (likely "template<typename T>" with a
// matching buf.Get<T>(...) below) — confirm against the original file.
template
void PrintFrameBuf(bb::FrameBuffer buf)
{
    for (bb::index_t frame = 0; frame < buf.GetFrameSize(); ++frame) {
        for (bb::index_t node = 0; node < buf.GetNodeSize(); ++node) {
            std::cout << "[" << frame << "]" << "[" << node << "] : "
                << buf.Get(frame, node) << std::endl;
        }
    }
}
24 |
25 |
// Concatenate: forward joins x0 (2 nodes) and x1 (3 nodes) into one 5-node
// buffer; backward must split the gradient back into the original pieces.
// NOTE(review): some template argument lists (e.g. on std::normal_distribution)
// appear to have been lost during extraction — restore before compiling.
TEST(ConcatenateTest, testConcatenateTest)
{
    int frame_size = 4;
    int node0_size = 2;
    int node1_size = 3;

    bb::FrameBuffer x0_buf(frame_size, {node0_size}, BB_TYPE_FP32);
    bb::FrameBuffer x1_buf(frame_size, {node1_size}, BB_TYPE_FP32);

    // fill both inputs with reproducible normal-random values (fixed seed)
    std::mt19937_64 mt(1);
    std::normal_distribution dist(0.0f, 1.0f);
    for (int frame = 0; frame < frame_size; ++frame ) {
        for (int node = 0; node < node0_size; ++node ) {
            x0_buf.SetFP32(frame, node, dist(mt));
        }
        for (int node = 0; node < node1_size; ++node ) {
            x1_buf.SetFP32(frame, node, dist(mt));
        }
    }

    auto concat = bb::Concatenate::Create();

    // forward: expect a single output buffer holding all 5 nodes
    auto y_bufs = concat->ForwardMulti({x0_buf, x1_buf});
    EXPECT_EQ(y_bufs.size(), 1);
    EXPECT_EQ(y_bufs[0].GetFrameSize(), frame_size);
    EXPECT_EQ(y_bufs[0].GetNodeSize(), node0_size+node1_size);

    // backward: passing y straight back must reproduce both inputs exactly
    auto dx_bufs = concat->BackwardMulti(y_bufs);


    for (int frame = 0; frame < frame_size; ++frame ) {
        for (int node = 0; node < node0_size; ++node ) {
            EXPECT_EQ(x0_buf.GetFP32(frame, node), dx_bufs[0].GetFP32(frame, node));
        }
        for (int node = 0; node < node1_size; ++node ) {
            EXPECT_EQ(x1_buf.GetFP32(frame, node), dx_bufs[1].GetFP32(frame, node));
        }
    }

#if 0
    // manual dump for debugging
    std::cout << "x0_buf" << std::endl;
    PrintFrameBuf(x0_buf);
    std::cout << "x1_buf" << std::endl;
    PrintFrameBuf(x1_buf);
    std::cout << "y_bufs" << std::endl;
    PrintFrameBuf(y_bufs[0]);
#endif
}
74 |
75 |
76 | #endif
77 |
78 |
79 | // end of file
80 |
81 |
--------------------------------------------------------------------------------
/tests/cpp/mnist/test_mnist.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.7.34003.232
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_mnist", "test_mnist.vcxproj", "{4D95C63D-3452-42FF-97AC-7E51A32DBCAD}"
7 | ProjectSection(ProjectDependencies) = postProject
8 | {FEADE517-59B9-4551-AD9D-D181A1442EA7} = {FEADE517-59B9-4551-AD9D-D181A1442EA7}
9 | EndProjectSection
10 | EndProject
11 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bbcu", "..\..\..\cuda\bbcu.vcxproj", "{FEADE517-59B9-4551-AD9D-D181A1442EA7}"
12 | EndProject
13 | Global
14 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
15 | DbgCpu|x64 = DbgCpu|x64
16 | Debug|x64 = Debug|x64
17 | RelCpu|x64 = RelCpu|x64
18 | Release|x64 = Release|x64
19 | EndGlobalSection
20 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
21 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.DbgCpu|x64.ActiveCfg = DbgCpu|x64
22 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.DbgCpu|x64.Build.0 = DbgCpu|x64
23 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Debug|x64.ActiveCfg = Debug|x64
24 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Debug|x64.Build.0 = Debug|x64
25 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.RelCpu|x64.ActiveCfg = RelCpu|x64
26 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.RelCpu|x64.Build.0 = RelCpu|x64
27 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Release|x64.ActiveCfg = Release|x64
28 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Release|x64.Build.0 = Release|x64
29 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.DbgCpu|x64.ActiveCfg = DbgCpu|x64
30 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.DbgCpu|x64.Build.0 = DbgCpu|x64
31 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.ActiveCfg = Debug|x64
32 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.Build.0 = Debug|x64
33 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.RelCpu|x64.ActiveCfg = RelCpu|x64
34 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.RelCpu|x64.Build.0 = RelCpu|x64
35 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.ActiveCfg = Release|x64
36 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.Build.0 = Release|x64
37 | EndGlobalSection
38 | GlobalSection(SolutionProperties) = preSolution
39 | HideSolutionNode = FALSE
40 | EndGlobalSection
41 | GlobalSection(ExtensibilityGlobals) = postSolution
42 | SolutionGuid = {6AC5BC58-661C-438A-AA75-EE403E3EDE93}
43 | EndGlobalSection
44 | EndGlobal
45 |
--------------------------------------------------------------------------------
/samples/cpp/mnist/sample_mnist.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.7.34003.232
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sample_mnist", "sample_mnist.vcxproj", "{4D95C63D-3452-42FF-97AC-7E51A32DBCAD}"
7 | ProjectSection(ProjectDependencies) = postProject
8 | {FEADE517-59B9-4551-AD9D-D181A1442EA7} = {FEADE517-59B9-4551-AD9D-D181A1442EA7}
9 | EndProjectSection
10 | EndProject
11 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bbcu", "..\..\..\cuda\bbcu.vcxproj", "{FEADE517-59B9-4551-AD9D-D181A1442EA7}"
12 | EndProject
13 | Global
14 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
15 | DbgCpu|x64 = DbgCpu|x64
16 | Debug|x64 = Debug|x64
17 | RelCpu|x64 = RelCpu|x64
18 | Release|x64 = Release|x64
19 | EndGlobalSection
20 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
21 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.DbgCpu|x64.ActiveCfg = DbgCpu|x64
22 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.DbgCpu|x64.Build.0 = DbgCpu|x64
23 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Debug|x64.ActiveCfg = Debug|x64
24 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Debug|x64.Build.0 = Debug|x64
25 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.RelCpu|x64.ActiveCfg = RelCpu|x64
26 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.RelCpu|x64.Build.0 = RelCpu|x64
27 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Release|x64.ActiveCfg = Release|x64
28 | {4D95C63D-3452-42FF-97AC-7E51A32DBCAD}.Release|x64.Build.0 = Release|x64
29 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.DbgCpu|x64.ActiveCfg = DbgCpu|x64
30 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.DbgCpu|x64.Build.0 = DbgCpu|x64
31 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.ActiveCfg = Debug|x64
32 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Debug|x64.Build.0 = Debug|x64
33 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.RelCpu|x64.ActiveCfg = RelCpu|x64
34 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.RelCpu|x64.Build.0 = RelCpu|x64
35 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.ActiveCfg = Release|x64
36 | {FEADE517-59B9-4551-AD9D-D181A1442EA7}.Release|x64.Build.0 = Release|x64
37 | EndGlobalSection
38 | GlobalSection(SolutionProperties) = preSolution
39 | HideSolutionNode = FALSE
40 | EndGlobalSection
41 | GlobalSection(ExtensibilityGlobals) = postSolution
42 | SolutionGuid = {6AC5BC58-661C-438A-AA75-EE403E3EDE93}
43 | EndGlobalSection
44 | EndGlobal
45 |
--------------------------------------------------------------------------------
/samples/cpp/cifar10/sample_cifar10.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | $(ProjectDir)
5 | WindowsLocalDebugger
6 |
7 |
8 | $(ProjectDir)
9 | WindowsLocalDebugger
10 |
11 |
12 | $(ProjectDir)
13 | WindowsLocalDebugger
14 | All -num_threads 4 -print_device
15 |
16 |
17 | $(ProjectDir)
18 | WindowsLocalDebugger
19 | All -num_threads 4 -print_device
20 |
21 |
22 | $(ProjectDir)
23 | WindowsLocalDebugger
24 |
25 |
26 | $(ProjectDir)
27 | WindowsLocalDebugger
28 | All -num_threads 4 -print_device
29 |
30 |
31 | $(ProjectDir)
32 | WindowsLocalDebugger
33 | All -num_threads 4 -print_device
34 |
35 |
36 | $(ProjectDir)
37 | WindowsLocalDebugger
38 |
39 |
--------------------------------------------------------------------------------
/samples/cpp/mnist/MnistLoadNet.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // MNIST sample
4 | //
5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 |
9 | #include
10 | #include
11 |
12 | #include "bb/Sequential.h"
13 | #include "bb/DenseAffine.h"
14 | #include "bb/BatchNormalization.h"
15 | #include "bb/ReLU.h"
16 | #include "bb/Convolution2d.h"
17 | #include "bb/MaxPooling.h"
18 | #include "bb/BinaryModulation.h"
19 | #include "bb/OptimizerAdam.h"
20 | #include "bb/LossSoftmaxCrossEntropy.h"
21 | #include "bb/MetricsCategoricalAccuracy.h"
22 | #include "bb/Runner.h"
23 | #include "bb/LoadMnist.h"
24 | #include "bb/ModelLoader.h"
25 |
26 |
// Load a serialized network from file, print it, and train/evaluate on MNIST.
//
// epoch_size      : number of epochs passed to Runner::Fitting
// mini_batch_size : mini-batch size passed to Runner::Fitting
// filename        : path of a model file created by bb::Model serialization
void MnistLoadNet(int epoch_size, int mini_batch_size, std::string filename)
{
    // load MNIST data (a reduced set in debug builds to keep iteration fast)
#ifdef _DEBUG
    auto td = bb::LoadMnist<>::Load(512, 128);
    std::cout << "!!! debug mode !!!" << std::endl;
#else
    auto td = bb::LoadMnist<>::Load();
#endif

    // load the network from file
    auto net = bb::Model_LoadFromFile(filename);
    if (!net) {
        std::cerr << "file read error : " << filename << std::endl;
        return;
    }

    // set input shape
    // net->SetInputShape(td.x_shape);

    // print model information
    net->PrintInfo();

    std::cout << "-----------------------------------" << std::endl;
    std::cout << "epoch_size : " << epoch_size << std::endl;
    std::cout << "mini_batch_size : " << mini_batch_size << std::endl;
    std::cout << "-----------------------------------" << std::endl;

    // run fitting
    bb::Runner::create_t runner_create;
    runner_create.name = net->GetName();
    runner_create.net = net;
    runner_create.lossFunc = bb::LossSoftmaxCrossEntropy::Create();
    runner_create.metricsFunc = bb::MetricsCategoricalAccuracy::Create();
    runner_create.optimizer = bb::OptimizerAdam::Create();
    runner_create.print_progress = true; // show intermediate results
    runner_create.initial_evaluation = true; // evaluate up front since the model was loaded from a file
    auto runner = bb::Runner::Create(runner_create);
    runner->Fitting(td, epoch_size, mini_batch_size);
}
67 |
68 |
69 | // end of file
70 |
--------------------------------------------------------------------------------
/tests/cpp/mnist/MnistLoadNet.cpp:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // BinaryBrain -- binary network evaluation platform
3 | // MNIST sample
4 | //
5 | // Copyright (C) 2018-2019 by Ryuji Fuchikami
6 | // --------------------------------------------------------------------------
7 |
8 |
9 | #include
10 | #include
11 |
12 | #include "bb/Sequential.h"
13 | #include "bb/DenseAffine.h"
14 | #include "bb/BatchNormalization.h"
15 | #include "bb/ReLU.h"
16 | #include "bb/Convolution2d.h"
17 | #include "bb/MaxPooling.h"
18 | #include "bb/BinaryModulation.h"
19 | #include "bb/OptimizerAdam.h"
20 | #include "bb/LossSoftmaxCrossEntropy.h"
21 | #include "bb/MetricsCategoricalAccuracy.h"
22 | #include "bb/Runner.h"
23 | #include "bb/LoadMnist.h"
24 | #include "bb/ModelLoader.h"
25 |
26 |
// Load a serialized network from file, print it, and train/evaluate on MNIST.
//
// epoch_size      : number of epochs passed to Runner::Fitting
// mini_batch_size : mini-batch size passed to Runner::Fitting
// filename        : path of a model file created by bb::Model serialization
void MnistLoadNet(int epoch_size, int mini_batch_size, std::string filename)
{
    // load MNIST data (a reduced set in debug builds to keep iteration fast)
#ifdef _DEBUG
    auto td = bb::LoadMnist<>::Load(512, 128);
    std::cout << "!!! debug mode !!!" << std::endl;
#else
    auto td = bb::LoadMnist<>::Load();
#endif

    // load the network from file
    auto net = bb::Model_LoadFromFile(filename);
    if (!net) {
        std::cerr << "file read error : " << filename << std::endl;
        return;
    }

    // set input shape
    // net->SetInputShape(td.x_shape);

    // print model information
    net->PrintInfo();

    std::cout << "-----------------------------------" << std::endl;
    std::cout << "epoch_size : " << epoch_size << std::endl;
    std::cout << "mini_batch_size : " << mini_batch_size << std::endl;
    std::cout << "-----------------------------------" << std::endl;

    // run fitting
    bb::Runner::create_t runner_create;
    runner_create.name = net->GetName();
    runner_create.net = net;
    runner_create.lossFunc = bb::LossSoftmaxCrossEntropy::Create();
    runner_create.metricsFunc = bb::MetricsCategoricalAccuracy::Create();
    runner_create.optimizer = bb::OptimizerAdam::Create();
    runner_create.print_progress = true; // show intermediate results
    runner_create.initial_evaluation = true; // evaluate up front since the model was loaded from a file
    auto runner = bb::Runner::Create(runner_create);
    runner->Fitting(td, epoch_size, mini_batch_size);
}
67 |
68 |
69 | // end of file
70 |
--------------------------------------------------------------------------------
/include/bb/Activation.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 |
11 | #pragma once
12 |
13 | #include
14 | #include
15 |
16 | #include "bb/Manager.h"
17 | #include "bb/Model.h"
18 |
19 |
20 | namespace bb {
21 |
22 |
23 | // Activation
// Activation : common base class for shape-preserving activation models
class Activation : public Model
{
    using _super = Model;

protected:
    indices_t m_shape; //< input/output shape (activations keep the shape)

public:
    /**
     * @brief  set input shape
     * @detail Sets the input shape; afterwards GetOutputShape() returns a value.
     *         NOTE(review): the original comment stated that internal state is
     *         re-initialized even when the same shape is given again, but the
     *         code below returns early in that case — confirm intended behavior.
     * @param  shape shape of the nodes making up one frame
     * @return the output shape
     */
    indices_t SetInputShape(indices_t shape) override
    {
        // nothing to do if the shape is unchanged
        if ( shape == this->GetInputShape() ) {
            return this->GetOutputShape();
        }

        m_shape = shape;
        return m_shape;
    }

    /**
     * @brief  get input shape
     * @return the input shape
     */
    indices_t GetInputShape(void) const override
    {
        return m_shape;
    }

    /**
     * @brief  get output shape
     * @return the output shape (identical to the input shape)
     */
    indices_t GetOutputShape(void) const override
    {
        return m_shape;
    }


protected:

    // serialize object data: version first, then base class, then members
    void DumpObjectData(std::ostream &os) const override
    {
        // version
        std::int64_t ver = 1;
        bb::SaveValue(os, ver);

        // base class
        _super::DumpObjectData(os);

        // members
        bb::SaveValue(os, m_shape);
    }

    // deserialize object data written by DumpObjectData
    void LoadObjectData(std::istream &is) override
    {
        // version
        std::int64_t ver;
        bb::LoadValue(is, ver);

        BB_ASSERT(ver == 1);

        // base class
        _super::LoadObjectData(is);

        // members
        bb::LoadValue(is, m_shape);
    }

};
103 |
104 |
105 | };
106 |
107 |
--------------------------------------------------------------------------------
/tests/gtest/DepthwiseDenseAffineTest.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include
3 | #include
4 |
5 | #include "gtest/gtest.h"
6 | #include "bb/DepthwiseDenseAffine.h"
7 |
8 |
// DepthwiseDenseAffine: each of the 3 output nodes owns its own 2-input
// affine (W, b). Forward and backward results are checked against
// hand-computed values.
TEST(DepthwiseDenseAffineTest, testAffine)
{
    auto affine = bb::DepthwiseDenseAffine<>::Create({3});

    // input: 3 depthwise groups x 2 inputs each
    affine->SetInputShape({3, 2});

    // forward
    bb::FrameBuffer x_buf(1, {3, 2}, BB_TYPE_FP32);

    // x = [1, 2 | 3, 4 | 5, 6] (group-major)
    x_buf.SetFP32(0, 0, 1);
    x_buf.SetFP32(0, 1, 2);
    x_buf.SetFP32(0, 2, 3);
    x_buf.SetFP32(0, 3, 4);
    x_buf.SetFP32(0, 4, 5);
    x_buf.SetFP32(0, 5, 6);

    EXPECT_EQ(1, x_buf.GetFP32(0, 0));
    EXPECT_EQ(2, x_buf.GetFP32(0, 1));
    EXPECT_EQ(3, x_buf.GetFP32(0, 2));
    EXPECT_EQ(4, x_buf.GetFP32(0, 3));
    EXPECT_EQ(5, x_buf.GetFP32(0, 4));
    EXPECT_EQ(6, x_buf.GetFP32(0, 5));

    {
        // weights are indexed W(group, output, input), biases b(group, output)
        auto W = affine->lock_W();
        auto b = affine->lock_b();
        W(0, 0, 0) = 1;
        W(0, 0, 1) = 2;
        W(1, 0, 0) = 10;
        W(1, 0, 1) = 20;
        W(2, 0, 0) = 100;
        W(2, 0, 1) = 200;
        b(0, 0) = 1000;
        b(1, 0) = 2000;
        b(2, 0) = 3000;
    }

    auto y_buf = affine->Forward(x_buf);

    // y[g] = x[g,0]*W[g,0,0] + x[g,1]*W[g,0,1] + b[g,0]
    EXPECT_EQ(1 * 1 + 2 * 2 + 1000, y_buf.GetFP32(0, 0));
    EXPECT_EQ(3 * 10 + 4 * 20 + 2000, y_buf.GetFP32(0, 1));
    EXPECT_EQ(5 * 100 + 6 * 200 + 3000, y_buf.GetFP32(0, 2));


    // backward
    bb::FrameBuffer dy_buf(1, {3}, BB_TYPE_FP32);

    dy_buf.SetFP32(0, 0, 123);
    dy_buf.SetFP32(0, 1, 456);
    dy_buf.SetFP32(0, 2, 789);

    auto dx_buf = affine->Backward(dy_buf);

    // dx[g,i] = dy[g] * W[g,0,i]
    EXPECT_EQ(123 * 1, dx_buf.GetFP32(0, 0));
    EXPECT_EQ(123 * 2, dx_buf.GetFP32(0, 1));
    EXPECT_EQ(456 * 10, dx_buf.GetFP32(0, 2));
    EXPECT_EQ(456 * 20, dx_buf.GetFP32(0, 3));
    EXPECT_EQ(789 * 100, dx_buf.GetFP32(0, 4));
    EXPECT_EQ(789 * 200, dx_buf.GetFP32(0, 5));

    {
        // db[g] = dy[g]
        auto db = affine->lock_db_const();

        EXPECT_EQ(123, db(0, 0));
        EXPECT_EQ(456, db(1, 0));
        EXPECT_EQ(789, db(2, 0));
    }

    {
        // dW[g,0,i] = x[g,i] * dy[g]
        auto dW = affine->lock_dW_const();

        EXPECT_EQ(1 * 123, dW(0, 0, 0));
        EXPECT_EQ(2 * 123, dW(0, 0, 1));
        EXPECT_EQ(3 * 456, dW(1, 0, 0));
        EXPECT_EQ(4 * 456, dW(1, 0, 1));
        EXPECT_EQ(5 * 789, dW(2, 0, 0));
        EXPECT_EQ(6 * 789, dW(2, 0, 1));
    }
}
88 |
89 |
--------------------------------------------------------------------------------
/cuda/OptimizerAdam.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | #include "cuda_runtime.h"
5 | #include "device_launch_parameters.h"
6 |
7 | #include "bbcu/bbcu.h"
8 | #include "bbcu/bbcu_util.h"
9 |
10 |
11 |
// Adam parameter update (fp32), batched over multiple parameter tensors.
//
// Grid layout: each y-index (blockDim.y * blockIdx.y + threadIdx.y) selects
// one tensor through the pointer tables; the x-threads of the block stride
// over that tensor's elements.
// lr_t is the bias-corrected step size prepared by the host wrapper;
// neg_beta1 / neg_beta2 are (1 - beta1) and (1 - beta2).
__global__ void kernal_fp32_OptimizerAdam(
            int const *size_table,
            float * const *params_buf_table,
            float * const *grads_buf_table,
            float * const *m_buf_table,
            float * const *v_buf_table,
            float lr_t,
            float neg_beta1,
            float neg_beta2
        )
{
    int id = threadIdx.x;
    int id_step = blockDim.x;
    int index = blockDim.y * blockIdx.y + threadIdx.y;

    int size = size_table[index];

    float *params_buf = params_buf_table[index];
    float *grads_buf = grads_buf_table[index];
    float *m_buf = m_buf_table[index];
    float *v_buf = v_buf_table[index];

    for ( int n = id; n < size; n += id_step ) {
        float param = params_buf[n];
        float grad = grads_buf[n];
        float m = m_buf[n];
        float v = v_buf[n];

        // exponential moving averages of the gradient and squared gradient
        m += neg_beta1 * (grad - m);
        v += neg_beta2 * (grad * grad - v);
        // sqrtf and a float epsilon keep the update entirely in single
        // precision (sqrt/1e-7 silently promoted the divide to double)
        param -= lr_t * m / (sqrtf(v) + 1e-7f);

        m_buf[n] = m;
        v_buf[n] = v;
        params_buf[n] = param;
        // grads_buf[n] = 0;
    }
}
50 |
51 |
// Host wrapper for the batched fp32 Adam update.
// size is the number of parameter tensors (one grid row each); the launch is
// issued on streamId (the original launch configuration was lost during
// extraction — restored here so streamId is actually honored).
BBCU_DLL_EXPORT int bbcu_fp32_OptimizerAdam
        (
            int size,
            int const *dev_size_table,
            float * const *dev_params_buf_table,
            float * const *dev_grads_buf_table,
            float * const *dev_m_buf_table,
            float * const *dev_v_buf_table,
            float lr_t,
            float beta1,
            float beta2,
            cudaStream_t streamId
        )
{
    BBCU_DEBUG_ASSERT(bbcu_IsDeviceAvailable());

    // one grid row per tensor; 192 threads stride over its elements
    dim3 grid(1, size);
    dim3 block(192, 1);

    kernal_fp32_OptimizerAdam<<<grid, block, 0, streamId>>>(
            dev_size_table,
            dev_params_buf_table,
            dev_grads_buf_table,
            dev_m_buf_table,
            dev_v_buf_table,
            lr_t,
            (1.0f - beta1),
            (1.0f - beta2)
        );
    BB_CUDA_CHECK_LAST_ERROR();

    return 0;
}
85 |
86 | // end of file
87 |
--------------------------------------------------------------------------------
/python/binarybrain/metrics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import binarybrain as bb
4 | import binarybrain.core as core
5 | import numpy as np
6 | from typing import List
7 |
8 |
9 |
class Metrics(bb.Object):
    """Metrics class

    Base class of evaluation metrics; delegates to a core metrics object.
    """

    def __init__(self, core_metrics=None):
        super(Metrics, self).__init__(core_object=core_metrics)

    def clear(self):
        """Clear the accumulated values.

        Resets the running totals; typically cleared once per epoch before
        re-accumulating.
        """
        self.get_core().clear()

    def get(self):
        """Get the current value.

        Returns:
            metrics(float) : the metric value accumulated so far
        """
        return self.get_core().get_metrics()

    def calculate(self, y_buf, t_buf):
        """Accumulate the metric for one mini-batch.

        Args:
            y_buf (FrameBuffer): result of the forward pass
            t_buf (FrameBuffer): teacher data
        """
        return self.get_core().calculate_metrics(y_buf.get_core(), t_buf.get_core())

    def get_metrics_string(self):
        """Get a string describing what this metric measures.

        Each metric has its own unit (e.g. mean squared error, accuracy),
        so this returns a label for the value obtained with get(), usable
        for display and logging.

        Returns:
            metrics_string (str): description of the measured quantity
        """
        return self.get_core().get_metrics_string()
53 |
54 |
class MetricsMeanSquaredError(Metrics):
    """Mean-squared-error metric.

    Accumulates the mean squared error between the network output and the
    teacher signal.
    """

    def __init__(self, dtype=bb.DType.FP32):
        factory = bb.search_core_object('MetricsMeanSquaredError', [dtype])
        super(MetricsMeanSquaredError, self).__init__(core_metrics=factory.create())
65 |
66 |
class MetricsCategoricalAccuracy(Metrics):
    """Multi-class classification metric.

    Reports the match rate against the teacher labels as accuracy.
    """

    def __init__(self, dtype=bb.DType.FP32):
        factory = bb.search_core_object('MetricsCategoricalAccuracy', [dtype])
        super(MetricsCategoricalAccuracy, self).__init__(core_metrics=factory.create())
77 |
78 |
class MetricsBinaryCategoricalAccuracy(Metrics):
    """Two-class classification metric.

    Reports the match rate against the teacher labels as accuracy.
    """

    def __init__(self, dtype=bb.DType.FP32):
        factory = bb.search_core_object('MetricsBinaryCategoricalAccuracy', [dtype])
        super(MetricsBinaryCategoricalAccuracy, self).__init__(core_metrics=factory.create())
89 |
90 |
--------------------------------------------------------------------------------
/python/binarybrain/hls.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import io
4 | import numpy as np
5 |
6 | import binarybrain as bb
7 | import binarybrain.core as core
8 |
9 |
def make_lut_func_name(name, node):
    """Return the HLS function name for one LUT node, e.g. "net_lut_3"."""
    return "{}_lut_{}".format(name, node)
12 |
13 |
def dump_hls_lut_node(f, name, lut, node):
    """Write the HLS source of one single-output LUT function to f.

    Emits an inline ap_uint<1> function that packs its 1-bit inputs into an
    index word and looks the result up in a constant truth table.

    Args:
        f: output text stream
        name (str): base name used to build the function name
        lut (Model): LUT model queried for connection counts and table bits
        node (int): index of the output node to emit
    """
    fan_in = lut.get_node_connection_size(node)
    table_size = lut.get_lut_table_size(node)

    # function signature: one 1-bit argument per LUT input
    f.write("\ninline ap_uint<1> %s(\n" % (make_lut_func_name(name, node)))
    for i in range(fan_in):
        f.write(" ap_uint<1> in_data%d" % (i))
        f.write(",\n" if i < fan_in - 1 else ")\n")
    f.write("{\n")
    f.write(" #pragma HLS inline\n\n")

    # pack the input bits into the table index
    f.write(" ap_uint<%d> index;\n" % (fan_in))
    for i in range(fan_in):
        f.write(" index[%d] = in_data%d;\n" % (i, i))
    f.write(" \n")

    # emit the truth table as a small ROM
    f.write(" const ap_uint<1> table[%d] = {" % (table_size))
    for i in range(table_size):
        f.write("%d," % (lut.get_lut_table(node, i)))
    f.write("};\n")
    f.write(" #pragma HLS bind_storage variable=table type=ROM_1P impl=LUTRAM\n")
    f.write(" return table[index];\n")
    f.write("}\n\n")
37 |
def dump_hls_lut_layer(f, name, lut):
    ''' dump HLS source of LUT layer

    Emits one helper function per output node, then a wrapper that evaluates
    every node and packs the results into one output word.

    Args:
        f (StreamIO) : output stream
        name (str): function name of the layer
        lut (Model): the LUT network to convert
    '''

    in_size = lut.get_input_node_size()
    out_size = lut.get_output_node_size()

    # per-node lookup functions
    for node in range(out_size):
        dump_hls_lut_node(f, name, lut, node)

    # layer wrapper: one bit of out_data per node
    f.write("\n")
    f.write("inline ap_uint<%d> %s(ap_uint<%d> in_data)\n" % (out_size, name, in_size))
    f.write("{\n")
    f.write(" ap_uint<%d> out_data;\n" % (out_size))
    for node in range(out_size):
        f.write(" out_data[%d] = %s(" % (node, make_lut_func_name(name, node)))
        fan_in = lut.get_node_connection_size(node)
        for i in range(fan_in):
            f.write("in_data[%d]" % (lut.get_node_connection_index(node, i)))
            f.write("," if i < fan_in - 1 else ");\n")
    f.write(" return out_data;\n")
    f.write("}\n\n")
67 |
68 |
def make_hls_lut_layer(name, lut):
    ''' make HLS source of LUT layer

    Args:
        name (str): function name of the layer
        lut (Model): the LUT network to convert

    Returns:
        HLS source code (str)
    '''

    stream = io.StringIO()
    try:
        dump_hls_lut_layer(stream, name, lut)
        return stream.getvalue()
    finally:
        stream.close()
83 |
--------------------------------------------------------------------------------
/cuda/ConvBitToReal.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | #include "cuda_runtime.h"
5 | #include "device_launch_parameters.h"
6 |
7 | #include "bbcu/bbcu.h"
8 | #include "bbcu/bbcu_util.h"
9 |
10 |
11 |
// Unpack a bit-packed frame buffer into real values.
// Each set bit becomes value1, each clear bit value0.
// Assumes blockDim.x == 32 (as launched by the host wrapper below) so that
// threadIdx.x & 0x1f equals frame & 0x1f.
// (the template parameter list was lost during extraction — restored: the
//  body uses T for the output element type)
template<typename T>
__global__ void kernal_ConvBitToReal(
            int const *x_buf,
            T *y_buf,
            T value0,
            T value1,
            int node_size,
            int frame_size,
            int x_frame_stride,
            int y_frame_stride
        )
{
    int frame = blockDim.x * blockIdx.x + threadIdx.x;
    int node = blockDim.y * blockIdx.y + threadIdx.y;

    // 32 frames are packed per int word of x_buf
    int bit = (threadIdx.x & 0x1f);
    int bit_mask = (1 << bit);
    int unit = (frame >> 5);

    if ( frame < frame_size && node < node_size ) {
        int x = x_buf[node * x_frame_stride + unit];
        T y = (x & bit_mask) ? value1 : value0;
        y_buf[node * y_frame_stride + frame] = y;
    }
}
37 |
38 |
// Host wrapper: launch kernal_ConvBitToReal over a (frame, node) grid.
// (template parameter list and <<<...>>> launch configuration were lost
//  during extraction — restored so streamId is actually used)
template<typename T>
BBCU_DLL_EXPORT int bbcu_ConvBitToReal
        (
            int const *dev_x_buf,
            T *dev_y_buf,
            T value0,
            T value1,
            int node_size,
            int frame_size,
            int x_frame_stride,
            int y_frame_stride,
            cudaStream_t streamId
        )
{
    BBCU_DEBUG_ASSERT(bbcu_IsDeviceAvailable());

    // 32x32 tile of (frame, node); ceil-div grid covers the tails
    dim3 block(32, 32);
    dim3 grid((frame_size + 31) / 32, (node_size + 31) / 32);

    kernal_ConvBitToReal<T><<<grid, block, 0, streamId>>>
        (
            dev_x_buf,
            dev_y_buf,
            value0,
            value1,
            node_size,
            frame_size,
            x_frame_stride,
            y_frame_stride
        );
    BB_CUDA_CHECK_LAST_ERROR();

    return 0;
}
73 |
74 |
// explicit instantiation for fp32
// (the <float> template argument was lost during extraction — restored)
template BBCU_DLL_EXPORT int bbcu_ConvBitToReal<float>
    (
        int const *dev_x_buf,
        float *dev_y_buf,
        float value0,
        float value1,
        int node_size,
        int frame_size,
        int x_frame_stride,
        int y_frame_stride,
        cudaStream_t streamId
    );
87 |
88 |
89 | // end of file
90 |
--------------------------------------------------------------------------------
/include/bb/Filter2d.h:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------------------------
2 | // Binary Brain -- binary neural net framework
3 | //
4 | // Copyright (C) 2018 by Ryuji Fuchikami
5 | // https://github.com/ryuz
6 | // ryuji.fuchikami@nifty.com
7 | // --------------------------------------------------------------------------
8 |
9 |
10 |
11 | #pragma once
12 |
13 |
14 | #include "bb/Model.h"
15 |
16 |
17 | namespace bb {
18 |
19 |
20 | // border_mode
21 | #define BB_BORDER_CONSTANT 0
22 | #define BB_BORDER_REFLECT 1
23 | #define BB_BORDER_REFLECT_101 2
24 | #define BB_BORDER_REPLICATE 3
25 | #define BB_BORDER_WRAP 4
26 |
27 |
28 | // 二次元フィルタの基本クラス
29 | class Filter2d : public Model
30 | {
31 | protected:
32 | /**
33 | * @brief モデルの情報を表示
34 | * @detail モデルの情報を表示する
35 | * @param os 出力ストリーム
36 | * @param indent インデント文字列
37 | */
38 | void PrintInfoText(std::ostream& os, std::string indent, int columns, int nest, int depth) const override
39 | {
40 | os << indent << " filter size : (" << GetFilterHeight() << ", " << GetFilterWidth() << ")" << std::endl;
41 | Model::PrintInfoText(os, indent, columns, nest, depth);
42 | }
43 |
44 | public:
45 | virtual index_t GetFilterHeight(void) const = 0;
46 | virtual index_t GetFilterWidth(void) const = 0;
47 |
48 | virtual std::shared_ptr< Model > GetSubLayer(void) const
49 | {
50 | return nullptr;
51 | }
52 |
53 | index_t GetInputChannels(void) const
54 | {
55 | auto shape = this->GetInputShape();
56 | BB_ASSERT(shape.size() == 3);
57 | return shape[0];
58 | }
59 |
60 | index_t GetInputHeight(void) const
61 | {
62 | auto shape = this->GetInputShape();
63 | BB_ASSERT(shape.size() == 3);
64 | return shape[1];
65 | }
66 |
67 | index_t GetInputWidth(void) const
68 | {
69 | auto shape = this->GetInputShape();
70 | BB_ASSERT(shape.size() == 3);
71 | return shape[2];
72 | }
73 |
74 | index_t GetOutputChannels(void) const
75 | {
76 | auto shape = this->GetOutputShape();
77 | BB_ASSERT(shape.size() == 3);
78 | return shape[0];
79 | }
80 |
81 | index_t GetOutputHeight(void) const
82 | {
83 | auto shape = this->GetOutputShape();
84 | BB_ASSERT(shape.size() == 3);
85 | return shape[1];
86 | }
87 |
88 | index_t GetOutputWidth(void) const
89 | {
90 | auto shape = this->GetOutputShape();
91 | BB_ASSERT(shape.size() == 3);
92 | return shape[2];
93 | }
94 | };
95 |
96 |
97 | }
--------------------------------------------------------------------------------
/tests/gtest/cudaMatrixColwiseMeanVarTest.cpp:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <iostream>
#include <memory>
#include <random>
#include <valarray>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include "gtest/gtest.h"

#include "bb/FrameBuffer.h"
#include "bbcu/bbcu.h"
14 |
15 |
16 | #if BB_WITH_CUDA
17 |
18 |
/**
 * @brief  CPU reference: arithmetic mean of a valarray.
 * @param  varray  input values; must be non-empty (size() == 0 divides by zero)
 * @return mean of all elements
 *
 * Note: the `<double>` template argument was lost in a previous edit; without
 * it a function parameter cannot use CTAD and the file does not compile.
 */
static double calc_mean(std::valarray<double> const &varray)
{
    return varray.sum() / varray.size();
}
23 |
/**
 * @brief  CPU reference: population variance of a valarray.
 * @param  varray  input values; must be non-empty (size() == 0 divides by zero)
 * @return population variance (divides by N, not N-1), matching the
 *         per-column variance computed by bbcu_fp32_MatrixColwiseMeanVar
 *
 * Fixes: restores the `<double>` template argument lost in a previous edit,
 * and uses the two-pass E[(x - mean)^2] form instead of E[x^2] - mean^2,
 * which suffers catastrophic cancellation when mean^2 dominates.
 */
static double calc_var(std::valarray<double> const &varray)
{
    double mean = varray.sum() / varray.size();
    std::valarray<double> diff = varray - mean;
    return (diff * diff).sum() / varray.size();
}
29 |
30 |
31 | TEST(cudaMatrixColwiseMeanVarTest, test_MatrixColwiseMeanVar)
32 | {
33 | const int n = 1027;
34 |
35 | std::mt19937_64 mt(1);
36 | std::normal_distribution dist0(1.0, 2.0);
37 | std::normal_distribution dist1(-1.0, 3.0);
38 | std::normal_distribution dist2(2.0, 4.0);
39 |
40 | std::valarray arr0(n);
41 | std::valarray arr1(n);
42 | std::valarray arr2(n);
43 | for (int i = 0; i < n; ++i) {
44 | arr0[i] = dist0(mt);
45 | arr1[i] = dist1(mt);
46 | arr2[i] = dist2(mt);
47 | }
48 |
49 | bb::FrameBuffer x_buf(n, {3}, BB_TYPE_FP32);
50 | for (int i = 0; i < n; ++i) {
51 | x_buf.SetFP32(i, 0, (float)arr0[i]);
52 | x_buf.SetFP32(i, 1, (float)arr1[i]);
53 | x_buf.SetFP32(i, 2, (float)arr2[i]);
54 | }
55 |
56 | bb::Tensor m_buf({3}, BB_TYPE_FP32);
57 | bb::Tensor v_buf({3}, BB_TYPE_FP32);
58 | {
59 | auto x_ptr = x_buf.LockDeviceMemoryConst();
60 | auto m_ptr = m_buf.LockDeviceMemory();
61 | auto v_ptr = v_buf.LockDeviceMemory();
62 | bbcu_fp32_MatrixColwiseMeanVar
63 | (
64 | (const float *)x_ptr.GetAddr(),
65 | (float *)m_ptr.GetAddr(),
66 | (float *)v_ptr.GetAddr(),
67 | (int )3,
68 | (int )n,
69 | (int )x_buf.GetFrameStride() / sizeof(float)
70 | );
71 | }
72 |
73 | {
74 | auto m_ptr = m_buf.LockConst();
75 | auto v_ptr = v_buf.LockConst();
76 |
77 | EXPECT_FLOAT_EQ((float)calc_mean(arr0), m_ptr[0]);
78 | EXPECT_FLOAT_EQ((float)calc_mean(arr1), m_ptr[1]);
79 | EXPECT_FLOAT_EQ((float)calc_mean(arr2), m_ptr[2]);
80 | EXPECT_FLOAT_EQ((float)calc_var(arr0), v_ptr[0]);
81 | EXPECT_FLOAT_EQ((float)calc_var(arr1), v_ptr[1]);
82 | EXPECT_FLOAT_EQ((float)calc_var(arr2), v_ptr[2]);
83 | }
84 | }
85 |
86 |
87 | #endif
88 |
89 |
--------------------------------------------------------------------------------