├── train_status.txt ├── temp.state ├── list.txt ├── header.txt ├── template_Makefile ├── weight_clip.py ├── net2.py ├── net3.py ├── bst.py ├── link_binary_linear.py ├── function_binary_linear.py ├── template_cpp_r7_main.cpp ├── README.md ├── link_binary_conv2d.py ├── link_integer_conv2d.py ├── trainer.py ├── eval.py ├── conv_npz2txt_v2.py ├── template_cpp_r7_socket_main.cpp ├── link_batch_normalization.py ├── gen_training_data.py ├── train.py ├── function_integer_conv2d.py ├── function_binary_conv2d.py ├── template_cpp_r7_bcnn.cpp ├── function_batch_normalization.py ├── LICENSE.txt ├── gen_cpp_code_v3.py └── guinness.py /train_status.txt: -------------------------------------------------------------------------------- 1 | stop -------------------------------------------------------------------------------- /temp.state: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HirokiNakahara/GUINNESS/HEAD/temp.state -------------------------------------------------------------------------------- /list.txt: -------------------------------------------------------------------------------- 1 | ./class3_images/airplane800 airplane 2 | ./class3_images/pets800 pets 3 | ./class3_images/car800 car 4 | -------------------------------------------------------------------------------- /header.txt: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import six 4 | import chainer 5 | from chainer import cuda 6 | from chainer import functions as F 7 | from chainer import links as L 8 | from chainer import initializers 9 | 10 | import sys 11 | sys.path.append('./') 12 | import link_binary_linear as BL 13 | import bst 14 | import link_binary_conv2d as BC 15 | import link_integer_conv2d as IC 16 | from function_binary_conv2d import func_convolution_2d 17 | from function_integer_conv2d import func_convolution_2d 18 | 19 | # for debuging of the batch 
class WeightClip(object):

    """Optimizer hook function for weight clip manipulation.

    This hook function clips every parameter of the optimizer's target to
    ``[low, high]`` in place. It can be used in a binary weight network.

    Args:
        low (float): low value for the weight clip.
        high (float): high value for the weight clip.

    Attributes:
        low (float): low value for the weight clip.
        high (float): high value for the weight clip.

    """
    name = 'WeightClip'

    def __init__(self, low=-1.0, high=1.0):
        self.low = low
        self.high = high

    def __call__(self, opt):
        """Clip all parameters reachable from ``opt.target`` in place.

        Args:
            opt: Optimizer object; ``opt.target.params()`` must yield
                objects whose ``data`` attribute is the array to clip.
        """
        kernel = None  # GPU kernel, built only if a non-NumPy array shows up

        for param in opt.target.params():
            p = param.data
            if p is None:
                # Nothing allocated for this parameter, so nothing to clip.
                continue

            if isinstance(p, numpy.ndarray):
                # numpy.clip returns a *new* array unless ``out`` is given;
                # without ``out=p`` the parameter itself is left untouched.
                numpy.clip(p, self.low, self.high, out=p)
                continue

            if kernel is None:
                kernel = cuda.elementwise(
                    'T low, T high',
                    'T p',
                    'p = (p < low) ? low : (p > high) ? high : p',
                    'weight_clip')
            # Run the kernel on the device that owns the array.
            with cuda.get_device(p):
                kernel(self.low, self.high, p)
class BST(function.Function):

    """Binary with Straight Through estimator Unit.

    The forward pass maps every element to ``+1`` (``x >= 0``) or ``-1``
    (``x < 0``).  The backward pass copies the incoming gradient and zeroes
    it wherever ``abs(x) > 1``.
    """

    def __init__(self):
        # Deliberately empty: the function keeps no state.
        pass

    def check_type_forward(self, in_types):
        # Exactly one float32 input is accepted.
        type_check.expect(in_types.size() == 1)
        x_type = in_types[0]
        type_check.expect(x_type.dtype == numpy.float32)

    def forward_cpu(self, x):
        signs = numpy.where(x[0] >= 0, 1, -1)
        return signs.astype(numpy.float32, copy=False),

    def forward_gpu(self, x):
        binarize = cuda.elementwise(
            'T x', 'T y',
            'y = x >= 0 ? 1 : -1', 'bst_fwd')
        return binarize(x[0]),

    def backward_cpu(self, x, gy):
        grad = gy[0].copy()
        # Block the gradient outside the [-1, 1] input range.
        grad[numpy.abs(x[0]) > 1] = 0
        return grad,

    def backward_gpu(self, x, gy):
        mask_grad = cuda.elementwise(
            'T x, T gy', 'T gx',
            'gx = abs(x) > 1 ? 0 : gy', 'bst_bwd')
        return mask_grad(x[0], gy[0]),
Gaussian samples, each 14 | of which has zero mean and deviation :math:`\\sqrt{1/\\text{in_size}}`. The 15 | bias vector ``b`` is of size ``out_size``. Each element is initialized with 16 | the ``bias`` value. If ``nobias`` argument is set to True, then this link 17 | does not hold a bias vector. 18 | 19 | Args: 20 | in_size (int): Dimension of input vectors. 21 | out_size (int): Dimension of output vectors. 22 | wscale (float): Scaling factor of the weight matrix. 23 | bias (float): Initial bias value. 24 | nobias (bool): If True, then this function does not use the bias. 25 | initialW (2-D array): Initial weight value. If ``None``, then this 26 | function uses to initialize ``wscale``. 27 | initial_bias (1-D array): Initial bias value. If ``None``, then this 28 | function uses to initialize ``bias``. 29 | 30 | .. seealso:: :func:`~chainer.functions.linear` 31 | 32 | Attributes: 33 | W (~chainer.Variable): Weight parameter. 34 | b (~chainer.Variable): Bias parameter. 35 | 36 | """ 37 | def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False, 38 | initialW=None, initial_bias=None): 39 | super(BinaryLinear, self).__init__(W=(out_size, in_size)) 40 | if initialW is None: 41 | initialW = numpy.random.normal( 42 | 0, wscale * numpy.sqrt(1. / in_size), (out_size, in_size)) 43 | self.W.data[...] = initialW 44 | 45 | if nobias: 46 | self.b = None 47 | else: 48 | self.add_param('b', out_size) 49 | if initial_bias is None: 50 | initial_bias = bias 51 | self.b.data[...] = initial_bias 52 | 53 | def __call__(self, x): 54 | """Applies the linear layer. 55 | 56 | Args: 57 | x (~chainer.Variable): Batch of input vectors. 58 | 59 | Returns: 60 | ~chainer.Variable: Output of the linear layer. 
class BinaryLinearFunction(function.Function):

    """Linear (affine) function evaluated on sign-binarized operands.

    Forward: both the flattened input and the weight matrix are mapped to
    ``+1`` / ``-1`` (``>= 0`` gives ``+1``) before ``Xb.dot(Wb.T)``; the
    optional bias is added afterwards.

    Backward: the input gradient uses the binarized weights, while the
    weight gradient uses the flattened input as given (not binarized).
    """

    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type = in_types[0]
        w_type = in_types[1]

        type_check.expect(
            x_type.dtype == numpy.float32,
            w_type.dtype == numpy.float32,
            x_type.ndim >= 2,
            w_type.ndim == 2,
            type_check.prod(x_type.shape[1:]) == w_type.shape[1],
        )
        if n_in.eval() == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == numpy.float32,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[0],
            )

    @staticmethod
    def _affine(inputs, x_bin, w_bin):
        # Shared tail of both forward passes: product plus optional bias.
        out = x_bin.dot(w_bin.T)
        if len(inputs) == 3:
            out += inputs[2]
        return out,

    @staticmethod
    def _gradients(inputs, grad_outputs, w_bin):
        # Shared tail of both backward passes.
        x_mat = _as_mat(inputs[0])
        gy = grad_outputs[0]

        gx = gy.dot(w_bin).reshape(inputs[0].shape)
        gW = gy.T.dot(x_mat)
        if len(inputs) != 3:
            return gx, gW
        return gx, gW, gy.sum(0)

    def forward_cpu(self, inputs):
        x_mat = _as_mat(inputs[0])
        w_bin = numpy.where(inputs[1] >= 0, 1, -1).astype(
            numpy.float32, copy=False)
        x_bin = numpy.where(x_mat >= 0, 1, -1).astype(x_mat.dtype, copy=False)
        return self._affine(inputs, x_bin, w_bin)

    def forward_gpu(self, inputs):
        x_mat = _as_mat(inputs[0])
        w_bin = _kern()(inputs[1])
        x_bin = _kern()(x_mat)
        return self._affine(inputs, x_bin, w_bin)

    def backward_cpu(self, inputs, grad_outputs):
        w_bin = numpy.where(inputs[1] >= 0, 1, -1).astype(
            numpy.float32, copy=False)
        return self._gradients(inputs, grad_outputs, w_bin)

    def backward_gpu(self, inputs, grad_outputs):
        w_bin = _kern()(inputs[1])
        return self._gradients(inputs, grad_outputs, w_bin)
Nakahara 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #ifdef __SDSCC__ 16 | #include "sds_lib.h" 17 | #else 18 | #define sds_alloc(x)(malloc(x)) 19 | #define sds_free(x)(free(x)) 20 | #endif 21 | 22 | void BinCNN( 23 | #ifdef __SDSCC__ 24 | int *t_bin_convW, 25 | int *t_BNFb, 26 | ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)], 27 | int fc_result[(OUT_DENSE_SIZ)], 28 | int init 29 | #else 30 | int t_bin_convW[(WEIGHT_SIZ)], 31 | int t_BNFb[(BIAS_SIZ)], 32 | ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)], 33 | int fc_result[(OUT_DENSE_SIZ)], 34 | int init 35 | #endif 36 | ); 37 | 38 | //-------------------------------------------------------------------- 39 | // Main Function 40 | //-------------------------------------------------------------------- 41 | int main( int argc, char *argv[]) 42 | { 43 | ap_int<64> *t_tmp_img; 44 | t_tmp_img = (ap_int<64> *)sds_alloc(((IMGSIZ)*(IMGSIZ))*sizeof(ap_int<64>)); 45 | 46 | int fc_result[(OUT_DENSE_SIZ)]; 47 | int rgb, y, x, i, offset; 48 | 49 | // copy input image to f1 50 | for( y = 0; y < (IMGSIZ); y++){ 51 | for( x = 0; x < (IMGSIZ); x++){ 52 | t_tmp_img[y*(IMGSIZ)+x] = 0; 53 | } 54 | } 55 | 56 | // ------------------------------------------------------------------ 57 | printf("load weights\n"); 58 | int *t_bin_convW; 59 | int *t_BNFb; 60 | t_bin_convW = (int *)sds_alloc(((WEIGHT_SIZ))*sizeof(int)); 61 | t_BNFb = (int *)sds_alloc(((BIAS_SIZ))*sizeof(int)); 62 | 63 | int of, inf, d_value; 64 | FILE *fp; 65 | char line[256]; 66 | 67 | (READ_BIAS_MEM) 68 | 69 | (READ_WEIGHT_MEM) 70 | 71 | printf("setup... 
\n"); 72 | BinCNN( t_bin_convW, t_BNFb, t_tmp_img, fc_result, 1); 73 | 74 | char image_name[256]; 75 | int cnt; 76 | 77 | #ifdef __SDSCC__ 78 | sscanf( argv[1], "%s", image_name); // 1st argument: test image (text file) 79 | sscanf( argv[2], "%d", &cnt); // 2nd argument: # of inferences 80 | #else 81 | sprintf( image_name, "test_img.txt"); 82 | cnt = 1; 83 | #endif 84 | 85 | 86 | int pixel; 87 | printf("LOAD TESTBENCH %s ... ", image_name); 88 | if( (fp = fopen(image_name, "r")) == NULL)fprintf(stderr,"CANNOT OPEN\n"); 89 | for( y = 0; y < (IMGSIZ); y++){ 90 | for( x = 0; x < (IMGSIZ); x++){ 91 | ap_int<64>tmp = 0; 92 | for( rgb = (NUMIMG) - 1; rgb >= 0 ; rgb--){ 93 | if( fgets( line, 256, fp) == NULL) 94 | fprintf(stderr,"EMPTY FILE READ\n"); 95 | sscanf( line, "%d", &d_value); 96 | 97 | tmp = tmp << 20; 98 | 99 | pixel = d_value; 100 | tmp |= ( pixel & 0xFFFFF); 101 | } 102 | t_tmp_img[ y * (IMGSIZ) + x] = tmp; 103 | } 104 | } 105 | printf("OK\n"); 106 | fclose(fp); 107 | 108 | printf("Inference %d times ... ", cnt); 109 | for( i = 0; i < cnt; i++){ 110 | BinCNN( t_bin_convW, t_BNFb, t_tmp_img, fc_result, 0); 111 | } 112 | printf("OK\n"); 113 | 114 | printf("Result\n"); 115 | for( i = 0; i < (OUT_DENSE_SIZ); i++)printf("%5d ", fc_result[i]); 116 | printf("\n"); 117 | 118 | sds_free( t_tmp_img); sds_free( t_bin_convW); sds_free( t_BNFb); 119 | 120 | return 0; 121 | } 122 | 123 | // ------------------------------------------------------------------ 124 | // END OF PROGRAM 125 | // ------------------------------------------------------------------ 126 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GUINNESS: A GUI based binarized Neural NEtwork SyntheSizer toward an FPGA (Trial version) 2 | 3 | This GUI based framework includes both a training on a GPU, and a bitstream generation for an FPGA using the Xilinx Inc. SDSoC. 
This tool uses the Chainer deep learning framework to train a binarized CNN. Also, it uses optimization techniques for an FPGA implementation. Details are given in the following papers: 4 | 5 | [Nakahara IPDPSW2017] H. Yonekawa and H. Nakahara, "On-Chip Memory Based Binarized Convolutional Deep Neural Network Applying Batch Normalization Free Technique on an FPGA," IPDPS Workshops, 2017, pp. 98-105. 6 | 7 | [Nakahara FPL2017] H. Nakahara et al., "A Fully Connected Layer Elimination for a Binarized Convolutional Neural Network on an FPGA", FPL, 2017, pp. 1-4. 8 | 9 | [Nakahara FPL2017 Demo] H. Nakahara et al., "A demonstration of the GUINNESS: A GUI based neural NEtwork SyntheSizer for an FPGA", FPL, 2017, page 1. 10 | 11 | ### 1. Requirements: 12 | 13 | Ubuntu 16.04 LTS (14.04 LTS is also supported) 14 | 15 | Python 3.5.1 16 | (Note: I recommend installing it with Anaconda 4.1.0 (64bit)+Pyenv; 17 | I set up Python 3.5 by following this Japanese-only guide: http://blog.algolab.jp/post/2016/08/21/pyenv-anaconda-ubuntu/) 18 | 19 | CUDA 8.0 (+GPU), CuDNN 6.0 20 | (Also, you must sign up for an NVIDIA developer account) 21 | 22 | Chainer 1.24.0 + CuPy 2.0 23 | 24 | Xilinx Inc. SDSoC 2017.4 25 | 26 | FPGA board: Xilinx ZC702, ZC706, ZCU102, Digilent Zedboard, Zybo 27 | (Support for Intel FPGAs and the PYNQ board is coming soon!) 28 | 29 | PyQt4, matplotlib, OpenCV3, numpy, scipy, 30 | (The above libraries are installed by Anaconda; however, you must install OpenCV separately with "conda install -y -c menpo opencv3") 31 | 32 | ### 2. Setup Libraries 33 | 34 | Install the following python libraries: 35 | 36 | Chainer 37 | 38 | sudo pip install chainer==1.24.0 39 | 40 | PyQt4 (not PyQt5!), it is already installed by Anaconda 41 | 42 | sudo apt-get install python-qt4 pyqt4-dev-tools 43 | 44 | OpenCV3 45 | 46 | conda install -y -c menpo opencv3 47 | 48 | ### 3. Run GUINNESS 49 | 50 | $ python guinness.py 51 | 52 | ### 4.
Tutorial 53 | 54 | Read the following documents (25/Oct./2017 Updated!!) 55 | 56 | 1 The GUINNESS introduction and BCNN implementation on an FPGA 57 | guinness_tutorial1_v2.pdf 58 | 59 | 2 The GUINNESS for the Intel FPGAs (will be uploaded soon) 60 | 61 | 3 Pedestrian detection (In preparation) 62 | 63 | 4 Make a custom IP core for your own FPGA board (In preparation) 64 | 65 | ### 5. On-going works 66 | This is just a trial version. I have already developed an extended version that includes the following. 67 | 68 | Support for Intel FPGAs (DE5-net, DE10-nano, and DE5a-net boards with the Intel SDK for OpenCL) 69 | 70 | High performance image recognition (fully pipelined and SIMD CNNs) 71 | 72 | Object detector on a low-cost FPGA (e.g., pedestrian detection) 73 | 74 | FPGA YOLOv2 (ZCU102 board) 75 | 76 | [![FPGA YOLOv2 ON YOUTUBE](http://img.youtube.com/vi/_iMboyu8iWc/0.jpg)](https://www.youtube.com/watch?v=_iMboyu8iWc&t=5s) 77 | 78 | Pedestrian Detector (Zedboard) 79 | 80 | [![Pedestrian Detector ON YOUTUBE](http://img.youtube.com/vi/X82PVBuAuuo/0.jpg)](https://www.youtube.com/watch?v=X82PVBuAuuo&list=FLIIfj2LoI2TVWF5wQkZHiHg) 81 | 82 | 83 | If you are interested in the extended version, please contact me. 84 | 85 | ### 6. Acknowledgements 86 | This work is based on the following projects: 87 | 88 | Chainer binarized neural network by Daisuke Okanohara 89 | https://github.com/hillbig/binary_net 90 | 91 | Various CNN models including Deep Residual Networks (ResNet) 92 | for CIFAR10 with Chainer by mitmul 93 | https://github.com/mitmul/chainer-cifar10 94 | 95 | This research is supported in part by the Grants-in-Aid for Scientific Research of JSPS, 96 | and an Accelerated Innovation Research Initiative Turning Top Science and Ideas into High-Impact 97 | Values program (ACCEL) of JST. Also, thanks to the Xilinx University Program (XUP), the Intel University Program, 98 | and NVidia Corp. for their support.
class Convolution2D(link.Link):

    """Two-dimensional convolutional layer.

    This link wraps :func:`function_binary_conv2d.func_convolution_2d` and
    holds the filter weight and bias vector as parameters.

    Args:
        in_channels (int): Number of channels of input arrays. If None,
            parameter initialization will be deferred until the first forward
            data pass at which time the size will be determined.
        out_channels (int): Number of channels of output arrays.
        ksize (int or pair of ints): Size of filters (a.k.a. kernels).
            ``ksize=k`` and ``ksize=(k, k)`` are equivalent.
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        wscale (float): Scaling factor of the initial weight.
        bias (float): Initial bias value.
        nobias (bool): If ``True``, then this link does not use the bias term.
        use_cudnn (bool): If ``True``, then this link uses cuDNN if available.
        initialW (4-D array): Initial weight value. If ``None``, then this
            function uses to initialize ``wscale``.
            May also be a callable that takes ``numpy.ndarray`` or
            ``cupy.ndarray`` and edits its value.
        initial_bias (1-D array): Initial bias value. If ``None``, then this
            function uses to initialize ``bias``.
            May also be a callable that takes ``numpy.ndarray`` or
            ``cupy.ndarray`` and edits its value.

    .. seealso::
       See :func:`chainer.functions.convolution_2d` for the definition of
       two-dimensional convolution.

    Attributes:
        W (~chainer.Variable): Weight parameter.
        b (~chainer.Variable): Bias parameter.

    """

    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 wscale=1, bias=0, nobias=False, use_cudnn=True,
                 initialW=None, initial_bias=None):
        super(Convolution2D, self).__init__()
        self.ksize = ksize
        self.stride = _pair(stride)
        self.pad = _pair(pad)
        self.use_cudnn = use_cudnn
        self.out_channels = out_channels
        self.initialW = initialW
        self.wscale = wscale

        if in_channels is None:
            # Shape unknown yet: W is created on the first forward pass.
            self.add_uninitialized_param('W')
        else:
            # Creates W and initializes it exactly once.  (A second
            # init_weight() call used to follow here; it re-drew the weights
            # and touched self.W even when W had not been created.)
            self._initialize_params(in_channels)

        if nobias:
            self.b = None
        else:
            self.add_param('b', out_channels)
            if initial_bias is None:
                initial_bias = bias
            initializers.init_weight(self.b.data, initial_bias)

    def _initialize_params(self, in_channels):
        """Create ``W`` for ``in_channels`` input channels and initialize it."""
        kh, kw = _pair(self.ksize)
        W_shape = (self.out_channels, in_channels, kh, kw)
        self.add_param('W', W_shape)
        # For backward compatibility, the scale of weights is proportional to
        # the square root of wscale.
        initializers.init_weight(self.W.data, self.initialW,
                                 scale=math.sqrt(self.wscale))

    def __call__(self, x):
        """Applies the convolution layer.

        Args:
            x (~chainer.Variable): Input image.

        Returns:
            ~chainer.Variable: Output of the convolution.

        """
        if self.has_uninitialized_params:
            # Deferred initialization (in_channels=None): the channel count
            # is taken from axis 1 of the first input.
            from chainer import cuda
            with cuda.get_device(self._device_id):
                self._initialize_params(x.shape[1])
        return function_binary_conv2d.func_convolution_2d(
            x, self.W, self.b, self.stride, self.pad, self.use_cudnn)
37 | initial_bias (1-D array): Initial bias value. If ``None``, then this 38 | function uses to initialize ``bias``. 39 | May also be a callable that takes ``numpy.ndarray`` or 40 | ``cupy.ndarray`` and edits its value. 41 | 42 | .. seealso:: 43 | See :func:`chainer.functions.convolution_2d` for the definition of 44 | two-dimensional convolution. 45 | 46 | Attributes: 47 | W (~chainer.Variable): Weight parameter. 48 | b (~chainer.Variable): Bias parameter. 49 | 50 | """ 51 | 52 | def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0, 53 | wscale=1, bias=0, nobias=False, use_cudnn=True, 54 | initialW=None, initial_bias=None): 55 | super(Convolution2D, self).__init__() 56 | self.ksize = ksize 57 | self.stride = _pair(stride) 58 | self.pad = _pair(pad) 59 | self.use_cudnn = use_cudnn 60 | self.out_channels = out_channels 61 | self.initialW = initialW 62 | self.wscale = wscale 63 | 64 | if in_channels is None: 65 | self.add_uninitialized_param('W') 66 | else: 67 | self._initialize_params(in_channels) 68 | 69 | kh, kw = _pair(self.ksize) 70 | W_shape = (self.out_channels, in_channels, kh, kw) 71 | #self.add_param('W', W_shape) 72 | # For backward compatibility, the scale of weights is proportional to 73 | # the square root of wscale. 74 | initializers.init_weight(self.W.data, self.initialW, 75 | scale=math.sqrt(self.wscale)) 76 | 77 | if nobias: 78 | self.b = None 79 | else: 80 | self.add_param('b', out_channels) 81 | if initial_bias is None: 82 | initial_bias = bias 83 | initializers.init_weight(self.b.data, initial_bias) 84 | 85 | def _initialize_params(self, in_channels): 86 | kh, kw = _pair(self.ksize) 87 | W_shape = (self.out_channels, in_channels, kh, kw) 88 | self.add_param('W', W_shape) 89 | # For backward compatibility, the scale of weights is proportional to 90 | # the square root of wscale. 
class CifarTrainer(object):
    """Epoch-based trainer for a Chainer image-classification network.

    Every epoch shuffles the training set, feeds augmented mini-batches
    through ``net`` with back-propagation, then measures loss/accuracy on the
    optional validation and test sets and reports the figures to a callback.

    Args:
        net: Model; called as ``net(x, train=...)`` and expected to provide
            ``zerograds()`` and (for GPU use) ``to_gpu()``.
        optimizer: Optimizer whose ``update()`` is called once per mini-batch.
        epoch_num (int): Number of passes over the training data.
        batch_size (int): Mini-batch size for training and evaluation.
        device_id (int): GPU device id; a negative value keeps everything on
            the CPU with NumPy.
    """

    def __init__(self, net, optimizer, epoch_num=100, batch_size=100, device_id=-1):
        self.net = net
        self.optimizer = optimizer
        self.epoch_num = epoch_num
        self.batch_size = batch_size
        self.device_id = device_id
        if device_id >= 0:
            self.xp = cuda.cupy
            self.net.to_gpu(device_id)
        else:
            self.xp = np

    def fit(self, x, y, valid_x, valid_y, img_siz, img_dim, test_x=None, test_y=None, callback=None):
        """Train the network, selecting the GPU device first when one is set.

        Args:
            x, y: Training images and integer labels (indexable by an index
                array).
            valid_x, valid_y: Validation data, or ``None`` to skip validation.
            img_siz (int): Side length of the square input images.
            img_dim (int): Number of image channels.
            test_x, test_y: Optional test data.
            callback: Optional callable invoked after every epoch as
                ``callback(epoch, net, optimizer, train_loss, train_acc,
                valid_loss, valid_acc, test_loss, test_acc)``.
        """
        if self.device_id >= 0:
            with cuda.cupy.cuda.Device(self.device_id):
                return self.__fit(x, y, valid_x, valid_y, img_siz, img_dim, test_x, test_y, callback)
        return self.__fit(x, y, valid_x, valid_y, img_siz, img_dim, test_x, test_y, callback)

    def __fit(self, x, y, valid_x, valid_y, img_siz, img_dim, test_x, test_y, callback):
        """Run the epoch loop; see :meth:`fit` for the arguments."""
        batch_size = self.batch_size
        n_train = len(x)
        for epoch in range(self.epoch_num):
            perm = np.random.permutation(n_train)
            train_loss = 0
            train_acc = 0
            for i in range(0, n_train, batch_size):
                self.net.zerograds()
                batch_index = perm[i:i + batch_size]
                x_batch = self.__trans_image(x[batch_index], img_siz, img_dim)
                loss, acc = self.__forward(x_batch, y[batch_index])
                loss.backward()
                self.optimizer.update()
                # Weight by batch length so a short final batch is averaged fairly.
                train_loss += float(loss.data) * len(x_batch)
                train_acc += float(acc.data) * len(x_batch)
            if n_train > 0:  # avoid ZeroDivisionError on an empty training set
                train_loss /= n_train
                train_acc /= n_train

            valid_loss, valid_acc = self.__evaluate(valid_x, valid_y)
            test_loss, test_acc = self.__evaluate(test_x, test_y)

            if callback is not None:
                callback(epoch, self.net, self.optimizer, train_loss, train_acc,
                         valid_loss, valid_acc, test_loss, test_acc)

    def __evaluate(self, data_x, data_y):
        """Return ``(mean loss, mean accuracy)`` in inference mode.

        Returns ``(0, 0)`` when either array is ``None`` or the set is empty.
        """
        if data_x is None or data_y is None or len(data_x) == 0:
            return 0, 0
        batch_size = self.batch_size
        total_loss = 0
        total_acc = 0
        for i in range(0, len(data_x), batch_size):
            x_batch = data_x[i:i + batch_size]
            loss, acc = self.__forward(x_batch, data_y[i:i + batch_size], train=False)
            total_loss += float(loss.data) * len(x_batch)
            total_acc += float(acc.data) * len(x_batch)
        return total_loss / len(data_x), total_acc / len(data_x)

    def __forward(self, batch_x, batch_t, train=True):
        """Compute softmax cross-entropy loss and accuracy for one batch.

        Inputs are wrapped as volatile variables when ``train`` is false.
        """
        xp = self.xp
        x = Variable(xp.asarray(batch_x), volatile=not train)
        t = Variable(xp.asarray(batch_t), volatile=not train)
        y = self.net(x, train=train)
        loss = F.softmax_cross_entropy(y, t)
        acc = F.accuracy(y, t)
        return loss, acc

    def __trans_image(self, x, img_siz, img_dim):
        """Augment a batch with a random shift and a random horizontal mirror.

        Offsets are drawn from [-4, 4] per axis and clamped to >= 0, so only
        non-negative shifts take effect; vacated pixels stay zero.

        Args:
            x: Batch indexed as ``x[i][:, rows, cols]``; each image is assumed
                to be ``img_siz`` x ``img_siz`` (not checked here).
            img_siz (int): Side length of the output images.
            img_dim (int): Number of channels of the output images.

        Returns:
            numpy.ndarray: float32 array of shape
            ``(len(x), img_dim, img_siz, img_siz)``.
        """
        size = img_siz
        n = x.shape[0]
        images = np.zeros((n, img_dim, size, size), dtype=np.float32)
        # Draw order (offsets, then mirror flags) is kept so that a fixed
        # NumPy seed reproduces the same augmentation sequence as before.
        offset = np.random.randint(-4, 5, size=(n, 2))
        mirror = np.random.randint(2, size=n)
        for i in range(n):
            top, left = offset[i]
            top = max(0, top)
            left = max(0, left)
            right = min(size, left + size)
            # Fixed: the vertical bound was derived from ``left``.
            bottom = min(size, top + size)
            patch = x[i][:, top:bottom, left:right]
            if mirror[i] > 0:
                patch = patch[:, :, ::-1]
            images[i, :, size - bottom:size - top, size - right:size - left] = patch
        return images
def dataset_prefix(path):
    """Return *path* without a trailing ``_dataset.pkl``.

    The script derives three file names (``<prefix>_dataset.pkl``,
    ``<prefix>_label.pkl``, ``<prefix>_tag.txt``) from ``--dataset``.
    Accepting either the bare prefix or the full image-pickle name keeps the
    historical default (``two96_dataset.pkl``) from expanding to
    ``two96_dataset.pkl_dataset.pkl``.
    """
    suffix = '_dataset.pkl'
    if path.endswith(suffix):
        return path[:-len(suffix)]
    return path


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Evaluation Python Code')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU device ID (negative value indicates CPU)')
    parser.add_argument('--model', '-m', type=str, default='hoge.model',
                        help='Pre-Trained Model Name')
    parser.add_argument('--dataset', '-d', type=str, default='two96_dataset.pkl',
                        help='Dataset image pkl file path')
    parser.add_argument('--size', type=int, default=32,
                        help='Test Image Size')
    args = parser.parse_args()

    prefix = dataset_prefix(args.dataset)

    print('loading dataset...')
    # NOTE: unpickling executes arbitrary code embedded in the file; only load
    # data sets you generated yourself (e.g. with gen_training_data.py).
    with open(prefix + '_dataset.pkl', 'rb') as f:
        images = pickle.load(f)
    # Only the test split is evaluated, so the training split is not converted.
    test_x = images['test'].astype(np.float32)

    with open(prefix + '_label.pkl', 'rb') as f:
        labels = pickle.load(f)
    test_y = labels['test'].astype(np.int32)

    print('start evaluation')

    net = net3.CNN()
    print("load pre-trained npz")
    serializers.load_npz(args.model, net)

    # set image size
    img_siz = args.size

    # single-image batch buffer handed to the network
    eval_x = np.ones((1, 3, img_siz, img_siz))

    # load tag file (generated by 'gen_training_data.py'): one class name per line
    with open(prefix + '_tag.txt', 'r') as f:
        name = [line.rstrip('\n\r') for line in f]

    n_class = len(name)
    conf_matrix = np.zeros((n_class, n_class))

    # number of tests, capped so a small test set cannot raise IndexError
    n_tests = min(10, len(test_x))
    n_acc = 0

    # 8-bit copies for display, converted once instead of on every iteration
    display = test_x[:n_tests].clip(0, 255).astype(np.uint8)

    # perform test
    for idx in range(n_tests):
        label = int(test_y[idx])
        print("label=%d(%s)" % (label, name[label]))

        # The data set was written through OpenCV, so channels are in BGR
        # order, which is what cv2.imshow expects.
        image1 = display[idx].reshape(3, img_siz, img_siz).transpose(1, 2, 0)

        # To generate extra test benches for C/C++ simulation (Vivado HLS) or
        # an FPGA board, enable the following:
        #   bench_img = image1.reshape(-1,)
        #   fname = 'test_img_%d.txt' % idx
        #   print(' Test Image Fileout -> %s' % fname)
        #   np.savetxt(fname, bench_img, fmt="%.0f", delimiter=",")

        eval_x[0, :, :, :] = test_x[idx]

        result = net(Variable(eval_x.astype(np.float32)), train=False)
        print(result.data)
        predicted = int(result.data.argmax())
        print("test=%d(%s)" % (predicted, name[predicted]))

        # show test image (blocks until a key is pressed)
        cv2.imshow("test image", image1)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        # register the outcome in the confusion matrix (row: truth, column: prediction)
        conf_matrix[label, predicted] += 1
        if label == predicted:
            n_acc += 1

    # show a confusion matrix
    print("Confusion Matrix")
    print(conf_matrix.astype(np.int32))
    print("# corrests=%d" % n_acc)
    accuracy = float(n_acc) / n_tests if n_tests > 0 else 0.0
    print("Accuracy=%f" % accuracy)

# -----------------------------------------------------------------------
# END OF PROGRAM
# -----------------------------------------------------------------------
# Layer-type codes read from config['initial_options'].  Codes 0 and 1 carry
# 'conv<N>/W' weights and code 4 carries 'fc<N>/W' weights; every one of them
# is followed by a batch-normalization entry 'b<N>'.
CONV_LAYER_CODES = (0, 1)
DENSE_LAYER_CODE = 4


def binarize_weights(weights):
    """Return *weights* flattened to 1-D with 1 where ``w >= 0`` and 0 elsewhere.

    The result keeps the dtype of the input array.
    """
    return np.where(weights >= 0, 1, 0).astype(weights.dtype, copy=False).reshape(-1,)


def batchnorm_bias(var, beta, gamma, mean):
    """Return ``floor(sqrt(var) * beta / gamma - mean)`` element-wise."""
    return np.floor((np.sqrt(var) * beta) / gamma - mean)


def export_binary_weights(weights, layer_name, config_path, txt_note):
    """Write the binarized weights of one layer for SDSoC and for HLS.

    Args:
        weights: Weight array of the layer.
        layer_name (str): File stem such as ``'conv0'`` or ``'fc1'``.
        config_path (str): Project directory containing ``sdsoc/`` and ``HLS/``.
        txt_note (str): ``%s`` format used to announce the text file.

    Produces ``sdsoc/to_sd_card/<name>W.txt`` (one value per line),
    ``HLS/<name>W.csv`` (a single comma-separated row) and
    ``HLS/t_bin_<name>W.h`` (the row wrapped in an ``ap_uint<1>`` initializer).
    """
    flat = binarize_weights(weights)

    fname = config_path + '/sdsoc/to_sd_card/%sW.txt' % layer_name
    print(txt_note % fname)
    np.savetxt(fname, flat, fmt="%.0f", delimiter=",")

    # The CSV row is read back verbatim so the header text stays identical to
    # what np.savetxt produced.
    fname = config_path + '/HLS/%sW.csv' % layer_name
    np.savetxt(fname, flat[None, :], delimiter=",", fmt="%.0f")
    with open(fname) as f:
        line = f.read()

    header = 'ap_uint<1> t_bin_%sW[%d]={' % (layer_name, len(flat)) + line + '};'

    fname = config_path + '/HLS/t_bin_%sW.h' % layer_name
    print(' Fileout (HLS) -> %s' % fname)
    with open(fname, 'w') as f:
        f.write(header)


def export_bias(dat, bn_idx, n_out, config_path):
    """Write the integer bias derived from batch-norm entry ``b<bn_idx>``.

    Produces ``sdsoc/to_sd_card/b<N>_BNFb.txt`` (one value per line) and
    ``HLS/b<N>_BNFb.h``; the first entry is declared ``ap_int<20>``, all
    later ones ``ap_int<16>``.
    """
    key = 'b%d' % bn_idx
    print("converting %s" % key)
    bn_val = batchnorm_bias(dat[key + '/avg_var'], dat[key + '/beta'],
                            dat[key + '/gamma'], dat[key + '/avg_mean'])
    values = [int(round(bn_val[ofeat], 0)) for ofeat in range(n_out)]

    fname = config_path + '/sdsoc/to_sd_card/b%d_BNFb.txt' % bn_idx
    print(' Fileout -> %s' % fname)
    with open(fname, 'w') as f:
        f.write(''.join("%d\n" % v for v in values))

    width = 20 if bn_idx == 0 else 16
    header = ('ap_int<%d> b%d_BNFb[%d] ={' % (width, bn_idx, n_out)
              + ','.join("%d" % v for v in values) + '};')

    fname = config_path + '/HLS/b%d_BNFb.h' % bn_idx
    print(' Fileout -> %s' % fname)
    with open(fname, 'w') as f:
        f.write(header)


def main():
    """Convert ``<config_path>/temp.model`` into weight/bias text and header files."""
    parser = argparse.ArgumentParser(description='Weight converter')
    parser.add_argument('--config_path', '-c', type=str, default='./hoge',
                        help='Configuration pickle file path')
    args = parser.parse_args()

    # load configuration from the GUINNESS GUI
    # NOTE: unpickling executes arbitrary code embedded in the file; only load
    # configuration files written by the GUI itself.
    config_file = args.config_path + "/config.pickle"
    with open(config_file, mode='rb') as f:
        config = pickle.load(f)

    initial_options = config['initial_options']
    n_ou_fmaps = config['n_ou_fmaps']
    num_layer = config['num_layer']

    model_file = args.config_path + "/temp.model"

    dense_idx = 0
    conv_idx = 0
    bn_idx = 0

    # The archive is closed even if a key is missing or a write fails.
    with np.load(model_file) as dat:
        for layer in range(num_layer):
            code = initial_options[layer]

            # weights for convolutional layer
            if code in CONV_LAYER_CODES:
                key = 'conv%d/W' % conv_idx
                print("converting %s" % key)
                export_binary_weights(dat[key], 'conv%d' % conv_idx, args.config_path,
                                      ' Fileout (.txt) -> %s')
                conv_idx += 1

            # weights for FC layer
            if code == DENSE_LAYER_CODE:
                key = 'fc%d/W' % dense_idx
                print("converting %s" % key)
                export_binary_weights(dat[key], 'fc%d' % dense_idx, args.config_path,
                                      ' Fileout -> %s')
                dense_idx += 1

            # bias
            if code in CONV_LAYER_CODES or code == DENSE_LAYER_CODE:
                export_bias(dat, bn_idx, int(n_ou_fmaps[layer]), args.config_path)
                bn_idx += 1


if __name__ == '__main__':
    main()

# -----------------------------------------------------------------------
# END OF PROGRAM
# -----------------------------------------------------------------------
Nakahara 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | using namespace std; 21 | 22 | #include 23 | 24 | #ifdef __SDSCC__ 25 | #include "sds_lib.h" 26 | #else 27 | #define sds_alloc(x)(malloc(x)) 28 | #define sds_free(x)(free(x)) 29 | #endif 30 | 31 | void BinCNN( 32 | #ifdef __SDSCC__ 33 | int *t_bin_convW, 34 | int *t_BNFb, 35 | ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)], 36 | int fc_result[(OUT_DENSE_SIZ)], 37 | int init 38 | #else 39 | int t_bin_convW[(WEIGHT_SIZ)], 40 | int t_BNFb[(BIAS_SIZ)], 41 | ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)], 42 | int fc_result[(OUT_DENSE_SIZ)], 43 | int init 44 | #endif 45 | ); 46 | 47 | //-------------------------------------------------------------------- 48 | // Main Function 49 | //-------------------------------------------------------------------- 50 | int main( int argc, char *argv[]) 51 | { 52 | ap_int<64> *t_tmp_img; 53 | t_tmp_img = (ap_int<64> *)sds_alloc(((IMGSIZ)*(IMGSIZ))*sizeof(ap_int<64>)); 54 | 55 | int fc_result[(OUT_DENSE_SIZ)]; 56 | int rgb, y, x, i, offset; 57 | 58 | // copy input image to f1 59 | for( y = 0; y < (IMGSIZ); y++){ 60 | for( x = 0; x < (IMGSIZ); x++){ 61 | t_tmp_img[y*(IMGSIZ)+x] = 0; 62 | } 63 | } 64 | 65 | // ------------------------------------------------------------------ 66 | printf("load weights\n"); 67 | int *t_bin_convW; 68 | int *t_BNFb; 69 | t_bin_convW = (int *)sds_alloc(((WEIGHT_SIZ))*sizeof(int)); 70 | t_BNFb = (int *)sds_alloc(((BIAS_SIZ))*sizeof(int)); 71 | 72 | int of, inf, d_value; 73 | FILE *fp; 74 | char line[256]; 75 | 76 | (READ_BIAS_MEM) 77 | 78 | (READ_WEIGHT_MEM) 79 | 80 | printf("setup... 
\n"); 81 | BinCNN( t_bin_convW, t_BNFb, t_tmp_img, fc_result, 1); 82 | 83 | // setup socket connection ----------------------------------------- 84 | struct sockaddr_in addr; 85 | int sock; 86 | //char buf[32]; 87 | char buf[20000]; // more than 64x64x3(RGB) bytes 88 | int data; 89 | 90 | char ipadr[512]; 91 | int portnum; 92 | 93 | if( argc != 3){ 94 | printf("USAGE: #./(program).elf [IPADR] [PORTNUM]\n"); 95 | exit(-1); 96 | } 97 | 98 | sscanf( argv[1], "%s", ipadr); 99 | sscanf( argv[2], "%d", &portnum); 100 | 101 | printf("[INFO] IPADR=%s PORT=%d\n", ipadr, portnum); 102 | 103 | /* make a socket */ 104 | sock = socket(AF_INET, SOCK_STREAM, 0); 105 | /* set parameters */ 106 | addr.sin_family = AF_INET; 107 | addr.sin_port = htons(portnum); //10050 108 | addr.sin_addr.s_addr = inet_addr(ipadr); //"192.168.2.100" 109 | 110 | /* connect a server (host PC) */ 111 | connect(sock, (struct sockaddr*)&addr, sizeof(addr)); 112 | 113 | // main loop ------------------------------------------------------- 114 | while(1){ 115 | // receive data 116 | // printf("Receive data\n"); 117 | memset(buf, 0, sizeof(buf)); 118 | data = read(sock, buf, sizeof(buf)); 119 | 120 | // set pixel 121 | // printf("Set Pixel"); 122 | for( y = 0; y < (IMGSIZ); y++){ 123 | for( x = 0; x < (IMGSIZ); x++){ 124 | ap_int<64>tmp = 0; 125 | for( rgb = 0; rgb < (NUMIMG); rgb++){ 126 | tmp = tmp << 20; 127 | 128 | tmp |= ( buf[y * (IMGSIZ) * 3 + x * 3 + rgb] & 0xFFFFF); 129 | } 130 | t_tmp_img[ y * (IMGSIZ) + x] = tmp; 131 | } 132 | } 133 | // printf("OK\n"); 134 | 135 | // printf("Inference...\n"); 136 | BinCNN( t_bin_convW, t_BNFb, t_tmp_img, fc_result, 0); 137 | // printf("OK\n"); 138 | 139 | // printf("Result\n"); 140 | // for( i = 0; i < (OUT_DENSE_SIZ); i++)printf("%5d ", fc_result[i]); 141 | // printf("\n"); 142 | 143 | // send data to server 144 | double softmax[(OUT_DENSE_SIZ)]; 145 | double total_softmax = 0.0; 146 | double max_val = -9999.0; 147 | 148 | for( i = 0; i < (OUT_DENSE_SIZ); i++){ 
149 | if( (double)fc_result[i] > max_val) 150 | max_val = fc_result[i]; 151 | } 152 | 153 | for( i = 0; i < (OUT_DENSE_SIZ); i++){ 154 | total_softmax += exp( (double)(fc_result[i]) / max_val); 155 | } 156 | 157 | for( i = 0; i < (OUT_DENSE_SIZ); i++){ 158 | softmax[i] = (double)exp((double)fc_result[i] / max_val) / total_softmax; 159 | buf[i] = (char)(softmax[i] * 100.0); 160 | 161 | // printf("i=%d buf=%d softmax=%f\n", i, buf[i], softmax[i]); 162 | } 163 | 164 | // printf("Send Data"); 165 | write( sock, buf, (OUT_DENSE_SIZ)); 166 | } 167 | 168 | sds_free( t_tmp_img); sds_free( t_bin_convW); sds_free( t_BNFb); 169 | close(sock); 170 | 171 | } 172 | 173 | // ------------------------------------------------------------------ 174 | // END OF PROGRAM 175 | // ------------------------------------------------------------------ 176 | -------------------------------------------------------------------------------- /link_batch_normalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import numpy 5 | 6 | from chainer.functions.normalization import batch_normalization 7 | from chainer import initializers 8 | from chainer import link 9 | from chainer import variable 10 | 11 | import function_batch_normalization 12 | 13 | class BatchNormalization(link.Link): 14 | 15 | """Batch normalization layer on outputs of linear or convolution functions. 16 | 17 | This link wraps the :func:`~chainer.functions.batch_normalization` and 18 | :func:`~chainer.functions.fixed_batch_normalization` functions. 19 | 20 | It runs in three modes: training mode, fine-tuning mode, and testing mode. 21 | 22 | In training mode, it normalizes the input by *batch statistics*. It also 23 | maintains approximated population statistics by moving averages, which can 24 | be used for instant evaluation in testing mode. 
class BatchNormalization(link.Link):

    """Batch normalization link pinned to its stored statistics.

    This is a variant of Chainer's batch normalization link in which the
    switch that selects batch statistics is hard-wired to ``False`` (the
    original expression is left as a comment in ``__call__``).  As a result
    every call normalizes the input with ``avg_mean``/``avg_var`` through
    :func:`~chainer.functions.fixed_batch_normalization`; the ``test`` and
    ``finetune`` arguments are accepted but currently do not change the
    result.

    Args:
        size (int or tuple of ints): Size (or shape) of channel dimensions.
        decay (float): Decay rate of the moving average (stored on the link).
        eps (float): Epsilon value for numerical stability.
        dtype (numpy.dtype): Type of the parameters and statistics.
        use_gamma (bool): If ``True``, register a scaling parameter;
            otherwise a constant one is used at call time.
        use_beta (bool): If ``True``, register a shifting parameter;
            otherwise a constant zero is used at call time.
        initial_gamma: Initializer for ``gamma`` (ones when ``None``).
        initial_beta: Initializer for ``beta`` (zeros when ``None``).

    Attributes:
        gamma (~chainer.Variable): Scaling parameter (only if ``use_gamma``).
        beta (~chainer.Variable): Shifting parameter (only if ``use_beta``).
        avg_mean: Persistent mean used for normalization, initially zeros.
        avg_var: Persistent variance used for normalization, initially zeros.
        N (int): Persistent counter used by the fine-tuning branch.
        decay (float): Decay rate of the moving average.
        eps (float): Epsilon value for numerical stability.

    """

    def __init__(self, size, decay=0.9, eps=2e-5, dtype=numpy.float32,
                 use_gamma=True, use_beta=True,
                 initial_gamma=None, initial_beta=None):
        super(BatchNormalization, self).__init__()

        if use_gamma:
            self.add_param('gamma', size, dtype=dtype)
            gamma_init = initializers.One() if initial_gamma is None else initial_gamma
            initializers.init_weight(self.gamma.data, gamma_init)

        if use_beta:
            self.add_param('beta', size, dtype=dtype)
            beta_init = initializers.Zero() if initial_beta is None else initial_beta
            initializers.init_weight(self.beta.data, beta_init)

        # Persistent values travel with the link when it is serialized.
        self.add_persistent('avg_mean', numpy.zeros(size, dtype=dtype))
        self.add_persistent('avg_var', numpy.zeros(size, dtype=dtype))
        self.add_persistent('N', 0)
        self.decay = decay
        self.eps = eps

    def __call__(self, x, test=False, finetune=False):
        """Normalizes ``x`` with the stored mean and variance.

        Args:
            x (Variable): An input variable.
            test (bool): Accepted for interface compatibility; unused while
                the batch-statistics switch is pinned.
            finetune (bool): Only read by the batch-statistics branch, which
                is unreachable while the switch is pinned.

        Returns:
            The normalized output variable.

        """
        # Original expression: use_batch_mean = not test or finetune
        # NOTE(review): pinned to False, apparently for debugging; confirm
        # before re-enabling the batch-statistics branch below.
        use_batch_mean = False

        # Fall back to constant scale/shift when the parameter was not registered.
        gamma = self.gamma if hasattr(self, 'gamma') else variable.Variable(
            self.xp.ones(self.avg_mean.shape, dtype=x.dtype), volatile='auto')
        beta = self.beta if hasattr(self, 'beta') else variable.Variable(
            self.xp.zeros(self.avg_mean.shape, dtype=x.dtype), volatile='auto')

        if not use_batch_mean:
            # Use the stored statistics.
            mean = variable.Variable(self.avg_mean, volatile='auto')
            var = variable.Variable(self.avg_var, volatile='auto')
            return batch_normalization.fixed_batch_normalization(
                x, gamma, beta, mean, var, self.eps)

        # Batch-statistics branch (not reached while the switch is False).
        if finetune:
            self.N += 1
            decay = 1. - 1. / self.N
        else:
            decay = self.decay

        func = function_batch_normalization.BatchNormalizationFunction(
            self.eps, self.avg_mean, self.avg_var, True, decay)
        ret = func(x, gamma, beta)

        self.avg_mean = func.running_mean
        self.avg_var = func.running_var
        return ret

    def start_finetuning(self):
        """Resets the batch counter ``N`` used by the fine-tuning branch."""
        self.N = 0
def pad_to_square(img):
    """Center *img* on a black square canvas whose side is ``max(height, width)``.

    Args:
        img: Image array indexed as ``(row, column, channel)`` with 3 channels.

    Returns:
        numpy.ndarray: uint8 square image containing *img* unscaled.
    """
    h, w = img.shape[:2]
    if h > w:
        canvas = np.zeros((h, h, 3)).astype(np.uint8)
        d = (h - w) // 2
        canvas[0:h, d:d + w] = img[:, :]
    else:
        canvas = np.zeros((w, w, 3)).astype(np.uint8)
        d = (w - h) // 2
        canvas[d:d + h, 0:w] = img[:, :]
    return canvas


def random_crop(img):
    """Return a random window of *img*; each edge moves in by at most a quarter.

    Integer division is used so that ``random.randint`` always receives
    integers, including for sizes that are not multiples of four.
    """
    h, w = img.shape[:2]
    h4 = h // 4
    w4 = w // 4
    left = random.randint(0, w4)
    right = random.randint(w - w4, w)
    top = random.randint(0, h4)
    bottom = random.randint(h - h4, h)
    return img[top:bottom, left:right]  # y:y+h, x:x+w


def random_shrink_and_slide(img):
    """Trim a random margin (up to 1/8 of the height) and translate randomly.

    The translation is at most a quarter of the trimmed size per axis;
    pixels shifted in from outside are left to ``cv2.warpAffine``'s default
    border handling.
    """
    rows, cols = img.shape[:2]
    dd = random.randint(0, rows // 8)
    trimmed = img[dd:rows - dd, dd:cols - dd]
    # Use the real size after trimming (both sides lose ``dd`` pixels).
    rows, cols = trimmed.shape[:2]
    dw = random.randint(-(cols // 4), cols // 4)
    dh = random.randint(-(rows // 4), rows // 4)
    M = np.float32([[1, 0, dw], [0, 1, dh]])
    return cv2.warpAffine(trimmed, M, (cols, rows))


def main():
    """Build the augmented image/label pickles and the tag file.

    Reads the module-level ``args``, ``dataset_fname``, ``label_fname`` and
    ``tag_fname`` prepared by the argument-parsing section of this script.
    """
    print("[INFO] IMAGE PATH FILE %s" % args.pathfile)
    print("[INFO] DATASET FILE %s" % dataset_fname)
    print("[INFO] LABEL FILE %s" % label_fname)
    print("[INFO] TAG FILE %s" % tag_fname)

    print("[INFO] DATASET SIZE %dx%d" % (int(args.size), int(args.size)))
    print("[INFO] ROTATION %s" % args.rotate)
    print("[INFO] FLIPPING %s" % args.flip)
    print("[INFO] CROPPING %s" % args.crop)
    print("[INFO] KEEP ASPECT RATIO %s" % args.keepaspect)

    with open(args.pathfile, mode='r') as f:
        lines2 = f.readlines()

    # Each list line is "<image directory> <tag>"; the label is the line's rank.
    pathsAndLabels = []
    tags = []
    label_idx = 0
    for line in lines2:
        words = line.split()
        if len(words) < 2:
            continue  # blank or malformed line: nothing to register
        tags.append(words[1])
        choped_line = words[0].rstrip('\n\r') + '/'
        pathsAndLabels.append((choped_line, label_idx))
        print("[INFO] %s* are assigned to %d" % (choped_line, label_idx))
        label_idx = label_idx + 1

    # fileout tags
    with open(tag_fname, 'w') as f:
        for x in tags:
            f.write(str(x) + "\n")

    # set data size
    width = args.size
    height = args.size

    # get image path
    allData = []
    for path, label in pathsAndLabels:
        for imgName in glob.glob(path + "*"):
            allData.append((imgName, label))

    # shuffle through an index permutation so labels stay integers
    allData = [allData[n] for n in np.random.permutation(len(allData))]

    # set augmentation options
    n_crop = args.crop
    n_rotate = args.rotate
    if args.flip == 'yes' or args.rotate > 1:
        n_flip = 2
    else:
        n_flip = 1

    # register all images; stored directly as 8-bit to avoid a float64 buffer
    total = len(allData) * n_crop * n_rotate * n_flip
    imageData = np.zeros((total, 3, height, width), dtype=np.uint8)
    labelData = np.zeros(total)

    idx = 0
    for imgName, label in allData:
        sys.stderr.write('\r\033[K' + "CONVERTING IMAGE %d/%d" % (idx, total))
        sys.stderr.flush()

        org_img = cv2.imread(imgName)
        if org_img is None:
            print("ERROR %s CANNOT BE OPENED" % imgName)
            sys.exit(1)

        # padding empty pixels to keep aspect ratio (done once per image)
        if args.keepaspect == 'yes':
            org_img = pad_to_square(org_img)

        for i in range(n_crop):
            for k in range(n_flip):
                for j in range(n_rotate):
                    # cropping: always starts from the untouched source image,
                    # so crops do not compound across iterations
                    if i > 0:
                        if args.keepaspect == 'no':
                            img = random_crop(org_img)
                        else:
                            img = random_shrink_and_slide(org_img)
                    else:
                        img = org_img

                    # flipping (if rotate, then flipping is also applied)
                    if k != 0:
                        img = cv2.flip(img, 1)

                    # rotation in steps of two degrees
                    img = ndimage.rotate(img, 2 * j, reshape=False)

                    # Resize
                    img = cv2.resize(img, (width, height))

                    # Transpose for Chainer dataset: (Y,X,BGR) -> (BGR,Y,X)
                    imageData[idx] = img.transpose(2, 0, 1)
                    labelData[idx] = np.int32(label)

                    idx = idx + 1

    # generate pickle file: first 90% for training, the rest for testing
    threshold = int(len(imageData) / 10 * 9)

    image = {}
    label = {}
    image['train'] = imageData[0:threshold]
    image['test'] = imageData[threshold:]
    label['train'] = labelData[0:threshold]
    label['test'] = labelData[threshold:]

    print("[INFO] SAVE %s as an image dataset" % dataset_fname)
    with open(dataset_fname, mode='wb') as f:
        pickle.dump(image, f)

    print("[INFO] SAVE %s as a label dataset" % label_fname)
    with open(label_fname, mode='wb') as f:
        pickle.dump(label, f)


if __name__ == '__main__':
    main()

# -----------------------------------------------------------------------
# END OF PROGRAM
# -----------------------------------------------------------------------
#
# Acknowledgements:
# This source code is based on following projects:
#
# Chainer binarized neural network by Daisuke Okanohara
# https://github.com/hillbig/binary_net
# Various CNN models including Deep Residual Networks (ResNet)
# for CIFAR10 with Chainer by mitmul
# https://github.com/mitmul/chainer-cifar10
# -----------------------------------------------------------------------

import argparse
#import cPickle as pickle # python 2.7
import _pickle as pickle # python 3.5
import numpy as np
import os
import chainer
from chainer import optimizers
from chainer import serializers
import net2 # it will be generated by the GUINNESS GUI

import trainer

import time
import weight_clip

if __name__ == '__main__':
    # ------------------------------------------------------------------
    # Command line interface
    # ------------------------------------------------------------------
    parser = argparse.ArgumentParser(description='CIFAR-10 dataset trainer')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU device ID (negative value indicates CPU)')
    parser.add_argument('--model', '-m', type=str, default='bincnn', choices=['bincnn'],
                        help='Model name')
    parser.add_argument('--batch_size', '-b', type=int, default=20,
                        help='Mini batch size')
    parser.add_argument('--dataset', '-d', type=str, default='image.pkl',
                        help='Dataset image pkl file path')
    parser.add_argument('--label', '-l', type=str, default='label.pkl',
                        help='Dataset label pkl file path')
    parser.add_argument('--prefix', '-p', type=str, default='temp', # should be project name
                        help='Prefix of model parameter files')
    parser.add_argument('--iter', type=int, default=10,
                        help='Training iteration')
    parser.add_argument('--save_iter', type=int, default=0,
                        help='Iteration interval to save model parameter file.')
    parser.add_argument('--lr_decay_iter', type=int, default=100,
                        help='Iteration interval to decay learning rate')
    parser.add_argument('--weight_decay', type=float, default=0.0001,
                        help='Weight decay')
    parser.add_argument('--optimizer', type=str, default='sgd', choices=['sgd', 'adam', 'momentum', 'delta'],
                        help='Optimizer name')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='Initial learning rate for SGD')
    parser.add_argument('--alpha', type=float, default=0.00005,
                        help='Initial alpha for Adam')
    parser.add_argument('--res_depth', type=int, default=18,
                        help='Depth of Residual Network')
    parser.add_argument('--skip_depth', action='store_true',
                        help='Use stochastic depth in Residual Network')
    parser.add_argument('--swapout', action='store_true',
                        help='Use swapout')
    parser.add_argument('--seed', type=int, default=1,
                        help='Random seed')
    parser.add_argument('--dim', type=int, default=3,
                        help='Dimension (default RGB, that is, 3)')
    parser.add_argument('--siz', type=int, default=32,
                        help='ImageSiz (default 32, that is, 32x32)')
    parser.add_argument('--guinness', type=str, default='./hoge', # should be project name
                        help='Prefix of model parameter files for the GUINNESS flow')
    parser.add_argument('--resume', type=str, default='no',
                        help='Resume traning, if pre-trained model exists')
    args = parser.parse_args()

    # Seed numpy so the train/validation split below is reproducible.
    np.random.seed(args.seed)

    log_file_path = '{}_log.csv'.format(args.prefix)
    # lr_decay_iter = map(int, args.lr_decay_iter.split(','))

    # --prefix defaults to 'temp', so the fallback name is only used when a
    # caller explicitly passes None through some other entry point.
    if args.prefix is None:
        model_prefix = '{}_{}'.format(args.model, args.optimizer)
    else:
        model_prefix = args.prefix

    # ------------------------------------------------------------------
    # Load the image dataset
    # ------------------------------------------------------------------
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point --dataset / --label at files you generated yourself.
    print('loading dataset %s' % args.dataset)
    with open(args.dataset, 'rb') as f:
        images = pickle.load(f)

    # Shuffle the 'train' portion and hold out the last 10% for validation.
    index = np.random.permutation(len(images['train']))
    threshold = np.int32(len(images['train'])/10*9)
    train_index = index[:threshold]
    valid_index = index[threshold:]

    train_x = images['train'][train_index].astype(np.float32)
    valid_x = images['train'][valid_index].astype(np.float32)
    test_x = images['test'].astype(np.float32)

    print("[INFO] #TRAIN DATA: %7d" % len(train_x))
    print("[INFO] #VALID DATA: %7d" % len(valid_x))
    print("[INFO] #TEST DATA: %7d" % len(test_x))

    # ------------------------------------------------------------------
    # Load the label dataset (same split indices as the images)
    # ------------------------------------------------------------------
    with open(args.label, 'rb') as f:
        labels = pickle.load(f)
    train_y = labels['train'][train_index].astype(np.int32)
    valid_y = labels['train'][valid_index].astype(np.int32)
    test_y = labels['test'].astype(np.int32)

    # ------------------------------------------------------------------
    # Generate a testbench (test_img.txt) for the C/C++ code
    # ------------------------------------------------------------------
    # Only the first test image is exported.  The axes are reordered with
    # transpose(1, 2, 0) and the result is flattened to one value per line.
    # presumably images are stored channel-first (C, H, W) -- TODO confirm
    bench_img = test_x[0].transpose(1, 2, 0).reshape(-1,)

    fname = 'test_img.txt'
    print(' Test Image Fileout -> %s' % fname)
    np.savetxt(fname, bench_img, fmt="%.0f", delimiter=",")

    # ------------------------------------------------------------------
    # Build the model and the optimizer
    # ------------------------------------------------------------------
    print('start training')
    cifar_net = net2.CNN() # modified

    # resume pre-trained model, if exist
    if args.resume == 'yes':
        print(" Resume Pre-Trained Model")
        serializers.load_npz('{}.model'.format(model_prefix), cifar_net)

    if args.optimizer == 'sgd':
        print("optimizer: SGD")
        optimizer = optimizers.SGD(lr=args.lr)
    elif args.optimizer == 'momentum':
        print("optimizer: momentum SGD")
        optimizer = optimizers.MomentumSGD(lr=args.lr)
    elif args.optimizer == 'delta':
        print("optimizer: AdaDelta")
        optimizer = optimizers.AdaDelta()
    else:
        print("optimizer: Adam")
        optimizer = optimizers.Adam(alpha=args.alpha)
    optimizer.setup(cifar_net)
    if args.weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    # NOTE(review): the weight-clip hook is reconstructed as unconditional
    # (outside the weight-decay test above) -- confirm against the original
    # layout of this file.
    optimizer.add_hook(weight_clip.WeightClip())

    cifar_trainer = trainer.CifarTrainer(cifar_net, optimizer, args.iter, args.batch_size, args.gpu)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    state = {'best_valid_error': 100, 'best_test_error': 100, 'clock': time.perf_counter()}

    def on_epoch_done(epoch, n, o, loss, acc, valid_loss, valid_acc, test_loss, test_acc):
        """Per-epoch callback handed to ``cifar_trainer.fit``.

        Prints the epoch summary, checkpoints the model ``n`` and optimizer
        ``o`` when the validation error improves (and every ``--save_iter``
        epochs), applies the learning-rate schedule and appends one row to
        the CSV log.
        """
        error = 100 * (1 - acc)
        valid_error = 100 * (1 - valid_acc)
        test_error = 100 * (1 - test_acc)
        print('epoch {} done'.format(epoch))
        print('train loss: {} error: {}'.format(loss, error))
        print('valid loss: {} error: {}'.format(valid_loss, valid_error))
        print('test loss: {} error: {}'.format(test_loss, test_error))

        # Keep the parameters of the best model seen so far (by validation).
        if valid_error < state['best_valid_error']:
            serializers.save_npz('{}.model'.format(model_prefix), n)
            serializers.save_npz('{}.state'.format(model_prefix), o)
            state['best_valid_error'] = valid_error
            state['best_test_error'] = test_error

        # Optional periodic snapshot, independent of validation quality.
        if args.save_iter > 0 and (epoch + 1) % args.save_iter == 0:
            serializers.save_npz('{}_{}.model'.format(model_prefix, epoch + 1), n)
            serializers.save_npz('{}_{}.state'.format(model_prefix, epoch + 1), o)

        # prevent divergence when using identity mapping model
        # (currently unreachable: --model only accepts 'bincnn')
        if args.model == 'identity_mapping' and epoch < 9:
            o.lr = 0.01 + 0.01 * (epoch + 1)

        # if len(lr_decay_iter) == 1 and (epoch + 1) % lr_decay_iter[0] == 0 or epoch + 1 in lr_decay_iter:
        # Note, "lr_decay_iter" should be a list object to store a training schedule,
        # However, to keep up with the Python3.5, I changed to an integer value...
        if (epoch + 1) % args.lr_decay_iter == 0 and epoch > 1:
            # Inspect the optimizer that was passed in, and only touch a
            # hyper-parameter it really has (AdaDelta has neither alpha
            # nor lr, so it is left unchanged).
            if hasattr(o, 'alpha'):
                o.alpha *= 0.1
            elif hasattr(o, 'lr'):
                o.lr *= 0.1

        clock = time.perf_counter()
        print('elapsed time: {}'.format(clock - state['clock']))
        state['clock'] = clock

        # NOTE(review): the values written are error percentages although
        # the header row names the columns "acc"; the header is left as-is
        # because other tools may parse it -- confirm before renaming.
        with open(log_file_path, 'a') as f:
            f.write('{},{},{},{},{},{},{}\n'.format(epoch + 1, loss, error, valid_loss, valid_error, test_loss, test_error))

    # ------------------------------------------------------------------
    # Log file: start fresh unless resuming, then train
    # ------------------------------------------------------------------
    if args.resume == 'no':
        print(" Create %s as a New Logfile" % log_file_path)
        with open(log_file_path, 'w') as f:
            f.write('epoch,train loss,train acc,valid loss,valid acc,test loss,test acc\n')
    else:
        print(" Overwrite Existing Logfile %s" % log_file_path)

    cifar_trainer.fit(train_x, train_y, valid_x, valid_y, args.siz, args.dim, test_x, test_y, on_epoch_done)

    print('best test error: {}'.format(state['best_test_error']))

    # Leave a "stop" marker in train_status.txt once training has finished.
    with open("train_status.txt", 'w') as f:
        f.write("stop")

# -----------------------------------------------------------------------
# END OF PROGRAM
# -----------------------------------------------------------------------
import numpy
from six import moves

from chainer import cuda
from chainer import function
from chainer.utils import conv
from chainer.utils import type_check


def _kern():
    """Return an elementwise GPU kernel mapping x to +1 if x >= 0 else -1."""
    return cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'binarize')


def _as_mat(x):
    """Return ``x`` unchanged if it is 2-D, else flatten all but axis 0."""
    if x.ndim == 2:
        return x
    return x.reshape(len(x), -1)


# Kept for backward compatibility: these module-level names were defined by
# the original file whenever cuDNN is enabled.  The convolution below no
# longer calls into cuDNN (see IntegerConv2DFunction.forward_gpu).
if cuda.cudnn_enabled:
    cudnn = cuda.cudnn
    libcudnn = cuda.cudnn.cudnn
    _cudnn_version = libcudnn.getVersion()
    _fwd_pref = libcudnn.CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
    if _cudnn_version >= 4000:
        _bwd_filter_pref = \
            libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
        _bwd_data_pref = \
            libcudnn.CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT


def _check_cudnn_acceptable_type(x_dtype, W_dtype):
    """Return True if both dtypes match and cuDNN can handle them.

    Reads ``_cudnn_version``, which only exists when ``cuda.cudnn_enabled``
    is true, so call this only after checking that flag.
    """
    return x_dtype == W_dtype and (
        _cudnn_version >= 3000 or x_dtype != numpy.float16)


def _pair(x):
    """Return ``x`` itself if it is indexable, otherwise the tuple (x, x)."""
    if hasattr(x, '__getitem__'):
        return x
    return x, x


class IntegerConv2DFunction(function.Function):

    """2-D convolution whose filter weights are binarized to +1 / -1.

    The input ``x`` is used as-is; only ``W`` is binarized (``W >= 0`` maps
    to +1, everything else to -1) before the product is taken.  The backward
    pass propagates through the binarized weights and returns the gradient
    with respect to the real-valued ``W``.

    Args:
        stride (int or pair of ints): Stride of filter applications.
        pad (int or pair of ints): Spatial padding width.
        use_cudnn (bool): Accepted and stored for interface compatibility;
            the computation always uses the im2col implementation.
        cover_all (bool): Forwarded to the im2col / output-size helpers.
    """

    def __init__(self, stride=1, pad=0, use_cudnn=True, cover_all=False):
        self.sy, self.sx = _pair(stride)
        self.ph, self.pw = _pair(pad)
        self.use_cudnn = use_cudnn
        self.cover_all = cover_all

    def check_type_forward(self, in_types):
        """Validate inputs: 4-D float ``x`` and ``W`` plus optional 1-D bias."""
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)

        x_type = in_types[0]
        w_type = in_types[1]
        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 4,
            w_type.ndim == 4,
            # input channels of x must match input channels of W
            x_type.shape[1] == w_type.shape[1],
        )

        if n_in.eval() == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                # one bias value per output channel
                b_type.shape[0] == w_type.shape[0],
            )

    def forward_cpu(self, inputs):
        """Forward pass on NumPy arrays; returns a 1-tuple ``(y,)``."""
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        # Patch matrix is cached for the weight gradient in backward_cpu.
        self.col = conv.im2col_cpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)

        # Binarize the weights only; the input stays real/integer valued.
        Wb = numpy.where(W >= 0, 1, -1).astype(W.dtype, copy=False)

        y = numpy.tensordot(
            self.col, Wb, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)

        if b is not None:
            y += b

        # Move the output-channel axis from last to position 1.
        return numpy.rollaxis(y, 3, 1),

    def forward_gpu(self, inputs):
        """Forward pass on CuPy arrays; returns a 1-tuple ``(y,)``.

        The original file also carried a cuDNN branch that was only entered
        when ``cover_all`` was true.  That branch convolved with the
        un-binarized weights, disagreeing with the CPU path and the im2col
        path below.  The GPU path therefore always uses the binarized
        im2col implementation.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all)
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all)

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)

        # Implementation using im2col; the patch matrix is cached for
        # backward_gpu.
        self.col = conv.im2col_gpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)

        W_mat = W.reshape(out_c, -1)
        col_mats = self.col.reshape(n, -1, out_h * out_w)

        Wb_mat = _kern()(W_mat)

        y_mats = y.reshape(n, out_c, -1)
        # TODO(beam2d): Use streams or batch gemm
        for i in moves.range(n):
            y_mats[i] = Wb_mat.dot(col_mats[i])
        # TODO(beam2d): Support unshared bias
        if b is not None:
            y += b[:, None, None]

        return y,

    def backward_cpu(self, inputs, grad_outputs):
        """Backward pass on NumPy arrays.

        Returns ``(gx, gW)`` or ``(gx, gW, gb)`` when a bias was given.
        Relies on ``self.col`` cached by ``forward_cpu``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        # Gradient w.r.t. the real-valued weights: gy correlated with the
        # cached input patches.
        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)

        # Gradient w.r.t. the input flows through the binarized weights.
        Wb = numpy.where(W >= 0, 1, -1).astype(W.dtype, copy=False)

        gcol = numpy.tensordot(Wb, gy, (0, 1)).astype(x.dtype, copy=False)

        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb

    def backward_gpu(self, inputs, grad_outputs):
        """Backward pass on CuPy arrays (im2col implementation).

        Returns ``(gx, gW)`` or ``(gx, gW, gb)`` when a bias was given.
        Relies on ``self.col`` cached by ``forward_gpu``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        gW_mat = gW.reshape(out_c, c * kh * kw)
        col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
        gy_mats = gy.reshape(n, out_c, out_h * out_w)
        # TODO(beam2d): Use streams or batch gemm
        gW_mat[...] = 0
        for i in moves.range(n):
            gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

        # Input gradient flows through the binarized weights.
        W_mat = W.reshape(out_c, -1)
        Wb_mat = _kern()(W_mat)

        gcol = cuda.cupy.empty_like(self.col)
        gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)

        for i in moves.range(n):
            gcol_mats[i] = cuda.cupy.dot(Wb_mat.T, gy_mats[i])

        gx = conv.col2im_gpu(
            gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb


def func_convolution_2d(x, W, b=None, stride=1, pad=0, use_cudnn=True,
                        cover_all=False):
    """Two-dimensional convolution with binarized (+1 / -1) filter weights.

    It takes three variables: the input image ``x``, the filter weight ``W``,
    and the bias vector ``b``.  ``W`` is binarized inside the function
    (``W >= 0`` becomes +1, otherwise -1); ``x`` is used unchanged.

    Notation: here is a notation for dimensionalities.

    - :math:`n` is the batch size.
    - :math:`c_I` and :math:`c_O` are the number of the input and output,
      respectively.
    - :math:`h` and :math:`w` are the height and width of the input image,
      respectively.
    - :math:`k_H` and :math:`k_W` are the height and width of the filters,
      respectively.

    Args:
        x (~chainer.Variable): Input variable of shape :math:`(n, c_I, h, w)`.
        W (~chainer.Variable): Weight variable of shape
            :math:`(c_O, c_I, k_H, k_W)`.
        b (~chainer.Variable): Bias variable of length :math:`c_O` (optional).
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        use_cudnn (bool): Accepted for interface compatibility; the
            computation always uses the im2col implementation.
        cover_all (bool): If True, all spatial locations are convoluted into
            some output pixels. It may make the output size larger.


    Returns:
        ~chainer.Variable: Output variable.

    The function computes correlations between the binarized filters
    and patches of size :math:`(k_H, k_W)` in ``x``.
    Note that correlation here is equivalent to the inner product between
    expanded vectors.
    Patches are extracted at positions shifted by multiples of ``stride`` from
    the first position ``-pad`` for each spatial axis.
    The right-most (or bottom-most) patches do not run over the padded spatial
    size.

    Let :math:`(s_Y, s_X)` be the stride of filter application, and
    :math:`(p_H, p_W)` the spatial padding size. Then, the output size
    :math:`(h_O, w_O)` is determined by the following equations:

    .. math::

       h_O &= (h + 2p_H - k_H) / s_Y + 1,\\\\
       w_O &= (w + 2p_W - k_W) / s_X + 1.

    If the bias vector is given, then it is added to all spatial locations of
    the output of convolution.

    .. seealso:: :class:`Convolution2D`

    """
    func = IntegerConv2DFunction(stride, pad, use_cudnn, cover_all)
    if b is None:
        return func(x, W)
    else:
        return func(x, W, b)
import numpy
from six import moves

from chainer import cuda
from chainer import function
from chainer.utils import conv
from chainer.utils import type_check


def _kern():
    """Return an elementwise GPU kernel mapping x to +1 if x >= 0 else -1."""
    return cuda.elementwise(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'binarize')


def _as_mat(x):
    """Return ``x`` unchanged if it is 2-D, else flatten all but axis 0."""
    if x.ndim == 2:
        return x
    return x.reshape(len(x), -1)


# Kept for backward compatibility: these module-level names were defined by
# the original file whenever cuDNN is enabled.  The convolution below no
# longer calls into cuDNN (see BinaryConv2DFunction.forward_gpu).
if cuda.cudnn_enabled:
    cudnn = cuda.cudnn
    libcudnn = cuda.cudnn.cudnn
    _cudnn_version = libcudnn.getVersion()
    _fwd_pref = libcudnn.CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
    if _cudnn_version >= 4000:
        _bwd_filter_pref = \
            libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
        _bwd_data_pref = \
            libcudnn.CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT


def _check_cudnn_acceptable_type(x_dtype, W_dtype):
    """Return True if both dtypes match and cuDNN can handle them.

    Reads ``_cudnn_version``, which only exists when ``cuda.cudnn_enabled``
    is true, so call this only after checking that flag.
    """
    return x_dtype == W_dtype and (
        _cudnn_version >= 3000 or x_dtype != numpy.float16)


def _pair(x):
    """Return ``x`` itself if it is indexable, otherwise the tuple (x, x)."""
    if hasattr(x, '__getitem__'):
        return x
    return x, x


class BinaryConv2DFunction(function.Function):

    """2-D convolution with binarized inputs and binarized filter weights.

    Weights are mapped to +1 where ``W >= 0`` and -1 otherwise.  Inputs are
    binarized as well; see the review note in ``forward_cpu`` about how the
    CPU and GPU paths treat exact zeros differently.

    Args:
        stride (int or pair of ints): Stride of filter applications.
        pad (int or pair of ints): Spatial padding width.
        use_cudnn (bool): Accepted and stored for interface compatibility;
            the computation always uses the im2col implementation.
        cover_all (bool): Forwarded to the im2col / output-size helpers.
    """

    def __init__(self, stride=1, pad=0, use_cudnn=True, cover_all=False):
        self.sy, self.sx = _pair(stride)
        self.ph, self.pw = _pair(pad)
        self.use_cudnn = use_cudnn
        self.cover_all = cover_all

    def check_type_forward(self, in_types):
        """Validate inputs: 4-D float ``x`` and ``W`` plus optional 1-D bias."""
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)

        x_type = in_types[0]
        w_type = in_types[1]
        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 4,
            w_type.ndim == 4,
            # input channels of x must match input channels of W
            x_type.shape[1] == w_type.shape[1],
        )

        if n_in.eval() == 3:
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                # one bias value per output channel
                b_type.shape[0] == w_type.shape[0],
            )

    def forward_cpu(self, inputs):
        """Forward pass on NumPy arrays; returns a 1-tuple ``(y,)``."""
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        # The un-binarized patch matrix is cached for backward_cpu.
        self.col = conv.im2col_cpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)

        # Sign-style binarization of the patches: >0 -> +1, <0 -> -1 and
        # exact zeros (including zero padding) stay 0.
        # NOTE(review): forward_gpu binarizes x with `x >= 0 ? 1 : -1`
        # *before* im2col, so an exact 0 in the data becomes +1 there but
        # stays 0 here, and the GPU caches the binarized patches while this
        # path caches the raw ones.  Behaviour is left unchanged -- confirm
        # which variant the generated hardware expects.
        Xb = numpy.where(self.col > 0, 1, self.col).astype(x.dtype, copy=False)
        Xb = numpy.where(self.col < 0, -1, Xb).astype(x.dtype, copy=False)
        Wb = numpy.where(W >= 0, 1, -1).astype(W.dtype, copy=False)
        y = numpy.tensordot(
            Xb, Wb, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
        if b is not None:
            y += b

        # Move the output-channel axis from last to position 1.
        return numpy.rollaxis(y, 3, 1),

    def forward_gpu(self, inputs):
        """Forward pass on CuPy arrays; returns a 1-tuple ``(y,)``.

        The original file also carried a cuDNN branch that was only entered
        when ``cover_all`` was true.  That branch convolved the raw input
        with the un-binarized weights, disagreeing with the CPU path and the
        im2col path below.  The GPU path therefore always uses the binarized
        im2col implementation.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all)
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all)

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)

        # Implementation using im2col: binarize the input first, then cache
        # the binarized patch matrix for backward_gpu.
        Xb = _kern()(x)

        self.col = conv.im2col_gpu(
            Xb, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)

        W_mat = W.reshape(out_c, -1)
        col_mats = self.col.reshape(n, -1, out_h * out_w)
        Wb_mat = _kern()(W_mat)

        y_mats = y.reshape(n, out_c, -1)
        # TODO(beam2d): Use streams or batch gemm
        for i in moves.range(n):
            y_mats[i] = Wb_mat.dot(col_mats[i])
        # TODO(beam2d): Support unshared bias
        if b is not None:
            y += b[:, None, None]

        return y,

    def backward_cpu(self, inputs, grad_outputs):
        """Backward pass on NumPy arrays.

        Returns ``(gx, gW)`` or ``(gx, gW, gb)`` when a bias was given.
        Relies on ``self.col`` cached by ``forward_cpu``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        # Gradient w.r.t. the real-valued weights: gy correlated with the
        # cached input patches.
        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)

        # Gradient w.r.t. the input flows through the binarized weights.
        Wb = numpy.where(W >= 0, 1, -1).astype(W.dtype, copy=False)

        gcol = numpy.tensordot(Wb, gy, (0, 1)).astype(x.dtype, copy=False)

        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb

    def backward_gpu(self, inputs, grad_outputs):
        """Backward pass on CuPy arrays (im2col implementation).

        Returns ``(gx, gW)`` or ``(gx, gW, gb)`` when a bias was given.
        Relies on ``self.col`` cached by ``forward_gpu``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        gW_mat = gW.reshape(out_c, c * kh * kw)
        col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
        gy_mats = gy.reshape(n, out_c, out_h * out_w)
        # TODO(beam2d): Use streams or batch gemm
        gW_mat[...] = 0
        for i in moves.range(n):
            gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

        # Input gradient flows through the binarized weights.
        W_mat = W.reshape(out_c, -1)
        Wb_mat = _kern()(W_mat)

        gcol = cuda.cupy.empty_like(self.col)
        gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)

        for i in moves.range(n):
            gcol_mats[i] = cuda.cupy.dot(Wb_mat.T, gy_mats[i])

        gx = conv.col2im_gpu(
            gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb


def func_convolution_2d(x, W, b=None, stride=1, pad=0, use_cudnn=True,
                        cover_all=False):
    """Two-dimensional convolution with binarized inputs and weights.

    It takes three variables: the input image ``x``, the filter weight ``W``,
    and the bias vector ``b``.  Both ``x`` and ``W`` are binarized inside
    the function before the product is taken.

    Notation: here is a notation for dimensionalities.

    - :math:`n` is the batch size.
    - :math:`c_I` and :math:`c_O` are the number of the input and output,
      respectively.
    - :math:`h` and :math:`w` are the height and width of the input image,
      respectively.
    - :math:`k_H` and :math:`k_W` are the height and width of the filters,
      respectively.

    Args:
        x (~chainer.Variable): Input variable of shape :math:`(n, c_I, h, w)`.
        W (~chainer.Variable): Weight variable of shape
            :math:`(c_O, c_I, k_H, k_W)`.
        b (~chainer.Variable): Bias variable of length :math:`c_O` (optional).
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        use_cudnn (bool): Accepted for interface compatibility; the
            computation always uses the im2col implementation.
        cover_all (bool): If True, all spatial locations are convoluted into
            some output pixels. It may make the output size larger.


    Returns:
        ~chainer.Variable: Output variable.

    The function computes correlations between the binarized filters
    and binarized patches of size :math:`(k_H, k_W)` in ``x``.
    Note that correlation here is equivalent to the inner product between
    expanded vectors.
    Patches are extracted at positions shifted by multiples of ``stride`` from
    the first position ``-pad`` for each spatial axis.
    The right-most (or bottom-most) patches do not run over the padded spatial
    size.

    Let :math:`(s_Y, s_X)` be the stride of filter application, and
    :math:`(p_H, p_W)` the spatial padding size. Then, the output size
    :math:`(h_O, w_O)` is determined by the following equations:

    .. math::

       h_O &= (h + 2p_H - k_H) / s_Y + 1,\\\\
       w_O &= (w + 2p_W - k_W) / s_X + 1.

    If the bias vector is given, then it is added to all spatial locations of
    the output of convolution.

    .. seealso:: :class:`Convolution2D`

    """
    func = BinaryConv2DFunction(stride, pad, use_cudnn, cover_all)
    if b is None:
        return func(x, W)
    else:
        return func(x, W, b)
// -------------------------------------------------------------------------
// Binary Convolutional Layer
// -------------------------------------------------------------------------
// 3x3 binarized convolution over a square feature map, computed in place.
//
//   fmap  : feature map; each element packs one bit per feature plane.
//           It is read as the layer input and overwritten with the output.
//   layer : selects the per-layer code in the generated switch statements.
//   size  : edge length of the (square) feature map for this layer.
//   n_in  : number of input feature planes (weight of one 3x3 tap).
//   n_out : number of output feature planes (bits produced per pixel).
//
// Tokens written as (NAME) are template placeholders; presumably the code
// generator substitutes them before compilation -- TODO confirm against
// gen_cpp_code_v3.py.
void bin_conv2d_pipeline(
		ap_int<(MAX_BCONV_WIDTH)> fmap[(IMGSIZ)][(IMGSIZ)],
		int layer,
		int size,
		int n_in,
		int n_out
)
{
(BCONV_REG_PRAGMA)

	// NOTE(review): infeat, idx, mask and allzero are not used by the code
	// visible here; they may be referenced by the placeholder expansions.
	int ofeat, infeat, w_flag;
	int i, k, ky, kx, ix, iy, ox, oy;
	int idx = 0;

	// Line buffers: pixel words and a parallel "this word is padding" flag.
	static ap_int<(MAX_BCONV_WIDTH)> shift_reg1[((IMGSIZ)+2)*3];
#pragma HLS ARRAY_PARTITION variable=shift_reg1 complete dim=1
	static ap_uint<1> padding_shift_reg[((IMGSIZ)+2)*3];
#pragma HLS ARRAY_PARTITION variable=padding_shift_reg complete dim=1

	// Number of output bits already packed for the current output pixel.
	int cnt = 0;

	ix = iy = ox = oy = w_flag = 0;

	// Raster scan over the input frame extended by a one-pixel border:
	// (ix, iy) is the position inside the (size+2) x (size+2) padded frame.
	CONV_IF: for( k = 0; k < (size+2) * (size+2); k++){
#pragma HLS loop_flatten off

		// Advance both shift registers by one element.
		SHIFT_REG: for( i = 0; i < 2 * ((IMGSIZ)+2) + 3; i++){
#pragma HLS UNROLL
			shift_reg1[ i] = shift_reg1[ i + 1];
			padding_shift_reg[ i] = padding_shift_reg[ i + 1];
		}
		ap_int<(MAX_BCONV_WIDTH)> din;
		ap_uint<1> padding;
		if( (ix > 0 && ix <= size) && (iy > 0 && iy <= size)){
			// Interior position: take the real pixel word.
			din = (ap_int<(MAX_BCONV_WIDTH)>)fmap[iy-1][ix-1];
			padding = 0;
		} else {
			// Border position: feed an all-ones word and mark it as padding
			// so that it is excluded from the accumulation below.
			ap_int<(MAX_BCONV_WIDTH)> allone;
			allone = ~0;
			din = allone;
			padding = 1;
		}
		// Presumably stores din/padding into the shift registers at a
		// layer-dependent tap position -- generated code, not visible here.
		switch( layer){
(BCONV_REG_SELECT)
		}

		ix++;
		if( ix == size+2){
			ix = 0;
			iy++;
		}

		// w_flag starts counting once two padded rows plus three words have
		// been consumed; it then cycles 1..size+2 and restarts cnt on wrap.
		if( k >= ((size+2)*2+3 - 1)){
			w_flag++;
			if( w_flag > (size+2)){
				w_flag = 1;
				cnt = 0;
			}
		}

		// convolutional operation -----------------------------------
		// bit_tmp is a one-hot mask for the output feature being produced;
		// streamOut collects the thresholded bits of all output features.
		ap_uint<(MAX_BCONV_WIDTH)> bit_tmp = 0x1;
		ap_uint<(MAX_BCONV_WIDTH)> streamOut = 0;

		OF: for( ofeat = 0; ofeat < n_out; ofeat++){
			ap_int<16> tmp = 0;   // accumulator for this output feature
			ap_int<16> tmp2;      // population count of bxor for one tap

			CONV_KY: for( ky = 0; ky < 3; ky++){
#pragma HLS pipeline
				CONV_KX: for( kx = 0; kx < 3; kx++){
					ap_uint<(MAX_BCONV_WIDTH)> bx, bw;
					ap_uint<(MAX_BCONV_WIDTH)> bxor;
					ap_uint<(MAX_BCONV_WIDTH)> mask;
					ap_uint<(MAX_BCONV_WIDTH)> allzero = 0;
					ap_uint<1>is_padding;

					// Generated code; presumably loads bw for this layer,
					// output feature and tap -- TODO confirm.
					switch( layer){
(BCONV_WEIGHT_SELECT)
					}

					// Generated code; expected to set bx, bxor and is_padding,
					// which are read below but not assigned in visible code.
(BIN_XOR_MAC)

					// Count the set bits of bxor.
					tmp2 = 0;
					ONES_COUNT: for( i = 0; i < (MAX_BCONV_WIDTH); i++){
						tmp2 += (((bxor >> i) & 0x1) == 1) ? 1 : 0;
					}
					// Each tap contributes n_in - 2 * popcount(bxor); taps
					// that fall on padding contribute nothing.
					if( is_padding == 0)
						tmp += (n_in - tmp2 * 2);
				}
			}

			// Emit output only while the window is at one of the `size`
			// valid positions of the current row.
			if( w_flag > 0 && w_flag <= size){
#pragma HLS pipeline
				ap_int<16> bias;
				// Generated code; expected to load the bias for this layer
				// and output feature.
				switch( layer){
(BCONV_BIAS_SELECT)
				}
				tmp += bias;

				// Sign activation: a non-negative sum sets this feature's bit.
				if( tmp >= 0) streamOut = streamOut | bit_tmp;

				bit_tmp = bit_tmp << 1;

				cnt++;
				if( cnt == n_out){
					// All output features done: write the packed word back
					// into fmap and advance the output raster position.
					cnt = 0;
					fmap[oy][ox] = (ap_int<(MAX_BCONV_WIDTH)>)streamOut;

					ox++;
					if( ox == size){
						ox = 0;
						oy++;
					}

					idx++;
				}

			}
		}

	}
}
// Integer-input convolution layer: a thin wrapper that forwards all of its
// template arguments and buffers unchanged to int_conv2d_pipeline.
//
//   BIN_TYPE / BOUT_TYPE   : element types of the input / output maps.
//   NUM_IFEAT / NUM_OFEAT  : forwarded as the pipeline's N_IFEAT / N_OFEAT.
//   INFEAT_SIZ / OFEAT_SIZ : edge lengths of the square input / output maps.
//   infmap  : input feature map.
//   outfmap : output feature map, written by the pipeline.
//   W       : one row of 3*3 weight words per output feature.
//   BNFb    : one 20-bit value per output feature; the pipeline adds it to
//             the accumulated sum before thresholding.
template< typename BIN_TYPE, typename BOUT_TYPE, int NUM_IFEAT, int NUM_OFEAT,
	int INFEAT_SIZ, int OFEAT_SIZ>
void int_conv2d_layer(
		BIN_TYPE infmap[INFEAT_SIZ][INFEAT_SIZ],
		BOUT_TYPE outfmap[OFEAT_SIZ][OFEAT_SIZ],
		ap_int<(NUMIMG)> W[NUM_OFEAT][3*3],
		ap_int<20> BNFb[NUM_OFEAT]
)
{
	int_conv2d_pipeline< BIN_TYPE, BOUT_TYPE, NUM_IFEAT, NUM_OFEAT,
		INFEAT_SIZ, OFEAT_SIZ>( infmap, outfmap, W, BNFb);
}
// -------------------------------------------------------------------------
// Maximum Pooling Layer
// -------------------------------------------------------------------------
// In-place 2x2 pooling with stride 2 on a square, bit-packed feature map.
// The four words of every window are combined with a bitwise OR (the
// per-bit maximum) and the result is stored compactly in the upper-left
// corner of the same array.
//
//   TYPE_BIT : element type of the map; must support operator|.
//   FEAT_SIZ : edge length of the input map.
//   POOL_SIZ : not referenced by the body; the window is always 2x2.
//   ftmp     : feature map, read and overwritten.
template< typename TYPE_BIT, int FEAT_SIZ, int POOL_SIZ>
void max_pooling_layer( TYPE_BIT ftmp[FEAT_SIZ][FEAT_SIZ])
{
	int out_row = 0;
	PY: for( int row = 0; row < FEAT_SIZ; row += 2){
		int out_col = 0;
		PX: for( int col = 0; col < FEAT_SIZ; col += 2){
			// Read the complete window before storing: the destination
			// never lies ahead of the words still to be read.
			const TYPE_BIT top_left     = ftmp[row][col];
			const TYPE_BIT top_right    = ftmp[row][col+1];
			const TYPE_BIT bottom_left  = ftmp[row+1][col];
			const TYPE_BIT bottom_right = ftmp[row+1][col+1];

			ftmp[out_row][out_col] =
				top_left | top_right | bottom_left | bottom_right;
			out_col++;
		}
		out_row++;
	}
}
// -------------------------------------------------------------------------
// Binarized CNN Kernel
// -------------------------------------------------------------------------
// Runs one inference: copies the flat input image into a local 2-D buffer
// and then executes the generated per-layer code for every layer index.
//
//   t_in_img  : input image as a flat array, indexed y*(IMGSIZ)+x, one
//               64-bit word per pixel.
//   fc_result : output scores; presumably written by the generated layer
//               code -- nothing in the visible body writes it.
//
// NOTE(review): fc_result is declared with a fixed length of 10 here, while
// BinCNN declares it with (OUT_DENSE_SIZ) -- confirm that this is intended
// for networks whose output size is not 10.
#ifdef __SDSCC__
#pragma SDS data access_pattern(t_in_img: SEQUENTIAL)
#pragma SDS data zero_copy(t_in_img[0:(IMGSIZ)*(IMGSIZ)])
#endif
void kernel(
// Both preprocessor branches currently declare identical parameters.
#ifdef __SDSCC__
		ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
		int fc_result[10]
#else
		ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
		int fc_result[10]
#endif
)
{
	// Working buffers; fb_tmp and fc_tmp are not touched by the visible
	// code and are presumably used by the generated layer calls.
	ap_int<(MAX_BCONV_WIDTH)> fb_tmp[(IMGSIZ)][(IMGSIZ)];
	ap_int<1> fc_tmp[(MAX_DENSE_SIZ)];
	ap_int<64> in_img[(IMGSIZ)][(IMGSIZ)];

	int y, x, of, layer, bin_layer_idx;
	// Placeholder for generated per-network parameter definitions.
(DEF_CNN_PARAMETER)

	// Unflatten the input image into in_img[y][x].
	for( y = 0; y < (IMGSIZ); y++){
		for( x = 0; x < (IMGSIZ); x++){
			in_img[y][x] = t_in_img[y*(IMGSIZ)+x];
		}
	}

#pragma HLS INLINE

	// bin_layer_idx starts at 1; its use is inside the generated code.
	bin_layer_idx = 1;
	// One iteration per network layer; the case bodies are generated.
	BCONV: for( layer = 0; layer < (NUM_LAYER); layer++){
		switch(layer){
(DEF_CNN_LAYER)
		}
	}
}
int init 441 | #else 442 | int t_bin_convW[(WEIGHT_SIZ)], 443 | int t_BNFb[(BIAS_SIZ)], 444 | ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)], 445 | int fc_result[(OUT_DENSE_SIZ)], 446 | int init 447 | #endif 448 | ) 449 | { 450 | /* 451 | #pragma HLS INTERFACE s_axilite register port=t_bin_convW bundle=slv0 452 | #pragma HLS INTERFACE s_axilite register port=t_BNFb bundle=slv0 453 | #pragma HLS INTERFACE s_axilite register port=t_in_img bundle=slv0 454 | #pragma HLS INTERFACE s_axilite register port=fc_result bundle=slv0 455 | #pragma HLS INTERFACE s_axilite register port=init bundle=slv0 456 | #pragma HLS INTERFACE s_axilite register port=return bundle=slv0 457 | */ 458 | if( init == 1) 459 | setup( t_bin_convW, t_BNFb); 460 | else 461 | kernel( t_in_img, fc_result); 462 | } 463 | 464 | // ------------------------------------------------------------------ 465 | // END OF PROGRAM 466 | // ------------------------------------------------------------------ 467 | -------------------------------------------------------------------------------- /function_batch_normalization.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from chainer import cuda 4 | from chainer import function 5 | from chainer.utils import type_check 6 | 7 | if cuda.cudnn_enabled: 8 | cudnn = cuda.cudnn 9 | libcudnn = cudnn.cudnn 10 | _cudnn_version = libcudnn.getVersion() 11 | 12 | 13 | def _as4darray(arr): 14 | if arr.ndim == 0: 15 | return arr.reshape(1, 1, 1, 1) 16 | elif arr.ndim == 4: 17 | return arr 18 | else: 19 | return arr.reshape(arr.shape[0], -1, 1, 1) 20 | 21 | 22 | def _xhat(x, mean, std, expander): 23 | x_mu = x - mean[expander] 24 | x_mu /= std[expander] 25 | return x_mu 26 | 27 | 28 | class BatchNormalizationFunction(function.Function): 29 | 30 | def __init__(self, eps=2e-5, mean=None, var=None, train=False, 31 | decay=0.9, use_cudnn=True): 32 | self.running_mean = mean 33 | self.running_var = var 34 | 35 | self.train = train 36 | 
class BatchNormalizationFunction(function.Function):

    """Batch normalization with batch statistics or caller-fixed statistics.

    With ``train=True`` the function is called with ``(x, gamma, beta)``,
    normalizes with the statistics of the mini-batch and updates
    ``running_mean`` / ``running_var``. With ``train=False`` it is called
    with ``(x, gamma, beta, mean, var)`` and normalizes with the given
    statistics.

    Args:
        eps (float): Epsilon value for numerical stability.
        mean (array): Initial running mean, or ``None``.
        var (array): Initial running variance, or ``None``.
        train (bool): Whether batch statistics are computed and the running
            statistics are updated.
        decay (float): Decay rate of the running averages.
        use_cudnn (bool): If ``True`` and cuDNN is enabled, cuDNN is used
            where the input allows it.

    Raises:
        RuntimeError: If cuDNN is requested and enabled while ``eps`` is
            smaller than ``1e-5``.
    """

    def __init__(self, eps=2e-5, mean=None, var=None, train=False,
                 decay=0.9, use_cudnn=True):
        self.running_mean = mean
        self.running_var = var

        self.train = train
        self.eps = eps
        if cuda.cudnn_enabled and use_cudnn:
            # Only values strictly below the minimum are invalid: the check
            # used to be ``<=``, which rejected eps == 1e-5 although the
            # message (and cuDNN) only forbid smaller values.
            if eps < 1e-5:
                msg = 'cuDNN does not allow an eps value less than 1e-5.'
                raise RuntimeError(msg)
        self.use_cudnn = use_cudnn
        # Allocated lazily by the cuDNN training path of forward().
        self.mean_cache = None
        self.decay = decay

    def check_type_forward(self, in_types):
        """Check that 3 or 5 inputs with consistent dtypes/shapes are given.

        ``gamma`` and ``beta`` must share a shape that matches the axes of
        ``x`` following the batch axis; the optional ``mean`` and ``var``
        must match ``gamma`` in dtype and shape.
        """
        n_in = in_types.size().eval()
        if n_in != 3 and n_in != 5:
            raise type_check.InvalidType(
                '%s or %s' % (in_types.size() == 3, in_types.size() == 5),
                '%s == %s' % (in_types.size(), n_in))
        x_type, gamma_type, beta_type = in_types[:3]
        M = gamma_type.ndim.eval()
        type_check.expect(
            x_type.dtype.kind == 'f',
            x_type.ndim >= gamma_type.ndim + 1,
            x_type.shape[1:1 + M] == gamma_type.shape,
            # TODO(beam2d): Check shape
            gamma_type.dtype == x_type.dtype,
            beta_type.dtype == x_type.dtype,
            gamma_type.shape == beta_type.shape,
        )
        if len(in_types) == 5:
            mean_type, var_type = in_types[3:]
            type_check.expect(
                mean_type.dtype == x_type.dtype,
                mean_type.shape == gamma_type.shape,
                var_type.dtype == x_type.dtype,
                var_type.shape == gamma_type.shape,
            )

    def forward(self, inputs):
        """Normalize ``x`` and return a 1-tuple holding the output array.

        In training mode the running statistics stored on this object are
        replaced by updated copies.
        """
        xp = cuda.get_array_module(*inputs)
        x, gamma, beta = inputs[:3]
        if self.train:
            if self.running_mean is None:
                self.running_mean = xp.zeros_like(gamma)
                self.running_var = xp.zeros_like(gamma)
            else:
                # Work on copies: the arrays passed to __init__ are not
                # modified; callers read the updated attributes instead.
                self.running_mean = xp.array(self.running_mean)
                self.running_var = xp.array(self.running_var)
        elif len(inputs) == 5:
            self.fixed_mean = inputs[3]
            self.fixed_var = inputs[4]

        # TODO(bkvogel): Check for float16 support again in next cuDNN version.
        if x[0].dtype == numpy.float16:
            # cuDNN v5 batch normalization does not seem to support float16.
            self.use_cudnn = False

        # Index tuple that broadcasts per-channel arrays against x: a new
        # leading batch axis plus one trailing axis per spatial dimension.
        head_ndim = gamma.ndim + 1
        expander = (None, Ellipsis) + (None,) * (x.ndim - head_ndim)
        gamma = gamma[expander]
        beta = beta[expander]

        # cuDNN only supports these tensor dimensions because they are
        # the most commonly used. If there is a need to support other
        # dimensions with cuDNN, we could consider reshaping the input
        # into a 2-dim array with channels as the second dimension and
        # the product of all other dimensions as the first dimension.
        self.cudnn_dim_ok = x.ndim == 2 or x.ndim == 4

        cudnn_updated_running_stats = False
        if xp is not numpy and cuda.cudnn_enabled and self.use_cudnn and \
                self.cudnn_dim_ok and _cudnn_version >= 5000:
            if x.ndim == 4:
                # for convolutional layer
                self.mode = libcudnn.CUDNN_BATCHNORM_SPATIAL
            else:
                # for linear layer
                self.mode = libcudnn.CUDNN_BATCHNORM_PER_ACTIVATION

            x = cuda.cupy.ascontiguousarray(x)
            gamma = cuda.cupy.ascontiguousarray(gamma)
            beta = cuda.cupy.ascontiguousarray(beta)
            dtype = x.dtype
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(_as4darray(x))
            derivedBnDesc = cudnn.create_uninitialized_tensor_descriptor()
            libcudnn.deriveBNTensorDescriptor(derivedBnDesc.value,
                                              x_desc.value, self.mode)
            one = numpy.array(1, dtype=dtype).ctypes
            zero = numpy.array(0, dtype=dtype).ctypes
            y = cuda.cupy.empty_like(x)
            # Factor used in the moving average
            factor = 1 - self.decay

            if self.train:
                if self.mean_cache is None:
                    # Output cache to speed up backward pass.
                    self.mean_cache = xp.empty_like(gamma)
                    # Output cache to speed up backward pass.
                    self.var_cache = xp.empty_like(gamma)
                # Note: cuDNN computes the mini-batch mean and variance
                # internally. We can simply (optionally) pass
                # it the running-average mean and variance arrays.
                libcudnn.batchNormalizationForwardTraining(
                    handle, self.mode, one.data, zero.data,
                    x_desc.value, x.data.ptr, x_desc.value,
                    y.data.ptr, derivedBnDesc.value, gamma.data.ptr,
                    beta.data.ptr, factor, self.running_mean.data.ptr,
                    self.running_var.data.ptr, self.eps,
                    self.mean_cache.data.ptr, self.var_cache.data.ptr)
                cudnn_updated_running_stats = True
            else:
                libcudnn.batchNormalizationForwardInference(
                    handle, self.mode, one.data, zero.data,
                    x_desc.value, x.data.ptr, x_desc.value, y.data.ptr,
                    derivedBnDesc.value, gamma.data.ptr, beta.data.ptr,
                    self.fixed_mean.data.ptr, self.fixed_var.data.ptr,
                    self.eps)
        else:
            if self.train:
                # Reduce over the batch axis and all spatial axes.
                axis = (0,) + tuple(range(head_ndim, x.ndim))
                mean = x.mean(axis=axis)
                var = x.var(axis=axis)
                var += self.eps
            else:
                mean = self.fixed_mean
                # NOTE(review): eps is not added to the fixed variance here,
                # unlike the cuDNN inference call above which receives
                # self.eps -- confirm this difference is intentional.
                var = self.fixed_var

            self.std = xp.sqrt(var, dtype=var.dtype)
            if xp is numpy:
                self.x_hat = _xhat(x, mean, self.std, expander)
                y = gamma * self.x_hat
                y += beta

            else:
                self.x_hat, y = cuda.elementwise(
                    'T x, T mean, T std, T gamma, T beta', 'T x_hat, T y',
                    '''
                    x_hat = (x - mean) / std;
                    y = gamma * x_hat + beta;
                    ''',
                    'bn_fwd')(x, mean[expander], self.std[expander], gamma,
                              beta)

        if self.train and (not cudnn_updated_running_stats):
            # Note: If in training mode, the cuDNN forward training function
            # will do this for us, so
            # only run following code if cuDNN was not used.
            # Update running statistics:
            m = x.size // gamma.size
            adjust = m / max(m - 1., 1.)  # unbiased estimation
            self.running_mean *= self.decay
            temp_ar = xp.array(mean)
            temp_ar *= (1 - self.decay)
            self.running_mean += temp_ar
            del temp_ar
            self.running_var *= self.decay
            # ``var`` already includes eps at this point (added above).
            temp_ar = xp.array(var)
            temp_ar *= (1 - self.decay) * adjust
            self.running_var += temp_ar
            del temp_ar
        return y,

    def backward(self, inputs, grad_outputs):
        """Return the gradients with respect to every input.

        With five inputs (fixed statistics) the result is
        ``(gx, ggamma, gbeta, gmean, gvar)``; otherwise the function must be
        in training mode and the result is ``(gx, ggamma, gbeta)``.
        """
        x, gamma = inputs[:2]
        gy = grad_outputs[0]
        head_ndim = gamma.ndim + 1
        expander = (None, Ellipsis) + (None,) * (x.ndim - head_ndim)
        # Number of elements that are averaged per channel.
        m = gamma.dtype.type(x.size // gamma.size)
        axis = (0,) + tuple(range(head_ndim, x.ndim))
        xp = cuda.get_array_module(x)
        if len(inputs) == 5:
            # This case is unlikely to be used in practice and so does not
            # need to be optimized for performance.
            mean = inputs[3]
            var = inputs[4]
            std = xp.sqrt(var, dtype=var.dtype)
            gs = gamma / std
            gbeta = gy.sum(axis=axis)
            x_hat = _xhat(x, mean, std, expander)
            ggamma = (gy * x_hat).sum(axis=axis)
            gmean = -gs * gbeta
            gvar = -0.5 * gamma / var * ggamma
            gx = gs[expander] * gy
            return gx, ggamma, gbeta, gmean, gvar

        # Note: If length of inputs is not 5, we must be in train mode.
        assert self.train
        if xp is not numpy and cuda.cudnn_enabled and self.use_cudnn and \
                self.cudnn_dim_ok and _cudnn_version >= 5000:
            # Note: cuDNN batch normalization backward only works in
            # "training mode." That is, it does not support
            # computing gradients in fixed-mean-variance mode, because there
            # is normally no reason to call backward()
            # while in test/evaluation mode.
            dtype = x.dtype
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(_as4darray(x))
            derivedBnDesc = cudnn.create_uninitialized_tensor_descriptor()
            libcudnn.deriveBNTensorDescriptor(derivedBnDesc.value,
                                              x_desc.value, self.mode)
            one = numpy.array(1, dtype=dtype).ctypes
            zero = numpy.array(0, dtype=dtype).ctypes
            gx = cuda.cupy.empty_like(x)
            ggamma = cuda.cupy.empty_like(gamma)
            gbeta = cuda.cupy.empty_like(gamma)
            libcudnn.batchNormalizationBackward(
                handle, self.mode, one.data, zero.data,
                one.data, zero.data, x_desc.value, x.data.ptr,
                x_desc.value, gy.data.ptr, x_desc.value, gx.data.ptr,
                derivedBnDesc.value, gamma.data.ptr,
                ggamma.data.ptr, gbeta.data.ptr,
                self.eps, self.mean_cache.data.ptr, self.var_cache.data.ptr)
        else:
            # Reuses x_hat and std stored by the non-cuDNN forward pass.
            gbeta = gy.sum(axis=axis)
            ggamma = (gy * self.x_hat).sum(axis=axis)
            if xp is numpy:
                gx = (gamma / self.std)[expander] * (
                    gy - (self.x_hat * ggamma[expander] + gbeta[expander]) / m)
            else:
                inv_m = numpy.float32(1) / m
                gx = cuda.elementwise(
                    'T gy, T x_hat, T gamma, T std, T ggamma, T gbeta, \
                    T inv_m',
                    'T gx',
                    'gx = (gamma / std) * (gy - (x_hat * ggamma + gbeta) * \
                    inv_m)',
                    'bn_bwd')(gy, self.x_hat, gamma[expander],
                              self.std[expander], ggamma[expander],
                              gbeta[expander], inv_m)
        return gx, ggamma, gbeta
def batch_normalization(x, gamma, beta, eps=2e-5, running_mean=None,
                        running_var=None, decay=0.9, use_cudnn=True):
    """Apply batch normalization using mini-batch statistics.

    The input must have the batch size and the features (or channels) as
    the first two dimensions of its shape. Any further dimensions are
    treated as spatial dimensions and are reduced together with the batch
    dimension, i.e. the statistics are taken over every dimension except
    the second one.

    Note: The updated running mean and variance live on the function object
    created inside this call and therefore cannot be read by the caller.
    To access them, create a :class:`BatchNormalizationFunction` instance
    yourself, call it, and read its ``running_mean`` / ``running_var``
    attributes afterwards. See the corresponding Link class for an example.

    Args:
        x (Variable): The input variable.
        gamma (Variable): The scaling parameter of normalized data.
        beta (Variable): The shifting parameter of scaled normalized data.
        eps (float): Epsilon value for numerical stability.
        running_mean (array): Running average of the mean over several
            mini-batches, combined using ``decay``. If ``None``, a
            zero-initialized array is used internally; ``running_var``
            must then be ``None`` as well.
        running_var (array): Running average of the variance over several
            mini-batches, combined using ``decay``. If ``None``,
            ``running_mean`` must be ``None`` as well.
        decay (float): Decay rate of moving average. It is used during
            training.
        use_cudnn (bool): If ``True`` and cuDNN is enabled, then this function
            uses cuDNN as the core implementation.

    See: `Batch Normalization: Accelerating Deep Network Training by Reducing\
          Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_

    .. seealso:: :class:`links.BatchNormalization`

    """
    # train=True selects the batch-statistics path of the function.
    bn_func = BatchNormalizationFunction(
        eps=eps, mean=running_mean, var=running_var, train=True,
        decay=decay, use_cudnn=use_cudnn)
    return bn_func(x, gamma, beta)
def fixed_batch_normalization(x, gamma, beta, mean, var, eps=2e-5,
                              use_cudnn=True):
    """Apply batch normalization using statistics supplied by the caller.

    This variant does not compute mini-batch statistics; it normalizes with
    the given ``mean`` and ``var``. It is used in the testing mode of the
    batch normalization layer, where batch statistics cannot be used if
    predictions are to be consistent.

    Args:
        x (Variable): The input variable.
        gamma (Variable): The scaling parameter of normalized data.
        beta (Variable): The shifting parameter of scaled normalized data.
        mean (Variable): The shifting parameter of input.
        var (Variable): The square of scaling parameter of input.
        eps (float): Epsilon value for numerical stability.
        use_cudnn (bool): If ``True`` and cuDNN is enabled, then this function
            uses cuDNN as the core implementation.

    .. seealso::
       :func:`functions.batch_normalization`,
       :class:`links.BatchNormalization`

    """
    # No running statistics are kept: train=False, and decay is unused.
    fixed_func = BatchNormalizationFunction(
        eps=eps, mean=None, var=None, train=False, decay=0.0,
        use_cudnn=use_cudnn)
    return fixed_func(x, gamma, beta, mean, var)
30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /gen_cpp_code_v3.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------- 2 | # gen_cpp_code_v3.py 3 | # C++ code generator for a high-level synthesis toward an FPGA realization 4 | # 5 | # Creation Date : 04/Aug./2017 6 | # Copyright (C) <2017> Hiroki Nakahara, All rights reserved. 7 | # 8 | # Released under the GPL v2.0 License. 
#
# -----------------------------------------------------------------------

#!/usr/bin/python
# coding: UTF-8

import argparse
import re
import pickle
from collections import Counter

# Command line: only the project directory that holds config.pickle.
parser = argparse.ArgumentParser(description='C++ code generator')
parser.add_argument('--config_path', '-c', type=str, default='./hoge',
                    help='Configuration pickle file path')
args = parser.parse_args()

# Load the configuration written by the GUINNESS GUI.
# NOTE: pickle.load() can execute arbitrary code while unpickling; only point
# --config_path at a project directory you created yourself.
config_file = args.config_path + "/config.pickle"
with open(config_file, mode='rb') as f:
    config = pickle.load(f)

# Per-layer lists are indexed by layer position; initial_options[i] is the
# layer type code that the branches below dispatch on (0, 1, 2, 3 or 4).
initial_options = config['initial_options']
n_in_fmaps = config['n_in_fmaps']
n_ou_fmaps = config['n_ou_fmaps']
infmap_siz = config['infmap_siz']
ksiz = config['ksiz']
imgsiz = config['imgsiz']
max_dense_siz = config['max_dense_siz']
out_dense_siz = config['out_dense_siz']
bias_siz = config['bias_siz']
weight_siz = config['weight_siz']
max_bconv_width = config['max_bconv_width']
num_layer = config['num_layer']
numimg = int(n_in_fmaps[0])

# Accumulators for the C++ fragments substituted into the templates.
#(SET_WEIGHT_MEM)
set_weight_mem = ''
set_bias_mem = ''
bconv_reg_pragma = ''
bconv_reg_select = ''
bconv_weight_select = ''
bconv_bias_select = ''

conv_idx = 0
bn_idx = 0
dense_idx = 0
offset_weight = 0
offset_bias = 0

#(DEF_CNN_LAYER)
# Emit one `case <layer index>:` per layer.  Cases are grouped by layer type,
# in the order the types first appear in initial_options (Counter key order).
def_cnn_layer = ''

bn_idx = 0
dense_idx = 0
for layer_type in Counter(initial_options):
    if layer_type == 0:
        # Type 0: emitted as an int_conv2d_layer call on conv0W / b0_BNFb.
        for i, option in enumerate(initial_options):
            if option == 0:
                def_cnn_layer += ' case %d:\n' % i
                # The format string previously had no conversion specifiers
                # although four values are supplied, which raised TypeError.
                # NOTE(review): the template argument list is reconstructed
                # from the argument tuple -- confirm against the declaration
                # of int_conv2d_layer in template_cpp_r7_bcnn.cpp.
                def_cnn_layer += ' int_conv2d_layer<bit_%d, bit_%d, %d, %d>\n ( in_img, fb_tmp, conv0W, b0_BNFb);\n break;\n' % (
                    max_bconv_width, int(n_ou_fmaps[0]),
                    int(infmap_siz[0]), int(infmap_siz[0]))

    elif layer_type == 1:
        # Type 1: emitted as a bin_conv2d_pipeline call.
        for i, option in enumerate(initial_options):
            if option == 1:
                def_cnn_layer += ' case %d:\n' % i
                def_cnn_layer += ' bin_conv2d_pipeline(fb_tmp,bin_layer_idx,fsize[layer],n_in[layer],n_out[layer]);\n bin_layer_idx++;\n break;\n'

    elif layer_type == 2:
        # Type 2: emitted as a max_pooling_layer call.
        for i, option in enumerate(initial_options):
            if option == 2:
                def_cnn_layer += ' case %d:\n' % i
                # Same defect as above: three values, no specifiers.
                # NOTE(review): reconstructed template arguments -- confirm
                # against max_pooling_layer in template_cpp_r7_bcnn.cpp.
                def_cnn_layer += ' max_pooling_layer<bit_%d, %d, %d>(fb_tmp);\n break;\n' % (
                    max_bconv_width, int(imgsiz), int(infmap_siz[i]))

    elif layer_type == 3:
        # Type 3: for each output feature, count the set bits of one bit plane
        # of fb_tmp and threshold the count at half the feature-map area.
        for i, option in enumerate(initial_options):
            if option == 3:
                fmap_size = int(infmap_siz[i])
                def_cnn_layer += ' case %d:\n' % i
                def_cnn_layer += ' {\n'
                def_cnn_layer += ' ap_int<%d>mask = 0x1;\n' % int(n_in_fmaps[i])
                def_cnn_layer += ' for( of = 0; of < %d; of++){\n' % int(n_ou_fmaps[i])
                def_cnn_layer += ' ap_int<11> tmp = 0;\n'
                def_cnn_layer += ' for( y = 0; y < %d; y++){\n' % fmap_size
                def_cnn_layer += ' for( x = 0; x < %d; x++){\n' % fmap_size
                def_cnn_layer += ' if( (fb_tmp[y][x] & mask) != 0)\n'
                def_cnn_layer += ' tmp++;\n'
                def_cnn_layer += ' }\n'
                def_cnn_layer += ' }\n'
                def_cnn_layer += ' if( tmp >= %d*%d/2)\n' % (fmap_size, fmap_size)
                def_cnn_layer += ' fc_tmp[of] = 1;\n'
                def_cnn_layer += ' else\n'
                def_cnn_layer += ' fc_tmp[of] = 0;\n'
                def_cnn_layer += ' mask = mask << 1;\n'
                def_cnn_layer += ' }\n }\n break;\n'

    elif layer_type == 4:
        # Type 4: emitted as an fc_layer call.  Every layer is walked so that
        # bn_idx counts the type 0/1 layers preceding each type 4 layer;
        # bn_idx then selects the matching b<N>_BNFb array.
        for i, option in enumerate(initial_options):
            if option == 4:
                def_cnn_layer += ' case %d:\n' % i
                def_cnn_layer += ' fc_layer< %d, %d>( fc_tmp, fc%dW, b%d_BNFb, fc_result);\n break;\n' % (
                    int(n_ou_fmaps[i]), int(n_in_fmaps[i]), dense_idx, bn_idx)
                bn_idx += 1
                dense_idx += 1
            elif option == 0 or option == 1:
                bn_idx += 1

def_cnn_layer += ' default: break;\n'

#(DEF_CNN_PARAMETER)
# Three C arrays with one entry per layer: fsize, n_in, n_out.
layer_indices = range(len(initial_options))
def_cnn_parameter = ' int fsize[%d] = {' % (len(initial_options))
def_cnn_parameter += ','.join('%3d' % int(infmap_siz[i]) for i in layer_indices)
def_cnn_parameter += '};\n'
def_cnn_parameter += ' int n_in[%d] = {' % (len(initial_options))
def_cnn_parameter += ','.join('%3d' % int(n_in_fmaps[i]) for i in layer_indices)
def_cnn_parameter += '};\n'
def_cnn_parameter += ' int n_out[%d] = {' % (len(initial_options))
def_cnn_parameter += ','.join('%3d' % int(n_ou_fmaps[i]) for i in layer_indices)
def_cnn_parameter += '};\n'

#(BCONV_REG_SELECT)
#(BCONV_WEIGHT_SELECT)
#(BCONV_BIAS_SELECT)
# conv_idx counts type 0 and type 1 layers together so that it matches the
# numbering of the conv<N>W arrays; switch cases are emitted for type 1 only.
conv_idx = 0
for i, option in enumerate(initial_options):
    if option == 0:
        conv_idx += 1
    if option == 1:
        bconv_reg_select += ' case %d:\n' % (conv_idx)
        bconv_reg_select += ' shift_reg1[ 2 * (%d+2) + 3 - 1] = din;\n' % (int(infmap_siz[i]))
        bconv_reg_select += ' padding_shift_reg[ 2 * (%d+2) + 3 - 1] = padding; break;\n' % (int(infmap_siz[i]))
        bconv_reg_select += ' break;\n'

        bconv_weight_select += ' case %d:\n' % conv_idx
        bconv_weight_select += ' bx = shift_reg1[ky * (%d+2) + kx];\n' % int(infmap_siz[i])
        bconv_weight_select += ' bw = (ap_uint<%d>)conv%dW[ofeat][ky*3+kx];\n' % (max_bconv_width,conv_idx)
        bconv_weight_select += ' mask = ~(~allzero << %d);\n' % int(n_in_fmaps[i])
        bconv_weight_select += ' is_padding = padding_shift_reg[ky * (%d+2) + kx];\n' % int(infmap_siz[i])
        bconv_weight_select += ' break;\n'

        bconv_bias_select += ' case %d: bias = b%d_BNFb[ofeat]; break;\n' % (conv_idx,conv_idx)

        conv_idx += 1
bconv_reg_select += ' default: break;\n'
bconv_weight_select += ' default: break;\n'
bconv_bias_select += ' default: break;\n'

#(BCONV_REG_PRAGMA)
conv_idx = 0
for option in initial_options:
    if option == 0:
        conv_idx += 1
    if option == 1:
        bconv_reg_pragma += ' #pragma HLS ARRAY_PARTITION variable=conv%dW cyclic factor=9 dim=2\n' % conv_idx
        conv_idx += 1

conv_idx = 0
bn_idx = 0
dense_idx = 0

#(READ_WEIGHT_MEM)


#(READ_BIAS_MEM)
read_bias_mem = ''
read_weight_mem = ''

def_weight_mem = ''
def_bias_mem = ''

# Walk the layers once.  For every layer that owns parameters (types 0, 1
# and 4) emit: the weight/bias array definitions, the code that reads the
# *.txt parameter files into the flat t_bin_convW / t_BNFb buffers, and the
# code that copies from those buffers into the per-layer arrays.
# offset_weight / offset_bias are the running start positions of the current
# layer inside the flat buffers.
for i, option in enumerate(initial_options):
    if option not in (0, 1, 4):
        continue

    in_maps = int(n_in_fmaps[i])
    out_maps = int(n_ou_fmaps[i])

    if option == 4:
        # Type 4 layer: one weight per (output, input) pair.
        set_weight_mem += ' printf("load fc%dW\\n");\n' % dense_idx
        set_weight_mem += ' offset = %d;\n' % offset_weight
        set_weight_mem += ' for( of = 0; of < %d; of++){\n' % out_maps
        set_weight_mem += ' for( inf = 0; inf < %d; inf++){\n' % in_maps
        set_weight_mem += ' fc%dW[of][inf] = (ap_int<1>)t_bin_convW[of*%d+inf+offset];\n' % (dense_idx,in_maps)
        set_weight_mem += ' }\n'
        set_weight_mem += ' }\n'

        read_weight_mem += ' printf("fc%dW.txt\\n");\n' % dense_idx
        read_weight_mem += ' if( (fp = fopen("fc%dW.txt", "r")) == NULL)fprintf(stderr,"CANNOT OPEN\\n");\n' % dense_idx
        read_weight_mem += ' offset = %d;\n' % offset_weight
        read_weight_mem += ' for( of = 0; of < %d; of++){\n' % out_maps
        read_weight_mem += ' for( inf = 0; inf < %d; inf++){\n' % in_maps
        read_weight_mem += ' if( fgets( line, 256, fp) == NULL)fprintf(stderr,"EMPTY FILE READ\\n"); sscanf( line, "%d", &d_value);\n'
        read_weight_mem += ' t_bin_convW[of*%d+inf+offset] = d_value;\n' % in_maps
        read_weight_mem += ' }\n'
        read_weight_mem += ' }\n'
        read_weight_mem += ' fclose(fp);\n'

        def_weight_mem += 'ap_int<1> fc%dW[%d][%d];\n' % (dense_idx,out_maps,in_maps)

        dense_idx += 1
        offset_weight += (in_maps * out_maps)
    else:
        # Type 0/1 layer: 3x3 kernels, the input-feature bits of each kernel
        # position are packed into one word of conv<N>W.
        set_weight_mem += ' printf("load conv%dW\\n");\n' % conv_idx
        set_weight_mem += ' offset = %d;\n' % offset_weight
        set_weight_mem += ' for( of = 0; of < %d; of++){\n' % out_maps
        set_weight_mem += ' for( y = 0; y < 3; y++){\n'
        set_weight_mem += ' for( x = 0; x < 3; x++){\n'
        set_weight_mem += ' ap_uint<%d>tmp = 0x1;\n' % in_maps
        set_weight_mem += ' for( inf = 0; inf < %d; inf++){\n' % in_maps
        set_weight_mem += ' if( t_bin_convW[of*%d*3*3+inf*3*3+y*3+x+offset] == 1){\n' % in_maps
        set_weight_mem += ' conv%dW[of][y*3+x] |= tmp;\n' % conv_idx
        set_weight_mem += ' }\n'
        set_weight_mem += ' tmp = tmp << 1;\n'
        set_weight_mem += ' }\n'
        set_weight_mem += ' }\n'
        set_weight_mem += ' }\n'
        set_weight_mem += ' }\n'

        read_weight_mem += ' printf("conv%dW.txt\\n");\n' % conv_idx
        read_weight_mem += ' if( (fp = fopen("conv%dW.txt", "r")) == NULL)fprintf(stderr,"CANNOT OPEN\\n");\n' % conv_idx
        read_weight_mem += ' offset = %d;\n' % offset_weight
        read_weight_mem += ' for( of = 0; of < %d; of++){\n' % out_maps
        read_weight_mem += ' for( inf = 0; inf < %d; inf++){\n' % in_maps
        read_weight_mem += ' for( y = 0; y < 3; y++){\n'
        read_weight_mem += ' for( x = 0; x < 3; x++){\n'
        read_weight_mem += ' if( fgets( line, 256, fp) == NULL)fprintf(stderr,"EMPTY FILE READ\\n"); sscanf( line, "%d", &d_value);\n'
        read_weight_mem += ' t_bin_convW[of*%d*3*3+inf*3*3+y*3+x+offset] = d_value;\n' % in_maps
        read_weight_mem += ' }\n'
        read_weight_mem += ' }\n'
        read_weight_mem += ' }\n'
        read_weight_mem += ' }\n'
        read_weight_mem += ' fclose(fp);\n'

        def_weight_mem += 'ap_int<%d> conv%dW[%d][3*3];\n' % (in_maps,conv_idx,out_maps)

        conv_idx += 1
        offset_weight += (in_maps * out_maps * 3 * 3)

    # Bias handling is the same for every parameterised layer; only the
    # declared bit width differs (20 bits for type 0, 16 bits otherwise).
    set_bias_mem += ' printf("load b%d_BNFb\\n");\n' % bn_idx
    set_bias_mem += ' offset = %d;\n' % offset_bias
    set_bias_mem += ' for( of = 0; of < %d; of++){\n' % out_maps
    set_bias_mem += ' b%d_BNFb[of] = t_BNFb[of+offset];\n' % bn_idx
    set_bias_mem += ' }\n'

    read_bias_mem += ' printf("b%d_BNFb.txt\\n");\n' % bn_idx
    read_bias_mem += ' if( (fp = fopen("b%d_BNFb.txt", "r")) == NULL)fprintf(stderr,"CANNOT OPEN\\n");\n' % bn_idx
    read_bias_mem += ' offset = %d;\n' % offset_bias
    read_bias_mem += ' for( of = 0; of < %d; of++){\n' % out_maps
    read_bias_mem += ' if( fgets( line, 256, fp) == NULL)fprintf(stderr,"EMPTY FILE READ\\n");\n'
    read_bias_mem += ' sscanf( line, "%d", &d_value);\n'
    read_bias_mem += ' t_BNFb[of+offset] = d_value;\n'
    read_bias_mem += ' }\n'
    read_bias_mem += ' fclose(fp);\n'

    if option == 0:
        def_bias_mem += 'ap_int<20> b%d_BNFb[%d];\n' % (bn_idx,out_maps)
    else:
        def_bias_mem += 'ap_int<16> b%d_BNFb[%d];\n' % (bn_idx,out_maps)

    bn_idx += 1
    offset_bias += out_maps

# Check # of f.maps
# Use the masked XOR as soon as two consecutive entries of n_in_fmaps
# (from index 1 onwards) differ.
bin_xor_mac = 'bxor = (ap_uint<(MAX_BCONV_WIDTH)>)(bx ^ bw);'
for i in range(len(initial_options) - 2):
    if int(n_in_fmaps[i+1]) != int(n_in_fmaps[i+2]):
        bin_xor_mac = 'bxor = (ap_uint<(MAX_BCONV_WIDTH)>)(bx ^ bw) & mask;'

# generate C++ code for a binarized CNN ------------------------------------
# Substitution order matters: bin_xor_mac itself contains the
# (MAX_BCONV_WIDTH) placeholder, so (BIN_XOR_MAC) must be replaced before
# (MAX_BCONV_WIDTH).
with open('template_cpp_r7_bcnn.cpp') as f:
    lines2 = f.readlines()

cpp_file = ''

for line in lines2:
    converted = line.replace("(BIAS_SIZ)",str(bias_siz))
    converted = converted.replace("(BIN_XOR_MAC)",bin_xor_mac)
    converted = converted.replace("(KSIZ)",str(ksiz))
    converted = converted.replace("(MAX_DENSE_SIZ)",str(max_dense_siz))
    converted = converted.replace("(OUT_DENSE_SIZ)",str(out_dense_siz))
    converted = converted.replace("(WEIGHT_SIZ)",str(weight_siz))
    converted = converted.replace("(MAX_BCONV_WIDTH)",str(max_bconv_width))
    converted = converted.replace("(NUM_LAYER)",str(num_layer))
    converted = converted.replace("(IMGSIZ)",str(imgsiz))
    converted = converted.replace("(NUMIMG)",str(numimg))

    converted = converted.replace("(BCONV_REG_PRAGMA)",bconv_reg_pragma)
    converted = converted.replace("(BCONV_REG_SELECT)",bconv_reg_select)
    converted = converted.replace("(BCONV_BIAS_SELECT)",bconv_bias_select)
    converted = converted.replace("(BCONV_WEIGHT_SELECT)",bconv_weight_select)
    converted = converted.replace("(DEF_CNN_PARAMETER)",def_cnn_parameter)
    converted = converted.replace("(DEF_CNN_LAYER)",def_cnn_layer)
    converted = converted.replace("(DEF_BIAS_MEM)",def_bias_mem)
    converted = converted.replace("(DEF_WEIGHT_MEM)",def_weight_mem)
    converted = converted.replace("(SET_BIAS_MEM)",set_bias_mem)
    converted = converted.replace("(SET_WEIGHT_MEM)",set_weight_mem)
    converted = converted.replace("(READ_BIAS_MEM)",read_bias_mem)
    converted = converted.replace("(READ_WEIGHT_MEM)",read_weight_mem)

    cpp_file += converted

cnn_file = args.config_path + "/sdsoc/cnn.cpp"
with open(cnn_file,'w') as f:
    f.write(cpp_file)

# generate C++ main code ---------------------------------------------------
with open('template_cpp_r7_main.cpp') as f:
    lines2 = f.readlines()

cpp_file = ''

for line in lines2:
    converted = line.replace("(BIAS_SIZ)",str(bias_siz))
    converted = converted.replace("(BIN_XOR_MAC)",bin_xor_mac)
    converted = converted.replace("(KSIZ)",str(ksiz))
    converted = converted.replace("(MAX_DENSE_SIZ)",str(max_dense_siz))
    converted = converted.replace("(OUT_DENSE_SIZ)",str(out_dense_siz))
    converted = converted.replace("(WEIGHT_SIZ)",str(weight_siz))
    converted = converted.replace("(MAX_BCONV_WIDTH)",str(max_bconv_width))
    converted = converted.replace("(NUM_LAYER)",str(num_layer))
    converted = converted.replace("(IMGSIZ)",str(imgsiz))
    converted = converted.replace("(NUMIMG)",str(numimg))

    converted = converted.replace("(BCONV_REG_PRAGMA)",bconv_reg_pragma)
    converted = converted.replace("(BCONV_REG_SELECT)",bconv_reg_select)
    converted = converted.replace("(BCONV_BIAS_SELECT)",bconv_bias_select)
    converted = converted.replace("(BCONV_WEIGHT_SELECT)",bconv_weight_select)
    converted = converted.replace("(DEF_CNN_PARAMETER)",def_cnn_parameter)
    converted = converted.replace("(DEF_CNN_LAYER)",def_cnn_layer)
    converted = converted.replace("(DEF_BIAS_MEM)",def_bias_mem)
    converted = converted.replace("(DEF_WEIGHT_MEM)",def_weight_mem)
    converted = converted.replace("(SET_BIAS_MEM)",set_bias_mem)
    converted = converted.replace("(SET_WEIGHT_MEM)",set_weight_mem)
    converted = converted.replace("(READ_BIAS_MEM)",read_bias_mem)
    converted = converted.replace("(READ_WEIGHT_MEM)",read_weight_mem)

    cpp_file += converted

cnn_file = args.config_path + "/sdsoc/main.cpp"
with open(cnn_file,'w') as f:
    f.write(cpp_file)

# generate C++ main code including a socket communication via an Ethernet ------------
with open('template_cpp_r7_socket_main.cpp') as f:
    lines2 = f.readlines()

cpp_file = ''

for line in lines2:
    converted = line.replace("(BIAS_SIZ)",str(bias_siz))
    converted = converted.replace("(BIN_XOR_MAC)",bin_xor_mac)
    converted = converted.replace("(KSIZ)",str(ksiz))
    converted = converted.replace("(MAX_DENSE_SIZ)",str(max_dense_siz))
    converted = converted.replace("(OUT_DENSE_SIZ)",str(out_dense_siz))
    converted = converted.replace("(WEIGHT_SIZ)",str(weight_siz))
    converted = converted.replace("(MAX_BCONV_WIDTH)",str(max_bconv_width))
    converted = converted.replace("(NUM_LAYER)",str(num_layer))
    converted = converted.replace("(IMGSIZ)",str(imgsiz))
    converted = converted.replace("(NUMIMG)",str(numimg))

    converted = converted.replace("(BCONV_REG_PRAGMA)",bconv_reg_pragma)
    converted = converted.replace("(BCONV_REG_SELECT)",bconv_reg_select)
    converted = converted.replace("(BCONV_BIAS_SELECT)",bconv_bias_select)
    converted = converted.replace("(BCONV_WEIGHT_SELECT)",bconv_weight_select)
    converted = converted.replace("(DEF_CNN_PARAMETER)",def_cnn_parameter)
    converted = converted.replace("(DEF_CNN_LAYER)",def_cnn_layer)
    converted = converted.replace("(DEF_BIAS_MEM)",def_bias_mem)
converted = converted.replace("(DEF_WEIGHT_MEM)",def_weight_mem) 399 | converted = converted.replace("(SET_BIAS_MEM)",set_bias_mem) 400 | converted = converted.replace("(SET_WEIGHT_MEM)",set_weight_mem) 401 | converted = converted.replace("(READ_BIAS_MEM)",read_bias_mem) 402 | converted = converted.replace("(READ_WEIGHT_MEM)",read_weight_mem) 403 | 404 | cpp_file += converted 405 | 406 | cnn_file = args.config_path + "/sdsoc/socket_main.cpp" 407 | with open(cnn_file,'w') as f: 408 | f.write(cpp_file) 409 | 410 | ########################################################################################### 411 | # END OF PROGRAM 412 | ########################################################################################### 413 | -------------------------------------------------------------------------------- /guinness.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------- 2 | # guinness.py 3 | # A GUI based Neural NEtwork SyntheSizer for an FPGA deep learning 4 | # 5 | # Creation Date : 04/Aug./2017 6 | # Copyright (C) <2017> Hiroki Nakahara, All rights reserved. 7 | # 8 | # Released under the GPL v2.0 License. 
def set_project_name(self):
    """Build the complete main-window UI and install it as this widget's layout.

    Despite its name, this method creates every widget of the GUI: the
    project and CNN-specification group boxes in the left column and the
    training and code-generation group boxes in the right column.  Each
    group box is built by its own private helper below.

    Attributes created for the other methods of this class:
    ``projectEdit``, ``combo1``, ``table``, ``td_label``, ``tl_label``,
    ``n_trains_Edit``, ``b11``, ``b12``, ``optimizer_group``, ``cb``,
    ``canvas``, ``bstart`` and ``combo2``.
    """
    columns = QtGui.QHBoxLayout()
    columns.addLayout(self._build_left_column())
    columns.addLayout(self._build_right_column())
    self.setLayout(columns)


def _build_left_column(self):
    """Return the left column: project setup above the CNN specification."""
    column = QtGui.QVBoxLayout()
    column.addWidget(self._build_project_box())
    column.addWidget(self._build_cnn_box())
    return column


def _build_right_column(self):
    """Return the right column: training above C/C++ code generation."""
    column = QtGui.QVBoxLayout()
    column.addWidget(self._build_training_box())
    column.addWidget(self._build_fpga_box())
    return column


def _build_project_box(self):
    """Return group box 1: project name field plus SAVE / LOAD buttons."""
    box = QtGui.QGroupBox("1. Project Setup")

    self.projectEdit = QtGui.QLineEdit()
    self.projectEdit.setText('Project1')

    name_row = QtGui.QHBoxLayout()
    name_row.addWidget(QtGui.QLabel('Project Name'))
    name_row.addWidget(self.projectEdit)

    save_button = QtGui.QPushButton("SAVE")
    save_button.clicked.connect(self.SaveProj)
    load_button = QtGui.QPushButton("LOAD")
    load_button.clicked.connect(self.LoadProj)

    button_row = QtGui.QHBoxLayout()
    button_row.addWidget(save_button)
    button_row.addWidget(load_button)

    layout = QtGui.QVBoxLayout()
    layout.addLayout(name_row)
    layout.addLayout(button_row)
    box.setLayout(layout)
    return box


def _build_cnn_box(self):
    """Return group box 2: template selector and the editable layer table."""
    box = QtGui.QGroupBox("2. CNN Specificaion")

    self.combo1 = QtGui.QComboBox()
    for template in ("LeNet5", "TinyCNN", "VGG9ave",
                     "VGG11ave", "VGG16ave", "VGG19ave"):
        self.combo1.addItem(template)

    load_config_button = QtGui.QPushButton("LOAD CONFIG")
    load_config_button.clicked.connect(self.LoadConfig)

    selector_row = QtGui.QHBoxLayout()
    selector_row.addWidget(QtGui.QLabel('Type'))
    selector_row.addWidget(self.combo1)
    selector_row.addWidget(load_config_button)

    self.table = QtGui.QTableWidget()
    self.table.setColumnCount(5)
    self.table.setHorizontalHeaderLabels(
        ["Type", "In #Fmaps", "Out #Fmaps", "In Fsiz", "Train?"])
    for column, width in enumerate((90, 80, 80, 50, 50)):
        self.table.setColumnWidth(column, width)

    # Populate the table with the template currently selected in combo1.
    # This must run after both combo1 and table exist.
    self.LoadConfig()

    # Right-click on the table opens the add/delete-layer menu.
    self.table.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
    self.table.customContextMenuRequested.connect(self.contextMenu_)

    layout = QtGui.QVBoxLayout()
    layout.addLayout(selector_row)
    layout.addWidget(self.table)
    box.setLayout(layout)
    return box


def _build_training_box(self):
    """Return group box 3: dataset pickers, training options, plot, start button."""
    box = QtGui.QGroupBox("3. Training")
    layout = QtGui.QVBoxLayout()

    # training data
    data_button = QtGui.QPushButton("Load")
    data_button.clicked.connect(self.open_FileDialog)
    self.td_label = QtGui.QLineEdit("image.pkl")
    data_row = QtGui.QHBoxLayout()
    data_row.addWidget(QtGui.QLabel('Training Data'))
    data_row.addWidget(data_button)
    data_row.addWidget(self.td_label)
    layout.addLayout(data_row)

    # training label
    label_button = QtGui.QPushButton("Load")
    label_button.clicked.connect(self.open_FileDialog_tl)
    self.tl_label = QtGui.QLineEdit("label.pkl")
    label_row = QtGui.QHBoxLayout()
    label_row.addWidget(QtGui.QLabel('Training Label'))
    label_row.addWidget(label_button)
    label_row.addWidget(self.tl_label)
    layout.addLayout(label_row)

    # number of training iterations
    self.n_trains_Edit = QtGui.QLineEdit()
    self.n_trains_Edit.setText("10")
    count_row = QtGui.QHBoxLayout()
    count_row.addWidget(QtGui.QLabel('Number of traning'))
    count_row.addWidget(self.n_trains_Edit)
    layout.addLayout(count_row)

    # optimizer
    self.b11 = QtGui.QRadioButton("SGD")
    self.b11.setChecked(True)
    self.b12 = QtGui.QRadioButton("Adam")
    # Parented to this widget so the group outlives this method; it holds
    # only the two mutually exclusive radio buttons.
    self.optimizer_group = QtGui.QButtonGroup(self)
    self.optimizer_group.addButton(self.b11)
    self.optimizer_group.addButton(self.b12)
    optimizer_row = QtGui.QHBoxLayout()
    optimizer_row.addWidget(QtGui.QLabel('Optimizer'))
    optimizer_row.addWidget(self.b11)
    optimizer_row.addWidget(self.b12)
    layout.addLayout(optimizer_row)

    # Use GPU?
    self.cb = QtGui.QCheckBox('Use GPU')
    self.cb.setChecked(True)
    layout.addWidget(self.cb)

    # training progress plot
    layout.addWidget(QtGui.QLabel('Training Process View'))
    self.canvas = Canvas()
    self.canvas.refresh(int(self.n_trains_Edit.text()))
    layout.addWidget(self.canvas)

    # control buttons
    self.bstart = QtGui.QPushButton("Start Training")
    self.bstart.clicked.connect(self.start_training)
    # The stop button has no handler yet and is kept hidden.
    stop_button = QtGui.QPushButton("Stop Training")
    stop_button.setVisible(False)
    control_row = QtGui.QHBoxLayout()
    control_row.addWidget(self.bstart)
    control_row.addWidget(stop_button)
    layout.addLayout(control_row)

    box.setLayout(layout)
    return box


def _build_fpga_box(self):
    """Return group box 4: target board selector and the code-generation button."""
    box = QtGui.QGroupBox("4. C/C++ Code Generation for FPGA Implementation")

    self.combo2 = QtGui.QComboBox()
    for board in ("zed", "zybo", "zc702", "zcu102"):
        self.combo2.addItem(board)

    board_row = QtGui.QHBoxLayout()
    board_row.addWidget(QtGui.QLabel('Target FPGA Board'))
    board_row.addWidget(self.combo2)

    generate_button = QtGui.QPushButton("Generate C/C++ Code")
    generate_button.clicked.connect(self.start_bitgen)

    layout = QtGui.QVBoxLayout()
    layout.addLayout(board_row)
    layout.addWidget(generate_button)
    box.setLayout(layout)
    return box
281 | 282 | initial_options = [] 283 | n_in_fmaps = [] 284 | n_ou_fmaps = [] 285 | infmap_siz = [] 286 | 287 | for i in range(self.table.rowCount()): 288 | itm1 = self.table.cellWidget(i,0) 289 | itm2 = self.table.item(i,1) 290 | itm3 = self.table.item(i,2) 291 | itm4 = self.table.item(i,3) 292 | val1 = itm1.currentIndex() 293 | val2 = str(itm2.text()) 294 | val3 = str(itm3.text()) 295 | val4 = str(itm4.text()) 296 | 297 | initial_options.append(val1) 298 | n_in_fmaps.append(val2) 299 | n_ou_fmaps.append(val3) 300 | infmap_siz.append(val4) 301 | 302 | if action == addAction: 303 | initial_options.insert(self.table.currentRow(),1) 304 | n_in_fmaps.insert(self.table.currentRow(),'0') 305 | n_ou_fmaps.insert(self.table.currentRow(),'0') 306 | infmap_siz.insert(self.table.currentRow(),'0') 307 | 308 | elif action == delAction: 309 | initial_options.pop(self.table.currentRow()) 310 | n_in_fmaps.pop(self.table.currentRow()) 311 | n_ou_fmaps.pop(self.table.currentRow()) 312 | infmap_siz.pop(self.table.currentRow()) 313 | 314 | self.table.setRowCount(len(initial_options)) 315 | for index in range(len(initial_options)): 316 | combo = QtGui.QComboBox() 317 | for t in self.combo_box_options: 318 | combo.addItem(t) 319 | combo.setCurrentIndex(initial_options[index]) 320 | self.table.setCellWidget(index,0,combo) 321 | item1 = QtGui.QTableWidgetItem(n_in_fmaps[index]) 322 | self.table.setItem(index,1,item1) 323 | item2 = QtGui.QTableWidgetItem(n_ou_fmaps[index]) 324 | self.table.setItem(index,2,item2) 325 | item3 = QtGui.QTableWidgetItem(infmap_siz[index]) 326 | self.table.setItem(index,3,item3) 327 | 328 | item4 = QtGui.QCheckBox('') 329 | item4.setChecked(True) # isChecked() == True?False? 
def start_training(self):
    """Generate the network source from the layer table and launch train.py.

    Steps, in order:
      1. read and validate the iteration count and the layer table;
      2. write the generated chainer model to ``net2.py`` and an evaluation
         variant (``net3.py``) plus ``eval.py`` into the project directory;
      3. when a previous run is being continued (module global
         ``is_load_pretrain`` is 1), copy ``temp.model`` and
         ``temp_log.csv`` from the project directory to the working
         directory;
      4. start ``train.py`` as a background process and poll its progress
         once per second through ``updateCanvas``.

    Invalid input or missing resume files are reported to the user and the
    method returns without starting a training run.
    """
    # NOTE(review): only the message is printed; the removal of the stale
    # log file was already disabled in the original code and stays disabled.
    if is_load_pretrain == 0 and os.path.exists("./temp_log.csv"):
        print("CLEARN UP LOGFILE")

    try:
        n_iter = int(self.n_trains_Edit.text())
        layers = self._read_layer_table()
    except ValueError:
        self._report_training_error(
            "[ERROR] number of training and layer table entries must be integers")
        return
    if not layers:
        self._report_training_error("[ERROR] CNN specification table is empty")
        return

    # generate CNN python code (this version only supports chainer 1.21-24.0)
    print("[INFO] GENERATE PYTHON CODE FOR CNN")
    with open('header.txt') as f:
        header = f.read()
    pcode = self._generate_network_source(header, layers)

    with open('net2.py', 'w') as f:
        f.write(pcode)

    # Variant for eval.py: it lives one directory below, so the path entries
    # are re-rooted and the batch normalization link is replaced by LBN.
    net3_file = pcode.replace("=L.", "=LBN.").replace("./", "../")

    project_dir = "./" + str(self.projectEdit.text())
    if not os.path.exists(project_dir):
        os.mkdir(project_dir)

    fname = project_dir + '/net3.py'
    print("[INFO] Python evaluation codes are seved to %s" % fname)
    with open(fname, 'w') as f:
        f.write(net3_file)

    print("[INFO] COPY evaluation code")
    shutil.copyfile('eval.py', project_dir + '/eval.py')

    # setup training -----------------------------------------------------
    train_dataset = self.td_label.text()
    label_dataset = self.tl_label.text()
    optimizer_alg = "sgd" if self.b11.isChecked() else "adam"
    project_name = "temp"  # prefix passed to train.py via --prefix

    if self.cb.isChecked():
        print("[INFO] START TRAINING: GPU MODE")
        gpu = "0"
    else:
        print("[INFO] START TRAINING: CPU MODE")
        gpu = "-1"

    if is_load_pretrain == 1:
        print("[INFO RESUME TRANINING]")
        resume = "yes"
        if not self._restore_checkpoint(project_dir):
            return
    else:
        resume = "no"

    # Mark the run as active before the trainer starts, so the flag written
    # here can never overwrite a status written later by the trainer.
    with open("train_status.txt", "w") as f:
        f.write("run")

    # background job = python train.py &
    subprocess.Popen([
        "python", "train.py",
        "-g", gpu,
        "--iter", str(n_iter),
        "--dim", str(n_dim),
        "--siz", str(img_siz),
        "--dataset", train_dataset,
        "--label", label_dataset,
        "--optimizer", optimizer_alg,
        "--prefix", project_name,
        "--lr_decay_iter", "100",
        "--resume", resume,
    ])

    # hide the start button while the training is running
    self.bstart.setVisible(False)

    # poll the training log once per second
    self.timer = QtCore.QTimer(self)
    self.timer.timeout.connect(self.updateCanvas)
    self.timer.start(1000)


def _report_training_error(self, message):
    """Print *message* and show it in a warning dialog."""
    print(message)
    QtGui.QMessageBox.warning(None, "Training Status", message)


def _read_layer_table(self):
    """Return the layer table as ``(type_name, n_in, n_out, in_size)`` tuples.

    Raises ValueError when a numeric cell does not hold an integer.
    """
    layers = []
    for row in range(self.table.rowCount()):
        layers.append((
            str(self.table.cellWidget(row, 0).currentText()),
            int(self.table.item(row, 1).text()),
            int(self.table.item(row, 2).text()),
            int(self.table.item(row, 3).text()),
        ))
    return layers


def _generate_network_source(self, header, layers):
    """Return the Python source of the CNN chain for the given layers.

    *header* is the content of header.txt, which ends inside the open
    ``super(CNN, self).__init__(`` call; this method appends the link
    definitions, closes the call and appends ``__call__``.

    NOTE(review): the emitted indentation (4 spaces for ``def``, 8 for the
    body) assumes header.txt indents the class body by 4 spaces - confirm.
    """
    pooling = ('Max Pool', 'Ave Pool')
    last = len(layers) - 1
    parts = [header, '\n']

    # link definitions (arguments of Chain.__init__) ----------------------
    conv_idx = bn_idx = dense_idx = 0
    for i, (kind, n_in, n_out, _in_size) in enumerate(layers):
        if kind in ('Conv(Int)', 'Conv(Bin)'):
            module = 'IC' if kind == 'Conv(Int)' else 'BC'
            parts.append(
                '            conv%d=%s.Convolution2D(%d,%d,3, stride=1, pad=1, nobias=True),\n'
                % (conv_idx, module, n_in, n_out))
            parts.append('            b%d=L.BatchNormalization(%d)' % (bn_idx, n_out))
            conv_idx += 1
            bn_idx += 1
        elif kind not in pooling:  # Dense
            parts.append('            fc%d=BL.BinaryLinear(%d,%d),\n' % (dense_idx, n_in, n_out))
            parts.append('            b%d=L.BatchNormalization(%d)' % (bn_idx, n_out))
            dense_idx += 1
            bn_idx += 1

        if i == last:
            parts.append('\n        )\n')
        elif kind not in pooling:  # pooling layers define no link
            parts.append(',\n')

    # forward pass ----------------------------------------------------------
    parts.append('\n    def __call__(self, x, train):\n')
    conv_idx = bn_idx = dense_idx = 0
    for i, (kind, _n_in, _n_out, in_size) in enumerate(layers):
        # Only the first layer reads the network input; every later layer
        # consumes the previous layer's output.
        src = 'x' if i == 0 else 'h'
        if kind in ('Conv(Int)', 'Conv(Bin)'):
            parts.append('        h = bst.bst(self.b%d(self.conv%d(%s)))\n'
                         % (bn_idx, conv_idx, src))
            bn_idx += 1
            conv_idx += 1
        elif kind == 'Max Pool':
            parts.append('        h = F.max_pooling_2d(%s, 2)\n' % src)
        elif kind == 'Ave Pool':
            # kernel size = input feature-map size (global average pooling)
            parts.append('        h = F.average_pooling_2d(%s, %d)\n' % (src, in_size))
        else:  # Dense
            if i < last:
                parts.append('        h = bst.bst(self.b%d(self.fc%d(%s)))\n'
                             % (bn_idx, dense_idx, src))
            else:
                # the output layer is not binarized
                parts.append('        h = self.b%d(self.fc%d(%s))\n'
                             % (bn_idx, dense_idx, src))
            bn_idx += 1
            dense_idx += 1

    parts.append('        return h')
    return ''.join(parts)


def _restore_checkpoint(self, project_dir):
    """Copy the saved model and log of *project_dir* into the working directory.

    Returns True when both files were copied.  When either file is missing,
    nothing is copied, the error is reported and False is returned.
    """
    wanted = (('temp.model', 'MODEL', 'model'), ('temp_log.csv', 'LOG', 'log'))

    # Check everything first so a missing file never leaves a half-restored state.
    for name, _tag, label in wanted:
        source = project_dir + '/' + name
        if not os.path.isfile(source):
            self._report_training_error("[ERROR] %s file %s not found" % (label, source))
            return False

    for name, tag, _label in wanted:
        source = project_dir + '/' + name
        print("[INFO] RESUME PRE-TRAINED %s FILE %s" % (tag, source))
        shutil.copyfile(source, './' + name)  # overwrites a stale local copy
    return True
def save_configfile(self):
    """Pickle the layer table and derived sizes to ``./<project>/config.pickle``.

    The project directory is created when it does not exist yet, so this
    method can be called before the project was trained or saved.

    Keys written:
      * ``initial_options`` - layer-type index of every row
      * ``n_in_fmaps`` / ``n_ou_fmaps`` / ``infmap_siz`` - the table columns
        as strings
      * ``ksiz`` - convolution kernel size (always 3)
      * ``imgsiz`` - input feature-map size of the first row
      * ``out_dense_siz`` - output size of the last row
      * ``max_dense_siz`` - largest input size of any dense row
      * ``max_bconv_width`` - largest output size of any binary conv row
      * ``bias_siz`` / ``weight_siz`` - totals over conv and dense rows
      * ``num_layer`` - number of rows

    Raises ValueError when a conv/dense row holds a non-integer size.
    """
    print("------------- GENERATE CONFIGURATION FILE --------------")
    print("TARGET DEVICE: %s" % self.combo2.currentText())
    print("[INFO] Generate Configuration File")

    # Indices into the layer-type combo box:
    # ["Conv(Int)", "Conv(Bin)", "Max Pool", "Ave Pool", "Dense"]
    conv_int, conv_bin, dense = 0, 1, 4
    ksiz = 3

    initial_options = []
    n_in_fmaps = []
    n_ou_fmaps = []
    infmap_siz = []
    max_dense_siz = 0
    max_bconv_width = 0
    bias_siz = 0
    weight_siz = 0

    for row in range(self.table.rowCount()):
        layer_type = self.table.cellWidget(row, 0).currentIndex()
        n_in = str(self.table.item(row, 1).text())
        n_out = str(self.table.item(row, 2).text())
        in_size = str(self.table.item(row, 3).text())

        # Pooling rows carry no parameters; their cells are stored verbatim
        # and never converted to int.
        if layer_type in (conv_int, conv_bin):
            bias_siz += int(n_out)
            weight_siz += int(n_in) * int(n_out) * ksiz * ksiz
            if layer_type == conv_bin:
                max_bconv_width = max(max_bconv_width, int(n_out))
        elif layer_type == dense:
            bias_siz += int(n_out)
            weight_siz += int(n_in) * int(n_out)
            max_dense_siz = max(max_dense_siz, int(n_in))

        initial_options.append(layer_type)
        n_in_fmaps.append(n_in)
        n_ou_fmaps.append(n_out)
        infmap_siz.append(in_size)

    if not initial_options:
        print("[ERROR] CNN specification table is empty; configuration not saved")
        return

    config = {
        'initial_options': initial_options,
        'n_in_fmaps': n_in_fmaps,
        'n_ou_fmaps': n_ou_fmaps,
        'infmap_siz': infmap_siz,
        'ksiz': ksiz,
        'imgsiz': infmap_siz[0],
        'max_dense_siz': max_dense_siz,
        'out_dense_siz': n_ou_fmaps[-1],
        'bias_siz': bias_siz,
        'weight_siz': weight_siz,
        'max_bconv_width': max_bconv_width,
        'num_layer': len(initial_options),
    }

    project_dir = "./" + str(self.projectEdit.text())
    if not os.path.isdir(project_dir):
        os.mkdir(project_dir)

    with open(project_dir + "/config.pickle", mode='wb') as f:
        pickle.dump(config, f)
tmp = tmp.replace("(ELF_FILE_PATH)",self.projectEdit.text() + ".elf") 668 | tmp = tmp.replace("(TARGET_BOARD)",self.combo2.currentText()) 669 | 670 | makefile_txt += tmp 671 | 672 | makefile_name = "./" + self.projectEdit.text() + "/sdsoc/Makefile" 673 | with open(makefile_name,'w') as f: 674 | f.write(makefile_txt) 675 | 676 | # generate sdsoc/sd_card directory 677 | print("[INFO] MAKE A DIRECTROY: ./%s/sdsoc/to_sd_card" % self.projectEdit.text()) 678 | sd_card_dir = "./" + self.projectEdit.text() + "/sdsoc/to_sd_card" 679 | if os.path.exists(sd_card_dir) == False: 680 | os.mkdir(sd_card_dir) 681 | 682 | # generate HLS directory 683 | print("[INFO] MAKE A DIRECTROY: ./%s/HLS" % self.projectEdit.text()) 684 | HLS_dir = "./" + self.projectEdit.text() + "/HLS" 685 | if os.path.exists(HLS_dir) == False: 686 | os.mkdir(HLS_dir) 687 | 688 | # convert trained *.model to weight text file 689 | print("[INFO] CONVERT TRAINED WEIGHTS INTO TEXT FILE") 690 | config_path = "./" + self.projectEdit.text() 691 | proc = subprocess.Popen(["python","conv_npz2txt_v2.py","--config_path",config_path]) # background job = python train.py & 692 | proc.wait() 693 | 694 | print(" ... [FINISH]") 695 | 696 | # copy benchmark file from trainer, if it exist 697 | print("[INFO] COPY BENCHMARK IMAGE FILE") 698 | image_file = "./test_img.txt" 699 | if os.path.isfile(image_file) == True: 700 | sd_card_dir = "./" + self.projectEdit.text() + "/sdsoc/to_sd_card" 701 | subprocess.Popen(["cp",image_file,sd_card_dir]) 702 | print(" ... [FINISH]") 703 | else: 704 | print("FAILURE") 705 | 706 | # performe system generation, call SDSoC by make command 707 | # (subprocess!!!) 
def LoadProj(self):
    """Load a ``*.proj`` file chosen by the user and restore the GUI state.

    The project file holds one ``KEY: value`` pair per line (as written by
    ``SaveProj``).  After the widgets are updated, the layer table is
    rebuilt from ``./<project>/config.pickle`` and, when the project holds
    a training log, the progress plot is restored and the start button
    switches to "Continue Training".

    Does nothing when the dialog is cancelled; stops after the widget
    update when the project has no ``config.pickle``.
    """
    global is_load_pretrain
    global img_siz
    global n_class

    filename = str(QtGui.QFileDialog.getOpenFileName(self, 'File Open', './'))
    if not filename:
        return  # dialog cancelled

    with open(filename, mode='r') as f:
        lines = f.readlines()

    for line in lines:
        # Split on the first whitespace run only, so values that contain
        # spaces (e.g. dataset paths) survive; skip blank/malformed lines.
        fields = line.split(None, 1)
        if len(fields) != 2:
            continue
        key, val = fields[0], fields[1].strip()

        if key == 'PROJECT_NAME:':
            self.projectEdit.setText(val)
        elif key == 'TRAINING_DATA:':
            self.td_label.setText(val)
        elif key == 'TRAINING_LABEL:':
            self.tl_label.setText(val)
        elif key == 'NUM_OF_EPOCS:':
            self.n_trains_Edit.setText(val)
        elif key == 'OPTIMIZER:':
            use_sgd = (val == 'SGD')
            self.b11.setChecked(use_sgd)
            self.b12.setChecked(not use_sgd)
        elif key == 'USE_GPU:':
            self.cb.setChecked(val == 'YES')
        elif key == 'FPGA_BOARD:':
            # Look the board up by name, so the stored value always matches
            # the entries of the combo box.
            idx = self.combo2.findText(val)
            if idx >= 0:
                self.combo2.setCurrentIndex(idx)
            else:
                print("[WARNING] unknown FPGA board '%s', selection unchanged" % val)

    # Restore CNN Configuration Table
    project_dir = "./" + str(self.projectEdit.text())
    config_file = project_dir + "/config.pickle"
    if not os.path.isfile(config_file):
        print("[ERROR] configuration file %s not found" % config_file)
        return

    # NOTE(review): pickle.load can execute arbitrary code; only open
    # project directories from a trusted source.
    with open(config_file, mode='rb') as f:
        config = pickle.load(f)

    initial_options = config['initial_options']
    n_in_fmaps = config['n_in_fmaps']
    n_ou_fmaps = config['n_ou_fmaps']
    infmap_siz = config['infmap_siz']

    self.table.setRowCount(len(initial_options))
    for index, option in enumerate(initial_options):
        combo = QtGui.QComboBox()
        for t in self.combo_box_options:
            combo.addItem(t)
        combo.setCurrentIndex(option)
        self.table.setCellWidget(index, 0, combo)
        self.table.setItem(index, 1, QtGui.QTableWidgetItem(n_in_fmaps[index]))
        self.table.setItem(index, 2, QtGui.QTableWidgetItem(n_ou_fmaps[index]))
        self.table.setItem(index, 3, QtGui.QTableWidgetItem(infmap_siz[index]))

        train_box = QtGui.QCheckBox('')
        train_box.setChecked(True)
        self.table.setCellWidget(index, 4, train_box)

    # Restore Training Status Graph
    # NOTE(review): resume mode is assumed to be enabled only when the
    # project holds a training log - confirm against the intended workflow.
    log_path = project_dir + "/temp_log.csv"
    if os.path.exists(log_path):
        print("log_file %s" % log_path)

        # The log is read in place; no copy in the working directory is needed.
        with open(log_path, 'r') as f:
            n_lines_in_logfile = len(f.readlines())

        # Same guard as updateCanvas: with fewer than two data rows
        # np.loadtxt does not return one sequence per column.
        if n_lines_in_logfile > 2:
            train_loss, train_acc, test_loss, test_acc = np.loadtxt(
                log_path, delimiter=',', skiprows=1,
                usecols=(1, 2, 5, 6), unpack=True)
            self.canvas.push_data(train_acc, test_acc, train_loss, test_loss)
            self.canvas.refresh(int(self.n_trains_Edit.text()))

        is_load_pretrain = 1
        self.bstart.setText('Continue Training')

    # Restore Global Variables
    img_siz = int(config['imgsiz'])
    n_class = int(n_ou_fmaps[len(initial_options) - 1])

    print("[INFO] IMAGE SIZE %dx%d" % (img_siz, img_siz))
    print("[INFO] #CLASSES: %d" % (n_class))

    # update widgets
    self.update()
def open_FileDialog(self):
    """Let the user pick the pickled training images and adopt their geometry.

    The chosen path is shown in the "Training Data" field.  The module
    globals ``n_dim`` and ``img_siz`` are taken from axes 1 and 2 of
    ``images['train'].shape`` and the feature-map sizes in the layer table
    are recomputed.  Does nothing when the dialog is cancelled.
    """
    global n_dim
    global img_siz

    filename = str(QtGui.QFileDialog.getOpenFileName(self, 'File Open', './'))
    if not filename:
        return  # dialog cancelled
    self.td_label.setText(filename)

    # check dimension and size
    # NOTE(review): pickle.load can execute arbitrary code; only open
    # dataset files from a trusted source.
    with open(filename, 'rb') as f:
        images = pickle.load(f)

    shape = images['train'].shape
    print("[INFO] IMAGE SIZE %dx%d" % (shape[2], shape[3]))

    n_dim = shape[1]
    img_siz = shape[2]

    self.SetSize()


def open_FileDialog_tl(self):
    """Let the user pick the pickled training labels and derive the class count.

    The chosen path is shown in the "Training Label" field.  The module
    global ``n_class`` becomes the largest value in ``labels['train']``
    plus one (label 0 counts as a class) and is written into the
    "Out #Fmaps" cell of the last table row.  Does nothing when the dialog
    is cancelled.
    """
    global n_class

    filename = str(QtGui.QFileDialog.getOpenFileName(self, 'File Open', './'))
    if not filename:
        return  # dialog cancelled
    self.tl_label.setText(filename)

    # NOTE(review): pickle.load can execute arbitrary code; only open
    # dataset files from a trusted source.
    with open(filename, 'rb') as f:
        labels = pickle.load(f)

    # Take the maximum at the labels' own precision: narrowing to int8
    # first would wrap around for label values above 127.
    n_class = int(np.max(labels['train'])) + 1  # includes '0' label
    print("[INFO] #CLASSES: %d" % n_class)

    last_row = self.table.rowCount() - 1
    if last_row >= 0:
        self.table.setItem(last_row, 2, QtGui.QTableWidgetItem(str(n_class)))
LoadConfig(self): 934 | template_name = self.combo1.currentText() 935 | 936 | self.combo_box_options = ["Conv(Int)","Conv(Bin)","Max Pool","Ave Pool","Dense"] 937 | if template_name == 'LeNet5': 938 | initial_options = [0,1,1,3,4] 939 | n_in_fmaps = [ '1','64','64','64','64'] 940 | n_ou_fmaps = ['64','64','64','64','10'] 941 | infmap_siz = ['28','28','28','28','1'] 942 | elif template_name == 'TinyCNN': 943 | initial_options = [0,1,1,2,3,4] 944 | n_in_fmaps = [ '3', '64','128','128','128','128'] 945 | n_ou_fmaps = ['64','128','128','128','128', '10'] 946 | infmap_siz = ['32', '32', '32', '32', '16', '1'] 947 | elif template_name == 'VGG9ave': 948 | initial_options = [0, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 3, 4] 949 | n_in_fmaps = [ '3','64','64', '64','64','64','64','64','64','64','64','64','64'] 950 | n_ou_fmaps = ['64','64','64', '64','64','64','64','64','64','64','64','64','10'] 951 | infmap_siz = ['32','32','32', '16','16','16', '8', '8', '8', '4', '4', '4', '1'] 952 | elif template_name == 'VGG11ave': 953 | initial_options = [0, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 3, 4] 954 | n_in_fmaps = [ '3','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64'] 955 | n_ou_fmaps = ['64','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','10'] 956 | infmap_siz = ['32','32','32', '16','16','16', '8', '8', '8', '4', '4', '4', '2', '2', '2', '1'] 957 | elif template_name == 'VGG16ave': 958 | initial_options = [0, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 3, 4] 959 | n_in_fmaps = [ '3','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64'] 960 | n_ou_fmaps = ['64','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','10'] 961 | infmap_siz = ['64','64','64', '32','32','32','16','16','16','16', '8', '8', '8', '8', '4', '4', '4', '4', '1'] 962 | elif template_name == 'VGG19ave': 963 | initial_options = [0, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 3, 4] 
964 | n_in_fmaps = [ '3','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64'] 965 | n_ou_fmaps = ['64','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','10'] 966 | infmap_siz = ['64','64','64', '32','32','32','16','16','16','16','16', '8', '8', '8', '8', '8', '4', '4', '4', '4', '4', '1'] 967 | else: # VGG11 968 | initial_options = [0,1,2,1,1,2,1,1,2,1,1,2,4,4,4] 969 | n_in_fmaps = [ '3','64','64', '64','128','128','128','256','256','256','256','256','4096','1024','1024'] 970 | n_ou_fmaps = ['64','64','64','128','128','128','256','256','256','256','256','256','1024','1024', '10'] 971 | infmap_siz = ['32','32','32', '16', '16', '16', '8', '8', '8', '8', '8', '8', '1', '1', '1'] 972 | 973 | # set output #neurons (that is, #classifications) 974 | global n_class 975 | n_ou_fmaps[len(n_ou_fmaps) - 1] = str(n_class) 976 | 977 | self.table.setRowCount(len(initial_options)) 978 | for index in range(len(initial_options)): 979 | combo = QtGui.QComboBox() 980 | for t in self.combo_box_options: 981 | combo.addItem(t) 982 | combo.setCurrentIndex(initial_options[index]) 983 | self.table.setCellWidget(index,0,combo) 984 | item1 = QtGui.QTableWidgetItem(n_in_fmaps[index]) 985 | self.table.setItem(index,1,item1) 986 | item2 = QtGui.QTableWidgetItem(n_ou_fmaps[index]) 987 | self.table.setItem(index,2,item2) 988 | item3 = QtGui.QTableWidgetItem(infmap_siz[index]) 989 | self.table.setItem(index,3,item3) 990 | 991 | item4 = QtGui.QCheckBox('') 992 | item4.setChecked(True) # isChecked() == True?False? 
# -----------------------------------------------------------------------
# Plot Training Process (Train value, Test value)
# -----------------------------------------------------------------------
class Canvas(FigureCanvas):
    """Canvas that plots the training history on two y-axes.

    The left axis (``self.ax``) is labelled "Accuracy[%]" and the right,
    twinned axis (``self.ax2``) is labelled "Loss".  Four series are kept on
    the instance (``train_acc``, ``test_acc``, ``train_loss``, ``test_loss``);
    they are replaced wholesale by :meth:`push_data` and drawn by
    :meth:`refresh`.
    """

    def __init__(self):
        FigureCanvas.__init__(self, Figure())
        self.ax = self.figure.add_subplot(111)

        # Placeholder history (100 zeros per series) shown until push_data()
        # supplies real values.
        self.train_acc = [0] * 100
        self.test_acc = [0] * 100
        self.train_loss = [0] * 100
        self.test_loss = [0] * 100

        self.ax.set_xlabel("epoch")
        self.ax.set_ylabel("Accuracy[%]")
        self.ax.set_ylim(0, 100)

        # Second y-axis sharing the same x-axis, used for the loss curves.
        self.ax2 = self.ax.twinx()
        self.ax2.set_ylabel("Loss")

        self.refresh(100)

    def refresh(self, xrange):
        """Redraw both axes from the currently stored series.

        Args:
            xrange: upper limit of the x-axis on both axes; the "(Test)"
                annotations are anchored at ``x = xrange - 1``.  (The name
                shadows the Python 2 builtin but is kept for callers that
                pass it by keyword.)
        """
        last_x = xrange - 1

        # Reuse the axes created in __init__ instead of calling
        # add_subplot(111) again: depending on the Matplotlib version a
        # repeated call either hands back the same axes or adds a new one,
        # which would no longer be the twin of self.ax2.
        self.ax.clear()
        # NOTE(review): the stored values are subtracted from 100 before
        # plotting -- presumably they are error rates in percent; confirm
        # against the code that calls push_data().
        self.ax.plot(range(len(self.train_acc)),
                     100.0 - np.asarray(self.train_acc),
                     label='Accuracy(Train)', color="blue")
        self.ax.plot(range(len(self.test_acc)),
                     100.0 - np.asarray(self.test_acc),
                     label='Accuracy(Test)', color="red")

        # len() rather than truthiness: the series may be numpy arrays.
        if len(self.test_acc) > 0:
            self.ax.annotate('Accuracy(Test)',
                             xy=(last_x, 100.0 - self.test_acc[-1]),
                             xycoords='data',
                             xytext=(-100, -20),
                             textcoords='offset points',
                             arrowprops=dict(arrowstyle="->"))

        # clear() resets labels and limits, so they are applied again here.
        self.ax.set_xlabel("epoch")
        self.ax.set_ylabel("Accuracy[%]")
        self.ax.set_ylim(0, 100)
        self.ax.set_xlim(0, xrange)
        self.ax.grid()

        self.ax2.clear()
        self.ax2.plot(range(len(self.train_loss)), self.train_loss,
                      label='Loss(Train)', color="mediumslateblue")
        self.ax2.plot(range(len(self.test_loss)), self.test_loss,
                      label='Loss(Test)', color="hotpink")

        if len(self.test_loss) > 0:
            self.ax2.annotate('Loss(Test)',
                              xy=(last_x, self.test_loss[-1]),
                              xycoords='data',
                              xytext=(-80, 20),
                              textcoords='offset points',
                              arrowprops=dict(arrowstyle="->"))

        # Scale the loss axis to the larger of the two curves (plus 10%
        # headroom) so the test curve and its annotation are never clipped.
        # Fall back to 1.0 when there is no positive value yet, which avoids
        # both max() on an empty series and identical lower/upper limits.
        peaks = [max(series)
                 for series in (self.train_loss, self.test_loss)
                 if len(series) > 0]
        top = max(peaks) * 1.1 if peaks else 0
        self.ax2.set_ylim(0, top if top > 0 else 1.0)
        self.ax2.set_xlim(0, xrange)
        self.ax2.set_ylabel("Loss")

        self.draw()

    def push_data(self, train_acc, test_acc, train_loss, test_loss):
        """Replace the four stored series; call refresh() to redraw them."""
        self.train_acc = train_acc
        self.test_acc = test_acc
        self.train_loss = train_loss
        self.test_loss = test_loss


###########################################################################################
# Main
###########################################################################################
def main():
    """Create the Qt application and a Layout instance, then run the event loop.

    The process exits with the status returned by ``app.exec_()``.
    """
    app = QtGui.QApplication(sys.argv)
    # Bound to a name so the Layout object stays referenced while the event
    # loop is running.
    window = Layout()
    exit_code = app.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

###########################################################################################
# END OF PROGRAM
###########################################################################################