├── train_status.txt
├── temp.state
├── list.txt
├── header.txt
├── template_Makefile
├── weight_clip.py
├── net2.py
├── net3.py
├── bst.py
├── link_binary_linear.py
├── function_binary_linear.py
├── template_cpp_r7_main.cpp
├── README.md
├── link_binary_conv2d.py
├── link_integer_conv2d.py
├── trainer.py
├── eval.py
├── conv_npz2txt_v2.py
├── template_cpp_r7_socket_main.cpp
├── link_batch_normalization.py
├── gen_training_data.py
├── train.py
├── function_integer_conv2d.py
├── function_binary_conv2d.py
├── template_cpp_r7_bcnn.cpp
├── function_batch_normalization.py
├── LICENSE.txt
├── gen_cpp_code_v3.py
└── guinness.py


/train_status.txt:
--------------------------------------------------------------------------------
1 | stop


--------------------------------------------------------------------------------
/temp.state:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HirokiNakahara/GUINNESS/HEAD/temp.state


--------------------------------------------------------------------------------
/list.txt:
--------------------------------------------------------------------------------
1 | ./class3_images/airplane800 airplane
2 | ./class3_images/pets800 pets
3 | ./class3_images/car800 car
4 | 


--------------------------------------------------------------------------------
/header.txt:
--------------------------------------------------------------------------------
 1 | import math
 2 | import numpy as np
 3 | import six
 4 | import chainer
 5 | from chainer import cuda
 6 | from chainer import functions as F
 7 | from chainer import links as L
 8 | from chainer import initializers
 9 | 
10 | import sys
11 | sys.path.append('./')
12 | import link_binary_linear as BL
13 | import bst
14 | import link_binary_conv2d as BC
15 | import link_integer_conv2d as IC
16 | from function_binary_conv2d import func_convolution_2d
17 | from function_integer_conv2d import func_convolution_2d
18 | 
19 | # for debugging of the batch normalization functions
20 | import link_batch_normalization as LBN
21 | 
22 | class CNN(chainer.Chain):
23 |     def __init__(self):
24 |         super(CNN, self).__init__(
25 | 


--------------------------------------------------------------------------------
/template_Makefile:
--------------------------------------------------------------------------------
 1 | APPSOURCES = (CNN_C_SOURCE) 
 2 | EXECUTABLE = (ELF_FILE_PATH)
 3 | # NOTE: the parenthesized names are template placeholders, presumably substituted before use.
 4 | PLATFORM = (TARGET_BOARD)
 5 | SDSFLAGS = -sds-pf ${PLATFORM} \
 6 | 	-sds-hw BinCNN (CNN_C_SOURCE) -sds-end \
 7 | 	-poll-mode 1
 8 | 
 9 | CC = sds++ ${SDSFLAGS}
10 | 
11 | CFLAGS = -Wall -O3 -c
12 | CFLAGS += -MMD -MP -MF"$(@:%.o=%.d)"
13 | LFLAGS = -O3
14 | 
15 | OBJECTS := $(APPSOURCES:.cpp=.o)
16 | DEPS := $(OBJECTS:.o=.d)
17 | # None of these targets names a real file, so declare all of them phony.
18 | .PHONY: all clean ultraclean
19 | 
20 | all: ${EXECUTABLE}
21 | 
22 | ${EXECUTABLE}: ${OBJECTS}
23 | 	${CC} ${LFLAGS} ${OBJECTS} -o $@ 
24 | # Pull in the dependency files written by -MMD/-MF; the leading "-" ignores missing ones.
25 | -include ${DEPS}
26 | 
27 | %.o: %.cpp
28 | 	${CC} ${CFLAGS} $< -o $@
29 | 
30 | clean:
31 | 	${RM} ${EXECUTABLE} ${OBJECTS} ${DEPS}
32 | 
33 | ultraclean: clean
34 | 	${RM} ${EXECUTABLE}.bit 
35 | 	${RM} -rf _sds sd_card
36 | 


--------------------------------------------------------------------------------
/weight_clip.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | from chainer import cuda
 3 | 
 4 | class WeightClip(object):
 5 | 
 6 |     """Optimizer hook function for weight clip manipulation.
 7 | 
 8 |     This hook function clips a parameter to [low, high].
 9 |     It can be used in a binary weight network.
10 | 
11 |     Args:
12 |         low (float): low value for the weight clip.
13 |         high (float): high value for the weight clip.
14 | 
15 |     Attributes:
16 |         low (float): low value for the weight clip.
17 |         high (float): low value for the weight clip.
18 | 
19 |     """
20 |     name = 'WeightClip'
21 | 
22 |     def __init__(self, low=-1.0, high=1.0):
23 |         self.low=low
24 |         self.high=high
25 | 
26 |     def __call__(self, opt):
27 |         if cuda.available:
28 |             kernel = cuda.elementwise(
29 |                 'T low, T high', 
30 |                 'T p', 
31 |                 'p = (p < low) ? low : (p > high) ? high : p',
32 |                 'weight_clip')
33 | 
34 |         for param in opt.target.params():
35 |             p = param.data
36 |             with cuda.get_device(p) as dev:
37 |                 if int(dev) == -1:
38 |                     numpy.clip(p, self.low, self.high)
39 |                 else:
40 |                     kernel(self.low, self.high, p)
41 | 


--------------------------------------------------------------------------------
/net2.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import numpy as np
 3 | import six
 4 | import chainer
 5 | from chainer import cuda
 6 | from chainer import functions as F
 7 | from chainer import links as L
 8 | from chainer import initializers
 9 | 
10 | import sys
11 | sys.path.append('./')
12 | import link_binary_linear as BL
13 | import bst
14 | import link_binary_conv2d as BC
15 | import link_integer_conv2d as IC
16 | from function_binary_conv2d import func_convolution_2d
17 | from function_integer_conv2d import func_convolution_2d
18 | 
19 | # for debugging of the batch normalization functions
20 | import link_batch_normalization as LBN
21 | 
22 | class CNN(chainer.Chain):
23 |     def __init__(self):
24 |         super(CNN, self).__init__(
25 | 
26 |             conv0=IC.Convolution2D(3,64,3, stride=1, pad=1, nobias=True),
27 |             b0=L.BatchNormalization(64),
28 |             conv1=BC.Convolution2D(64,128,3, stride=1, pad=1, nobias=True),
29 |             b1=L.BatchNormalization(128),
30 |             conv2=BC.Convolution2D(128,128,3, stride=1, pad=1, nobias=True),
31 |             b2=L.BatchNormalization(128),
32 |             fc0=BL.BinaryLinear(128,3),
33 |             b3=L.BatchNormalization(3)
34 |         )
35 | 
36 |     def __call__(self, x, train):
37 |         h = bst.bst(self.b0(self.conv0(x)))
38 |         h = bst.bst(self.b1(self.conv1(h)))
39 |         h = bst.bst(self.b2(self.conv2(h)))
40 |         h = F.max_pooling_2d(h, 2)
41 |         h = F.average_pooling_2d(h, 24)
42 |         h = self.b3(self.fc0(h))
43 |         return h


--------------------------------------------------------------------------------
/net3.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import numpy as np
 3 | import six
 4 | import chainer
 5 | from chainer import cuda
 6 | from chainer import functions as F
 7 | from chainer import links as L
 8 | from chainer import initializers
 9 | 
10 | import link_binary_linear as BL
11 | import bst
12 | import link_binary_conv2d as BC
13 | import link_integer_conv2d as IC
14 | import sys
15 | sys.path.append('./')
16 | from function_binary_conv2d import func_convolution_2d
17 | from function_integer_conv2d import func_convolution_2d
18 | 
19 | # for debugging of the batch normalization functions
20 | import link_batch_normalization as LBN
21 | 
22 | class CNN(chainer.Chain):
23 |     def __init__(self):
24 |         super(CNN, self).__init__(
25 |             conv0=IC.Convolution2D(3,64,3, stride=1, pad=1, nobias=True),
26 |             b0=LBN.BatchNormalization(64),
27 |             conv1=BC.Convolution2D(64,128,3, stride=1, pad=1, nobias=True),
28 |             b1=LBN.BatchNormalization(128),
29 |             conv2=BC.Convolution2D(128,128,3, stride=1, pad=1, nobias=True),
30 |             b2=LBN.BatchNormalization(128),
31 |             fc0=BL.BinaryLinear(128,3),
32 |             b3=LBN.BatchNormalization(3)
33 |         )
34 | 
35 |     def __call__(self, x, train):
36 |         h = bst.bst(self.b0(self.conv0(x)))
37 |         h = bst.bst(self.b1(self.conv1(h)))
38 |         h = bst.bst(self.b2(self.conv2(h)))
39 |         h = F.max_pooling_2d(h, 2)
40 |         h = F.average_pooling_2d(h, 32)
41 |         h = self.b3(self.fc0(h))
42 |         return h


--------------------------------------------------------------------------------
/bst.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | 
 3 | from chainer import cuda
 4 | from chainer import function
 5 | from chainer.utils import type_check
 6 | 
 7 | 
 8 | class BST(function.Function):
 9 | 
10 |     """Binary with Straight Thourgh estimator Unit."""
11 | 
12 |     def __init__(self):
13 |         pass
14 | 
15 |     def check_type_forward(self, in_types):
16 |         type_check.expect(in_types.size() == 1)
17 |         x_type, = in_types
18 | 
19 |         type_check.expect(
20 |             x_type.dtype == numpy.float32,
21 |         )
22 | 
23 |     def forward_cpu(self, x):
24 |         y = x[0]
25 |         y = numpy.where(y>=0, 1, -1).astype(numpy.float32, copy=False)
26 |         return y,
27 | 
28 |     def forward_gpu(self, x):
29 |         y = cuda.elementwise(
30 |             'T x', 'T y',
31 |             'y = x >= 0 ? 1 : -1', 'bst_fwd')(
32 |                 x[0])
33 |         return y,
34 | 
35 |     def backward_cpu(self, x, gy):
36 |         gx = gy[0].copy()
37 |         zero_indices = numpy.abs(x[0]) > 1
38 |         gx[zero_indices] = 0
39 |         return gx,
40 | 
41 |     def backward_gpu(self, x, gy):
42 |         gx = cuda.elementwise(
43 |             'T x, T gy', 'T gx',
44 |             'gx = abs(x) > 1 ? 0 : gy', 'bst_bwd')(
45 |                 x[0], gy[0])
46 |         return gx,
47 | 
48 | 
49 | def bst(x):
50 |     """Binary with Straight Through estimator Unit function.
51 | 
52 |     This function is expressed as
53 | 
54 |     .. math::
55 |         f(x) = \\left \\{ \\begin{array}{ll}
56 |         1 & {\\rm if}~ x \\ge 0 \\\\
57 |         -1 & {\\rm if}~ x < 0,
58 |         \\end{array} \\right.
59 | 
60 |     See: http://arxiv.org/abs/1511.07289
61 |     NOTE(review): confirm this reference; it may be left over from another unit's docs.
62 |     Args:
63 |         x (~chainer.Variable): Input variable.
64 | 
65 |     Returns:
66 |         ~chainer.Variable: Output variable.
67 | 
68 |     """
69 |     return BST()(x)
70 | 


--------------------------------------------------------------------------------
/link_binary_linear.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | 
 3 | from chainer import link
 4 | import function_binary_linear
 5 | 
 6 | class BinaryLinear(link.Link):
 7 |     """Binary Linear layer (a.k.a. binary fully-connected layer).
 8 | 
 9 |     This is a link that wraps the :func:`~chainer.functions.linear` function,
10 |     and holds a weight matrix ``W`` and optionally a bias vector ``b`` as
11 |     parameters.
12 | 
13 |     The weight matrix ``W`` is initialized with i.i.d. Gaussian samples, each
14 |     of which has zero mean and deviation :math:`\\sqrt{1/\\text{in_size}}`. The
15 |     bias vector ``b`` is of size ``out_size``. Each element is initialized with
16 |     the ``bias`` value. If ``nobias`` argument is set to True, then this link
17 |     does not hold a bias vector.
18 | 
19 |     Args:
20 |         in_size (int): Dimension of input vectors.
21 |         out_size (int): Dimension of output vectors.
22 |         wscale (float): Scaling factor of the weight matrix.
23 |         bias (float): Initial bias value.
24 |         nobias (bool): If True, then this function does not use the bias.
25 |         initialW (2-D array): Initial weight value. If ``None``, then this
26 |             function uses to initialize ``wscale``.
27 |         initial_bias (1-D array): Initial bias value. If ``None``, then this
28 |             function uses to initialize ``bias``.
29 | 
30 |     .. seealso:: :func:`~chainer.functions.linear`
31 | 
32 |     Attributes:
33 |         W (~chainer.Variable): Weight parameter.
34 |         b (~chainer.Variable): Bias parameter.
35 | 
36 |     """
37 |     def __init__(self, in_size, out_size, wscale=1, bias=0, nobias=False,
38 |                  initialW=None, initial_bias=None):
39 |         super(BinaryLinear, self).__init__(W=(out_size, in_size))
40 |         if initialW is None:
41 |             initialW = numpy.random.normal(
42 |                 0, wscale * numpy.sqrt(1. / in_size), (out_size, in_size))
43 |         self.W.data[...] = initialW
44 | 
45 |         if nobias:
46 |             self.b = None
47 |         else:
48 |             self.add_param('b', out_size)
49 |             if initial_bias is None:
50 |                 initial_bias = bias
51 |             self.b.data[...] = initial_bias
52 | 
53 |     def __call__(self, x):
54 |         """Applies the linear layer.
55 | 
56 |         Args:
57 |             x (~chainer.Variable): Batch of input vectors.
58 | 
59 |         Returns:
60 |             ~chainer.Variable: Output of the linear layer.
61 | 
62 |         """
63 |         return function_binary_linear.binary_linear(x, self.W, self.b)
64 | 


--------------------------------------------------------------------------------
/function_binary_linear.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | 
  3 | from chainer import cuda
  4 | from chainer import function
  5 | from chainer.utils import type_check
  6 | 
  7 | def _kern():
  8 |     return cuda.elementwise(
  9 |         'T x', 'T y',
 10 |         'y = x >= 0 ? 1 : -1',
 11 |         'binarize')
 12 | 
 13 | def _as_mat(x):
 14 |     if x.ndim == 2:
 15 |         return x
 16 |     return x.reshape(len(x), -1)
 17 | 
 18 | class BinaryLinearFunction(function.Function):
 19 |     """Linear function that binarizes both the input and the weight to 1/-1 in the forward pass."""
 20 |     def check_type_forward(self, in_types):
 21 |         n_in = in_types.size()
 22 |         type_check.expect(2 <= n_in, n_in <= 3)
 23 |         x_type, w_type = in_types[:2]
 24 |         # x and W must be float32; x flattened past axis 0 must match W's second axis.
 25 |         type_check.expect(
 26 |             x_type.dtype == numpy.float32,
 27 |             w_type.dtype == numpy.float32,
 28 |             x_type.ndim >= 2,
 29 |             w_type.ndim == 2,
 30 |             type_check.prod(x_type.shape[1:]) == w_type.shape[1],
 31 |         )
 32 |         if n_in.eval() == 3:
 33 |             b_type = in_types[2]
 34 |             type_check.expect(
 35 |                 b_type.dtype == numpy.float32,
 36 |                 b_type.ndim == 1,
 37 |                 b_type.shape[0] == w_type.shape[0],
 38 |             )
 39 | 
 40 |     def forward_cpu(self, inputs):
 41 |         x = _as_mat(inputs[0])
 42 |         W = inputs[1]
 43 |         Wb = numpy.where(W>=0, 1, -1).astype(numpy.float32, copy=False)
 44 |         # The input is binarized with the same rule as the weight (>= 0 -> 1, else -1).
 45 |         Xb = numpy.where(x>=0,1,-1).astype(x.dtype, copy=False)
 46 | 
 47 |         y = Xb.dot(Wb.T)
 48 | 
 49 |         if len(inputs) == 3:
 50 |             b = inputs[2]
 51 |             y += b
 52 |         return y,
 53 | 
 54 |     def forward_gpu(self, inputs):
 55 |         x = _as_mat(inputs[0])
 56 |         W = inputs[1]
 57 |         Wb = _kern()(W)
 58 |         # GPU counterpart of forward_cpu: W and x are both binarized by the _kern kernel.
 59 |         Xb = _kern()(x)
 60 |         
 61 |         y = Xb.dot(Wb.T)
 62 | 
 63 |         if len(inputs) == 3:
 64 |             b = inputs[2]
 65 |             y += b
 66 |         return y,
 67 | 
 68 | 
 69 |     def backward_cpu(self, inputs, grad_outputs):
 70 |         x = _as_mat(inputs[0])
 71 |         W = inputs[1]
 72 |         Wb = numpy.where(W>=0, 1, -1).astype(numpy.float32, copy=False)
 73 |         gy = grad_outputs[0]
 74 |         # gx uses the binarized weight; gW uses x as given, not the binarized input of forward.
 75 |         gx = gy.dot(Wb).reshape(inputs[0].shape)
 76 |         gW = gy.T.dot(x)
 77 |         if len(inputs) == 3:
 78 |             gb = gy.sum(0)
 79 |             return gx, gW, gb
 80 |         else:
 81 |             return gx, gW
 82 | 
 83 |     def backward_gpu(self, inputs, grad_outputs):
 84 |         x = _as_mat(inputs[0])
 85 |         W = inputs[1]
 86 |         Wb = _kern()(W)
 87 |         gy = grad_outputs[0]
 88 |         # Same gradients as backward_cpu, with the weight binarized by the _kern kernel.
 89 |         gx = gy.dot(Wb).reshape(inputs[0].shape)
 90 |         gW = gy.T.dot(x)
 91 |         if len(inputs) == 3:
 92 |             gb = gy.sum(0)
 93 |             return gx, gW, gb
 94 |         else:
 95 |             return gx, gW
 96 | 
 97 | 
 98 | def binary_linear(x, W, b=None):
 99 |     """Binary Linear function, or affine transformation.
100 | 
101 |     It accepts two or three arguments: an input minibatch ``x``, a weight
102 |     matrix ``W``, and optionally a bias vector ``b``. It computes
103 |     :math:`Y = xW^\\top + b`.
104 | 
105 |     Args:
106 |         x (~chainer.Variable): Input variable. Its first dimension is assumed
107 |             to be the *minibatch dimension*. The other dimensions are treated
108 |             as concatenated one dimension whose size must be ``N``.
109 |         W (~chainer.Variable): Weight variable of shape ``(M, N)``.
110 |         b (~chainer.Variable): Bias variable (optional) of shape ``(M,)``..
111 | 
112 |     Returns:
113 |         ~chainer.Variable: Output variable.
114 | 
115 |     .. seealso:: :class:`~chainer.links.Linear`
116 | 
117 |     """
118 |     if b is None:
119 |         return BinaryLinearFunction()(x, W)
120 |     else:
121 |         return BinaryLinearFunction()(x, W, b)
122 | 


--------------------------------------------------------------------------------
/template_cpp_r7_main.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * C++ Template for a Binarized CNN
  3 |  *
  4 |  *  Created on: 2017/07/01
  5 |  *      Author: H. Nakahara
  6 |  */
  7 | 
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | #include <iostream>
 11 | #include <bitset>
 12 | 
 13 | #include <ap_int.h>
 14 | 
 15 | #ifdef __SDSCC__
 16 | #include "sds_lib.h"
 17 | #else 
 18 | #define sds_alloc(x)(malloc(x))
 19 | #define sds_free(x)(free(x))
 20 | #endif
 21 | 
 22 | void BinCNN( // prototype only; main() below calls it once for setup and then for each inference
 23 | #ifdef __SDSCC__
 24 |         int *t_bin_convW,
 25 |         int *t_BNFb,
 26 |         ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
 27 |         int fc_result[(OUT_DENSE_SIZ)],
 28 |         int init // main() passes 1 for the setup call and 0 for inference calls
 29 | #else 
 30 |         int t_bin_convW[(WEIGHT_SIZ)],
 31 |         int t_BNFb[(BIAS_SIZ)],
 32 |         ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
 33 |         int fc_result[(OUT_DENSE_SIZ)],
 34 |         int init // main() passes 1 for the setup call and 0 for inference calls
 35 | #endif
 36 | );
 37 | 
 38 | //--------------------------------------------------------------------
 39 | // Main Function
 40 | //--------------------------------------------------------------------
 41 | int main( int argc, char *argv[])
 42 | {
 43 |     ap_int<64> *t_tmp_img;
 44 |     t_tmp_img = (ap_int<64> *)sds_alloc(((IMGSIZ)*(IMGSIZ))*sizeof(ap_int<64>));
 45 | 
 46 |     int fc_result[(OUT_DENSE_SIZ)];
 47 |     int rgb, y, x, i, offset;
 48 | 
 49 |     // clear the input image buffer before the setup call
 50 |     for( y = 0; y < (IMGSIZ); y++){
 51 |         for( x = 0; x < (IMGSIZ); x++){
 52 |             t_tmp_img[y*(IMGSIZ)+x] = 0;
 53 |         }
 54 |     }
 55 | 
 56 |     // ------------------------------------------------------------------
 57 |     printf("load weights\n");
 58 |     int *t_bin_convW;
 59 |     int *t_BNFb;
 60 |     t_bin_convW = (int *)sds_alloc(((WEIGHT_SIZ))*sizeof(int));
 61 |     t_BNFb   = (int *)sds_alloc(((BIAS_SIZ))*sizeof(int));
 62 | 
 63 |     int of, inf, d_value = 0; // NOTE(review): presumably also used by the code substituted below
 64 |     FILE *fp;
 65 |     char line[256] = "";      // start empty so a failed first fgets() leaves a defined buffer
 66 | 
 67 | (READ_BIAS_MEM)
 68 | 
 69 | (READ_WEIGHT_MEM)
 70 | 
 71 |     printf("setup... \n");
 72 |     BinCNN( t_bin_convW, t_BNFb, t_tmp_img, fc_result, 1);
 73 | 
 74 |     char image_name[256];
 75 |     int cnt = 1; // stays 1 if the count argument cannot be parsed
 76 | #ifdef __SDSCC__
 77 |     if( argc < 3){ fprintf(stderr,"usage: %s <test image> <# of inferences>\n", argv[0]); return 1; }
 78 |     sscanf( argv[1], "%255s", image_name); // 1st argument: test image (text file), bounded to the buffer
 79 |     sscanf( argv[2], "%d", &cnt); // 2nd argument: # of inferences
 80 | #else
 81 |     sprintf( image_name, "test_img.txt");
 82 |     cnt = 1;
 83 | #endif
 84 | 
 85 |     // each pixel word packs one 20-bit field per channel; (NUMIMG) values are read per pixel
 86 |     int pixel;
 87 |     printf("LOAD TESTBENCH %s ... ", image_name);
 88 |     if( (fp = fopen(image_name, "r")) == NULL){ fprintf(stderr,"CANNOT OPEN\n"); return 1; } // do not read from NULL
 89 |     for( y = 0; y < (IMGSIZ); y++){
 90 |         for( x = 0; x < (IMGSIZ); x++){
 91 |             ap_int<64>tmp = 0;
 92 |             for( rgb = (NUMIMG) - 1; rgb >= 0 ; rgb--){
 93 |                 if( fgets( line, 256, fp) == NULL)
 94 |                     fprintf(stderr,"EMPTY FILE READ\n");
 95 |                 sscanf( line, "%d", &d_value);
 96 |                 // shift the channels read so far up and put this one in the low 20 bits
 97 |                 tmp = tmp << 20;
 98 | 
 99 |                 pixel = d_value;
100 |                 tmp |= ( pixel & 0xFFFFF);
101 |             }
102 |             t_tmp_img[ y * (IMGSIZ) + x] = tmp;
103 |         }
104 |     }
105 |     printf("OK\n");
106 |     fclose(fp);
107 | 
108 |     printf("Inference %d times ... ", cnt);
109 |     for( i = 0; i < cnt; i++){
110 |         BinCNN( t_bin_convW, t_BNFb, t_tmp_img, fc_result, 0);
111 |     }
112 |     printf("OK\n");
113 | 
114 |     printf("Result\n");
115 |     for( i = 0; i < (OUT_DENSE_SIZ); i++)printf("%5d ", fc_result[i]);
116 |     printf("\n");
117 | 
118 |     sds_free( t_tmp_img); sds_free( t_bin_convW); sds_free( t_BNFb);
119 | 
120 |     return 0;
121 | }
122 | 
123 | // ------------------------------------------------------------------
124 | // END OF PROGRAM
125 | // ------------------------------------------------------------------
126 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GUINNESS: A GUI based binarized Neural NEtwork SyntheSizer toward an FPGA (Trial version)
 2 | 
 3 | This GUI-based framework covers both training on a GPU and bitstream generation for an FPGA using Xilinx SDSoC. The tool uses the Chainer deep learning framework to train a binarized CNN, and it applies optimization techniques for the FPGA implementation. Details are given in the following papers:
 4 | 
 5 | [Nakahara IPDPSW2017] H. Yonekawa and H. Nakahara, "On-Chip Memory Based Binarized Convolutional Deep Neural Network Applying Batch Normalization Free Technique on an FPGA," IPDPS Workshops, 2017, pp. 98-105.  
 6 | 
 7 | [Nakahara FPL2017] H. Nakahara et al., "A Fully Connected Layer Elimination for a Binarized Convolutional Neural Network on an FPGA", FPL, 2017, pp. 1-4.
 8 | 
 9 | [Nakahara FPL2017 Demo] H. Nakahara et al., "A demonstration of the GUINNESS: A GUI based neural NEtwork SyntheSizer for an FPGA", FPL, 2017, page 1.
10 | 
11 | ### 1. Requirements:
12 | 
13 | Ubuntu 16.04 LTS (14.04 LTS is also supported)  
14 | 
15 | Python 3.5.1
16 | (Note: I recommend installing it with Anaconda 4.1.0 (64-bit) + pyenv.
17 |  I set up Python 3.5 by following this guide, which is in Japanese only: http://blog.algolab.jp/post/2016/08/21/pyenv-anaconda-ubuntu/)
18 | 
19 | CUDA 8.0 (+GPU), CuDNN 6.0
20 | (Also, you must sign up for an NVIDIA developer account)
21 | 
22 | Chainer 1.24.0 + CuPy 2.0
23 | 
24 | Xilinx Inc. SDSoC 2017.4
25 | 
26 | FPGA board: Xilinx ZC702, ZC706, ZCU102, Digilent Zedboard, Zybo  
27 | (Soon, I will support Intel's FPGAs!, and the PYNQ board)  
28 | 
29 | PyQt4, matplotlib, OpenCV3, numpy, scipy,
30 | (Above libraries are installed by the Anaconda, however, you must individually install the OpenCV by "conda install -y -c menpo opencv3")
31 | 
32 | ### 2. Setup Libraries
33 | 
34 |  Install the following python libraries:
35 | 
36 |  Chainer 
37 | 
38 |  sudo pip install chainer==1.24.0
39 |  
40 |  PyQt4 (not PyQt5!), it is already installed by the Anaconda
41 | 
42 |  sudo apt-get install python-qt4 pyqt4-dev-tools
43 | 
44 |  OpenCV3
45 |  
46 |  conda install -y -c menpo opencv3
47 | 
48 | ### 3. Run GUINNESS
49 | 
50 |  $ python guinness.py
51 | 
52 | ### 4. Tutorial
53 | 
54 |  Read the following documents (25/Oct./2017 Updated!!)
55 | 
56 |  1 The GUINNESS introduction and BCNN implementation on an FPGA  
57 |  guinness_tutorial1_v2.pdf <https://www.dropbox.com/s/oe6gptgyi4y92el/guinness_tutorial1_v2.pdf?dl=0>
58 | 
59 |  2 The GUINNESS for the Intel FPGAs (Soon, will be uploaded)
60 |  
61 |  3 Pedestrian detection (Under preparing)
62 | 
63 |  4 Make a custom IP core for your own FPGA board (Under preparing) 
64 | 
65 | ### 5. On-going works
66 |  This is just a trial version. I have already developed an extended version that includes the following:
67 |  
68 |  Support for Intel FPGAs (DE5-net, DE10-nano, and DE5a-net boards with the Intel SDK for OpenCL)
69 |  
70 |  High performance image recognition (fully pipelined and SIMD CNNs)  
71 |  
72 |  Object detector on a low-cost FPGA (e.g., pedestrian detection)
73 | 
74 | FPGA YOLOv2 (ZCU102 board)
75 | 
76 | [![FPGA YOLOv2 ON YOUTUBE](http://img.youtube.com/vi/_iMboyu8iWc/0.jpg)](https://www.youtube.com/watch?v=_iMboyu8iWc&t=5s)
77 | 
78 | Pedestrian Detector (Zedboard)
79 | 
80 | [![Pedestrian Detector ON YOUTUBE](http://img.youtube.com/vi/X82PVBuAuuo/0.jpg)](https://www.youtube.com/watch?v=X82PVBuAuuo&list=FLIIfj2LoI2TVWF5wQkZHiHg)
81 | 
82 | 
83 |  If you are interested in the extended version, please contact me.
84 | 
85 | ### 6. Acknowledgements
86 |  This work is based on following projects:
87 | 
88 |  Chainer binarized neural network by Daisuke Okanohara  
89 |  https://github.com/hillbig/binary_net
90 | 
91 |  Various CNN models including Deep Residual Networks (ResNet)   
92 |   for CIFAR10 with Chainer by mitmul  
93 |  https://github.com/mitmul/chainer-cifar10
94 | 
95 |  This research is supported in part by the Grants in Aid for Scientific Research of JSPS,  
96 | and an Accelerated Innovation Research Initiative Turning Top Science and Ideas into High-Impact  
97 | Values program(ACCEL) of JST. Also, thanks to the Xilinx University Program (XUP), Intel University Program,
98 |  and the NVidia Corp.'s support.
99 | 


--------------------------------------------------------------------------------
/link_binary_conv2d.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | 
  3 | import function_binary_conv2d
  4 | from chainer import initializers
  5 | from chainer import link
  6 | 
  7 | import numpy
  8 | 
  9 | 
 10 | class Convolution2D(link.Link):
 11 | 
 12 |     """Two-dimensional convolutional layer.
 13 | 
 14 |     This link wraps the :func:`~chainer.functions.convolution_2d` function and
 15 |     holds the filter weight and bias vector as parameters.
 16 | 
 17 |     Args:
 18 |         in_channels (int): Number of channels of input arrays. If None,
 19 |             parameter initialization will be deferred until the first forward
 20 |             data pass at which time the size will be determined.
 21 |         out_channels (int): Number of channels of output arrays.
 22 |         ksize (int or pair of ints): Size of filters (a.k.a. kernels).
 23 |             ``ksize=k`` and ``ksize=(k, k)`` are equivalent.
 24 |         stride (int or pair of ints): Stride of filter applications.
 25 |             ``stride=s`` and ``stride=(s, s)`` are equivalent.
 26 |         pad (int or pair of ints): Spatial padding width for input arrays.
 27 |             ``pad=p`` and ``pad=(p, p)`` are equivalent.
 28 |         wscale (float): Scaling factor of the initial weight.
 29 |         bias (float): Initial bias value.
 30 |         nobias (bool): If ``True``, then this link does not use the bias term.
 31 |         use_cudnn (bool): If ``True``, then this link uses cuDNN if available.
 32 |         initialW (4-D array): Initial weight value. If ``None``, then this
 33 |             function uses to initialize ``wscale``.
 34 |             May also be a callable that takes ``numpy.ndarray`` or
 35 |             ``cupy.ndarray`` and edits its value.
 36 |         initial_bias (1-D array): Initial bias value. If ``None``, then this
 37 |             function uses to initialize ``bias``.
 38 |             May also be a callable that takes ``numpy.ndarray`` or
 39 |             ``cupy.ndarray`` and edits its value.
 40 | 
 41 |     .. seealso::
 42 |        See :func:`chainer.functions.convolution_2d` for the definition of
 43 |        two-dimensional convolution.
 44 | 
 45 |     Attributes:
 46 |         W (~chainer.Variable): Weight parameter.
 47 |         b (~chainer.Variable): Bias parameter.
 48 | 
 49 |     """
 50 | 
 51 |     def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
 52 |                  wscale=1, bias=0, nobias=False, use_cudnn=True,
 53 |                  initialW=None, initial_bias=None):
 54 |         super(Convolution2D, self).__init__()
 55 |         self.ksize = ksize
 56 |         self.stride = _pair(stride)
 57 |         self.pad = _pair(pad)
 58 |         self.use_cudnn = use_cudnn
 59 |         self.out_channels = out_channels
 60 |         self.initialW = initialW
 61 |         self.wscale = wscale
 62 | 
 63 |         if in_channels is None:
 64 |             self.add_uninitialized_param('W')
 65 |         else:
 66 |             self._initialize_params(in_channels)
 67 | 
 68 |         kh, kw = _pair(self.ksize)
 69 |         W_shape = (self.out_channels, in_channels, kh, kw)
 70 |         #self.add_param('W', W_shape)
 71 |         # For backward compatibility, the scale of weights is proportional to
 72 |         # the square root of wscale.
 73 |         initializers.init_weight(self.W.data, self.initialW,
 74 |                                  scale=math.sqrt(self.wscale))
 75 | 
 76 |         if nobias:
 77 |             self.b = None
 78 |         else:
 79 |             self.add_param('b', out_channels)
 80 |             if initial_bias is None:
 81 |                 initial_bias = bias
 82 |             initializers.init_weight(self.b.data, initial_bias)
 83 | 
 84 |     def _initialize_params(self, in_channels):
 85 |         kh, kw = _pair(self.ksize)
 86 |         W_shape = (self.out_channels, in_channels, kh, kw)
 87 |         self.add_param('W', W_shape)
 88 |         # For backward compatibility, the scale of weights is proportional to
 89 |         # the square root of wscale.
 90 |         initializers.init_weight(self.W.data, self.initialW,
 91 |                                  scale=math.sqrt(self.wscale))
 92 | 
 93 |     def __call__(self, x):
 94 |         """Applies the convolution layer.
 95 | 
 96 |         Args:
 97 |             x (~chainer.Variable): Input image.
 98 | 
 99 |         Returns:
100 |             ~chainer.Variable: Output of the convolution.
101 | 
102 |         """
103 |         return function_binary_conv2d.func_convolution_2d(x, self.W, self.b, self.stride, self.pad, self.use_cudnn)
104 | 
105 | 
106 | def _pair(x):
107 |     if hasattr(x, '__getitem__'):
108 |         return x
109 |     return x, x
110 | 


--------------------------------------------------------------------------------
/link_integer_conv2d.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | 
  3 | #from chainer.functions.connection import convolution_2d
  4 | import function_integer_conv2d
  5 | from chainer import initializers
  6 | from chainer import link
  7 | 
  8 | import numpy
  9 | 
 10 | 
 11 | class Convolution2D(link.Link):
 12 | 
 13 |     """Two-dimensional convolutional layer.
 14 | 
 15 |     This link wraps the :func:`~chainer.functions.convolution_2d` function and
 16 |     holds the filter weight and bias vector as parameters.
 17 | 
 18 |     Args:
 19 |         in_channels (int): Number of channels of input arrays. If None,
 20 |             parameter initialization will be deferred until the first forward
 21 |             data pass at which time the size will be determined.
 22 |         out_channels (int): Number of channels of output arrays.
 23 |         ksize (int or pair of ints): Size of filters (a.k.a. kernels).
 24 |             ``ksize=k`` and ``ksize=(k, k)`` are equivalent.
 25 |         stride (int or pair of ints): Stride of filter applications.
 26 |             ``stride=s`` and ``stride=(s, s)`` are equivalent.
 27 |         pad (int or pair of ints): Spatial padding width for input arrays.
 28 |             ``pad=p`` and ``pad=(p, p)`` are equivalent.
 29 |         wscale (float): Scaling factor of the initial weight.
 30 |         bias (float): Initial bias value.
 31 |         nobias (bool): If ``True``, then this link does not use the bias term.
 32 |         use_cudnn (bool): If ``True``, then this link uses cuDNN if available.
 33 |         initialW (4-D array): Initial weight value. If ``None``, then this
 34 |             function uses to initialize ``wscale``.
 35 |             May also be a callable that takes ``numpy.ndarray`` or
 36 |             ``cupy.ndarray`` and edits its value.
 37 |         initial_bias (1-D array): Initial bias value. If ``None``, then this
 38 |             function uses to initialize ``bias``.
 39 |             May also be a callable that takes ``numpy.ndarray`` or
 40 |             ``cupy.ndarray`` and edits its value.
 41 | 
 42 |     .. seealso::
 43 |        See :func:`chainer.functions.convolution_2d` for the definition of
 44 |        two-dimensional convolution.
 45 | 
 46 |     Attributes:
 47 |         W (~chainer.Variable): Weight parameter.
 48 |         b (~chainer.Variable): Bias parameter.
 49 | 
 50 |     """
 51 | 
 52 |     def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
 53 |                  wscale=1, bias=0, nobias=False, use_cudnn=True,
 54 |                  initialW=None, initial_bias=None):
 55 |         super(Convolution2D, self).__init__()
 56 |         self.ksize = ksize
 57 |         self.stride = _pair(stride)
 58 |         self.pad = _pair(pad)
 59 |         self.use_cudnn = use_cudnn
 60 |         self.out_channels = out_channels
 61 |         self.initialW = initialW
 62 |         self.wscale = wscale
 63 | 
 64 |         if in_channels is None:
 65 |             self.add_uninitialized_param('W')
 66 |         else:
 67 |             self._initialize_params(in_channels)
 68 | 
 69 |         kh, kw = _pair(self.ksize)
 70 |         W_shape = (self.out_channels, in_channels, kh, kw)
 71 |         #self.add_param('W', W_shape)
 72 |         # For backward compatibility, the scale of weights is proportional to
 73 |         # the square root of wscale.
 74 |         initializers.init_weight(self.W.data, self.initialW,
 75 |                                  scale=math.sqrt(self.wscale))
 76 | 
 77 |         if nobias:
 78 |             self.b = None
 79 |         else:
 80 |             self.add_param('b', out_channels)
 81 |             if initial_bias is None:
 82 |                 initial_bias = bias
 83 |             initializers.init_weight(self.b.data, initial_bias)
 84 | 
 85 |     def _initialize_params(self, in_channels):
 86 |         kh, kw = _pair(self.ksize)
 87 |         W_shape = (self.out_channels, in_channels, kh, kw)
 88 |         self.add_param('W', W_shape)
 89 |         # For backward compatibility, the scale of weights is proportional to
 90 |         # the square root of wscale.
 91 |         initializers.init_weight(self.W.data, self.initialW,
 92 |                                  scale=math.sqrt(self.wscale))
 93 | 
 94 |     def __call__(self, x):
 95 |         """Applies the convolution layer.
 96 | 
 97 |         Args:
 98 |             x (~chainer.Variable): Input image.
 99 | 
100 |         Returns:
101 |             ~chainer.Variable: Output of the convolution.
102 | 
103 |         """
104 |         return function_integer_conv2d.func_convolution_2d(x, self.W, self.b, self.stride, self.pad, self.use_cudnn)
105 | 
106 | 
107 | def _pair(x):
108 |     if hasattr(x, '__getitem__'):
109 |         return x
110 |     return x, x
111 | 


--------------------------------------------------------------------------------
/trainer.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import six
  3 | from scipy.misc import imresize, imrotate
  4 | 
  5 | from chainer import functions as F
  6 | from chainer import cuda
  7 | from chainer import Variable
  8 | 
  9 | class CifarTrainer(object):
 10 |     def __init__(self, net, optimizer, epoch_num=100, batch_size=100, device_id=-1):
 11 |         self.net = net
 12 |         self.optimizer = optimizer
 13 |         self.epoch_num = epoch_num
 14 |         self.batch_size = batch_size
 15 |         self.device_id = device_id
 16 |         if device_id >= 0:
 17 |             self.xp = cuda.cupy
 18 |             self.net.to_gpu(device_id)
 19 |         else:
 20 |             self.xp = np
 21 | 
 22 |     def fit(self, x, y, valid_x, valid_y, img_siz, img_dim, test_x=None, test_y=None, callback=None):
 23 |         if self.device_id >= 0:
 24 |             with cuda.cupy.cuda.Device(self.device_id):
 25 |                 return self.__fit(x, y, valid_x, valid_y, img_siz, img_dim, test_x, test_y, callback)
 26 |         else:
 27 |             return self.__fit(x, y, valid_x, valid_y, img_siz, img_dim, test_x, test_y, callback)
 28 | 
 29 |     def __fit(self, x, y, valid_x, valid_y, img_siz, img_dim, test_x, test_y, callback):
 30 |         batch_size = self.batch_size
 31 |         for epoch in six.moves.range(self.epoch_num):
 32 |             perm = np.random.permutation(len(x))
 33 |             train_loss = 0
 34 |             train_acc = 0
 35 |             for i in six.moves.range(0, len(x), self.batch_size):
 36 |                 self.net.zerograds()
 37 |                 batch_index = perm[i:i + batch_size]
 38 |                 x_batch = self.__trans_image(x[batch_index], img_siz, img_dim)
 39 | 
 40 | #                print(type(x_batch[0,0,0,0]))
 41 | #                print(batch_index)
 42 | #                print(type(y[batch_index][0]))
 43 | 
 44 |                 loss, acc = self.__forward(x_batch, y[batch_index])
 45 |                 loss.backward()
 46 |                 self.optimizer.update()
 47 |                 train_loss += float(loss.data) * len(x_batch)
 48 |                 train_acc += float(acc.data) * len(x_batch)
 49 |             train_loss /= len(x)
 50 |             train_acc /= len(x)
 51 |             valid_loss = 0
 52 |             valid_acc = 0
 53 |             if valid_x is not None and valid_y is not None:
 54 |                 for i in six.moves.range(0, len(valid_x), self.batch_size):
 55 |                     x_batch = valid_x[i:i + batch_size]
 56 |                     loss, acc = self.__forward(x_batch, valid_y[i:i + batch_size], train=False)
 57 |                     valid_loss += float(loss.data) * len(x_batch)
 58 |                     valid_acc += float(acc.data) * len(x_batch)
 59 |             valid_loss /= len(valid_x)
 60 |             valid_acc /= len(valid_x)
 61 |             test_loss = 0
 62 |             test_acc = 0
 63 |             if test_x is not None and test_y is not None:
 64 |                 for i in six.moves.range(0, len(test_x), self.batch_size):
 65 |                     x_batch = test_x[i:i + batch_size]
 66 |                     loss, acc = self.__forward(x_batch, test_y[i:i + batch_size], train=False)
 67 |                     test_loss += float(loss.data) * len(x_batch)
 68 |                     test_acc += float(acc.data) * len(x_batch)
 69 |                 test_loss /= len(test_x)
 70 |                 test_acc /= len(test_x)
 71 |             if callback is not None:
 72 |                 callback(epoch, self.net, self.optimizer, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc)
 73 | 
 74 |     def __forward(self, batch_x, batch_t, train=True):
 75 |         xp = self.xp
 76 |         x = Variable(xp.asarray(batch_x), volatile=not train)
 77 |         t = Variable(xp.asarray(batch_t), volatile=not train)
 78 |         y = self.net(x, train=train)
 79 | #        print(type(y.data))
 80 | #        print(type(t.data))
 81 |         loss = F.softmax_cross_entropy(y, t)
 82 |         acc = F.accuracy(y, t)
 83 |         return loss, acc
 84 | 
 85 |     def __trans_image(self, x, img_siz, img_dim):
 86 |         size = img_siz
 87 |         n = x.shape[0]
 88 |         images = np.zeros((n, img_dim, size, size), dtype=np.float32)
 89 |         offset = np.random.randint(-4, 5, size=(n, 2))
 90 |         mirror = np.random.randint(2, size=n)
 91 |         for i in six.moves.range(n):
 92 |             image = x[i]
 93 |             top, left = offset[i]
 94 |             left = max(0, left)
 95 |             top = max(0, top)
 96 |             right = min(size, left + size)
 97 |             bottom = min(size, left + size)
 98 |             if mirror[i] > 0:
 99 |                 images[i,:,size-bottom:size-top,size-right:size-left] = image[:,top:bottom, left:right][:,:,::-1]
100 |             else:
101 |                 images[i,:,size-bottom:size-top,size-right:size-left] = image[:,top:bottom,left:right]
102 |         return images
103 | 


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
  1 | # -----------------------------------------------------------------------
  2 | # eval.py
  3 | # Verification in Python code
  4 | #
  5 | # Creation Date   : 04/Aug./2017
  6 | # Copyright (C) <2017> Hiroki Nakahara, All rights reserved.
  7 | # 
  8 | # Released under the GPL v2.0 License.
  9 | # 
 10 | # Acknowledgements:
 11 | # This source code is based on following projects:
 12 | #
 13 | # Chainer binarized neural network by Daisuke Okanohara
 14 | # https://github.com/hillbig/binary_net
 15 | # Various CNN models including Deep Residual Networks (ResNet) 
 16 | #  for CIFAR10 with Chainer by mitmul
 17 | # https://github.com/mitmul/chainer-cifar10
 18 | # -----------------------------------------------------------------------
 19 | 
 20 | import argparse
 21 | #import cPickle as pickle # python 2.7
 22 | import _pickle as pickle # python 3.5
 23 | import numpy as np
 24 | import os
 25 | import chainer
 26 | from chainer import optimizers, Variable
 27 | from chainer import serializers
 28 | import net3 # it will be generated by the GUINNESS
 29 | 
 30 | import trainer
 31 | import chainer.links as L
 32 | 
 33 | import time
 34 | import weight_clip
 35 | 
 36 | import cv2
 37 | 
 38 | if __name__ == '__main__':
 39 |     parser = argparse.ArgumentParser(description='Evaluation Python Code')
 40 |     parser.add_argument('--gpu', '-g', type=int, default=-1,
 41 |                         help='GPU device ID (negative value indicates CPU)')
 42 |     parser.add_argument('--model', '-m', type=str, default='hoge.model',
 43 |                         help='Pre-Trained Model Name')
 44 |     parser.add_argument('--dataset', '-d', type=str, default='two96_dataset.pkl',
 45 |                         help='Dataset image pkl file path')
 46 |     parser.add_argument('--size', type=int, default=32,
 47 |                         help='Test Image Size')
 48 |     args = parser.parse_args()
 49 | 
 50 |     print('loading dataset...')
 51 |     fname = args.dataset + '_dataset.pkl'
 52 |     with open(fname, 'rb') as f:
 53 |         images = pickle.load(f)
 54 |         threshold = np.int32(len(images['train'])/10*9)
 55 |         train_x = images['train'][:threshold].astype(np.float32)
 56 |         valid_x = images['train'][threshold:].astype(np.float32)
 57 |         test_x = images['test'].astype(np.float32)
 58 | 
 59 |     fname = args.dataset + '_label.pkl'
 60 |     with open(fname, 'rb') as f:
 61 |         labels = pickle.load(f)
 62 |         train_y = labels['train'][:threshold].astype(np.int32)
 63 |         valid_y = labels['train'][threshold:].astype(np.int32)
 64 |         test_y = labels['test'].astype(np.int32)
 65 | 
 66 |     print('start evaluation')
 67 | 
 68 |     net = net3.CNN()
 69 |     print("load pre-trained npz")
 70 |     serializers.load_npz(args.model, net)
 71 | 
 72 |     # set image size
 73 |     img_siz = args.size
 74 | 
 75 |     eval_x = np.ones((1,3,img_siz,img_siz))
 76 | 
 77 |     # load tag file
 78 |     name = []
 79 |     fname = args.dataset + '_tag.txt' # tag file be generated by 'gen_training_data.py'
 80 |     with open(fname, 'r') as f:
 81 |         lines2 = f.readlines()
 82 |         for line in lines2:
 83 |             name.append(line.rstrip('\n\r'))
 84 | 
 85 |     n_class = len(name)
 86 | 
 87 |     conf_matrix = np.zeros((n_class,n_class))
 88 | 
 89 |     # specify the number of tests
 90 |     n_tests = 10
 91 |     n_acc   = 0
 92 | 
 93 |     # perform test
 94 |     for idx in range(0,n_tests):
 95 |         image = test_x
 96 |         image = image.clip(0,255).astype(np.uint8)
 97 | 
 98 |         print("label=%d(%s)" % (test_y[idx],name[test_y[idx]]))
 99 | 
100 |         # Note that, the test image is generated by the OpenCV2.0, thus, its format consists of 'BGR' not 'RGB'
101 |         image1 = image[idx].reshape(3, img_siz, img_siz).transpose(1, 2, 0)
102 | 
103 | # generate test bench
104 | # you can comment out following to generate more test bech for C/C++ simulation in the Vivado HLS, and an FPGA board
105 |         '''
106 |         bench_img = image1.reshape(-1,)
107 |         fname = 'test_img_%d.txt' % idx # + str(idx) + '.txt'
108 |         print(' Test Image Fileout -> %s' % fname)
109 |         np.savetxt(fname, bench_img, fmt="%.0f", delimiter=",")
110 |         '''
111 | 
112 |         eval_x[0,:,:,:] = test_x[idx] #/ 256.0
113 | 
114 |         result = net(Variable(eval_x.astype(np.float32)),train=False)
115 |         print(result.data)
116 |         print("test=%d(%s)" % (result.data.argmax(),name[result.data.argmax()]))
117 | 
118 |         # show test image
119 |         cv2.imshow("test image", image1)
120 |         cv2.waitKey(0)
121 |         cv2.destroyAllWindows()
122 | 
123 |         # regist a confusion matrix
124 |         conf_matrix[test_y[idx],result.data.argmax()] = conf_matrix[test_y[idx],result.data.argmax()] + 1
125 | 
126 |         if test_y[idx] == result.data.argmax():
127 |             n_acc = n_acc + 1
128 | 
129 |     # show a confusion matrix    
130 |     print("Confusion Matrix")
131 |     print(conf_matrix.astype(np.int32))
132 |     print("# corrests=%d" % n_acc)
133 |     print("Accuracy=%f" % (float(n_acc) / n_tests))
134 | 
135 | # -----------------------------------------------------------------------
136 | # END OF PROGRAM
137 | # -----------------------------------------------------------------------
138 | 


--------------------------------------------------------------------------------
/conv_npz2txt_v2.py:
--------------------------------------------------------------------------------
  1 | # -----------------------------------------------------------------------
  2 | # conv_npz2txt_v2.py:
  3 | # Convert to a binarized weight and an integer bias
  4 | #
  5 | # Creation Date   : 04/Aug./2017
  6 | # Copyright (C) <2017> Hiroki Nakahara, All rights reserved.
  7 | # 
  8 | # Released under the GPL v2.0 License.
  9 | # 
 10 | # -----------------------------------------------------------------------
 11 | 
 12 | import pickle
 13 | from chainer import serializers
 14 | import numpy as np
 15 | import argparse
 16 | import sys
 17 | 
 18 | parser = argparse.ArgumentParser(description='Weight converter')
 19 | parser.add_argument('--config_path', '-c', type=str, default='./hoge',
 20 |                         help='Configuration pickle file path')
 21 | args = parser.parse_args()
 22 | 
 23 | # load configuration from guiness GUI
 24 | config_file = args.config_path + "/config.pickle"
 25 | with open(config_file, mode='rb') as f:
 26 | 	config = pickle.load(f)
 27 | 
 28 | initial_options = config['initial_options']
 29 | n_in_fmaps = config['n_in_fmaps']
 30 | n_ou_fmaps = config['n_ou_fmaps']
 31 | infmap_siz = config['infmap_siz']
 32 | ksiz = config['ksiz']
 33 | imgsiz = config['imgsiz']
 34 | max_dense_siz = config['max_dense_siz']
 35 | out_dense_siz = config['out_dense_siz']
 36 | bias_siz = config['bias_siz']
 37 | weight_siz = config['weight_siz']
 38 | max_bconv_width = config['max_bconv_width']
 39 | num_layer = config['num_layer']
 40 | 
 41 | model_file = args.config_path + "/temp.model"
 42 | dat = np.load(model_file) 
 43 | 
 44 | # convert .model to weights
 45 | dense_idx = 0
 46 | conv_idx = 0
 47 | bn_idx = 0
 48 | 
 49 | for layer in range(num_layer):
 50 | 	# weights for convolutional layer
 51 | 	if initial_options[layer] == 0 or initial_options[layer] == 1:
 52 | 		key = 'conv%d/W' % conv_idx
 53 | 		print("converting %s" % key)
 54 | 
 55 | 		bincoef = np.where(dat[key]>=0,1,0).astype(dat[key].dtype, copy=False)
 56 | 
 57 | 		bincoef2 = bincoef.reshape(-1,)
 58 | 
 59 | 		# Text File Out
 60 | 		fname = args.config_path + '/sdsoc/to_sd_card/conv%dW.txt' % conv_idx
 61 | 
 62 | 		print(' Fileout (.txt) -> %s' % fname)
 63 | 		np.savetxt(fname, bincoef2,fmt="%.0f",delimiter=",")
 64 | 
 65 | 		# Header file out
 66 | 		fname = args.config_path + '/HLS/conv%dW.csv' % conv_idx
 67 | 		np.savetxt(fname, bincoef2[None,:],delimiter=",",fmt="%.0f")
 68 | 
 69 | 		f = open(fname)
 70 | 		line = f.read()
 71 | 		f.close()
 72 | 
 73 | 		header = 'ap_uint<1> t_bin_conv%dW[%d]={' % (conv_idx,len(bincoef2)) + line + '};' 
 74 | 
 75 | 		fname = args.config_path + '/HLS/t_bin_conv%dW.h' % conv_idx
 76 | 		print(' Fileout (HLS) -> %s' % fname)
 77 | 		f = open(fname, 'w')
 78 | 		f.write(header)
 79 | 		f.close()
 80 | 
 81 | 		# Update Index
 82 | 		conv_idx += 1
 83 | 
 84 | 
 85 | 	# weights for FC layer
 86 | 	if initial_options[layer] == 4:
 87 | 		key = 'fc%d/W' % dense_idx
 88 | 		print("converting %s" % key)
 89 | 		bincoef = np.where(dat[key]>=0,1,0).astype(dat[key].dtype, copy=False)
 90 | 
 91 | 		bincoef2 = bincoef.reshape(-1,)
 92 | 
 93 | 		#File out Textfile for SDSoC
 94 | 		fname = args.config_path + '/sdsoc/to_sd_card/fc%dW.txt' % dense_idx
 95 | 
 96 | 		print(' Fileout -> %s' % fname)
 97 | 		np.savetxt(fname, bincoef2,fmt="%.0f",delimiter=",")
 98 | 
 99 | 		# Fileout headerfile for HLS
100 | 		fname = args.config_path + '/HLS/fc%dW.csv' % dense_idx
101 | 		np.savetxt(fname, bincoef2[None,:],delimiter=",",fmt="%.0f")
102 | 
103 | 		f = open(fname)
104 | 		line = f.read()
105 | 		f.close()
106 | 
107 | 		header = 'ap_uint<1> t_bin_fc%dW[%d]={' % (dense_idx,len(bincoef2)) + line + '};' 
108 | 
109 | 		fname = args.config_path + '/HLS/t_bin_fc%dW.h' % dense_idx
110 | 		print(' Fileout (HLS) -> %s' % fname)
111 | 		f = open(fname, 'w')
112 | 		f.write(header)
113 | 		f.close()
114 | 
115 | 		# Update Index
116 | 		dense_idx += 1
117 | 
118 | 	# bias
119 | 	if initial_options[layer] == 0 or initial_options[layer] == 1 or initial_options[layer] == 4:
120 | 		key = 'b%d' % bn_idx
121 | 		print("converting %s" % key)
122 | 		var = dat[key+'/avg_var']
123 | 		beta = dat[key+'/beta']
124 | 		gamma = dat[key+'/gamma']
125 | 		mean = dat[key+'/avg_mean']
126 | 		bn_val = np.floor((np.sqrt(var) * beta) / gamma - mean)
127 | 
128 | 		txt_val = ''
129 | 		head_val = ''
130 | 		for ofeat in range(int(n_ou_fmaps[layer])):
131 | 			txt_val += "%d\n" % int(round(bn_val[ofeat],0))
132 | 			if ofeat != 0:
133 | 				head_val += ','
134 | 			head_val += "%d" % int(round(bn_val[ofeat],0))
135 | 
136 | 		# Fileout Textfile for SDSoC
137 | 		fname = args.config_path + '/sdsoc/to_sd_card/b%d_BNFb.txt' % bn_idx
138 | 
139 | 		print(' Fileout -> %s' % fname)
140 | 		with open(fname,'w') as f:
141 | 			f.write(txt_val)
142 | 
143 | 		# Fileout headerfile for HLS
144 | 		fname = args.config_path + '/HLS/b%d_BNFb.h' % bn_idx
145 | 
146 | 		if bn_idx == 0:
147 | 			header = 'ap_int<20> b%d_BNFb[%d] ={' % (bn_idx,int(n_ou_fmaps[layer])) + head_val + '};' 
148 | 		else:
149 | 			header = 'ap_int<16> b%d_BNFb[%d] ={' % (bn_idx,int(n_ou_fmaps[layer])) + head_val + '};' 
150 | 
151 | 		print(' Fileout -> %s' % fname)
152 | 		with open(fname,'w') as f:
153 | 			f.write(header)
154 | 
155 | 		# Update Index
156 | 		bn_idx += 1
157 | 
158 | # -----------------------------------------------------------------------
159 | # END OF PROGRAM
160 | # -----------------------------------------------------------------------
161 | 


--------------------------------------------------------------------------------
/template_cpp_r7_socket_main.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * C++ Template for a Binarized CNN
  3 |  *
  4 |  *  Created on: 2017/07/01
  5 |  *      Author: H. Nakahara
  6 |  */
  7 | 
  8 | #include <iostream>
  9 | #include <stdio.h>
 10 | #include <stdlib.h>
 11 | #include <unistd.h>
 12 | #include <string.h>
 13 | #include <netinet/in.h>
 14 | #include <arpa/inet.h>
 15 | #include <sys/socket.h>
 16 | #include <sys/types.h>
 17 | 
 18 | #include <math.h>
 19 | 
 20 | using namespace std;
 21 | 
 22 | #include <ap_int.h>
 23 | 
 24 | #ifdef __SDSCC__
 25 | #include "sds_lib.h"
 26 | #else 
 27 | #define sds_alloc(x)(malloc(x))
 28 | #define sds_free(x)(free(x))
 29 | #endif
 30 | 
// Declaration of the binarized-CNN entry point; it is not defined in this file.
// main() below calls it once with init = 1 right after the weights and biases
// have been loaded ("setup"), and then with init = 0 for every received image,
// reading the outputs back from fc_result.
// The two variants differ only in how the weight/bias buffers are declared
// (plain pointers under SDSCC, sized arrays otherwise).
// NOTE(review): the exact semantics of `init` are set by the implementation - confirm there.
void BinCNN(
#ifdef __SDSCC__
        int *t_bin_convW,
        int *t_BNFb,
        ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
        int fc_result[(OUT_DENSE_SIZ)],
        int init
#else 
        int t_bin_convW[(WEIGHT_SIZ)],
        int t_BNFb[(BIAS_SIZ)],
        ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
        int fc_result[(OUT_DENSE_SIZ)],
        int init
#endif
);
 46 | 
 47 | //--------------------------------------------------------------------
 48 | // Main Function
 49 | //--------------------------------------------------------------------
 50 | int main( int argc, char *argv[])
 51 | {
 52 |     ap_int<64> *t_tmp_img;
 53 |     t_tmp_img = (ap_int<64> *)sds_alloc(((IMGSIZ)*(IMGSIZ))*sizeof(ap_int<64>));
 54 | 
 55 |     int fc_result[(OUT_DENSE_SIZ)];
 56 |     int rgb, y, x, i, offset;
 57 | 
 58 |     // copy input image to f1
 59 |     for( y = 0; y < (IMGSIZ); y++){
 60 |     	for( x = 0; x < (IMGSIZ); x++){
 61 |     		t_tmp_img[y*(IMGSIZ)+x] = 0;
 62 |         }
 63 |     }
 64 | 
 65 |     // ------------------------------------------------------------------
 66 |     printf("load weights\n");
 67 |     int *t_bin_convW;
 68 | 	int *t_BNFb;
 69 | 	t_bin_convW = (int *)sds_alloc(((WEIGHT_SIZ))*sizeof(int));
 70 | 	t_BNFb   = (int *)sds_alloc(((BIAS_SIZ))*sizeof(int));
 71 | 
 72 | 	int of, inf, d_value;
 73 | 	FILE *fp;
 74 | 	char line[256];
 75 | 
 76 | (READ_BIAS_MEM)
 77 | 
 78 | (READ_WEIGHT_MEM)
 79 | 
 80 |     printf("setup... \n");
 81 | 	BinCNN( t_bin_convW, t_BNFb, t_tmp_img, fc_result, 1);
 82 | 
 83 |     // setup socket connection -----------------------------------------
 84 |     struct sockaddr_in addr;
 85 |     int sock;
 86 |     //char buf[32];
 87 |     char buf[20000]; // more than 64x64x3(RGB) bytes
 88 |     int data;
 89 | 
 90 |     char ipadr[512];
 91 |     int portnum;
 92 | 
 93 |     if( argc != 3){
 94 |         printf("USAGE: #./(program).elf [IPADR] [PORTNUM]\n");
 95 |         exit(-1);
 96 |     }
 97 | 
 98 |     sscanf( argv[1], "%s", ipadr);
 99 |     sscanf( argv[2], "%d", &portnum);
100 | 
101 |     printf("[INFO] IPADR=%s PORT=%d\n", ipadr, portnum);
102 | 
103 |     /* make a socket */
104 |     sock = socket(AF_INET, SOCK_STREAM, 0);
105 |     /* set parameters */
106 |     addr.sin_family = AF_INET;
107 |     addr.sin_port = htons(portnum); //10050
108 |     addr.sin_addr.s_addr = inet_addr(ipadr); //"192.168.2.100"
109 | 
110 |     /* connect a server (host PC) */
111 |     connect(sock, (struct sockaddr*)&addr, sizeof(addr));
112 | 
113 |     // main loop -------------------------------------------------------
114 |     while(1){
115 |         // receive data
116 |         // printf("Receive data\n");
117 |         memset(buf, 0, sizeof(buf));
118 |         data = read(sock, buf, sizeof(buf));
119 | 
120 |         // set pixel
121 |         // printf("Set Pixel");
122 |         for( y = 0; y < (IMGSIZ); y++){
123 |             for( x = 0; x < (IMGSIZ); x++){
124 |                 ap_int<64>tmp = 0;
125 |                 for( rgb = 0; rgb < (NUMIMG); rgb++){
126 |                     tmp = tmp << 20;
127 | 
128 |                     tmp |= ( buf[y * (IMGSIZ) * 3 + x * 3 + rgb] & 0xFFFFF);
129 |                 }
130 |                 t_tmp_img[ y * (IMGSIZ) + x] = tmp;
131 |             }
132 |         }
133 |         // printf("OK\n");
134 | 
135 |         // printf("Inference...\n");
136 |         BinCNN( t_bin_convW, t_BNFb, t_tmp_img, fc_result, 0);
137 |         // printf("OK\n");
138 | 
139 |         // printf("Result\n");
140 |         // for( i = 0; i < (OUT_DENSE_SIZ); i++)printf("%5d ", fc_result[i]);
141 |         // printf("\n");
142 | 
143 |         // send data to server
144 |         double softmax[(OUT_DENSE_SIZ)];
145 |         double total_softmax = 0.0;
146 |         double max_val = -9999.0;
147 | 
148 |         for( i = 0; i < (OUT_DENSE_SIZ); i++){
149 |             if( (double)fc_result[i] > max_val)
150 |             	max_val = fc_result[i];
151 |         }
152 | 
153 |         for( i = 0; i < (OUT_DENSE_SIZ); i++){
154 |             total_softmax += exp( (double)(fc_result[i]) / max_val);
155 |         }
156 | 
157 | 	for( i = 0; i < (OUT_DENSE_SIZ); i++){
158 |             softmax[i] = (double)exp((double)fc_result[i] / max_val) / total_softmax;
159 |             buf[i] = (char)(softmax[i] * 100.0);
160 | 
161 |             // printf("i=%d buf=%d softmax=%f\n", i, buf[i], softmax[i]);
162 |         }
163 | 
164 |         // printf("Send Data");
165 |         write( sock, buf, (OUT_DENSE_SIZ));
166 |     }
167 | 
168 |     sds_free( t_tmp_img); sds_free( t_bin_convW); sds_free( t_BNFb);
169 |     close(sock);
170 | 
171 | }
172 | 
173 | // ------------------------------------------------------------------
174 | // END OF PROGRAM
175 | // ------------------------------------------------------------------
176 | 


--------------------------------------------------------------------------------
/link_batch_normalization.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import numpy
  5 | 
  6 | from chainer.functions.normalization import batch_normalization
  7 | from chainer import initializers
  8 | from chainer import link
  9 | from chainer import variable
 10 | 
 11 | import function_batch_normalization
 12 | 
 13 | class BatchNormalization(link.Link):
 14 | 
 15 |     """Batch normalization layer on outputs of linear or convolution functions.
 16 | 
 17 |     This link wraps the :func:`~chainer.functions.batch_normalization` and
 18 |     :func:`~chainer.functions.fixed_batch_normalization` functions.
 19 | 
 20 |     It runs in three modes: training mode, fine-tuning mode, and testing mode.
 21 | 
 22 |     In training mode, it normalizes the input by *batch statistics*. It also
 23 |     maintains approximated population statistics by moving averages, which can
 24 |     be used for instant evaluation in testing mode.
 25 | 
 26 |     In fine-tuning mode, it accumulates the input to compute *population
 27 |     statistics*. In order to correctly compute the population statistics, a
 28 |     user must use this mode to feed mini batches running through whole training
 29 |     dataset.
 30 | 
 31 |     In testing mode, it uses pre-computed population statistics to normalize
 32 |     the input variable. The population statistics is approximated if it is
 33 |     computed by training mode, or accurate if it is correctly computed by
 34 |     fine-tuning mode.
 35 | 
 36 |     Args:
 37 |         size (int or tuple of ints): Size (or shape) of channel
 38 |             dimensions.
 39 |         decay (float): Decay rate of moving average. It is used on training.
 40 |         eps (float): Epsilon value for numerical stability.
 41 |         dtype (numpy.dtype): Type to use in computing.
 42 |         use_gamma (bool): If `True`, use scaling parameter. Otherwise, use
 43 |             unit(1) which makes no effect.
 44 |         use_beta (bool): If `True`, use shifting parameter. Otherwise, use
 45 |             unit(0) which makes no effect.
 46 | 
 47 |     See: `Batch Normalization: Accelerating Deep Network Training by Reducing\
 48 |           Internal Covariate Shift <http://arxiv.org/abs/1502.03167>`_
 49 | 
 50 |     .. seealso::
 51 |        :func:`~chainer.functions.batch_normalization`,
 52 |        :func:`~chainer.functions.fixed_batch_normalization`
 53 | 
 54 |     Attributes:
 55 |         gamma (~chainer.Variable): Scaling parameter.
 56 |         beta (~chainer.Variable): Shifting parameter.
 57 |         avg_mean (~chainer.Variable): Population mean.
 58 |         avg_var (~chainer.Variable): Population variance.
 59 |         N (int): Count of batches given for fine-tuning.
 60 |         decay (float): Decay rate of moving average. It is used on training.
 61 |         eps (float): Epsilon value for numerical stability. This value is added
 62 |             to the batch variances.
 63 | 
 64 |     """
 65 | 
 66 |     def __init__(self, size, decay=0.9, eps=2e-5, dtype=numpy.float32,
 67 |                  use_gamma=True, use_beta=True,
 68 |                  initial_gamma=None, initial_beta=None):
 69 |         super(BatchNormalization, self).__init__()
 70 |         if use_gamma:
 71 |             self.add_param('gamma', size, dtype=dtype)
 72 |             if initial_gamma is None:
 73 |                 initial_gamma = initializers.One()
 74 |             initializers.init_weight(self.gamma.data, initial_gamma)
 75 |         if use_beta:
 76 |             self.add_param('beta', size, dtype=dtype)
 77 |             if initial_beta is None:
 78 |                 initial_beta = initializers.Zero()
 79 |             initializers.init_weight(self.beta.data, initial_beta)
 80 |         self.add_persistent('avg_mean', numpy.zeros(size, dtype=dtype))
 81 |         self.add_persistent('avg_var', numpy.zeros(size, dtype=dtype))
 82 |         self.add_persistent('N', 0)
 83 |         self.decay = decay
 84 |         self.eps = eps
 85 | 
 86 |     def __call__(self, x, test=False, finetune=False):
 87 |         """Invokes the forward propagation of BatchNormalization.
 88 | 
 89 |         BatchNormalization accepts additional arguments, which controls three
 90 |         different running mode.
 91 | 
 92 |         Args:
 93 |             x (Variable): An input variable.
 94 |             test (bool): If ``True``, BatchNormalization runs in testing mode;
 95 |                 it normalizes the input using pre-computed statistics.
 96 |             finetune (bool): If ``True``, BatchNormalization runs in
 97 |                 fine-tuning mode; it accumulates the input array to compute
 98 |                 population statistics for normalization, and normalizes the
 99 |                 input using batch statistics.
100 | 
101 |         If ``test`` and ``finetune`` are both ``False``, then
102 |         BatchNormalization runs in training mode; it computes moving averages
103 |         of mean and variance for evaluation during training, and normalizes the
104 |         input using batch statistics.
105 | 
106 |         """
107 | #        use_batch_mean = not test or finetune --------------------------------
108 | # -----------------------------------------------------------------------------
109 |         use_batch_mean = False
110 | 
111 |         if hasattr(self, 'gamma'):
112 |             gamma = self.gamma
113 |         else:
114 |             gamma = variable.Variable(self.xp.ones(
115 |                 self.avg_mean.shape, dtype=x.dtype), volatile='auto')
116 |         if hasattr(self, 'beta'):
117 |             beta = self.beta
118 |         else:
119 |             beta = variable.Variable(self.xp.zeros(
120 |                 self.avg_mean.shape, dtype=x.dtype), volatile='auto')
121 | 
122 |         if use_batch_mean:
123 |             if finetune:
124 |                 self.N += 1
125 |                 decay = 1. - 1. / self.N
126 |             else:
127 |                 decay = self.decay
128 | 
129 | 
130 |             func = function_batch_normalization.BatchNormalizationFunction(
131 |                 self.eps, self.avg_mean, self.avg_var, True, decay)
132 |             ret = func(x, gamma, beta)
133 | 
134 |             self.avg_mean = func.running_mean
135 |             self.avg_var = func.running_var
136 | 
137 |         else:
138 |             # Use running average statistics or fine-tuned statistics.
139 |             mean = variable.Variable(self.avg_mean, volatile='auto')
140 |             var = variable.Variable(self.avg_var, volatile='auto')
141 |             ret = batch_normalization.fixed_batch_normalization(
142 |                 x, gamma, beta, mean, var, self.eps)
143 | 
144 |         return ret
145 | 
146 |     def start_finetuning(self):
147 |         """Resets the population count for collecting population statistics.
148 | 
149 |         This method can be skipped if it is the first time to use the
150 |         fine-tuning mode. Otherwise, this method should be called before
151 |         starting the fine-tuning mode again.
152 | 
153 |         """
154 |         self.N = 0
155 | 


--------------------------------------------------------------------------------
/gen_training_data.py:
--------------------------------------------------------------------------------
  1 | # -----------------------------------------------------------------------
  2 | # gen_training_data.py:
  3 | # Training File Generator for prepared image files
  4 | #
  5 | # Creation Date   : 04/Aug./2017
  6 | # Copyright (C) <2017> Hiroki Nakahara, All rights reserved.
  7 | # 
  8 | # Released under the GPL v2.0 License.
  9 | # 
 10 | # -----------------------------------------------------------------------
 11 | 
 12 | from chainer.datasets import tuple_dataset
 13 | from PIL import Image
 14 | import numpy as np
 15 | import glob
 16 | import cv2
 17 | #import cPickle as pickle # python 2.7
 18 | import _pickle as pickle # python 3.5
 19 | import matplotlib.pyplot as plt
 20 | import argparse
 21 | import random
 22 | from scipy import ndimage
 23 | import sys
 24 | 
 25 | parser = argparse.ArgumentParser(description='training dataset generator')
 26 | parser.add_argument('--pathfile', '-p', type=str, default='./imglist.txt',
 27 |                         help='Image File List (test file)')
 28 | parser.add_argument('--dataset', '-d', type=str, default='./hoge',
 29 |                         help='Pickle object for dataset output file name')
 30 | parser.add_argument('--size', '-s', type=int, default=32,
 31 |                         help='dataset size (default 32x32)')
 32 | 
 33 | # options for argumentation
 34 | parser.add_argument('--rotate', '-r', type=int, default=1,
 35 |                         help='Rotate')
 36 | parser.add_argument('--flip', '-f', type=str, default='no',
 37 |                         help='Flip')
 38 | parser.add_argument('--crop', '-c', type=int, default=1,
 39 |                         help='Crop')
 40 | parser.add_argument('--keepaspect', '-k', type=str, default='no',
 41 |                         help='Keep aspect ratio (default no)')
 42 | 
 43 | args = parser.parse_args()
 44 | 
 45 | dataset_fname = args.dataset + '_dataset.pkl'
 46 | label_fname = args.dataset + '_label.pkl'
 47 | tag_fname = args.dataset + '_tag.txt'
 48 | 
 49 | 
 50 | print("[INFO] IMAGE PATH FILE %s" % args.pathfile)
 51 | print("[INFO] DATASET FILE %s" % dataset_fname)
 52 | print("[INFO] LABEL FILE %s" % label_fname)
 53 | print("[INFO] TAG FILE %s" % tag_fname)
 54 | 
 55 | print("[INFO] DATASET SIZE %dx%d" % (int(args.size),int(args.size)))
 56 | print("[INFO] ROTATION %s" % args.rotate)
 57 | print("[INFO] FLIPPING %s" % args.flip)
 58 | print("[INFO] CROPPING %s" % args.crop)
 59 | print("[INFO] KEEP ASPECT RATIO %s" % args.keepaspect)
 60 | 
 61 | with open(args.pathfile, mode='r') as f:
 62 | 	lines2 = f.readlines()
 63 |  
 64 | pathsAndLabels = []
 65 | label_idx = 0
 66 | tags = []
 67 | for line in lines2:
 68 | 	words = line.split()
 69 | 	tags.append(words[1])
 70 | 	choped_line = words[0].rstrip('\n\r') + '/'
 71 | 	pathsAndLabels.append(np.asarray([choped_line, label_idx]))
 72 | 	print("[INFO] %s* are assigned to %d" % (choped_line, label_idx))
 73 | 	label_idx = label_idx + 1
 74 | 
 75 | # fileout tags
 76 | f = open(tag_fname, 'w')
 77 | for x in tags:
 78 | 	f.write(str(x) + "\n")
 79 | f.close()
 80 | 
 81 | # set data size
 82 | width = args.size
 83 | height = args.size
 84 | 
 85 | # get image path
 86 | allData = []
 87 | for pathAndLabel in pathsAndLabels:
 88 | 	path = pathAndLabel[0]
 89 | 	label = pathAndLabel[1]
 90 | 	imagelist = glob.glob(path + "*")
 91 | 	for imgName in imagelist:
 92 | 		allData.append([imgName, label])
 93 | 
 94 | allData = np.random.permutation(allData)
 95 | 
 96 | # set augmentation options
 97 | n_crop = args.crop
 98 | n_rotate = args.rotate
 99 | 
100 | if args.flip == 'yes' or args.rotate > 1:
101 | 	n_flip = 2
102 | else:
103 | 	n_flip = 1
104 | 
105 | # register all images, and normalization if needs,,,
106 | imageData = np.zeros((len(allData)*n_crop*n_rotate*n_flip,3,width,height))
107 | labelData = np.zeros(len(allData)*n_crop*n_rotate*n_flip)
108 | 
# Convert every source image into n_crop * n_flip * n_rotate samples and
# store them, with their labels, at consecutive positions of imageData /
# labelData.
n_total = len(allData) * n_crop * n_rotate * n_flip
idx = 0
for pathAndLabel in allData:
    sys.stderr.write('\r\033[K' + "CONVERTING IMAGE %d/%d" % (idx, n_total))
    sys.stderr.flush()

    org_img = cv2.imread(pathAndLabel[0])

    if org_img is None:
        print("ERROR %s CANNOT BE OPENED" % pathAndLabel[0])
        # sys.exit() instead of the site-provided exit() builtin.
        sys.exit()

    # padding empty (black) pixels to keep aspect ratio: centre the image on
    # a square canvas once per image, before any augmentation is derived.
    if args.keepaspect == 'yes':
        h, w = org_img.shape[:2]
        side = max(h, w)
        dst_img = np.zeros((side, side, 3), dtype=np.uint8)
        pad_top = (side - h) // 2
        pad_left = (side - w) // 2
        dst_img[pad_top:pad_top + h, pad_left:pad_left + w] = org_img
        org_img = dst_img

    for i in range(n_crop):
        for k in range(n_flip):
            for j in range(n_rotate):
                # cropping (the first pass, i == 0, keeps the full image)
                if i > 0:
                    h, w = org_img.shape[:2]

                    if args.keepaspect == 'no':
                        # Integer division: random.randint() needs ints.
                        h4 = h // 4
                        w4 = w // 4
                        left = random.randint(0, w4)
                        right = random.randint(w - w4, w)
                        top = random.randint(0, h4)
                        bottom = random.randint(h - h4, h)

                        img = org_img[top:bottom, left:right]
                    else:
                        # resize with cropping: trim a random margin from
                        # every side.  Work on a copy of the reference so
                        # org_img is not shrunk for later augmentations.
                        dd = random.randint(0, h // 8)
                        cropped = org_img[dd:h - dd, dd:w - dd]
                        rows, cols = cropped.shape[:2]

                        # sliding: translate by up to a quarter of the size
                        h4 = rows // 4
                        w4 = cols // 4
                        dw = random.randint(-w4, w4)
                        dh = random.randint(-h4, h4)
                        M = np.float32([[1, 0, dw], [0, 1, dh]])
                        img = cv2.warpAffine(cropped, M, (cols, rows))
                else:
                    img = org_img

                # flipping (if rotate, then flipping is also applied)
                if k != 0:
                    img = cv2.flip(img, 1)

                # rotation: 2 degrees per rotation step
                img = ndimage.rotate(img, 2 * j, reshape=False)

                # Resize
                img = cv2.resize(img, (width, height))

                # Transpose for Chainer dataset
                reshaped = img.transpose(2, 0, 1)  # (Y,X,BGR) -> (BGR,Y,X)

                # store temporary memory
                imageData[idx] = reshaped
                labelData[idx] = np.int32(pathAndLabel[1])

                idx = idx + 1
193 | 
imageData = imageData.astype(np.uint8)

# generate pickle file: the first 9/10 of the samples go to 'train', the
# remainder to 'test'
threshold = np.int32(len(imageData)/10*9)

image = {'train': imageData[0:threshold], 'test': imageData[threshold:]}
label = {'train': labelData[0:threshold], 'test': labelData[threshold:]}

# (message, output file, object to pickle)
outputs = (
    ("[INFO] SAVE %s as an image dataset", dataset_fname, image),
    ("[INFO] SAVE %s as a label dataset", label_fname, label),
)
for message, out_name, payload in outputs:
    print(message % out_name)
    with open(out_name, mode='wb') as f:
        pickle.dump(payload, f)
213 | 
214 | # -----------------------------------------------------------------------
215 | # END OF PROGRAM
216 | # -----------------------------------------------------------------------
217 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | # -----------------------------------------------------------------------
  2 | # train.py
  3 | # Trainer for a binarized CNN
  4 | #
  5 | # Creation Date   : 04/Aug./2017
  6 | # Copyright (C) <2017> Hiroki Nakahara, All rights reserved.
  7 | # 
  8 | # Released under the GPL v2.0 License.
  9 | # 
 10 | # Acknowledgements:
 11 | # This source code is based on following projects:
 12 | #
 13 | # Chainer binarized neural network by Daisuke Okanohara
 14 | # https://github.com/hillbig/binary_net
 15 | # Various CNN models including Deep Residual Networks (ResNet) 
 16 | #  for CIFAR10 with Chainer by mitmul
 17 | # https://github.com/mitmul/chainer-cifar10
 18 | # -----------------------------------------------------------------------
 19 | 
 20 | import argparse
 21 | #import cPickle as pickle # python 2.7
 22 | import _pickle as pickle # python 3.5
 23 | import numpy as np
 24 | import os
 25 | import chainer
 26 | from chainer import optimizers
 27 | from chainer import serializers
 28 | import net2 # it will be generated by the GUINNESS GUI
 29 | 
 30 | import trainer
 31 | 
 32 | import time
 33 | import weight_clip
 34 | 
 35 | if __name__ == '__main__':
 36 |     parser = argparse.ArgumentParser(description='CIFAR-10 dataset trainer')
 37 |     parser.add_argument('--gpu', '-g', type=int, default=-1,
 38 |                         help='GPU device ID (negative value indicates CPU)')
 39 |     parser.add_argument('--model', '-m', type=str, default='bincnn', choices=['bincnn'],
 40 |                         help='Model name')
 41 |     parser.add_argument('--batch_size', '-b', type=int, default=20,
 42 |                         help='Mini batch size')
 43 |     parser.add_argument('--dataset', '-d', type=str, default='image.pkl',
 44 |                         help='Dataset image pkl file path')
 45 |     parser.add_argument('--label', '-l', type=str, default='label.pkl',
 46 |                         help='Dataset label pkl file path')
 47 |     parser.add_argument('--prefix', '-p', type=str, default='temp', # should be project name
 48 |                         help='Prefix of model parameter files')
 49 |     parser.add_argument('--iter', type=int, default=10,
 50 |                         help='Training iteration')
 51 |     parser.add_argument('--save_iter', type=int, default=0,
 52 |                         help='Iteration interval to save model parameter file.')
 53 |     parser.add_argument('--lr_decay_iter', type=int, default=100,
 54 |                         help='Iteration interval to decay learning rate')
 55 |     parser.add_argument('--weight_decay', type=float, default=0.0001,
 56 |                         help='Weight decay')
 57 |     parser.add_argument('--optimizer', type=str, default='sgd', choices=['sgd', 'adam', 'momentum', 'delta'],
 58 |                         help='Optimizer name')
 59 |     parser.add_argument('--lr', type=float, default=0.01,
 60 |                         help='Initial learning rate for SGD')
 61 |     parser.add_argument('--alpha', type=float, default=0.00005,
 62 |                         help='Initial alpha for Adam')
 63 |     parser.add_argument('--res_depth', type=int, default=18,
 64 |                         help='Depth of Residual Network')
 65 |     parser.add_argument('--skip_depth', action='store_true',
 66 |                         help='Use stochastic depth in Residual Network')
 67 |     parser.add_argument('--swapout', action='store_true',
 68 |                         help='Use swapout')
 69 |     parser.add_argument('--seed', type=int, default=1,
 70 |                         help='Random seed')
 71 |     parser.add_argument('--dim', type=int, default=3,
 72 |                         help='Dimension (default RGB, that is, 3)')
 73 |     parser.add_argument('--siz', type=int, default=32,
 74 |                         help='ImageSiz (default 32, that is, 32x32)')
 75 |     parser.add_argument('--guinness', type=str, default='./hoge', # should be project name
 76 |                         help='Prefix of model parameter files for the GUINNESS flow')
 77 |     parser.add_argument('--resume', type=str, default='no',
 78 |                         help='Resume traning, if pre-trained model exists')
 79 |     args = parser.parse_args()
 80 | 
 81 |     np.random.seed(args.seed)
 82 |     
 83 |     log_file_path = '{}_log.csv'.format(args.prefix)
 84 | #    lr_decay_iter = map(int, args.lr_decay_iter.split(','))
 85 | 
 86 |     if args.prefix is None:
 87 |         model_prefix = '{}_{}'.format(args.model, args.optimizer)
 88 |     else:
 89 |         model_prefix = args.prefix
 90 | 
 91 |     # load image dataset
 92 |     print('loading dataset %s' % args.dataset)
 93 |     with open(args.dataset, 'rb') as f:
 94 |         images = pickle.load(f)
 95 | 
 96 |         index = np.random.permutation(len(images['train']))        
 97 |         threshold = np.int32(len(images['train'])/10*9)
 98 |         train_index = index[:threshold]
 99 |         valid_index = index[threshold:]
100 | 
101 |         train_x = images['train'][train_index].astype(np.float32)
102 |         valid_x = images['train'][valid_index].astype(np.float32)
103 |         test_x = images['test'].astype(np.float32)
104 | 
105 |     
106 |     print("[INFO] #TRAIN DATA: %7d" % len(train_x))
107 |     print("[INFO] #VALID DATA: %7d" % len(valid_x))
108 |     print("[INFO] #TEST  DATA: %7d" % len(test_x))
109 | 
110 |     # load label dataset
111 |     with open(args.label, 'rb') as f:
112 |         labels = pickle.load(f)
113 |         train_y = labels['train'][train_index].astype(np.int32)
114 |         valid_y = labels['train'][valid_index].astype(np.int32)
115 |         test_y = labels['test'].astype(np.int32)
116 | 
117 |     # generate testbench (test_img.txt) for C/C++ code
118 |     idx = 0
119 |     image = test_x
120 |     
121 |     # extract only one image
122 |     image1 = image[idx]
123 | 
124 |     # generate text file as a bench marck
125 |     bench_img = image1.transpose(1,2,0)
126 |     bench_img = bench_img.reshape(-1,)
127 | 
128 |     fname = 'test_img.txt'
129 |     print(' Test Image Fileout -> %s' % fname)
130 |     np.savetxt(fname, bench_img,fmt="%.0f",delimiter=",")
131 | 
132 |     # start training
133 |     print('start training')
134 |     cifar_net = net2.CNN() # modified
135 | 
136 |     # resume pre-trained model, if exist
137 |     if args.resume == 'yes':
138 |         print(" Resume Pre-Trained Model")
139 |         serializers.load_npz('{}.model'.format(model_prefix), cifar_net)
140 | 
141 | 
142 |     if args.optimizer == 'sgd':
143 |         print("optimizer: SGD")
144 |         optimizer = optimizers.SGD(lr=args.lr)
145 |     elif args.optimizer == 'momentum':
146 |         print("optimizer: momentum SGD")
147 |         optimizer = optimizers.MomentumSGD(lr=args.lr)
148 |     elif args.optimizer == 'delta':
149 |         print("optimizer: AdaDelta")
150 |         optimizer = optimizers.AdaDelta()
151 |     else:
152 |         print("optimizer: Adam")
153 |         optimizer = optimizers.Adam(alpha=args.alpha)
154 |     optimizer.setup(cifar_net)
155 |     if args.weight_decay > 0:
156 |         optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
157 | 
158 |     optimizer.add_hook(weight_clip.WeightClip())
159 | 
160 |     cifar_trainer = trainer.CifarTrainer(cifar_net, optimizer, args.iter, args.batch_size, args.gpu)
161 | 
162 |     state = {'best_valid_error': 100, 'best_test_error': 100, 'clock': time.clock()}
163 |     def on_epoch_done(epoch, n, o, loss, acc, valid_loss, valid_acc, test_loss, test_acc):
164 |         error = 100 * (1 - acc)
165 |         valid_error = 100 * (1 - valid_acc)
166 |         test_error = 100 * (1 - test_acc)
167 |         print('epoch {} done'.format(epoch))
168 |         print('train loss: {} error: {}'.format(loss, error))
169 |         print('valid loss: {} error: {}'.format(valid_loss, valid_error))
170 |         print('test  loss: {} error: {}'.format(test_loss, test_error))
171 |         if valid_error < state['best_valid_error']:
172 |             serializers.save_npz('{}.model'.format(model_prefix), n)
173 |             serializers.save_npz('{}.state'.format(model_prefix), o)
174 |             state['best_valid_error'] = valid_error
175 |             state['best_test_error'] = test_error
176 |         if args.save_iter > 0 and (epoch + 1) % args.save_iter == 0:
177 |             serializers.save_npz('{}_{}.model'.format(model_prefix, epoch + 1), n)
178 |             serializers.save_npz('{}_{}.state'.format(model_prefix, epoch + 1), o)
179 |         # prevent divergence when using identity mapping model
180 |         if args.model == 'identity_mapping' and epoch < 9:
181 |             o.lr = 0.01 + 0.01 * (epoch + 1)
182 | #        if len(lr_decay_iter) == 1 and (epoch + 1) % lr_decay_iter[0] == 0 or epoch + 1 in lr_decay_iter:
183 |         # Note, "lr_decay_iter" should be a list object to store a training schedule,
184 |         # However, to keep up with the Python3.5, I changed to an integer value...
185 |         if (epoch + 1) % args.lr_decay_iter == 0 and epoch > 1:
186 |             if hasattr(optimizer, 'alpha'):
187 |                 o.alpha *= 0.1
188 |             else:
189 |                 o.lr *= 0.1
190 |         clock = time.clock()
191 |         print('elapsed time: {}'.format(clock - state['clock']))
192 |         state['clock'] = clock
193 |         
194 |         with open(log_file_path, 'a') as f:
195 |             f.write('{},{},{},{},{},{},{}\n'.format(epoch + 1, loss, error, valid_loss, valid_error, test_loss, test_error))
196 | 
197 |     if args.resume == 'no':
198 |         print(" Create %s as a New Logfile" % log_file_path)
199 |         with open(log_file_path, 'w') as f:
200 |             f.write('epoch,train loss,train acc,valid loss,valid acc,test loss,test acc\n')
201 |     else:
202 |         print(" Overwrite Existing Logfile %s" % log_file_path)
203 | 
204 |     cifar_trainer.fit(train_x, train_y, valid_x, valid_y, args.siz, args.dim, test_x, test_y, on_epoch_done)
205 | 
206 |     print('best test error: {}'.format(state['best_test_error']))
207 | 
208 |     with open("train_status.txt", 'w') as f:
209 |         f.write("stop")
210 | 
211 | # -----------------------------------------------------------------------
212 | # END OF PROGRAM
213 | # -----------------------------------------------------------------------
214 | 


--------------------------------------------------------------------------------
/function_integer_conv2d.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | from six import moves
  3 | 
  4 | from chainer import cuda
  5 | from chainer import function
  6 | from chainer.utils import conv
  7 | from chainer.utils import type_check
  8 | 
  9 | def _kern():
 10 |     return cuda.elementwise(
 11 |         'T x', 'T y',
 12 |         'y = x >= 0 ? 1 : -1',
 13 |         'binarize')
 14 | 
 15 | def _as_mat(x):
 16 |     if x.ndim == 2:
 17 |         return x
 18 |     return x.reshape(len(x), -1)
 19 | 
 20 | 
 21 | if cuda.cudnn_enabled:
 22 |     cudnn = cuda.cudnn
 23 |     libcudnn = cuda.cudnn.cudnn
 24 |     _cudnn_version = libcudnn.getVersion()
 25 |     _fwd_pref = libcudnn.CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
 26 |     if _cudnn_version >= 4000:
 27 |         _bwd_filter_pref = \
 28 |             libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
 29 |         _bwd_data_pref = \
 30 |             libcudnn.CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
 31 | 
 32 | 
 33 | def _check_cudnn_acceptable_type(x_dtype, W_dtype):
 34 |     return x_dtype == W_dtype and (
 35 |         _cudnn_version >= 3000 or x_dtype != numpy.float16)
 36 | 
 37 | 
 38 | def _pair(x):
 39 |     if hasattr(x, '__getitem__'):
 40 |         return x
 41 |     return x, x
 42 | 
 43 | 
class IntegerConv2DFunction(function.Function):
    """2-D convolution whose filter weights are binarized to +1 / -1.

    In the CPU path and in the im2col GPU path the filter ``W`` is replaced
    by ``1 where W >= 0 else -1`` before it is applied; the input ``x`` is
    used as given.  The cuDNN GPU branch passes ``W`` to cuDNN unchanged.

    NOTE(review): the cuDNN branch is guarded by ``self.cover_all and ...``,
    so with the default ``cover_all=False`` it is never taken -- presumably a
    deliberate way to force the binarizing im2col path; confirm before
    changing the guard.
    """

    def __init__(self, stride=1, pad=0, use_cudnn=True, cover_all=False):
        # stride / pad may be a scalar or a pair; store them per axis.
        self.sy, self.sx = _pair(stride)
        self.ph, self.pw = _pair(pad)
        self.use_cudnn = use_cudnn
        self.cover_all = cover_all

    def check_type_forward(self, in_types):
        """Check the inputs: (x, W) or (x, W, b) with consistent types."""
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)

        x_type = in_types[0]
        w_type = in_types[1]
        # x and W: floating point, 4-D, and matching on axis 1.
        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 4,
            w_type.ndim == 4,
            x_type.shape[1] == w_type.shape[1],
        )

        if n_in.eval() == 3:
            # Optional bias: 1-D, same dtype as x, one entry per W.shape[0].
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[0],
            )

    def forward_cpu(self, inputs):
        """Forward pass on CPU via im2col and a tensordot with sign(W)."""
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        # Keep the unfolded input; backward_cpu reuses it for gW.
        self.col = conv.im2col_cpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)

        # Binarize the filter: +1 where W >= 0, -1 elsewhere.
        Wb = numpy.where(W>=0,1,-1).astype(W.dtype, copy=False)

        y = numpy.tensordot(
            self.col, Wb, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)

        if b is not None:
            y += b

        # Move the last axis of y to position 1.
        return numpy.rollaxis(y, 3, 1),

    def forward_gpu(self, inputs):
        """Forward pass on GPU (cuDNN branch or im2col with sign(W))."""
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all)
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all)

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
        # NOTE(review): this guard requires cover_all to be True, so the
        # branch is skipped for the default cover_all=False; W is not
        # binarized inside it.
        if (self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y_desc = cudnn.create_tensor_descriptor(y)

            # Descriptors are stored on self; backward_gpu reads them.
            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx))
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, _fwd_pref,
                workspace_size)

            # Scaling constants 1 and 0, double precision only for 'd' input.
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            libcudnn.convolutionForward(
                handle, one.data, x_desc.value, x.data.ptr,
                self.filter_desc.value, W.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size, zero.data,
                y_desc.value, y.data.ptr)

            # TODO(beam2d): Support unshared bias
            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            # Implementation using im2col
            Xb = x

            # Keep the unfolded input; backward_gpu reuses it for gW.
            self.col = conv.im2col_gpu(
                Xb, kh, kw, self.sy, self.sx, self.ph, self.pw,
                cover_all=self.cover_all)

            W_mat = W.reshape(out_c, -1)
            col_mats = self.col.reshape(n, -1, out_h * out_w)

            # Binarize the flattened filter to +1 / -1.
            Wb_mat = _kern()(W_mat)

            y_mats = y.reshape(n, out_c, -1)
            # TODO(beam2d): Use streams or batch gemm
            for i in moves.range(n):
                y_mats[i] = Wb_mat.dot(col_mats[i])
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b[:, None, None]

        return y,

    def backward_cpu(self, inputs, grad_outputs):
        """Backward pass on CPU; returns (gx, gW) or (gx, gW, gb).

        gW is computed from gy and the stored im2col buffer and gx from the
        binarized filter; no derivative of the binarization step is applied.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)

        # Same binarization as in forward_cpu.
        Wb = numpy.where(W>=0,1,-1).astype(W.dtype, copy=False)

        gcol = numpy.tensordot(Wb, gy, (0, 1)).astype(x.dtype, copy=False)

        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb

    def backward_gpu(self, inputs, grad_outputs):
        """Backward pass on GPU; returns (gx, gW) or (gx, gW, gb)."""
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        # Same guard as in forward_gpu, so both passes take the same branch.
        if (self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            gx = cuda.cupy.empty_like(x)

            # Version 4000 and later use the workspace-based _v3 calls;
            # older versions use the _v2 calls.
            if _cudnn_version >= 4000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')

                algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                    handle, x_desc.value, gy_desc.value,
                    self.conv_desc.value, self.filter_desc.value,
                    _bwd_filter_pref, workspace_size)
                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)

                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, self.filter_desc.value, gy_desc.value,
                    self.conv_desc.value, x_desc.value, _bwd_data_pref,
                    workspace_size)
                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, x_desc.value, gx.data.ptr)
            else:
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            # im2col path: gW is accumulated over the batch from gy and the
            # buffer stored by forward_gpu.
            gW_mat = gW.reshape(out_c, c * kh * kw)
            col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
            gy_mats = gy.reshape(n, out_c, out_h * out_w)
            # TODO(beam2d): Use streams or batch gemm
            gW_mat[...] = 0
            for i in moves.range(n):
                gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

            # gx is propagated through the binarized filter.
            W_mat = W.reshape(out_c, -1)
            Wb_mat = _kern()(W_mat)

            gcol = cuda.cupy.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)

            for i in moves.range(n):
                gcol_mats[i] = cuda.cupy.dot(Wb_mat.T, gy_mats[i])

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
279 | 
280 | 
def func_convolution_2d(x, W, b=None, stride=1, pad=0, use_cudnn=True,
                        cover_all=False):
    """Apply :class:`IntegerConv2DFunction` to an input and a filter bank.

    This is a thin functional wrapper: it builds an
    ``IntegerConv2DFunction`` configured with ``stride``, ``pad``,
    ``use_cudnn`` and ``cover_all`` and calls it on the given variables.
    The bias is forwarded only when one is supplied.  The numerical
    behaviour (forward and backward passes) is defined entirely by
    ``IntegerConv2DFunction``.

    Shape notation used below:

    - :math:`n` is the batch size.
    - :math:`c_I` and :math:`c_O` are the number of input and output
      channels, respectively.
    - :math:`h` and :math:`w` are the height and width of the input image.
    - :math:`k_H` and :math:`k_W` are the height and width of the filters.

    Args:
        x (~chainer.Variable): Input variable of shape :math:`(n, c_I, h, w)`.
        W (~chainer.Variable): Weight variable of shape
            :math:`(c_O, c_I, k_H, k_W)`.
        b (~chainer.Variable): Bias variable of length :math:`c_O` (optional).
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        use_cudnn (bool): If ``True``, the function may use cuDNN when it is
            available.
        cover_all (bool): If ``True``, all spatial locations are convoluted
            into some output pixels. It may make the output size larger.

    Returns:
        ~chainer.Variable: Output variable.

    With stride :math:`(s_Y, s_X)` and padding :math:`(p_H, p_W)` the output
    size :math:`(h_O, w_O)` is::

        h_O = (h + 2 * p_H - k_H) / s_Y + 1
        w_O = (w + 2 * p_W - k_W) / s_X + 1

    When a bias vector is given it is added at every spatial location of
    the output.
    """
    conv_func = IntegerConv2DFunction(stride, pad, use_cudnn, cover_all)
    # Only pass the bias through when the caller actually provided one.
    operands = (x, W) if b is None else (x, W, b)
    return conv_func(*operands)
347 | 


--------------------------------------------------------------------------------
/function_binary_conv2d.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | from six import moves
  3 | 
  4 | from chainer import cuda
  5 | from chainer import function
  6 | from chainer.utils import conv
  7 | from chainer.utils import type_check
  8 | 
  9 | def _kern():
 10 |     return cuda.elementwise(
 11 |         'T x', 'T y',
 12 |         'y = x >= 0 ? 1 : -1',
 13 |         'binarize')
 14 | 
 15 | def _as_mat(x):
 16 |     if x.ndim == 2:
 17 |         return x
 18 |     return x.reshape(len(x), -1)
 19 | 
 20 | 
 21 | if cuda.cudnn_enabled:
 22 |     cudnn = cuda.cudnn
 23 |     libcudnn = cuda.cudnn.cudnn
 24 |     _cudnn_version = libcudnn.getVersion()
 25 |     _fwd_pref = libcudnn.CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
 26 |     if _cudnn_version >= 4000:
 27 |         _bwd_filter_pref = \
 28 |             libcudnn.CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
 29 |         _bwd_data_pref = \
 30 |             libcudnn.CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
 31 | 
 32 | 
 33 | def _check_cudnn_acceptable_type(x_dtype, W_dtype):
 34 |     return x_dtype == W_dtype and (
 35 |         _cudnn_version >= 3000 or x_dtype != numpy.float16)
 36 | 
 37 | 
 38 | def _pair(x):
 39 |     if hasattr(x, '__getitem__'):
 40 |         return x
 41 |     return x, x
 42 | 
 43 | 
class BinaryConv2DFunction(function.Function):
    """Chainer function for a 2-D convolution with binarized operands.

    On the im2col code paths (CPU, and GPU when the cuDNN branch is not
    taken) the forward pass multiplies sign-binarized activations with
    sign-binarized weights.  The backward pass computes the input gradient
    with the binarized weights and the weight gradient from the im2col
    buffer saved in ``self.col`` during the forward pass.

    NOTE(review): the CPU and GPU paths do not binarize identically.  The
    CPU path keeps exact zeros as ``0`` and stores the *raw* im2col buffer in
    ``self.col``; the GPU path maps zeros to ``+1`` (see ``_kern``) and
    stores the *binarized* im2col buffer.  The two agree when the input is
    already +/-1 -- confirm that this is the intended usage.
    """

    def __init__(self, stride=1, pad=0, use_cudnn=True, cover_all=False):
        """Store the convolution hyper-parameters.

        Args:
            stride: Stride, either a scalar or a ``(sy, sx)`` pair.
            pad: Padding, either a scalar or a ``(ph, pw)`` pair.
            use_cudnn: Whether the cuDNN branch is allowed.
            cover_all: Forwarded to im2col and the output-size computation;
                it is also part of the condition that selects the cuDNN
                branch (see ``forward_gpu``).
        """
        self.sy, self.sx = _pair(stride)
        self.ph, self.pw = _pair(pad)
        self.use_cudnn = use_cudnn
        self.cover_all = cover_all

    def check_type_forward(self, in_types):
        """Validate the input types: ``(x, W)`` or ``(x, W, b)``."""
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)

        x_type = in_types[0]
        w_type = in_types[1]
        # x and W must be floating point, 4-D, and agree on axis 1
        # (the input-channel axis).
        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 4,
            w_type.ndim == 4,
            x_type.shape[1] == w_type.shape[1],
        )

        if n_in.eval() == 3:
            # The optional bias is a 1-D vector with one entry per filter.
            b_type = in_types[2]
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[0],
            )

    def forward_cpu(self, inputs):
        """Forward pass on the CPU (im2col + tensordot).

        Returns a 1-tuple holding the output array.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]
        # The raw (non-binarized) patch buffer is kept for backward_cpu.
        self.col = conv.im2col_cpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)

        # Sign-binarize the patches in two steps: positives -> 1, then
        # negatives -> -1.  Exact zeros stay 0 (presumably so that padding
        # inserted by im2col contributes nothing -- confirm).
        Xb = numpy.where(self.col>0,1,self.col).astype(x.dtype, copy=False)
        Xb = numpy.where(self.col<0,-1,Xb).astype(x.dtype, copy=False)
        # Weights: >= 0 -> 1, otherwise -1.
        Wb = numpy.where(W>=0,1,-1).astype(W.dtype, copy=False)
        # Contract axes 1..3 of the patches with axes 1..3 of the weights;
        # the filter axis ends up last.
        y = numpy.tensordot(
            Xb, Wb, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
        if b is not None:
            y += b

        # Move the filter axis from position 3 to position 1.
        return numpy.rollaxis(y, 3, 1),

    def forward_gpu(self, inputs):
        """Forward pass on the GPU.

        Returns a 1-tuple holding the output array.

        NOTE(review): the cuDNN branch is entered only when
        ``self.cover_all`` is true (together with the other conditions) and
        it performs no binarization of ``x`` or ``W``.  With the default
        ``cover_all=False`` the im2col branch always runs.  Confirm that
        this gating is intentional.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all)
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all)

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
        if (self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y_desc = cudnn.create_tensor_descriptor(y)

            # These descriptors are kept on self because backward_gpu
            # reuses them.
            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx))
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, _fwd_pref,
                workspace_size)

            # Scaling constants 1 and 0 for the cuDNN calls, as double for
            # double input and as float otherwise.
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            libcudnn.convolutionForward(
                handle, one.data, x_desc.value, x.data.ptr,
                self.filter_desc.value, W.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size, zero.data,
                y_desc.value, y.data.ptr)

            # TODO(beam2d): Support unshared bias
            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            # Implementation using im2col
            # The input is binarized *before* im2col, so self.col holds the
            # binarized patches (unlike the CPU path).
            Xb = _kern()(x)

            self.col = conv.im2col_gpu(
                Xb, kh, kw, self.sy, self.sx, self.ph, self.pw,
                cover_all=self.cover_all)
            
            W_mat = W.reshape(out_c, -1)
            col_mats = self.col.reshape(n, -1, out_h * out_w)
            Wb_mat = _kern()(W_mat)

            # y_mats is a reshaped view of y; filling it fills y.
            y_mats = y.reshape(n, out_c, -1)
            # TODO(beam2d): Use streams or batch gemm
            for i in moves.range(n):
                y_mats[i] = Wb_mat.dot(col_mats[i])
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b[:, None, None]

        return y,

    def backward_cpu(self, inputs, grad_outputs):
        """Backward pass on the CPU.

        Returns ``(gx, gW)`` or, when a bias was given, ``(gx, gW, gb)``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        # Weight gradient: contract gy with the patch buffer saved by
        # forward_cpu (the raw, non-binarized patches).
        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)

        # The input gradient is propagated through the binarized weights.
        Wb = numpy.where(W>=0,1,-1).astype(W.dtype, copy=False)

        gcol = numpy.tensordot(Wb, gy, (0, 1)).astype(x.dtype, copy=False)

        # Bring axis 3 to the front before folding the patches back into
        # image layout.
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb

    def backward_gpu(self, inputs, grad_outputs):
        """Backward pass on the GPU.

        Uses the same branch condition as ``forward_gpu``; the cuDNN branch
        relies on the descriptors stored on ``self`` by the forward pass,
        and the im2col branch relies on ``self.col``.

        Returns ``(gx, gW)`` or, when a bias was given, ``(gx, gW, gb)``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape

        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if (self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            gx = cuda.cupy.empty_like(x)

            if _cudnn_version >= 4000:
                # cuDNN >= 4000: query an algorithm and use the *_v3 entry
                # points with an explicit workspace.
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')

                algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                    handle, x_desc.value, gy_desc.value,
                    self.conv_desc.value, self.filter_desc.value,
                    _bwd_filter_pref, workspace_size)
                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)

                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, self.filter_desc.value, gy_desc.value,
                    self.conv_desc.value, x_desc.value, _bwd_data_pref,
                    workspace_size)
                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, x_desc.value, gx.data.ptr)
            else:
                # Older cuDNN: the *_v2 entry points take no algorithm or
                # workspace arguments.
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            # gW_mat is a reshaped view of gW; accumulating into it fills gW.
            gW_mat = gW.reshape(out_c, c * kh * kw)
            col_mats = self.col.reshape(n, c * kh * kw, out_h * out_w)
            gy_mats = gy.reshape(n, out_c, out_h * out_w)
            # TODO(beam2d): Use streams or batch gemm
            gW_mat[...] = 0
            for i in moves.range(n):
                gW_mat += cuda.cupy.dot(gy_mats[i], col_mats[i].T)

            # The input gradient is propagated through the binarized weights.
            W_mat = W.reshape(out_c, -1)
            Wb_mat = _kern()(W_mat)

            gcol = cuda.cupy.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * kh * kw, out_h * out_w)

            for i in moves.range(n):
                gcol_mats[i] = cuda.cupy.dot(Wb_mat.T, gy_mats[i])

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
278 | 
279 | 
def func_convolution_2d(x, W, b=None, stride=1, pad=0, use_cudnn=True,
                        cover_all=False):
    """Apply :class:`BinaryConv2DFunction` to an input and a filter bank.

    This is a thin functional wrapper: it builds a
    ``BinaryConv2DFunction`` configured with ``stride``, ``pad``,
    ``use_cudnn`` and ``cover_all`` and calls it on the given variables.
    The bias is forwarded only when one is supplied.  The binarization of
    the operands and the gradient computation are implemented by
    ``BinaryConv2DFunction``.

    Shape notation used below:

    - :math:`n` is the batch size.
    - :math:`c_I` and :math:`c_O` are the number of input and output
      channels, respectively.
    - :math:`h` and :math:`w` are the height and width of the input image.
    - :math:`k_H` and :math:`k_W` are the height and width of the filters.

    Args:
        x (~chainer.Variable): Input variable of shape :math:`(n, c_I, h, w)`.
        W (~chainer.Variable): Weight variable of shape
            :math:`(c_O, c_I, k_H, k_W)`.
        b (~chainer.Variable): Bias variable of length :math:`c_O` (optional).
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        use_cudnn (bool): If ``True``, the function may use cuDNN when it is
            available.
        cover_all (bool): If ``True``, all spatial locations are convoluted
            into some output pixels. It may make the output size larger.

    Returns:
        ~chainer.Variable: Output variable.

    With stride :math:`(s_Y, s_X)` and padding :math:`(p_H, p_W)` the output
    size :math:`(h_O, w_O)` is::

        h_O = (h + 2 * p_H - k_H) / s_Y + 1
        w_O = (w + 2 * p_W - k_W) / s_X + 1

    When a bias vector is given it is added at every spatial location of
    the output.
    """
    conv_func = BinaryConv2DFunction(stride, pad, use_cudnn, cover_all)
    # Only pass the bias through when the caller actually provided one.
    operands = (x, W) if b is None else (x, W, b)
    return conv_func(*operands)
346 | 


--------------------------------------------------------------------------------
/template_cpp_r7_bcnn.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * C++ Template for a Binarized CNN
  3 |  *
  4 |  *  Created on: 2017/07/01
  5 |  *      Author: H. Nakahara
  6 |  */
  7 | 
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | #include <iostream>
 11 | #include <bitset>
 12 | 
 13 | #include <ap_int.h>
 14 | 
 15 | #ifdef __SDSCC__
 16 | #include "sds_lib.h"
 17 | #else 
 18 | #define sds_alloc(x)(malloc(x))
 19 | #define sds_free(x)(free(x))
 20 | #endif
 21 | 
// custom bitwidth for streaming operation
// Fixed-width aliases of ap_int<N> for N = 2 .. 512 (powers of two).
typedef ap_int<2>    bit_2;
typedef ap_int<4>    bit_4;
typedef ap_int<8>    bit_8;
typedef ap_int<16>   bit_16;
typedef ap_int<32>   bit_32;
typedef ap_int<64>   bit_64;
typedef ap_int<128>  bit_128;
typedef ap_int<256>  bit_256;
typedef ap_int<512>  bit_512;

// weight memory -----------------------------------------------------------
// NOTE(review): (DEF_WEIGHT_MEM) is a template placeholder, presumably
// replaced with the weight array declarations by the code generator.
(DEF_WEIGHT_MEM)
// bias memory ------------------------------------------------------------
// NOTE(review): (DEF_BIAS_MEM) is a template placeholder, presumably
// replaced with the bias array declarations by the code generator.
(DEF_BIAS_MEM)
// -------------------------------------------------------------------------
// Load weights and bias from the external memory (DDR3/4 Memory)
//
// t_bin_convW : flat weight buffer, (WEIGHT_SIZ) ints
// t_BNFb      : flat bias buffer, (BIAS_SIZ) ints
//
// Under SDSoC (__SDSCC__) both buffers are raw pointers declared as
// sequentially accessed, zero-copy data; otherwise they are plain arrays.
// NOTE(review): the body is made of the (SET_WEIGHT_MEM) / (SET_BIAS_MEM)
// template placeholders, presumably expanded by the code generator into the
// copy loops that use the local index variables declared below.
// -------------------------------------------------------------------------
#ifdef __SDSCC__
#pragma SDS data access_pattern(t_bin_convW: SEQUENTIAL)
#pragma SDS data access_pattern(t_BNFb: SEQUENTIAL)
#pragma SDS data zero_copy(t_bin_convW[0:(WEIGHT_SIZ)])
#pragma SDS data zero_copy(t_BNFb[0:(BIAS_SIZ)])
#endif
void setup(
#ifdef __SDSCC__
	    int *t_bin_convW,
		int *t_BNFb
#else 
        int t_bin_convW[(WEIGHT_SIZ)],
        int t_BNFb[(BIAS_SIZ)]
#endif
)
{
	// set buffer memory -----------------------------------------------
	// Index variables; not used by the visible code, presumably used by
	// the generated code below.
	int x, y, of, inf, offset;

	// -----------------------------------------------------------------
	// setup memory
	// -----------------------------------------------------------------
(SET_WEIGHT_MEM)
(SET_BIAS_MEM)
}
 65 | 
 66 | // -------------------------------------------------------------------------
 67 | // Binary Convolutional Layer
 68 | // -------------------------------------------------------------------------
// 3x3 binarized convolution over a square feature map, computed in place.
//
// fmap  : input AND output feature map; each pixel is one bit-vector of
//         (MAX_BCONV_WIDTH) bits (presumably one bit per channel -- confirm).
// layer : layer index, used to select layer-specific registers, weights and
//         bias inside the generated switch statements.
// size  : height/width of the feature map actually processed.
// n_in  : number of input channels (used to turn the XOR popcount into a
//         signed sum).
// n_out : number of output channels (bits produced per output pixel).
//
// The padded (size+2) x (size+2) image is scanned in raster order, one pixel
// per CONV_IF iteration, through a shift register.  Output pixels are
// written back to fmap[oy][ox]; the write position trails the read position
// because output only starts once two rows plus three pixels have been
// shifted in.
// NOTE(review): `infeat` is unused, and `idx` is only incremented, never read.
void bin_conv2d_pipeline(
		ap_int<(MAX_BCONV_WIDTH)> fmap[(IMGSIZ)][(IMGSIZ)],
		int layer,
		int size,
		int n_in,
		int n_out
		)
{
(BCONV_REG_PRAGMA)

	int ofeat, infeat, w_flag;
	int i, k, ky, kx, ix, iy, ox, oy;
	int idx = 0;

	// Line buffers: pixel data and a parallel one-bit "is padding" flag.
	static ap_int<(MAX_BCONV_WIDTH)> shift_reg1[((IMGSIZ)+2)*3];
#pragma HLS ARRAY_PARTITION variable=shift_reg1 complete dim=1
	static ap_uint<1> padding_shift_reg[((IMGSIZ)+2)*3];
#pragma HLS ARRAY_PARTITION variable=padding_shift_reg complete dim=1

	int cnt = 0;

	ix = iy = ox = oy = w_flag = 0;

    CONV_IF: for( k = 0; k < (size+2) * (size+2); k++){
#pragma HLS loop_flatten off

    	// Advance both shift registers by one position.
    	SHIFT_REG: for( i = 0; i < 2 * ((IMGSIZ)+2) + 3; i++){
#pragma HLS UNROLL
    		shift_reg1[ i] = shift_reg1[ i + 1];
    		padding_shift_reg[ i] = padding_shift_reg[ i + 1];
    	}
    	// Next input: a real pixel inside the image, or an all-ones word
    	// flagged as padding on the one-pixel border.
    	ap_int<(MAX_BCONV_WIDTH)> din;
    	ap_uint<1> padding;
    	if( (ix > 0 && ix <= size) && (iy > 0 && iy <= size)){
		din = (ap_int<(MAX_BCONV_WIDTH)>)fmap[iy-1][ix-1];
		padding = 0;
    	} else {
    		ap_int<(MAX_BCONV_WIDTH)> allone;
    		allone = ~0;
    		din = allone;
		padding = 1;
    	}
    	// Generated code; presumably stores din/padding at the
    	// layer-dependent tail position of the shift registers.
    	switch( layer){
(BCONV_REG_SELECT)
    	}

    	// Advance the raster-scan read position over the padded image.
    	ix++;
    	if( ix == size+2){
    		ix = 0;
    		iy++;
    	}

    	// Once the first 3x3 window is available, w_flag counts the column
    	// within the current padded row (1 .. size+2); only 1 .. size
    	// produce output below.
    	if( k >= ((size+2)*2+3 - 1)){
    		w_flag++;
    		if( w_flag > (size+2)){
            	w_flag = 1;
            	cnt    = 0;
            }
    	}

    	// convolutional operation -----------------------------------
    	// bit_tmp is the one-hot mask of the current output channel and
    	// streamOut collects the output bits of this pixel.
		ap_uint<(MAX_BCONV_WIDTH)> bit_tmp = 0x1;
		ap_uint<(MAX_BCONV_WIDTH)> streamOut = 0;

    	OF: for( ofeat = 0; ofeat < n_out; ofeat++){
    		ap_int<16> tmp = 0;
    		ap_int<16> tmp2;

            CONV_KY: for( ky = 0; ky < 3; ky++){
#pragma HLS pipeline
            	CONV_KX: for( kx = 0; kx < 3; kx++){
            		ap_uint<(MAX_BCONV_WIDTH)> bx, bw;
            		ap_uint<(MAX_BCONV_WIDTH)> bxor;
                    ap_uint<(MAX_BCONV_WIDTH)> mask;
                    ap_uint<(MAX_BCONV_WIDTH)> allzero = 0;
                    ap_uint<1>is_padding;

            		// Generated code; presumably loads bx, bw (and
            		// is_padding) for this tap and then forms bxor.
            		switch( layer){
(BCONV_WEIGHT_SELECT)
            		}

                    (BIN_XOR_MAC)

			// Popcount of bxor.
			tmp2 = 0;
                    ONES_COUNT: for( i = 0; i < (MAX_BCONV_WIDTH); i++){
                        tmp2 += (((bxor >> i) & 0x1) == 1) ? 1 : 0;
                    }
                    // n_in - 2*popcount is the +/-1 dot product over n_in
                    // channels; taps that fall on padding add nothing.
                    if( is_padding == 0)
                        tmp += (n_in - tmp2 * 2);
		}
            }

            // Emit one output bit per channel for valid window positions.
            if( w_flag > 0 && w_flag <= size){
#pragma HLS pipeline
            	ap_int<16> bias;
            	switch( layer){
(BCONV_BIAS_SELECT)
            	}
            	tmp += bias;

            	// Threshold at zero: set this channel's bit when tmp >= 0.
            	if( tmp >= 0) streamOut = streamOut | bit_tmp;

            	bit_tmp = bit_tmp << 1;

            	// After the last channel, store the packed pixel and move
            	// the output position forward.
            	cnt++;
            	if( cnt == n_out){
            		cnt = 0;
            		fmap[oy][ox] = (ap_int<(MAX_BCONV_WIDTH)>)streamOut;

            		ox++;
            		if( ox == size){
            			ox = 0;
            			oy++;
            		}

            		idx++;
            	}

            }
    	}

    }
}
192 | 
193 | // ------------------------------------------------------------------------
// 3x3 convolution of a multi-bit (integer) input image with one-bit weights,
// producing a binarized output feature map.
//
// infmap  : IF_SIZ x IF_SIZ input image.  Each pixel is unpacked below as
//           three signed 20-bit fields (low field first).
// outfmap : OF_SIZ x OF_SIZ output map; bit `ofeat` of a pixel is set when
//           the biased sum of output channel `ofeat` is >= 0.
// W       : per output channel, nine 3x3 taps; the three low bits of a tap
//           select add (1) or subtract (0) for the three input fields.
// BNFb    : per output channel bias added before thresholding.
//
// The input is scanned in raster order over the zero-padded
// (IF_SIZ+2) x (IF_SIZ+2) image through a shift register.
// NOTE(review): `infeat` and `debug_out` are unused, and `idx` is only
// incremented, never read.
template< typename BIN_TYPE, typename BOUT_TYPE, int N_IFEAT, int N_OFEAT, int IF_SIZ, int OF_SIZ>
void int_conv2d_pipeline(
		BIN_TYPE infmap[IF_SIZ][IF_SIZ],
		BOUT_TYPE outfmap[OF_SIZ][OF_SIZ],
		ap_int<(NUMIMG)> W[N_OFEAT][3*3],
		ap_int<20> BNFb[N_OFEAT]
		)
{
#pragma HLS ARRAY_PARTITION variable=W cyclic factor=9 dim=2

	int ofeat, infeat;
	int w_flag;
	int i, k, ky, kx;

	int idx = 0;

	// Line buffer holding the pixels needed for a 3x3 window.
	static ap_int<N_IFEAT> shift_reg1[(IF_SIZ+2)*3];
#pragma HLS ARRAY_PARTITION variable=shift_reg1 complete dim=1
	int cnt = 0;

	int debug_out = 0;
    w_flag = 0;

    int ix, iy, ox, oy;
    ix = iy = ox = oy = 0;

    CONV_IF: for( k = 0; k < (IF_SIZ+2) * (IF_SIZ+2); k++){
#pragma HLS loop_flatten off

    	// pipeline register ------------------------------------------
    	SHIFT_REG: for( i = 0; i < 2 * (IF_SIZ+2) + 3; i++){
#pragma HLS UNROLL
    		shift_reg1[ i] = shift_reg1[ i + 1];
    	}
    	// Next input: a real pixel inside the image, zero on the border.
    	ap_int<N_IFEAT> din;
    	if( (ix > 0 && ix <= IF_SIZ) && (iy > 0 && iy <= IF_SIZ)){
    		din = infmap[iy-1][ix-1];
    	} else {
            ap_int<N_IFEAT> allzero;
            allzero = 0;
            din = allzero;
    	}
    	// Insert the new pixel at the tail of the window buffer.
    	shift_reg1[ 2 * (IF_SIZ+2) + 3 - 1] = din;

    	// Advance the raster-scan read position over the padded image.
    	ix++;
    	if( ix == IF_SIZ+2){
    		ix = 0;
    		iy++;
    	}


    	// enable MAC operation
    	// w_flag counts the column within the current padded row
    	// (1 .. IF_SIZ+2); only 1 .. IF_SIZ produce output below.
    	if( k >= ((IF_SIZ+2)*2+3 - 1)){
    		w_flag++;
    		if( w_flag > (IF_SIZ+2)){
            	w_flag = 1;
            	cnt    = 0;
            }
    	}

    	// convolutional operation -----------------------------------
    	// bit_tmp is the one-hot mask of the current output channel and
    	// streamOut collects the output bits of this pixel.
		ap_uint<N_OFEAT>bit_tmp = 0x1;
		ap_uint<N_OFEAT> streamOut = 0;

    	OF: for( ofeat = 0; ofeat < N_OFEAT; ofeat++){
    		int tmp = 0;
    		ap_int<20> tmp2;

            CONV_KY: for( ky = 0; ky < 3; ky++){
#pragma HLS pipeline
            	CONV_KX: for( kx = 0; kx < 3; kx++){
            		ap_int<64> bx;
            		ap_int<3> bw;

            		bx = shift_reg1[ky * (IF_SIZ+2) + kx];
            		bw = W[ofeat][ky*3+kx];

            		// Consume three 20-bit fields of the pixel, one weight
            		// bit each: weight bit 0 subtracts, 1 adds.
            		MAC_RGB: for( i = 0; i < 3; i++){
            			tmp2 = ap_int<20>(bx & 0xFFFFF);
            			tmp = ((bw & 0x1) == 0) ? (tmp - (int)tmp2) : (tmp + (int)tmp2);
            			bw = bw >> 1;
            			bx = bx >> 20;
            		}
            	}
            }

            // output to Streaming Buffer
            if( w_flag > 0 && w_flag <= IF_SIZ){
#pragma HLS pipeline

            	tmp += BNFb[ofeat];

            	// Threshold at zero: set this channel's bit when tmp >= 0.
            	if( tmp >= 0) streamOut = streamOut | bit_tmp;

            	bit_tmp = bit_tmp << 1;

            	// After the last channel, store the packed pixel and move
            	// the output position forward.
            	cnt++;
            	if( cnt == N_OFEAT){
            		cnt = 0;

            		outfmap[oy][ox] = streamOut;

            		ox++;
            		if( ox == OF_SIZ){
            			ox = 0;
            			oy++;
            		}

            		idx++;
            	}

            }
    	}

    }
}
310 | 
// Integer-input convolution layer: forwards all arguments unchanged to
// int_conv2d_pipeline with the same template parameters.
//
// infmap  : INFEAT_SIZ x INFEAT_SIZ input image
// outfmap : OFEAT_SIZ x OFEAT_SIZ output feature map
// W       : weights, NUM_OFEAT x 9 (3x3 taps per output channel)
// BNFb    : per output channel bias
template< typename BIN_TYPE, typename BOUT_TYPE, int NUM_IFEAT, int NUM_OFEAT,
          int INFEAT_SIZ, int OFEAT_SIZ>
void int_conv2d_layer(
		BIN_TYPE infmap[INFEAT_SIZ][INFEAT_SIZ],
		BOUT_TYPE outfmap[OFEAT_SIZ][OFEAT_SIZ],
		ap_int<(NUMIMG)> W[NUM_OFEAT][3*3],
		ap_int<20> BNFb[NUM_OFEAT]
)
{
	int_conv2d_pipeline< BIN_TYPE, BOUT_TYPE, NUM_IFEAT, NUM_OFEAT,
		INFEAT_SIZ, OFEAT_SIZ>( infmap, outfmap, W, BNFb);
}
323 | 
324 | // -------------------------------------------------------------------------
325 | // Maximum Pooling Layer
326 | // -------------------------------------------------------------------------
// In-place 2x2, stride-2 pooling of a FEAT_SIZ x FEAT_SIZ bit-vector map.
// Each output pixel is the bitwise OR of its four source pixels and is
// written to the top-left (FEAT_SIZ/2) x (FEAT_SIZ/2) corner of `ftmp`;
// the remaining entries are left untouched.
// The code reads rows/columns inf+1, so FEAT_SIZ is expected to be even.
// POOL_SIZ is not used by the body.
template< typename TYPE_BIT, int FEAT_SIZ, int POOL_SIZ>
void max_pooling_layer( TYPE_BIT ftmp[FEAT_SIZ][FEAT_SIZ])
{
	PY: for( int src_y = 0; src_y < FEAT_SIZ; src_y += 2){
		PX: for( int src_x = 0; src_x < FEAT_SIZ; src_x += 2){
			// Read the whole 2x2 window before writing, because the
			// destination may alias one of the source pixels.
			const TYPE_BIT upper_left  = ftmp[src_y][src_x];
			const TYPE_BIT upper_right = ftmp[src_y][src_x + 1];
			const TYPE_BIT lower_left  = ftmp[src_y + 1][src_x];
			const TYPE_BIT lower_right = ftmp[src_y + 1][src_x + 1];

			ftmp[src_y / 2][src_x / 2] =
				upper_left | upper_right | lower_left | lower_right;
		}
	}
}
350 | 
351 | // -------------------------------------------------------------------------
352 | // FC Layer
353 | // -------------------------------------------------------------------------
// Binarized fully connected layer.
//
// fc_tmp    : NUM_INFEAT one-bit inputs
// lW        : one-bit weights, NUM_OFEAT x NUM_INFEAT
// b_BNFb    : per output bias
// fc_result : output; entries 0 .. NUM_OFEAT-1 are written
//
// For every output, each input/weight pair contributes +1 when the two bits
// are equal (XNOR is non-zero) and -1 otherwise; the bias is then added.
template < int NUM_OFEAT, int NUM_INFEAT>
void fc_layer(
	ap_int<1> fc_tmp[NUM_INFEAT],
	ap_int<1> lW[NUM_OFEAT][NUM_INFEAT],
	ap_int<16> b_BNFb[NUM_OFEAT],
	int fc_result[(MAX_DENSE_SIZ)]
)
{
	int ofeat, tmp, infeat;

	FC_O: for( ofeat = 0; ofeat < NUM_OFEAT; ofeat++){
#pragma HLS LOOP_FLATTEN off
		tmp = 0;

		FC_I: for( infeat = 0; infeat < NUM_INFEAT; infeat++){
#pragma HLS pipeline
			ap_int<1> bw, bx, xnor;

			bw = lW[ofeat][infeat];
			bx = fc_tmp[infeat];
			xnor = ~(bw ^ bx);

			// Equal bits count +1, differing bits count -1.
			tmp += (xnor == 0) ? -1 : +1;
		}

		fc_result[ofeat] = tmp + b_BNFb[ofeat];
	}
}
382 | 
383 | // -------------------------------------------------------------------------
384 | // Binarized CNN Kernel
385 | // -------------------------------------------------------------------------
// Inference kernel: copies the flat input image into a local 2-D buffer and
// then runs the network layer by layer.
//
// t_in_img  : flat input image, (IMGSIZ)*(IMGSIZ) 64-bit pixels, row-major
// fc_result : output buffer (filled by the generated layer code -- the
//             writes are not visible in this template)
//
// NOTE(review): both preprocessor branches of the parameter list are
// identical, and fc_result is declared with a hard-coded length of 10 while
// BinCNN declares it with (OUT_DENSE_SIZ) -- confirm this is intended.
// NOTE(review): (DEF_CNN_PARAMETER) and (DEF_CNN_LAYER) are template
// placeholders, presumably expanded by the code generator.
#ifdef __SDSCC__
#pragma SDS data access_pattern(t_in_img: SEQUENTIAL)
#pragma SDS data zero_copy(t_in_img[0:(IMGSIZ)*(IMGSIZ)])
#endif
void kernel(
#ifdef __SDSCC__
        ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
        int fc_result[10]
#else 
        ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
        int fc_result[10]
#endif
)
{
	// Working buffers: binarized feature map, flattened input of the dense
	// layer, and the local copy of the input image.
	ap_int<(MAX_BCONV_WIDTH)> fb_tmp[(IMGSIZ)][(IMGSIZ)];
	ap_int<1> fc_tmp[(MAX_DENSE_SIZ)];
	ap_int<64> in_img[(IMGSIZ)][(IMGSIZ)];

	int y, x, of, layer, bin_layer_idx;
(DEF_CNN_PARAMETER)

	// Unflatten the input image (row-major) into in_img.
	for( y = 0; y < (IMGSIZ); y++){
		for( x = 0; x < (IMGSIZ); x++){
			in_img[y][x] = t_in_img[y*(IMGSIZ)+x];
		}
	}

#pragma HLS INLINE

    bin_layer_idx = 1;
	// One switch case per layer; the case bodies are generated.
	BCONV: for( layer = 0; layer < (NUM_LAYER); layer++){
		switch(layer){
(DEF_CNN_LAYER)
		}
	}
}
422 | 
//--------------------------------------------------------------------
// Top Function for a Binarized CNN
//--------------------------------------------------------------------
// Single entry point that either loads the network parameters or runs
// inference, selected by `init`:
//   init == 1 : calls setup(t_bin_convW, t_BNFb); the image and result
//               arguments are not used on this path
//   otherwise : calls kernel(t_in_img, fc_result); the weight and bias
//               arguments are not used on this path
//
// Parameters:
//   t_bin_convW - weight words, (WEIGHT_SIZ) entries, forwarded to setup()
//   t_BNFb      - bias words, (BIAS_SIZ) entries, forwarded to setup()
//   t_in_img    - input image, (IMGSIZ)*(IMGSIZ) 64-bit words
//   fc_result   - output buffer, (OUT_DENSE_SIZ) entries
//   init        - mode selector described above
//
// setup() is defined outside the part of the file shown here.
#ifdef __SDSCC__
#pragma SDS data access_pattern(t_bin_convW: SEQUENTIAL)
#pragma SDS data access_pattern(t_BNFb: SEQUENTIAL)
#pragma SDS data access_pattern(t_in_img: SEQUENTIAL)
#pragma SDS data zero_copy(t_bin_convW[0:(WEIGHT_SIZ)])
#pragma SDS data zero_copy(t_BNFb[0:(BIAS_SIZ)])
#pragma SDS data zero_copy(t_in_img[0:(IMGSIZ)*(IMGSIZ)])
#endif
void BinCNN(
// Under SDSCC the weight and bias buffers are passed as plain pointers;
// otherwise they are declared as sized arrays.
#ifdef __SDSCC__
        int *t_bin_convW,
        int *t_BNFb,
        ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
        int fc_result[(OUT_DENSE_SIZ)],
        int init
#else 
        int t_bin_convW[(WEIGHT_SIZ)],
        int t_BNFb[(BIAS_SIZ)],
        ap_int<64> t_in_img[(IMGSIZ)*(IMGSIZ)],
        int fc_result[(OUT_DENSE_SIZ)],
        int init
#endif
)
{
// The AXI-Lite interface pragmas below are intentionally disabled.
/*
#pragma HLS INTERFACE s_axilite register port=t_bin_convW bundle=slv0
#pragma HLS INTERFACE s_axilite register port=t_BNFb bundle=slv0
#pragma HLS INTERFACE s_axilite register port=t_in_img bundle=slv0
#pragma HLS INTERFACE s_axilite register port=fc_result bundle=slv0
#pragma HLS INTERFACE s_axilite register port=init bundle=slv0
#pragma HLS INTERFACE s_axilite register port=return bundle=slv0
*/
	// Parameter-load call versus inference call.
	if( init == 1)
		setup( t_bin_convW, t_BNFb);
	else
		kernel( t_in_img, fc_result);
}
463 | 
464 | // ------------------------------------------------------------------
465 | // END OF PROGRAM
466 | // ------------------------------------------------------------------
467 | 


--------------------------------------------------------------------------------
/function_batch_normalization.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | 
  3 | from chainer import cuda
  4 | from chainer import function
  5 | from chainer.utils import type_check
  6 | 
  7 | if cuda.cudnn_enabled:
  8 |     cudnn = cuda.cudnn
  9 |     libcudnn = cudnn.cudnn
 10 |     _cudnn_version = libcudnn.getVersion()
 11 | 
 12 | 
 13 | def _as4darray(arr):
 14 |     if arr.ndim == 0:
 15 |         return arr.reshape(1, 1, 1, 1)
 16 |     elif arr.ndim == 4:
 17 |         return arr
 18 |     else:
 19 |         return arr.reshape(arr.shape[0], -1, 1, 1)
 20 | 
 21 | 
 22 | def _xhat(x, mean, std, expander):
 23 |     x_mu = x - mean[expander]
 24 |     x_mu /= std[expander]
 25 |     return x_mu
 26 | 
 27 | 
 28 | class BatchNormalizationFunction(function.Function):
 29 | 
 30 |     def __init__(self, eps=2e-5, mean=None, var=None, train=False,
 31 |                  decay=0.9, use_cudnn=True):
 32 |         self.running_mean = mean
 33 |         self.running_var = var
 34 | 
 35 |         self.train = train
 36 |         self.eps = eps
 37 |         if cuda.cudnn_enabled and use_cudnn:
 38 |             if eps <= 1e-5:
 39 |                 msg = 'cuDNN does not allow an eps value less than 1e-5.'
 40 |                 raise RuntimeError(msg)
 41 |         self.use_cudnn = use_cudnn
 42 |         self.mean_cache = None
 43 |         self.decay = decay
 44 | 
 45 |     def check_type_forward(self, in_types):
 46 |         n_in = in_types.size().eval()
 47 |         if n_in != 3 and n_in != 5:
 48 |             raise type_check.InvalidType(
 49 |                 '%s or %s' % (in_types.size() == 3, in_types.size() == 5),
 50 |                 '%s == %s' % (in_types.size(), n_in))
 51 |         x_type, gamma_type, beta_type = in_types[:3]
 52 |         M = gamma_type.ndim.eval()
 53 |         type_check.expect(
 54 |             x_type.dtype.kind == 'f',
 55 |             x_type.ndim >= gamma_type.ndim + 1,
 56 |             x_type.shape[1:1 + M] == gamma_type.shape,
 57 |             # TODO(beam2d): Check shape
 58 |             gamma_type.dtype == x_type.dtype,
 59 |             beta_type.dtype == x_type.dtype,
 60 |             gamma_type.shape == beta_type.shape,
 61 |         )
 62 |         if len(in_types) == 5:
 63 |             mean_type, var_type = in_types[3:]
 64 |             type_check.expect(
 65 |                 mean_type.dtype == x_type.dtype,
 66 |                 mean_type.shape == gamma_type.shape,
 67 |                 var_type.dtype == x_type.dtype,
 68 |                 var_type.shape == gamma_type.shape,
 69 |             )
 70 | 
 71 |     def forward(self, inputs):
 72 |         xp = cuda.get_array_module(*inputs)
 73 |         x, gamma, beta = inputs[:3]
 74 |         if self.train:
 75 |             if self.running_mean is None:
 76 |                 self.running_mean = xp.zeros_like(gamma)
 77 |                 self.running_var = xp.zeros_like(gamma)
 78 |             else:
 79 |                 self.running_mean = xp.array(self.running_mean)
 80 |                 self.running_var = xp.array(self.running_var)
 81 |         elif len(inputs) == 5:
 82 |             self.fixed_mean = inputs[3]
 83 |             self.fixed_var = inputs[4]
 84 | 
 85 |         # TODO(bkvogel): Check for float16 support again in next cuDNN version.
 86 |         if x[0].dtype == numpy.float16:
 87 |             # cuDNN v5 batch normalization does not seem to support float16.
 88 |             self.use_cudnn = False
 89 | 
 90 |         head_ndim = gamma.ndim + 1
 91 |         expander = (None, Ellipsis) + (None,) * (x.ndim - head_ndim)
 92 |         gamma = gamma[expander]
 93 |         beta = beta[expander]
 94 | 
 95 |         # cuDNN only supports these tensor dimensions because they are
 96 |         # the most commonly used. If there is a need to support other
 97 |         # dimensions with cuDNN, we could consider reshaping the input
 98 |         # into a 2-dim array with channels as second dim and m=<product
 99 |         # of all dimensions except the 2nd dimension> as the first
100 |         # dimension.
101 |         self.cudnn_dim_ok = x.ndim == 2 or x.ndim == 4
102 | 
103 |         cudnn_updated_running_stats = False
104 |         if xp is not numpy and cuda.cudnn_enabled and self.use_cudnn and \
105 |                 self.cudnn_dim_ok and _cudnn_version >= 5000:
106 |             if x.ndim == 4:
107 |                 # for convolutional layer
108 |                 self.mode = libcudnn.CUDNN_BATCHNORM_SPATIAL
109 |             else:
110 |                 # for linear layer
111 |                 self.mode = libcudnn.CUDNN_BATCHNORM_PER_ACTIVATION
112 | 
113 |             x = cuda.cupy.ascontiguousarray(x)
114 |             gamma = cuda.cupy.ascontiguousarray(gamma)
115 |             beta = cuda.cupy.ascontiguousarray(beta)
116 |             dtype = x.dtype
117 |             handle = cudnn.get_handle()
118 |             x_desc = cudnn.create_tensor_descriptor(_as4darray(x))
119 |             derivedBnDesc = cudnn.create_uninitialized_tensor_descriptor()
120 |             libcudnn.deriveBNTensorDescriptor(derivedBnDesc.value,
121 |                                               x_desc.value, self.mode)
122 |             one = numpy.array(1, dtype=dtype).ctypes
123 |             zero = numpy.array(0, dtype=dtype).ctypes
124 |             y = cuda.cupy.empty_like(x)
125 |             # Factor used in the moving average
126 |             factor = 1 - self.decay
127 | 
128 |             if self.train:
129 |                 if self.mean_cache is None:
130 |                     # Output cache to speed up bacward pass.
131 |                     self.mean_cache = xp.empty_like(gamma)
132 |                     # Output cache to speed up bacward pass.
133 |                     self.var_cache = xp.empty_like(gamma)
134 |                 # Note: cuDNN computes the mini-batch mean and variance
135 |                 # internally. We can simply (optionally) pass
136 |                 # it the running-average mean and variance arrays.
137 |                 libcudnn.batchNormalizationForwardTraining(
138 |                     handle, self.mode, one.data, zero.data,
139 |                     x_desc.value, x.data.ptr, x_desc.value,
140 |                     y.data.ptr, derivedBnDesc.value, gamma.data.ptr,
141 |                     beta.data.ptr, factor, self.running_mean.data.ptr,
142 |                     self.running_var.data.ptr, self.eps,
143 |                     self.mean_cache.data.ptr, self.var_cache.data.ptr)
144 |                 cudnn_updated_running_stats = True
145 |             else:
146 |                 libcudnn.batchNormalizationForwardInference(
147 |                     handle, self.mode, one.data, zero.data,
148 |                     x_desc.value, x.data.ptr, x_desc.value, y.data.ptr,
149 |                     derivedBnDesc.value, gamma.data.ptr, beta.data.ptr,
150 |                     self.fixed_mean.data.ptr, self.fixed_var.data.ptr,
151 |                     self.eps)
152 |         else:
153 |             if self.train:
154 |                 axis = (0,) + tuple(range(head_ndim, x.ndim))
155 |                 mean = x.mean(axis=axis)
156 |                 var = x.var(axis=axis)
157 |                 var += self.eps
158 |             else:
159 |                 mean = self.fixed_mean
160 |                 var = self.fixed_var
161 | 
162 | 
163 | 
164 |             self.std = xp.sqrt(var, dtype=var.dtype)
165 |             if xp is numpy:
166 |                 self.x_hat = _xhat(x, mean, self.std, expander)
167 |                 y = gamma * self.x_hat
168 |                 y += beta
169 | 
170 |             else:
171 |                 self.x_hat, y = cuda.elementwise(
172 |                     'T x, T mean, T std, T gamma, T beta', 'T x_hat, T y',
173 |                     '''
174 |                     x_hat = (x - mean) / std;
175 |                     y = gamma * x_hat + beta;
176 |                     ''',
177 |                     'bn_fwd')(x, mean[expander], self.std[expander], gamma,
178 |                               beta)
179 | 
180 |         if self.train and (not cudnn_updated_running_stats):
181 |             # Note: If in training mode, the cuDNN forward training function
182 |             # will do this for us, so
183 |             # only run following code if cuDNN was not used.
184 |             # Update running statistics:
185 |             m = x.size // gamma.size
186 |             adjust = m / max(m - 1., 1.)  # unbiased estimation
187 |             self.running_mean *= self.decay
188 |             temp_ar = xp.array(mean)
189 |             temp_ar *= (1 - self.decay)
190 |             self.running_mean += temp_ar
191 |             del temp_ar
192 |             self.running_var *= self.decay
193 |             temp_ar = xp.array(var)
194 |             temp_ar *= (1 - self.decay) * adjust
195 |             self.running_var += temp_ar
196 |             del temp_ar
197 |         return y,
198 | 
199 |     def backward(self, inputs, grad_outputs):
200 |         x, gamma = inputs[:2]
201 |         gy = grad_outputs[0]
202 |         head_ndim = gamma.ndim + 1
203 |         expander = (None, Ellipsis) + (None,) * (x.ndim - head_ndim)
204 |         m = gamma.dtype.type(x.size // gamma.size)
205 |         axis = (0,) + tuple(range(head_ndim, x.ndim))
206 |         xp = cuda.get_array_module(x)
207 |         if len(inputs) == 5:
208 |             # This case is unlikely to be used in practice and so does not
209 |             # need to be optimized for performance.
210 |             mean = inputs[3]
211 |             var = inputs[4]
212 |             std = xp.sqrt(var, dtype=var.dtype)
213 |             gs = gamma / std
214 |             gbeta = gy.sum(axis=axis)
215 |             x_hat = _xhat(x, mean, std, expander)
216 |             ggamma = (gy * x_hat).sum(axis=axis)
217 |             gmean = -gs * gbeta
218 |             gvar = -0.5 * gamma / var * ggamma
219 |             gx = gs[expander] * gy
220 |             return gx, ggamma, gbeta, gmean, gvar
221 | 
222 |         # Note: If length of inputs is not 5, we must be in train mode.
223 |         assert self.train
224 |         if xp is not numpy and cuda.cudnn_enabled and self.use_cudnn and \
225 |                 self.cudnn_dim_ok and _cudnn_version >= 5000:
226 |             # Note: cuDNN batch normalization backward only works in
227 |             # "training mode." That is, it does not support
228 |             # computing gradients in fixed-mean-variance mode, because there
229 |             # is normally no reason to call backward()
230 |             # while in test/evaluation mode.
231 |             dtype = x.dtype
232 |             handle = cudnn.get_handle()
233 |             x_desc = cudnn.create_tensor_descriptor(_as4darray(x))
234 |             derivedBnDesc = cudnn.create_uninitialized_tensor_descriptor()
235 |             libcudnn.deriveBNTensorDescriptor(derivedBnDesc.value,
236 |                                               x_desc.value, self.mode)
237 |             one = numpy.array(1, dtype=dtype).ctypes
238 |             zero = numpy.array(0, dtype=dtype).ctypes
239 |             gx = cuda.cupy.empty_like(x)
240 |             ggamma = cuda.cupy.empty_like(gamma)
241 |             gbeta = cuda.cupy.empty_like(gamma)
242 |             libcudnn.batchNormalizationBackward(
243 |                 handle, self.mode, one.data, zero.data,
244 |                 one.data, zero.data, x_desc.value, x.data.ptr,
245 |                 x_desc.value, gy.data.ptr, x_desc.value, gx.data.ptr,
246 |                 derivedBnDesc.value, gamma.data.ptr,
247 |                 ggamma.data.ptr, gbeta.data.ptr,
248 |                 self.eps, self.mean_cache.data.ptr, self.var_cache.data.ptr)
249 |         else:
250 |             gbeta = gy.sum(axis=axis)
251 |             ggamma = (gy * self.x_hat).sum(axis=axis)
252 |             if xp is numpy:
253 |                 gx = (gamma / self.std)[expander] * (
254 |                     gy - (self.x_hat * ggamma[expander] + gbeta[expander]) / m)
255 |             else:
256 |                 inv_m = numpy.float32(1) / m
257 |                 gx = cuda.elementwise(
258 |                     'T gy, T x_hat, T gamma, T std, T ggamma, T gbeta, \
259 |                     T inv_m',
260 |                     'T gx',
261 |                     'gx = (gamma / std) * (gy - (x_hat * ggamma + gbeta) * \
262 |                     inv_m)',
263 |                     'bn_bwd')(gy, self.x_hat, gamma[expander],
264 |                               self.std[expander], ggamma[expander],
265 |                               gbeta[expander], inv_m)
266 |         return gx, ggamma, gbeta
267 | 
268 | 
def batch_normalization(x, gamma, beta, eps=2e-5, running_mean=None,
                        running_var=None, decay=0.9, use_cudnn=True):
    """Apply batch normalization using mini-batch statistics.

    The input ``x`` is expected to have the batch size as its first
    dimension and the features (or channels) as the following ones. Any
    further dimensions are treated as spatial dimensions and are reduced
    together with the batch dimension when the statistics are computed.

    Note: The function object created here is not returned, so the running
    mean and variance that it updates cannot be read back by the caller.
    To access them, create a :class:`BatchNormalizationFunction` instance
    yourself, call it, and then read its ``running_mean`` and
    ``running_var`` attributes.

    Args:
        x (Variable): The input variable.
        gamma (Variable): The scaling parameter of normalized data.
        beta (Variable): The shifting parameter of scaled normalized data.
        eps (float): Epsilon value for numerical stability.
        running_mean (array): Initial value of the running average of the
            mean. If ``None``, a zero-initialized array is used internally;
            in that case ``running_var`` should be ``None`` as well.
        running_var (array): Initial value of the running average of the
            variance. If ``None``, then ``running_mean`` should also be
            ``None``.
        decay (float): Decay rate of the moving averages.
        use_cudnn (bool): If ``True`` and cuDNN is enabled, then this function
            uses cuDNN as the core implementation.

    Returns:
        The result of calling the function object on ``(x, gamma, beta)``.

    See: `Batch Normalization: Accelerating Deep Network Training by Reducing\
          Internal Covariate Shift <http://arxiv.org/abs/1502.03167>`_

    .. seealso:: :class:`links.BatchNormalization`

    """
    bn_func = BatchNormalizationFunction(
        eps=eps, mean=running_mean, var=running_var, train=True,
        decay=decay, use_cudnn=use_cudnn)
    return bn_func(x, gamma, beta)
319 | 
320 | 
def fixed_batch_normalization(x, gamma, beta, mean, var, eps=2e-5,
                              use_cudnn=True):
    """Apply batch normalization with caller-supplied statistics.

    Instead of computing the mean and variance from the mini-batch, this
    variant normalizes ``x`` with the given ``mean`` and ``var``. The
    function object is created in non-training mode, so no running
    statistics are updated.

    Args:
        x (Variable): The input variable.
        gamma (Variable): The scaling parameter of normalized data.
        beta (Variable): The shifting parameter of scaled normalized data.
        mean (Variable): The mean subtracted from the input.
        var (Variable): The variance used to scale the input.
        eps (float): Epsilon value for numerical stability.
        use_cudnn (bool): If ``True`` and cuDNN is enabled, then this function
            uses cuDNN as the core implementation.

    Returns:
        The result of calling the function object on
        ``(x, gamma, beta, mean, var)``.

    .. seealso::
       :func:`functions.batch_normalization`,
       :class:`links.BatchNormalization`

    """
    bn_func = BatchNormalizationFunction(
        eps=eps, mean=None, var=None, train=False, decay=0.0,
        use_cudnn=use_cudnn)
    return bn_func(x, gamma, beta, mean, var)
347 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 | GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/gen_cpp_code_v3.py:
--------------------------------------------------------------------------------
  1 | # -----------------------------------------------------------------------
  2 | # gen_cpp_code_v3.py
  3 | # C++ code generator for a high-level synthesis toward an FPGA realization
  4 | #
  5 | # Creation Date   : 04/Aug./2017
  6 | # Copyright (C) <2017> Hiroki Nakahara, All rights reserved.
  7 | # 
  8 | # Released under the GPL v2.0 License.
  9 | #
 10 | # -----------------------------------------------------------------------
 11 | 
 12 | #!/usr/bin/python
 13 | # coding: UTF-8
 14 | 
 15 | import argparse
 16 | import re
 17 | import pickle
 18 | 
 19 | parser = argparse.ArgumentParser(description='C++ code generator')
 20 | parser.add_argument('--config_path', '-c', type=str, default='./hoge',
 21 |                         help='Configuration pickle file path')
 22 | args = parser.parse_args()
 23 | 
 24 | # load configuration from guiness GUI
 25 | config_file = args.config_path + "/config.pickle"
 26 | with open(config_file, mode='rb') as f:
 27 | 	config = pickle.load(f)
 28 | 
 29 | initial_options = config['initial_options']
 30 | n_in_fmaps = config['n_in_fmaps']
 31 | n_ou_fmaps = config['n_ou_fmaps']
 32 | infmap_siz = config['infmap_siz']
 33 | ksiz = config['ksiz']
 34 | imgsiz = config['imgsiz']
 35 | max_dense_siz = config['max_dense_siz']
 36 | out_dense_siz = config['out_dense_siz']
 37 | bias_siz = config['bias_siz']
 38 | weight_siz = config['weight_siz']
 39 | max_bconv_width = config['max_bconv_width']
 40 | num_layer = config['num_layer']
 41 | numimg = int(n_in_fmaps[0])
 42 | 
#(SET_WEIGHT_MEM)
# Text accumulators for generated C++ fragments.  Each one is later
# substituted for the template placeholder with the same upper-case name,
# e.g. set_weight_mem replaces "(SET_WEIGHT_MEM)".
set_weight_mem = ''
set_bias_mem = ''
bconv_reg_pragma = ''
bconv_reg_select = ''
bconv_weight_select = ''
bconv_bias_select = ''

# Running indices used to number the generated conv%dW / b%d_BNFb / fc%dW
# arrays, and running offsets into the flat t_bin_convW / t_BNFb buffers.
conv_idx = 0
bn_idx = 0
dense_idx = 0
offset_weight = 0
offset_bias = 0

#(DEF_CNN_LAYER)
# Builds the 'case N:' entries that dispatch each layer index to its C++
# implementation.  Layer type codes found in initial_options, as used below:
#   0 -> int_conv2d_layer      1 -> bin_conv2d_pipeline
#   2 -> max_pooling_layer     3 -> per-feature-map bit count thresholded into fc_tmp
#   4 -> fc_layer
from collections import Counter
def_cnn_layer = ''

bn_idx = 0
dense_idx = 0
# One pass per distinct layer type.  A Counter built from a sequence only holds
# types that actually occur, so 'cnt > 0' is always true here.  The order of the
# emitted case groups follows the Counter's iteration order.
counter = Counter(initial_options)
for layer_type, cnt in counter.items():
	if layer_type == 0 and cnt > 0:
		# All type-0 layers share one body: their case labels are stacked and
		# fall through to a single call parameterised with the index-0 entries.
		for i in range(len(initial_options)):
			if initial_options[i] == 0:
				def_cnn_layer += '            case %d:\n' % i
		def_cnn_layer += '            int_conv2d_layer<bit_64, bit_%d, 64, %d, %d, %d>\n            ( in_img, fb_tmp, conv0W, b0_BNFb);\n            break;\n' % (max_bconv_width,int(n_ou_fmaps[0]),int(infmap_siz[0]),int(infmap_siz[0]))

	elif layer_type == 1 and cnt > 0:
		# All type-1 layers share one body; the per-layer sizes are read at
		# run time from the fsize/n_in/n_out arrays in the generated code.
		for i in range(len(initial_options)):
			if initial_options[i] == 1:
				def_cnn_layer += '            case %d:\n' % i
		def_cnn_layer += '            bin_conv2d_pipeline(fb_tmp,bin_layer_idx,fsize[layer],n_in[layer],n_out[layer]);\n            bin_layer_idx++;\n            break;\n'

	elif layer_type == 2 and cnt > 0:
		for i in range(len(initial_options)):
			if initial_options[i] == 2:
				def_cnn_layer += '            case %d:\n' % i
		# NOTE(review): 'i' below is the value left over from the loop above,
		# i.e. the index of the LAST layer, not of a type-2 layer -- confirm
		# that infmap_siz[last] is the intended template argument.
		def_cnn_layer += '            max_pooling_layer<bit_%d, %d, %d>(fb_tmp);\n            break;\n' % (max_bconv_width,int(imgsiz),int(infmap_siz[i]))

	elif layer_type == 3 and cnt > 0:
		# Each type-3 layer gets its own case body.  The emitted C++ walks a
		# one-bit mask over the output features; for each feature it counts the
		# pixels of fb_tmp that have that bit set and writes 1 to fc_tmp[of]
		# when the count reaches siz*siz/2, otherwise 0.
		for i in range(len(initial_options)):
			if initial_options[i] == 3:
				def_cnn_layer += '            case %d:\n' % i
				def_cnn_layer += '            {\n'
				def_cnn_layer += '                ap_int<%d>mask = 0x1;\n' % int(n_in_fmaps[i])
				def_cnn_layer += '                for( of = 0; of < %d; of++){\n' % int(n_ou_fmaps[i])
				def_cnn_layer += '                	ap_int<11> tmp = 0;\n'
				def_cnn_layer += '                	for( y = 0; y < %d; y++){\n' % int(infmap_siz[i])
				def_cnn_layer += '                		for( x = 0; x < %d; x++){\n' % int(infmap_siz[i])
				def_cnn_layer += '                			if( (fb_tmp[y][x] & mask) != 0)\n'
				def_cnn_layer += '                				tmp++;\n'
				def_cnn_layer += '                		}\n'
				def_cnn_layer += '                	}\n'
				def_cnn_layer += '                	if( tmp >= %d*%d/2)\n' % (int(infmap_siz[i]),int(infmap_siz[i]))
				def_cnn_layer += '                		fc_tmp[of] = 1;\n'
				def_cnn_layer += '                	else\n'
				def_cnn_layer += '                		fc_tmp[of] = 0;\n'
				def_cnn_layer += '                	mask = mask << 1;\n'
				def_cnn_layer += '                }\n                }\n            break;\n'
	
	elif layer_type == 4 and cnt > 0:
		# Each type-4 layer gets its own fc_layer call.  bn_idx is advanced for
		# every type 0/1/4 layer passed so far, so b%d_BNFb carries the same
		# number the layer receives when the bias arrays are defined.
		for i in range(len(initial_options)):
			if initial_options[i] == 4:
				def_cnn_layer += '            case %d:\n' % i
				def_cnn_layer += '            fc_layer< %d, %d>( fc_tmp, fc%dW, b%d_BNFb, fc_result);\n            break;\n' % (int(n_ou_fmaps[i]),int(n_in_fmaps[i]),dense_idx,bn_idx)
				bn_idx += 1
				dense_idx += 1
			elif initial_options[i] == 0 or initial_options[i] == 1:
				bn_idx += 1

# Fallback label closing the generated list of cases.
def_cnn_layer += '            default: break;\n'
115 | 
#(DEF_CNN_PARAMETER)
# Emit three C int arrays, one element per layer: fsize (from infmap_siz),
# n_in (from n_in_fmaps) and n_out (from n_ou_fmaps).  Every element is
# formatted with '%3d' and the elements are comma separated.
n_layers = len(initial_options)
def_cnn_parameter = ''
for decl_fmt, per_layer in (
        ('    int fsize[%d] = {', infmap_siz),
        ('    int n_in[%d]  = {', n_in_fmaps),
        ('    int n_out[%d] = {', n_ou_fmaps)):
    cells = ['%3d' % int(per_layer[k]) for k in range(n_layers)]
    def_cnn_parameter += decl_fmt % n_layers + ','.join(cells) + '};\n'
135 | 
#(BCONV_REG_SELECT)
#(BCONV_WEIGHT_SELECT)
#(BCONV_BIAS_SELECT)
# One 'case' per type-1 layer in each of the three selector fragments.
# conv_idx counts type-0 and type-1 layers alike, so the case number of a
# type-1 layer equals the number of its conv%dW / b%d_BNFb arrays.
conv_idx = 0
reg_cases = []
weight_cases = []
bias_cases = []
for i, layer_kind in enumerate(initial_options):
    if layer_kind == 0:
        # Type-0 layers consume an index but get no case here.
        conv_idx += 1
    elif layer_kind == 1:
        fmap_siz = int(infmap_siz[i])

        reg_cases.append('        case  %d:\n' % (conv_idx))
        reg_cases.append('        shift_reg1[ 2 * (%d+2) + 3 - 1] = din;\n' % fmap_siz)
        reg_cases.append('        padding_shift_reg[ 2 * (%d+2) + 3 - 1] = padding; break;\n' % fmap_siz)
        reg_cases.append('        break;\n')

        weight_cases.append('                        case %d:\n' % conv_idx)
        weight_cases.append('                            bx = shift_reg1[ky * (%d+2) + kx];\n' % fmap_siz)
        weight_cases.append('                            bw = (ap_uint<%d>)conv%dW[ofeat][ky*3+kx];\n' % (max_bconv_width, conv_idx))
        weight_cases.append('                            mask = ~(~allzero << %d);\n' % int(n_in_fmaps[i]))
        weight_cases.append('                            is_padding = padding_shift_reg[ky * (%d+2) + kx];\n' % fmap_siz)
        weight_cases.append('                        break;\n')

        bias_cases.append('            	case %d:  bias = b%d_BNFb[ofeat]; break;\n' % (conv_idx, conv_idx))

        conv_idx += 1

# Append the collected cases plus a closing 'default' label to each fragment.
bconv_reg_select += ''.join(reg_cases) + '        default: break;\n'
bconv_weight_select += ''.join(weight_cases) + '                        default: break;\n'
bconv_bias_select += ''.join(bias_cases) + '            	default: break;\n'
162 | 
#(BCONV_REG_PRAGMA)
# One ARRAY_PARTITION pragma per type-1 layer's weight array.  conv_idx
# advances for type-0 and type-1 layers so the array numbers line up with
# the conv%dW definitions.
conv_idx = 0
for layer_kind in initial_options:
    if layer_kind == 1:
        bconv_reg_pragma += '    #pragma HLS ARRAY_PARTITION variable=conv%dW cyclic factor=9 dim=2\n' % conv_idx
    if layer_kind in (0, 1):
        conv_idx += 1
171 | 
# Restart the array numbering for the memory definition / load / read code.
conv_idx = bn_idx = dense_idx = 0

#(READ_WEIGHT_MEM)
#(READ_BIAS_MEM)
read_bias_mem = ''
read_weight_mem = ''

def_weight_mem = ''
def_bias_mem = ''


def _bias_set_code(bn, n_out, offset):
    """Return C++ copying n_out values from t_BNFb (at offset) into b<bn>_BNFb."""
    return ''.join([
        '    printf("load b%d_BNFb\\n");\n' % bn,
        '    offset = %d;\n' % offset,
        '    for( of = 0; of < %d; of++){\n' % n_out,
        '        b%d_BNFb[of] = t_BNFb[of+offset];\n' % bn,
        '    }\n',
    ])


def _bias_read_code(bn, n_out, offset):
    """Return C++ reading n_out lines of b<bn>_BNFb.txt into t_BNFb at offset."""
    return ''.join([
        '    printf("b%d_BNFb.txt\\n");\n' % bn,
        '    if( (fp = fopen("b%d_BNFb.txt", "r")) == NULL)fprintf(stderr,"CANNOT OPEN\\n");\n' % bn,
        '    offset = %d;\n' % offset,
        '    for( of = 0; of < %d; of++){\n' % n_out,
        '        if( fgets( line, 256, fp) == NULL)fprintf(stderr,"EMPTY FILE READ\\n");\n',
        # The "%d" below is C format text, so no Python formatting is applied.
        '        sscanf( line, "%d", &d_value);\n',
        '        t_BNFb[of+offset] = d_value;\n',
        '    }\n',
        '    fclose(fp);\n',
    ])


# Walk the layers in order.  Type 0/1 layers get 3x3 weights packed one bit per
# input feature map into conv<N>W; type 4 layers get one-bit fc<N>W weights.
# Both kinds get a b<N>_BNFb bias array.  offset_weight / offset_bias track
# where each layer starts inside the flat t_bin_convW / t_BNFb buffers.
for i, layer_kind in enumerate(initial_options):
    if layer_kind == 0 or layer_kind == 1:
        n_in = int(n_in_fmaps[i])
        n_out = int(n_ou_fmaps[i])

        set_weight_mem += ''.join([
            '    printf("load conv%dW\\n");\n' % conv_idx,
            '    offset = %d;\n' % offset_weight,
            '    for( of = 0; of < %d; of++){\n' % n_out,
            '        for( y = 0; y < 3; y++){\n',
            '            for( x = 0; x < 3; x++){\n',
            '                ap_uint<%d>tmp = 0x1;\n' % n_in,
            '                for( inf = 0; inf < %d; inf++){\n' % n_in,
            '                     if( t_bin_convW[of*%d*3*3+inf*3*3+y*3+x+offset] == 1){\n' % n_in,
            '                         conv%dW[of][y*3+x] |= tmp;\n' % conv_idx,
            '                     }\n',
            '                tmp = tmp << 1;\n',
            '                }\n',
            '            }\n',
            '        }\n',
            '    }\n',
        ])

        set_bias_mem += _bias_set_code(bn_idx, n_out, offset_bias)

        read_weight_mem += ''.join([
            '    printf("conv%dW.txt\\n");\n' % conv_idx,
            '    if( (fp = fopen("conv%dW.txt", "r")) == NULL)fprintf(stderr,"CANNOT OPEN\\n");\n' % conv_idx,
            '    offset = %d;\n' % offset_weight,
            '    for( of = 0; of < %d; of++){\n' % n_out,
            '        for( inf = 0; inf < %d; inf++){\n' % n_in,
            '            for( y = 0; y < 3; y++){\n',
            '                for( x = 0; x < 3; x++){\n',
            '                    if( fgets( line, 256, fp) == NULL)fprintf(stderr,"EMPTY FILE READ\\n"); sscanf( line, "%d", &d_value);\n',
            '                    t_bin_convW[of*%d*3*3+inf*3*3+y*3+x+offset] = d_value;\n' % n_in,
            '                }\n',
            '            }\n',
            '        }\n',
            '    }\n',
            '    fclose(fp);\n',
        ])

        read_bias_mem += _bias_read_code(bn_idx, n_out, offset_bias)

        def_weight_mem += 'ap_int<%d>  conv%dW[%d][3*3];\n' % (n_in, conv_idx, n_out)
        # Type-0 layers get a 20-bit bias array, type-1 layers a 16-bit one.
        if layer_kind == 0:
            def_bias_mem += 'ap_int<20> b%d_BNFb[%d];\n' % (bn_idx, n_out)
        else:
            def_bias_mem += 'ap_int<16> b%d_BNFb[%d];\n' % (bn_idx, n_out)

        conv_idx += 1
        bn_idx += 1
        offset_weight += (n_in * n_out * 3 * 3)
        offset_bias += n_out
    elif layer_kind == 4:
        n_in = int(n_in_fmaps[i])
        n_out = int(n_ou_fmaps[i])

        set_weight_mem += ''.join([
            '    printf("load fc%dW\\n");\n' % dense_idx,
            '    offset = %d;\n' % offset_weight,
            '    for( of = 0; of < %d; of++){\n' % n_out,
            '        for( inf = 0; inf < %d; inf++){\n' % n_in,
            '            fc%dW[of][inf] = (ap_int<1>)t_bin_convW[of*%d+inf+offset];\n' % (dense_idx, n_in),
            '        }\n',
            '    }\n',
        ])

        set_bias_mem += _bias_set_code(bn_idx, n_out, offset_bias)

        read_weight_mem += ''.join([
            '    printf("fc%dW.txt\\n");\n' % dense_idx,
            '    if( (fp = fopen("fc%dW.txt", "r")) == NULL)fprintf(stderr,"CANNOT OPEN\\n");\n' % dense_idx,
            '    offset = %d;\n' % offset_weight,
            '    for( of = 0; of < %d; of++){\n' % n_out,
            '        for( inf = 0; inf < %d; inf++){\n' % n_in,
            '            if( fgets( line, 256, fp) == NULL)fprintf(stderr,"EMPTY FILE READ\\n"); sscanf( line, "%d", &d_value);\n',
            '            t_bin_convW[of*%d+inf+offset] = d_value;\n' % n_in,
            '        }\n',
            '    }\n',
            '    fclose(fp);\n',
        ])

        read_bias_mem += _bias_read_code(bn_idx, n_out, offset_bias)

        def_weight_mem += 'ap_int<1>  fc%dW[%d][%d];\n' % (dense_idx, n_out, n_in)
        def_bias_mem += 'ap_int<16> b%d_BNFb[%d];\n' % (bn_idx, n_out)

        dense_idx += 1
        bn_idx += 1
        offset_weight += (n_in * n_out)
        offset_bias += n_out
289 | 
# Check # of f.maps
# Compare neighbouring n_in_fmaps entries from index 1 onwards.  If any pair
# differs, the generated XOR expression is additionally AND-ed with 'mask';
# otherwise the plain XOR is emitted.
_fmaps_differ = any([
    int(n_in_fmaps[k + 1]) != int(n_in_fmaps[k + 2])
    for k in range(len(initial_options) - 2)
])
if _fmaps_differ:
    bin_xor_mac = 'bxor = (ap_uint<(MAX_BCONV_WIDTH)>)(bx ^ bw) & mask;'
else:
    bin_xor_mac = 'bxor = (ap_uint<(MAX_BCONV_WIDTH)>)(bx ^ bw);'
295 | 
# Placeholder -> replacement text, applied in exactly this order to every
# template.  The order matters: the text inserted for (BIN_XOR_MAC) itself
# contains (MAX_BCONV_WIDTH), which is resolved by the later entry.
_TEMPLATE_SUBSTITUTIONS = (
    ("(BIAS_SIZ)", str(bias_siz)),
    ("(BIN_XOR_MAC)", bin_xor_mac),
    ("(KSIZ)", str(ksiz)),
    ("(MAX_DENSE_SIZ)", str(max_dense_siz)),
    ("(OUT_DENSE_SIZ)", str(out_dense_siz)),
    ("(WEIGHT_SIZ)", str(weight_siz)),
    ("(MAX_BCONV_WIDTH)", str(max_bconv_width)),
    ("(NUM_LAYER)", str(num_layer)),
    ("(IMGSIZ)", str(imgsiz)),
    ("(NUMIMG)", str(numimg)),

    ("(BCONV_REG_PRAGMA)", bconv_reg_pragma),
    ("(BCONV_REG_SELECT)", bconv_reg_select),
    ("(BCONV_BIAS_SELECT)", bconv_bias_select),
    ("(BCONV_WEIGHT_SELECT)", bconv_weight_select),
    ("(DEF_CNN_PARAMETER)", def_cnn_parameter),
    ("(DEF_CNN_LAYER)", def_cnn_layer),
    ("(DEF_BIAS_MEM)", def_bias_mem),
    ("(DEF_WEIGHT_MEM)", def_weight_mem),
    ("(SET_BIAS_MEM)", set_bias_mem),
    ("(SET_WEIGHT_MEM)", set_weight_mem),
    ("(READ_BIAS_MEM)", read_bias_mem),
    ("(READ_WEIGHT_MEM)", read_weight_mem),
)


def _render_template(template_path, output_path):
    """Fill in one C++ template and write the result.

    Reads template_path, applies every entry of _TEMPLATE_SUBSTITUTIONS in
    order, and writes the text to output_path (overwriting it).  Both files
    are closed even if reading, substituting or writing raises.

    No placeholder contains a newline, so substituting over the whole text
    gives the same result as substituting line by line.
    """
    with open(template_path) as template_fh:
        text = template_fh.read()
    for placeholder, replacement in _TEMPLATE_SUBSTITUTIONS:
        text = text.replace(placeholder, replacement)
    with open(output_path, 'w') as output_fh:
        output_fh.write(text)


# generate C++ code for a binarized CNN ------------------------------------
_render_template('template_cpp_r7_bcnn.cpp',
                 args.config_path + "/sdsoc/cnn.cpp")

# generate C++ main code ---------------------------------------------------
_render_template('template_cpp_r7_main.cpp',
                 args.config_path + "/sdsoc/main.cpp")

# generate C++ main code including a socket communication via an Ethernet ------------
_render_template('template_cpp_r7_socket_main.cpp',
                 args.config_path + "/sdsoc/socket_main.cpp")
409 | 
410 | ###########################################################################################
411 | # END OF PROGRAM
412 | ###########################################################################################
413 | 


--------------------------------------------------------------------------------
/guinness.py:
--------------------------------------------------------------------------------
   1 | # -----------------------------------------------------------------------
   2 | # guinness.py
   3 | # A GUI based Neural NEtwork SyntheSizer for an FPGA deep learning
   4 | #
   5 | # Creation Date   : 04/Aug./2017
   6 | # Copyright (C) <2017> Hiroki Nakahara, All rights reserved.
   7 | # 
   8 | # Released under the GPL v2.0 License.
   9 | # 
  10 | # Acknowledgements:
  11 | # This source code is based on following projects:
  12 | #
  13 | # Chainer binarized neural network by Daisuke Okanohara
  14 | # https://github.com/hillbig/binary_net
  15 | # Various CNN models including Deep Residual Networks (ResNet) 
  16 | #  for CIFAR10 with Chainer by mitmul
  17 | # https://github.com/mitmul/chainer-cifar10
  18 | # -----------------------------------------------------------------------
  19 | 
  20 | import sys,random,time,os
  21 | from PyQt4 import QtGui, QtCore
  22 | from matplotlib.backends.backend_qt4agg import FigureCanvasQTAgg as FigureCanvas
  23 | from matplotlib.figure import Figure
  24 | import numpy as np
  25 | from subprocess import check_call
  26 | import pickle
  27 | import subprocess
  28 | #import seaborn as sns # this is optional...
  29 | import shutil
  30 | 
# Global variables: module-level defaults, reset by Layout.__init__.
n_dim = 3 # number of input dimensions for the first layer (BGR format)
img_siz = 32 # default input image size
n_class = 10 # default number of classes to infer
is_load_pretrain = 0 # presumably set non-zero once a pretrained model is loaded -- TODO confirm
  36 | 
  37 | class Layout(QtGui.QWidget):
  38 |     def __init__(self):
  39 |         super(Layout,self).__init__()
  40 | 
  41 |         global is_load_pretrain
  42 |         global n_dim # BGR color image
  43 |         global img_siz # 32x32 image
  44 |         global n_class # #classes
  45 |         
  46 |         is_load_pretrain = 0
  47 |         n_dim = 3
  48 |         img_siz = 32
  49 |         n_class = 10
  50 | 
  51 |         self.setMyself()
  52 |         self.set_project_name()
  53 |         self.show()
  54 | 
  55 |     def setMyself(self):
  56 |         self.setGeometry(50,50,1100,600)
  57 |         self.setWindowTitle("GUINNESS: A GUI based Neural NEtwork SyntheSizer")
  58 | 
  59 |     def set_project_name(self):
  60 |         ##################################################################
  61 |         # Left Column
  62 |         ##################################################################
  63 |         vbox_left_column = QtGui.QVBoxLayout()
  64 | #        vbox_left_column.setGeometry(QtCore.QRect(0,0,800,24))
  65 | 
  66 |         # project setup --------------------------------------------------
  67 |         project_setup_box = QtGui.QGroupBox("1. Project Setup")
  68 |         project = QtGui.QLabel('Project Name')
  69 |         self.projectEdit = QtGui.QLineEdit()
  70 |         self.projectEdit.setText('Project1')
  71 | 
  72 |         hbox = QtGui.QHBoxLayout()
  73 |         hbox.addWidget(project)
  74 |         hbox.addWidget(self.projectEdit)
  75 | 
  76 |         vbox_proj = QtGui.QVBoxLayout()
  77 |         vbox_proj.addLayout(hbox)
  78 | 
  79 |         ProjSaveButton = QtGui.QPushButton("SAVE")
  80 |         self.connect(ProjSaveButton,QtCore.SIGNAL('clicked()'),self.SaveProj)
  81 |         ProjLoadButton = QtGui.QPushButton("LOAD")
  82 |         self.connect(ProjLoadButton,QtCore.SIGNAL('clicked()'),self.LoadProj)
  83 |         hbox_proj = QtGui.QHBoxLayout()
  84 |         hbox_proj.addWidget(ProjSaveButton)
  85 |         hbox_proj.addWidget(ProjLoadButton)
  86 |         vbox_proj.addLayout(hbox_proj)
  87 | 
  88 |         project_setup_box.setLayout(vbox_proj)
  89 |         vbox_left_column.addWidget(project_setup_box)
  90 | 
  91 |         # cnn setup table ------------------------------------------------
  92 |         cnn_setup_box = QtGui.QGroupBox("2. CNN Specificaion")
  93 | 
  94 |         vbox_cnn = QtGui.QVBoxLayout()
  95 | 
  96 |         cnntype = QtGui.QLabel('Type')
  97 |         self.combo1 = QtGui.QComboBox()
  98 |         self.combo1.addItem("LeNet5")
  99 |         self.combo1.addItem("TinyCNN")
 100 |         self.combo1.addItem("VGG9ave")
 101 |         self.combo1.addItem("VGG11ave")
 102 |         self.combo1.addItem("VGG16ave")
 103 |         self.combo1.addItem("VGG19ave")
 104 |         LoadButton = QtGui.QPushButton("LOAD CONFIG")
 105 |         self.connect(LoadButton,QtCore.SIGNAL('clicked()'),self.LoadConfig)
 106 |         hbox2 = QtGui.QHBoxLayout()
 107 |         hbox2.addWidget(cnntype)
 108 |         hbox2.addWidget(self.combo1)
 109 |         hbox2.addWidget(LoadButton)
 110 | 
 111 |         vbox_cnn.addLayout(hbox2)
 112 | 
 113 |         self.table = QtGui.QTableWidget()
 114 |         self.table.setColumnCount(5)
 115 | 
 116 |         labels = ["Type","In #Fmaps","Out #Fmaps","In Fsiz","Train?"]
 117 |         self.table.setHorizontalHeaderLabels(labels);
 118 |         self.table.setColumnWidth(0, 90);
 119 |         self.table.setColumnWidth(1, 80);
 120 |         self.table.setColumnWidth(2, 80);
 121 |         self.table.setColumnWidth(3, 50);
 122 |         self.table.setColumnWidth(4, 50);
 123 | 
 124 |         self.LoadConfig()
 125 | 
 126 |         self.table.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
 127 |         self.table.customContextMenuRequested.connect(self.contextMenu_)
 128 | 
 129 |         vbox_cnn.addWidget(self.table)
 130 |         cnn_setup_box.setLayout(vbox_cnn)
 131 |         vbox_left_column.addWidget(cnn_setup_box)
 132 | 
 133 |         ##################################################################
 134 |         # Right Column
 135 |         ##################################################################
 136 |         vbox_right_column = QtGui.QVBoxLayout()
 137 | 
 138 |         training_setup_box = QtGui.QGroupBox("3. Training")
 139 |         vbox_training = QtGui.QVBoxLayout()
 140 |         # parameters for traning -----------------------------------------
 141 |         # training data
 142 |         tdlabel = QtGui.QLabel('Training Data')
 143 |         ld_button = QtGui.QPushButton("Load")
 144 |         ld_button.clicked.connect(self.open_FileDialog)
 145 |         self.td_label = QtGui.QLineEdit("image.pkl")
 146 | 
 147 |         hbox_td = QtGui.QHBoxLayout()
 148 |         hbox_td.addWidget(tdlabel) 
 149 |         hbox_td.addWidget(ld_button) 
 150 |         hbox_td.addWidget(self.td_label)
 151 |         vbox_training.addLayout(hbox_td)
 152 | 
 153 |         # training label
 154 |         tllabel = QtGui.QLabel('Training Label')
 155 |         ll_button = QtGui.QPushButton("Load")
 156 |         ll_button.clicked.connect(self.open_FileDialog_tl)
 157 |         self.tl_label = QtGui.QLineEdit("label.pkl") 
 158 | 
 159 |         hbox_tl = QtGui.QHBoxLayout()
 160 |         hbox_tl.addWidget(tllabel) 
 161 |         hbox_tl.addWidget(ll_button) 
 162 |         hbox_tl.addWidget(self.tl_label)
 163 |         vbox_training.addLayout(hbox_tl)
 164 | 
 165 |         # # of training
 166 |         n_trains = QtGui.QLabel('Number of traning')
 167 |         self.n_trains_Edit = QtGui.QLineEdit()
 168 |         self.n_trains_Edit.setText("10")
 169 | 
 170 |         hbox_ntrain = QtGui.QHBoxLayout()
 171 |         hbox_ntrain.addWidget(n_trains)
 172 |         hbox_ntrain.addWidget(self.n_trains_Edit)
 173 | 
 174 |         vbox_training.addLayout(hbox_ntrain)
 175 | 
 176 |         # optimizer
 177 |         hbox3 = QtGui.QHBoxLayout()
 178 |         cnntype = QtGui.QLabel('Optimizer')        
 179 |         self.b11=QtGui.QRadioButton("SGD")
 180 |         self.b11.setChecked(True)
 181 |         self.b12=QtGui.QRadioButton("Adam")
 182 |         bg1=QtGui.QButtonGroup()
 183 |         bg1.addButton(self.b11)
 184 |         bg1.addButton(self.b12)
 185 |         hbox3.addWidget(cnntype)
 186 |         hbox3.addWidget(self.b11)
 187 |         hbox3.addWidget(self.b12)
 188 | 
 189 |         vbox_training.addLayout(hbox3)
 190 | 
 191 |         # Use GPU?
 192 |         self.cb = QtGui.QCheckBox('Use GPU')
 193 |         self.cb.setChecked(True)
 194 |         vbox_training.addWidget(self.cb)
 195 | 
 196 |         # message
 197 |         train_process = QtGui.QLabel('Training Process View')
 198 |         vbox_training.addWidget(train_process)
 199 | 
 200 |         # matplotlib
 201 |         self.canvas = Canvas()
 202 | 
 203 |         self.canvas.refresh(int(self.n_trains_Edit.text()))
 204 | 
 205 |         vbox_training.addWidget(self.canvas)
 206 |         
 207 |         # training button
 208 |         hbox_control = QtGui.QHBoxLayout()
 209 |         self.bstart=QtGui.QPushButton("Start Training")
 210 |         bg1.addButton(self.bstart)
 211 |         self.bstart.clicked.connect(self.start_training)
 212 |         bstop=QtGui.QPushButton("Stop Training")
 213 |         bstop.setVisible(False)
 214 |         bg1.addButton(bstop)
 215 |         hbox_control.addWidget(self.bstart)
 216 |         hbox_control.addWidget(bstop)
 217 | 
 218 |         vbox_training.addLayout(hbox_control)
 219 |         training_setup_box.setLayout(vbox_training)
 220 |         vbox_right_column.addWidget(training_setup_box)
 221 | 
 222 |         # FPGA implementation ------------------------------------------------
 223 |         # Select fpga board
 224 |         fpga_setup_box = QtGui.QGroupBox("4. C/C++ Code Generation for FPGA Implementation")
 225 |         vbox_fpga = QtGui.QVBoxLayout()
 226 | 
 227 |         fpgaboard = QtGui.QLabel('Target FPGA Board')
 228 |         self.combo2 = QtGui.QComboBox()
 229 |         self.combo2.addItem("zed")
 230 |         self.combo2.addItem("zybo")
 231 |         self.combo2.addItem("zc702")
 232 |         self.combo2.addItem("zcu102")
 233 |         hbox3 = QtGui.QHBoxLayout()
 234 |         hbox3.addWidget(fpgaboard)
 235 |         hbox3.addWidget(self.combo2)
 236 | 
 237 |         vbox_fpga.addLayout(hbox3)
 238 | 
 239 | #        # Setup Clock Frequency
 240 | #        clkfreq = QtGui.QLabel('Clock Frequency (MHz)')
 241 | #        combo3 = QtGui.QComboBox()
 242 | #        combo3.addItem("100.0")
 243 | #        combo3.addItem("147.6")
 244 | #        combo3.addItem("150.0")
 245 | #        combo3.addItem("200.0")
 246 | #        hbox4 = QtGui.QHBoxLayout()
 247 | #        hbox4.addWidget(clkfreq)
 248 | #        hbox4.addWidget(combo3)
 249 | #
 250 | #        vbox_fpga.addLayout(hbox4)
 251 | 
 252 |         # Run Bitstream Generation
 253 | #        bstart_bitgen=QtGui.QPushButton("Generate Bitstream")
 254 |         bstart_bitgen=QtGui.QPushButton("Generate C/C++ Code")
 255 |         bg1.addButton(bstart_bitgen)
 256 |         bstart_bitgen.clicked.connect(self.start_bitgen)
 257 | 
 258 |         vbox_fpga.addWidget(bstart_bitgen)
 259 | 
 260 |         fpga_setup_box.setLayout(vbox_fpga)
 261 |         vbox_right_column.addWidget(fpga_setup_box)
 262 | 
 263 |         # -------------------------------------------------------
 264 |         # overall layout
 265 |         # -------------------------------------------------------
 266 |         hbox_global = QtGui.QHBoxLayout()
 267 |         hbox_global.addLayout(vbox_left_column)
 268 |         hbox_global.addLayout(vbox_right_column)
 269 | 
 270 |         self.setLayout(hbox_global)
 271 | 
 272 |     # -----------------------------------------------------------
 273 |     # Context Menu for the CNN configuration table
 274 |     # -----------------------------------------------------------
 275 |     def contextMenu_(self, event):
 276 |         menu = QtGui.QMenu()
 277 |         addAction = menu.addAction('Add layer',)
 278 |         delAction = menu.addAction('Delete layer',)
 279 | 
 280 |         action = menu.exec_(QtGui.QCursor.pos())
 281 | 
 282 |         initial_options = []
 283 |         n_in_fmaps = []
 284 |         n_ou_fmaps = []
 285 |         infmap_siz = []
 286 | 
 287 |         for i in range(self.table.rowCount()):
 288 |             itm1 = self.table.cellWidget(i,0)
 289 |             itm2 = self.table.item(i,1)
 290 |             itm3 = self.table.item(i,2) 
 291 |             itm4 = self.table.item(i,3) 
 292 |             val1 = itm1.currentIndex()
 293 |             val2 = str(itm2.text())
 294 |             val3 = str(itm3.text())
 295 |             val4 = str(itm4.text())
 296 | 
 297 |             initial_options.append(val1)
 298 |             n_in_fmaps.append(val2)
 299 |             n_ou_fmaps.append(val3)
 300 |             infmap_siz.append(val4)
 301 | 
 302 |         if action == addAction:
 303 |             initial_options.insert(self.table.currentRow(),1)
 304 |             n_in_fmaps.insert(self.table.currentRow(),'0')
 305 |             n_ou_fmaps.insert(self.table.currentRow(),'0')
 306 |             infmap_siz.insert(self.table.currentRow(),'0')
 307 | 
 308 |         elif action == delAction:
 309 |             initial_options.pop(self.table.currentRow())
 310 |             n_in_fmaps.pop(self.table.currentRow())
 311 |             n_ou_fmaps.pop(self.table.currentRow())
 312 |             infmap_siz.pop(self.table.currentRow())
 313 | 
 314 |         self.table.setRowCount(len(initial_options))
 315 |         for index in range(len(initial_options)):
 316 |             combo = QtGui.QComboBox()
 317 |             for t in self.combo_box_options:
 318 |                 combo.addItem(t)
 319 |             combo.setCurrentIndex(initial_options[index])
 320 |             self.table.setCellWidget(index,0,combo)
 321 |             item1 = QtGui.QTableWidgetItem(n_in_fmaps[index])
 322 |             self.table.setItem(index,1,item1)
 323 |             item2 = QtGui.QTableWidgetItem(n_ou_fmaps[index])
 324 |             self.table.setItem(index,2,item2)
 325 |             item3 = QtGui.QTableWidgetItem(infmap_siz[index])
 326 |             self.table.setItem(index,3,item3)
 327 | 
 328 |             item4 = QtGui.QCheckBox('')
 329 |             item4.setChecked(True) # isChecked() == True?False?
 330 |             self.table.setCellWidget(index,4,item4)
 331 | 
 332 |     # -----------------------------------------------------------------------
 333 |     # Performe Training
 334 |     #  First, generate customized net.py
 335 |     #   then, call external trainer.py
 336 |     #  During training, the GUI plots traning process
 337 |     # -----------------------------------------------------------------------
 338 |     def start_training(self):
 339 |         # remove temporary logfile, if new traning start
 340 |         global is_load_pretrain
 341 |         if is_load_pretrain == 0 and os.path.exists("./temp_log.csv") == True:
 342 |             print("CLEARN UP LOGFILE")
 343 | #            os.remove("temp_log.csv")
 344 | 
 345 |         # generate CNN python code (this version only supports chainer 1.21-24.0)
 346 |         print("[INFO] GENERATE PYTHON CODE FOR CNN")
 347 |         f = open('header.txt')
 348 |         pcode = f.read()
 349 |         pcode += '\n'
 350 |         f.close()
 351 | 
 352 |         conv_idx = 0
 353 |         bn_idx = 0
 354 |         dense_idx = 0
 355 |         for i in range(self.table.rowCount()):
 356 |             itm1 = self.table.cellWidget(i,0)
 357 |             itm2 = self.table.item(i,1)
 358 |             itm3 = self.table.item(i,2) 
 359 |             itm4 = self.table.item(i,3) 
 360 |             val1 = int(itm2.text())
 361 |             val2 = int(itm3.text())
 362 |             val3 = int(itm4.text())
 363 |             
 364 |             if itm1.currentText() == 'Conv(Int)':
 365 |                 pcode += '            conv%d=IC.Convolution2D(%d,%d,3, stride=1, pad=1, nobias=True),\n' % (conv_idx,val1,val2)
 366 |                 pcode += '            b%d=L.BatchNormalization(%d)' % (bn_idx,val2)
 367 |                 conv_idx += 1
 368 |                 bn_idx += 1
 369 |             elif itm1.currentText() == 'Conv(Bin)':
 370 |                 pcode += '            conv%d=BC.Convolution2D(%d,%d,3, stride=1, pad=1, nobias=True),\n' % (conv_idx,val1,val2)
 371 |                 pcode += '            b%d=L.BatchNormalization(%d)' % (bn_idx,val2)
 372 |                 conv_idx += 1
 373 |                 bn_idx += 1
 374 |             elif itm1.currentText() == 'Max Pool':
 375 |                 pass
 376 |             elif itm1.currentText() == 'Ave Pool':
 377 |                 pass
 378 |             else: # Dense
 379 |                 pcode += '            fc%d=BL.BinaryLinear(%d,%d),\n' % (dense_idx,val1,val2)
 380 |                 pcode += '            b%d=L.BatchNormalization(%d)' % (bn_idx,val2)
 381 |                 dense_idx += 1
 382 |                 bn_idx += 1
 383 |             
 384 |             if i == self.table.rowCount() - 1:
 385 |                 pcode += '\n        )\n'
 386 |             else:
 387 |                 if itm1.currentText() == 'Max Pool' or itm1.currentText() == 'Ave Pool':
 388 |                     pass
 389 |                 else:
 390 |                     pcode += ',\n'
 391 | 
 392 | 
 393 |         pcode += '\n    def __call__(self, x, train):\n'
 394 |         conv_idx = 0
 395 |         bn_idx = 0
 396 |         dense_idx = 0
 397 |         for i in range(self.table.rowCount()):
 398 |             itm1 = self.table.cellWidget(i,0)
 399 |             itm2 = self.table.item(i,1)
 400 |             itm3 = self.table.item(i,2) 
 401 |             itm4 = self.table.item(i,3) 
 402 |             val1 = int(itm2.text())
 403 |             val2 = int(itm3.text())
 404 |             val3 = int(itm4.text())
 405 |             
 406 |             if itm1.currentText() == 'Conv(Int)':
 407 |                 pcode += '        h = bst.bst(self.b%d(self.conv%d(x)))\n' % (bn_idx,conv_idx)
 408 |                 bn_idx += 1
 409 |                 conv_idx += 1
 410 |             elif itm1.currentText() == 'Conv(Bin)':
 411 |                 pcode += '        h = bst.bst(self.b%d(self.conv%d(h)))\n' % (bn_idx,conv_idx)
 412 |                 bn_idx += 1
 413 |                 conv_idx += 1
 414 |             elif itm1.currentText() == 'Max Pool':
 415 |                 pcode += '        h = F.max_pooling_2d(h, 2)\n'
 416 |             elif itm1.currentText() == 'Ave Pool':
 417 |                 pcode += '        h = F.average_pooling_2d(h, %d)\n' % val3
 418 |             else: # Dense
 419 |                 if i < self.table.rowCount() - 1:
 420 |                     if i == 0:
 421 |                         pcode += '        h = bst.bst(self.b%d(self.fc%d(x)))\n' % (bn_idx,dense_idx)
 422 |                     else:
 423 |                         pcode += '        h = bst.bst(self.b%d(self.fc%d(h)))\n' % (bn_idx,dense_idx)
 424 |                 else:
 425 |                     pcode += '        h = self.b%d(self.fc%d(h))\n' % (bn_idx,dense_idx)
 426 |                 bn_idx += 1
 427 |                 dense_idx += 1
 428 | 
 429 |         pcode += '        return h'
 430 | 
 431 |         # code generation ----------------------------------------------------
 432 |         f = open('net2.py', 'w')
 433 |         f.write(pcode)
 434 |         f.close()
 435 | 
 436 |         # for test CNN by Python code (eval.py)
 437 |         net3_file = ''
 438 | 
 439 |         net3_file = pcode.replace("=L.","=LBN.")
 440 |         net3_file = net3_file.replace("./","../")
 441 | 
 442 |         # generate project directory if it not exist
 443 |         project_dir = "./" + self.projectEdit.text()
 444 |         if os.path.exists(project_dir) == False:
 445 |             os.mkdir(project_dir)
 446 | 
 447 |         # save Python simulation codes
 448 |         fname = "./" + self.projectEdit.text() + '/net3.py'
 449 |         print("[INFO] Python evaluation codes are seved to %s" % fname)
 450 |         with open(fname,'w') as f:
 451 |             f.write(net3_file)
 452 | 
 453 |         fname = "./" + self.projectEdit.text() + '/eval.py'
 454 |         print("[INFO] COPY evaluation code")
 455 |         shutil.copyfile('eval.py',fname)
 456 | 
 457 |         # setup training -----------------------------------------------------
 458 |         n_iter = int(self.n_trains_Edit.text())
 459 | 
 460 |         train_dataset = self.td_label.text()
 461 |         label_dataset = self.tl_label.text()
 462 |         if self.b11.isChecked() == True:
 463 |             optimizer_alg = "sgd"
 464 |         else:
 465 |             optimizer_alg = "adam"
 466 | 
 467 |         project_name = "temp"
 468 | 
 469 |         project_dir = "./" + self.projectEdit.text()
 470 |         if os.path.exists(project_dir) == False:
 471 |             os.mkdir(project_dir)
 472 | 
 473 |         # start training -----------------------------------------------------
 474 |         if self.cb.isChecked() == True:
 475 |             print("[INFO] START TRAINING: GPU MODE")
 476 |             gpu = "0"
 477 |         else:
 478 |             print("[INFO] START TRAINING: CPU MODE")
 479 |             gpu = "-1"
 480 | 
 481 |         if is_load_pretrain == 1:
 482 |             print("[INFO RESUME TRANINING]")
 483 |             resume = "yes"
 484 |             
 485 |             # copy pre-trained model,log files
 486 |             if os.path.isfile('./temp.model') == True:
 487 |                 os.remove('./temp.model')
 488 |             model_file = "./" + self.projectEdit.text() + '/temp.model'
 489 |             if os.path.isfile(model_file) == True:
 490 |                 print("[INFO] RESUME PRE-TRAINED MODEL FILE %s" % model_file)
 491 |                 shutil.copyfile(model_file,'./temp.model')
 492 |             else:
 493 |                 print("[ERROR] model file %s not found" % model_file)
 494 |                 exit()
 495 | 
 496 |             if os.path.isfile('./temp_log.csv') == True:
 497 |                 os.remove('./temp_log.csv')
 498 |             log_file = "./" + self.projectEdit.text() + '/temp_log.csv'
 499 |             if os.path.isfile(log_file) == True:
 500 |                 print("[INFO] RESUME PRE-TRAINED LOG FILE %s" % log_file)
 501 |                 shutil.copyfile(log_file,'./temp_log.csv')
 502 |             else:
 503 |                 print("[ERROR] log file %s not found" % log_file)
 504 |                 exit()
 505 |             
 506 |         else:
 507 |             resume = "no"
 508 | 
 509 |         # Peform training
 510 |         global n_dim
 511 |         global img_siz
 512 |         
 513 |         subprocess.Popen(["python","train.py","-g",gpu,"--iter",str(n_iter),"--dim",str(n_dim),"--siz",str(img_siz),"--dataset",train_dataset,"--label",label_dataset,"--optimizer",optimizer_alg,"--prefix",project_name,"--lr_decay_iter","100","--resume",resume]) # background job = python train.py &
 514 | 
 515 |         # set process file
 516 |         with open("train_status.txt","w") as f:
 517 |             f.write("run")
 518 | 
 519 |         # eliminate training start button
 520 |         self.bstart.setVisible(False)
 521 | 
 522 |         # Start training check process
 523 |         self.timer = QtCore.QTimer(self)
 524 |         self.timer.timeout.connect(self.updateCanvas)
 525 |         self.timer.start(1000)
 526 |     
 527 |     # -----------------------------------------------------------------------
 528 |     # Update Canvas for training process view
 529 |     # -----------------------------------------------------------------------
 530 |     def updateCanvas(self):
 531 |         global is_load_pretrain
 532 |         log_file = "temp_log.csv"
 533 | 
 534 |         if( os.path.exists(log_file) == True):
 535 |             check = 0
 536 |             n_lines_in_logfile = 0
 537 |             with open(log_file,'r') as f:
 538 |                 n_lines_in_logfile = len(f.readlines())
 539 |                 if n_lines_in_logfile > 2:
 540 |                     check = 1
 541 | 
 542 |             if check == 1:
 543 |                 train_loss,train_acc,test_loss,test_acc = np.loadtxt(log_file, delimiter=',', skiprows=1,usecols=(1,2,5,6),unpack=True)
 544 |                 self.canvas.push_data(train_acc,test_acc,train_loss,test_loss)
 545 |                 self.canvas.refresh(n_lines_in_logfile - 1)
 546 | 
 547 |         with open("train_status.txt", "r") as f:
 548 |             status = f.read()
 549 | 
 550 |             if status != 'run':
 551 |                 print("[INFO] FINISH TRAINING")
 552 |                 project_path = "./" + self.projectEdit.text()
 553 |                 subprocess.Popen(["cp","temp.model",project_path]) # background job = python train.py &
 554 |                 subprocess.Popen(["cp","temp_log.csv",project_path]) # background job = python train.py &
 555 |                 self.timer.stop()
 556 |                 ret = QtGui.QMessageBox.information(None, "Training Status", "Training Finished")
 557 | 
 558 |                 # set continue training mode
 559 |                 self.bstart.setVisible(True)
 560 |                 self.bstart.setText('Continue Training')
 561 |                 is_load_pretrain = 1
 562 | 
 563 |     # -----------------------------------------------------------------------
 564 |     # Save CNN Configuration File
 565 |     # -----------------------------------------------------------------------
 566 |     def save_configfile(self):
 567 |         # generate configuration file
 568 |         print("------------- GENERATE CONFIGURATION FILE --------------")
 569 |         print("TARGET DEVICE: %s" % self.combo2.currentText())
 570 |         print("[INFO] Generate Configuration File")
 571 | 
 572 |         config = {}
 573 |         initial_options = []
 574 |         n_in_fmaps = []
 575 |         n_ou_fmaps = []
 576 |         infmap_siz = []
 577 |         max_dense_siz = 0
 578 |         max_bconv_width = 0
 579 |         bias_siz = 0
 580 |         weight_siz = 0
 581 | 
 582 |         global img_siz
 583 |         global n_class
 584 | 
 585 |         for i in range(self.table.rowCount()):
 586 |             itm1 = self.table.cellWidget(i,0)
 587 |             itm2 = self.table.item(i,1)
 588 |             itm3 = self.table.item(i,2) 
 589 |             itm4 = self.table.item(i,3) 
 590 |             val1 = str(itm2.text())
 591 |             val2 = str(itm3.text())
 592 |             val3 = str(itm4.text())
 593 | 
 594 |             if itm1.currentIndex() == 4:
 595 |                 if max_dense_siz < int(val1):
 596 |                     max_dense_siz = int(val1)
 597 | 
 598 |             if itm1.currentIndex() == 0 or itm1.currentIndex() == 1 or itm1.currentIndex() == 4:
 599 |                 bias_siz += int(val2)
 600 | 
 601 |             if itm1.currentIndex() == 1:
 602 |                 if max_bconv_width < int(val2):
 603 |                     max_bconv_width = int(val2)
 604 | 
 605 |             if itm1.currentIndex() == 0 or itm1.currentIndex() == 1:
 606 |                 weight_siz += (int(val1) * int(val2) * 3 * 3)
 607 | 
 608 |             if itm1.currentIndex() == 4:
 609 |                 weight_siz += (int(val1) * int(val2))
 610 | 
 611 |             initial_options.append(itm1.currentIndex())
 612 |             n_in_fmaps.append(val1)
 613 |             n_ou_fmaps.append(val2)
 614 |             infmap_siz.append(val3)
 615 | 
 616 |         config['initial_options'] = initial_options
 617 |         config['n_in_fmaps'] = n_in_fmaps
 618 |         config['n_ou_fmaps'] = n_ou_fmaps
 619 |         config['infmap_siz'] = infmap_siz
 620 | 
 621 |         config['ksiz'] = 3
 622 |         config['imgsiz'] = infmap_siz[0]
 623 |         config['max_dense_siz'] = max_dense_siz
 624 |         config['out_dense_siz'] = n_ou_fmaps[len(initial_options) - 1]
 625 |         config['bias_siz'] = bias_siz
 626 |         config['weight_siz'] = weight_siz
 627 |         config['max_bconv_width'] = max_bconv_width
 628 |         config['num_layer'] = len(initial_options)
 629 | 
 630 |         config_file = "./" + self.projectEdit.text() + "/config.pickle"
 631 |         with open(config_file, mode='wb') as f:
 632 |             pickle.dump(config, f)
 633 |        
 634 |     # -----------------------------------------------------------------------
 635 |     # Generate Bitstream
 636 |     # -----------------------------------------------------------------------
 637 |     def start_bitgen(self):
 638 |         # generate configuration file
 639 |         print("------------- GENERATE CONFIGURATION FILE --------------")
 640 |         print("TARGET DEVICE: %s" % self.combo2.currentText())
 641 |         print("[INFO] Generate Configuration File")
 642 | 
 643 |         # save configuration file
 644 |         self.save_configfile()
 645 | 
 646 |         # generate SDSoC directory
 647 |         sdsoc_dir = "./" + self.projectEdit.text() + "/sdsoc"
 648 |         if os.path.exists(sdsoc_dir) == False:
 649 |             os.mkdir(sdsoc_dir)
 650 | 
 651 |         # Call C++ code generator for the SDSoC
 652 |         print("[INFO] GENERATE C++ CODE")
 653 |         config_path = "./" + self.projectEdit.text()
 654 | 
 655 |         subprocess.Popen(["python","gen_cpp_code_v3.py","--config_path",config_path]) # background job = python train.py &
 656 | 
 657 |         # generate makefile using template files
 658 |         print("[INFO] GENERATE Makefile for the SDSoC")
 659 |         f = open('template_Makefile')
 660 |         lines2 = f.readlines()
 661 |         f.close()
 662 | 
 663 |         makefile_txt = ''
 664 | 
 665 |         for line in lines2:
 666 |             tmp = line.replace("(CNN_C_SOURCE)","cnn.cpp")
 667 |             tmp = tmp.replace("(ELF_FILE_PATH)",self.projectEdit.text() + ".elf")
 668 |             tmp = tmp.replace("(TARGET_BOARD)",self.combo2.currentText())
 669 | 
 670 |             makefile_txt += tmp
 671 | 
 672 |         makefile_name = "./" + self.projectEdit.text() + "/sdsoc/Makefile"
 673 |         with open(makefile_name,'w') as f:
 674 |             f.write(makefile_txt)
 675 | 
 676 |         # generate sdsoc/sd_card directory
 677 |         print("[INFO] MAKE A DIRECTROY: ./%s/sdsoc/to_sd_card" % self.projectEdit.text())
 678 |         sd_card_dir = "./" + self.projectEdit.text() + "/sdsoc/to_sd_card"
 679 |         if os.path.exists(sd_card_dir) == False:
 680 |             os.mkdir(sd_card_dir)
 681 | 
 682 |         # generate HLS directory
 683 |         print("[INFO] MAKE A DIRECTROY: ./%s/HLS" % self.projectEdit.text())
 684 |         HLS_dir = "./" + self.projectEdit.text() + "/HLS"
 685 |         if os.path.exists(HLS_dir) == False:
 686 |             os.mkdir(HLS_dir)
 687 | 
 688 |         # convert trained *.model to weight text file
 689 |         print("[INFO] CONVERT TRAINED WEIGHTS INTO TEXT FILE")
 690 |         config_path = "./" + self.projectEdit.text()
 691 |         proc = subprocess.Popen(["python","conv_npz2txt_v2.py","--config_path",config_path]) # background job = python train.py &
 692 |         proc.wait()
 693 | 
 694 |         print(" ... [FINISH]")
 695 | 
 696 |         # copy benchmark file from trainer, if it exist
 697 |         print("[INFO] COPY BENCHMARK IMAGE FILE")
 698 |         image_file = "./test_img.txt"
 699 |         if os.path.isfile(image_file) == True:
 700 |             sd_card_dir = "./" + self.projectEdit.text() + "/sdsoc/to_sd_card"
 701 |             subprocess.Popen(["cp",image_file,sd_card_dir])
 702 |             print(" ... [FINISH]")
 703 |         else:
 704 |             print("FAILURE")
 705 | 
 706 |         # performe system generation, call SDSoC by make command
 707 |         # (subprocess!!!)
 708 | #        print("[INFO] GENERATE BITSTREAM, WAIT TENS MINUTES...")
 709 |         print("[INFO] SUCCESSFULLY C/C++ CODE GENERATION")
 710 |         print("[INFO] PLEASE, ``SAVE'' YOUR CURRENT DESIGN")
 711 | 
 712 | #        # show message
 713 | #        ret = QtGui.QMessageBox.information(None, "Bistream Generation Status", "C++ code generated")
 714 | 
 715 |     # -----------------------------------------------------------------------
 716 |     # FileOpen Dialog for Project Configuration
 717 |     # -----------------------------------------------------------------------
 718 |     # save configuration file
 719 |     def SaveProj(self):
 720 |         config = ''
 721 |         config += 'PROJECT_NAME: %s\n' % self.projectEdit.text()
 722 |         config += 'TRAINING_DATA: %s\n' % self.td_label.text()
 723 |         config += 'TRAINING_LABEL: %s\n' % self.tl_label.text()
 724 |         config += 'NUM_OF_EPOCS: %d\n' % int(self.n_trains_Edit.text())
 725 |         if self.b11.isChecked() == True:
 726 |             config += 'OPTIMIZER: SGD\n'
 727 |         else:
 728 |             config += 'OPTIMIZER: Adam\n'
 729 |         if self.cb.isChecked() == True:
 730 |             config += 'USE_GPU: YES\n'
 731 |         else:
 732 |             config += 'USE_GPU: NO\n'
 733 |         config += 'FPGA_BOARD: %s\n' % self.combo2.currentText()
 734 | 
 735 |         config_file = "./" + self.projectEdit.text() + "/" + self.projectEdit.text() + ".proj"
 736 |         config_dir = "./" + self.projectEdit.text()
 737 |         if os.path.exists(config_dir) == False:
 738 |             os.mkdir(config_dir)
 739 | 
 740 |         with open(config_file, mode='w') as f:
 741 |             f.write(config)
 742 | 
 743 |         self.save_configfile()
 744 | 
 745 |     # load project configuration file
 746 |     def LoadProj(self):
 747 |         global is_load_pretrain
 748 |         filename = QtGui.QFileDialog.getOpenFileName(self, 'File Open', './')
 749 | 
 750 |         with open(filename, mode='r') as f:
 751 |             lines2 = f.readlines()
 752 |         
 753 |             for line in lines2:
 754 |                 key, val = line.split()
 755 |                 
 756 |                 if key == 'PROJECT_NAME:':
 757 |                     self.projectEdit.setText(val)
 758 |                 elif key == 'TRAINING_DATA:':
 759 |                     self.td_label.setText(val)
 760 |                 elif key == 'TRAINING_LABEL:':
 761 |                     self.tl_label.setText(val)
 762 |                 elif key == 'NUM_OF_EPOCS:':
 763 |                     self.n_trains_Edit.setText(val)
 764 |                 elif key == 'OPTIMIZER:':
 765 |                     if val == 'SGD':
 766 |                         self.b11.setChecked(True)
 767 |                         self.b12.setChecked(False)
 768 |                     else:
 769 |                         self.b11.setChecked(False)
 770 |                         self.b12.setChecked(True)
 771 |                 elif key == 'USE_GPU:':
 772 |                     if val == 'YES':
 773 |                         self.cb.setChecked(True)
 774 |                     else:
 775 |                         self.cb.setChecked(False)
 776 |                 elif key == 'FPGA_BOARD:':
 777 |                     if val == 'zed':
 778 |                         idx = 0
 779 |                     elif val == 'zybo':
 780 |                         idx = 1
 781 |                     elif val == 'vc702':
 782 |                         idx = 2
 783 |                     else: # zcu102
 784 |                         idx = 3
 785 |                     self.combo2.setCurrentIndex(idx)
 786 |                 else:
 787 |                     pass        
 788 | 
 789 |         # Restore CNN Configuration Table
 790 |         config_file = "./" + self.projectEdit.text() + "/config.pickle"
 791 |         with open(config_file, mode='rb') as f:
 792 |             config = pickle.load(f)
 793 | 
 794 |         initial_options = config['initial_options']
 795 |         n_in_fmaps = config['n_in_fmaps']
 796 |         n_ou_fmaps = config['n_ou_fmaps']
 797 |         infmap_siz = config['infmap_siz']
 798 | 
 799 |         self.table.setRowCount(len(initial_options))
 800 |         for index in range(len(initial_options)):
 801 |             combo = QtGui.QComboBox()
 802 |             for t in self.combo_box_options:
 803 |                 combo.addItem(t)
 804 |             combo.setCurrentIndex(initial_options[index])
 805 |             self.table.setCellWidget(index,0,combo)
 806 |             item1 = QtGui.QTableWidgetItem(n_in_fmaps[index])
 807 |             self.table.setItem(index,1,item1)
 808 |             item2 = QtGui.QTableWidgetItem(n_ou_fmaps[index])
 809 |             self.table.setItem(index,2,item2)
 810 |             item3 = QtGui.QTableWidgetItem(infmap_siz[index])
 811 |             self.table.setItem(index,3,item3)
 812 | 
 813 |             item4 = QtGui.QCheckBox('')
 814 |             item4.setChecked(True) # isChecked() == True?False?
 815 |             self.table.setCellWidget(index,4,item4)
 816 | 
 817 | 
 818 |         # Restore Training Status Graph
 819 |         log_file = "temp_log.csv"
 820 |         log_path = "./" + self.projectEdit.text() + "/" + log_file
 821 | 
 822 |         if( os.path.exists(log_path) == True):
 823 |             print("log_file %s" % log_path)
 824 | 
 825 |             subprocess.call(["cp",log_path,"./"])
 826 | 
 827 |             train_loss,train_acc,test_loss,test_acc = np.loadtxt(log_file, delimiter=',', skiprows=1,usecols=(1,2,5,6),unpack=True)
 828 |             self.canvas.push_data(train_acc,test_acc,train_loss,test_loss)
 829 |             self.canvas.refresh(int(self.n_trains_Edit.text()))
 830 | 
 831 |             subprocess.call(["rm","-rf",log_file])
 832 | 
 833 |         is_load_pretrain = 1
 834 |         self.bstart.setText('Continue Training')
 835 | 
 836 |         # Restore Global Variables
 837 |         global img_siz 
 838 |         img_siz = int(config['imgsiz'])
 839 |         global n_class 
 840 |         n_class = int(n_ou_fmaps[len(initial_options) - 1])
 841 |         
 842 |         print("[INFO] IMAGE SIZE %dx%d" % (img_siz,img_siz))
 843 |         print("[INFO] #CLASSES: %d" % (n_class))
 844 | 
 845 |         # update widgets
 846 |         self.update()
 847 | 
 848 |     # -----------------------------------------------------------------------
 849 |     # Set Feature Map Size
 850 |     # -----------------------------------------------------------------------
 851 |     def SetSize(self):
 852 |         global img_siz
 853 | 
 854 |         fsiz = 0
 855 |         for index in range(self.table.rowCount()):
 856 |             itm0 = self.table.cellWidget(index,0)
 857 |             itm3 = self.table.item(index,3) 
 858 |             
 859 |             if index == 0:
 860 |                 fsiz = img_siz
 861 |                 tbl_item = QtGui.QTableWidgetItem(str(int(fsiz)))
 862 |                 self.table.setItem(index,3,tbl_item)
 863 |                 #fsiz = int(itm3.text())
 864 |             elif itm0.currentText() == 'Conv(Int)':
 865 |                 tbl_item = QtGui.QTableWidgetItem(str(int(fsiz)))
 866 |                 self.table.setItem(index,3,tbl_item)
 867 | 
 868 |             elif itm0.currentText() == 'Conv(Bin)':
 869 |                 tbl_item = QtGui.QTableWidgetItem(str(int(fsiz)))
 870 |                 self.table.setItem(index,3,tbl_item)
 871 | 
 872 |             elif itm0.currentText() == 'Max Pool':
 873 |                 tbl_item = QtGui.QTableWidgetItem(str(int(fsiz)))
 874 |                 self.table.setItem(index,3,tbl_item)
 875 | 
 876 |                 fsiz = fsiz / 2
 877 |                 if fsiz < 1:
 878 |                     fsiz = 1
 879 | 
 880 |             elif itm0.currentText() == 'Ave Pool':
 881 |                 tbl_item = QtGui.QTableWidgetItem(str(int(fsiz)))
 882 |                 self.table.setItem(index,3,tbl_item)
 883 | 
 884 |                 fsiz = fsiz / 2
 885 |                 if fsiz < 1:
 886 |                     fsiz = 1
 887 | 
 888 |             else: # Dense
 889 |                 tbl_item = QtGui.QTableWidgetItem('1')
 890 |                 self.table.setItem(index,3,tbl_item)
 891 | 
 892 | 
 893 |     # -----------------------------------------------------------------------
 894 |     # FileOpen Dialog for Training data selection
 895 |     # -----------------------------------------------------------------------
 896 |     def open_FileDialog(self):
 897 |         global n_dim
 898 |         global img_siz
 899 |         filename = QtGui.QFileDialog.getOpenFileName(self, 'File Open', './')
 900 |         self.td_label.setText(filename)
 901 | 
 902 |         # check dimension and size
 903 |         with open(filename, 'rb') as f:
 904 |             images = pickle.load(f)        
 905 |         
 906 |             print("[INFO] IMAGE SIZE %dx%d" % (images['train'].shape[3],images['train'].shape[3]))
 907 | 
 908 |             n_dim = images['train'].shape[1]
 909 |             img_siz = images['train'].shape[2]
 910 | 
 911 |             self.SetSize()
 912 | 
 913 |     def open_FileDialog_tl(self):
 914 |         filename = QtGui.QFileDialog.getOpenFileName(self, 'File Open', './')
 915 |         self.tl_label.setText(filename)
 916 | 
 917 |         # check dimension and size
 918 |         with open(filename, 'rb') as f:
 919 |             global n_class
 920 |             labels = pickle.load(f)        
 921 |             label_set = labels['train'].astype(np.int8)
 922 |             max_idx = np.max(label_set) + 1 # includes '0' label
 923 |             print("[INFO] #CLASSES: %d" % max_idx)
 924 | 
 925 |             n_class = max_idx
 926 | 
 927 |             item3 = QtGui.QTableWidgetItem(str(n_class))
 928 |             self.table.setItem(self.table.rowCount()-1,2,item3)
 929 | 
 930 |     # -----------------------------------------------------------------------
 931 |     # Load PreDefined CNN
 932 |     # -----------------------------------------------------------------------
 933 |     def LoadConfig(self):
 934 |         template_name = self.combo1.currentText()
 935 | 
 936 |         self.combo_box_options = ["Conv(Int)","Conv(Bin)","Max Pool","Ave Pool","Dense"]
 937 |         if template_name == 'LeNet5':
 938 |             initial_options = [0,1,1,3,4]
 939 |             n_in_fmaps = [ '1','64','64','64','64']
 940 |             n_ou_fmaps = ['64','64','64','64','10']
 941 |             infmap_siz = ['28','28','28','28','1']
 942 |         elif template_name == 'TinyCNN':
 943 |             initial_options   = [0,1,1,2,3,4]
 944 |             n_in_fmaps = [ '3', '64','128','128','128','128']
 945 |             n_ou_fmaps = ['64','128','128','128','128', '10']
 946 |             infmap_siz = ['32', '32', '32', '32', '16',  '1']
 947 |         elif template_name == 'VGG9ave':
 948 |             initial_options = [0, 1,   2,    1,   1,   2,   1,   1,   2,   1,   1,   3,   4]
 949 |             n_in_fmaps = [ '3','64','64', '64','64','64','64','64','64','64','64','64','64']
 950 |             n_ou_fmaps = ['64','64','64', '64','64','64','64','64','64','64','64','64','10']
 951 |             infmap_siz = ['32','32','32', '16','16','16', '8', '8', '8', '4', '4', '4', '1']
 952 |         elif template_name == 'VGG11ave':
 953 |             initial_options = [0, 1,   2,    1,   1,   2,   1,   1,   2,   1,   1,   2,   1,   1,   3,   4]
 954 |             n_in_fmaps = [ '3','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64']
 955 |             n_ou_fmaps = ['64','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','10']
 956 |             infmap_siz = ['32','32','32', '16','16','16', '8', '8', '8', '4', '4', '4', '2', '2', '2', '1']
 957 |         elif template_name == 'VGG16ave':
 958 |             initial_options = [0, 1,   2,    1,   1,   2,   1,   1,   1,   2,   1,   1,   1,   2,   1,   1,   1,   3,   4]
 959 |             n_in_fmaps = [ '3','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64']
 960 |             n_ou_fmaps = ['64','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','10']
 961 |             infmap_siz = ['64','64','64', '32','32','32','16','16','16','16', '8', '8', '8', '8', '4', '4', '4', '4', '1']
 962 |         elif template_name == 'VGG19ave':
 963 |             initial_options = [0, 1,   2,    1,   1,   2,   1,   1,   1,   1,   2,   1,   1,   1,   1,   2,   1,   1,   1,   1,   3,   4]
 964 |             n_in_fmaps = [ '3','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64']
 965 |             n_ou_fmaps = ['64','64','64', '64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','64','10']
 966 |             infmap_siz = ['64','64','64', '32','32','32','16','16','16','16','16', '8', '8', '8', '8', '8', '4', '4', '4', '4', '4', '1']
 967 |         else: # VGG11
 968 |             initial_options   = [0,1,2,1,1,2,1,1,2,1,1,2,4,4,4]
 969 |             n_in_fmaps = [ '3','64','64', '64','128','128','128','256','256','256','256','256','4096','1024','1024']
 970 |             n_ou_fmaps = ['64','64','64','128','128','128','256','256','256','256','256','256','1024','1024',  '10']
 971 |             infmap_siz = ['32','32','32', '16', '16', '16',  '8',  '8',  '8',  '8',  '8',  '8',   '1',   '1',   '1']
 972 | 
 973 |         # set output #neurons (that is, #classifications)
 974 |         global n_class
 975 |         n_ou_fmaps[len(n_ou_fmaps) - 1] = str(n_class)
 976 | 
 977 |         self.table.setRowCount(len(initial_options))
 978 |         for index in range(len(initial_options)):
 979 |             combo = QtGui.QComboBox()
 980 |             for t in self.combo_box_options:
 981 |                 combo.addItem(t)
 982 |             combo.setCurrentIndex(initial_options[index])
 983 |             self.table.setCellWidget(index,0,combo)
 984 |             item1 = QtGui.QTableWidgetItem(n_in_fmaps[index])
 985 |             self.table.setItem(index,1,item1)
 986 |             item2 = QtGui.QTableWidgetItem(n_ou_fmaps[index])
 987 |             self.table.setItem(index,2,item2)
 988 |             item3 = QtGui.QTableWidgetItem(infmap_siz[index])
 989 |             self.table.setItem(index,3,item3)
 990 | 
 991 |             item4 = QtGui.QCheckBox('')
 992 |             item4.setChecked(True) # isChecked() == True?False?
 993 |             self.table.setCellWidget(index,4,item4)
 994 | 
 995 |         # Re-setting feature map size
 996 |         self.SetSize()
 997 | 
 998 | # -----------------------------------------------------------------------
 999 | # Plot Training Process (Train value, Test value)
1000 | # -----------------------------------------------------------------------
class Canvas(FigureCanvas):
    """Matplotlib canvas that plots the training progress per epoch.

    The left axis (``ax``, labelled "Accuracy[%]") shows ``100 - train_acc``
    and ``100 - test_acc``; the twin right axis (``ax2``, labelled "Loss")
    shows the train and test loss.  New curves are stored with push_data()
    and drawn by refresh().
    """

    def __init__(self):
        FigureCanvas.__init__(self, Figure())

        # Both axes are created exactly once and reused by every refresh().
        self.ax = self.figure.add_subplot(111)
        self.ax2 = self.ax.twinx()

        # Placeholder curves (100 zero samples) until real data is pushed.
        self.train_acc = [0] * 100
        self.test_acc = [0] * 100
        self.train_loss = [0] * 100
        self.test_loss = [0] * 100

        # refresh() sets all labels and limits, so nothing else is needed here.
        self.refresh(100)

    def refresh(self, xrange):
        """Redraw both axes from the stored curves.

        xrange -- upper limit of the epoch (x) axis.  The two "(Test)"
                  annotations point at x = xrange - 1.
        """
        # Accept lists, arrays and single values alike.
        train_acc = np.atleast_1d(np.asarray(self.train_acc, dtype=float))
        test_acc = np.atleast_1d(np.asarray(self.test_acc, dtype=float))
        train_loss = np.atleast_1d(np.asarray(self.train_loss, dtype=float))
        test_loss = np.atleast_1d(np.asarray(self.test_loss, dtype=float))

        # Reuse the axes made in __init__.  Calling add_subplot(111) again on
        # every redraw can create a fresh Axes on newer Matplotlib releases,
        # which piles up axes and separates self.ax from its twin self.ax2.
        self.ax.clear()
        self.ax.plot(np.arange(len(train_acc)), 100.0 - train_acc, label='Accuracy(Train)', color="blue")
        self.ax.plot(np.arange(len(test_acc)), 100.0 - test_acc, label='Accuracy(Test)', color="red")

        # NOTE(review): the arrow targets x = xrange - 1 even when fewer
        # samples than xrange are stored -- confirm this is intended.
        if len(test_acc) > 0:
            self.ax.annotate('Accuracy(Test)',
                xy=(xrange - 1, 100.0 - test_acc[-1]), xycoords='data',
                xytext=(-100, -20),
                textcoords='offset points',
                arrowprops=dict(arrowstyle="->")
                )

        self.ax.set_xlabel("epoch")
        self.ax.set_ylabel("Accuracy[%]")
        self.ax.set_ylim(0,100)
        self.ax.set_xlim(0,xrange)
        self.ax.grid()

        self.ax2.clear()
        self.ax2.plot(np.arange(len(train_loss)), train_loss, label='Loss(Train)', color="mediumslateblue")
        self.ax2.plot(np.arange(len(test_loss)), test_loss, label='Loss(Test)', color="hotpink")

        if len(test_loss) > 0:
            self.ax2.annotate('Loss(Test)',
                xy=(xrange - 1, test_loss[-1]), xycoords='data',
                xytext=(-80, 20),
                textcoords='offset points',
                arrowprops=dict(arrowstyle="->")
                )

        # Upper loss limit: 110% of the largest training loss.  Fall back to
        # 1.0 when there is no data or the maximum is not positive (also NaN),
        # so the axis never gets the degenerate range (0, 0).
        loss_top = float(np.max(train_loss)) * 1.1 if len(train_loss) > 0 else 0.0
        if not loss_top > 0.0:
            loss_top = 1.0

        self.ax2.set_ylim(0, loss_top)
        self.ax2.set_xlim(0,xrange)
        self.ax2.set_ylabel("Loss")

        self.draw()

    def push_data(self,train_acc,test_acc,train_loss,test_loss):
        """Store new curves; they become visible at the next refresh()."""
        self.train_acc = train_acc
        self.test_acc = test_acc
        self.train_loss = train_loss
        self.test_loss = test_loss
1059 | 
1060 | ###########################################################################################
1061 | # Main
1062 | ###########################################################################################
def main():
    """Create the Qt application and the Layout window, then run the event loop.

    The process exits with the value returned by the event loop.
    """
    application = QtGui.QApplication(sys.argv)
    # Hold a reference to the window for as long as the event loop runs.
    main_window = Layout()
    exit_code = application.exec_()
    sys.exit(exit_code)
1067 | 
# Start the GUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
1070 | 
1071 | ###########################################################################################
1072 | # END OF PROGRAM
1073 | ###########################################################################################
1074 | 


--------------------------------------------------------------------------------