├── README.md ├── denoise_dcgan ├── README ├── dcgan.py ├── read_stl10.py └── utils.py ├── dl ├── Makefile ├── include │ ├── .data.hpp.swp │ ├── .matrix.hpp.swp │ ├── .param.h.swp │ ├── convnet.hpp │ ├── data.hpp │ ├── dropout_layer.hpp │ ├── inner_product_layer.hpp │ ├── json │ │ ├── json-forwards.h │ │ └── json.h │ ├── layer.hpp │ ├── layer_kernel.cuh │ ├── load_layer.hpp │ ├── logistic.hpp │ ├── matrix.hpp │ ├── matrix_kernel.hpp │ ├── model_component.hpp │ ├── param.h │ ├── pooling_layer.hpp │ ├── relu_layer.hpp │ ├── sigmoid_layer.hpp │ ├── train_classification.hpp │ ├── train_model.hpp │ └── utils.cuh ├── main_src │ └── cifar_classify.cu ├── script │ ├── .ropeproject │ │ ├── config.py │ │ ├── globalnames │ │ ├── history │ │ └── objectdb │ └── cifar10.json ├── src │ ├── convnet.cu │ ├── data.cu │ ├── dropout_layer.cu │ ├── inner_product_layer.cu │ ├── jsoncpp.cpp │ ├── layer_kernel.cu │ ├── load_layer.cpp │ ├── logistic.cu │ ├── matrix.cu │ ├── matrix_kernel.cu │ ├── model_component.cpp │ ├── pooling_layer.cu │ ├── relu_layer.cu │ ├── sigmoid_layer.cu │ ├── train_classification.cpp │ ├── train_model.cpp │ └── utils.cu └── test │ └── test.cu ├── guichuideng ├── 12345vs678.png ├── 1234678.png ├── 1234vs5678.png ├── 12578vs346.png ├── 125vs34678.png ├── 125vs34vs678.png ├── README ├── anaylse.py ├── feature.txt ├── feature_count.py ├── freq1.png ├── freq2.png ├── input_features.bin ├── lr.py └── reduction.py ├── rl └── cartpole │ ├── policy_gradient.py │ ├── random_guess_hill_climbing.py │ └── upload.py └── tf_autoencoder ├── README.md ├── autoencoder.py └── test.py /README.md: -------------------------------------------------------------------------------- 1 | # deep-learning 2 | -------------------------------------------------------------------------------- /denoise_dcgan/README: -------------------------------------------------------------------------------- 1 | This code performs image denoising without tuning parameters such as the number of convolution layers, the learning rate, etc. 2 | The clean images should be .png files saved in ./data/real_images, and the noisy images should be saved in ./data/dataset/noise_images 3 | 4 | read_stl10.py reads the binary file of [stl10](https://cs.stanford.edu/~acoates/stl10/) and saves the images as png files; at the same time it adds Gaussian noise to each image and saves the noisy copies in ./data/dataset/noise_images 5 | 6 | utils.py is copied from [https://github.com/carpedm20/DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow); it is used to read the png files in a given directory. 7 | 8 | dcgan.py is used to train the whole network. 9 | 10 | python dcgan.py --dataset stl10_binary --batch_size 64 --image_size 96 --epoch 100 --learning_rate 0.00001 --c_dim 3 11 | 12 | --dataset, the name of the folder in which you save your images, ./data/stl_binary/real_images. 13 | --batch_size, this number affects the final sample images; if you set batch_size to 100, you need to change [8, 8] to [10, 10] at line 262 of dcgan.py. 14 | --c_dim, gray (1) or rgb (3). 
15 | --image_size 16 | --epoch 17 | --learning_rate 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /denoise_dcgan/dcgan.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import os 4 | import time 5 | from glob import glob 6 | from utils import * 7 | 8 | def generator(gdata, img_size, batch_size, c_dim, num_filter): 9 | s2 = img_size/2 10 | s4 = img_size/4 11 | 12 | stddev = 0.001 13 | with tf.variable_scope('g_conv1') as scope: 14 | w = tf.get_variable('w', [4, 4, c_dim, num_filter], 15 | initializer=tf.random_normal_initializer(stddev=stddev)) 16 | gconv = tf.nn.conv2d(gdata, w, strides=[1, 2, 2, 1], 17 | padding='SAME') 18 | biases = tf.get_variable('biases', [num_filter], 19 | initializer=tf.constant_initializer(0.0)) 20 | bias = tf.nn.bias_add(gconv, biases) 21 | gconv1 = tf.nn.relu(bias, name=scope.name) 22 | 23 | with tf.variable_scope('g_conv2') as scope: 24 | w = tf.get_variable('w', [4, 4, num_filter, num_filter*2], 25 | initializer=tf.random_normal_initializer(stddev=stddev)) 26 | gconv = tf.nn.conv2d(gconv1, w, strides=[1, 2, 2, 1], 27 | padding='SAME') 28 | biases = tf.get_variable('biases', [num_filter*2], 29 | initializer=tf.constant_initializer(0.0)) 30 | bias = tf.nn.bias_add(gconv, biases) 31 | gconv2 = tf.nn.relu(bias, name=scope.name) 32 | 33 | with tf.variable_scope('g_deconv1') as scope: 34 | w = tf.get_variable('w', [4, 4, num_filter, num_filter*2], 35 | initializer=tf.random_normal_initializer(stddev=stddev)) 36 | deconv = tf.nn.conv2d_transpose(gconv2, w, 37 | output_shape=[batch_size, s2, s2, num_filter], 38 | strides=[1, 2, 2, 1]) 39 | biases = tf.get_variable('biases', [num_filter], 40 | initializer=tf.constant_initializer(0.0)) 41 | deconv1 = tf.nn.bias_add(deconv, biases) 42 | 43 | with tf.variable_scope('g_deconv2') as scope: 44 | w = tf.get_variable('w', [4, 4, c_dim, num_filter], 45 | initializer=tf.random_normal_initializer(stddev=stddev)) 46 | deconv = tf.nn.conv2d_transpose(deconv1, w, 47 | output_shape=[batch_size, img_size, img_size, c_dim], 48 | strides=[1, 2, 2, 1]) 49 | biases = tf.get_variable('biases', [c_dim], 50 | initializer=tf.constant_initializer(0.0)) 51 | deconv2 = tf.nn.bias_add(deconv, biases) 52 | 53 | return tf.nn.tanh(deconv2) 54 | 55 | def discriminator(ddata, batch_size, c_dim, num_filter, leak, reuse=False): 56 | if reuse: 57 | tf.get_variable_scope().reuse_variables() 58 | 59 | stddev = 0.002 60 | with tf.variable_scope('d_conv1') as scope: 61 | w = tf.get_variable('w', [4, 4, c_dim, num_filter], 62 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 63 | dconv = tf.nn.conv2d(ddata, w, strides=[1, 2, 2, 1], 64 | padding='SAME') 65 | biases = tf.get_variable('biases', [num_filter], 66 | initializer=tf.constant_initializer(0.0)) 67 | bias = tf.nn.bias_add(dconv, biases) 68 | dconv1 = tf.maximum(bias, leak*bias) 69 | 70 | with tf.variable_scope('d_conv2') as scope: 71 | w = tf.get_variable('w', [4, 4, num_filter, num_filter*2], 72 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 73 | dconv = tf.nn.conv2d(dconv1, w, strides=[1, 2, 2, 1], 74 | padding='SAME') 75 | biases = tf.get_variable('biases', [num_filter*2], 76 | initializer=tf.constant_initializer(0.0)) 77 | bias = tf.nn.bias_add(dconv, biases) 78 | dconv2 = tf.maximum(bias, leak*bias) 79 | 80 | with tf.variable_scope('d_conv3') as scope: 81 | w = tf.get_variable('w', [4, 4, num_filter*2, num_filter*4], 82 | 
initializer=tf.truncated_normal_initializer(stddev=stddev)) 83 | dconv = tf.nn.conv2d(dconv2, w, strides=[1, 2, 2, 1], 84 | padding='SAME') 85 | biases = tf.get_variable('biases', [num_filter*4], 86 | initializer=tf.constant_initializer(0.0)) 87 | bias = tf.nn.bias_add(dconv, biases) 88 | dconv3 = tf.maximum(bias, leak*bias) 89 | 90 | with tf.variable_scope('d_conv4') as scope: 91 | w = tf.get_variable('w', [4, 4, num_filter*4, num_filter*8], 92 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 93 | dconv = tf.nn.conv2d(dconv3, w, strides=[1, 2, 2, 1], 94 | padding='SAME') 95 | biases = tf.get_variable('biases', [num_filter*8], 96 | initializer=tf.constant_initializer(0.0)) 97 | bias = tf.nn.bias_add(dconv, biases); dconv4 = tf.maximum(bias, leak*bias) 98 | 99 | with tf.variable_scope('d_local1') as scope: 100 | local_in = tf.reshape(dconv4, [batch_size, -1]) 101 | shape = local_in.get_shape().as_list() 102 | 103 | w = tf.get_variable('w', [shape[1], 1], tf.float32, 104 | tf.random_normal_initializer(stddev=stddev)) 105 | biases = tf.get_variable("biases", [1], 106 | initializer=tf.constant_initializer(0.0)) 107 | dlocal = tf.matmul(local_in, w) + biases 108 | 109 | return tf.nn.sigmoid(dlocal), dlocal 110 | 111 | def build_model(img_size, batch_size=100, num_filter=16, c_dim=1, leak=0.1): 112 | 113 | noise_images = tf.placeholder(tf.float32, [batch_size] 114 | + [img_size, img_size, c_dim], name='noise_images') 115 | real_images = tf.placeholder(tf.float32, [batch_size] 116 | + [img_size, img_size, c_dim], name='real_images') 117 | 118 | G = generator(noise_images, img_size, batch_size, c_dim, num_filter) 119 | D, D_logots = discriminator(real_images, batch_size, c_dim, num_filter, leak) 120 | D_, D_logots_ = discriminator(G, batch_size, c_dim, num_filter, leak, reuse=True) 121 | 122 | d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots, tf.ones_like(D))) 123 | d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots_, tf.zeros_like(D_))) 124 | g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots_, tf.ones_like(D_))) 125 | 126 | d_loss = d_loss_real + d_loss_fake 127 | 128 | t_vars = tf.trainable_variables() 129 | 130 | d_vars = [var for var in t_vars if 'd_' in var.name] 131 | g_vars = [var for var in t_vars if 'g_' in var.name] 132 | 133 | saver = tf.train.Saver() 134 | 135 | return G, g_loss, d_loss, d_vars, g_vars, saver 136 | 137 | flags = tf.app.flags 138 | flags.DEFINE_float("learning_rate", 0.0002, "Learning rate for adam [0.0002]") 139 | flags.DEFINE_float("beta1", 0.5, "Momentum term of adam [0.5]") 140 | flags.DEFINE_integer("epoch", 10, "Epoch to train [10]") 141 | flags.DEFINE_string("dataset", "xxx", "The name of dataset []") 142 | flags.DEFINE_integer("batch_size", 64, "The size of batch images [64]") 143 | flags.DEFINE_integer("image_size", 100, "The size of image to use (will be center cropped) [100]") 144 | flags.DEFINE_integer("c_dim", 1, "Dimension of image color. 
[1]") 145 | 146 | FLAGS = flags.FLAGS 147 | 148 | def read_images(c_dim): 149 | is_grayscale = (c_dim == 1) 150 | real_data = glob(os.path.join("./data", FLAGS.dataset, "real_images", "*.png")) 151 | noise_data = glob(os.path.join("./data", FLAGS.dataset, "noise_images", "*.png")) 152 | 153 | real = [get_image(img_file, FLAGS.image_size, is_crop=False, is_grayscale=is_grayscale) for img_file in real_data] 154 | noise = [get_image(img_file, FLAGS.image_size, is_crop=False, is_grayscale=is_grayscale) for img_file in noise_data] 155 | 156 | if is_grayscale: 157 | reals = np.array(real).astype(np.float32)[:,:,:,None] 158 | noises = np.array(noise).astype(np.float32)[:,:,:,None] 159 | else: 160 | reals = np.array(real).astype(np.float32) 161 | noises = np.array(noise).astype(np.float32) 162 | 163 | return reals, noises 164 | 165 | #def train(sess, G, d_loss, d_vars, g_loss, g_vars, saver, c_dim=1): 166 | def train(sess, img_size, batch_size=100, num_filter=16, c_dim=1, leak=0.2): 167 | 168 | 169 | noise_images = tf.placeholder(tf.float32, [batch_size] 170 | + [img_size, img_size, c_dim], name='noise_images') 171 | real_images = tf.placeholder(tf.float32, [batch_size] 172 | + [img_size, img_size, c_dim], name='real_images') 173 | 174 | G = generator(noise_images, img_size, batch_size, c_dim, num_filter) 175 | D, D_logots = discriminator(real_images, batch_size, c_dim, num_filter, leak) 176 | D_, D_logots_ = discriminator(G, batch_size, c_dim, num_filter, leak, reuse=True) 177 | 178 | d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots, tf.ones_like(D))) 179 | d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots_, tf.zeros_like(D_))) 180 | g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots_, tf.ones_like(D_))) 181 | 182 | d_loss = d_loss_real + d_loss_fake 183 | 184 | t_vars = tf.trainable_variables() 185 | 186 | d_vars = [var for var in t_vars if 'd_' in var.name] 187 | g_vars = [var for var in t_vars if 'g_' in var.name] 188 | 189 | saver = tf.train.Saver() 190 | 191 | 192 | d_optim = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1).minimize(d_loss, var_list=d_vars) 193 | g_optim = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1).minimize(g_loss, var_list=g_vars) 194 | 195 | tf.initialize_all_variables().run() 196 | 197 | start_time = time.time() 198 | counter = 0 199 | 200 | reals, noises = read_images(c_dim) 201 | 202 | sample_images = reals[0:batch_size] 203 | sample_z = noises[0:batch_size] 204 | 205 | model_name = "DCGAN.model" 206 | model_dir = "%s_%s_%s" % (FLAGS.dataset, FLAGS.batch_size, FLAGS.image_size) 207 | checkpoint_dir = os.path.join('./checkpoint', model_dir) 208 | if not os.path.exists(checkpoint_dir): 209 | os.makedirs(checkpoint_dir) 210 | 211 | for epoch in range(FLAGS.epoch): 212 | 213 | data = glob(os.path.join("./data", FLAGS.dataset, "real_images", "*.png")) 214 | num_batch = len(data) // FLAGS.batch_size 215 | 216 | print 'num_batch', num_batch 217 | 218 | for idx in range(0, num_batch): 219 | 220 | batch_images = reals[idx*FLAGS.batch_size:(idx+1)*FLAGS.batch_size] 221 | batch_z = noises[idx*FLAGS.batch_size:(idx+1)*FLAGS.batch_size] 222 | 223 | #update 224 | out1 = sess.run([d_optim], feed_dict={real_images: batch_images, noise_images: batch_z}) 225 | 226 | #update G 227 | out2 = sess.run([g_optim], feed_dict={noise_images:batch_z}) 228 | 229 | errD_fake = d_loss_fake.eval({noise_images: batch_z}) 230 | errD_real = d_loss_real.eval({real_images:batch_images}) 231 | 
errG = g_loss.eval({noise_images: batch_z}) 232 | 233 | counter += 1 234 | print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" % (epoch, 235 | idx, num_batch, time.time() - start_time, 236 | errD_fake+errD_real, errG)) 237 | 238 | if np.mod(counter, 100) == 1: 239 | samples, loss1, loss2 = sess.run([G, d_loss, 240 | g_loss], feed_dict={noise_images: sample_z, 241 | real_images: sample_images}) 242 | save_images(sample_z, [10, 10], './{}/noise_{:02d}_{:04d}.png'.format('./sample', epoch, idx)) 243 | save_images(samples, [10, 10], './{}/denoise_{:02d}_{:04d}.png'.format('./sample', epoch, idx)) 244 | save_images(sample_images, [10, 10], './{}/train_{:02d}_{:04d}.png'.format('./sample', epoch, idx)) 245 | print("[Sample] d_loss: %.8f, g_loss: %.8f" % (loss1, loss2)) 246 | 247 | if np.mod(counter, 500) == 2: 248 | saver.save(sess, os.path.join(checkpoint_dir, model_name), global_step=counter) 249 | 250 | #G, g_loss, d_loss, d_vars, g_vars, saver = build_model(FLAGS.image_size, FLAGS.batch_size) 251 | with tf.Session() as sess: 252 | #train(sess, G, d_loss, d_vars, g_loss, g_vars, saver) 253 | train(sess, FLAGS.image_size, FLAGS.batch_size, c_dim=FLAGS.c_dim) 254 | 255 | 256 | 257 | 258 | 259 | 260 | -------------------------------------------------------------------------------- /denoise_dcgan/read_stl10.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os, sys, tarfile, urllib 4 | import numpy as np 5 | import Image 6 | 7 | height = 96 8 | width = 96 9 | 10 | 11 | data_path = './data/stl10_binary/train_X.bin' 12 | 13 | f = open(data_path, 'rb') 14 | 15 | everything = np.fromfile(f, dtype=np.uint8) 16 | images = np.reshape(everything, (-1, 3, 96, 96)) 17 | 18 | images = np.transpose(images, (0, 3, 2, 1)) 19 | 20 | print images.shape 21 | 22 | mean = 0 23 | sigma = 100 24 | 25 | for i in range(len(images)): 26 | new_img = Image.fromarray(images[i], 'RGB') 27 | new_img.save('./data/stl10_binary/real_images/'+str(i)+'.png') 28 | 29 | gauss = np.random.normal(mean, sigma, (height*width)).reshape(height, width) 30 | 31 | noisy = images[i].astype(np.float32) 32 | noisy[:,:,0] = noisy[:,:,0] + gauss 33 | noisy[:,:,1] = noisy[:,:,1] + gauss 34 | noisy[:,:,2] = noisy[:,:,2] + gauss 35 | 36 | noisy = noisy - np.min(noisy) 37 | noisy = noisy / np.max(noisy) 38 | noisy = (noisy*255).astype(np.uint8) 39 | 40 | new_img = Image.fromarray(noisy, 'RGB') 41 | new_img.save('./data/stl10_binary/noise_images/'+str(i)+'.png') 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /denoise_dcgan/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some codes from https://github.com/Newmu/dcgan_code 3 | """ 4 | from __future__ import division 5 | import math 6 | import json 7 | import random 8 | import pprint 9 | import scipy.misc 10 | import numpy as np 11 | from time import gmtime, strftime 12 | 13 | pp = pprint.PrettyPrinter() 14 | 15 | get_stddev = lambda x, k_h, k_w: 1/math.sqrt(k_w*k_h*x.get_shape()[-1]) 16 | 17 | def get_image(image_path, image_size, is_crop=True, resize_w=64, is_grayscale = False): 18 | return transform(imread(image_path, is_grayscale), image_size, is_crop, resize_w) 19 | 20 | def save_images(images, size, image_path): 21 | return imsave(inverse_transform(images), size, image_path) 22 | 23 | def imread(path, is_grayscale = False): 24 | if (is_grayscale): 25 | return 
scipy.misc.imread(path, flatten = True).astype(np.float) 26 | else: 27 | return scipy.misc.imread(path).astype(np.float) 28 | 29 | def merge_images(images, size): 30 | return inverse_transform(images) 31 | 32 | def merge(images, size): 33 | h, w = images.shape[1], images.shape[2] 34 | img = np.zeros((h * size[0], w * size[1], 3)) 35 | for idx, image in enumerate(images): 36 | i = idx % size[1] 37 | j = idx // size[1] 38 | img[j*h:j*h+h, i*w:i*w+w, :] = image 39 | 40 | return img 41 | 42 | def imsave(images, size, path): 43 | return scipy.misc.imsave(path, merge(images, size)) 44 | 45 | def center_crop(x, crop_h, crop_w=None, resize_w=64): 46 | if crop_w is None: 47 | crop_w = crop_h 48 | h, w = x.shape[:2] 49 | j = int(round((h - crop_h)/2.)) 50 | i = int(round((w - crop_w)/2.)) 51 | return scipy.misc.imresize(x[j:j+crop_h, i:i+crop_w], 52 | [resize_w, resize_w]) 53 | 54 | def transform(image, npx=64, is_crop=True, resize_w=64): 55 | # npx : # of pixels width/height of image 56 | if is_crop: 57 | cropped_image = center_crop(image, npx, resize_w=resize_w) 58 | else: 59 | cropped_image = image 60 | return np.array(cropped_image)/127.5 - 1. 61 | 62 | def inverse_transform(images): 63 | return (images+1.)/2. 64 | 65 | 66 | def to_json(output_path, *layers): 67 | with open(output_path, "w") as layer_f: 68 | lines = "" 69 | for w, b, bn in layers: 70 | layer_idx = w.name.split('/')[0].split('h')[1] 71 | 72 | B = b.eval() 73 | 74 | if "lin/" in w.name: 75 | W = w.eval() 76 | depth = W.shape[1] 77 | else: 78 | W = np.rollaxis(w.eval(), 2, 0) 79 | depth = W.shape[0] 80 | 81 | biases = {"sy": 1, "sx": 1, "depth": depth, "w": ['%.2f' % elem for elem in list(B)]} 82 | if bn != None: 83 | gamma = bn.gamma.eval() 84 | beta = bn.beta.eval() 85 | 86 | gamma = {"sy": 1, "sx": 1, "depth": depth, "w": ['%.2f' % elem for elem in list(gamma)]} 87 | beta = {"sy": 1, "sx": 1, "depth": depth, "w": ['%.2f' % elem for elem in list(beta)]} 88 | else: 89 | gamma = {"sy": 1, "sx": 1, "depth": 0, "w": []} 90 | beta = {"sy": 1, "sx": 1, "depth": 0, "w": []} 91 | 92 | if "lin/" in w.name: 93 | fs = [] 94 | for w in W.T: 95 | fs.append({"sy": 1, "sx": 1, "depth": W.shape[0], "w": ['%.2f' % elem for elem in list(w)]}) 96 | 97 | lines += """ 98 | var layer_%s = { 99 | "layer_type": "fc", 100 | "sy": 1, "sx": 1, 101 | "out_sx": 1, "out_sy": 1, 102 | "stride": 1, "pad": 0, 103 | "out_depth": %s, "in_depth": %s, 104 | "biases": %s, 105 | "gamma": %s, 106 | "beta": %s, 107 | "filters": %s 108 | };""" % (layer_idx.split('_')[0], W.shape[1], W.shape[0], biases, gamma, beta, fs) 109 | else: 110 | fs = [] 111 | for w_ in W: 112 | fs.append({"sy": 5, "sx": 5, "depth": W.shape[3], "w": ['%.2f' % elem for elem in list(w_.flatten())]}) 113 | 114 | lines += """ 115 | var layer_%s = { 116 | "layer_type": "deconv", 117 | "sy": 5, "sx": 5, 118 | "out_sx": %s, "out_sy": %s, 119 | "stride": 2, "pad": 1, 120 | "out_depth": %s, "in_depth": %s, 121 | "biases": %s, 122 | "gamma": %s, 123 | "beta": %s, 124 | "filters": %s 125 | };""" % (layer_idx, 2**(int(layer_idx)+2), 2**(int(layer_idx)+2), 126 | W.shape[0], W.shape[3], biases, gamma, beta, fs) 127 | layer_f.write(" ".join(lines.replace("'","").split())) 128 | 129 | def make_gif(images, fname, duration=2, true_image=False): 130 | import moviepy.editor as mpy 131 | 132 | def make_frame(t): 133 | try: 134 | x = images[int(len(images)/duration*t)] 135 | except: 136 | x = images[-1] 137 | 138 | if true_image: 139 | return x.astype(np.uint8) 140 | else: 141 | return ((x+1)/2*255).astype(np.uint8) 
142 | 143 | clip = mpy.VideoClip(make_frame, duration=duration) 144 | clip.write_gif(fname, fps = len(images) / duration) 145 | 146 | def visualize(sess, dcgan, config, option): 147 | if option == 0: 148 | z_sample = np.random.uniform(-0.5, 0.5, size=(config.batch_size, dcgan.z_dim)) 149 | samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) 150 | save_images(samples, [8, 8], './samples/test_%s.png' % strftime("%Y-%m-%d %H:%M:%S", gmtime())) 151 | elif option == 1: 152 | values = np.arange(0, 1, 1./config.batch_size) 153 | for idx in xrange(100): 154 | print(" [*] %d" % idx) 155 | z_sample = np.zeros([config.batch_size, dcgan.z_dim]) 156 | for kdx, z in enumerate(z_sample): 157 | z[idx] = values[kdx] 158 | 159 | samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) 160 | save_images(samples, [8, 8], './samples/test_arange_%s.png' % (idx)) 161 | elif option == 2: 162 | values = np.arange(0, 1, 1./config.batch_size) 163 | for idx in [random.randint(0, 99) for _ in xrange(100)]: 164 | print(" [*] %d" % idx) 165 | z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim)) 166 | z_sample = np.tile(z, (config.batch_size, 1)) 167 | #z_sample = np.zeros([config.batch_size, dcgan.z_dim]) 168 | for kdx, z in enumerate(z_sample): 169 | z[idx] = values[kdx] 170 | 171 | samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) 172 | make_gif(samples, './samples/test_gif_%s.gif' % (idx)) 173 | elif option == 3: 174 | values = np.arange(0, 1, 1./config.batch_size) 175 | for idx in xrange(100): 176 | print(" [*] %d" % idx) 177 | z_sample = np.zeros([config.batch_size, dcgan.z_dim]) 178 | for kdx, z in enumerate(z_sample): 179 | z[idx] = values[kdx] 180 | 181 | samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) 182 | make_gif(samples, './samples/test_gif_%s.gif' % (idx)) 183 | elif option == 4: 184 | image_set = [] 185 | values = np.arange(0, 1, 1./config.batch_size) 186 | 187 | for idx in xrange(100): 188 | print(" [*] %d" % idx) 189 | z_sample = np.zeros([config.batch_size, dcgan.z_dim]) 190 | for kdx, z in enumerate(z_sample): z[idx] = values[kdx] 191 | 192 | image_set.append(sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})) 193 | make_gif(image_set[-1], './samples/test_gif_%s.gif' % (idx)) 194 | 195 | new_image_set = [merge(np.array([images[idx] for images in image_set]), [10, 10]) \ 196 | for idx in range(64) + range(63, -1, -1)] 197 | make_gif(new_image_set, './samples/test_gif_merged.gif', duration=8) 198 | -------------------------------------------------------------------------------- /dl/Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | GCC = gcc 3 | CC = g++ -std=c++0x 4 | NVCC = nvcc 5 | CCFLAGS = -c -pg 6 | NVCCFLAGS = -g -pg -O3 -c 7 | PTXFLAGES = --machine 64 8 | 9 | LIB = -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lm 10 | INCLUDES = -I./include 11 | 12 | BUILD_DIR = ./bin 13 | OBJ_DIR = ./obj 14 | SRCS_DIR = ./src 15 | INCLUDES_DIR = ./include 16 | SRCS_TARGET_DIR = ./main_src 17 | 18 | CU_INCLUDES = $(shell find $(INCLUDES_DIR) -name "*.cuh") 19 | CXX_INCLUDES = $(shell find $(INCLUDES_DIR) -name "*.h") 20 | HXX_INCLUDES = $(shell find $(INCLUDES_DIR) -name "*.hpp") 21 | 22 | #HXX对应的cpp,cu,然后将它们排除掉,不进行编译 23 | HXX_SRCS = $(subst $(INCLUDES_DIR), $(SRCS_DIR), ${HXX_INCLUDES:.hpp=.cpp}) 24 | HXX_SRCS += $(subst $(INCLUDES_DIR), $(SRCS_DIR), ${HXX_INCLUDES:.hpp=.cu}) 25 | CU_SRCS = $(filter-out $(HXX_SRCS), $(shell find $(SRCS_DIR) -name "*.cu")) 26 | CXX_SRCS = $(filter-out 
$(HXX_SRCS), $(shell find $(SRCS_DIR) -name "*.cpp")) 27 | CU_HPP_SRCS = $(filter $(HXX_SRCS), $(shell find $(SRCS_DIR) -name "*.cu")) 28 | CXX_HPP_SRCS = $(filter $(HXX_SRCS), $(shell find $(SRCS_DIR) -name "*.cpp")) 29 | 30 | #生成的链接文件 31 | CXX_OBJS += $(subst $(SRCS_DIR), $(OBJ_DIR), ${CXX_SRCS:.cpp=.o}) 32 | CU_OBJS += $(subst $(SRCS_DIR), $(OBJ_DIR), ${CU_SRCS:.cu=.o}) 33 | 34 | TARGET ?= main 35 | MULTI_PROCESS ?= 1 36 | MULTI_MECHINE ?= 0 37 | OPEN_MPI ?= 0 38 | NUM_PROCESS ?= 2 39 | BUILD_TARGET = $(BUILD_DIR)/$(TARGET) 40 | SRCS_TARGET = $(SRCS_TARGET_DIR)/$(TARGET).cu 41 | OBJ_TARGET = $(OBJ_DIR)/$(TARGET).o 42 | 43 | #print: $(CXX_SRCS) $(CU_SRCS) 44 | # echo $(HXX_SRCS) 45 | # echo $(CXX_SRCS) 46 | # echo $(CU_SRCS) 47 | 48 | $(OBJ_DIR)/%.o: $(SRCS_DIR)/%.cpp 49 | $(CC) $(CCFLAGS) $^ $(INCLUDES) -o $@ 50 | 51 | $(OBJ_DIR)/%.o: $(SRCS_DIR)/%.cu 52 | $(NVCC) $(CCFLAGS) $^ $(INCLUDES) -o $@ 53 | 54 | $(BUILD_TARGET): $(CXX_OBJS) $(CU_OBJS) $(SRCS_TARGET) $(CU_HPP_SRCS) $(CXX_HPP_SRCS) 55 | $(NVCC) $(NVCCFLAGS) $(SRCS_TARGET) $(INCLUDES) -o $(OBJ_TARGET) 56 | $(NVCC) -o $(BUILD_TARGET) $(OBJ_TARGET) $(CXX_OBJS) $(CU_OBJS) $(LIB) $(INCLUDES) 57 | 58 | cleanall: 59 | rm -rf $(OBJ_DIR)/*.o 60 | -------------------------------------------------------------------------------- /dl/include/.data.hpp.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/include/.data.hpp.swp -------------------------------------------------------------------------------- /dl/include/.matrix.hpp.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/include/.matrix.hpp.swp -------------------------------------------------------------------------------- /dl/include/.param.h.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/include/.param.h.swp -------------------------------------------------------------------------------- /dl/include/convnet.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file convnet.hpp 3 | /// @brief 4 | 5 | #ifndef CONVNET_H_ 6 | #define CONVNET_H_ 7 | 8 | #include 9 | #include 10 | #include "layer.hpp" 11 | 12 | 13 | template 14 | class ConvNet : public TrainLayer{ 15 | 16 | private: 17 | 18 | Matrix* unfold_dE_db_tmp; 19 | Matrix* dE_db_tmp; 20 | Matrix* padded_x; 21 | Matrix* unfold_x; 22 | 23 | Matrix* unranged_dE_dx; 24 | Matrix* unranged_dE_dw; 25 | int _filt_pixs; 26 | int _conv_pixs; 27 | int _padded_in_pixs; 28 | int _in_pixs; 29 | int _box_in_pixs; 30 | int _num_box; 31 | 32 | ConvParam* _cp; 33 | 34 | public: 35 | ConvNet(ConvParam* cp); 36 | ~ConvNet(); 37 | 38 | void initCuda(); 39 | void computeOutput(Matrix* x); 40 | void computeDerivsOfPars(Matrix* x); 41 | void computeDerivsOfInput(Matrix* dE_dx); 42 | 43 | }; 44 | 45 | #include "../src/convnet.cu" 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /dl/include/data.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file data.hpp 3 | /// 4 | #ifndef DATA_HPP_ 5 | #define DATA_HPP_ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | template 12 | class Data { 13 | 14 | 
public: 15 | Data() {} 16 | virtual ~Data() {} 17 | 18 | void copyFromHost(Dtype* data_value, const int data_len); 19 | void copyFromDevice(Data* dev_data); 20 | void copyToHost(Dtype* data_value, const int data_len); 21 | void copyToDevice(Data* dev_data); 22 | 23 | void zeros(); 24 | 25 | inline Dtype* getDevData() const { 26 | return _data_value; 27 | } 28 | 29 | 30 | protected: 31 | //数据形状不固定,由子类来定 32 | std::vector _shape; 33 | Dtype* _data_value; 34 | bool _is_own_data; 35 | int _amount; 36 | }; 37 | 38 | #include "../src/data.cu" 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /dl/include/dropout_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file dropout_layer.cuh 3 | /// @brief 实现了对输入每一个点求dropout 4 | 5 | #ifndef DROPOUT_LAYER_H_ 6 | #define DROPOUT_LAYER_H_ 7 | 8 | #include 9 | #include 10 | #include "layer.hpp" 11 | 12 | template 13 | class DropoutLayer : public Layer { 14 | 15 | public: 16 | 17 | DropoutLayer(Param* fcp); 18 | ~DropoutLayer(); 19 | 20 | void initCuda(); 21 | void computeOutput(Matrix* x); 22 | void computeDerivsOfInput(Matrix* dE_dx); 23 | 24 | private: 25 | Param* _p; 26 | Matrix *_drop_record; ///>记录该点是否被丢弃 27 | Matrix *_drop_rand_probs; ///>记录该点被丢弃的概率,与0.5比较 28 | bool _is_set_up; ///>随机数初始化 29 | }; 30 | 31 | 32 | #include "../src/dropout_layer.cu" 33 | #endif 34 | -------------------------------------------------------------------------------- /dl/include/inner_product_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file inner_product_layer.cuh 3 | /// @brief 实现了inner product 4 | 5 | #ifndef INNER_PRODUCT_LAYER_CUH_ 6 | #define INNER_PRODUCT_LAYER_CUH_ 7 | 8 | #include 9 | #include "layer.hpp" 10 | #include "layer_kernel.cuh" 11 | 12 | template 13 | class InnerProductLayer : public TrainLayer { 14 | 15 | public: 16 | 17 | InnerProductLayer(InnerParam* fcp); 18 | ~InnerProductLayer(); 19 | 20 | void initCuda(); 21 | void computeOutput(Matrix* x); 22 | void computeDerivsOfPars(Matrix* x); 23 | void computeDerivsOfInput(Matrix* dE_dx); 24 | 25 | private: 26 | InnerParam* _fcp; 27 | Matrix* data_T; 28 | Matrix* w_T; 29 | }; 30 | 31 | #include "../src/inner_product_layer.cu" 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /dl/include/json/json-forwards.h: -------------------------------------------------------------------------------- 1 | /// Json-cpp amalgated forward header (http://jsoncpp.sourceforge.net/). 2 | /// It is intended to be used with #include "json/json-forwards.h" 3 | /// This header provides forward declaration for all JsonCpp types. 4 | 5 | // ////////////////////////////////////////////////////////////////////// 6 | // Beginning of content of file: LICENSE 7 | // ////////////////////////////////////////////////////////////////////// 8 | 9 | /* 10 | The JsonCpp library's source code, including accompanying documentation, 11 | tests and demonstration applications, are licensed under the following 12 | conditions... 13 | 14 | The author (Baptiste Lepilleur) explicitly disclaims copyright in all 15 | jurisdictions which recognize such a disclaimer. In such jurisdictions, 16 | this software is released into the Public Domain. 
17 | 18 | In jurisdictions which do not recognize Public Domain property (e.g. Germany as of 19 | 2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur, and is 20 | released under the terms of the MIT License (see below). 21 | 22 | In jurisdictions which recognize Public Domain property, the user of this 23 | software may choose to accept it either as 1) Public Domain, 2) under the 24 | conditions of the MIT License (see below), or 3) under the terms of dual 25 | Public Domain/MIT License conditions described here, as they choose. 26 | 27 | The MIT License is about as close to Public Domain as a license can get, and is 28 | described in clear, concise terms at: 29 | 30 | http://en.wikipedia.org/wiki/MIT_License 31 | 32 | The full text of the MIT License follows: 33 | 34 | ======================================================================== 35 | Copyright (c) 2007-2010 Baptiste Lepilleur 36 | 37 | Permission is hereby granted, free of charge, to any person 38 | obtaining a copy of this software and associated documentation 39 | files (the "Software"), to deal in the Software without 40 | restriction, including without limitation the rights to use, copy, 41 | modify, merge, publish, distribute, sublicense, and/or sell copies 42 | of the Software, and to permit persons to whom the Software is 43 | furnished to do so, subject to the following conditions: 44 | 45 | The above copyright notice and this permission notice shall be 46 | included in all copies or substantial portions of the Software. 47 | 48 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 49 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 50 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 51 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 52 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 53 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 54 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 55 | SOFTWARE. 56 | ======================================================================== 57 | (END LICENSE TEXT) 58 | 59 | The MIT license is compatible with both the GPL and commercial 60 | software, affording one all of the rights of Public Domain with the 61 | minor nuisance of being required to keep the above copyright notice 62 | and license text in the source code. Note also that by accepting the 63 | Public Domain "license" you can re-license your copy using whatever 64 | license you like. 65 | 66 | */ 67 | 68 | // ////////////////////////////////////////////////////////////////////// 69 | // End of content of file: LICENSE 70 | // ////////////////////////////////////////////////////////////////////// 71 | 72 | 73 | 74 | 75 | 76 | #ifndef JSON_FORWARD_AMALGATED_H_INCLUDED 77 | # define JSON_FORWARD_AMALGATED_H_INCLUDED 78 | /// If defined, indicates that the source file is amalgated 79 | /// to prevent private header inclusion. 80 | #define JSON_IS_AMALGAMATION 81 | 82 | // ////////////////////////////////////////////////////////////////////// 83 | // Beginning of content of file: include/json/config.h 84 | // ////////////////////////////////////////////////////////////////////// 85 | 86 | // Copyright 2007-2010 Baptiste Lepilleur 87 | // Distributed under MIT license, or public domain if desired and 88 | // recognized in your jurisdiction. 
89 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 90 | 91 | #ifndef JSON_CONFIG_H_INCLUDED 92 | #define JSON_CONFIG_H_INCLUDED 93 | 94 | /// If defined, indicates that json library is embedded in CppTL library. 95 | //# define JSON_IN_CPPTL 1 96 | 97 | /// If defined, indicates that json may leverage CppTL library 98 | //# define JSON_USE_CPPTL 1 99 | /// If defined, indicates that cpptl vector based map should be used instead of 100 | /// std::map 101 | /// as Value container. 102 | //# define JSON_USE_CPPTL_SMALLMAP 1 103 | 104 | // If non-zero, the library uses exceptions to report bad input instead of C 105 | // assertion macros. The default is to use exceptions. 106 | #ifndef JSON_USE_EXCEPTION 107 | #define JSON_USE_EXCEPTION 1 108 | #endif 109 | 110 | /// If defined, indicates that the source file is amalgated 111 | /// to prevent private header inclusion. 112 | /// Remarks: it is automatically defined in the generated amalgated header. 113 | // #define JSON_IS_AMALGAMATION 114 | 115 | #ifdef JSON_IN_CPPTL 116 | #include 117 | #ifndef JSON_USE_CPPTL 118 | #define JSON_USE_CPPTL 1 119 | #endif 120 | #endif 121 | 122 | #ifdef JSON_IN_CPPTL 123 | #define JSON_API CPPTL_API 124 | #elif defined(JSON_DLL_BUILD) 125 | #if defined(_MSC_VER) 126 | #define JSON_API __declspec(dllexport) 127 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING 128 | #endif // if defined(_MSC_VER) 129 | #elif defined(JSON_DLL) 130 | #if defined(_MSC_VER) 131 | #define JSON_API __declspec(dllimport) 132 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING 133 | #endif // if defined(_MSC_VER) 134 | #endif // ifdef JSON_IN_CPPTL 135 | #if !defined(JSON_API) 136 | #define JSON_API 137 | #endif 138 | 139 | // If JSON_NO_INT64 is defined, then Json only support C++ "int" type for 140 | // integer 141 | // Storages, and 64 bits integer support is disabled. 142 | // #define JSON_NO_INT64 1 143 | 144 | #if defined(_MSC_VER) && _MSC_VER <= 1200 // MSVC 6 145 | // Microsoft Visual Studio 6 only support conversion from __int64 to double 146 | // (no conversion from unsigned __int64). 147 | #define JSON_USE_INT64_DOUBLE_CONVERSION 1 148 | // Disable warning 4786 for VS6 caused by STL (identifier was truncated to '255' 149 | // characters in the debug information) 150 | // All projects I've ever seen with VS6 were using this globally (not bothering 151 | // with pragma push/pop). 152 | #pragma warning(disable : 4786) 153 | #endif // if defined(_MSC_VER) && _MSC_VER < 1200 // MSVC 6 154 | 155 | #if defined(_MSC_VER) && _MSC_VER >= 1500 // MSVC 2008 156 | /// Indicates that the following function is deprecated. 
157 | #define JSONCPP_DEPRECATED(message) __declspec(deprecated(message)) 158 | #elif defined(__clang__) && defined(__has_feature) 159 | #if __has_feature(attribute_deprecated_with_message) 160 | #define JSONCPP_DEPRECATED(message) __attribute__ ((deprecated(message))) 161 | #endif 162 | #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) 163 | #define JSONCPP_DEPRECATED(message) __attribute__ ((deprecated(message))) 164 | #elif defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 165 | #define JSONCPP_DEPRECATED(message) __attribute__((__deprecated__)) 166 | #endif 167 | 168 | #if !defined(JSONCPP_DEPRECATED) 169 | #define JSONCPP_DEPRECATED(message) 170 | #endif // if !defined(JSONCPP_DEPRECATED) 171 | 172 | namespace Json { 173 | typedef int Int; 174 | typedef unsigned int UInt; 175 | #if defined(JSON_NO_INT64) 176 | typedef int LargestInt; 177 | typedef unsigned int LargestUInt; 178 | #undef JSON_HAS_INT64 179 | #else // if defined(JSON_NO_INT64) 180 | // For Microsoft Visual use specific types as long long is not supported 181 | #if defined(_MSC_VER) // Microsoft Visual Studio 182 | typedef __int64 Int64; 183 | typedef unsigned __int64 UInt64; 184 | #else // if defined(_MSC_VER) // Other platforms, use long long 185 | typedef long long int Int64; 186 | typedef unsigned long long int UInt64; 187 | #endif // if defined(_MSC_VER) 188 | typedef Int64 LargestInt; 189 | typedef UInt64 LargestUInt; 190 | #define JSON_HAS_INT64 191 | #endif // if defined(JSON_NO_INT64) 192 | } // end namespace Json 193 | 194 | #endif // JSON_CONFIG_H_INCLUDED 195 | 196 | // ////////////////////////////////////////////////////////////////////// 197 | // End of content of file: include/json/config.h 198 | // ////////////////////////////////////////////////////////////////////// 199 | 200 | 201 | 202 | 203 | 204 | 205 | // ////////////////////////////////////////////////////////////////////// 206 | // Beginning of content of file: include/json/forwards.h 207 | // ////////////////////////////////////////////////////////////////////// 208 | 209 | // Copyright 2007-2010 Baptiste Lepilleur 210 | // Distributed under MIT license, or public domain if desired and 211 | // recognized in your jurisdiction. 
212 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 213 | 214 | #ifndef JSON_FORWARDS_H_INCLUDED 215 | #define JSON_FORWARDS_H_INCLUDED 216 | 217 | #if !defined(JSON_IS_AMALGAMATION) 218 | #include "config.h" 219 | #endif // if !defined(JSON_IS_AMALGAMATION) 220 | 221 | namespace Json { 222 | 223 | // writer.h 224 | class FastWriter; 225 | class StyledWriter; 226 | 227 | // reader.h 228 | class Reader; 229 | 230 | // features.h 231 | class Features; 232 | 233 | // value.h 234 | typedef unsigned int ArrayIndex; 235 | class StaticString; 236 | class Path; 237 | class PathArgument; 238 | class Value; 239 | class ValueIteratorBase; 240 | class ValueIterator; 241 | class ValueConstIterator; 242 | 243 | } // namespace Json 244 | 245 | #endif // JSON_FORWARDS_H_INCLUDED 246 | 247 | // ////////////////////////////////////////////////////////////////////// 248 | // End of content of file: include/json/forwards.h 249 | // ////////////////////////////////////////////////////////////////////// 250 | 251 | 252 | 253 | 254 | 255 | #endif //ifndef JSON_FORWARD_AMALGATED_H_INCLUDED 256 | -------------------------------------------------------------------------------- /dl/include/layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file layer.hpp 3 | /// 4 | #ifndef LAYER_HPP_ 5 | #define LAYER_HPP_ 6 | 7 | #include 8 | #include "utils.cuh" 9 | #include "param.h" 10 | #include "matrix.hpp" 11 | 12 | template 13 | class Layer { 14 | 15 | public: 16 | Layer() {} 17 | virtual ~Layer() {} 18 | 19 | virtual void initCuda() {} 20 | virtual void computeOutput(Matrix* x) {} 21 | 22 | virtual void computeDerivsOfInput(Matrix* dE_dx) {} 23 | 24 | inline Matrix* getY() { 25 | return _y; 26 | } 27 | inline Matrix* getDEDY() { 28 | return _dE_dy; 29 | } 30 | 31 | protected: 32 | cublasHandle_t handle; 33 | Matrix* _y; ///>每一层的输出 34 | Matrix* _dE_dy; ///>每层输出的导数 35 | }; 36 | 37 | template 38 | class TrainLayer : public Layer { 39 | 40 | public: 41 | TrainLayer(TrainParam* tp){ 42 | _tp = tp; 43 | } 44 | TrainLayer() {} 45 | virtual ~TrainLayer() {} 46 | 47 | virtual void computeDerivsOfPars(Matrix* x) {} 48 | 49 | void updatePars(bool isShow = false) { 50 | if(isShow == true){ 51 | _w->showValue("w"); 52 | _dE_dw->showValue("dEdw"); 53 | cout << _tp->getMomentum() << ":" << _tp->getWeightDecay() << ":" \ 54 | << _tp->getWLR() << ":" << _tp->getBiasLR() << endl; 55 | } 56 | _w_inc->addSum(_w, _dE_dw, _tp->getMomentum(), -_tp->getWeightDecay(), \ 57 | -_tp->getWLR() / _tp->getMinibatchSize()); 58 | _w->add(_w_inc, 1, 1); 59 | 60 | _bias_inc->add(_dE_db, _tp->getMomentum(), \ 61 | -_tp->getBiasLR() / _tp->getMinibatchSize()); 62 | _bias->add(_bias_inc, 1, 1); 63 | } 64 | inline Matrix* getW() { 65 | return _w; 66 | } 67 | inline Matrix* getBias() { 68 | return _bias; 69 | } 70 | 71 | protected: 72 | Matrix* _w; 73 | Matrix* _bias; 74 | Matrix* _w_inc; 75 | Matrix* _bias_inc; 76 | Matrix* _dE_dw; 77 | Matrix* _dE_db; 78 | 79 | TrainParam* _tp; 80 | }; 81 | 82 | 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /dl/include/layer_kernel.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * filename: layer_kernel.cuh 3 | */ 4 | #ifndef LAYER_KERNEL_CUH_ 5 | #define LAYER_KERNEL_CUH_ 6 | 7 | #include "param.h" 8 | 9 | // CUDA: grid stride looping 10 | #define CUDA_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 
12 | i < (n); \ 13 | i += blockDim.x * gridDim.x) 14 | 15 | 16 | __global__ void forward_convolution(const float* x, const float* w, \ 17 | const float* bias, float* targets, \ 18 | const int in_height, const int in_width, const int in_channel, \ 19 | const int out_height, const int out_width, \ 20 | const int filter_height, const int filter_width, const int filter_channel, \ 21 | const int stride_height, const int stride_width, \ 22 | const int box_num_height, const int box_num_width, \ 23 | const int box_in_height, const int box_in_width, \ 24 | const int box_out_height, const int box_out_width); 25 | 26 | 27 | __global__ void backward_convolution(const float* dE_dy, const float *w, \ 28 | float* targets, \ 29 | const int box_in_height, const int box_in_width, \ 30 | const int box_out_height, const int box_out_width, \ 31 | const int out_channel, const int in_channel, \ 32 | const int out_height, const int out_width, \ 33 | const int filter_height, const int filter_width, \ 34 | const int stride_height, const int stride_width, \ 35 | const int box_num_height, const int box_num_width); 36 | 37 | 38 | __global__ void compute_convolution_derivs(const float* dE_dy, const float *x, \ 39 | float* dE_dw, const int box_out_height, const int box_out_width, \ 40 | const int out_channel, const int in_channel, const int in_height, \ 41 | const int in_width, const int out_height, const int out_width, \ 42 | const int filter_height, const int filter_width, \ 43 | const int stride_height, const int stride_width, \ 44 | const int box_num_height, const int box_num_width); 45 | 46 | 47 | __global__ void compact_dervis_w(const float* unranged_dE_dw, \ 48 | float* dE_dw, const int filter_height, const int filter_width, \ 49 | const int box_num_height, const int box_num_width, \ 50 | const int minibatch_size, const int in_channel, const int out_channel); 51 | 52 | __global__ void compute_derivs_of_bias(const float* dE_dy, float* targets, \ 53 | const int out_height, const int out_width, const int out_channel, \ 54 | const int box_out_height, const int box_out_width, \ 55 | const int box_num_height, const int box_num_width); 56 | 57 | 58 | __global__ void pad_to_ori(float* dst, const float* src, const int num_kernel, \ 59 | const int img_height, const int img_width, \ 60 | const int padded_img_height, const int padded_img_width, \ 61 | const int img_channel); 62 | 63 | __global__ void ori_to_padding(const float* src, float* dst, const int num_kernel, \ 64 | const int img_height, const int img_width, const int padded_img_height, \ 65 | const int padded_img_width, const int img_channel); 66 | 67 | __global__ void max_pooling(const float* convOutputs, float* targets, int* maxPoolPos, \ 68 | const int in_height, const int in_width, \ 69 | const int in_channels, const int out_height, const int out_width, \ 70 | const int filter_height, const int filter_width, \ 71 | const int stride_height, const int stride_width, \ 72 | const int box_out_height, const int box_out_width, \ 73 | const int box_num_height, const int box_num_width); 74 | 75 | __global__ void avg_pooling(const float* convOutputs, float* targets, \ 76 | const int in_height, const int in_width, \ 77 | const int in_channels, const int out_height, const int out_width, \ 78 | const int filter_height, const int filter_width, \ 79 | const int stride_height, const int stride_width, \ 80 | const int box_out_height, const int box_out_width, \ 81 | const int box_num_height, const int box_num_width); 82 | 83 | __global__ void compute_dE_dy_max(const float* 
dE_dy_i, float* targets, \ 84 | int* maxPoolPos, \ 85 | const int box_in_height, const int box_in_width, \ 86 | const int box_out_height, const int box_out_width, \ 87 | const int num_filters, \ 88 | const int out_height, const int out_width, \ 89 | const int filter_height, const int filter_width, \ 90 | const int stride_height, const int stride_width, \ 91 | const int box_num_height, const int box_num_width); 92 | 93 | __global__ void compute_dE_dy_avg(const float* dE_dy_i, float* targets, \ 94 | const int box_in_height, const int box_in_width, \ 95 | const int box_out_height, const int box_out_width, \ 96 | const int num_filters, \ 97 | const int out_height, const int out_width, \ 98 | const int filter_height, const int filter_width, \ 99 | const int stride_height, const int stride_width, \ 100 | const int box_num_height, const int box_num_width); 101 | 102 | __global__ void compute_dE_dy(const float* y_j, const int* labels, \ 103 | float* dE_dy_j, const int width); 104 | 105 | 106 | __global__ void compactOverlap(float* src, float* targets, \ 107 | const int in_height, const int in_width, const int in_channel, \ 108 | const int overlap_height, const int overlap_width, \ 109 | const int box_in_height, const int box_in_width, \ 110 | const int box_num_height, const int box_num_width); 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | #endif 123 | -------------------------------------------------------------------------------- /dl/include/load_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file load_layer.hpp 3 | /// \brief 从文件中下载数据 4 | /// 5 | 6 | #ifndef LOAD_LAYER_HPP_ 7 | #define LOAD_LAYER_HPP_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include"utils.cuh" 15 | 16 | #define MAX_OBJECT_NUM 24 17 | 18 | using namespace std; 19 | 20 | /// \brief 执行下载数据行为的类 21 | /// 22 | template 23 | class LoadLayer { 24 | 25 | public: 26 | 27 | /// \brief 默认构造函数表示个数信息需要从文件中读取,而不是传递进来的 28 | LoadLayer() {} 29 | LoadLayer(const int num_train, const int num_valid, \ 30 | const int num_test, const int img_size, const int img_channel); 31 | virtual ~LoadLayer(); 32 | 33 | virtual void loadBinary(string filenmae, Dtype* pixel_ptr, \ 34 | int* label_ptr, int batch_idx) {} 35 | 36 | void meanOneImg(Dtype* pixel_ptr, int process_len); 37 | void stdOneImg(Dtype* pixel_ptr, int process_len); 38 | 39 | virtual void loadTrainOneBatch(int batch_idx, \ 40 | Dtype* &mini_pixel, int* &mini_label) {} 41 | virtual void loadValidOneBatch(int batch_idx, \ 42 | Dtype* &mini_pixel, int* &mini_label) {} 43 | virtual void loadTestOneBatch(int batch_idx, \ 44 | Dtype* &mini_pixel, int *&mini_label) {} 45 | 46 | int getNumTrain(){ 47 | return _num_train; 48 | } 49 | int getNumValid(){ 50 | return _num_valid; 51 | } 52 | int getNumTest(){ 53 | return _num_test; 54 | } 55 | int getImgSize(){ 56 | return _img_size; 57 | } 58 | int getImgChannel(){ 59 | return _img_channel; 60 | } 61 | 62 | Dtype* getTrainPixel(){ 63 | return _train_pixel; 64 | } 65 | int* getTrainLabel(){ 66 | return _train_label; 67 | } 68 | Dtype* getValidPixel(){ 69 | return _valid_pixel; 70 | } 71 | int* getValidLabel(){ 72 | return _valid_label; 73 | } 74 | Dtype* getTestPixel(){ 75 | return _test_pixel; 76 | } 77 | int* getTestLabel(){ 78 | return _test_label; 79 | } 80 | 81 | protected: 82 | long long _num_train; 83 | int _num_valid; 84 | int _num_test; 85 | int _img_size; 86 | int _img_height; 87 | int _img_width; 88 | int _img_channel; 89 | 
int _img_sqrt; 90 | 91 | ///返回cpu数据 92 | int* _train_label; 93 | int* _valid_label; 94 | int* _test_label; 95 | Dtype* _train_pixel; 96 | Dtype* _valid_pixel; 97 | Dtype* _test_pixel; 98 | int* _train_label_ptr; 99 | int* _valid_label_ptr; 100 | int* _test_label_ptr; 101 | Dtype* _train_pixel_ptr; 102 | Dtype* _valid_pixel_ptr; 103 | Dtype* _test_pixel_ptr; 104 | 105 | bool _is_base_alloc; 106 | 107 | }; 108 | 109 | 110 | template 111 | class LoadCifar10 : public LoadLayer { 112 | 113 | int _minibatch_size; 114 | public: 115 | LoadCifar10(const int minibatch_size); 116 | 117 | ~LoadCifar10() {} 118 | 119 | using LoadLayer::loadBinary; 120 | void loadBinary(string filename, Dtype* &pixel_ptr, int* &label_ptr); 121 | void loadTrainOneBatch(int batch_idx, 122 | Dtype* &mini_pixel, int* &mini_label); 123 | void loadValidOneBatch(int batch_idx, 124 | Dtype* &mini_pixel, int* &mini_label); 125 | 126 | }; 127 | 128 | 129 | #include "../src/load_layer.cpp" 130 | 131 | #endif 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /dl/include/logistic.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file logistic.cuh 3 | /// @brief 实现了softmax 4 | 5 | #ifndef LOGISTIC_CUH_ 6 | #define LOGISTIC_CUH_ 7 | 8 | #include 9 | #include "layer.hpp" 10 | #include "layer_kernel.cuh" 11 | 12 | template 13 | class Logistic : public Layer { 14 | 15 | public: 16 | Logistic(FullConnectParam* fcp); 17 | ~Logistic(); 18 | 19 | void initCuda(); 20 | void computeOutput(Matrix* x); 21 | double computeError(Matrix* labels, int& num_error); 22 | using Layer::computeDerivsOfInput; 23 | void computeDerivsOfInput(Matrix* x, Matrix* labels); 24 | 25 | inline Matrix* getResultRecord(){ 26 | _d_record->copyFromHost(_h_record, this->_y->getNumCols() * this->_y->getNumCols()); 27 | return _d_record; 28 | } 29 | inline void setRecordToZero(){ 30 | memset(_h_record, 0, sizeof(int) * this->_y->getNumCols() * this->_y->getNumCols()); 31 | } 32 | 33 | 34 | private: 35 | FullConnectParam* _fcp; 36 | int* h_labels; 37 | Dtype* y_CPU; 38 | Dtype* correct_probs; 39 | Matrix* d_max_pos_of_out; 40 | Dtype* h_max_pos_of_out; 41 | 42 | Matrix* _d_record; ///>这个变量用来存储最后分类的结果,10*10的矩阵 43 | int* _h_record; 44 | 45 | 46 | }; 47 | 48 | #include "../src/logistic.cu" 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /dl/include/matrix.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file matrix.hpp 3 | /// \brief 继承数据类,拥有矩阵的特性 4 | /// 5 | 6 | 7 | #ifndef Matrix_H_ 8 | #define Matrix_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include "cublas_v2.h" 14 | #include "data.hpp" 15 | 16 | #define CUDA_ERROR_CHECK 17 | 18 | #define cudaCheckError() __cudaCheckError(__FILE__, __LINE__) 19 | 20 | inline void __cudaCheckError(const char *file, const int line){ 21 | #ifdef CUDA_ERROR_CHECK 22 | cudaError err = cudaGetLastError(); 23 | if(cudaSuccess != err){ 24 | fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n", \ 25 | file, line, cudaGetErrorString(err)); 26 | exit(-1); 27 | } 28 | #endif 29 | } 30 | 31 | using namespace std; 32 | 33 | /// \brief 实现了矩阵类,数据将以矩阵形式保存 34 | /// 35 | template 36 | class Matrix : public Data { 37 | private: 38 | static cudaDeviceProp deviceProps; ///< 查询gpu硬件规格 39 | 40 | public: 41 | 42 | /// 运算的枚举 43 | /// 44 | /// 该枚举定义了对类中成员执行何种运算 45 | enum 
FUNCTIONS { 46 | LOG, EXP, RECIPROCAL, SOFTMAX, SIGMOID, DROPOUT 47 | }; 48 | 49 | Matrix(int numRows, int numCols); 50 | 51 | Matrix(const Matrix *like, bool copy); 52 | 53 | Matrix(const Matrix *like); 54 | 55 | ~Matrix(); 56 | /// \brief 初始化类中成员,为行列赋值 57 | 58 | void _init(int numRows, int numCols); 59 | 60 | /// \brief 判断两个对象维数是否相等 61 | inline bool isSameDims(const Matrix *m) const { 62 | return m->getNumRows() == this->_shape[0] && m->getNumCols() == this->_shape[1]; 63 | } 64 | 65 | inline int getNumRows() const { 66 | return this->_shape[0]; 67 | } 68 | 69 | inline int getNumCols() const { 70 | return this->_shape[1]; 71 | } 72 | 73 | inline int getNumEles() const { 74 | return this->_amount; 75 | } 76 | 77 | inline void changePtr(const int add) { 78 | this->_data_value = this->_data_value + add; 79 | } 80 | 81 | inline void changePtrFromStart(Dtype *start, const int add) { 82 | this->_data_value = start + add; 83 | } 84 | 85 | inline void setPtr(Dtype *start) { 86 | this->_data_value = start; 87 | } 88 | 89 | /// \brief 求矩阵转置 90 | void getTranspose(Matrix *target); 91 | 92 | /// \brief 矩阵右乘 93 | /// \param[in] b 94 | /// \param[out] target 两个矩阵相乘输出 95 | void rightMult(Matrix *b, float scale_AB, Matrix *target, \ 96 | cublasHandle_t &handle); 97 | 98 | /// \brief 将每一行累加起来生成一列,列个数保持不变 99 | /// \param[out] target 100 | void sumRow(Matrix *target); 101 | 102 | void sumCol(Matrix *target); 103 | 104 | /// \brief 用一个标量减去整个矩阵 105 | /// \param[out] target 假如没有这个参数,那么计算结果保存在调用矩阵中 106 | void subtractFromScalar(float scalar, Matrix *target); 107 | 108 | void subtractFromScalar(float scalar); 109 | 110 | /// \brief 矩阵间点乘 111 | /// 112 | /// 点乘结果保存在调用矩阵中 113 | /// \param[in] b 用来与调用矩阵进行点乘 114 | /// \param[out] target 保存矩阵与列向量点乘,若没有这个参数,则保存在调用矩阵中 115 | void eltWiseMult(Matrix *b, Matrix *target); 116 | 117 | void eltWiseMult(Matrix *b); 118 | 119 | /// \brief 矩阵每一列与列向量相加 120 | /// \param[in] vec 用来加法的列向量 121 | /// \param[out] target 保存矩阵与列向量相加结果,若没有这个参数,则保存在调用矩阵中 122 | void addColVector(Matrix *vec, float scale_vec, Matrix *target); 123 | 124 | void addColVector(Matrix *vec); 125 | 126 | void addRowVector(Matrix *vec, float scale_vec, Matrix *target); 127 | 128 | void addRowVector(Matrix *vec); 129 | 130 | /// \brief 对矩阵每一个值执行某种运算 131 | /// 132 | /// 针对矩阵每一个值,可以执行FUNCTIONS枚举量中任意一种运算 133 | /// \param[out] target 保存执行运算后的值,没有该参数,则保存在调用矩阵中 134 | void apply(FUNCTIONS f, Matrix *target); 135 | 136 | void apply(FUNCTIONS f); 137 | 138 | void applyRelu(Matrix* target, Matrix* record, bool direction = true); 139 | 140 | void applyDropout(Matrix *target, Matrix* record, \ 141 | Matrix* rand_probs, bool is_set_up); 142 | 143 | /// \brief 矩阵间点加 144 | /// 145 | /// 将输入的三个矩阵点加,然后保存在调用矩阵中 146 | /// \param[in] b 用来与调用矩阵进行点加 147 | /// \param[in] c 点加 148 | void addSum(Matrix *b, Matrix *c, float scale_This, \ 149 | float scale_B, float scale_C); 150 | 151 | void add(Matrix *b, float scale_This, float scale); 152 | 153 | /// \brief 矩阵一行最大值 154 | /// \param[out] max_vec 保存每一行的最大值的位置 155 | void maxPosInRow(Matrix *max_vec); 156 | 157 | 158 | /// \brief 打印矩阵 159 | /// \param[in] name 矩阵的名称 160 | void showValue(string name); 161 | 162 | /// \brief 给矩阵重新赋值 163 | /// 164 | /// 输入是float时,矩阵全部赋值为这个值。输入是int时,矩阵每个位置对这个int取余 165 | void reValue(float value); 166 | 167 | void reValue(int value, bool is_div = false); 168 | 169 | Dtype computeNorm(int len); 170 | 171 | void cropMatToNew(Matrix *tar, const int row_start, const int cropped_height, \ 172 | const int col_start, const int cropped_width); 173 | 174 | Dtype 
getPosValue(int pos); 175 | Dtype getFirstPosValue(); 176 | 177 | void savePars(string filename); 178 | void readPars(string filename); 179 | 180 | void subedByUnitMat(); 181 | void subPortion(Matrix* b, const int b_row, \ 182 | const int b_col); 183 | void setValueAt(const int height_idx, \ 184 | const int width_idx, const Dtype value); 185 | }; 186 | 187 | #include "../src/matrix.cu" 188 | 189 | #endif 190 | -------------------------------------------------------------------------------- /dl/include/matrix_kernel.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file matrix_kernel.hpp 3 | /// \brief kernel functions for the Matrix class 4 | 5 | #ifndef MATRIX_KERNEL_H_ 6 | #define MATRIX_KERNEL_H_ 7 | 8 | #include 9 | 10 | #define NUM_BLOCKS_MAX 65535 11 | 12 | #define ADD_BLOCK_SIZE 16 13 | #define COPY_BLOCK_SIZE 16 14 | 15 | #define DIVUP(a, b) (((a) + (b) - 1) / (b)) 16 | 17 | template 18 | __global__ void kTranspose(Dtype* srcData, Dtype* dstData, \ 19 | const int width, const int height); 20 | 21 | /// \brief GPU implementation of addRowVector 22 | /// 23 | /// \param[in] width width and height of the matrix being passed in 24 | template 25 | __global__ void kAddRowVector(Dtype* mat, Dtype* vec, Dtype* tgtMat, \ 26 | const int width, const int height, float scaleVec); 27 | 28 | template 29 | __global__ void kSubtractFromScalar(Dtype* gData, float scalar, Dtype* target, \ 30 | const int width, const int height); 31 | 32 | template 33 | __global__ void kSoftmax(Dtype* gData, Dtype* target, const int width, \ 34 | const int height); 35 | 36 | template 37 | __global__ void kReciprocal(Dtype* gData, Dtype* target, const int width, \ 38 | const int height); 39 | 40 | template 41 | __global__ void kLog(Dtype* gData, Dtype* target, const int width, \ 42 | const int height); 43 | 44 | template 45 | __global__ void kSigmoid(Dtype* gData, Dtype* target, const int width, \ 46 | const int height); 47 | 48 | template 49 | __global__ void kSetUpCurand(curandState *state, const int width, const int height); 50 | 51 | template 52 | __global__ void kDropout(Dtype* gData, Dtype* target, int* record, \ 53 | curandState *state, const int width, const int height); 54 | 55 | template 56 | __global__ void kRelu(Dtype* gData, Dtype* target, int* record, const int length); 57 | 58 | template 59 | __global__ void kReluBack(Dtype* gData, Dtype* target, int* record, const int length); 60 | 61 | template 62 | __global__ void kDumbSumCols(Dtype* mat, Dtype* vec, const int width, \ 63 | const int height); 64 | 65 | template 66 | __global__ void kDumbMaxPosInRow(Dtype* mat, Dtype* vec, const int width, \ 67 | const int height); 68 | 69 | template 70 | __global__ void kMult(Dtype* matA, Dtype* matB, Dtype* tgtMat, \ 71 | const int width, const int height); 72 | 73 | template 74 | __global__ void kAdd(Dtype* matA, Dtype* matB, Dtype* tgtMat, float scaleA, \ 75 | float scaleB, const int width, const int height); 76 | 77 | //dst = (src + [added_value, 0, ..., 0]) * scale 78 | template 79 | __global__ void kComputeHouseholderVec(const Dtype* src, Dtype* dst, \ 80 | Dtype added_value, Dtype scale, const int len); 81 | 82 | template 83 | __global__ void kSubedByUnitMat(Dtype* matA, Dtype* tgtMat, \ 84 | const int width, const int height); 85 | 86 | //B covers only a portion of A; subtract that portion from A 87 | template 88 | __global__ void kSubPortion(Dtype* matA, Dtype* matB, Dtype* tgtMat, \ 89 | const int a_width, const int a_height, \ 90 | const int b_width, const int b_height); 91 | 92 | #include "../src/matrix_kernel.cu" 93 | 94 | 95 | #endif 96 |
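The two headers above declare the GPU matrix type (matrix.hpp) and its CUDA kernels (matrix_kernel.hpp). As orientation only, the following editorial sketch, which is not a file of the repository, shows how the declared API is meant to be combined: a cuBLAS-backed matrix product, a bias add, and a row-wise softmax. The dimensions, the buffers host_a/host_w/host_b and the wrapper function run_softmax_layer are illustrative assumptions; the Matrix/Data member calls themselves are taken from the declarations above, and the sketch assumes a CUDA device and host arrays of matching sizes.

#include "matrix.hpp"

// Illustrative only: y = softmax(a * w + bias), computed on the GPU.
void run_softmax_layer(float* host_a, float* host_w, float* host_b) {
    cublasHandle_t handle;
    cublasCreate(&handle);

    Matrix<float> a(64, 128);     // input activations, 64 samples x 128 features
    Matrix<float> w(128, 10);     // weights
    Matrix<float> bias(1, 10);    // one bias per output column
    Matrix<float> y(64, 10);      // output

    a.copyFromHost(host_a, a.getNumEles());
    w.copyFromHost(host_w, w.getNumEles());
    bias.copyFromHost(host_b, bias.getNumEles());

    y.zeros();
    a.rightMult(&w, 1.0f, &y, handle);   // GEMM via cuBLAS
    y.addRowVector(&bias);               // add the bias row to every row of y
    y.apply(Matrix<float>::SOFTMAX);     // in-place row-wise softmax
    y.showValue("y");                    // print the device matrix for inspection

    cublasDestroy(handle);
}

This mirrors how InnerProductLayer and Logistic later combine rightMult, addRowVector and apply(SOFTMAX) in their computeOutput methods.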
-------------------------------------------------------------------------------- /dl/include/model_component.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file model_component.hpp 3 | /// \brief Describes the components that make up the network model 4 | /// 5 | 6 | 7 | #ifndef MODELCOMPONENT_H_ 8 | #define MODELCOMPONENT_H_ 9 | 10 | #include 11 | #include 12 | #include "matrix.hpp" 13 | #include "param.h" 14 | #include "layer.hpp" 15 | 16 | using namespace std; 17 | 18 | 19 | /// \brief Implements the components of the network, e.g. how many convolution layers and how many fully connected layers it consists of 20 | /// 21 | template 22 | class ModelComponent { 23 | 24 | template 25 | friend class TrainModel; 26 | 27 | template 28 | friend class TrainClassification; 29 | 30 | private: 31 | 32 | long long _num_train; ///>parameters of the model 33 | int _num_valid; 34 | int _minibatch_size; 35 | int _num_train_batch; 36 | int _num_valid_batch; 37 | int _num_epoch; 38 | int _num_layers; 39 | int _num_need_train_layers; 40 | int _img_height; 41 | int _img_width; 42 | int _img_channel; 43 | int _one_img_len; ///>length of a single input image 44 | 45 | vector< Layer* > _layers; ///>pointers to every layer 46 | vector< Layer* > _layers_needed_train; 47 | vector _layers_param; ///>parameters of every layer 48 | vector _layers_need_train_param; 49 | vector _w_len; ///>weight lengths of the trainable layers, used to pass data between processes 50 | vector _bias_len; 51 | vector _w_init_gauss; 52 | vector< Matrix* > _w; ///>weight pointers of the trainable layers 53 | vector< Matrix* > _bias; 54 | 55 | vector< Matrix* > _y; 56 | vector< Matrix* > _dE_dy; 57 | vector< Matrix* > _y_needed_train; 58 | 59 | Matrix* _mini_data; ///> stores the pixel values 60 | Matrix* _mini_label; ///> stores the object class labels 61 | 62 | map _string_map_layertype; 63 | map _string_map_pooltype; 64 | 65 | public: 66 | 67 | ModelComponent(); 68 | ~ModelComponent() {} 69 | 70 | 71 | void setImgHeight(const int img_height){ 72 | _img_height = img_height; 73 | } 74 | void setImgWidth(const int img_width){ 75 | _img_width = img_width; 76 | } 77 | void setImgChannel(const int img_channel){ 78 | _img_channel = img_channel; 79 | } 80 | void setOneImgLen(const int one_img_len){ 81 | _one_img_len = one_img_len; 82 | } 83 | void setNumLayers(const int num_layers){ 84 | _num_layers = num_layers; 85 | } 86 | void setNumNeedTrainLayers(const int num_need_train_layers){ 87 | _num_need_train_layers= num_need_train_layers; 88 | } 89 | void setNumTrain(const long long num_train){ 90 | _num_train = num_train; 91 | } 92 | void setNumValid(const int num_valid){ 93 | _num_valid = num_valid; 94 | } 95 | void setMinibatchSize(const int minibatch_size){ 96 | _minibatch_size = minibatch_size; 97 | } 98 | void setNumTrainBatch(){ 99 | _num_train_batch = _num_train / _minibatch_size; 100 | } 101 | void setNumValidBatch(){ 102 | _num_valid_batch = _num_valid / _minibatch_size; 103 | } 104 | void setEpoch(const int num_epoch){ 105 | _num_epoch = num_epoch; 106 | } 107 | void setLayers(Layer* layer){ 108 | _layers.push_back(layer); 109 | } 110 | void setNeedTrainLayers(Layer* need_train_layer){ 111 | _layers_needed_train.push_back(need_train_layer); 112 | } 113 | void setLayersParam(Param* param){ 114 | _layers_param.push_back(param); 115 | } 116 | void setNeedTrainLayersParam(Param* param){ 117 | _layers_need_train_param.push_back(param); 118 | } 119 | void setWLen(int w_len){ 120 | _w_len.push_back(w_len); 121 | } 122 | void setBiasLen(int bias_len){ 123 | _bias_len.push_back(bias_len); 124 | } 125 | void setW(Matrix *w){ 126 | _w.push_back(w); 127 | } 128 | void setBias(Matrix *bias){ 129 | _bias.push_back(bias); 130 | } 131 | void setY(Matrix *y){ 132 | _y.push_back(y); 133 | } 134 | void setDEDY(Matrix *dE_dy) { 135 |
_dE_dy.push_back(dE_dy); 136 | } 137 | 138 | int getImgHeight(){ 139 | return _img_height; 140 | } 141 | int getImgWidth(){ 142 | return _img_width; 143 | } 144 | int getImgChannel(){ 145 | return _img_channel; 146 | } 147 | int getOneImgLen(){ 148 | return _one_img_len; 149 | } 150 | int getNumLayers(){ 151 | return _num_layers; 152 | } 153 | int getNumNeedTrainLayers(){ 154 | return _num_need_train_layers; 155 | } 156 | long long getNumTrain(){ 157 | return _num_train; 158 | } 159 | int getNumValid(){ 160 | return _num_valid; 161 | } 162 | int getMinibatchSize(){ 163 | return _minibatch_size; 164 | } 165 | int getNumTrainBatch(){ 166 | return _num_train_batch; 167 | } 168 | int getNumValidBatch(){ 169 | return _num_valid_batch; 170 | } 171 | int getNumEpoch(){ 172 | return _num_epoch; 173 | } 174 | vector< Layer* > getLayers(){ 175 | return _layers; 176 | } 177 | vector< Layer* > getNeedTrainLayers(){ 178 | return _layers_needed_train; 179 | } 180 | vector getLayersParam(){ 181 | return _layers_param; 182 | } 183 | vector getNeedTrainLayersParam(){ 184 | return _layers_need_train_param; 185 | } 186 | vector getWLen(){ 187 | return _w_len; 188 | } 189 | vector getBiasLen(){ 190 | return _bias_len; 191 | } 192 | vector< Matrix* > getW(){ 193 | return _w; 194 | } 195 | vector< Matrix* > getBias(){ 196 | return _bias; 197 | } 198 | vector< Matrix* > getY(){ 199 | return _y; 200 | } 201 | vector< Matrix* > getDEDY(){ 202 | return _dE_dy; 203 | } 204 | 205 | }; 206 | 207 | #include "../src/model_component.cpp" 208 | 209 | #endif 210 | -------------------------------------------------------------------------------- /dl/include/param.h: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file param.h 3 | /// 4 | #ifndef PARAM_H_ 5 | #define PARAM_H_ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | #define MAX_THREAD_SIZE 32 14 | #define MAX_NUM_KERNEL 4096 15 | #define MAX_NUM_THREAD 1024 16 | 17 | typedef enum PARAM_CONNECT_TYPE { 18 | PARAM_CONNECT_TYPE_LOCAL = 0, 19 | PARAM_CONNECT_TYPE_FULL = 1 20 | } ConnectType; 21 | 22 | typedef enum POOLING_TYPE { 23 | MAX_POOLING = 0, 24 | AVG_POOLING = 1 25 | } PoolingType; 26 | 27 | typedef enum PARAM_TRAIN_TYPE { 28 | NOTNEED = 0, 29 | NEED = 1 30 | } ParamTrainType; 31 | 32 | typedef enum LAYER_TYPE { 33 | CONVOLUTION = 0, 34 | POOLING = 1, 35 | SIGMOID = 2, 36 | RECTIFIED = 3, 37 | INNERPRODUCT = 4, 38 | SOFTMAX = 5, 39 | DROPOUT = 6, 40 | PREDICTOBJECT = 7, 41 | RECOMMENDSUBSTITUE = 8, 42 | RECOMMENDCOMPATIBLE = 9 43 | } LayerType; 44 | 45 | /// \brief 实现了每一层的参数 46 | /// 47 | class Param { 48 | 49 | public: 50 | Param() { } 51 | 52 | virtual ~Param() { } 53 | 54 | Param(string name, LayerType layer_type) : \ 55 | _name(name), _layer_type(layer_type), \ 56 | _param_train_type(NOTNEED){} 57 | 58 | virtual int getNumOut() {return 0;} 59 | virtual int getOutChannel() {return 0;} 60 | virtual int getOutWidth() {return 0;} 61 | virtual int getOutHeight() {return 0;} 62 | 63 | inline int getMinibatchSize() { 64 | return _minibatch_size; 65 | } 66 | inline string getName(){ 67 | return _name; 68 | } 69 | inline ConnectType getConnectType() { 70 | return type; 71 | } 72 | ParamTrainType getParamTrainType(){ 73 | return _param_train_type; 74 | } 75 | LayerType getLayerType(){ 76 | return _layer_type; 77 | } 78 | virtual void printParam(){ 79 | cout << "\n============"<< _name << "============" \ 80 | << "\nlayer_type: " << _layer_type; 81 | } 82 | static void 
setMinibatchSize(const int minibatch_size){ 83 | _minibatch_size = minibatch_size; 84 | } 85 | 86 | protected: 87 | string _name; ///> 实例化每一层的名字,用来区分不同的层 88 | static int _minibatch_size; 89 | ConnectType type; 90 | ParamTrainType _param_train_type; 91 | LayerType _layer_type; 92 | }; 93 | 94 | /// \brief 实现了需要训练的层参数,主要为了改变权重和调节学习率 95 | class TrainParam : public virtual Param { 96 | public: 97 | TrainParam() { } 98 | 99 | virtual ~TrainParam() { } 100 | 101 | TrainParam(const float w_lr, const float b_lr, \ 102 | const float momentum, const float weight_decay, \ 103 | const float w_gauss) \ 104 | : _w_lr(w_lr), _b_lr(b_lr), _momentum(momentum), \ 105 | _weight_decay(w_lr*weight_decay), _w_gauss(w_gauss){ 106 | this->_param_train_type = NEED; 107 | } 108 | 109 | inline void lrMultiScale(float lr_scale) { 110 | _w_lr *= lr_scale; 111 | _b_lr *= lr_scale; 112 | cout << _w_lr << ":" << _b_lr << endl; 113 | } 114 | inline void lrChangeTo(float new_w, float new_b) { 115 | _w_lr = new_w; 116 | _b_lr = new_b; 117 | } 118 | inline float getWLR() { 119 | return _w_lr; 120 | } 121 | inline float getBiasLR() { 122 | return _b_lr; 123 | } 124 | inline float getMomentum() { 125 | return _momentum; 126 | } 127 | inline float getWeightDecay() { 128 | return _weight_decay; 129 | } 130 | float getWGauss() { 131 | return _w_gauss; 132 | } 133 | void printParam(){ 134 | cout << "\nw_lr: " << _w_lr \ 135 | << "\nb_lr: " << _b_lr \ 136 | << "\nmomentum: " << _momentum \ 137 | << "\nweight_decay: " << _weight_decay \ 138 | << "\nw_gauss: " << _w_gauss ; 139 | } 140 | 141 | protected: 142 | float _w_lr; 143 | float _b_lr; 144 | float _momentum; 145 | float _weight_decay; 146 | float _w_gauss; 147 | 148 | }; 149 | 150 | /// \brief 局部连接层的参数,以图片形式保存数据 151 | class LocalConnectParam : public virtual Param { 152 | public: 153 | 154 | LocalConnectParam() { } 155 | 156 | virtual ~LocalConnectParam() { } 157 | 158 | LocalConnectParam(LayerType layer_type, string name, const int in_height, \ 159 | const int in_width, const int pad_height, const int pad_width, \ 160 | const int stride_height, const int stride_width, \ 161 | const int in_channel, \ 162 | const int filter_height, const int filter_width, const int out_channel) \ 163 | : _in_height(in_height), _in_width(in_width), _stride_height(stride_height), \ 164 | _stride_width(stride_width), _in_channel(in_channel), \ 165 | _pad_height(pad_height), _pad_width(pad_width), \ 166 | _filter_height(filter_height), _filter_width(filter_width), \ 167 | _out_channel(out_channel){ 168 | 169 | this->_layer_type = layer_type; 170 | this->_name = name; 171 | this->type = PARAM_CONNECT_TYPE_LOCAL; 172 | _padded_in_height = in_height + 2 * pad_height; 173 | _padded_in_width = in_width + 2 * pad_width; 174 | _out_height = ceil(((_padded_in_height - filter_height)*1.0f) / stride_height) + 1; 175 | _out_width = ceil(((_padded_in_width - filter_width)*1.0f) / stride_width) + 1; 176 | _box_num_height = ceil((this->getOutHeight() - MAX_THREAD_SIZE) \ 177 | * 1.0f / MAX_THREAD_SIZE) + 1; 178 | _box_num_width = ceil((this->getOutWidth() - MAX_THREAD_SIZE) \ 179 | * 1.0f / MAX_THREAD_SIZE) + 1; 180 | _box_out_height = MAX_THREAD_SIZE > _out_height \ 181 | ? _out_height : MAX_THREAD_SIZE; 182 | _box_out_width = MAX_THREAD_SIZE > _out_width \ 183 | ? 
_out_width : MAX_THREAD_SIZE; 184 | _box_in_height = (_box_out_height - 1) * stride_height + filter_height; 185 | _box_in_width = (_box_out_width - 1) * stride_width + filter_width; 186 | 187 | int pow2Length = _out_height; 188 | if(pow2Length & (pow2Length - 1)){ 189 | while(pow2Length & (pow2Length - 1)){ 190 | pow2Length &= pow2Length - 1; 191 | } 192 | pow2Length *= 2; 193 | } 194 | _thread_height = pow2Length > MAX_THREAD_SIZE \ 195 | ? MAX_THREAD_SIZE : pow2Length; 196 | 197 | pow2Length = _out_width; 198 | if(pow2Length & (pow2Length - 1)){ 199 | while(pow2Length & (pow2Length - 1)){ 200 | pow2Length &= pow2Length - 1; 201 | } 202 | pow2Length *= 2; 203 | } 204 | _thread_width = pow2Length > MAX_THREAD_SIZE \ 205 | ? MAX_THREAD_SIZE : pow2Length; 206 | 207 | _overlap_height = _filter_height - stride_height; 208 | _overlap_width = _filter_width - stride_width; 209 | 210 | } 211 | 212 | LocalConnectParam(LayerType layer_type, string name, \ 213 | const int pad_height, const int pad_width, \ 214 | const int stride_height, const int stride_width, \ 215 | const int filter_height, const int filter_width, const int filter_channel, \ 216 | LocalConnectParam* lc_par) \ 217 | : _in_height(lc_par->getOutHeight()), _in_width(lc_par->getOutWidth()), \ 218 | _stride_height(stride_height), _stride_width(stride_width), \ 219 | _in_channel(lc_par->getOutChannel()), _pad_height(pad_height), _pad_width(pad_width), \ 220 | _filter_height(filter_height), _filter_width(filter_width) { 221 | 222 | this->_layer_type = layer_type; 223 | this->_name = name; 224 | if(filter_channel != 0) 225 | _out_channel = filter_channel; 226 | else 227 | _out_channel = _in_channel; 228 | 229 | this->type = PARAM_CONNECT_TYPE_LOCAL; 230 | 231 | _padded_in_height = _in_height + 2 * pad_height; 232 | _padded_in_width = _in_width + 2 * pad_width; 233 | _out_height = ceil(((_padded_in_height - filter_height)*1.0f) / stride_height) + 1; 234 | _out_width = ceil(((_padded_in_width - filter_width)*1.0f) / stride_width) + 1; 235 | _box_num_height = ceil((this->getOutHeight() - MAX_THREAD_SIZE) \ 236 | * 1.0f / MAX_THREAD_SIZE) + 1; 237 | _box_num_width = ceil((this->getOutWidth() - MAX_THREAD_SIZE) \ 238 | * 1.0f / MAX_THREAD_SIZE) + 1; 239 | 240 | _box_out_height = MAX_THREAD_SIZE > _out_height \ 241 | ? _out_height : MAX_THREAD_SIZE; 242 | _box_out_width = MAX_THREAD_SIZE > _out_width \ 243 | ? _out_width : MAX_THREAD_SIZE; 244 | 245 | _box_in_height = (_box_out_height - 1) * stride_height + filter_height; 246 | _box_in_width = (_box_out_width - 1) * stride_width + filter_width; 247 | 248 | int pow2Length = _out_height; 249 | if(pow2Length & (pow2Length - 1)){ 250 | while(pow2Length & (pow2Length - 1)){ 251 | pow2Length &= pow2Length - 1; 252 | } 253 | pow2Length *= 2; 254 | } 255 | _thread_height = pow2Length > MAX_THREAD_SIZE \ 256 | ? MAX_THREAD_SIZE : pow2Length; 257 | 258 | pow2Length = _out_width; 259 | if(pow2Length & (pow2Length - 1)){ 260 | while(pow2Length & (pow2Length - 1)){ 261 | pow2Length &= pow2Length - 1; 262 | } 263 | pow2Length *= 2; 264 | } 265 | _thread_width = pow2Length > MAX_THREAD_SIZE \ 266 | ?
MAX_THREAD_SIZE : pow2Length; 267 | 268 | _overlap_height = _filter_height - stride_height; 269 | _overlap_width = _filter_width - stride_width; 270 | 271 | 272 | } 273 | 274 | inline int getInHeight() { 275 | return _in_height; 276 | } 277 | inline int getInWidth() { 278 | return _in_width; 279 | } 280 | inline int getInChannel() { 281 | return _in_channel; 282 | } 283 | inline int getOutHeight() { 284 | return _out_height; 285 | } 286 | inline int getOutWidth() { 287 | return _out_width; 288 | } 289 | inline int getFilterHeight() { 290 | return _filter_height; 291 | } 292 | inline int getFilterWidth() { 293 | return _filter_width; 294 | } 295 | inline int getOutChannel() { 296 | return _out_channel; 297 | } 298 | inline int getPaddedInHeight() { 299 | return _padded_in_height; 300 | } 301 | inline int getPaddedInWidth() { 302 | return _padded_in_width; 303 | } 304 | 305 | inline int getStrideHeight(){ 306 | return _stride_height; 307 | } 308 | inline int getStrideWidth(){ 309 | return _stride_width; 310 | } 311 | inline int getPadHeight(){ 312 | return _pad_height; 313 | } 314 | inline int getPadWidth(){ 315 | return _pad_width; 316 | } 317 | int getOverlapHeight(){ 318 | return _overlap_height; 319 | } 320 | int getOverlapWidth(){ 321 | return _overlap_width; 322 | } 323 | int getThreadHeight(){ 324 | return _thread_height; 325 | } 326 | int getThreadWidth(){ 327 | return _thread_width; 328 | } 329 | void printParam(){ 330 | Param::printParam(); 331 | cout << "\nin_height: " << _in_height \ 332 | << "\nin_width: " << _in_width \ 333 | << "\nin_channel: " << _in_channel \ 334 | << "\nfilter_height: " << _filter_height \ 335 | << "\nfilter_width: " << _filter_width \ 336 | << "\nfilter_channel: " << _out_channel \ 337 | << "\npad_height: " << _pad_height \ 338 | << "\npad_width: " << _pad_width \ 339 | << "\nstride_height: " << _stride_height \ 340 | << "\nstride_width: " << _stride_width; 341 | } 342 | inline int getBoxNumHeight(){ 343 | return _box_num_height; 344 | } 345 | inline int getBoxNumWidth(){ 346 | return _box_num_width; 347 | } 348 | inline int getBoxInHeight(){ 349 | return _box_in_height; 350 | } 351 | inline int getBoxInWidth(){ 352 | return _box_in_width; 353 | } 354 | inline int getBoxOutHeight(){ 355 | return _box_out_height; 356 | } 357 | inline int getBoxOutWidth(){ 358 | return _box_out_width; 359 | } 360 | 361 | private: 362 | int _in_height; 363 | int _in_width; 364 | int _pad_height; 365 | int _pad_width; 366 | int _padded_in_height; 367 | int _padded_in_width; 368 | int _stride_height; 369 | int _stride_width; 370 | int _in_channel; 371 | int _filter_height; ///>在卷积中是filter,在pooling中是pool 372 | int _filter_width; ///>在卷积中是filter,在pooling中是pool 373 | int _out_height; 374 | int _out_width; 375 | int _out_channel; 376 | int _box_in_height; ///>用来计算一个box输出的 377 | int _box_in_width; ///>用来计算一个box输出的卷积输入 378 | int _box_out_height; 379 | int _box_out_width; 380 | int _box_num_height; ///>总的box个数的行 381 | int _box_num_width; ///>总的box个数的列 382 | int _thread_height; 383 | int _thread_width; 384 | int _overlap_height; 385 | int _overlap_width; 386 | }; 387 | 388 | /// \brief 全连接层的参数,展开图片为一个矢量保存数据 389 | /// 390 | /// 可以针对每一个值做某种操作,例如Relu、sigmoid、tanh等, 391 | /// 此处不需要训练 392 | class FullConnectParam : public virtual Param { 393 | public: 394 | FullConnectParam() { } 395 | virtual ~FullConnectParam() { } 396 | FullConnectParam(LayerType layer_type, string name, \ 397 | const int num_in, const int num_out) \ 398 | : _num_in(num_in), _num_out(num_out) { 399 | this->_layer_type = 
layer_type; 400 | this->_name = name; 401 | this->type = PARAM_CONNECT_TYPE_FULL; 402 | } 403 | FullConnectParam(LayerType layer_type, string name, \ 404 | const int num_out, Param* par){ 405 | this->_layer_type = layer_type; 406 | this->_name = name; 407 | this->type = PARAM_CONNECT_TYPE_FULL; 408 | 409 | ///由传递进来的层类型决定计算方式 410 | ConnectType ct = par->getConnectType(); 411 | if(ct == PARAM_CONNECT_TYPE_LOCAL) 412 | _num_in = par->getOutHeight()*par->getOutWidth()*par->getOutChannel(); 413 | else if(ct == PARAM_CONNECT_TYPE_FULL) 414 | _num_in = par->getNumOut(); 415 | 416 | if(num_out != 0) 417 | _num_out = num_out; 418 | else 419 | _num_out = _num_in; 420 | } 421 | 422 | 423 | inline int getNumIn() { 424 | return _num_in; 425 | } 426 | 427 | inline int getNumOut() { 428 | return _num_out; 429 | } 430 | void printParam(){ 431 | Param::printParam(); 432 | cout << "\nnum_in: " << _num_in \ 433 | << "\nnum_out: " << _num_out; 434 | } 435 | 436 | private: 437 | int _num_in; 438 | int _num_out; 439 | }; 440 | 441 | class ConvParam : public TrainParam, public LocalConnectParam { 442 | public: 443 | ConvParam(){} 444 | 445 | ~ConvParam(){} 446 | 447 | ConvParam(const LayerType layer_type, const string name, \ 448 | const float w_lr, \ 449 | const float b_lr, const float momentum, \ 450 | const float weight_decay, const float w_gauss, \ 451 | const int in_height, const int in_width, \ 452 | const int pad_height, const int pad_width, \ 453 | const int stride_height, \ 454 | const int stride_width, const int in_channel, \ 455 | const int filter_height, const int filter_width, \ 456 | const int filter_channel) \ 457 | : TrainParam(w_lr, b_lr, momentum, weight_decay, w_gauss), \ 458 | LocalConnectParam(layer_type, name, in_height, in_width, \ 459 | pad_height, pad_width, stride_height, stride_width, \ 460 | in_channel, filter_height, \ 461 | filter_width, filter_channel) {} 462 | 463 | ConvParam(const LayerType layer_type, const string name, const float w_lr, \ 464 | const float b_lr, const float momentum, \ 465 | const float weight_decay, const float w_gauss, \ 466 | const int pad_height, const int pad_width, \ 467 | const int stride_height, const int stride_width, const int filter_height, \ 468 | const int filter_width, \ 469 | const int filter_channel, LocalConnectParam *lc_par) \ 470 | : TrainParam(w_lr, b_lr, momentum, weight_decay, w_gauss), \ 471 | LocalConnectParam(layer_type, name, pad_height, pad_width, stride_height, \ 472 | stride_width, \ 473 | filter_height, filter_width, filter_channel, lc_par) {} 474 | void printParam(){ 475 | LocalConnectParam::printParam(); 476 | TrainParam::printParam(); 477 | } 478 | }; 479 | 480 | class PoolParam : public LocalConnectParam { 481 | public: 482 | PoolParam() {} 483 | ~PoolParam() {} 484 | 485 | PoolParam(const LayerType layer_type, const string name, \ 486 | const int in_height, const int in_width, \ 487 | const int pad_height, const int pad_width, \ 488 | const int stride_height, const int stride_width, \ 489 | const int in_channel, const int filter_height, \ 490 | const int filter_width, \ 491 | const int filter_channel, PoolingType p_type) 492 | : LocalConnectParam(layer_type, name, in_height, in_width, \ 493 | pad_height, pad_width, stride_height, stride_width, \ 494 | in_channel, filter_height, \ 495 | filter_width, filter_channel) , \ 496 | _p_type(p_type) {} 497 | 498 | PoolParam(const LayerType layer_type, const string name, \ 499 | const int pad_height, const int pad_width, \ 500 | const int stride_height, const int stride_width, \ 501 | 
const int filter_height, const int filter_width, \ 502 | const int filter_channel, \ 503 | LocalConnectParam* lc_par, PoolingType p_type) 504 | : LocalConnectParam(layer_type, name, pad_height, \ 505 | pad_width, stride_height, \ 506 | stride_width, \ 507 | filter_height, filter_width, \ 508 | filter_channel, lc_par), _p_type(p_type){} 509 | 510 | inline PoolingType getPoolType(){ 511 | return _p_type; 512 | } 513 | void printParam(){ 514 | LocalConnectParam::printParam(); 515 | } 516 | 517 | 518 | private: 519 | PoolingType _p_type; 520 | }; 521 | 522 | 523 | /// \brief 可以进行训练的全连接层 524 | class InnerParam : public TrainParam, public FullConnectParam { 525 | public: 526 | InnerParam(){} 527 | 528 | ~InnerParam() {} 529 | 530 | InnerParam(const LayerType layer_type, const string name, \ 531 | const float w_lr, const float b_lr, const float momentum, \ 532 | const float weight_decay, const float w_gauss, \ 533 | const int num_in, const int num_out) \ 534 | : TrainParam(w_lr, b_lr, momentum, weight_decay, w_gauss), 535 | FullConnectParam(layer_type, name, num_in, num_out){} 536 | 537 | InnerParam(const LayerType layer_type, const string name, \ 538 | const float w_lr, const float b_lr, \ 539 | const float momentum, const float weight_decay, \ 540 | const float w_gauss, \ 541 | const int num_out, Param* par) \ 542 | : TrainParam(w_lr, b_lr, momentum, weight_decay, w_gauss), \ 543 | FullConnectParam(layer_type, name, num_out, par) {} 544 | void printParam(){ 545 | FullConnectParam::printParam(); 546 | TrainParam::printParam(); 547 | } 548 | }; 549 | 550 | #endif 551 | -------------------------------------------------------------------------------- /dl/include/pooling_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file pooling_layer.cuh 3 | /// @brief 实现了pooling 4 | 5 | #ifndef POOLING_LAYER_H_ 6 | #define POOLING_LAYER_H_ 7 | 8 | #include 9 | #include 10 | #include "layer.hpp" 11 | #include "layer_kernel.cuh" 12 | 13 | template 14 | class PoolingLayer : public Layer { 15 | 16 | public: 17 | PoolingLayer(PoolParam *lcp); 18 | 19 | ~PoolingLayer(); 20 | 21 | void initCuda(); 22 | 23 | void computeOutput(Matrix* x); 24 | 25 | void computeDerivsOfInput(Matrix* dE_dx); 26 | 27 | private: 28 | Matrix* _max_pos; 29 | PoolParam* _lcp; 30 | Matrix* unranged_dE_dx; 31 | int _num_box; 32 | }; 33 | 34 | #include "../src/pooling_layer.cu" 35 | 36 | #endif 37 | 38 | -------------------------------------------------------------------------------- /dl/include/relu_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file relu_layer.cuh 3 | /// @brief 实现了对输入每一个点求relu 4 | 5 | #ifndef RELU_LAYER_H_ 6 | #define RELU_LAYER_H_ 7 | 8 | #include 9 | #include "layer.hpp" 10 | 11 | template 12 | class ReluLayer : public Layer { 13 | 14 | public: 15 | 16 | ReluLayer(Param* fcp); 17 | ~ReluLayer(); 18 | 19 | void initCuda(); 20 | void computeOutput(Matrix* x); 21 | void computeDerivsOfInput(Matrix* dE_dx); 22 | 23 | private: 24 | Param* _p; 25 | Matrix *_record; 26 | }; 27 | 28 | 29 | #include "../src/relu_layer.cu" 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /dl/include/sigmoid_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 
2 | /// \file sigmoid_layer.cuh 3 | /// @brief 实现了对输入每一个点求sigmoid 4 | 5 | #ifndef SIGMOID_LAYER_H_ 6 | #define SIGMOID_LAYER_H_ 7 | 8 | #include 9 | #include "layer.hpp" 10 | 11 | template 12 | class SigmoidLayer : public Layer { 13 | 14 | public: 15 | 16 | SigmoidLayer(Param* fcp); 17 | ~SigmoidLayer(); 18 | 19 | void initCuda(); 20 | void computeOutput(Matrix* x); 21 | void computeDerivsOfInput(Matrix* dE_dx); 22 | 23 | private: 24 | Param* _fcp; 25 | }; 26 | 27 | 28 | #include "../src/sigmoid_layer.cu" 29 | #endif 30 | -------------------------------------------------------------------------------- /dl/include/train_classification.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file train_classification.hpp 3 | /// \brief 4 | /// 5 | 6 | 7 | #ifndef TRAINCLASSIFICATION_H_ 8 | #define TRAINCLASSIFICATION_H_ 9 | 10 | #include "train_model.hpp" 11 | 12 | /// \brief 13 | /// 14 | template 15 | class TrainClassification : public TrainModel { 16 | private: 17 | 18 | public: 19 | TrainClassification(bool has_valid, bool is_test) \ 20 | : TrainModel(has_valid, is_test) {} 21 | ~TrainClassification() {} 22 | 23 | void createPixelAndLabel(); 24 | void parseImgBinary(string train_file, string valid_file); 25 | 26 | void forwardLastLayer(); 27 | void backwardLastLayer(); 28 | virtual void train(); 29 | virtual void test() {} 30 | 31 | }; 32 | 33 | #include "../src/train_classification.cpp" 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /dl/include/train_model.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file train_model.hpp 3 | /// \brief 4 | /// 5 | 6 | 7 | #ifndef TRAINMODEL_H_ 8 | #define TRAINMODEL_H_ 9 | 10 | #include "model_component.hpp" 11 | #include "load_layer.hpp" 12 | 13 | using namespace std; 14 | 15 | /// \brief 16 | /// 17 | template 18 | class TrainModel { 19 | protected: 20 | ModelComponent *_model_component; 21 | LoadLayer *_load_layer; 22 | float _likelihood; ///>cost function的输出值 23 | int _error; ///>分类的error个数 24 | //early stopping 25 | float _min_likelihood; ///>early stopping所控制得到的最小cost 26 | vector _strip_likelihood; ///>用来控制early stopping 27 | int _min_epoch; 28 | int _min_error; 29 | int _num_strip; 30 | bool _is_stop; ///>训练是否由于early stopping而中断 31 | bool _has_valid; 32 | bool _is_test; 33 | int _num_data_type; //train是0,valid是1,test是2 34 | 35 | public: 36 | TrainModel(bool has_valid, bool is_test); 37 | virtual ~TrainModel(); 38 | 39 | void parseNetJson(string json_file); 40 | 41 | void createLayer(); 42 | void createYDEDY(); 43 | void createWBias(); 44 | 45 | void initWeightByRandom(); 46 | void initWeightByFile(vector w_file, vector bias_file); 47 | void forwardPropagate(); 48 | void backwardPropagate(); 49 | void computeAndUpdatePars(); 50 | 51 | virtual void forwardLastLayer() {} 52 | virtual void backwardLastLayer() {} 53 | 54 | virtual void train() {} 55 | 56 | //返回是true就停下,返回是false就继续执行 57 | void earlyStopping(int epoch_idx); 58 | 59 | }; 60 | 61 | #include "../src/train_model.cpp" 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /dl/include/utils.cuh: -------------------------------------------------------------------------------- 1 | 2 | #ifndef UTILS_H_ 3 | #define UTILS_H_ 4 | 5 | #include 6 | #include 7 | #include "matrix.hpp" 8 | #include 9 | 10 | using namespace std; 11 | 12 | void printTime(clock_t &t, string s); 13 | 14 | 15 | void 
initW(Matrix* nvMat); 16 | 17 | void gaussRand(Matrix* nvMat, float var = 1, \ 18 | float mean = 0); 19 | 20 | float gaussGen(float var, float mean); 21 | 22 | void gaussRand(float *w, int length, float var = 1, float mean = 0); 23 | 24 | void readData(Matrix* nvData, string filename, \ 25 | bool isData, int addZerosInFront = 0); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /dl/main_src/cifar_classify.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file conv3.cu 3 | /// 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "train_classification.hpp" 10 | #include "convnet.hpp" 11 | 12 | using namespace std; 13 | 14 | int Param::_minibatch_size = 0; 15 | 16 | int main(int argc, char** argv){ 17 | 18 | TrainClassification *cifar_model = new TrainClassification(true, false); 19 | 20 | cifar_model->parseNetJson("script/cifar10.json"); 21 | cout << "done1\n"; 22 | cifar_model->parseImgBinary("", ""); 23 | cifar_model->createLayer(); 24 | cifar_model->createWBias(); 25 | cifar_model->createPixelAndLabel(); 26 | cifar_model->createYDEDY(); 27 | cifar_model->initWeightByRandom(); 28 | cifar_model->train(); 29 | 30 | delete cifar_model; 31 | 32 | 33 | return 0; 34 | } 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /dl/script/.ropeproject/config.py: -------------------------------------------------------------------------------- 1 | # The default ``config.py`` 2 | 3 | 4 | def set_prefs(prefs): 5 | """This function is called before opening the project""" 6 | 7 | # Specify which files and folders to ignore in the project. 8 | # Changes to ignored resources are not added to the history and 9 | # VCSs. Also they are not returned in `Project.get_files()`. 10 | # Note that ``?`` and ``*`` match all characters but slashes. 11 | # '*.pyc': matches 'test.pyc' and 'pkg/test.pyc' 12 | # 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc' 13 | # '.svn': matches 'pkg/.svn' and all of its children 14 | # 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o' 15 | # 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o' 16 | prefs['ignored_resources'] = ['*.pyc', '*~', '.ropeproject', 17 | '.hg', '.svn', '_svn', '.git'] 18 | 19 | # Specifies which files should be considered python files. It is 20 | # useful when you have scripts inside your project. Only files 21 | # ending with ``.py`` are considered to be python files by 22 | # default. 23 | #prefs['python_files'] = ['*.py'] 24 | 25 | # Custom source folders: By default rope searches the project 26 | # for finding source folders (folders that should be searched 27 | # for finding modules). You can add paths to that list. Note 28 | # that rope guesses project source folders correctly most of the 29 | # time; use this if you have any problems. 30 | # The folders should be relative to project root and use '/' for 31 | # separating folders regardless of the platform rope is running on. 32 | # 'src/my_source_folder' for instance. 33 | #prefs.add('source_folders', 'src') 34 | 35 | # You can extend python path for looking up modules 36 | #prefs.add('python_path', '~/python/') 37 | 38 | # Should rope save object information or not. 39 | prefs['save_objectdb'] = True 40 | prefs['compress_objectdb'] = False 41 | 42 | # If `True`, rope analyzes each module when it is being saved. 
43 | prefs['automatic_soa'] = True 44 | # The depth of calls to follow in static object analysis 45 | prefs['soa_followed_calls'] = 0 46 | 47 | # If `False` when running modules or unit tests "dynamic object 48 | # analysis" is turned off. This makes them much faster. 49 | prefs['perform_doa'] = True 50 | 51 | # Rope can check the validity of its object DB when running. 52 | prefs['validate_objectdb'] = True 53 | 54 | # How many undos to hold? 55 | prefs['max_history_items'] = 32 56 | 57 | # Shows whether to save history across sessions. 58 | prefs['save_history'] = True 59 | prefs['compress_history'] = False 60 | 61 | # Set the number spaces used for indenting. According to 62 | # :PEP:`8`, it is best to use 4 spaces. Since most of rope's 63 | # unit-tests use 4 spaces it is more reliable, too. 64 | prefs['indent_size'] = 4 65 | 66 | # Builtin and c-extension modules that are allowed to be imported 67 | # and inspected by rope. 68 | prefs['extension_modules'] = [] 69 | 70 | # Add all standard c-extensions to extension_modules list. 71 | prefs['import_dynload_stdmods'] = True 72 | 73 | # If `True` modules with syntax errors are considered to be empty. 74 | # The default value is `False`; When `False` syntax errors raise 75 | # `rope.base.exceptions.ModuleSyntaxError` exception. 76 | prefs['ignore_syntax_errors'] = False 77 | 78 | # If `True`, rope ignores unresolvable imports. Otherwise, they 79 | # appear in the importing namespace. 80 | prefs['ignore_bad_imports'] = False 81 | 82 | 83 | def project_opened(project): 84 | """This function is called after opening the project""" 85 | # Do whatever you like here! 86 | -------------------------------------------------------------------------------- /dl/script/.ropeproject/globalnames: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/script/.ropeproject/globalnames -------------------------------------------------------------------------------- /dl/script/.ropeproject/history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/script/.ropeproject/history -------------------------------------------------------------------------------- /dl/script/.ropeproject/objectdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/script/.ropeproject/objectdb -------------------------------------------------------------------------------- /dl/script/cifar10.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "CIFAR10net", 3 | "minibatch_size": 100, 4 | "num_epoch": 300, 5 | "img_height": 32, 6 | "img_width": 32, 7 | "img_channel": 3, 8 | "layer": 9 | [ 10 | { 11 | "type": "CONVOLUTION", 12 | "name": "conv1", 13 | "pad_height": 2, 14 | "pad_width": 2, 15 | "stride_height": 1, 16 | "stride_width": 1, 17 | "filter_height": 5, 18 | "filter_width": 5, 19 | "filter_channel": 64, 20 | "w_lr": 0.001, 21 | "bias_lr": 0.002, 22 | "momentum": 0.9, 23 | "weight_decay": 0, 24 | "w_gauss": 0.001 25 | }, 26 | { 27 | "type": "RECTIFIED", 28 | "name": "relu1" 29 | }, 30 | { 31 | "type": "POOLING", 32 | "name": "pool1", 33 | "pool_type": "MAX_POOLING", 34 | "pad_height": 0, 35 | "pad_width": 0, 36 | "stride_height": 2, 37 | 
"stride_width": 2, 38 | "filter_height": 3, 39 | "filter_width": 3 40 | }, 41 | { 42 | "type": "CONVOLUTION", 43 | "name": "conv2", 44 | "pad_height": 2, 45 | "pad_width": 2, 46 | "stride_height": 1, 47 | "stride_width": 1, 48 | "filter_height": 5, 49 | "filter_width": 5, 50 | "filter_channel": 32, 51 | "w_lr": 0.001, 52 | "bias_lr": 0.002, 53 | "momentum": 0.9, 54 | "weight_decay": 0, 55 | "w_gauss": 0.01 56 | }, 57 | { 58 | "type": "RECTIFIED", 59 | "name": "relu2" 60 | }, 61 | { 62 | "type": "POOLING", 63 | "name": "pool2", 64 | "pool_type": "AVG_POOLING", 65 | "pad_height": 0, 66 | "pad_width": 0, 67 | "stride_height": 2, 68 | "stride_width": 2, 69 | "filter_height": 3, 70 | "filter_width": 3 71 | }, 72 | { 73 | "type": "CONVOLUTION", 74 | "name": "conv3", 75 | "pad_height": 2, 76 | "pad_width": 2, 77 | "stride_height": 1, 78 | "stride_width": 1, 79 | "filter_height": 5, 80 | "filter_width": 5, 81 | "filter_channel": 64, 82 | "w_lr": 0.001, 83 | "bias_lr": 0.002, 84 | "momentum": 0.9, 85 | "weight_decay": 0, 86 | "w_gauss": 0.01 87 | }, 88 | { 89 | "type": "RECTIFIED", 90 | "name": "relu3" 91 | }, 92 | { 93 | "type": "POOLING", 94 | "name": "pool3", 95 | "pool_type": "AVG_POOLING", 96 | "pad_height": 0, 97 | "pad_width": 0, 98 | "stride_height": 2, 99 | "stride_width": 2, 100 | "filter_height": 3, 101 | "filter_width": 3 102 | }, 103 | { 104 | "type": "INNERPRODUCT", 105 | "name": "inner1", 106 | "num_out": 64, 107 | "w_lr": 0.001, 108 | "bias_lr": 0.002, 109 | "momentum": 0.9, 110 | "weight_decay": 0, 111 | "w_gauss": 0.1 112 | }, 113 | { 114 | "type": "INNERPRODUCT", 115 | "name": "inner2", 116 | "num_out": 10, 117 | "w_lr": 0.001, 118 | "bias_lr": 0.002, 119 | "momentum": 0.9, 120 | "weight_decay": 0, 121 | "w_gauss": 0.1 122 | }, 123 | { 124 | "type": "SOFTMAX", 125 | "name": "softmax" 126 | } 127 | 128 | ] 129 | } 130 | 131 | 132 | -------------------------------------------------------------------------------- /dl/src/convnet.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file convnet.cu 3 | /// @brief 4 | 5 | 6 | #include 7 | 8 | #include "convnet.hpp" 9 | #include "layer_kernel.cuh" 10 | 11 | using namespace std; 12 | 13 | template 14 | ConvNet::ConvNet(ConvParam* cp) : TrainLayer(cp){ 15 | 16 | this->_cp = cp; 17 | this->_filt_pixs = this->_cp->getFilterHeight()*_cp->getFilterWidth(); 18 | this->_conv_pixs = this->_cp->getOutHeight()*_cp->getOutWidth(); 19 | this->_padded_in_pixs = this->_cp->getPaddedInHeight()*cp->getPaddedInWidth(); 20 | this->_in_pixs = this->_cp->getInHeight()*_cp->getInWidth(); 21 | this->_box_in_pixs = this->_cp->getBoxInHeight()*_cp->getBoxInWidth(); 22 | cublasCreate(&this->handle); 23 | 24 | _num_box = _cp->getBoxNumHeight()*_cp->getBoxNumWidth(); 25 | } 26 | 27 | template 28 | ConvNet::~ConvNet() { 29 | 30 | delete this->_w; 31 | delete this->_w_inc; 32 | delete this->_bias; 33 | delete this->_bias_inc; 34 | 35 | delete this->_y; 36 | delete this->_dE_dy; 37 | delete this->_dE_dw; 38 | delete this->_dE_db; 39 | 40 | delete unfold_x; 41 | delete dE_db_tmp; 42 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0) 43 | delete padded_x; 44 | if((_cp->getOutHeight() > MAX_THREAD_SIZE \ 45 | || _cp->getOutWidth() > MAX_THREAD_SIZE) \ 46 | && (_cp->getOverlapHeight() > 0 || _cp->getOverlapWidth() > 0)) 47 | delete unranged_dE_dx; 48 | if(_cp->getOutHeight() > MAX_THREAD_SIZE || _cp->getOutWidth() > MAX_THREAD_SIZE){ 49 | delete unranged_dE_dw; 50 | delete unfold_dE_db_tmp; 51 | } 52 | 53 | 
cublasDestroy(this->handle); 54 | 55 | } 56 | 57 | template 58 | void ConvNet::initCuda() { 59 | 60 | this->_w = new Matrix(_filt_pixs \ 61 | * this->_cp->getInChannel(), \ 62 | this->_cp->getOutChannel()); 63 | this->_bias = new Matrix(1, this->_cp->getOutChannel()); 64 | this->_y = new Matrix(this->_cp->getMinibatchSize(), \ 65 | this->_cp->getOutChannel() * _conv_pixs); 66 | this->_dE_dy = new Matrix(this->_y); 67 | 68 | this->_dE_dw = new Matrix(this->_w); 69 | this->_dE_db = new Matrix(this->_bias); 70 | 71 | this->_w_inc = new Matrix(this->_w); 72 | this->_bias_inc = new Matrix(this->_bias); 73 | 74 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0) 75 | this->padded_x = new Matrix(this->_cp->getMinibatchSize(), \ 76 | this->_cp->getInChannel() * _padded_in_pixs); 77 | unfold_x = new Matrix(this->_cp->getMinibatchSize(), \ 78 | this->_cp->getInChannel() * _padded_in_pixs); 79 | 80 | if((_cp->getOutHeight() > MAX_THREAD_SIZE \ 81 | || _cp->getOutWidth() > MAX_THREAD_SIZE) \ 82 | && (_cp->getOverlapHeight() > 0 || _cp->getOverlapWidth() > 0)){ 83 | unranged_dE_dx = new Matrix(_cp->getMinibatchSize(), \ 84 | _box_in_pixs*_num_box*_cp->getOutChannel()); 85 | } 86 | unranged_dE_dw = new Matrix(_cp->getMinibatchSize(), \ 87 | _filt_pixs*_cp->getInChannel()* \ 88 | _num_box*_cp->getOutChannel()); 89 | 90 | if(_cp->getOutHeight() > MAX_THREAD_SIZE \ 91 | || _cp->getOutWidth() > MAX_THREAD_SIZE) { 92 | unfold_dE_db_tmp = new Matrix(this->_cp->getMinibatchSize(), \ 93 | this->_cp->getOutChannel()*_num_box); 94 | } 95 | 96 | dE_db_tmp = new Matrix(this->_cp->getMinibatchSize(), \ 97 | this->_cp->getOutChannel()); 98 | 99 | this->_w_inc->zeros(); 100 | this->_bias_inc->zeros(); 101 | } 102 | 103 | template 104 | void ConvNet::computeOutput(Matrix* x){ 105 | 106 | this->_y->zeros(); 107 | 108 | int num_kernel; 109 | int num_block; 110 | 111 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0){ 112 | num_kernel = this->_cp->getMinibatchSize() * _in_pixs \ 113 | * this->_cp->getInChannel(); 114 | num_block = MAX_NUM_KERNEL < (num_kernel / MAX_NUM_THREAD + 1) \ 115 | ? 
MAX_NUM_KERNEL : (num_kernel / MAX_NUM_THREAD + 1); 116 | padded_x->zeros(); 117 | ori_to_padding<<>>(x->getDevData(), \ 118 | padded_x->getDevData(), num_kernel, this->_cp->getInHeight(), \ 119 | _cp->getInWidth(), _cp->getPaddedInHeight(), \ 120 | _cp->getPaddedInWidth(), _cp->getInChannel()); 121 | cudaDeviceSynchronize(); 122 | cudaCheckError(); 123 | }else 124 | padded_x = x; 125 | 126 | dim3 blocks = dim3(_cp->getMinibatchSize(), _cp->getOutChannel()*_num_box); 127 | dim3 threads = dim3(_cp->getThreadWidth(), _cp->getThreadHeight()); 128 | 129 | 130 | forward_convolution<<getInChannel()*_filt_pixs + _box_in_pixs)>>>(\ 132 | padded_x->getDevData(), this->_w->getDevData(), \ 133 | this->_bias->getDevData(), this->_y->getDevData(), \ 134 | _cp->getPaddedInHeight(), _cp->getPaddedInWidth(), \ 135 | _cp->getInChannel(), _cp->getOutHeight(), \ 136 | _cp->getOutWidth(), _cp->getFilterHeight(), \ 137 | _cp->getFilterWidth(), _cp->getOutChannel(), \ 138 | _cp->getStrideHeight(), _cp->getStrideWidth(), \ 139 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth(), \ 140 | _cp->getBoxInHeight(), _cp->getBoxInWidth(), \ 141 | _cp->getBoxOutHeight(), _cp->getBoxOutWidth()); 142 | cudaDeviceSynchronize(); 143 | cudaCheckError(); 144 | } 145 | 146 | template 147 | void ConvNet::computeDerivsOfPars(Matrix* x){ 148 | 149 | dim3 blocks = dim3(_cp->getMinibatchSize() \ 150 | , _num_box \ 151 | *_cp->getFilterHeight()*_cp->getFilterWidth()); 152 | 153 | dim3 threads = dim3(_cp->getThreadWidth(), _cp->getThreadHeight()); 154 | 155 | unranged_dE_dw->zeros(); 156 | 157 | Dtype *dE_db_multi_channel; 158 | if(_cp->getOutHeight() > MAX_THREAD_SIZE \ 159 | || _cp->getOutWidth() > MAX_THREAD_SIZE) { 160 | unfold_dE_db_tmp->zeros(); 161 | dE_db_multi_channel = unfold_dE_db_tmp->getDevData(); 162 | 163 | }else{ 164 | dE_db_tmp->zeros(); 165 | dE_db_multi_channel = dE_db_tmp->getDevData(); 166 | 167 | } 168 | 169 | compute_convolution_derivs<<getBoxOutHeight()*_cp->getBoxOutWidth())>>>( \ 171 | this->_dE_dy->getDevData(), padded_x->getDevData(), \ 172 | unranged_dE_dw->getDevData(), \ 173 | _cp->getBoxOutHeight(), _cp->getBoxOutWidth(), \ 174 | _cp->getOutChannel(), _cp->getInChannel(), \ 175 | _cp->getPaddedInHeight(), _cp->getPaddedInWidth(), \ 176 | _cp->getOutHeight(), _cp->getOutWidth(), \ 177 | _cp->getFilterHeight(), _cp->getFilterWidth(), \ 178 | _cp->getStrideHeight(), _cp->getStrideWidth(), \ 179 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth()); 180 | 181 | cudaDeviceSynchronize(); 182 | cudaCheckError(); 183 | 184 | blocks = dim3(_cp->getMinibatchSize(), _cp->getOutChannel()*_num_box); 185 | compute_derivs_of_bias<<getBoxOutHeight()*_cp->getBoxOutWidth()>>>( \ 187 | this->_dE_dy->getDevData(), dE_db_multi_channel, \ 188 | _cp->getOutHeight(), _cp->getOutWidth(), \ 189 | _cp->getOutChannel(), \ 190 | _cp->getBoxOutHeight(), _cp->getBoxOutWidth(), \ 191 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth()); 192 | 193 | cudaDeviceSynchronize(); 194 | cudaCheckError(); 195 | 196 | blocks = dim3(1, _cp->getInChannel()*_cp->getOutChannel()); 197 | compact_dervis_w<<>>( \ 198 | unranged_dE_dw->getDevData(), this->_dE_dw->getDevData(), \ 199 | _cp->getFilterHeight(), _cp->getFilterWidth(), \ 200 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth(), \ 201 | _cp->getMinibatchSize(), _cp->getInChannel(), _cp->getOutChannel()); 202 | cudaDeviceSynchronize(); 203 | cudaCheckError(); 204 | if(_cp->getOutHeight() > MAX_THREAD_SIZE \ 205 | || _cp->getOutWidth() > MAX_THREAD_SIZE) { 206 | blocks = dim3(_cp->getMinibatchSize(), 
_cp->getOutChannel()); 207 | compute_derivs_of_bias<<>>( \ 208 | unfold_dE_db_tmp->getDevData(), dE_db_tmp->getDevData(), \ 209 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth(), \ 210 | _cp->getOutChannel(), _cp->getBoxNumHeight(), \ 211 | _cp->getBoxNumWidth(), 1, 1); 212 | } 213 | cudaDeviceSynchronize(); 214 | cudaCheckError(); 215 | 216 | dE_db_tmp->sumRow(this->_dE_db); 217 | 218 | } 219 | 220 | template 221 | void ConvNet::computeDerivsOfInput(Matrix* dE_dx){ 222 | 223 | 224 | dim3 blocks = dim3(_cp->getMinibatchSize(), _cp->getInChannel() * _num_box); 225 | dim3 threads = dim3(_cp->getThreadWidth(), _cp->getThreadHeight()); 226 | 227 | int box_in_height = MAX_THREAD_SIZE > _cp->getOutHeight() \ 228 | ? _cp->getPaddedInHeight() : _cp->getBoxInHeight(); 229 | int box_in_width = MAX_THREAD_SIZE > _cp->getOutWidth() \ 230 | ? _cp->getPaddedInWidth() : _cp->getBoxInWidth(); 231 | 232 | Dtype* p_dE_dx; 233 | if((_cp->getOutHeight() > MAX_THREAD_SIZE \ 234 | || _cp->getOutWidth() > MAX_THREAD_SIZE) \ 235 | && (_cp->getOverlapHeight() > 0 || _cp->getOverlapWidth() > 0)){ 236 | unranged_dE_dx->zeros(); 237 | p_dE_dx = unranged_dE_dx->getDevData(); 238 | 239 | }else if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0){ 240 | unfold_x->zeros(); 241 | p_dE_dx = unfold_x->getDevData(); 242 | 243 | }else{ 244 | dE_dx->zeros(); 245 | p_dE_dx = dE_dx->getDevData(); 246 | 247 | } 248 | 249 | backward_convolution<<>>( \ 251 | this->_dE_dy->getDevData(), this->_w->getDevData(), \ 252 | p_dE_dx, box_in_height, box_in_width, \ 253 | _cp->getBoxOutHeight(), _cp->getBoxOutWidth(), \ 254 | _cp->getOutChannel(), _cp->getInChannel(), \ 255 | _cp->getOutHeight(), _cp->getOutWidth(), \ 256 | _cp->getFilterHeight(), _cp->getFilterWidth(), \ 257 | _cp->getStrideHeight(), _cp->getStrideWidth(), \ 258 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth()); 259 | cudaDeviceSynchronize(); 260 | cudaCheckError(); 261 | 262 | if((_cp->getOutHeight() > MAX_THREAD_SIZE \ 263 | || _cp->getOutWidth() > MAX_THREAD_SIZE) \ 264 | && (_cp->getOverlapHeight() > 0 || _cp->getOverlapWidth() > 0)){ 265 | 266 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0){ 267 | unfold_x->zeros(); 268 | p_dE_dx = unfold_x->getDevData(); 269 | 270 | }else{ 271 | dE_dx->zeros(); 272 | p_dE_dx = dE_dx->getDevData(); 273 | 274 | } 275 | 276 | compactOverlap<<<_cp->getMinibatchSize(), _cp->getInChannel()>>>( \ 277 | unranged_dE_dx->getDevData(), p_dE_dx, \ 278 | _cp->getPaddedInHeight(), _cp->getPaddedInWidth(), \ 279 | _cp->getInChannel(), _cp->getOverlapHeight(), _cp->getOverlapWidth(), \ 280 | box_in_height, box_in_width, \ 281 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth()); 282 | cudaDeviceSynchronize(); 283 | cudaCheckError(); 284 | } 285 | 286 | 287 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0){ 288 | int num_kernel = this->_cp->getMinibatchSize() * _in_pixs \ 289 | * this->_cp->getInChannel(); 290 | int num_block = MAX_NUM_KERNEL < (num_kernel / MAX_NUM_THREAD + 1) \ 291 | ? 
MAX_NUM_KERNEL : (num_kernel / MAX_NUM_THREAD + 1); 292 | pad_to_ori<<>>(dE_dx->getDevData(), \ 293 | p_dE_dx, num_kernel, _cp->getInHeight(), _cp->getInWidth(), \ 294 | _cp->getPaddedInHeight(), _cp->getPaddedInWidth(), \ 295 | _cp->getInChannel()); 296 | cudaDeviceSynchronize(); 297 | cudaCheckError(); 298 | 299 | } 300 | } 301 | -------------------------------------------------------------------------------- /dl/src/data.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file data.cu 3 | /// 4 | 5 | #include "data.hpp" 6 | 7 | using namespace std; 8 | 9 | 10 | template 11 | void Data::copyFromHost(Dtype* data_value_in, const int data_len){ 12 | cudaError_t status = cudaMemcpy(_data_value, data_value_in, \ 13 | sizeof(Dtype) * data_len, cudaMemcpyHostToDevice); 14 | if (status != cudaSuccess) { 15 | cout << stderr, "!!!! device access error (write)\n"; 16 | exit( EXIT_FAILURE ); 17 | } 18 | } 19 | 20 | template 21 | void Data::copyFromDevice(Data* data_in){ 22 | cudaError_t status = cudaMemcpy(_data_value, data_in->getDevData(), \ 23 | sizeof(Dtype) * _amount, cudaMemcpyDeviceToDevice); 24 | if (status != cudaSuccess) { 25 | cout << stderr, "!!!! device access error (write)\n"; 26 | exit( EXIT_FAILURE ); 27 | 28 | } 29 | } 30 | 31 | template 32 | void Data::copyToHost(Dtype* data_value_in, const int data_len){ 33 | // cout << sizeof(Dtype) << ":" << data_len << endl; 34 | cudaError_t status = cudaMemcpy(data_value_in, _data_value, \ 35 | sizeof(Dtype) * data_len, cudaMemcpyDeviceToHost); 36 | if (status != cudaSuccess) { 37 | cout << stderr, "!!!! device access error (write)\n"; 38 | exit( EXIT_FAILURE ); 39 | } 40 | } 41 | 42 | template 43 | void Data::copyToDevice(Data* data_in){ 44 | cudaError_t status = cudaMemcpy(data_in->getDevData(), _data_value, \ 45 | sizeof(Dtype) * _amount, cudaMemcpyDeviceToDevice); 46 | if (status != cudaSuccess) { 47 | cout << stderr, "!!!! 
device access error (write)\n"; 48 | exit( EXIT_FAILURE ); 49 | 50 | } 51 | } 52 | 53 | template 54 | void Data::zeros(){ 55 | cudaMemset(_data_value, 0, _amount * sizeof(Dtype)); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /dl/src/dropout_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file dropout_layer.cu 3 | /// @brief 4 | 5 | 6 | using namespace std; 7 | 8 | template 9 | DropoutLayer::DropoutLayer(Param* p){ 10 | 11 | this->_p = p; 12 | } 13 | 14 | template 15 | DropoutLayer::~DropoutLayer() { 16 | delete this->_y; 17 | delete this->_dE_dy; 18 | delete _drop_record; 19 | delete _drop_rand_probs; 20 | 21 | } 22 | 23 | template 24 | void DropoutLayer::initCuda() { 25 | 26 | 27 | ConnectType ct = this->_p->getConnectType(); 28 | int col; 29 | if(ct == PARAM_CONNECT_TYPE_LOCAL) 30 | col = _p->getOutHeight()*_p->getOutWidth() \ 31 | * this->_p->getOutChannel(); 32 | else if(ct == PARAM_CONNECT_TYPE_FULL) 33 | col = this->_p->getNumOut(); 34 | 35 | this->_y = new Matrix(_p->getMinibatchSize(), col); 36 | this->_dE_dy = new Matrix(this->_y); 37 | _drop_record = new Matrix(_p->getMinibatchSize(), col); 38 | _drop_rand_probs = new Matrix(_p->getMinibatchSize(), col); 39 | _is_set_up = false; 40 | } 41 | 42 | template 43 | void DropoutLayer::computeOutput(Matrix* x){ 44 | 45 | x->applyDropout(this->_y, _drop_record, _drop_rand_probs, _is_set_up); 46 | 47 | if(_is_set_up == false) 48 | _is_set_up = true; 49 | 50 | 51 | } 52 | 53 | template 54 | void DropoutLayer::computeDerivsOfInput(Matrix* dE_dx){ 55 | 56 | this->_dE_dy->applyRelu(dE_dx, _drop_record, false); 57 | 58 | } 59 | 60 | 61 | -------------------------------------------------------------------------------- /dl/src/inner_product_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file inner_product_layer.cu 3 | /// @brief 4 | 5 | #include "inner_product_layer.hpp" 6 | 7 | using namespace std; 8 | 9 | template 10 | InnerProductLayer::InnerProductLayer(InnerParam* fcp) : \ 11 | TrainLayer((TrainParam*)fcp){ 12 | this->_fcp = fcp; 13 | cublasCreate(&this->handle); 14 | } 15 | 16 | template 17 | InnerProductLayer::~InnerProductLayer() { 18 | 19 | delete this->_w; 20 | delete this->_w_inc; 21 | delete this->_bias; 22 | delete this->_bias_inc; 23 | 24 | delete this->_y; 25 | delete this->_dE_dy; 26 | delete this->_dE_db; 27 | delete this->_dE_dw; 28 | 29 | cublasDestroy(this->handle); 30 | } 31 | 32 | template 33 | void InnerProductLayer::initCuda() { 34 | 35 | this->_w = new Matrix(this->_fcp->getNumIn(), this->_fcp->getNumOut()); 36 | this->_bias = new Matrix(1, this->_fcp->getNumOut()); 37 | 38 | this->_y = new Matrix(this->_fcp->getMinibatchSize(), this->_fcp->getNumOut()); 39 | 40 | this->_dE_dy = new Matrix(this->_y); 41 | this->_dE_db = new Matrix(this->_bias); 42 | this->_dE_dw = new Matrix(this->_w); 43 | 44 | this->_w_inc = new Matrix(this->_w); 45 | this->_bias_inc = new Matrix(this->_bias); 46 | 47 | data_T = new Matrix(_fcp->getNumIn(), _fcp->getMinibatchSize()); 48 | w_T = new Matrix(this->_w->getNumCols(), this->_w->getNumRows()); 49 | 50 | this->_w_inc->zeros(); 51 | this->_bias_inc->zeros(); 52 | } 53 | 54 | template 55 | void InnerProductLayer::computeOutput(Matrix* x){ 56 | // x->showValue("data"); 57 | // this->_w->showValue("w"); 58 | 59 | // x->reValue(512); 60 | // this->_w->reValue(1.0f); 61 | 62 | x->rightMult(this->_w, 1, this->_y, this->handle); 63 
| this->_y->addRowVector(this->_bias); 64 | // this->_y->showValue("yj1"); 65 | 66 | } 67 | 68 | 69 | template 70 | void InnerProductLayer::computeDerivsOfPars(Matrix* x){ 71 | 72 | 73 | // x->reValue(512); 74 | // this->_dE_dy->reValue(1.0f); 75 | 76 | x->getTranspose(data_T); 77 | 78 | data_T->rightMult(this->_dE_dy, 1, this->_dE_dw, this->handle); 79 | this->_dE_dy->sumRow(this->_dE_db); 80 | 81 | //this->_dE_dw->showValue("dedwinner"); 82 | //this->_dE_dy->showValue("innerdedy"); 83 | } 84 | 85 | template 86 | void InnerProductLayer::computeDerivsOfInput(Matrix* dE_dx){ 87 | 88 | // this->_w->reValue(1.0f); 89 | // this->_dE_dy->reValue(64); 90 | 91 | this->_w->getTranspose(w_T); 92 | this->_dE_dy->rightMult(w_T, 1, dE_dx, this->handle); 93 | //dE_dx->showValue("innerdedx"); 94 | 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /dl/src/load_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * filename: load_layer.cpp 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "load_layer.hpp" 10 | 11 | using namespace std; 12 | 13 | template 14 | void LoadLayer::meanOneImg(Dtype* pixel_ptr, int process_len){ 15 | Dtype avg = 0; 16 | for(int i = 0; i < process_len; i++){ 17 | avg += pixel_ptr[i]; 18 | } 19 | avg /= process_len; 20 | 21 | for(int i = 0; i < process_len; i++){ 22 | pixel_ptr[i] = pixel_ptr[i] - avg; 23 | } 24 | } 25 | 26 | template 27 | void LoadLayer::stdOneImg(Dtype* pixel_ptr, int process_len){ 28 | Dtype std = 0; 29 | for(int i = 0; i < process_len; i++){ 30 | std += pixel_ptr[i] * pixel_ptr[i]; 31 | } 32 | 33 | std /= process_len; 34 | std = sqrt(std); 35 | for(int i = 0; i < process_len; i++){ 36 | pixel_ptr[i] /= std; 37 | } 38 | } 39 | 40 | template 41 | LoadLayer::LoadLayer(const int num_train, const int num_valid, \ 42 | const int num_test, const int img_size, const int img_channel) \ 43 | : _num_train(num_train), _num_test(num_test), _num_valid(num_valid), \ 44 | _img_size(img_size), _img_channel(img_channel){ 45 | _img_sqrt = _img_size * _img_size; 46 | if (img_size > 0 && img_channel > 0) { 47 | if (num_train > 0) { 48 | _train_pixel = new Dtype[_num_train * _img_sqrt * _img_channel]; 49 | _train_label = new int[_num_train]; 50 | _train_pixel_ptr = _train_pixel; 51 | _train_label_ptr = _train_label; 52 | } 53 | if (num_valid > 0) { 54 | _valid_pixel = new Dtype[_num_valid * _img_sqrt * _img_channel]; 55 | _valid_label = new int[_num_valid]; 56 | _valid_pixel_ptr = _valid_pixel; 57 | _valid_label_ptr = _valid_label; 58 | } 59 | if (num_test > 0) { 60 | _test_pixel = new Dtype[_num_test * _img_sqrt * _img_channel]; 61 | _test_label = new int[_num_test]; 62 | _test_pixel_ptr = _test_pixel; 63 | _test_label_ptr = _test_label; 64 | } 65 | } 66 | _is_base_alloc = true; 67 | 68 | } 69 | 70 | template 71 | LoadLayer::~LoadLayer(){ 72 | if (_img_size > 0 && _img_channel > 0 && _is_base_alloc == true) { 73 | if (_num_train > 0) { 74 | delete[] _train_pixel; 75 | delete[] _train_label; 76 | } 77 | if (_num_valid > 0) { 78 | delete[] _valid_pixel; 79 | delete[] _valid_label; 80 | } 81 | if (_num_test > 0) { 82 | delete[] _test_pixel; 83 | delete[] _test_label; 84 | } 85 | } 86 | } 87 | 88 | template 89 | LoadCifar10::LoadCifar10(const int minibatch_size) : \ 90 | LoadLayer(50000, 10000, 0, 32, 3){ 91 | 92 | _minibatch_size = minibatch_size; 93 | 94 | for(int i = 1; i < 6; i++){ 95 | string s; 96 | stringstream ss; 97 | ss << i; 98 | ss >> s; 
99 | string filename = "../../data/cifar-10-batches-bin/data_batch_"+s+".bin"; 100 | loadBinary(filename, this->_train_pixel_ptr, \ 101 | this->_train_label_ptr); 102 | } 103 | loadBinary("../../data/cifar-10-batches-bin/test_batch.bin", \ 104 | this->_valid_pixel_ptr, this->_valid_label_ptr); 105 | 106 | } 107 | 108 | template 109 | void LoadCifar10::loadTrainOneBatch(int batch_idx, \ 110 | Dtype* &mini_pixel, int* &mini_label){ 111 | mini_pixel = this->_train_pixel + batch_idx*_minibatch_size \ 112 | *this->_img_channel*this->_img_sqrt; 113 | mini_label = this->_train_label + batch_idx*_minibatch_size; 114 | } 115 | 116 | 117 | template 118 | void LoadCifar10::loadValidOneBatch(int batch_idx, \ 119 | Dtype* &mini_pixel, int* &mini_label){ 120 | mini_pixel = this->_valid_pixel + batch_idx*_minibatch_size \ 121 | *this->_img_channel*this->_img_sqrt; 122 | mini_label = this->_valid_label + batch_idx*_minibatch_size; 123 | } 124 | 125 | template 126 | void LoadCifar10::loadBinary(string filename, \ 127 | Dtype* &pixel_ptr, int* &label_ptr){ 128 | 129 | ifstream fin(filename.c_str(), ifstream::binary); 130 | if(!fin.is_open()){ 131 | cout << "open file failed\n"; 132 | exit(EXIT_FAILURE); 133 | } 134 | unsigned char tmp; 135 | char buf; 136 | fin.seekg(0, fin.end); 137 | int length = fin.tellg(); 138 | int num = length / (this->_img_sqrt * this->_img_channel + 1); 139 | //numebr of picture in this input file. 140 | fin.seekg(0, fin.beg); 141 | 142 | for(int i = 0; i < num; i++){ 143 | fin.read(&buf, 1); 144 | tmp = buf; 145 | label_ptr[0] = (int)tmp; 146 | for(int j = 0; j < this->_img_channel; j++){ 147 | for(int k = 0; k < this->_img_sqrt; k++){ 148 | fin.read(&buf, 1); 149 | tmp = buf; 150 | pixel_ptr[k] = (int)tmp; 151 | } 152 | this->meanOneImg(pixel_ptr, this->_img_sqrt); 153 | // this->stdOneImg(pixel_ptr, this->_img_sqrt); 154 | if(i != num - 1 || j != this->_img_channel - 1) 155 | pixel_ptr += this->_img_sqrt; 156 | 157 | } 158 | if(i != num - 1){ 159 | label_ptr++; 160 | } 161 | } 162 | fin.close(); 163 | } 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /dl/src/logistic.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file logistic.cu 3 | /// 4 | 5 | #include "logistic.hpp" 6 | 7 | using namespace std; 8 | 9 | template 10 | Logistic::Logistic(FullConnectParam* fcp) { 11 | this->_fcp = fcp; 12 | 13 | } 14 | 15 | template 16 | Logistic::~Logistic() { 17 | 18 | delete this->_y; 19 | delete[] h_labels; 20 | delete[] y_CPU; 21 | delete[] correct_probs; 22 | delete d_max_pos_of_out; 23 | delete[] h_max_pos_of_out; 24 | delete _d_record; 25 | delete[] _h_record; 26 | } 27 | 28 | template 29 | void Logistic::initCuda() { 30 | 31 | this->_y = new Matrix(this->_fcp->getMinibatchSize(), \ 32 | this->_fcp->getNumOut()); 33 | h_labels = new int[this->_fcp->getMinibatchSize()]; 34 | y_CPU = new Dtype[this->_y->getNumEles()]; 35 | correct_probs = new Dtype[this->_y->getNumRows()]; 36 | d_max_pos_of_out = new Matrix(this->_y->getNumRows(), 1); 37 | h_max_pos_of_out = new Dtype[this->_y->getNumRows()]; 38 | 39 | _d_record = new Matrix(this->_y->getNumCols(), this->_y->getNumCols()); 40 | _h_record = new int[this->_y->getNumCols() * this->_y->getNumCols()]; 41 | } 42 | 43 | template 44 | void Logistic::computeOutput(Matrix* x){ 45 | this->_y->zeros(); 46 | x->apply(Matrix::SOFTMAX, this->_y); 47 | } 48 | 49 | template 50 | double 
Logistic::computeError(Matrix* labels, int& num_error){ 51 | 52 | labels->copyToHost(h_labels, labels->getNumEles()); 53 | 54 | this->_y->copyToHost(y_CPU, this->_y->getNumEles()); 55 | 56 | /// 记录找打的最大位置上的likelihood 57 | /// 记录最大位置的下标 58 | this->_y->maxPosInRow(d_max_pos_of_out); 59 | 60 | 61 | d_max_pos_of_out->copyToHost(h_max_pos_of_out, this->_y->getNumRows()); 62 | 63 | for (int c = 0; c < this->_y->getNumRows(); c++) { 64 | int true_label = h_labels[c]; 65 | int predict_label = h_max_pos_of_out[c]; 66 | if(y_CPU[c*this->_y->getNumCols()+true_label] == 0) 67 | correct_probs[c] = -10000; 68 | else 69 | correct_probs[c] = log(y_CPU[c * this->_y->getNumCols() + true_label]); 70 | 71 | if(predict_label != true_label) 72 | num_error++; 73 | _h_record[predict_label * this->_y->getNumCols() + true_label]++ ; 74 | } 75 | double result = 0; 76 | for(int i = 0; i < labels->getNumEles(); i++){ 77 | result -= correct_probs[i]; 78 | } 79 | 80 | 81 | return result; 82 | } 83 | 84 | template 85 | void Logistic::computeDerivsOfInput(Matrix* dE_dx, Matrix* labels){ 86 | assert(labels->getNumRows() == dE_dx->getNumRows()); 87 | dE_dx->zeros(); 88 | 89 | const int num_thread = DIVUP(this->_fcp->getNumOut(), ADD_BLOCK_SIZE) * ADD_BLOCK_SIZE; 90 | compute_dE_dy<<_fcp->getMinibatchSize(), num_thread>>>(this->_y->getDevData(), \ 91 | labels->getDevData(), dE_dx->getDevData(), this->_fcp->getNumOut()); 92 | cudaThreadSynchronize(); 93 | cudaCheckError(); 94 | 95 | } 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /dl/src/matrix.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file matrix.cu 3 | /// \brief 矩阵类源文件 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "matrix.hpp" 11 | #include "matrix_kernel.hpp" 12 | 13 | using namespace std; 14 | 15 | template 16 | Matrix::Matrix(int num_row, int num_col){ 17 | _init(num_row, num_col); 18 | } 19 | 20 | template 21 | Matrix::Matrix(const Matrix* like, bool copy){ 22 | _init(like->getNumRows(), like->getNumCols()); 23 | if (copy) { 24 | copyFromDevice(like); 25 | } 26 | } 27 | 28 | template 29 | Matrix::Matrix(const Matrix* like) { 30 | _init(like->getNumRows(), like->getNumCols()); 31 | } 32 | 33 | template 34 | Matrix::~Matrix(){ 35 | if(this->_is_own_data && this->_amount > 0){ 36 | cudaFree(this->_data_value); 37 | } 38 | } 39 | 40 | template 41 | void Matrix::_init(int num_row, int num_col) { 42 | this->_shape.push_back(num_row); 43 | this->_shape.push_back(num_col); 44 | this->_amount = num_row * num_col; 45 | this->_is_own_data = true; 46 | if (this->_amount > 0) { 47 | cudaError_t status; 48 | status = cudaMalloc((void**) &this->_data_value, \ 49 | this->_amount * sizeof(Dtype)); 50 | /* 51 | else if(a == ALLOC_ON_UNIFIED_MEMORY){ 52 | status = cudaMallocManaged(&this->_data_value, \ 53 | this->_shape[0] * this->_shape[1] * sizeof(Dtype)); 54 | }*/ 55 | if (status != cudaSuccess) { 56 | fprintf(stderr, "!!!! 
device memory allocation error\n"); 57 | exit(EXIT_FAILURE); 58 | } 59 | } 60 | } 61 | 62 | 63 | template 64 | void Matrix::getTranspose(Matrix* target){ 65 | 66 | const int width = this->_shape[1]; 67 | const int height = this->_shape[0]; 68 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 69 | assert(num_blocks_x < NUM_BLOCKS_MAX); 70 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 71 | NUM_BLOCKS_MAX)); 72 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 73 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 74 | 75 | kTranspose<<>>(this->_data_value, \ 76 | target->getDevData(), width, height); 77 | cudaDeviceSynchronize(); 78 | cudaCheckError(); 79 | } 80 | 81 | template 82 | void Matrix::rightMult(Matrix* b, float scale_AB, \ 83 | Matrix *target, cublasHandle_t& handle) { 84 | 85 | clock_t t = clock(); 86 | 87 | int m = this->_shape[0]; 88 | int k = this->_shape[1]; 89 | int n = b->getNumCols(); 90 | float scale_tar = 0; 91 | assert(k == b->getNumRows()); 92 | //列主 93 | cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &scale_AB, \ 94 | b->getDevData(), n, this->_data_value, k, \ 95 | &scale_tar, target->getDevData(), n); 96 | } 97 | 98 | template 99 | void Matrix::addColVector(Matrix* vec){ 100 | addColVector(vec, 1, this); 101 | } 102 | 103 | template 104 | void Matrix::addColVector(Matrix* vec, float scaleVec, Matrix* target){ 105 | 106 | Matrix* ori_trans = new Matrix(this->_shape[1], this->_shape[0]); 107 | this->getTranspose(ori_trans); 108 | ori_trans->addRowVector(vec); 109 | ori_trans->getTranspose(target); 110 | delete ori_trans; 111 | } 112 | 113 | template 114 | void Matrix::addRowVector(Matrix* vec){ 115 | addRowVector(vec, 1, this); 116 | } 117 | 118 | template 119 | void Matrix::addRowVector(Matrix* vec, float scaleVec, Matrix* target){ 120 | assert(vec->getNumRows() == 1 || vec->getNumCols() == 1); 121 | assert(vec->getNumRows() == this->_shape[0] || vec->getNumCols() == this->_shape[1]); 122 | const int width = this->_shape[1]; 123 | const int height = this->_shape[0]; 124 | 125 | //表达成了矩阵的结构,就分开处理算了,block和thread的x维控制列数 126 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 127 | assert(num_blocks_x < NUM_BLOCKS_MAX); 128 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 129 | NUM_BLOCKS_MAX)); 130 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 131 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 132 | 133 | kAddRowVector<<>>(this->_data_value, vec->getDevData(), \ 134 | target->getDevData(), width, height, scaleVec); 135 | cudaDeviceSynchronize(); 136 | cudaCheckError(); 137 | 138 | } 139 | 140 | template 141 | void Matrix::subtractFromScalar(float scalar, Matrix* target) { 142 | 143 | const int width = this->_shape[1]; 144 | const int height = this->_shape[0]; 145 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 146 | assert(num_blocks_x < NUM_BLOCKS_MAX); 147 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 148 | NUM_BLOCKS_MAX)); 149 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 150 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 151 | 152 | kSubtractFromScalar<<>>(this->_data_value, scalar, \ 153 | target->getDevData(), width, height); 154 | cudaDeviceSynchronize(); 155 | cudaCheckError(); 156 | } 157 | 158 | template 159 | void Matrix::subtractFromScalar(float scalar) { 160 | subtractFromScalar(scalar, this); 161 | } 162 | 163 | template 164 | void Matrix::apply(Matrix::FUNCTIONS f, Matrix *target){ 165 | 166 | const int width = this->_shape[1]; 167 | 
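// apply() follows the same launch convention as the rest of this file: a 2D grid
// whose x dimension tiles the matrix columns and whose y dimension tiles the rows,
// with ADD_BLOCK_SIZE x ADD_BLOCK_SIZE threads per block. SOFTMAX is the exception
// and is remapped below to one block per row so the row-wise max/sum reductions
// can run in shared memory.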
const int height = this->_shape[0]; 168 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 169 | assert(num_blocks_x < NUM_BLOCKS_MAX); 170 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 171 | NUM_BLOCKS_MAX)); 172 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 173 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 174 | 175 | if(f == Matrix::SOFTMAX){ 176 | //一个block只计算一行数据 177 | grid_size = dim3(1, height, 1); 178 | block_size = dim3(num_blocks_x * ADD_BLOCK_SIZE, 1, 1); 179 | kSoftmax<<>>(this->_data_value, \ 180 | target->getDevData(), this->_shape[1], this->_shape[0]); 181 | }else if(f == Matrix::RECIPROCAL) { 182 | kReciprocal<<>>(this->_data_value, target->getDevData(), \ 183 | width, height); 184 | }else if(f == Matrix::LOG) { 185 | kLog<<>>(this->_data_value, target->getDevData(), \ 186 | width, height); 187 | }else if(f == Matrix::SIGMOID) { 188 | kSigmoid<<>>(this->_data_value, target->getDevData(), \ 189 | width, height); 190 | } 191 | cudaDeviceSynchronize(); 192 | cudaCheckError(); 193 | } 194 | 195 | template 196 | void Matrix::applyRelu(Matrix *target, Matrix* record, \ 197 | bool direction){ 198 | const int width = this->_shape[1]; 199 | const int height = this->_shape[0]; 200 | const int length = width*height; 201 | 202 | const int num_blocks = DIVUP(length, 1024); 203 | assert(num_blocks < NUM_BLOCKS_MAX); 204 | 205 | if(direction) 206 | kRelu<<>>(this->_data_value, \ 207 | target->getDevData(), record->getDevData(), length); 208 | else 209 | kReluBack<<>>(this->_data_value, \ 210 | target->getDevData(), record->getDevData(), length); 211 | cudaDeviceSynchronize(); 212 | cudaCheckError(); 213 | } 214 | 215 | template 216 | void Matrix::applyDropout(Matrix *target, Matrix* record, \ 217 | Matrix* rand_probs, bool is_set_up){ 218 | 219 | const int width = this->_shape[1]; 220 | const int height = this->_shape[0]; 221 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 222 | assert(num_blocks_x < NUM_BLOCKS_MAX); 223 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 224 | NUM_BLOCKS_MAX)); 225 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 226 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 227 | 228 | if(is_set_up == false){ 229 | kSetUpCurand<<>>(rand_probs->getDevData(), \ 230 | width, height); 231 | cudaDeviceSynchronize(); 232 | cudaCheckError(); 233 | 234 | } 235 | 236 | kDropout<<>>(this->_data_value, \ 237 | target->getDevData(), record->getDevData(), \ 238 | rand_probs->getDevData(), width, height); 239 | cudaDeviceSynchronize(); 240 | cudaCheckError(); 241 | } 242 | 243 | template 244 | void Matrix::apply(Matrix::FUNCTIONS f) { 245 | apply(f, this); 246 | } 247 | 248 | template 249 | void Matrix::sumCol(Matrix* target){ 250 | const int width = this->_shape[1]; 251 | const int height = this->_shape[0]; 252 | 253 | kDumbSumCols<<>>(this->_data_value, \ 254 | target->getDevData(), width, height); 255 | cudaDeviceSynchronize(); 256 | cudaCheckError(); 257 | } 258 | 259 | template 260 | void Matrix::sumRow(Matrix* target){ 261 | Matrix* trans = new Matrix(this->_shape[1], this->_shape[0]); 262 | this->getTranspose(trans); 263 | trans->sumCol(target); 264 | delete trans; 265 | } 266 | 267 | //位置下标从0开始 268 | template 269 | void Matrix::maxPosInRow(Matrix* maxVec){ 270 | const int width = this->_shape[1]; 271 | const int height = this->_shape[0]; 272 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 273 | assert(num_blocks_x < NUM_BLOCKS_MAX); 274 | dim3 grid_size(1, height, 1); 275 | dim3 
block_size(num_blocks_x * ADD_BLOCK_SIZE, 1, 1); 276 | 277 | kDumbMaxPosInRow<<>>(this->_data_value, \ 279 | maxVec->getDevData(), width, height); 280 | cudaDeviceSynchronize(); 281 | cudaCheckError(); 282 | } 283 | 284 | template 285 | void Matrix::eltWiseMult(Matrix* b, Matrix* target) { 286 | 287 | assert(b->getNumCols() == this->_shape[1]); 288 | 289 | const int width = this->_shape[1]; 290 | const int height = this->_shape[0]; 291 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 292 | assert(num_blocks_x < NUM_BLOCKS_MAX); 293 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 294 | NUM_BLOCKS_MAX)); 295 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 296 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 297 | 298 | kMult<<>>(this->_data_value, \ 299 | b->getDevData(), target->getDevData(), width, height); 300 | cudaDeviceSynchronize(); 301 | cudaCheckError(); 302 | } 303 | 304 | template 305 | void Matrix::eltWiseMult(Matrix* b) { 306 | eltWiseMult(b, this); 307 | } 308 | 309 | template 310 | void Matrix::addSum(Matrix* b, Matrix* c, float scaleThis, \ 311 | float scaleB, float scaleC){ 312 | this->add(b, scaleThis, scaleB); 313 | this->add(c, 1, scaleC); 314 | } 315 | 316 | template 317 | void Matrix::add(Matrix* b, float scale_this, float scale_B){ 318 | assert(this->isSameDims(b)); 319 | const int width = this->_shape[1]; 320 | const int height = this->_shape[0]; 321 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 322 | assert(num_blocks_x < NUM_BLOCKS_MAX); 323 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 324 | NUM_BLOCKS_MAX)); 325 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 326 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 327 | 328 | kAdd<<>>(this->getDevData(), b->getDevData(), \ 329 | this->getDevData(), scale_this, scale_B, width, height); 330 | cudaDeviceSynchronize(); 331 | cudaCheckError(); 332 | } 333 | 334 | 335 | template 336 | void Matrix::showValue(string name){ 337 | 338 | Dtype* tmp_yh = new Dtype[this->_amount]; 339 | this->copyToHost(tmp_yh, this->_amount); 340 | cout << "-------------"<< name << "--------------" << endl; 341 | cout << this->_shape[0] << ":" << this->_shape[1] << endl; 342 | for(int i = 0; i < this->_shape[0]; i++){ 343 | for(int j = 0; j < this->_shape[1]; j++){ 344 | cout << tmp_yh[i * this->_shape[1] + j] << "\t"; 345 | if(j != 0 && j % (this->_shape[1]) == this->_shape[1] - 1) 346 | cout << endl; 347 | if(this->_shape[1] == 1) 348 | cout << endl; 349 | } 350 | } 351 | delete[] tmp_yh; 352 | } 353 | 354 | template 355 | void Matrix::reValue(float value){ 356 | int length = this->getNumRows() * this->getNumCols(); 357 | Dtype* tmp_yh = new Dtype[length]; 358 | for(int i = 0; i < length; i++){ 359 | tmp_yh[i] = value; 360 | } 361 | this->copyFromHost(tmp_yh, length); 362 | delete[] tmp_yh; 363 | } 364 | 365 | template 366 | void Matrix::reValue(int value, bool is_div){ 367 | int length = this->getNumRows() * this->getNumCols(); 368 | Dtype* tmp_yh = new Dtype[length]; 369 | for(int i = 0; i < length; i++){ 370 | if(!is_div) 371 | tmp_yh[i] = i % value; 372 | else 373 | tmp_yh[i] = i / value; 374 | } 375 | this->copyFromHost(tmp_yh, length); 376 | delete[] tmp_yh; 377 | } 378 | 379 | template 380 | Dtype Matrix::computeNorm(int len){ 381 | Dtype norm_cpu; 382 | Matrix* norm_gpu = new Matrix(1, 1); 383 | kComputeNorm<<<1, 1024, sizeof(Dtype)*len>>>(this->_data_value, \ 384 | norm_gpu->getDevData(), len); 385 | cudaDeviceSynchronize(); 386 | cudaCheckError(); 387 | 
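// kComputeNorm accumulates the squared entries in shared memory and leaves
// sqrt(sum) in norm_gpu[0], so only that single element is copied back to the host.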
norm_gpu->copyToHost(&norm_cpu, 1); 388 | delete norm_gpu; 389 | return norm_cpu; 390 | } 391 | 392 | template 393 | void Matrix::cropMatToNew(Matrix *tar, const int row_start, \ 394 | const int cropped_height, const int col_start, const int cropped_width){ 395 | kCropImg<<<1, 1024>>>(this->_data_value, tar->getDevData(), row_start, \ 396 | cropped_height, col_start, cropped_width, this->_shape[1]); 397 | cudaDeviceSynchronize(); 398 | cudaCheckError(); 399 | } 400 | 401 | template 402 | Dtype Matrix::getPosValue(int pos){ 403 | Dtype tmp; 404 | cudaMemcpy(&tmp, this->_data_value + pos, sizeof(Dtype), cudaMemcpyDeviceToHost); 405 | return tmp; 406 | } 407 | 408 | template 409 | Dtype Matrix::getFirstPosValue(){ 410 | return getPosValue(0); 411 | } 412 | 413 | template 414 | void Matrix::subedByUnitMat(){ 415 | 416 | const int width = this->_shape[1]; 417 | const int height = this->_shape[0]; 418 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 419 | assert(num_blocks_x < NUM_BLOCKS_MAX); 420 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 421 | NUM_BLOCKS_MAX)); 422 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 423 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 424 | 425 | kSubedByUnitMat<<>>(this->getDevData(), \ 426 | this->getDevData(), width, height); 427 | cudaDeviceSynchronize(); 428 | cudaCheckError(); 429 | } 430 | 431 | template 432 | void Matrix::setValueAt(const int height_idx, \ 433 | const int width_idx, const Dtype value){ 434 | int pos = height_idx*this->_shape[1] + width_idx; 435 | cudaMemcpy(this->_data_value + pos, &value, sizeof(Dtype), \ 436 | cudaMemcpyHostToDevice); 437 | } 438 | 439 | template 440 | void Matrix::subPortion(Matrix* b, const int b_row, \ 441 | const int b_col){ 442 | 443 | const int width = b_col; 444 | const int height = b_row; 445 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 446 | assert(num_blocks_x < NUM_BLOCKS_MAX); 447 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 448 | NUM_BLOCKS_MAX)); 449 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 450 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 451 | 452 | kSubPortion<<>>(this->getDevData(), \ 453 | b->getDevData()+b_col, this->getDevData(), this->_shape[1], \ 454 | this->_shape[0], width, height); 455 | cudaDeviceSynchronize(); 456 | cudaCheckError(); 457 | } 458 | 459 | template 460 | void Matrix::readPars(string filename){ 461 | ifstream fin1(filename.c_str(), ios::binary); 462 | int dataLen = this->getNumRows() * this->getNumCols(); 463 | Dtype* tmp = new Dtype[dataLen]; 464 | fin1.read((char*)(tmp), sizeof(Dtype) * dataLen); 465 | cudaMemcpy(this->getDevData(), tmp, sizeof(Dtype)*dataLen, \ 466 | cudaMemcpyHostToDevice); 467 | fin1.close(); 468 | delete tmp; 469 | } 470 | 471 | template 472 | void Matrix::savePars(string filename){ 473 | ofstream fout(filename.c_str(), ios::binary); 474 | int dataLen = this->getNumRows() * this->getNumCols(); 475 | Dtype* tmp = new Dtype[dataLen]; 476 | cudaMemcpy(tmp, this->getDevData(), sizeof(Dtype)*dataLen, \ 477 | cudaMemcpyDeviceToHost); 478 | fout.write((char*)(tmp), sizeof(Dtype) * dataLen); 479 | fout.close(); 480 | delete tmp; 481 | } 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | -------------------------------------------------------------------------------- /dl/src/matrix_kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * filename:nvmatrix_kernel.cu 3 | */ 4 | 5 | #include 6 | #include 7 | 
#include "matrix_kernel.hpp" 8 | 9 | template 10 | __device__ Dtype mySigmoid(Dtype x) { 11 | if(x < -300) 12 | return 0; 13 | else if( x > 300) 14 | return 1; 15 | else 16 | return 1 / (1 + __expf(-x)); 17 | } 18 | 19 | 20 | template 21 | __global__ void kAddRowVector(Dtype* mat, Dtype* vec, Dtype* tgtMat, \ 22 | const int width, const int height, float scaleVec) { 23 | 24 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 25 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 26 | const int idx = idxY * width + idxX; 27 | const int numThreads = blockDim.x * gridDim.x * \ 28 | blockDim.y * gridDim.y; 29 | 30 | //此处控制了线程数要小于行列积 31 | for (int i = idx; i < width * height; i += numThreads) { 32 | tgtMat[idx] = mat[idx] + scaleVec * vec[idx % width]; 33 | 34 | } 35 | } 36 | 37 | template 38 | __global__ void kSoftmax(Dtype* gData, Dtype* target, const int width, \ 39 | const int height) { 40 | 41 | //跟同一个block里面值比较大小取最大值,减去最大值 42 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 43 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 44 | const int idx = idxY * width + idxX; 45 | //数据放入共享内存 46 | //计算离行值最近的2的次方 47 | int pow2Length = width; 48 | if(pow2Length & (pow2Length - 1)){ 49 | while(pow2Length & (pow2Length - 1)){ 50 | pow2Length &= pow2Length - 1; 51 | } 52 | } 53 | extern __shared__ Dtype ori[]; 54 | __shared__ Dtype max; 55 | 56 | if(idxX < width) 57 | ori[idxX] = gData[idx]; 58 | __syncthreads(); 59 | 60 | //先通过reduce来求最大值 61 | if(idxX >= pow2Length && idxX < width) 62 | ori[idxX - pow2Length] = ori[idxX - pow2Length] > ori[idxX] \ 63 | ? ori[idxX - pow2Length] : ori[idxX]; 64 | __syncthreads(); 65 | 66 | for(int activeThreads = (pow2Length >> 1); activeThreads; activeThreads >>= 1){ 67 | if(idxX < activeThreads){ 68 | ori[idxX] = ori[idxX + activeThreads] > ori[idxX] \ 69 | ? 
ori[idxX + activeThreads] : ori[idxX]; 70 | } 71 | __syncthreads(); 72 | 73 | } 74 | if(idxX == 0) 75 | max = ori[0]; 76 | __syncthreads(); 77 | 78 | if(idxX < width) 79 | target[idx] = __expf(gData[idx] - max); 80 | 81 | //reduce求和 82 | if(idxX < width) 83 | ori[idxX] = target[idx]; 84 | __syncthreads(); 85 | 86 | if(idxX >= pow2Length && idxX < width) 87 | ori[idxX - pow2Length] += ori[idxX]; 88 | __syncthreads(); 89 | 90 | for(int activeThreads = (pow2Length >> 1); activeThreads; activeThreads >>= 1){ 91 | if(idxX < activeThreads){ 92 | ori[idxX] += ori[idxX + activeThreads]; 93 | } 94 | __syncthreads(); 95 | } 96 | 97 | if(idxX < width) 98 | target[idx] = target[idx] / ori[0]; 99 | 100 | } 101 | 102 | template 103 | __global__ void kSetUpCurand(curandState *state, const int width, const int height) { 104 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 105 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 106 | const int idx = idxY * width + idxX; 107 | 108 | if(idxY < height && idxX < width){ 109 | curand_init(0, idx, 0, &state[idx]); 110 | } 111 | } 112 | 113 | template 114 | __global__ void kDropout(Dtype* gData, Dtype* target, int* record, \ 115 | curandState *state, const int width, const int height) { 116 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 117 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 118 | const int idx = idxY * width + idxX; 119 | 120 | if(idxY < height && idxX < width){ 121 | curandState local_state = state[idx]; 122 | Dtype local_prob = curand_uniform(&local_state); 123 | 124 | if(local_prob > 0.5){ 125 | target[idx] = gData[idx]; 126 | record[idx] = 1; 127 | }else{ 128 | target[idx] = 0; 129 | record[idx] = 0; 130 | } 131 | state[idx] = local_state; 132 | } 133 | } 134 | 135 | template 136 | __global__ void kRelu(Dtype* gData, Dtype* target, int* record, const int length) { 137 | const int idx = blockIdx.x * blockDim.x + threadIdx.x; 138 | 139 | if(idx < length){ 140 | if(gData[idx] > 0){ 141 | target[idx] = gData[idx]; 142 | record[idx] = 1; 143 | }else{ 144 | target[idx] = 0; 145 | record[idx] = 0; 146 | } 147 | } 148 | } 149 | template 150 | __global__ void kReluBack(Dtype* gData, Dtype* target, int* record, const int length) { 151 | const int idx = blockIdx.x * blockDim.x + threadIdx.x; 152 | 153 | if(idx < length){ 154 | if(record[idx] == 1){ 155 | target[idx] = gData[idx]; 156 | }else{ 157 | target[idx] = 0; 158 | } 159 | } 160 | } 161 | 162 | template 163 | __global__ void kSigmoid(Dtype* gData, Dtype* target, const int width, \ 164 | const int height) { 165 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 166 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 167 | const int idx = idxY * width + idxX; 168 | 169 | if(idxY < height && idxX < width) 170 | target[idx] = mySigmoid(gData[idx]); 171 | } 172 | 173 | template 174 | __global__ void kReciprocal(Dtype* gData, Dtype* target, const int width, \ 175 | const int height) { 176 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 177 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 178 | const int idx = idxY * width + idxX; 179 | 180 | if(idxY < height && idxX < width) 181 | target[idx] = 1 / gData[idx]; 182 | } 183 | 184 | template 185 | __global__ void kLog(Dtype* gData, Dtype* target, const int width, \ 186 | const int height) { 187 | 188 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 189 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 190 | const int idx = idxY * width + idxX; 191 | 192 | if(idxY < height && 
idxX < width){ 193 | double tmp = gData[idx] < 1 - 10e-15 ? gData[idx] : 1 - 10e-15; 194 | tmp = tmp > 10e-15 ? tmp : 10e-15; 195 | target[idx] = __logf(tmp); 196 | } 197 | } 198 | 199 | template <typename Dtype> 200 | __global__ void kCompactCol(const Dtype* ori, Dtype* target, const int interval, \ 201 | const int width, const int height){ 202 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 203 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 204 | const int oriIdx = idxY * width * interval + idxX * interval; 205 | const int tarIdx = idxY * width + idxX; 206 | 207 | if(idxY < height && idxX < width){ 208 | target[tarIdx] = 0; 209 | for(int i = 0; i < interval; i++){ 210 | target[tarIdx] += ori[i + oriIdx]; 211 | } 212 | } 213 | 214 | } 215 | 216 | 217 | template <typename Dtype> 218 | __global__ void kDumbSumCols(Dtype* mat, Dtype* vec, const int width, \ 219 | const int height) { 220 | 221 | extern __shared__ Dtype ori[]; 222 | 223 | //round width down to the nearest power of two 224 | int pow2Length = width; 225 | if(pow2Length & (pow2Length - 1)){ 226 | while(pow2Length & (pow2Length - 1)){ 227 | pow2Length &= pow2Length - 1; 228 | } 229 | } 230 | 231 | 232 | //stage the row in shared memory, then sum it by reduction 233 | int i = threadIdx.x; 234 | while(i < width){ 235 | ori[i] = mat[blockIdx.x * width + i]; 236 | i += blockDim.x; 237 | } 238 | __syncthreads(); 239 | int reduce_len = pow2Length > blockDim.x ? blockDim.x : pow2Length; 240 | 241 | //number of reduce passes needed; one pass covers at most 32*32 elements 242 | int times = width / reduce_len; 243 | 244 | //fold in the tail elements that do not divide evenly first 245 | int idxX = threadIdx.x + reduce_len * times; 246 | if(idxX > (reduce_len * times) && idxX < width) 247 | ori[idxX - reduce_len] += ori[idxX]; 248 | __syncthreads(); 249 | 250 | 251 | for(int j = times - 1; j >= 0; j--){ 252 | idxX = threadIdx.x + j * reduce_len; 253 | if(threadIdx.x == 0 && ((j + 1) * reduce_len) < width) 254 | ori[0] += ori[(j + 1) * reduce_len]; 255 | __syncthreads(); 256 | for(int activeThreads = (reduce_len >> 1); activeThreads; activeThreads >>= 1){ 257 | if(threadIdx.x < activeThreads){ 258 | ori[idxX] += ori[idxX + activeThreads]; 259 | } 260 | __syncthreads(); 261 | } 262 | } 263 | 264 | if(threadIdx.x == 0){ 265 | vec[blockIdx.x] = ori[0]; 266 | } 267 | __syncthreads(); 268 | 269 | } 270 | 271 | 272 | template <typename Dtype> 273 | __global__ void kDumbMaxPosInRow(Dtype* mat, Dtype* vec, const int width, \ 274 | const int height) { 275 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 276 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 277 | const int idx = idxY * width + idxX; 278 | 279 | extern __shared__ Dtype ori[]; 280 | 281 | int pow2Length = width; 282 | if(pow2Length & (pow2Length - 1)){ 283 | while(pow2Length & (pow2Length - 1)){ 284 | pow2Length &= pow2Length - 1; 285 | } 286 | } 287 | 288 | //reduce to find the row maximum 289 | if(idxX < width) 290 | ori[idxX] = mat[idx]; 291 | __syncthreads(); 292 | 293 | if(idxX >= pow2Length && idxX < width) 294 | ori[idxX - pow2Length] = ori[idxX - pow2Length] > ori[idxX] \ 295 | ? ori[idxX - pow2Length] : ori[idxX]; 296 | __syncthreads(); 297 | 298 | for(int activeThreads = (pow2Length >> 1); activeThreads; activeThreads >>= 1){ 299 | if(idxX < activeThreads){ 300 | ori[idxX] = ori[idxX + activeThreads] > ori[idxX] \ 301 | ?
ori[idxX + activeThreads] : ori[idxX]; 302 | } 303 | __syncthreads(); 304 | } 305 | 306 | if(mat[idx] == ori[0] && idxX < width) 307 | vec[idxY] = idxX; 308 | 309 | __syncthreads(); 310 | } 311 | 312 | template 313 | __global__ void kMultByColVector(Dtype* mat, Dtype* vec, Dtype* tgtMat, \ 314 | const int width, const int height) { 315 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 316 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 317 | const int idx = idxY * width + idxX; 318 | 319 | if(idxY < height && idxX < width) 320 | tgtMat[idx] = mat[idx] * vec[idxY]; 321 | } 322 | 323 | template 324 | __global__ void kSubtractFromScalar(Dtype* gData, float scalar, Dtype* target, \ 325 | const int width, const int height) { 326 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 327 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 328 | const int idx = idxY * width + idxX; 329 | 330 | if(idxY < height && idxX < width) 331 | target[idx] = scalar - gData[idx]; 332 | } 333 | 334 | template 335 | __global__ void kMult(Dtype* matA, Dtype* matB, Dtype* tgtMat, \ 336 | const int width, const int height) { 337 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 338 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 339 | const int idx = idxY * width + idxX; 340 | 341 | if(idxY < height && idxX < width) 342 | tgtMat[idx] = matA[idx] * matB[idx]; 343 | } 344 | 345 | template 346 | __global__ void kAdd(Dtype* matA, Dtype* matB, Dtype* tgtMat, float scaleA, \ 347 | float scaleB, const int width, const int height) { 348 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 349 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 350 | const int idx = idxY * width + idxX; 351 | 352 | if(idxY < height && idxX < width) 353 | tgtMat[idx] = scaleA * matA[idx] + scaleB * matB[idx]; 354 | } 355 | 356 | 357 | template 358 | __global__ void kTranspose(Dtype* srcData, Dtype* dstData, \ 359 | const int width, const int height){ 360 | 361 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 362 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 363 | const int srcIdx = idxY * width + idxX; 364 | const int dstIdx = idxX * height + idxY; 365 | 366 | if(idxY < height && idxX < width) 367 | dstData[dstIdx] = srcData[srcIdx]; 368 | 369 | } 370 | 371 | template 372 | __global__ void kComputeNorm(const Dtype* vec, Dtype* norm, const int len){ 373 | //每一个block计算一个模 374 | extern __shared__ Dtype sh_norm[]; 375 | 376 | int pow2_len = len; 377 | if (pow2_len & (pow2_len - 1)) { 378 | while (pow2_len & (pow2_len - 1)){ 379 | pow2_len &= pow2_len - 1; 380 | } 381 | } 382 | 383 | int i = threadIdx.x; 384 | while (i < len) { 385 | sh_norm[i] = vec[i]*vec[i]; 386 | i += blockDim.x; 387 | } 388 | 389 | int reduce_len = pow2_len > blockDim.x ? 
blockDim.x : pow2_len; 390 | int times = len / reduce_len; 391 | 392 | int vec_pos = threadIdx.x + reduce_len * times; 393 | if (vec_pos > (reduce_len * times) && vec_pos < len) { 394 | sh_norm[vec_pos - reduce_len] += sh_norm[vec_pos]; 395 | } 396 | __syncthreads(); 397 | 398 | for (int j = times-1; j >= 0; j--) { 399 | vec_pos = threadIdx.x + j*reduce_len; 400 | if (threadIdx.x == 0 && (j + 1) * reduce_len < len) { 401 | sh_norm[0] += sh_norm[(j + 1) * reduce_len]; 402 | } 403 | __syncthreads(); 404 | for (int active_thread = (reduce_len >> 1); active_thread; active_thread >>= 1) { 405 | if (threadIdx.x < active_thread) { 406 | sh_norm[vec_pos] += sh_norm[vec_pos + active_thread]; 407 | } 408 | __syncthreads(); 409 | } 410 | } 411 | 412 | if (threadIdx.x == 0) { 413 | norm[0] = sqrt(sh_norm[0]); 414 | } 415 | 416 | __syncthreads(); 417 | } 418 | 419 | template 420 | __global__ void kCropImg(const Dtype* ori_img, Dtype* dst_img, \ 421 | const int row_start, const int cropped_height, \ 422 | const int col_start, const int cropped_width, \ 423 | const int ori_width){ 424 | 425 | int idx = threadIdx.x; 426 | 427 | while (idx < cropped_height*cropped_width) { 428 | int ori_row_idx = idx / cropped_width + row_start; 429 | int ori_col_idx = idx % cropped_width + col_start; 430 | dst_img[idx] = ori_img[ori_row_idx*ori_width + ori_col_idx]; 431 | idx += blockDim.x; 432 | } 433 | __syncthreads(); 434 | } 435 | 436 | template 437 | __global__ void kComputeHouseholderVec(const Dtype* src, Dtype* dst, \ 438 | Dtype added_value, Dtype scale, const int len) { 439 | int idx = threadIdx.x; 440 | while (idx < len) { 441 | if (idx == 0) { 442 | dst[idx] = scale * (src[idx] + added_value); 443 | } else 444 | dst[idx] = scale * src[idx]; 445 | idx += blockDim.x; 446 | } 447 | } 448 | 449 | template 450 | __global__ void kSubedByUnitMat(Dtype* matA, Dtype* tgtMat, \ 451 | const int width, const int height) { 452 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 453 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 454 | const int idx = idxY * width + idxX; 455 | 456 | if(idxY < height && idxX < width ){ 457 | if ( idxX == idxY) 458 | tgtMat[idx] = 1 - matA[idx]; 459 | else 460 | tgtMat[idx] = - matA[idx]; 461 | } 462 | 463 | } 464 | 465 | template 466 | __global__ void kSubPortion(Dtype* matA, Dtype* matB, Dtype* tgtMat, \ 467 | const int a_width, const int a_height, \ 468 | const int b_width, const int b_height){ 469 | 470 | const int row_dist = a_height - b_height; 471 | const int col_dist = a_width - b_width; 472 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 473 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 474 | const int idx = idxY * b_width + idxX; 475 | 476 | const int a_idx = (idxY+row_dist)*a_width + idxX+col_dist; 477 | 478 | if(idxY < b_height && idxX < b_width ){ 479 | tgtMat[a_idx] = matA[a_idx] - matB[idx]; 480 | } 481 | 482 | } 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | -------------------------------------------------------------------------------- /dl/src/model_component.cpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file model_component.cpp 3 | /// @brief 4 | #include "model_component.hpp" 5 | 6 | using namespace std; 7 | 8 | template 9 | ModelComponent::ModelComponent() { 10 | _string_map_layertype["CONVOLUTION"] = CONVOLUTION; 11 | _string_map_layertype["POOLING"] = POOLING; 12 | 
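// These keys must match the "type" strings used in the network description JSON
// (e.g. script/cifar10.json); TrainModel::parseNetJson() resolves each layer's
// type through this map before constructing its Param object.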
_string_map_layertype["SIGMOID"] = SIGMOID; 13 | _string_map_layertype["RECTIFIED"] = RECTIFIED; 14 | _string_map_layertype["INNERPRODUCT"] = INNERPRODUCT; 15 | _string_map_layertype["SOFTMAX"] = SOFTMAX; 16 | _string_map_layertype["DROPOUT"] = DROPOUT; 17 | 18 | _string_map_pooltype["MAX_POOLING"] = MAX_POOLING; 19 | _string_map_pooltype["AVG_POOLING"] = AVG_POOLING; 20 | 21 | 22 | _num_need_train_layers = 0; 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /dl/src/pooling_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file pooling_layer.cu 3 | /// 4 | 5 | #include "pooling_layer.hpp" 6 | 7 | using namespace std; 8 | 9 | template 10 | PoolingLayer::PoolingLayer(PoolParam *lcp){ 11 | this->_lcp = lcp; 12 | _num_box = _lcp->getBoxNumHeight()*_lcp->getBoxNumWidth(); 13 | 14 | cublasCreate(&this->handle); 15 | 16 | } 17 | 18 | template 19 | PoolingLayer::~PoolingLayer() { 20 | 21 | delete this-> _y; 22 | delete this->_dE_dy; 23 | 24 | if(_lcp->getPoolType() == MAX_POOLING ) 25 | delete _max_pos; 26 | if((_lcp->getOutHeight() > MAX_THREAD_SIZE \ 27 | || _lcp->getOutWidth() > MAX_THREAD_SIZE) \ 28 | && (_lcp->getOverlapHeight() > 0 || _lcp->getOverlapWidth() > 0)) 29 | delete unranged_dE_dx; 30 | cublasDestroy(this->handle); 31 | } 32 | 33 | template 34 | void PoolingLayer::initCuda() { 35 | 36 | 37 | this->_y = new Matrix(_lcp->getMinibatchSize(), \ 38 | _lcp->getOutHeight()*_lcp->getOutWidth()* _lcp->getOutChannel()); 39 | 40 | this->_dE_dy = new Matrix(this->_y); 41 | 42 | 43 | if(_lcp->getPoolType() == MAX_POOLING ){ 44 | _max_pos = new Matrix(_lcp->getMinibatchSize(), \ 45 | _lcp->getOutHeight()*_lcp->getOutWidth()* _lcp->getOutChannel()); 46 | 47 | } 48 | if((_lcp->getOutHeight() > MAX_THREAD_SIZE \ 49 | || _lcp->getOutWidth() > MAX_THREAD_SIZE) \ 50 | && (_lcp->getOverlapHeight() > 0 || _lcp->getOverlapWidth() > 0)){ 51 | unranged_dE_dx = new Matrix(_lcp->getMinibatchSize(), \ 52 | _lcp->getBoxInHeight()*_lcp->getBoxInWidth() \ 53 | * _lcp->getBoxNumHeight()*_lcp->getBoxNumWidth() \ 54 | * _lcp->getOutChannel()); 55 | } 56 | 57 | } 58 | 59 | template 60 | void PoolingLayer::computeOutput(Matrix* x){ 61 | 62 | this->_y->zeros(); 63 | 64 | dim3 blocks = dim3(_lcp->getMinibatchSize(), _lcp->getInChannel() * _num_box); 65 | dim3 threads = dim3(_lcp->getThreadWidth(), _lcp->getThreadHeight()); 66 | 67 | if(_lcp->getPoolType() == MAX_POOLING ){ 68 | max_pooling<<>>(x->getDevData(), \ 69 | this->_y->getDevData(), _max_pos->getDevData(), \ 70 | _lcp->getInHeight(), _lcp->getInWidth(), \ 71 | _lcp->getInChannel(), \ 72 | _lcp->getOutHeight(), _lcp->getOutWidth(), \ 73 | _lcp->getFilterHeight(), _lcp->getFilterWidth(), \ 74 | _lcp->getStrideHeight(), _lcp->getStrideWidth(), \ 75 | _lcp->getBoxOutHeight(), _lcp->getBoxOutWidth(), \ 76 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 77 | 78 | }else if(_lcp->getPoolType() == AVG_POOLING){ 79 | avg_pooling<<>>(x->getDevData(), \ 80 | this->_y->getDevData(), \ 81 | _lcp->getInHeight(), _lcp->getInWidth(), \ 82 | _lcp->getInChannel(), \ 83 | _lcp->getOutHeight(), _lcp->getOutWidth(), \ 84 | _lcp->getFilterHeight(), _lcp->getFilterWidth(), \ 85 | _lcp->getStrideHeight(), _lcp->getStrideWidth(), \ 86 | _lcp->getBoxOutHeight(), _lcp->getBoxOutWidth(), \ 87 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 88 | }else{ 89 | cout << "Pooling type is invalid !\n"; 90 | exit(EXIT_FAILURE); 91 | } 92 | 93 | cudaThreadSynchronize(); 94 | 
cudaCheckError(); 95 | 96 | } 97 | 98 | template 99 | void PoolingLayer::computeDerivsOfInput(Matrix* dE_dx){ 100 | 101 | dim3 blocks = dim3(_lcp->getMinibatchSize(), _lcp->getInChannel() * _num_box); 102 | dim3 threads = dim3(_lcp->getThreadWidth(), _lcp->getThreadHeight()); 103 | 104 | int box_in_height = MAX_THREAD_SIZE > _lcp->getOutHeight() \ 105 | ? _lcp->getInHeight() : _lcp->getBoxInHeight(); 106 | int box_in_width = MAX_THREAD_SIZE > _lcp->getOutWidth() \ 107 | ? _lcp->getInWidth() : _lcp->getBoxInWidth(); 108 | 109 | Dtype* p_dE_dx; 110 | if((_lcp->getOutHeight() > MAX_THREAD_SIZE \ 111 | || _lcp->getOutWidth() > MAX_THREAD_SIZE) \ 112 | && (_lcp->getOverlapHeight() > 0 || _lcp->getOverlapWidth() > 0)){ 113 | unranged_dE_dx->zeros(); 114 | p_dE_dx = unranged_dE_dx->getDevData(); 115 | }else{ 116 | dE_dx->zeros(); 117 | p_dE_dx = dE_dx->getDevData(); 118 | } 119 | 120 | if(_lcp->getPoolType() == MAX_POOLING ){ 121 | compute_dE_dy_max<<>>( \ 123 | this->_dE_dy->getDevData(), \ 124 | p_dE_dx, _max_pos->getDevData(), \ 125 | box_in_height, box_in_width, \ 126 | _lcp->getBoxOutHeight(), _lcp->getBoxOutWidth(), \ 127 | _lcp->getInChannel(), \ 128 | _lcp->getOutHeight(), _lcp->getOutWidth(), \ 129 | _lcp->getFilterHeight(), _lcp->getFilterWidth(), \ 130 | _lcp->getStrideHeight(), _lcp->getStrideWidth(), \ 131 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 132 | cudaThreadSynchronize(); 133 | cudaCheckError(); 134 | 135 | 136 | }else if(_lcp->getPoolType() == AVG_POOLING){ 137 | compute_dE_dy_avg<<>>( \ 139 | this->_dE_dy->getDevData(), p_dE_dx, \ 140 | box_in_height, box_in_width, \ 141 | _lcp->getBoxOutHeight(), _lcp->getBoxOutWidth(), \ 142 | _lcp->getInChannel(), \ 143 | _lcp->getOutHeight(), _lcp->getOutWidth(), \ 144 | _lcp->getFilterHeight(), _lcp->getFilterWidth(), \ 145 | _lcp->getStrideHeight(), _lcp->getStrideWidth(), \ 146 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 147 | cudaThreadSynchronize(); 148 | cudaCheckError(); 149 | 150 | }else{ 151 | cout << "Pooling type is invalid !\n"; 152 | exit(EXIT_FAILURE); 153 | } 154 | 155 | if((_lcp->getOutHeight() > MAX_THREAD_SIZE \ 156 | || _lcp->getOutWidth() > MAX_THREAD_SIZE) \ 157 | && (_lcp->getOverlapHeight() > 0 || _lcp->getOverlapWidth() > 0)){ 158 | dE_dx->zeros(); 159 | 160 | compactOverlap<<<_lcp->getMinibatchSize(), _lcp->getInChannel()>>>( \ 161 | unranged_dE_dx->getDevData(), dE_dx->getDevData(), \ 162 | _lcp->getInHeight(), _lcp->getInWidth(), \ 163 | _lcp->getInChannel(), _lcp->getOverlapHeight(), \ 164 | _lcp->getOverlapWidth(), \ 165 | box_in_height, box_in_width, \ 166 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 167 | cudaThreadSynchronize(); 168 | cudaCheckError(); 169 | } 170 | } 171 | 172 | 173 | 174 | -------------------------------------------------------------------------------- /dl/src/relu_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file relu_layer.cu 3 | /// @brief 4 | 5 | 6 | using namespace std; 7 | 8 | template 9 | ReluLayer::ReluLayer(Param* p){ 10 | 11 | this->_p = p; 12 | } 13 | 14 | template 15 | ReluLayer::~ReluLayer() { 16 | delete this->_y; 17 | delete this->_dE_dy; 18 | delete _record; 19 | } 20 | 21 | template 22 | void ReluLayer::initCuda() { 23 | 24 | 25 | ConnectType ct = this->_p->getConnectType(); 26 | int col; 27 | if(ct == PARAM_CONNECT_TYPE_LOCAL) 28 | col = _p->getOutHeight()*_p->getOutWidth() \ 29 | * this->_p->getOutChannel(); 30 | else if(ct == PARAM_CONNECT_TYPE_FULL) 31 | col = 
this->_p->getNumOut(); 32 | this->_y = new Matrix(_p->getMinibatchSize(), \ 33 | col); 34 | this->_dE_dy = new Matrix(this->_y); 35 | 36 | _record = new Matrix(_p->getMinibatchSize(), col); 37 | 38 | } 39 | 40 | template 41 | void ReluLayer::computeOutput(Matrix* x){ 42 | 43 | this->_y->zeros(); 44 | x->applyRelu(this->_y, _record); 45 | 46 | } 47 | 48 | template 49 | void ReluLayer::computeDerivsOfInput(Matrix* dE_dx){ 50 | dE_dx->zeros(); 51 | 52 | this->_dE_dy->applyRelu(dE_dx, _record, false); 53 | 54 | } 55 | 56 | 57 | -------------------------------------------------------------------------------- /dl/src/sigmoid_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file sigmoid_layer.cu 3 | /// @brief 4 | 5 | #include "sigmoid_layer.hpp" 6 | 7 | using namespace std; 8 | 9 | template 10 | SigmoidLayer::SigmoidLayer(Param* fcp){ 11 | 12 | this->_fcp = fcp; 13 | } 14 | 15 | template 16 | SigmoidLayer::~SigmoidLayer() { 17 | delete this->_y; 18 | delete this->_dE_dy; 19 | } 20 | 21 | template 22 | void SigmoidLayer::initCuda() { 23 | 24 | 25 | ConnectType ct = this->_fcp->getConnectType(); 26 | int col; 27 | if(ct == PARAM_CONNECT_TYPE_LOCAL) 28 | col = _fcp->getOutHeight()*_fcp->getOutWidth() \ 29 | * this->_fcp->getOutChannel(); 30 | else if(ct == PARAM_CONNECT_TYPE_FULL) 31 | col = this->_fcp->getNumOut(); 32 | this->_y = new Matrix(_fcp->getMinibatchSize(), \ 33 | col); 34 | this->_dE_dy = new Matrix(this->_y); 35 | } 36 | 37 | template 38 | void SigmoidLayer::computeOutput(Matrix* x){ 39 | x->apply(Matrix::SIGMOID, this->_y); 40 | } 41 | 42 | template 43 | void SigmoidLayer::computeDerivsOfInput(Matrix* dE_dx){ 44 | 45 | 46 | this->_y->subtractFromScalar(1, dE_dx); 47 | 48 | dE_dx->eltWiseMult(this->_y); 49 | 50 | dE_dx->eltWiseMult(this->_dE_dy); 51 | 52 | } 53 | 54 | 55 | -------------------------------------------------------------------------------- /dl/src/train_classification.cpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file train_classification.cpp 3 | /// @brief 4 | 5 | 6 | #include 7 | #include 8 | #include 9 | #include "train_classification.hpp" 10 | 11 | using namespace std; 12 | 13 | template 14 | void TrainClassification::createPixelAndLabel(){ 15 | this->_model_component->_mini_data = new Matrix(this->_model_component->_minibatch_size, \ 16 | this->_model_component->_one_img_len); 17 | this->_model_component->_mini_label = new Matrix(this->_model_component->_minibatch_size, 1); 18 | } 19 | 20 | template 21 | void TrainClassification::parseImgBinary(string train_file, string valid_file){ 22 | this->_load_layer = new LoadCifar10(this->_model_component->_minibatch_size); 23 | this->_model_component->_num_train = this->_load_layer->getNumTrain(); 24 | this->_model_component->_num_valid = this->_load_layer->getNumValid(); 25 | this->_model_component->setNumTrainBatch(); 26 | this->_model_component->setNumValidBatch(); 27 | 28 | } 29 | 30 | template 31 | void TrainClassification::forwardLastLayer(){ 32 | 33 | this->_model_component->_layers[this->_model_component->_num_layers-1]->computeOutput(\ 34 | this->_model_component->_y[this->_model_component->_num_layers-1]); 35 | this->_likelihood += dynamic_cast* >( \ 36 | this->_model_component->_layers[this->_model_component->_num_layers-1]) \ 37 | ->computeError(this->_model_component->_mini_label, this->_error); 38 | } 39 | 40 | template 41 | void TrainClassification::backwardLastLayer(){ 42 | Logistic *last_layer = 
dynamic_cast* >( \ 43 | this->_model_component->_layers[this->_model_component->_num_layers-1]); 44 | last_layer->computeDerivsOfInput(this->_model_component->_dE_dy[ \ 45 | this->_model_component->_num_layers-2], \ 46 | this->_model_component->_mini_label); 47 | } 48 | 49 | template 50 | void TrainClassification::train() { 51 | 52 | clock_t t; 53 | t = clock(); 54 | 55 | int pixel_len = this->_model_component->_minibatch_size*this->_model_component->_one_img_len; 56 | int label_len = this->_model_component->_minibatch_size; 57 | Dtype *h_mini_pixel = new Dtype[pixel_len]; //分配在主机内存上 58 | int *h_mini_label = new int[label_len]; 59 | 60 | for (int epoch_idx = 0; epoch_idx < this->_model_component->_num_epoch; \ 61 | epoch_idx++) { 62 | 63 | this->_likelihood = 0; 64 | this->_error = 0; 65 | 66 | Logistic *last_layer = dynamic_cast* >( \ 67 | this->_model_component->_layers[this->_model_component->_num_layers-1]); 68 | last_layer->setRecordToZero(); 69 | 70 | 71 | for(int batch_idx = 0; batch_idx < this->_model_component->_num_train_batch; \ 72 | batch_idx++){ 73 | 74 | this->_load_layer->loadTrainOneBatch(batch_idx, h_mini_pixel, h_mini_label); 75 | this->_model_component->_mini_data->copyFromHost(h_mini_pixel, \ 76 | pixel_len); 77 | this->_model_component->_mini_label->copyFromHost(h_mini_label, \ 78 | label_len); 79 | this->forwardPropagate(); 80 | forwardLastLayer(); 81 | backwardLastLayer(); 82 | this->backwardPropagate(); 83 | 84 | this->computeAndUpdatePars(); 85 | 86 | if(batch_idx == this->_model_component->_num_train_batch-1){ 87 | cout << "----------epoch_idx: " << epoch_idx << "-----------\n"; 88 | cout << "training likelihood: " << this->_likelihood << endl; 89 | cout << "classification training accuarcy: " << 1-(float)this->_error/ \ 90 | (this->_model_component->_num_train_batch \ 91 | *this->_model_component->getMinibatchSize()) << endl; 92 | Matrix* train_record = last_layer->getResultRecord(); 93 | train_record->showValue("train record"); 94 | 95 | this->_likelihood = 0; 96 | this->_error = 0; 97 | 98 | last_layer->setRecordToZero(); 99 | 100 | for(int valid_idx = 0; \ 101 | valid_idx < this->_model_component->_num_valid_batch; \ 102 | valid_idx++){ 103 | 104 | this->_load_layer->loadValidOneBatch( valid_idx, \ 105 | h_mini_pixel, h_mini_label); 106 | this->_model_component->_mini_data->copyFromHost(h_mini_pixel, \ 107 | pixel_len); 108 | this->_model_component->_mini_label->copyFromHost(h_mini_label, \ 109 | label_len); 110 | 111 | this->forwardPropagate(); 112 | forwardLastLayer(); 113 | 114 | } 115 | Matrix* valid_record = last_layer->getResultRecord(); 116 | valid_record->showValue("valid record"); 117 | 118 | cout << "validation likelihood: " << this->_likelihood << endl; 119 | cout << "classification valid accuarcy: " << 1-(float)this->_error/ \ 120 | (this->_model_component->_num_valid_batch \ 121 | *this->_model_component->getMinibatchSize()) << endl; 122 | 123 | 124 | } 125 | } 126 | 127 | t = clock() - t; 128 | cout << ((float)t/CLOCKS_PER_SEC) << "s.\n"; 129 | t = clock(); 130 | 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /dl/src/train_model.cpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file train_model.cpp 3 | /// @brief 4 | 5 | 6 | #include 7 | #include 8 | #include 9 | #include "train_model.hpp" 10 | #include "json/json.h" 11 | #include "inner_product_layer.hpp" 12 | #include "logistic.hpp" 13 | #include "sigmoid_layer.hpp" 14 | #include 
"relu_layer.hpp" 15 | #include "convnet.hpp" 16 | #include "pooling_layer.hpp" 17 | #include "dropout_layer.hpp" 18 | 19 | using namespace std; 20 | 21 | template 22 | TrainModel::TrainModel(bool has_valid, bool is_test){ 23 | _model_component = new ModelComponent(); 24 | _likelihood = 0; 25 | _is_stop = false; 26 | _has_valid = has_valid; 27 | _is_test = is_test; 28 | if(has_valid) 29 | _num_data_type = 2; 30 | else 31 | _num_data_type = 1; 32 | } 33 | 34 | template 35 | TrainModel::~TrainModel() { 36 | delete _model_component; 37 | delete _load_layer; 38 | } 39 | 40 | template 41 | void TrainModel::parseNetJson(string json_file) { 42 | Json::Reader reader; 43 | Json::Value root; 44 | ifstream fin(json_file.c_str()); 45 | if (reader.parse(fin, root)) { 46 | _model_component->_minibatch_size = root["minibatch_size"].asInt(); 47 | Param::setMinibatchSize(_model_component->_minibatch_size); 48 | 49 | _model_component->_num_epoch = root["num_epoch"].asInt(); 50 | _model_component->_img_height = root["img_height"].asInt(); 51 | _model_component->_img_width = root["img_width"].asInt(); 52 | _model_component->_img_channel = root["img_channel"].asInt(); 53 | 54 | cout << "\n===========overall==============" \ 55 | << "\nnum_epoch: " << _model_component->_num_epoch \ 56 | << "\nbatchSize: " << _model_component->_minibatch_size; 57 | 58 | 59 | _model_component->_num_layers = root["layer"].size(); 60 | 61 | string layer_type, name; 62 | int pad_height, pad_width, stride_height, stride_width; 63 | int filter_height, filter_width, filter_channel, num_out, num_in; 64 | float w_lr, bias_lr, momentum, weight_decay, w_gauss; 65 | string p_type; 66 | Param* param; 67 | 68 | for (int i = 0; i < _model_component->_num_layers; ++i) { 69 | layer_type = root["layer"][i]["type"].asString(); 70 | name = root["layer"][i]["name"].asString(); 71 | if (!root["layer"][i]["filter_height"].isNull()) { 72 | pad_height = root["layer"][i]["pad_height"].asInt(); 73 | pad_width = root["layer"][i]["pad_width"].asInt(); 74 | stride_height = root["layer"][i]["stride_height"].asInt(); 75 | stride_width = root["layer"][i]["stride_width"].asInt(); 76 | filter_height = root["layer"][i]["filter_height"].asInt(); 77 | filter_width = root["layer"][i]["filter_width"].asInt(); 78 | } 79 | if (!root["layer"][i]["w_lr"].isNull()) { 80 | w_lr = root["layer"][i]["w_lr"].asFloat(); 81 | bias_lr = root["layer"][i]["bias_lr"].asFloat(); 82 | momentum = root["layer"][i]["momentum"].asFloat(); 83 | weight_decay = root["layer"][i]["weight_decay"].asFloat(); 84 | w_gauss = root["layer"][i]["w_gauss"].asFloat(); 85 | } 86 | if (!root["layer"][i]["num_out"].isNull()) { 87 | num_out = root["layer"][i]["num_out"].asInt(); 88 | } 89 | if (!root["layer"][i]["num_in"].isNull()) { 90 | num_in = root["layer"][i]["num_in"].asInt(); 91 | } 92 | if (!root["layer"][i]["pool_type"].isNull()) { 93 | p_type = root["layer"][i]["pool_type"].asString(); 94 | } 95 | if (!root["layer"][i]["filter_channel"].isNull()) { 96 | filter_channel = root["layer"][i]["filter_channel"].asInt(); 97 | }else{ 98 | filter_channel = 0; 99 | } 100 | if (layer_type == "CONVOLUTION") { 101 | if (_model_component->_layers_param.size() == 0) { 102 | param = new ConvParam( \ 103 | _model_component->_string_map_layertype[layer_type], \ 104 | name, w_lr, bias_lr, momentum, weight_decay, w_gauss, \ 105 | _model_component->_img_height, _model_component->_img_width, \ 106 | pad_height, pad_width, stride_height, stride_width, \ 107 | _model_component->_img_channel, filter_height, \ 108 | 
filter_width, filter_channel); 109 | } else{ 110 | param = new ConvParam( \ 111 | _model_component->_string_map_layertype[layer_type], \ 112 | name, w_lr, bias_lr, momentum, weight_decay, w_gauss, \ 113 | pad_height, pad_width, stride_height, stride_width, \ 114 | filter_height, filter_width, filter_channel, \ 115 | dynamic_cast( \ 116 | _model_component->_layers_param.back())); 117 | } 118 | } else if (layer_type == "POOLING") { 119 | param = new PoolParam( \ 120 | _model_component->_string_map_layertype[layer_type], \ 121 | name, pad_height, pad_width, stride_height, stride_width, \ 122 | filter_height, filter_width, 0, \ 123 | dynamic_cast( \ 124 | _model_component->_layers_param.at( \ 125 | _model_component->_layers_param.size() - 2)), \ 126 | _model_component->_string_map_pooltype[p_type]); 127 | } else if (layer_type == "SIGMOID" || layer_type == "RECTIFIED" \ 128 | || layer_type == "SOFTMAX" || layer_type == "DROPOUT") { 129 | param = new FullConnectParam( \ 130 | _model_component->_string_map_layertype[layer_type], \ 131 | name, 0, _model_component->_layers_param.back()); 132 | } else if (layer_type == "INNERPRODUCT" ) { 133 | if (_model_component->_layers_param.size() == 0) { 134 | num_in = _model_component->_img_height \ 135 | * _model_component->_img_width \ 136 | * _model_component->_img_channel; 137 | param = new InnerParam( \ 138 | _model_component->_string_map_layertype[layer_type], \ 139 | name, w_lr, bias_lr, momentum, weight_decay, w_gauss, \ 140 | num_in, num_out); 141 | }else{ 142 | param = new InnerParam( \ 143 | _model_component->_string_map_layertype[layer_type], \ 144 | name, w_lr, bias_lr, momentum, weight_decay, w_gauss, \ 145 | num_out, _model_component->_layers_param.back()); 146 | } 147 | } else if(layer_type == "PREDICTOBJECT"){ 148 | param = new FullConnectParam( \ 149 | _model_component->_string_map_layertype[layer_type], \ 150 | name, 0, _model_component->_layers_param.back()); 151 | } else if(layer_type == "RECOMMENDSUBSTITUE"){ 152 | param = new FullConnectParam( \ 153 | _model_component->_string_map_layertype[layer_type], \ 154 | name, num_out, _model_component->_layers_param.back()); 155 | } else if(layer_type == "RECOMMENDCOMPATIBLE"){ 156 | param = new FullConnectParam( \ 157 | _model_component->_string_map_layertype[layer_type], \ 158 | name, num_out, _model_component->_layers_param.back()); 159 | } 160 | param->printParam(); 161 | _model_component->_layers_param.push_back(param); 162 | 163 | if (param->getParamTrainType() == NEED) { 164 | _model_component->_layers_need_train_param.push_back(param); 165 | _model_component->_num_need_train_layers++; 166 | } 167 | } 168 | } 169 | _model_component->_one_img_len = _model_component->_img_width \ 170 | *_model_component->_img_height \ 171 | *_model_component->_img_channel; 172 | } 173 | 174 | template 175 | void TrainModel::createLayer(){ 176 | cout << _model_component->_num_layers << endl; 177 | for (int i = 0; i < _model_component->_num_layers; ++i){ 178 | Layer *layer; 179 | Param *param = _model_component->_layers_param[i]; 180 | try{ 181 | if (param->getLayerType() == CONVOLUTION) { 182 | LocalConnectParam* lcp = dynamic_cast(param); 183 | if(lcp == NULL) 184 | throw 5; 185 | layer = new ConvNet(dynamic_cast(lcp)); 186 | } else if (param->getLayerType() == POOLING) { 187 | layer = new PoolingLayer(dynamic_cast(param)); 188 | } else if (param->getLayerType() == SIGMOID) { 189 | layer = new SigmoidLayer(dynamic_cast(param)); 190 | } else if (param->getLayerType() == RECTIFIED) { 191 | layer = new 
ReluLayer(dynamic_cast(param)); 192 | } else if (param->getLayerType() == SOFTMAX) { 193 | layer = new Logistic(dynamic_cast(param)); 194 | } else if (param->getLayerType() == DROPOUT) { 195 | layer = new DropoutLayer(dynamic_cast(param)); 196 | } else if (param->getLayerType() == INNERPRODUCT ) { 197 | FullConnectParam* fcp = dynamic_cast(param); 198 | layer = new InnerProductLayer(dynamic_cast(fcp)); 199 | } 200 | }catch(int e){ 201 | cout << "dynamic point is null\n"; 202 | } 203 | 204 | layer->initCuda(); 205 | _model_component->_layers.push_back(layer); 206 | 207 | if (param->getParamTrainType() == NEED) { 208 | _model_component->_layers_needed_train.push_back(layer); 209 | } 210 | } 211 | } 212 | 213 | template 214 | void TrainModel::createWBias() { 215 | for (int i = 0; i < _model_component->getNumNeedTrainLayers(); ++i) { 216 | TrainLayer* tl = dynamic_cast*>( \ 217 | _model_component->_layers_needed_train[i]); 218 | _model_component->_w.push_back(tl->getW()); 219 | _model_component->_bias.push_back(tl->getBias()); 220 | _model_component->_w_len.push_back(tl->getW()->getNumEles()); 221 | _model_component->_bias_len.push_back(tl->getBias()->getNumEles()); 222 | } 223 | } 224 | 225 | template 226 | void TrainModel::createYDEDY() { 227 | _model_component->_y.push_back(_model_component->_mini_data); 228 | _model_component->_y_needed_train.push_back(_model_component->_mini_data); 229 | for (int i = 0; i < _model_component->_num_layers; ++i){ 230 | _model_component->_y.push_back( \ 231 | _model_component->_layers[i]->getY()); 232 | _model_component->_dE_dy.push_back( \ 233 | _model_component->_layers[i]->getDEDY()); 234 | if (_model_component->_layers_param[i]->getParamTrainType() == NEED \ 235 | && i > 0) { 236 | ///> 为了反向对weight和bias求导时要用到 237 | _model_component->_y_needed_train.push_back( \ 238 | _model_component->_layers[i-1]->getY()); 239 | } 240 | } 241 | } 242 | 243 | template 244 | void TrainModel::initWeightByRandom() { 245 | 246 | srand((unsigned)time(NULL)); 247 | for (int k = 0; k < _model_component->_num_need_train_layers; ++k) { 248 | gaussRand(_model_component->_w[k], \ 249 | dynamic_cast( \ 250 | _model_component->_layers_need_train_param[k])->getWGauss()); 251 | cudaMemset(_model_component->_bias[k]->getDevData(), 0, \ 252 | sizeof(float) * _model_component->_bias_len[k]); 253 | } 254 | } 255 | 256 | template 257 | void TrainModel::initWeightByFile(vector w_file, \ 258 | vector bias_file) { 259 | for (int k = 0; k < _model_component->_num_need_train_layers; ++k) { 260 | _model_component->_w[k]->readPars(w_file[k]); 261 | _model_component->_bias[k]->readPars(bias_file[k]); 262 | } 263 | } 264 | 265 | template 266 | void TrainModel::forwardPropagate(){ 267 | for (int k = 0; k < _model_component->_num_layers-1; ++k) { 268 | _model_component->_layers[k]->computeOutput(\ 269 | _model_component->_y[k]); 270 | } 271 | } 272 | 273 | template 274 | void TrainModel::backwardPropagate(){ 275 | for (int k = _model_component->_num_layers-2; k > 0; --k) { 276 | _model_component->_layers[k]->computeDerivsOfInput( \ 277 | _model_component->_dE_dy[k-1]); 278 | } 279 | } 280 | 281 | template 282 | void TrainModel::computeAndUpdatePars(){ 283 | for (int k = _model_component->_num_need_train_layers-1; k >= 0; --k) { 284 | TrainLayer *tl = dynamic_cast< TrainLayer* >( \ 285 | _model_component->_layers_needed_train[k]); 286 | tl->computeDerivsOfPars(_model_component->_y_needed_train[k]); 287 | tl->updatePars(); 288 | } 289 | } 290 | 291 | template 292 | void TrainModel::earlyStopping(int 
epoch_idx) { 293 | if(_strip_likelihood.size() == 0){ 294 | _min_likelihood = _likelihood; 295 | _min_error = _error; 296 | _min_epoch = epoch_idx; 297 | _strip_likelihood.push_back(_likelihood); 298 | }else if(_strip_likelihood.size() < _num_strip){ 299 | _strip_likelihood.push_back(_likelihood); 300 | if(_min_likelihood > _likelihood){ 301 | _min_likelihood = _likelihood; 302 | _min_error = _error; 303 | _min_epoch = epoch_idx; 304 | } 305 | }else if(_strip_likelihood.size() == _num_strip){ 306 | if(_min_likelihood > _likelihood){ 307 | _min_likelihood = _likelihood; 308 | _min_error = _error; 309 | _min_epoch = epoch_idx; 310 | } 311 | _strip_likelihood.erase(_strip_likelihood.begin()); 312 | _strip_likelihood.push_back(_likelihood); 313 | 314 | double tmp = 0; 315 | 316 | vector::iterator min_value = min_element(_strip_likelihood.begin(), _strip_likelihood.end()); 317 | 318 | double generalization_loss = 100*(_likelihood/_min_likelihood - 1); 319 | double progress_loss = 1000 * (tmp / (_num_strip*(*min_value)) - 1); 320 | 321 | cout << generalization_loss << ":" << progress_loss << endl; 322 | 323 | if(generalization_loss / progress_loss > 0.8) 324 | _is_stop = true; 325 | }else{ 326 | cerr << "early Stopping parameters are wrong." << endl; 327 | exit(EXIT_FAILURE); 328 | } 329 | } 330 | 331 | 332 | 333 | 334 | 335 | 336 | -------------------------------------------------------------------------------- /dl/src/utils.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "utils.cuh" 3 | 4 | using namespace std; 5 | 6 | void printTime(clock_t &t, string s){ 7 | t = clock() - t; 8 | cout << "\n"<< s << ": " << ((float)t/CLOCKS_PER_SEC) << " s."; 9 | t = clock(); 10 | } 11 | 12 | void initW(Matrix* nvMat){ 13 | int length = nvMat->getNumRows() * nvMat->getNumCols(); 14 | float* a = new float[length]; 15 | srand((unsigned)time(NULL)); 16 | float bound = sqrt(1.0 / length); 17 | for(int i = 0; i < length; i++){ 18 | int k = rand() % 200; 19 | if(k < 100) 20 | a[i] = (k/100.0)*(-bound); 21 | else 22 | a[i] = ((k - 100)/100.0)*bound; 23 | } 24 | nvMat->copyFromHost(a, length); 25 | delete a; 26 | } 27 | 28 | void gaussRand(Matrix* nvMat, float var, float mean){ 29 | int length = nvMat->getNumRows() * nvMat->getNumCols(); 30 | float* a = new float[length]; 31 | // std::default_random_engine generator; 32 | // std::normal_distribution distribution(mean, var); 33 | 34 | for(int i = 0; i < length; i++){ 35 | // float k = distribution(generator); 36 | if(var == 0) 37 | a[i] = 0.0f; 38 | else 39 | a[i] = gaussGen(var, mean); 40 | } 41 | nvMat->copyFromHost(a, length); 42 | delete a; 43 | } 44 | 45 | void gaussRand(float *w, int length, float var, float mean){ 46 | // std::default_random_engine generator; 47 | // std::normal_distribution distribution(mean, var); 48 | 49 | for(int i = 0; i < length; i++){ 50 | // float k = distribution(generator); 51 | if(var == 0) 52 | w[i] = 0.0f; 53 | else 54 | w[i] = gaussGen(var, mean); 55 | } 56 | } 57 | 58 | float gaussGen(float var, float mean) 59 | { 60 | static float V1, V2, S; 61 | static int phase = 0; 62 | float X; 63 | 64 | if ( phase == 0 ) { 65 | do { 66 | float U1 = (float)rand() / RAND_MAX; 67 | float U2 = (float)rand() / RAND_MAX; 68 | 69 | V1 = 2 * U1 - 1; 70 | V2 = 2 * U2 - 1; 71 | S = V1 * V1 + V2 * V2; 72 | } while(S >= 1 || S == 0); 73 | 74 | X = V1 * sqrt(-2 * log(S) / S); 75 | } else 76 | X = V2 * sqrt(-2 * log(S) / S); 77 | 78 | phase = 1 - phase; 79 | 80 | return (X * var + mean); 81 | } 82 | 
83 | void readData(Matrix* nvData, string filename, \ 84 | bool isData, int addZerosInFront){ 85 | int length = nvData->getNumRows() * nvData->getNumCols(); 86 | ifstream fin(filename.c_str(), ios::binary); 87 | float* data = new float[length]; 88 | char* readData = new char[length]; 89 | fin.read(readData + addZerosInFront, length - addZerosInFront); 90 | for(int i = 0; i < length; i++){ 91 | if(i < addZerosInFront) 92 | readData[i] = 0; 93 | unsigned char tmp = readData[i]; 94 | if(isData){ 95 | data[i] = (int)tmp / 255.0; 96 | } 97 | else 98 | data[i] = (int)tmp; 99 | } 100 | nvData->copyFromHost(data, length); 101 | fin.close(); 102 | delete[] data; 103 | delete[] readData; 104 | } 105 | 106 | -------------------------------------------------------------------------------- /dl/test/test.cu: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include "blob.cuh" 3 | 4 | int main(){ 5 | std::cout << "hi" << std::endl; 6 | } 7 | -------------------------------------------------------------------------------- /guichuideng/12345vs678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/12345vs678.png -------------------------------------------------------------------------------- /guichuideng/1234678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/1234678.png -------------------------------------------------------------------------------- /guichuideng/1234vs5678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/1234vs5678.png -------------------------------------------------------------------------------- /guichuideng/12578vs346.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/12578vs346.png -------------------------------------------------------------------------------- /guichuideng/125vs34678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/125vs34678.png -------------------------------------------------------------------------------- /guichuideng/125vs34vs678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/125vs34vs678.png -------------------------------------------------------------------------------- /guichuideng/README: -------------------------------------------------------------------------------- 1 | This code treats the prepositions, adverbs and particles that appear in Guichuideng (鬼吹灯) as feature characters, counts how many times each of them occurs in every 10,000 characters, and uses the follow-up analysis to check whether the writing style changed over the course of the Guichuideng series. 2 | 3 | You need to download the Guichuideng txt files yourself and save the 8 books separately in the section folder, named 1_1.txt, 1_2.txt ... 2_4.txt. 4 | 5 | feature_count.py counts how many times each feature character appears in every 10,000 characters; count/1_1_1_feature_count.txt holds the counts for one such 10,000-character chunk. The results are also saved to a binary file for later reading, in the format: which book - which 10,000-character chunk - a vector of length 601. 6 | 7 | reduction.py performs dimensionality reduction with PCA/t-SNE and plots the result. 8 | 9 | anaylse.py directly plots line charts of how many times each feature character appears. 10 | 11 | lr.py implements logistic regression for binary classification. 12 | --------------------------------------------------------------------------------
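A minimal sketch of the counting scheme the README describes (one count vector per 10,000 characters of a book). This is illustrative only: the helper name count_features_per_10k is made up, it counts single characters directly instead of running the jieba segmentation that feature_count.py below uses, and it assumes feature.txt and section/1_1.txt exist as described above.

    # -*- coding: utf-8 -*-
    # Hypothetical sketch of the per-10,000-character feature counting described in the README.
    import codecs

    def count_features_per_10k(text, feature_chars, window=10000):
        # One count vector (one entry per feature character) for every 10,000
        # non-whitespace characters of the book.
        vectors = []
        counts = dict((c, 0) for c in feature_chars)
        seen = 0
        for ch in text:
            if ch in (u'\n', u'\t', u' '):
                continue
            if ch in counts:
                counts[ch] += 1
            seen += 1
            if seen % window == 0:
                vectors.append([counts[c] for c in feature_chars])
                counts = dict((c, 0) for c in feature_chars)
        return vectors

    # Example usage (paths as in the scripts below):
    # features = [c for c in codecs.open('feature.txt', 'r', 'utf-8').read() if not c.isspace()]
    # text = codecs.open('section/1_1.txt', 'r', 'utf-8').read()
    # vectors = count_features_per_10k(text, features)  # len(vectors[0]) == number of feature characters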
/guichuideng/anaylse.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -* 3 | import codecs 4 | import numpy as np 5 | import pickle 6 | import itertools 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | file_read = open('input_features.bin', 'rb') 11 | s = file_read.read() 12 | input_features = pickle.loads(s) 13 | file_read.close() 14 | 15 | X = [] 16 | num_1 = 0 17 | for section_id in input_features: 18 | if section_id < 4: 19 | num_1 += len(input_features[section_id]) 20 | X.append(input_features[section_id]) 21 | 22 | Y = np.array(list(itertools.chain.from_iterable(X))) 23 | 24 | idx = np.linspace(0, len(Y[0])-1, num=len(Y[0]), dtype=np.int) 25 | np.random.shuffle(Y[num_1:]) 26 | 27 | print num_1 28 | 29 | for i in range(10): 30 | plt.plot(idx, Y[i+num_1]) 31 | plt.xlabel('Feature ID') 32 | plt.ylabel('Feature Count') 33 | plt.title('5~8 Feature Appearence Frequency') 34 | plt.show() 35 | 36 | -------------------------------------------------------------------------------- /guichuideng/feature.txt: -------------------------------------------------------------------------------- 1 | 乃 乌 乍 了 一 万 无 不 专 2 | 业 东 且 世 两 习 也 乱 举 3 | 公 共 其 具 勿 匆 决 况 净 4 | 历 分 初 刚 划 列 则 别 刬 5 | 剩 兀 允 光 先 兜 亏 互 亘 6 | 亟 匪 匿 阳 阴 阿 除 陡 险 7 | 都 隐 兹 兼 几 凡 即 却 再 8 | 罔 力 加 务 动 劣 勤 从 今 9 | 会 佥 仅 仍 休 但 何 侪 便 10 | 俄 俪 侵 信 俶 倒 健 俱 倏 11 | 假 偶 偏 偷 偕 傍 傥 傻 全 12 | 单 卒 南 亢 交 亦 亲 亶 讫 13 | 讵 许 识 诚 该 试 询 诮 诺 14 | 谛 谟 又 及 反 取 叠 芴 茀 15 | 苟 苦 荐 莫 蓦 蔑 径 很 徒 16 | 得 微 迄 还 近 连 迭 迥 逆 17 | 适 递 通 造 逐 逼 遂 逾 遽 18 | 寻 将 大 夫 太 奉 奇 奈 奄 19 | 飞 干 平 并 幸 巨 巧 左 差 20 | 弥 强 底 庚 庶 庸 廑 已 希 21 | 常 可 叵 只 合 各 同 向 否 22 | 咋 哪 咸 哿 唯 啻 善 嗣 嘣 23 | 噎 驯 骊 骎 骤 间 阑 阖 宁 24 | 安 定 审 实 宛 宜 害 容 宿 25 | 寔 寝 寡 好 妄 姑 姗 始 委 26 | 娄 犹 独 狠 猝 猛 岂 岗 崭 27 | 尽 层 展 屡 饱 才 扔 扩 挺 28 | 捴 擅 汔 沉 泛 没 浑 活 洒 29 | 洵 浸 浪 混 渐 深 滋 滚 溘 30 | 滥 溜 满 漫 潜 约 纯 终 给 31 | 绝 统 绷 缕 在 坏 坚 均 垂 32 | 填 增 固 多 少 尚 忝 尝 快 33 | 怫 怪 恒 恍 恰 恬 恂 惟 慌 34 | 愣 慎 慢 憬 尤 就 备 复 夐 35 | 子 孔 孛 财 赆 贼 赖 比 毕 36 | 焉 煞 长 较 辄 死 殆 殊 斗 37 | 危 方 旅 旋 风 成 或 所 烂 38 | 既 斩 断 老 毫 本 未 权 杀 39 | 杂 极 条 果 枚 枉 棐 概 横 40 | 特 改 放 故 敢 欻 日 早 时 41 | 昆 明 是 晃 暂 暗 暴 手 拜 42 | 永 毋 必 忒 忽 总 恶 恚 恐 43 | 恣 悉 愈 憙 更 曷 曾 最 朅 44 | 有 肯 朋 胡 胜 胥 脱 腾 臆 45 | 止 正 此 白 的 皆 登 甚 私 46 | 稍 稀 稔 立 竟 端 竭 盍 益 47 | 盛 盗 盖 每 直 相 真 睋 瞥 48 | 痛 生 砀 确 硬 碜 磕 申 畅 49 | 略 率 究 空 窃 突 窘 甫 蚤 50 | 蛮 聊 良 虚 类 粗 精 紧 素 51 | 綦 齐 舒 覃 覆 行 翻 肆 肇 52 | 至 致 笃 第 等 簇 自 臭 重 53 | 身 躬 豫 酣 酷 貌 赵 起 越 54 | 足 跃 踽 非 雅 魆 首 黕 默 55 | 黩 齁 顾 须 颇 顶 顿 频 顺 56 | 裁 57 | 临 乎 与 为 共 冲 到 兜 于 58 | 即 从 以 似 假 去 让 诸 及 59 | 往 迆 连 迎 道 遵 对 导 寻 60 | 将 当 叫 吃 合 同 向 和 问 61 | 如 尽 打 执 把 投 拦 按 捉 62 | 洎 给 维 缘 在 因 惟 就 比 63 | 照 较 方 爿 暨 拿 替 望 朝 64 | 爰 直 由 率 被 用 繇 齐 至 65 | 管 自 起 趁 践 跟 66 | 么 了 与 不 且 之 为 兮 其 67 | 到 云 阿 却 个 以 们 价 似 68 | 讫 诸 取 若 得 逝 将 夫 头 69 | 只 吗 向 吧 呗 呃 呀 员 呵 70 | 呢 哇 咦 哟 哉 啊 哩 啵 唻 71 | 啰 唯 嘛 噬 嚜 家 如 掉 给 72 | 维 圪 在 尔 惟 子 赊 焉 然 73 | 旃 所 见 斯 者 来 欤 是 毋 74 | 曰 的 每 看 着 矣 罢 而 耶 75 | 粤 聿 等 言 越 馨 76 | 77 | -------------------------------------------------------------------------------- /guichuideng/feature_count.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -* 3 | import codecs 4 | import numpy 5 | import jieba 6 | import pickle 7 | 8 | def get_feature_word(): 9 | ''' 10 | 切分feature文件用于生成关键字 11 | ''' 12 | feature_file = codecs.open('feature.txt', 'r') 13 | content = feature_file.read() 14 | feature_file.close() 15 | 16 | segments = [] 17 | segs = jieba.cut(content) 18 | 19 | #保存feature关键字 20 | feature_word = [] 21 | for seg in segs: 22 | if seg != '\n' and seg != '\t' and seg != ' ': 23 | feature_word.append(seg) 24 | 25 
| 26 | return feature_word 27 | 28 | class FeatureCount: 29 | feature_count = {} 30 | def __init__(self, feature_word): 31 | #初始化每个关键字出现次数为0 32 | print len(feature_word) 33 | for i in range(len(feature_word)): 34 | self.feature_count[feature_word[i]] = 0 35 | print len(self.feature_count) 36 | 37 | def clear(self): 38 | for feature in self.feature_count: 39 | self.feature_count[feature] = 0 40 | 41 | def get_input_feature_from_one_section(section_name, fc): 42 | ''' 43 | 切分一部书的内容,同时统计上面的每个关键字出现的次数 44 | ''' 45 | section_file = codecs.open('section/'+section_name+'.txt', 'r') 46 | content = section_file.read() 47 | section_file.close() 48 | 49 | segments = [] 50 | segs = jieba.cut(content) 51 | 52 | #统计每一万个字中每个关键字出现次数 53 | input_feature = [] 54 | #用于计算是否到达一万字 55 | i = 0 56 | j = 0 57 | #用于保存每一万字文本 58 | c = '' 59 | 60 | for seg in segs: 61 | c += seg 62 | j += 1 63 | if seg != '\n' and seg != '\t' and seg != ' ': 64 | if seg in fc.feature_count: 65 | fc.feature_count[seg] += 1 66 | i += 1 67 | 68 | if i % 10000 == 0 or seg == object(): 69 | input_feature.append(fc.feature_count.values()) 70 | 71 | #保存这一万字中关键词出现次数 72 | output = codecs.open('count/'+section_name+'_' 73 | +str(i/10000)+'_feature_count.txt','w','utf-8') 74 | for feature in fc.feature_count: 75 | output.write(feature+'\t'+str(fc.feature_count[feature])+'\n') 76 | fc.clear() 77 | 78 | #保持一万字文本,这里制表符和换行符没有计算在内 79 | output = codecs.open('segment/'+section_name+'_' 80 | +str(i/10000)+'.txt','w','utf-8') 81 | output.write(c) 82 | c = '' 83 | 84 | print section_name, i, j, len(input_feature) 85 | 86 | return input_feature 87 | 88 | feature_word = get_feature_word() 89 | 90 | fc = FeatureCount(feature_word) 91 | 92 | sections = ['1_1', '1_2', '1_3', '1_4', '2_1', '2_2', '2_3', '2_4'] 93 | input_features = {} 94 | for i in range(len(sections)): 95 | input_features[i] = get_input_feature_from_one_section(sections[i], fc) 96 | 97 | #将逻辑回归输入先保存下来 98 | input_file = open('input_features.bin', 'wb') 99 | s = pickle.dumps(input_features) 100 | input_file.write(s) 101 | input_file.close() 102 | 103 | -------------------------------------------------------------------------------- /guichuideng/freq1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/freq1.png -------------------------------------------------------------------------------- /guichuideng/freq2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/freq2.png -------------------------------------------------------------------------------- /guichuideng/lr.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # -*- coding: utf-8 -* 4 | import mxnet as mx 5 | import numpy as np 6 | import pickle 7 | import itertools 8 | 9 | def get_train_val(): 10 | 11 | pos_section = np.array([0,1,6,7]) 12 | 13 | file_read = open('input_features.bin', 'rb') 14 | s = file_read.read() 15 | input_features = pickle.loads(s) 16 | file_read.close() 17 | 18 | X = [] 19 | for section_id in input_features: 20 | X.append(input_features[section_id]) 21 | 22 | X = np.array(list(itertools.chain.from_iterable(X))) 23 | 24 | Y = [] 25 | # 26 | for section_id in input_features: 27 | for i in range(len(input_features[section_id])): 28 | if section_id in pos_section: 29 | Y.append(1) 30 | else: 31 
| Y.append(0) 32 | Y = np.array(Y) 33 | 34 | idx = np.linspace(0, len(Y)-1, num=len(Y), dtype=np.int) 35 | np.random.shuffle(idx) 36 | idx = idx[:87] 37 | 38 | 39 | train_label = Y[idx] 40 | train_data = X[idx] 41 | val_label = np.delete(Y, idx) 42 | val_data = np.delete(X, idx, axis=0) 43 | 44 | return train_label, train_data, val_label, val_data 45 | 46 | train_label, train_data, val_label, val_data = get_train_val() 47 | 48 | print 'train_data:', train_data.shape 49 | print 'train_label:', train_label.shape 50 | print 'val_data:', val_data.shape 51 | print 'val_label:', val_label.shape 52 | 53 | batch_size = 3 54 | train_iter = mx.io.NDArrayIter(train_data, train_label, batch_size) 55 | val_iter = mx.io.NDArrayIter(val_data, val_label, batch_size) 56 | 57 | import logging 58 | logging.getLogger().setLevel(logging.DEBUG) 59 | 60 | data = mx.sym.Variable('data') 61 | fc = mx.sym.FullyConnected(data=data, name='fc', num_hidden=2) 62 | lr = mx.sym.SoftmaxOutput(data=fc, name='softmax') 63 | 64 | model = mx.model.FeedForward(symbol=lr, num_epoch=100, 65 | learning_rate=0.01) 66 | 67 | model.fit(X = train_iter, eval_data=val_iter, 68 | batch_end_callback = mx.callback.Speedometer(batch_size, 10)) 69 | i = 0 70 | j = 0 71 | m = 0 72 | n = 0 73 | for k in range(39): 74 | if model.predict(val_data)[k].argmax() == 1 and val_label[k] == 1: 75 | i += 1 76 | elif model.predict(val_data)[k].argmax() == 0 and val_label[k] == 1: 77 | j += 1 78 | elif model.predict(val_data)[k].argmax() == 1 and val_label[k] == 0: 79 | m += 1 80 | elif model.predict(val_data)[k].argmax() == 0 and val_label[k] == 0: 81 | n += 1 82 | print '\tPredict 1\tPredict 0' 83 | print 'True 1\t',i,'\t\t',j 84 | print 'True 0\t',m,'\t\t',n 85 | -------------------------------------------------------------------------------- /guichuideng/reduction.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -* 3 | import numpy as np 4 | from sklearn import decomposition, manifold 5 | import pickle 6 | import itertools 7 | import matplotlib.pyplot as plt 8 | import pylab 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | 12 | file_read = open('input_features.bin', 'rb') 13 | s = file_read.read() 14 | input_features = pickle.loads(s) 15 | file_read.close() 16 | 17 | high_dim_input = [] 18 | for section_id in input_features: 19 | high_dim_input.append(input_features[section_id]) 20 | 21 | high_dim_input = np.array(list(itertools.chain.from_iterable(high_dim_input))) 22 | 23 | labels = [] 24 | for section_id in input_features: 25 | for i in range(len(input_features[section_id])): 26 | labels.append(section_id) 27 | labels = np.array(labels) 28 | 29 | ''' 30 | 进行pca降维 31 | ''' 32 | pca = decomposition.PCA(n_components=2) 33 | #isomap = manifold.TSNE(n_components=2, init='pca', random_state=0) 34 | X_input = pca.fit_transform(high_dim_input) 35 | 36 | print 'Percentage of variance explained by each of the selected components:', pca.explained_variance_ratio_ 37 | 38 | colors = [ 39 | '#FC0E77', '#FC0E77', 40 | 'turquoise', 'turquoise', 41 | 'turquoise', 'turquoise', 42 | '#FC0E77', '#FC0E77' 43 | ] 44 | 45 | colors = ['#48A946', '#E55523', '#E5E223', '#23E5DF', '#F70DB4', '#0D77F7','#CD2E7C', '#F70D80'] 46 | markers = ['1', '2', '3', '4', '5', '6', '7', '8'] 47 | 48 | s = [] 49 | for color, i, marker in zip(colors, [0, 1, 2, 3, 4, 5, 6, 7], markers): 50 | s.append(plt.scatter(X_input[labels == i, 0], X_input[labels == i, 1], 51 | color=color, s=100, marker=r"${}$".format(marker))) 52 | 
plt.legend((s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7]), 53 | ('1_1.txt', '1_2.txt', '1_3.txt', '1_4.txt', 54 | '2_1.txt', '2_2.txt', '2_3.txt', '2_4.txt'), loc='lower left') 55 | plt.title('1278 vs 3456') 56 | plt.show() 57 | -------------------------------------------------------------------------------- /rl/cartpole/policy_gradient.py: -------------------------------------------------------------------------------- 1 | 2 | import gym 3 | import numpy as np 4 | 5 | def generate_episode(env, weight): 6 | episode = [] 7 | pre_observation = env.reset() 8 | 9 | t = 0 10 | #generate 1 episodes for training. 11 | while 1: 12 | #env.render() 13 | pi, action = choose_action(weight, pre_observation) 14 | 15 | observation, reward, done, info = env.step(action) 16 | episode.append([pre_observation, action, pi, reward]) 17 | pre_observation = observation 18 | 19 | t += 1 20 | if done or t > 1000: 21 | break 22 | return episode 23 | 24 | def evaluate_given_parameter_sigmoid(env, weight): 25 | observation = env.reset() 26 | total_reward = 0. 27 | for t in range(1000): 28 | env.render() 29 | weighted_sum = np.dot(weight, observation) 30 | pi = 1 / (1 + np.exp(-weighted_sum)) 31 | if pi > 0.5: 32 | action = 1 33 | else: 34 | action = 0 35 | 36 | observation, reward, done, info = env.step(action) 37 | total_reward += reward 38 | if done: 39 | break 40 | return total_reward 41 | 42 | def monte_carlo_policy_gradient(env): 43 | 44 | learning_rate = -0.0001 45 | best_reward = -100.0 46 | 47 | weight = np.random.rand(4) 48 | 49 | for iiter in xrange(1000): 50 | 51 | cur_episode = generate_episode(env, weight) 52 | for t in range(len(cur_episode)): 53 | 54 | observation, action, pi, reward = cur_episode[t] 55 | 56 | #update theta 57 | weight += learning_rate*(1-pi)*np.transpose(-observation)*reward 58 | 59 | cur_reward = evaluate_given_parameter_sigmoid(env, weight) 60 | print 'Monte-Carlo policy gradient get reward', cur_reward 61 | 62 | def choose_action(weight, observation): 63 | 64 | weighted_sum = np.dot(weight, observation) 65 | pi = 1 / (1 + np.exp(-weighted_sum)) 66 | if pi > 0.5: 67 | action = 1 68 | else: 69 | action = 0 70 | return pi, action 71 | 72 | def actor_critic_policy_gradient(env): 73 | gamma = 1 74 | 75 | p_weight = np.random.rand(4) 76 | 77 | #weight for value function 78 | v_weight = np.random.rand(4) 79 | 80 | p_learning_rate = -0.0001 81 | v_learning_rate = -0.0001 82 | 83 | done = True 84 | 85 | for iiter in xrange(1000): 86 | 87 | t = 0 88 | while 1: 89 | if done: 90 | print 'start new training...' 
91 | print 'p_weight', p_weight 92 | print 'v_weight', v_weight 93 | 94 | pre_observation = env.reset() 95 | pre_pi, pre_action = choose_action(p_weight, pre_observation) 96 | 97 | pre_phi = pre_observation 98 | pre_q = np.dot(v_weight, pre_phi) 99 | 100 | #env.render() 101 | 102 | observation, reward, done, info = env.step(pre_action) 103 | 104 | pi, action = choose_action(p_weight, observation) 105 | 106 | phi = observation 107 | q = np.dot(v_weight, phi) 108 | 109 | delta = reward + gamma*q - pre_q 110 | 111 | p_weight += p_learning_rate*(1-pre_pi)*np.transpose(-pre_observation)*pre_q 112 | 113 | v_weight += v_learning_rate*delta*np.transpose(pre_phi) 114 | 115 | pre_pi = pi 116 | pre_observation = observation 117 | pre_q = q 118 | pre_phi = phi 119 | pre_action = action 120 | 121 | t += 1 122 | if done: 123 | break 124 | 125 | cur_reward = evaluate_given_parameter_sigmoid(env, p_weight) 126 | print 'Actor critic policy gradient get reward', cur_reward 127 | 128 | env = gym.make('CartPole-v0') 129 | 130 | #env.monitor.start('cartpole-hill/', force=True) 131 | actor_critic_policy_gradient(env) 132 | #env.monitor.close() 133 | 134 | monte_carlo_policy_gradient(env) 135 | -------------------------------------------------------------------------------- /rl/cartpole/random_guess_hill_climbing.py: -------------------------------------------------------------------------------- 1 | 2 | import gym 3 | import numpy as np 4 | 5 | def evaluate_given_parameter_by_sign(env, weight): 6 | observation = env.reset() 7 | total_reward = 0. 8 | for t in range(1000): 9 | env.render() 10 | weighted_sum = np.dot(weight, observation) 11 | if weighted_sum >= 0: 12 | action = 1 13 | else: 14 | action = 0 15 | 16 | observation, reward, done, info = env.step(action) 17 | total_reward += reward 18 | if done: 19 | break 20 | return total_reward 21 | 22 | def random_guess(): 23 | env = gym.make('CartPole-v0') 24 | np.random.seed(10) 25 | best_reward = -100.0 26 | 27 | for iiter in xrange(1000): 28 | weight = np.random.rand(4) 29 | 30 | cur_reward = evaluate_given_parameter_by_sign(env, weight) 31 | if cur_reward > best_reward: 32 | best_reward = cur_reward 33 | best_weight = weight 34 | 35 | if best_reward == 1000: 36 | break 37 | 38 | print("Random guess algorithm best reward", best_reward) 39 | print("Random guess algorithm best weight", best_weight) 40 | 41 | def hill_climbing(): 42 | env = gym.make('CartPole-v0') 43 | best_reward = -100.0 44 | np.random.seed(10) 45 | best_weight = np.random.rand(4) 46 | 47 | for iiter in xrange(1000): 48 | weight = best_weight + np.random.normal(0, 0.01, 4) 49 | 50 | cur_reward = evaluate_given_parameter_by_sign(env, weight) 51 | if cur_reward > best_reward: 52 | best_reward = cur_reward 53 | best_weight = weight 54 | 55 | if best_reward == 1000: 56 | break 57 | 58 | print("Hill climbing algorithm best reward", best_reward) 59 | print("Hill climbing algorithm best weight", best_weight) 60 | 61 | random_guess() 62 | hill_climbing() 63 | -------------------------------------------------------------------------------- /rl/cartpole/upload.py: -------------------------------------------------------------------------------- 1 | 2 | import gym 3 | 4 | gym.upload('deep-learning/rl/cartpole-hill') 5 | -------------------------------------------------------------------------------- /tf_autoencoder/README.md: -------------------------------------------------------------------------------- 1 | This autoencoder.py implements the deep autoencoder network. 
Interfaces of the autoencoder are the same as sklearn's Manifold Learning. 2 | 3 | * fit(X) Fit the autoencoder network to data X. 4 | * fit_transform(X) Fit the model from data in X and transform X. 5 | * get_params() Get parameters of this network. 6 | * reconstruction_error(X) Compute the reconstruction error for the data X. 7 | * set_params() Set the parameters which come from the saved file. 8 | * transform(X) Transform X. 9 | 10 | test.py is an example that reduces the 28*28 mnist dataset images into 2 dimensions and visualizes them. 11 | 12 | I adapted some code from [Variational Autoencoder in TensorFlow](https://jmetzen.github.io/2015-11-27/vae.html) into this autoencoder network in tensorflow. 13 | -------------------------------------------------------------------------------- /tf_autoencoder/autoencoder.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import tensorflow as tf 4 | import time 5 | 6 | def xavier_init(fan_in, fan_out, constant=1): 7 | """ Xavier initialization of network weights""" 8 | low = -constant*np.sqrt(6.0/(fan_in + fan_out)) 9 | high = constant*np.sqrt(6.0/(fan_in + fan_out)) 10 | return tf.random_uniform((fan_in, fan_out), 11 | minval=low, maxval=high, 12 | dtype=tf.float32) 13 | 14 | class Autoencoder(object): 15 | """Initialize the autoencoder neural network 16 | 17 | Attributes: 18 | sess: the tensorflow session 19 | network_architecture: number of units in each layer 20 | transfer_fct: activation function, default is sigmoid 21 | lr: learning rate 22 | batch_size: training batch size 23 | """ 24 | def __init__(self, sess, network_architecture, 25 | transfer_fct=tf.nn.sigmoid, 26 | learning_rate=0.001, batch_size=100): 27 | """initialize the parameters and construct the whole network""" 28 | 29 | self.network_architecture = network_architecture 30 | self.transfer_fct = transfer_fct 31 | self.lr = learning_rate 32 | self.batch_size = batch_size 33 | 34 | print "units number in each layer: " + str(self.network_architecture) 35 | print "learning rate: " + str(self.lr) 36 | print "batch size is: " + str(self.batch_size) 37 | 38 | #input of whole network 39 | self.x = tf.placeholder(tf.float32, [None, network_architecture[0]]) 40 | self.W = [] 41 | self.bias = [] 42 | 43 | self._create_network() 44 | 45 | self._create_loss_optimizer() 46 | 47 | init = tf.initialize_all_variables() 48 | 49 | self.sess = sess 50 | self.sess.run(init) 51 | 52 | self.saver = tf.train.Saver(tf.trainable_variables()) 53 | 54 | def _create_network(self): 55 | """according to the number of units in each layer, initialize 56 | weight and bias, then connect the whole net.
57 | 58 | """ 59 | self._create_forward(self.x) 60 | self._create_backward(self.y) 61 | 62 | def _create_forward(self, x): 63 | for i in xrange(1, len(self.network_architecture)): 64 | y = self._create_one_layer(x, self.network_architecture[i], i) 65 | x = y 66 | self.y = y 67 | 68 | def _create_backward(self, x): 69 | for i in xrange(len(self.network_architecture)-2, -1, -1): 70 | y = self._create_one_layer(x, self.network_architecture[i], i, is_encoder=False) 71 | x = y 72 | self.reconstruct_x = y 73 | 74 | def _create_one_layer(self, x, num_out, w_id, is_encoder=True): 75 | """construct one encoder or decoder layer 76 | 77 | Args: 78 | x: input of this layer 79 | num_out: neural number of this layer 80 | w_id: if this layer is decoder, weight of this layer comes from the saved weight list 81 | is_encoder: if True, create the new weight variable 82 | 83 | Returns: 84 | y: output of this layer 85 | """ 86 | if not is_encoder: 87 | weight = tf.transpose(self.W[w_id]) 88 | else: 89 | (batch_size, num_in) = tf.Tensor.get_shape(x).as_list() 90 | weight = tf.Variable(xavier_init(num_in, num_out)) 91 | self.W += [weight] 92 | 93 | bias = tf.Variable(tf.zeros([num_out], dtype=tf.float32)) 94 | self.bias += [bias] 95 | y = self.transfer_fct(tf.add(tf.matmul(x, weight), bias)) 96 | 97 | return y 98 | 99 | def _create_loss_optimizer(self): 100 | """construct the cost function 101 | 102 | reconstruction loss which comes from the cross entropy 103 | 104 | """ 105 | self.cost = -tf.reduce_sum(self.x * tf.log(1e-10 + self.reconstruct_x) 106 | + (1-self.x) * tf.log(1e-10 + 1 - self.reconstruct_x), 1) 107 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.cost) 108 | 109 | def partial_fit(self, X): 110 | """training one batch 111 | 112 | Args: 113 | X: input of this batch 114 | 115 | Returns: 116 | cost: cost of this batch 117 | 118 | """ 119 | opt, cost = self.sess.run((self.optimizer, self.cost), 120 | feed_dict={self.x: X}) 121 | return cost 122 | 123 | def set_params(self): 124 | self.saver.restore(self.sess, "model.ckpt") 125 | 126 | def reconstruction_error(self, X): 127 | return self.sess.run(self.cost, feed_dict={self.x: X}) 128 | 129 | def get_params(self): 130 | return self.W 131 | 132 | def fit(self, X, num_epochs=100): 133 | n_samples = len(X) 134 | total_batch = int(len(X)/self.batch_size) 135 | 136 | t = time.time() 137 | for epoch in xrange(num_epochs): 138 | avg_cost = 0.0 139 | for i in xrange(total_batch): 140 | batch_x = X[i*self.batch_size:(i+1)*self.batch_size] 141 | cost = self.partial_fit(batch_x) 142 | avg_cost += cost 143 | avg_cost = avg_cost / n_samples 144 | 145 | print "Epoch:" + str(epoch) + " cost=" + str(np.mean(avg_cost)) 146 | 147 | if epoch % 10 is 0: 148 | print 'until current epoch ' + str(epoch) +' cost: ' + str(time.time()-t) + ' s.' 
149 | 150 | 151 | #save the net parameters 152 | self.saver.save(self.sess, "model.ckpt") 153 | 154 | def fit_transform(self, X, num_epochs=100): 155 | self.fit(X, num_epochs) 156 | return self.transform(X) 157 | 158 | def transform(self, X): 159 | """transform X 160 | """ 161 | return self.sess.run(self.y, feed_dict={self.x: X}) 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /tf_autoencoder/test.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | import tensorflow as tf 4 | import numpy as np 5 | import autoencoder 6 | 7 | 8 | from tensorflow.examples.tutorials.mnist import input_data 9 | mnist = input_data.read_data_sets('MNIST_data', one_hot=True) 10 | n_samples = mnist.train.num_examples 11 | 12 | sess = tf.Session() 13 | 14 | np.random.seed(0) 15 | tf.set_random_seed(0) 16 | 17 | batch_size=100 18 | num_epochs = 50 19 | display_step=2 20 | network_architecture = [784, 500, 500, 2] 21 | 22 | ae = autoencoder.Autoencoder(sess, network_architecture, batch_size=batch_size) 23 | ae.fit(mnist.train.images, num_epochs) 24 | 25 | x_sample, y_sample = mnist.test.next_batch(5000) 26 | z_mu = ae.transform(x_sample) 27 | plt.figure(figsize=(8, 6)) 28 | plt.scatter(z_mu[:, 0], z_mu[:, 1], c=np.argmax(y_sample, 1)) 29 | plt.colorbar() 30 | plt.savefig('test_ae.png') 31 | 32 | 33 | 34 | --------------------------------------------------------------------------------
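For reference, a minimal usage sketch (not part of the repository) for the two interface methods listed in the README that test.py does not exercise: set_params(), which restores the weights that fit() saved to model.ckpt, and reconstruction_error(X), which returns one cross-entropy value per input row. It assumes the autoencoder.py above, the same MNIST setup as test.py, and that fit() has already been run so model.ckpt exists.

    import numpy as np
    import tensorflow as tf
    import autoencoder
    from tensorflow.examples.tutorials.mnist import input_data

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    sess = tf.Session()
    network_architecture = [784, 500, 500, 2]

    # Rebuild the same graph, then load the weights saved by a previous fit().
    ae = autoencoder.Autoencoder(sess, network_architecture, batch_size=100)
    ae.set_params()

    # Score held-out data; the cost is summed per example, so this is a vector.
    x_sample, _ = mnist.test.next_batch(1000)
    errors = ae.reconstruction_error(x_sample)
    print "mean reconstruction error: " + str(np.mean(errors))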