├── README.md ├── denoise_dcgan ├── README ├── dcgan.py ├── read_stl10.py └── utils.py ├── dl ├── Makefile ├── include │ ├── .data.hpp.swp │ ├── .matrix.hpp.swp │ ├── .param.h.swp │ ├── convnet.hpp │ ├── data.hpp │ ├── dropout_layer.hpp │ ├── inner_product_layer.hpp │ ├── json │ │ ├── json-forwards.h │ │ └── json.h │ ├── layer.hpp │ ├── layer_kernel.cuh │ ├── load_layer.hpp │ ├── logistic.hpp │ ├── matrix.hpp │ ├── matrix_kernel.hpp │ ├── model_component.hpp │ ├── param.h │ ├── pooling_layer.hpp │ ├── relu_layer.hpp │ ├── sigmoid_layer.hpp │ ├── train_classification.hpp │ ├── train_model.hpp │ └── utils.cuh ├── main_src │ └── cifar_classify.cu ├── script │ ├── .ropeproject │ │ ├── config.py │ │ ├── globalnames │ │ ├── history │ │ └── objectdb │ └── cifar10.json ├── src │ ├── convnet.cu │ ├── data.cu │ ├── dropout_layer.cu │ ├── inner_product_layer.cu │ ├── jsoncpp.cpp │ ├── layer_kernel.cu │ ├── load_layer.cpp │ ├── logistic.cu │ ├── matrix.cu │ ├── matrix_kernel.cu │ ├── model_component.cpp │ ├── pooling_layer.cu │ ├── relu_layer.cu │ ├── sigmoid_layer.cu │ ├── train_classification.cpp │ ├── train_model.cpp │ └── utils.cu └── test │ └── test.cu ├── guichuideng ├── 12345vs678.png ├── 1234678.png ├── 1234vs5678.png ├── 12578vs346.png ├── 125vs34678.png ├── 125vs34vs678.png ├── README ├── anaylse.py ├── feature.txt ├── feature_count.py ├── freq1.png ├── freq2.png ├── input_features.bin ├── lr.py └── reduction.py ├── rl └── cartpole │ ├── policy_gradient.py │ ├── random_guess_hill_climbing.py │ └── upload.py └── tf_autoencoder ├── README.md ├── autoencoder.py └── test.py /README.md: -------------------------------------------------------------------------------- 1 | # deep-learning 2 | -------------------------------------------------------------------------------- /denoise_dcgan/README: -------------------------------------------------------------------------------- 1 | This code performs image denoising without tuning parameters such as the number of convolution layers, the learning rate, etc. 2 | The clean images should be .png files saved in ./data/real_images, and the noisy images should be saved in ./data/dataset/noise_images 3 | 4 | read_stl10.py reads the binary file of [stl10](https://cs.stanford.edu/~acoates/stl10/) and saves the images as png files; at the same time it adds Gaussian noise to each image and saves the noisy copies in ./data/dataset/noise_images 5 | 6 | utils.py is copied from [https://github.com/carpedm20/DCGAN-tensorflow](https://github.com/carpedm20/DCGAN-tensorflow); it is used to read the png files in a given directory. 7 | 8 | dcgan.py is used to train the whole network. 9 | 10 | python dcgan.py --dataset stl10_binary --batch_size 64 --image_size 96 --epoch 100 --learning_rate 0.00001 --c_dim 3 11 | 12 | --dataset, the name of the folder in which you save your images, ./data/stl_binary/real_images. 13 | --batch_size, this number affects the final sample images; if you set batch_size to 100, you need to change [8, 8] to [10, 10] at line 262 of dcgan.py. 14 | --c_dim, gray (1) or rgb (3). 
15 | --image_size 16 | --epoch 17 | --learning_rate 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /denoise_dcgan/dcgan.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import os 4 | import time 5 | from glob import glob 6 | from utils import * 7 | 8 | def generator(gdata, img_size, batch_size, c_dim, num_filter): 9 | s2 = img_size/2 10 | s4 = img_size/4 11 | 12 | stddev = 0.001 13 | with tf.variable_scope('g_conv1') as scope: 14 | w = tf.get_variable('w', [4, 4, c_dim, num_filter], 15 | initializer=tf.random_normal_initializer(stddev=stddev)) 16 | gconv = tf.nn.conv2d(gdata, w, strides=[1, 2, 2, 1], 17 | padding='SAME') 18 | biases = tf.get_variable('biases', [num_filter], 19 | initializer=tf.constant_initializer(0.0)) 20 | bias = tf.nn.bias_add(gconv, biases) 21 | gconv1 = tf.nn.relu(bias, name=scope.name) 22 | 23 | with tf.variable_scope('g_conv2') as scope: 24 | w = tf.get_variable('w', [4, 4, num_filter, num_filter*2], 25 | initializer=tf.random_normal_initializer(stddev=stddev)) 26 | gconv = tf.nn.conv2d(gconv1, w, strides=[1, 2, 2, 1], 27 | padding='SAME') 28 | biases = tf.get_variable('biases', [num_filter*2], 29 | initializer=tf.constant_initializer(0.0)) 30 | bias = tf.nn.bias_add(gconv, biases) 31 | gconv2 = tf.nn.relu(bias, name=scope.name) 32 | 33 | with tf.variable_scope('g_deconv1') as scope: 34 | w = tf.get_variable('w', [4, 4, num_filter, num_filter*2], 35 | initializer=tf.random_normal_initializer(stddev=stddev)) 36 | deconv = tf.nn.conv2d_transpose(gconv2, w, 37 | output_shape=[batch_size, s2, s2, num_filter], 38 | strides=[1, 2, 2, 1]) 39 | biases = tf.get_variable('biases', [num_filter], 40 | initializer=tf.constant_initializer(0.0)) 41 | deconv1 = tf.nn.bias_add(deconv, biases) 42 | 43 | with tf.variable_scope('g_deconv2') as scope: 44 | w = tf.get_variable('w', [4, 4, c_dim, num_filter], 45 | initializer=tf.random_normal_initializer(stddev=stddev)) 46 | deconv = tf.nn.conv2d_transpose(deconv1, w, 47 | output_shape=[batch_size, img_size, img_size, c_dim], 48 | strides=[1, 2, 2, 1]) 49 | biases = tf.get_variable('biases', [c_dim], 50 | initializer=tf.constant_initializer(0.0)) 51 | deconv2 = tf.nn.bias_add(deconv, biases) 52 | 53 | return tf.nn.tanh(deconv2) 54 | 55 | def discriminator(ddata, batch_size, c_dim, num_filter, leak, reuse=False): 56 | if reuse: 57 | tf.get_variable_scope().reuse_variables() 58 | 59 | stddev = 0.002 60 | with tf.variable_scope('d_conv1') as scope: 61 | w = tf.get_variable('w', [4, 4, c_dim, num_filter], 62 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 63 | dconv = tf.nn.conv2d(ddata, w, strides=[1, 2, 2, 1], 64 | padding='SAME') 65 | biases = tf.get_variable('biases', [num_filter], 66 | initializer=tf.constant_initializer(0.0)) 67 | bias = tf.nn.bias_add(dconv, biases) 68 | dconv1 = tf.maximum(bias, leak*bias) 69 | 70 | with tf.variable_scope('d_conv2') as scope: 71 | w = tf.get_variable('w', [4, 4, num_filter, num_filter*2], 72 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 73 | dconv = tf.nn.conv2d(dconv1, w, strides=[1, 2, 2, 1], 74 | padding='SAME') 75 | biases = tf.get_variable('biases', [num_filter*2], 76 | initializer=tf.constant_initializer(0.0)) 77 | bias = tf.nn.bias_add(dconv, biases) 78 | dconv2 = tf.maximum(bias, leak*bias) 79 | 80 | with tf.variable_scope('d_conv3') as scope: 81 | w = tf.get_variable('w', [4, 4, num_filter*2, num_filter*4], 82 | 
initializer=tf.truncated_normal_initializer(stddev=stddev)) 83 | dconv = tf.nn.conv2d(dconv2, w, strides=[1, 2, 2, 1], 84 | padding='SAME') 85 | biases = tf.get_variable('biases', [num_filter*4], 86 | initializer=tf.constant_initializer(0.0)) 87 | bias = tf.nn.bias_add(dconv, biases) 88 | dconv3 = tf.maximum(bias, leak*bias) 89 | 90 | with tf.variable_scope('d_conv4') as scope: 91 | w = tf.get_variable('w', [4, 4, num_filter*4, num_filter*8], 92 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 93 | dconv = tf.nn.conv2d(dconv3, w, strides=[1, 2, 2, 1], 94 | padding='SAME') 95 | biases = tf.get_variable('biases', [num_filter*8], 96 | initializer=tf.constant_initializer(0.0)) 97 | bias = tf.nn.bias_add(dconv, biases); dconv4 = tf.maximum(bias, leak*bias) 98 | 99 | with tf.variable_scope('d_local1') as scope: 100 | local_in = tf.reshape(dconv4, [batch_size, -1]) 101 | shape = local_in.get_shape().as_list() 102 | 103 | w = tf.get_variable('w', [shape[1], 1], tf.float32, 104 | tf.random_normal_initializer(stddev=stddev)) 105 | biases = tf.get_variable("biases", [1], 106 | initializer=tf.constant_initializer(0.0)) 107 | dlocal = tf.matmul(local_in, w) + biases 108 | 109 | return tf.nn.sigmoid(dlocal), dlocal 110 | 111 | def build_model(img_size, batch_size=100, num_filter=16, c_dim=1, leak=0.1): 112 | 113 | noise_images = tf.placeholder(tf.float32, [batch_size] 114 | + [img_size, img_size, c_dim], name='noise_images') 115 | real_images = tf.placeholder(tf.float32, [batch_size] 116 | + [img_size, img_size, c_dim], name='real_images') 117 | 118 | G = generator(noise_images, img_size, batch_size, c_dim, num_filter) 119 | D, D_logots = discriminator(real_images, batch_size, c_dim, num_filter, leak) 120 | D_, D_logots_ = discriminator(G, batch_size, c_dim, num_filter, leak, reuse=True) 121 | 122 | d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots, tf.ones_like(D))) 123 | d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots_, tf.zeros_like(D_))) 124 | g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots_, tf.ones_like(D_))) 125 | 126 | d_loss = d_loss_real + d_loss_fake 127 | 128 | t_vars = tf.trainable_variables() 129 | 130 | d_vars = [var for var in t_vars if 'd_' in var.name] 131 | g_vars = [var for var in t_vars if 'g_' in var.name] 132 | 133 | saver = tf.train.Saver() 134 | 135 | return G, g_loss, d_loss, d_vars, g_vars, saver 136 | 137 | flags = tf.app.flags 138 | flags.DEFINE_float("learning_rate", 0.0002, "Learning rate for adam [0.0002]") 139 | flags.DEFINE_float("beta1", 0.5, "Momentum term of adam [0.5]") 140 | flags.DEFINE_integer("epoch", 10, "Epoch to train [10]") 141 | flags.DEFINE_string("dataset", "xxx", "The name of dataset []") 142 | flags.DEFINE_integer("batch_size", 64, "The size of batch images [64]") 143 | flags.DEFINE_integer("image_size", 100, "The size of image to use (will be center cropped) [100]") 144 | flags.DEFINE_integer("c_dim", 1, "Dimension of image color. 
[1]") 145 | 146 | FLAGS = flags.FLAGS 147 | 148 | def read_images(c_dim): 149 | is_grayscale = (c_dim == 1) 150 | real_data = glob(os.path.join("./data", FLAGS.dataset, "real_images", "*.png")) 151 | noise_data = glob(os.path.join("./data", FLAGS.dataset, "noise_images", "*.png")) 152 | 153 | real = [get_image(img_file, FLAGS.image_size, is_crop=False, is_grayscale=is_grayscale) for img_file in real_data] 154 | noise = [get_image(img_file, FLAGS.image_size, is_crop=False, is_grayscale=is_grayscale) for img_file in noise_data] 155 | 156 | if is_grayscale: 157 | reals = np.array(real).astype(np.float32)[:,:,:,None] 158 | noises = np.array(noise).astype(np.float32)[:,:,:,None] 159 | else: 160 | reals = np.array(real).astype(np.float32) 161 | noises = np.array(noise).astype(np.float32) 162 | 163 | return reals, noises 164 | 165 | #def train(sess, G, d_loss, d_vars, g_loss, g_vars, saver, c_dim=1): 166 | def train(sess, img_size, batch_size=100, num_filter=16, c_dim=1, leak=0.2): 167 | 168 | 169 | noise_images = tf.placeholder(tf.float32, [batch_size] 170 | + [img_size, img_size, c_dim], name='noise_images') 171 | real_images = tf.placeholder(tf.float32, [batch_size] 172 | + [img_size, img_size, c_dim], name='real_images') 173 | 174 | G = generator(noise_images, img_size, batch_size, c_dim, num_filter) 175 | D, D_logots = discriminator(real_images, batch_size, c_dim, num_filter, leak) 176 | D_, D_logots_ = discriminator(G, batch_size, c_dim, num_filter, leak, reuse=True) 177 | 178 | d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots, tf.ones_like(D))) 179 | d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots_, tf.zeros_like(D_))) 180 | g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(D_logots_, tf.ones_like(D_))) 181 | 182 | d_loss = d_loss_real + d_loss_fake 183 | 184 | t_vars = tf.trainable_variables() 185 | 186 | d_vars = [var for var in t_vars if 'd_' in var.name] 187 | g_vars = [var for var in t_vars if 'g_' in var.name] 188 | 189 | saver = tf.train.Saver() 190 | 191 | 192 | d_optim = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1).minimize(d_loss, var_list=d_vars) 193 | g_optim = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1).minimize(g_loss, var_list=g_vars) 194 | 195 | tf.initialize_all_variables().run() 196 | 197 | start_time = time.time() 198 | counter = 0 199 | 200 | reals, noises = read_images(c_dim) 201 | 202 | sample_images = reals[0:batch_size] 203 | sample_z = noises[0:batch_size] 204 | 205 | model_name = "DCGAN.model" 206 | model_dir = "%s_%s_%s" % (FLAGS.dataset, FLAGS.batch_size, FLAGS.image_size) 207 | checkpoint_dir = os.path.join('./checkpoint', model_dir) 208 | if not os.path.exists(checkpoint_dir): 209 | os.makedirs(checkpoint_dir) 210 | 211 | for epoch in range(FLAGS.epoch): 212 | 213 | data = glob(os.path.join("./data", FLAGS.dataset, "real_images", "*.png")) 214 | num_batch = len(data) // FLAGS.batch_size 215 | 216 | print 'num_batch', num_batch 217 | 218 | for idx in range(0, num_batch): 219 | 220 | batch_images = reals[idx*FLAGS.batch_size:(idx+1)*FLAGS.batch_size] 221 | batch_z = noises[idx*FLAGS.batch_size:(idx+1)*FLAGS.batch_size] 222 | 223 | #update 224 | out1 = sess.run([d_optim], feed_dict={real_images: batch_images, noise_images: batch_z}) 225 | 226 | #update G 227 | out2 = sess.run([g_optim], feed_dict={noise_images:batch_z}) 228 | 229 | errD_fake = d_loss_fake.eval({noise_images: batch_z}) 230 | errD_real = d_loss_real.eval({real_images:batch_images}) 231 | 
errG = g_loss.eval({noise_images: batch_z}) 232 | 233 | counter += 1 234 | print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" % (epoch, 235 | idx, num_batch, time.time() - start_time, 236 | errD_fake+errD_real, errG)) 237 | 238 | if np.mod(counter, 100) == 1: 239 | samples, loss1, loss2 = sess.run([G, d_loss, 240 | g_loss], feed_dict={noise_images: sample_z, 241 | real_images: sample_images}) 242 | save_images(sample_z, [10, 10], './{}/noise_{:02d}_{:04d}.png'.format('./sample', epoch, idx)) 243 | save_images(samples, [10, 10], './{}/denoise_{:02d}_{:04d}.png'.format('./sample', epoch, idx)) 244 | save_images(sample_images, [10, 10], './{}/train_{:02d}_{:04d}.png'.format('./sample', epoch, idx)) 245 | print("[Sample] d_loss: %.8f, g_loss: %.8f" % (loss1, loss2)) 246 | 247 | if np.mod(counter, 500) == 2: 248 | saver.save(sess, os.path.join(checkpoint_dir, model_name), global_step=counter) 249 | 250 | #G, g_loss, d_loss, d_vars, g_vars, saver = build_model(FLAGS.image_size, FLAGS.batch_size) 251 | with tf.Session() as sess: 252 | #train(sess, G, d_loss, d_vars, g_loss, g_vars, saver) 253 | train(sess, FLAGS.image_size, FLAGS.batch_size, c_dim=FLAGS.c_dim) 254 | 255 | 256 | 257 | 258 | 259 | 260 | -------------------------------------------------------------------------------- /denoise_dcgan/read_stl10.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os, sys, tarfile, urllib 4 | import numpy as np 5 | import Image 6 | 7 | height = 96 8 | width = 96 9 | 10 | 11 | data_path = './data/stl10_binary/train_X.bin' 12 | 13 | f = open(data_path, 'rb') 14 | 15 | everything = np.fromfile(f, dtype=np.uint8) 16 | images = np.reshape(everything, (-1, 3, 96, 96)) 17 | 18 | images = np.transpose(images, (0, 3, 2, 1)) 19 | 20 | print images.shape 21 | 22 | mean = 0 23 | sigma = 100 24 | 25 | for i in range(len(images)): 26 | new_img = Image.fromarray(images[i], 'RGB') 27 | new_img.save('./data/stl10_binary/real_images/'+str(i)+'.png') 28 | 29 | gauss = np.random.normal(mean, sigma, (height*width)).reshape(height, width) 30 | 31 | noisy = images[i].astype(np.float32) 32 | noisy[:,:,0] = noisy[:,:,0] + gauss 33 | noisy[:,:,1] = noisy[:,:,1] + gauss 34 | noisy[:,:,2] = noisy[:,:,2] + gauss 35 | 36 | noisy = noisy - np.min(noisy) 37 | noisy = noisy / np.max(noisy) 38 | noisy = (noisy*255).astype(np.uint8) 39 | 40 | new_img = Image.fromarray(noisy, 'RGB') 41 | new_img.save('./data/stl10_binary/noise_images/'+str(i)+'.png') 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /denoise_dcgan/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some codes from https://github.com/Newmu/dcgan_code 3 | """ 4 | from __future__ import division 5 | import math 6 | import json 7 | import random 8 | import pprint 9 | import scipy.misc 10 | import numpy as np 11 | from time import gmtime, strftime 12 | 13 | pp = pprint.PrettyPrinter() 14 | 15 | get_stddev = lambda x, k_h, k_w: 1/math.sqrt(k_w*k_h*x.get_shape()[-1]) 16 | 17 | def get_image(image_path, image_size, is_crop=True, resize_w=64, is_grayscale = False): 18 | return transform(imread(image_path, is_grayscale), image_size, is_crop, resize_w) 19 | 20 | def save_images(images, size, image_path): 21 | return imsave(inverse_transform(images), size, image_path) 22 | 23 | def imread(path, is_grayscale = False): 24 | if (is_grayscale): 25 | return 
scipy.misc.imread(path, flatten = True).astype(np.float) 26 | else: 27 | return scipy.misc.imread(path).astype(np.float) 28 | 29 | def merge_images(images, size): 30 | return inverse_transform(images) 31 | 32 | def merge(images, size): 33 | h, w = images.shape[1], images.shape[2] 34 | img = np.zeros((h * size[0], w * size[1], 3)) 35 | for idx, image in enumerate(images): 36 | i = idx % size[1] 37 | j = idx // size[1] 38 | img[j*h:j*h+h, i*w:i*w+w, :] = image 39 | 40 | return img 41 | 42 | def imsave(images, size, path): 43 | return scipy.misc.imsave(path, merge(images, size)) 44 | 45 | def center_crop(x, crop_h, crop_w=None, resize_w=64): 46 | if crop_w is None: 47 | crop_w = crop_h 48 | h, w = x.shape[:2] 49 | j = int(round((h - crop_h)/2.)) 50 | i = int(round((w - crop_w)/2.)) 51 | return scipy.misc.imresize(x[j:j+crop_h, i:i+crop_w], 52 | [resize_w, resize_w]) 53 | 54 | def transform(image, npx=64, is_crop=True, resize_w=64): 55 | # npx : # of pixels width/height of image 56 | if is_crop: 57 | cropped_image = center_crop(image, npx, resize_w=resize_w) 58 | else: 59 | cropped_image = image 60 | return np.array(cropped_image)/127.5 - 1. 61 | 62 | def inverse_transform(images): 63 | return (images+1.)/2. 64 | 65 | 66 | def to_json(output_path, *layers): 67 | with open(output_path, "w") as layer_f: 68 | lines = "" 69 | for w, b, bn in layers: 70 | layer_idx = w.name.split('/')[0].split('h')[1] 71 | 72 | B = b.eval() 73 | 74 | if "lin/" in w.name: 75 | W = w.eval() 76 | depth = W.shape[1] 77 | else: 78 | W = np.rollaxis(w.eval(), 2, 0) 79 | depth = W.shape[0] 80 | 81 | biases = {"sy": 1, "sx": 1, "depth": depth, "w": ['%.2f' % elem for elem in list(B)]} 82 | if bn != None: 83 | gamma = bn.gamma.eval() 84 | beta = bn.beta.eval() 85 | 86 | gamma = {"sy": 1, "sx": 1, "depth": depth, "w": ['%.2f' % elem for elem in list(gamma)]} 87 | beta = {"sy": 1, "sx": 1, "depth": depth, "w": ['%.2f' % elem for elem in list(beta)]} 88 | else: 89 | gamma = {"sy": 1, "sx": 1, "depth": 0, "w": []} 90 | beta = {"sy": 1, "sx": 1, "depth": 0, "w": []} 91 | 92 | if "lin/" in w.name: 93 | fs = [] 94 | for w in W.T: 95 | fs.append({"sy": 1, "sx": 1, "depth": W.shape[0], "w": ['%.2f' % elem for elem in list(w)]}) 96 | 97 | lines += """ 98 | var layer_%s = { 99 | "layer_type": "fc", 100 | "sy": 1, "sx": 1, 101 | "out_sx": 1, "out_sy": 1, 102 | "stride": 1, "pad": 0, 103 | "out_depth": %s, "in_depth": %s, 104 | "biases": %s, 105 | "gamma": %s, 106 | "beta": %s, 107 | "filters": %s 108 | };""" % (layer_idx.split('_')[0], W.shape[1], W.shape[0], biases, gamma, beta, fs) 109 | else: 110 | fs = [] 111 | for w_ in W: 112 | fs.append({"sy": 5, "sx": 5, "depth": W.shape[3], "w": ['%.2f' % elem for elem in list(w_.flatten())]}) 113 | 114 | lines += """ 115 | var layer_%s = { 116 | "layer_type": "deconv", 117 | "sy": 5, "sx": 5, 118 | "out_sx": %s, "out_sy": %s, 119 | "stride": 2, "pad": 1, 120 | "out_depth": %s, "in_depth": %s, 121 | "biases": %s, 122 | "gamma": %s, 123 | "beta": %s, 124 | "filters": %s 125 | };""" % (layer_idx, 2**(int(layer_idx)+2), 2**(int(layer_idx)+2), 126 | W.shape[0], W.shape[3], biases, gamma, beta, fs) 127 | layer_f.write(" ".join(lines.replace("'","").split())) 128 | 129 | def make_gif(images, fname, duration=2, true_image=False): 130 | import moviepy.editor as mpy 131 | 132 | def make_frame(t): 133 | try: 134 | x = images[int(len(images)/duration*t)] 135 | except: 136 | x = images[-1] 137 | 138 | if true_image: 139 | return x.astype(np.uint8) 140 | else: 141 | return ((x+1)/2*255).astype(np.uint8) 
142 | 143 | clip = mpy.VideoClip(make_frame, duration=duration) 144 | clip.write_gif(fname, fps = len(images) / duration) 145 | 146 | def visualize(sess, dcgan, config, option): 147 | if option == 0: 148 | z_sample = np.random.uniform(-0.5, 0.5, size=(config.batch_size, dcgan.z_dim)) 149 | samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) 150 | save_images(samples, [8, 8], './samples/test_%s.png' % strftime("%Y-%m-%d %H:%M:%S", gmtime())) 151 | elif option == 1: 152 | values = np.arange(0, 1, 1./config.batch_size) 153 | for idx in xrange(100): 154 | print(" [*] %d" % idx) 155 | z_sample = np.zeros([config.batch_size, dcgan.z_dim]) 156 | for kdx, z in enumerate(z_sample): 157 | z[idx] = values[kdx] 158 | 159 | samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) 160 | save_images(samples, [8, 8], './samples/test_arange_%s.png' % (idx)) 161 | elif option == 2: 162 | values = np.arange(0, 1, 1./config.batch_size) 163 | for idx in [random.randint(0, 99) for _ in xrange(100)]: 164 | print(" [*] %d" % idx) 165 | z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim)) 166 | z_sample = np.tile(z, (config.batch_size, 1)) 167 | #z_sample = np.zeros([config.batch_size, dcgan.z_dim]) 168 | for kdx, z in enumerate(z_sample): 169 | z[idx] = values[kdx] 170 | 171 | samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) 172 | make_gif(samples, './samples/test_gif_%s.gif' % (idx)) 173 | elif option == 3: 174 | values = np.arange(0, 1, 1./config.batch_size) 175 | for idx in xrange(100): 176 | print(" [*] %d" % idx) 177 | z_sample = np.zeros([config.batch_size, dcgan.z_dim]) 178 | for kdx, z in enumerate(z_sample): 179 | z[idx] = values[kdx] 180 | 181 | samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) 182 | make_gif(samples, './samples/test_gif_%s.gif' % (idx)) 183 | elif option == 4: 184 | image_set = [] 185 | values = np.arange(0, 1, 1./config.batch_size) 186 | 187 | for idx in xrange(100): 188 | print(" [*] %d" % idx) 189 | z_sample = np.zeros([config.batch_size, dcgan.z_dim]) 190 | for kdx, z in enumerate(z_sample): z[idx] = values[kdx] 191 | 192 | image_set.append(sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})) 193 | make_gif(image_set[-1], './samples/test_gif_%s.gif' % (idx)) 194 | 195 | new_image_set = [merge(np.array([images[idx] for images in image_set]), [10, 10]) \ 196 | for idx in range(64) + range(63, -1, -1)] 197 | make_gif(new_image_set, './samples/test_gif_merged.gif', duration=8) 198 | -------------------------------------------------------------------------------- /dl/Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | GCC = gcc 3 | CC = g++ -std=c++0x 4 | NVCC = nvcc 5 | CCFLAGS = -c -pg 6 | NVCCFLAGS = -g -pg -O3 -c 7 | PTXFLAGES = --machine 64 8 | 9 | LIB = -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lm 10 | INCLUDES = -I./include 11 | 12 | BUILD_DIR = ./bin 13 | OBJ_DIR = ./obj 14 | SRCS_DIR = ./src 15 | INCLUDES_DIR = ./include 16 | SRCS_TARGET_DIR = ./main_src 17 | 18 | CU_INCLUDES = $(shell find $(INCLUDES_DIR) -name "*.cuh") 19 | CXX_INCLUDES = $(shell find $(INCLUDES_DIR) -name "*.h") 20 | HXX_INCLUDES = $(shell find $(INCLUDES_DIR) -name "*.hpp") 21 | 22 | #HXX对应的cpp,cu,然后将它们排除掉,不进行编译 23 | HXX_SRCS = $(subst $(INCLUDES_DIR), $(SRCS_DIR), ${HXX_INCLUDES:.hpp=.cpp}) 24 | HXX_SRCS += $(subst $(INCLUDES_DIR), $(SRCS_DIR), ${HXX_INCLUDES:.hpp=.cu}) 25 | CU_SRCS = $(filter-out $(HXX_SRCS), $(shell find $(SRCS_DIR) -name "*.cu")) 26 | CXX_SRCS = $(filter-out 
$(HXX_SRCS), $(shell find $(SRCS_DIR) -name "*.cpp")) 27 | CU_HPP_SRCS = $(filter $(HXX_SRCS), $(shell find $(SRCS_DIR) -name "*.cu")) 28 | CXX_HPP_SRCS = $(filter $(HXX_SRCS), $(shell find $(SRCS_DIR) -name "*.cpp")) 29 | 30 | #生成的链接文件 31 | CXX_OBJS += $(subst $(SRCS_DIR), $(OBJ_DIR), ${CXX_SRCS:.cpp=.o}) 32 | CU_OBJS += $(subst $(SRCS_DIR), $(OBJ_DIR), ${CU_SRCS:.cu=.o}) 33 | 34 | TARGET ?= main 35 | MULTI_PROCESS ?= 1 36 | MULTI_MECHINE ?= 0 37 | OPEN_MPI ?= 0 38 | NUM_PROCESS ?= 2 39 | BUILD_TARGET = $(BUILD_DIR)/$(TARGET) 40 | SRCS_TARGET = $(SRCS_TARGET_DIR)/$(TARGET).cu 41 | OBJ_TARGET = $(OBJ_DIR)/$(TARGET).o 42 | 43 | #print: $(CXX_SRCS) $(CU_SRCS) 44 | # echo $(HXX_SRCS) 45 | # echo $(CXX_SRCS) 46 | # echo $(CU_SRCS) 47 | 48 | $(OBJ_DIR)/%.o: $(SRCS_DIR)/%.cpp 49 | $(CC) $(CCFLAGS) $^ $(INCLUDES) -o $@ 50 | 51 | $(OBJ_DIR)/%.o: $(SRCS_DIR)/%.cu 52 | $(NVCC) $(CCFLAGS) $^ $(INCLUDES) -o $@ 53 | 54 | $(BUILD_TARGET): $(CXX_OBJS) $(CU_OBJS) $(SRCS_TARGET) $(CU_HPP_SRCS) $(CXX_HPP_SRCS) 55 | $(NVCC) $(NVCCFLAGS) $(SRCS_TARGET) $(INCLUDES) -o $(OBJ_TARGET) 56 | $(NVCC) -o $(BUILD_TARGET) $(OBJ_TARGET) $(CXX_OBJS) $(CU_OBJS) $(LIB) $(INCLUDES) 57 | 58 | cleanall: 59 | rm -rf $(OBJ_DIR)/*.o 60 | -------------------------------------------------------------------------------- /dl/include/.data.hpp.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/include/.data.hpp.swp -------------------------------------------------------------------------------- /dl/include/.matrix.hpp.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/include/.matrix.hpp.swp -------------------------------------------------------------------------------- /dl/include/.param.h.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/include/.param.h.swp -------------------------------------------------------------------------------- /dl/include/convnet.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file convnet.hpp 3 | /// @brief 4 | 5 | #ifndef CONVNET_H_ 6 | #define CONVNET_H_ 7 | 8 | #include 9 | #include 10 | #include "layer.hpp" 11 | 12 | 13 | template 14 | class ConvNet : public TrainLayer{ 15 | 16 | private: 17 | 18 | Matrix* unfold_dE_db_tmp; 19 | Matrix* dE_db_tmp; 20 | Matrix* padded_x; 21 | Matrix* unfold_x; 22 | 23 | Matrix* unranged_dE_dx; 24 | Matrix* unranged_dE_dw; 25 | int _filt_pixs; 26 | int _conv_pixs; 27 | int _padded_in_pixs; 28 | int _in_pixs; 29 | int _box_in_pixs; 30 | int _num_box; 31 | 32 | ConvParam* _cp; 33 | 34 | public: 35 | ConvNet(ConvParam* cp); 36 | ~ConvNet(); 37 | 38 | void initCuda(); 39 | void computeOutput(Matrix* x); 40 | void computeDerivsOfPars(Matrix* x); 41 | void computeDerivsOfInput(Matrix* dE_dx); 42 | 43 | }; 44 | 45 | #include "../src/convnet.cu" 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /dl/include/data.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file data.hpp 3 | /// 4 | #ifndef DATA_HPP_ 5 | #define DATA_HPP_ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | template 12 | class Data { 13 | 14 | 
public: 15 | Data() {} 16 | virtual ~Data() {} 17 | 18 | void copyFromHost(Dtype* data_value, const int data_len); 19 | void copyFromDevice(Data* dev_data); 20 | void copyToHost(Dtype* data_value, const int data_len); 21 | void copyToDevice(Data* dev_data); 22 | 23 | void zeros(); 24 | 25 | inline Dtype* getDevData() const { 26 | return _data_value; 27 | } 28 | 29 | 30 | protected: 31 | //数据形状不固定,由子类来定 32 | std::vector _shape; 33 | Dtype* _data_value; 34 | bool _is_own_data; 35 | int _amount; 36 | }; 37 | 38 | #include "../src/data.cu" 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /dl/include/dropout_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file dropout_layer.cuh 3 | /// @brief 实现了对输入每一个点求dropout 4 | 5 | #ifndef DROPOUT_LAYER_H_ 6 | #define DROPOUT_LAYER_H_ 7 | 8 | #include 9 | #include 10 | #include "layer.hpp" 11 | 12 | template 13 | class DropoutLayer : public Layer { 14 | 15 | public: 16 | 17 | DropoutLayer(Param* fcp); 18 | ~DropoutLayer(); 19 | 20 | void initCuda(); 21 | void computeOutput(Matrix* x); 22 | void computeDerivsOfInput(Matrix* dE_dx); 23 | 24 | private: 25 | Param* _p; 26 | Matrix *_drop_record; ///>记录该点是否被丢弃 27 | Matrix *_drop_rand_probs; ///>记录该点被丢弃的概率,与0.5比较 28 | bool _is_set_up; ///>随机数初始化 29 | }; 30 | 31 | 32 | #include "../src/dropout_layer.cu" 33 | #endif 34 | -------------------------------------------------------------------------------- /dl/include/inner_product_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file inner_product_layer.cuh 3 | /// @brief 实现了inner product 4 | 5 | #ifndef INNER_PRODUCT_LAYER_CUH_ 6 | #define INNER_PRODUCT_LAYER_CUH_ 7 | 8 | #include 9 | #include "layer.hpp" 10 | #include "layer_kernel.cuh" 11 | 12 | template 13 | class InnerProductLayer : public TrainLayer { 14 | 15 | public: 16 | 17 | InnerProductLayer(InnerParam* fcp); 18 | ~InnerProductLayer(); 19 | 20 | void initCuda(); 21 | void computeOutput(Matrix* x); 22 | void computeDerivsOfPars(Matrix* x); 23 | void computeDerivsOfInput(Matrix* dE_dx); 24 | 25 | private: 26 | InnerParam* _fcp; 27 | Matrix* data_T; 28 | Matrix* w_T; 29 | }; 30 | 31 | #include "../src/inner_product_layer.cu" 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /dl/include/json/json-forwards.h: -------------------------------------------------------------------------------- 1 | /// Json-cpp amalgated forward header (http://jsoncpp.sourceforge.net/). 2 | /// It is intended to be used with #include "json/json-forwards.h" 3 | /// This header provides forward declaration for all JsonCpp types. 4 | 5 | // ////////////////////////////////////////////////////////////////////// 6 | // Beginning of content of file: LICENSE 7 | // ////////////////////////////////////////////////////////////////////// 8 | 9 | /* 10 | The JsonCpp library's source code, including accompanying documentation, 11 | tests and demonstration applications, are licensed under the following 12 | conditions... 13 | 14 | The author (Baptiste Lepilleur) explicitly disclaims copyright in all 15 | jurisdictions which recognize such a disclaimer. In such jurisdictions, 16 | this software is released into the Public Domain. 
17 | 18 | In jurisdictions which do not recognize Public Domain property (e.g. Germany as of 19 | 2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur, and is 20 | released under the terms of the MIT License (see below). 21 | 22 | In jurisdictions which recognize Public Domain property, the user of this 23 | software may choose to accept it either as 1) Public Domain, 2) under the 24 | conditions of the MIT License (see below), or 3) under the terms of dual 25 | Public Domain/MIT License conditions described here, as they choose. 26 | 27 | The MIT License is about as close to Public Domain as a license can get, and is 28 | described in clear, concise terms at: 29 | 30 | http://en.wikipedia.org/wiki/MIT_License 31 | 32 | The full text of the MIT License follows: 33 | 34 | ======================================================================== 35 | Copyright (c) 2007-2010 Baptiste Lepilleur 36 | 37 | Permission is hereby granted, free of charge, to any person 38 | obtaining a copy of this software and associated documentation 39 | files (the "Software"), to deal in the Software without 40 | restriction, including without limitation the rights to use, copy, 41 | modify, merge, publish, distribute, sublicense, and/or sell copies 42 | of the Software, and to permit persons to whom the Software is 43 | furnished to do so, subject to the following conditions: 44 | 45 | The above copyright notice and this permission notice shall be 46 | included in all copies or substantial portions of the Software. 47 | 48 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 49 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 50 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 51 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 52 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 53 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 54 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 55 | SOFTWARE. 56 | ======================================================================== 57 | (END LICENSE TEXT) 58 | 59 | The MIT license is compatible with both the GPL and commercial 60 | software, affording one all of the rights of Public Domain with the 61 | minor nuisance of being required to keep the above copyright notice 62 | and license text in the source code. Note also that by accepting the 63 | Public Domain "license" you can re-license your copy using whatever 64 | license you like. 65 | 66 | */ 67 | 68 | // ////////////////////////////////////////////////////////////////////// 69 | // End of content of file: LICENSE 70 | // ////////////////////////////////////////////////////////////////////// 71 | 72 | 73 | 74 | 75 | 76 | #ifndef JSON_FORWARD_AMALGATED_H_INCLUDED 77 | # define JSON_FORWARD_AMALGATED_H_INCLUDED 78 | /// If defined, indicates that the source file is amalgated 79 | /// to prevent private header inclusion. 80 | #define JSON_IS_AMALGAMATION 81 | 82 | // ////////////////////////////////////////////////////////////////////// 83 | // Beginning of content of file: include/json/config.h 84 | // ////////////////////////////////////////////////////////////////////// 85 | 86 | // Copyright 2007-2010 Baptiste Lepilleur 87 | // Distributed under MIT license, or public domain if desired and 88 | // recognized in your jurisdiction. 
89 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 90 | 91 | #ifndef JSON_CONFIG_H_INCLUDED 92 | #define JSON_CONFIG_H_INCLUDED 93 | 94 | /// If defined, indicates that json library is embedded in CppTL library. 95 | //# define JSON_IN_CPPTL 1 96 | 97 | /// If defined, indicates that json may leverage CppTL library 98 | //# define JSON_USE_CPPTL 1 99 | /// If defined, indicates that cpptl vector based map should be used instead of 100 | /// std::map 101 | /// as Value container. 102 | //# define JSON_USE_CPPTL_SMALLMAP 1 103 | 104 | // If non-zero, the library uses exceptions to report bad input instead of C 105 | // assertion macros. The default is to use exceptions. 106 | #ifndef JSON_USE_EXCEPTION 107 | #define JSON_USE_EXCEPTION 1 108 | #endif 109 | 110 | /// If defined, indicates that the source file is amalgated 111 | /// to prevent private header inclusion. 112 | /// Remarks: it is automatically defined in the generated amalgated header. 113 | // #define JSON_IS_AMALGAMATION 114 | 115 | #ifdef JSON_IN_CPPTL 116 | #include 117 | #ifndef JSON_USE_CPPTL 118 | #define JSON_USE_CPPTL 1 119 | #endif 120 | #endif 121 | 122 | #ifdef JSON_IN_CPPTL 123 | #define JSON_API CPPTL_API 124 | #elif defined(JSON_DLL_BUILD) 125 | #if defined(_MSC_VER) 126 | #define JSON_API __declspec(dllexport) 127 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING 128 | #endif // if defined(_MSC_VER) 129 | #elif defined(JSON_DLL) 130 | #if defined(_MSC_VER) 131 | #define JSON_API __declspec(dllimport) 132 | #define JSONCPP_DISABLE_DLL_INTERFACE_WARNING 133 | #endif // if defined(_MSC_VER) 134 | #endif // ifdef JSON_IN_CPPTL 135 | #if !defined(JSON_API) 136 | #define JSON_API 137 | #endif 138 | 139 | // If JSON_NO_INT64 is defined, then Json only support C++ "int" type for 140 | // integer 141 | // Storages, and 64 bits integer support is disabled. 142 | // #define JSON_NO_INT64 1 143 | 144 | #if defined(_MSC_VER) && _MSC_VER <= 1200 // MSVC 6 145 | // Microsoft Visual Studio 6 only support conversion from __int64 to double 146 | // (no conversion from unsigned __int64). 147 | #define JSON_USE_INT64_DOUBLE_CONVERSION 1 148 | // Disable warning 4786 for VS6 caused by STL (identifier was truncated to '255' 149 | // characters in the debug information) 150 | // All projects I've ever seen with VS6 were using this globally (not bothering 151 | // with pragma push/pop). 152 | #pragma warning(disable : 4786) 153 | #endif // if defined(_MSC_VER) && _MSC_VER < 1200 // MSVC 6 154 | 155 | #if defined(_MSC_VER) && _MSC_VER >= 1500 // MSVC 2008 156 | /// Indicates that the following function is deprecated. 
157 | #define JSONCPP_DEPRECATED(message) __declspec(deprecated(message)) 158 | #elif defined(__clang__) && defined(__has_feature) 159 | #if __has_feature(attribute_deprecated_with_message) 160 | #define JSONCPP_DEPRECATED(message) __attribute__ ((deprecated(message))) 161 | #endif 162 | #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) 163 | #define JSONCPP_DEPRECATED(message) __attribute__ ((deprecated(message))) 164 | #elif defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 165 | #define JSONCPP_DEPRECATED(message) __attribute__((__deprecated__)) 166 | #endif 167 | 168 | #if !defined(JSONCPP_DEPRECATED) 169 | #define JSONCPP_DEPRECATED(message) 170 | #endif // if !defined(JSONCPP_DEPRECATED) 171 | 172 | namespace Json { 173 | typedef int Int; 174 | typedef unsigned int UInt; 175 | #if defined(JSON_NO_INT64) 176 | typedef int LargestInt; 177 | typedef unsigned int LargestUInt; 178 | #undef JSON_HAS_INT64 179 | #else // if defined(JSON_NO_INT64) 180 | // For Microsoft Visual use specific types as long long is not supported 181 | #if defined(_MSC_VER) // Microsoft Visual Studio 182 | typedef __int64 Int64; 183 | typedef unsigned __int64 UInt64; 184 | #else // if defined(_MSC_VER) // Other platforms, use long long 185 | typedef long long int Int64; 186 | typedef unsigned long long int UInt64; 187 | #endif // if defined(_MSC_VER) 188 | typedef Int64 LargestInt; 189 | typedef UInt64 LargestUInt; 190 | #define JSON_HAS_INT64 191 | #endif // if defined(JSON_NO_INT64) 192 | } // end namespace Json 193 | 194 | #endif // JSON_CONFIG_H_INCLUDED 195 | 196 | // ////////////////////////////////////////////////////////////////////// 197 | // End of content of file: include/json/config.h 198 | // ////////////////////////////////////////////////////////////////////// 199 | 200 | 201 | 202 | 203 | 204 | 205 | // ////////////////////////////////////////////////////////////////////// 206 | // Beginning of content of file: include/json/forwards.h 207 | // ////////////////////////////////////////////////////////////////////// 208 | 209 | // Copyright 2007-2010 Baptiste Lepilleur 210 | // Distributed under MIT license, or public domain if desired and 211 | // recognized in your jurisdiction. 
212 | // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 213 | 214 | #ifndef JSON_FORWARDS_H_INCLUDED 215 | #define JSON_FORWARDS_H_INCLUDED 216 | 217 | #if !defined(JSON_IS_AMALGAMATION) 218 | #include "config.h" 219 | #endif // if !defined(JSON_IS_AMALGAMATION) 220 | 221 | namespace Json { 222 | 223 | // writer.h 224 | class FastWriter; 225 | class StyledWriter; 226 | 227 | // reader.h 228 | class Reader; 229 | 230 | // features.h 231 | class Features; 232 | 233 | // value.h 234 | typedef unsigned int ArrayIndex; 235 | class StaticString; 236 | class Path; 237 | class PathArgument; 238 | class Value; 239 | class ValueIteratorBase; 240 | class ValueIterator; 241 | class ValueConstIterator; 242 | 243 | } // namespace Json 244 | 245 | #endif // JSON_FORWARDS_H_INCLUDED 246 | 247 | // ////////////////////////////////////////////////////////////////////// 248 | // End of content of file: include/json/forwards.h 249 | // ////////////////////////////////////////////////////////////////////// 250 | 251 | 252 | 253 | 254 | 255 | #endif //ifndef JSON_FORWARD_AMALGATED_H_INCLUDED 256 | -------------------------------------------------------------------------------- /dl/include/layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file layer.hpp 3 | /// 4 | #ifndef LAYER_HPP_ 5 | #define LAYER_HPP_ 6 | 7 | #include 8 | #include "utils.cuh" 9 | #include "param.h" 10 | #include "matrix.hpp" 11 | 12 | template 13 | class Layer { 14 | 15 | public: 16 | Layer() {} 17 | virtual ~Layer() {} 18 | 19 | virtual void initCuda() {} 20 | virtual void computeOutput(Matrix* x) {} 21 | 22 | virtual void computeDerivsOfInput(Matrix* dE_dx) {} 23 | 24 | inline Matrix* getY() { 25 | return _y; 26 | } 27 | inline Matrix* getDEDY() { 28 | return _dE_dy; 29 | } 30 | 31 | protected: 32 | cublasHandle_t handle; 33 | Matrix* _y; ///>每一层的输出 34 | Matrix* _dE_dy; ///>每层输出的导数 35 | }; 36 | 37 | template 38 | class TrainLayer : public Layer { 39 | 40 | public: 41 | TrainLayer(TrainParam* tp){ 42 | _tp = tp; 43 | } 44 | TrainLayer() {} 45 | virtual ~TrainLayer() {} 46 | 47 | virtual void computeDerivsOfPars(Matrix* x) {} 48 | 49 | void updatePars(bool isShow = false) { 50 | if(isShow == true){ 51 | _w->showValue("w"); 52 | _dE_dw->showValue("dEdw"); 53 | cout << _tp->getMomentum() << ":" << _tp->getWeightDecay() << ":" \ 54 | << _tp->getWLR() << ":" << _tp->getBiasLR() << endl; 55 | } 56 | _w_inc->addSum(_w, _dE_dw, _tp->getMomentum(), -_tp->getWeightDecay(), \ 57 | -_tp->getWLR() / _tp->getMinibatchSize()); 58 | _w->add(_w_inc, 1, 1); 59 | 60 | _bias_inc->add(_dE_db, _tp->getMomentum(), \ 61 | -_tp->getBiasLR() / _tp->getMinibatchSize()); 62 | _bias->add(_bias_inc, 1, 1); 63 | } 64 | inline Matrix* getW() { 65 | return _w; 66 | } 67 | inline Matrix* getBias() { 68 | return _bias; 69 | } 70 | 71 | protected: 72 | Matrix* _w; 73 | Matrix* _bias; 74 | Matrix* _w_inc; 75 | Matrix* _bias_inc; 76 | Matrix* _dE_dw; 77 | Matrix* _dE_db; 78 | 79 | TrainParam* _tp; 80 | }; 81 | 82 | 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /dl/include/layer_kernel.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * filename: layer_kernel.cuh 3 | */ 4 | #ifndef LAYER_KERNEL_CUH_ 5 | #define LAYER_KERNEL_CUH_ 6 | 7 | #include "param.h" 8 | 9 | // CUDA: grid stride looping 10 | #define CUDA_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 
12 | i < (n); \ 13 | i += blockDim.x * gridDim.x) 14 | 15 | 16 | __global__ void forward_convolution(const float* x, const float* w, \ 17 | const float* bias, float* targets, \ 18 | const int in_height, const int in_width, const int in_channel, \ 19 | const int out_height, const int out_width, \ 20 | const int filter_height, const int filter_width, const int filter_channel, \ 21 | const int stride_height, const int stride_width, \ 22 | const int box_num_height, const int box_num_width, \ 23 | const int box_in_height, const int box_in_width, \ 24 | const int box_out_height, const int box_out_width); 25 | 26 | 27 | __global__ void backward_convolution(const float* dE_dy, const float *w, \ 28 | float* targets, \ 29 | const int box_in_height, const int box_in_width, \ 30 | const int box_out_height, const int box_out_width, \ 31 | const int out_channel, const int in_channel, \ 32 | const int out_height, const int out_width, \ 33 | const int filter_height, const int filter_width, \ 34 | const int stride_height, const int stride_width, \ 35 | const int box_num_height, const int box_num_width); 36 | 37 | 38 | __global__ void compute_convolution_derivs(const float* dE_dy, const float *x, \ 39 | float* dE_dw, const int box_out_height, const int box_out_width, \ 40 | const int out_channel, const int in_channel, const int in_height, \ 41 | const int in_width, const int out_height, const int out_width, \ 42 | const int filter_height, const int filter_width, \ 43 | const int stride_height, const int stride_width, \ 44 | const int box_num_height, const int box_num_width); 45 | 46 | 47 | __global__ void compact_dervis_w(const float* unranged_dE_dw, \ 48 | float* dE_dw, const int filter_height, const int filter_width, \ 49 | const int box_num_height, const int box_num_width, \ 50 | const int minibatch_size, const int in_channel, const int out_channel); 51 | 52 | __global__ void compute_derivs_of_bias(const float* dE_dy, float* targets, \ 53 | const int out_height, const int out_width, const int out_channel, \ 54 | const int box_out_height, const int box_out_width, \ 55 | const int box_num_height, const int box_num_width); 56 | 57 | 58 | __global__ void pad_to_ori(float* dst, const float* src, const int num_kernel, \ 59 | const int img_height, const int img_width, \ 60 | const int padded_img_height, const int padded_img_width, \ 61 | const int img_channel); 62 | 63 | __global__ void ori_to_padding(const float* src, float* dst, const int num_kernel, \ 64 | const int img_height, const int img_width, const int padded_img_height, \ 65 | const int padded_img_width, const int img_channel); 66 | 67 | __global__ void max_pooling(const float* convOutputs, float* targets, int* maxPoolPos, \ 68 | const int in_height, const int in_width, \ 69 | const int in_channels, const int out_height, const int out_width, \ 70 | const int filter_height, const int filter_width, \ 71 | const int stride_height, const int stride_width, \ 72 | const int box_out_height, const int box_out_width, \ 73 | const int box_num_height, const int box_num_width); 74 | 75 | __global__ void avg_pooling(const float* convOutputs, float* targets, \ 76 | const int in_height, const int in_width, \ 77 | const int in_channels, const int out_height, const int out_width, \ 78 | const int filter_height, const int filter_width, \ 79 | const int stride_height, const int stride_width, \ 80 | const int box_out_height, const int box_out_width, \ 81 | const int box_num_height, const int box_num_width); 82 | 83 | __global__ void compute_dE_dy_max(const float* 
dE_dy_i, float* targets, \ 84 | int* maxPoolPos, \ 85 | const int box_in_height, const int box_in_width, \ 86 | const int box_out_height, const int box_out_width, \ 87 | const int num_filters, \ 88 | const int out_height, const int out_width, \ 89 | const int filter_height, const int filter_width, \ 90 | const int stride_height, const int stride_width, \ 91 | const int box_num_height, const int box_num_width); 92 | 93 | __global__ void compute_dE_dy_avg(const float* dE_dy_i, float* targets, \ 94 | const int box_in_height, const int box_in_width, \ 95 | const int box_out_height, const int box_out_width, \ 96 | const int num_filters, \ 97 | const int out_height, const int out_width, \ 98 | const int filter_height, const int filter_width, \ 99 | const int stride_height, const int stride_width, \ 100 | const int box_num_height, const int box_num_width); 101 | 102 | __global__ void compute_dE_dy(const float* y_j, const int* labels, \ 103 | float* dE_dy_j, const int width); 104 | 105 | 106 | __global__ void compactOverlap(float* src, float* targets, \ 107 | const int in_height, const int in_width, const int in_channel, \ 108 | const int overlap_height, const int overlap_width, \ 109 | const int box_in_height, const int box_in_width, \ 110 | const int box_num_height, const int box_num_width); 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | #endif 123 | -------------------------------------------------------------------------------- /dl/include/load_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file load_layer.hpp 3 | /// \brief 从文件中下载数据 4 | /// 5 | 6 | #ifndef LOAD_LAYER_HPP_ 7 | #define LOAD_LAYER_HPP_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include"utils.cuh" 15 | 16 | #define MAX_OBJECT_NUM 24 17 | 18 | using namespace std; 19 | 20 | /// \brief 执行下载数据行为的类 21 | /// 22 | template 23 | class LoadLayer { 24 | 25 | public: 26 | 27 | /// \brief 默认构造函数表示个数信息需要从文件中读取,而不是传递进来的 28 | LoadLayer() {} 29 | LoadLayer(const int num_train, const int num_valid, \ 30 | const int num_test, const int img_size, const int img_channel); 31 | virtual ~LoadLayer(); 32 | 33 | virtual void loadBinary(string filenmae, Dtype* pixel_ptr, \ 34 | int* label_ptr, int batch_idx) {} 35 | 36 | void meanOneImg(Dtype* pixel_ptr, int process_len); 37 | void stdOneImg(Dtype* pixel_ptr, int process_len); 38 | 39 | virtual void loadTrainOneBatch(int batch_idx, \ 40 | Dtype* &mini_pixel, int* &mini_label) {} 41 | virtual void loadValidOneBatch(int batch_idx, \ 42 | Dtype* &mini_pixel, int* &mini_label) {} 43 | virtual void loadTestOneBatch(int batch_idx, \ 44 | Dtype* &mini_pixel, int *&mini_label) {} 45 | 46 | int getNumTrain(){ 47 | return _num_train; 48 | } 49 | int getNumValid(){ 50 | return _num_valid; 51 | } 52 | int getNumTest(){ 53 | return _num_test; 54 | } 55 | int getImgSize(){ 56 | return _img_size; 57 | } 58 | int getImgChannel(){ 59 | return _img_channel; 60 | } 61 | 62 | Dtype* getTrainPixel(){ 63 | return _train_pixel; 64 | } 65 | int* getTrainLabel(){ 66 | return _train_label; 67 | } 68 | Dtype* getValidPixel(){ 69 | return _valid_pixel; 70 | } 71 | int* getValidLabel(){ 72 | return _valid_label; 73 | } 74 | Dtype* getTestPixel(){ 75 | return _test_pixel; 76 | } 77 | int* getTestLabel(){ 78 | return _test_label; 79 | } 80 | 81 | protected: 82 | long long _num_train; 83 | int _num_valid; 84 | int _num_test; 85 | int _img_size; 86 | int _img_height; 87 | int _img_width; 88 | int _img_channel; 89 | 
int _img_sqrt; 90 | 91 | ///返回cpu数据 92 | int* _train_label; 93 | int* _valid_label; 94 | int* _test_label; 95 | Dtype* _train_pixel; 96 | Dtype* _valid_pixel; 97 | Dtype* _test_pixel; 98 | int* _train_label_ptr; 99 | int* _valid_label_ptr; 100 | int* _test_label_ptr; 101 | Dtype* _train_pixel_ptr; 102 | Dtype* _valid_pixel_ptr; 103 | Dtype* _test_pixel_ptr; 104 | 105 | bool _is_base_alloc; 106 | 107 | }; 108 | 109 | 110 | template 111 | class LoadCifar10 : public LoadLayer { 112 | 113 | int _minibatch_size; 114 | public: 115 | LoadCifar10(const int minibatch_size); 116 | 117 | ~LoadCifar10() {} 118 | 119 | using LoadLayer::loadBinary; 120 | void loadBinary(string filename, Dtype* &pixel_ptr, int* &label_ptr); 121 | void loadTrainOneBatch(int batch_idx, 122 | Dtype* &mini_pixel, int* &mini_label); 123 | void loadValidOneBatch(int batch_idx, 124 | Dtype* &mini_pixel, int* &mini_label); 125 | 126 | }; 127 | 128 | 129 | #include "../src/load_layer.cpp" 130 | 131 | #endif 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /dl/include/logistic.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file logistic.cuh 3 | /// @brief 实现了softmax 4 | 5 | #ifndef LOGISTIC_CUH_ 6 | #define LOGISTIC_CUH_ 7 | 8 | #include 9 | #include "layer.hpp" 10 | #include "layer_kernel.cuh" 11 | 12 | template 13 | class Logistic : public Layer { 14 | 15 | public: 16 | Logistic(FullConnectParam* fcp); 17 | ~Logistic(); 18 | 19 | void initCuda(); 20 | void computeOutput(Matrix* x); 21 | double computeError(Matrix* labels, int& num_error); 22 | using Layer::computeDerivsOfInput; 23 | void computeDerivsOfInput(Matrix* x, Matrix* labels); 24 | 25 | inline Matrix* getResultRecord(){ 26 | _d_record->copyFromHost(_h_record, this->_y->getNumCols() * this->_y->getNumCols()); 27 | return _d_record; 28 | } 29 | inline void setRecordToZero(){ 30 | memset(_h_record, 0, sizeof(int) * this->_y->getNumCols() * this->_y->getNumCols()); 31 | } 32 | 33 | 34 | private: 35 | FullConnectParam* _fcp; 36 | int* h_labels; 37 | Dtype* y_CPU; 38 | Dtype* correct_probs; 39 | Matrix* d_max_pos_of_out; 40 | Dtype* h_max_pos_of_out; 41 | 42 | Matrix* _d_record; ///>这个变量用来存储最后分类的结果,10*10的矩阵 43 | int* _h_record; 44 | 45 | 46 | }; 47 | 48 | #include "../src/logistic.cu" 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /dl/include/matrix.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file matrix.hpp 3 | /// \brief 继承数据类,拥有矩阵的特性 4 | /// 5 | 6 | 7 | #ifndef Matrix_H_ 8 | #define Matrix_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include "cublas_v2.h" 14 | #include "data.hpp" 15 | 16 | #define CUDA_ERROR_CHECK 17 | 18 | #define cudaCheckError() __cudaCheckError(__FILE__, __LINE__) 19 | 20 | inline void __cudaCheckError(const char *file, const int line){ 21 | #ifdef CUDA_ERROR_CHECK 22 | cudaError err = cudaGetLastError(); 23 | if(cudaSuccess != err){ 24 | fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n", \ 25 | file, line, cudaGetErrorString(err)); 26 | exit(-1); 27 | } 28 | #endif 29 | } 30 | 31 | using namespace std; 32 | 33 | /// \brief 实现了矩阵类,数据将以矩阵形式保存 34 | /// 35 | template 36 | class Matrix : public Data { 37 | private: 38 | static cudaDeviceProp deviceProps; ///< 查询gpu硬件规格 39 | 40 | public: 41 | 42 | /// 运算的枚举 43 | /// 44 | /// 该枚举定义了对类中成员执行何种运算 45 | enum 
FUNCTIONS { 46 | LOG, EXP, RECIPROCAL, SOFTMAX, SIGMOID, DROPOUT 47 | }; 48 | 49 | Matrix(int numRows, int numCols); 50 | 51 | Matrix(const Matrix *like, bool copy); 52 | 53 | Matrix(const Matrix *like); 54 | 55 | ~Matrix(); 56 | /// \brief 初始化类中成员,为行列赋值 57 | 58 | void _init(int numRows, int numCols); 59 | 60 | /// \brief 判断两个对象维数是否相等 61 | inline bool isSameDims(const Matrix *m) const { 62 | return m->getNumRows() == this->_shape[0] && m->getNumCols() == this->_shape[1]; 63 | } 64 | 65 | inline int getNumRows() const { 66 | return this->_shape[0]; 67 | } 68 | 69 | inline int getNumCols() const { 70 | return this->_shape[1]; 71 | } 72 | 73 | inline int getNumEles() const { 74 | return this->_amount; 75 | } 76 | 77 | inline void changePtr(const int add) { 78 | this->_data_value = this->_data_value + add; 79 | } 80 | 81 | inline void changePtrFromStart(Dtype *start, const int add) { 82 | this->_data_value = start + add; 83 | } 84 | 85 | inline void setPtr(Dtype *start) { 86 | this->_data_value = start; 87 | } 88 | 89 | /// \brief 求矩阵转置 90 | void getTranspose(Matrix *target); 91 | 92 | /// \brief 矩阵右乘 93 | /// \param[in] b 94 | /// \param[out] target 两个矩阵相乘输出 95 | void rightMult(Matrix *b, float scale_AB, Matrix *target, \ 96 | cublasHandle_t &handle); 97 | 98 | /// \brief 将每一行累加起来生成一列,列个数保持不变 99 | /// \param[out] target 100 | void sumRow(Matrix *target); 101 | 102 | void sumCol(Matrix *target); 103 | 104 | /// \brief 用一个标量减去整个矩阵 105 | /// \param[out] target 假如没有这个参数,那么计算结果保存在调用矩阵中 106 | void subtractFromScalar(float scalar, Matrix *target); 107 | 108 | void subtractFromScalar(float scalar); 109 | 110 | /// \brief 矩阵间点乘 111 | /// 112 | /// 点乘结果保存在调用矩阵中 113 | /// \param[in] b 用来与调用矩阵进行点乘 114 | /// \param[out] target 保存矩阵与列向量点乘,若没有这个参数,则保存在调用矩阵中 115 | void eltWiseMult(Matrix *b, Matrix *target); 116 | 117 | void eltWiseMult(Matrix *b); 118 | 119 | /// \brief 矩阵每一列与列向量相加 120 | /// \param[in] vec 用来加法的列向量 121 | /// \param[out] target 保存矩阵与列向量相加结果,若没有这个参数,则保存在调用矩阵中 122 | void addColVector(Matrix *vec, float scale_vec, Matrix *target); 123 | 124 | void addColVector(Matrix *vec); 125 | 126 | void addRowVector(Matrix *vec, float scale_vec, Matrix *target); 127 | 128 | void addRowVector(Matrix *vec); 129 | 130 | /// \brief 对矩阵每一个值执行某种运算 131 | /// 132 | /// 针对矩阵每一个值,可以执行FUNCTIONS枚举量中任意一种运算 133 | /// \param[out] target 保存执行运算后的值,没有该参数,则保存在调用矩阵中 134 | void apply(FUNCTIONS f, Matrix *target); 135 | 136 | void apply(FUNCTIONS f); 137 | 138 | void applyRelu(Matrix* target, Matrix* record, bool direction = true); 139 | 140 | void applyDropout(Matrix *target, Matrix* record, \ 141 | Matrix* rand_probs, bool is_set_up); 142 | 143 | /// \brief 矩阵间点加 144 | /// 145 | /// 将输入的三个矩阵点加,然后保存在调用矩阵中 146 | /// \param[in] b 用来与调用矩阵进行点加 147 | /// \param[in] c 点加 148 | void addSum(Matrix *b, Matrix *c, float scale_This, \ 149 | float scale_B, float scale_C); 150 | 151 | void add(Matrix *b, float scale_This, float scale); 152 | 153 | /// \brief 矩阵一行最大值 154 | /// \param[out] max_vec 保存每一行的最大值的位置 155 | void maxPosInRow(Matrix *max_vec); 156 | 157 | 158 | /// \brief 打印矩阵 159 | /// \param[in] name 矩阵的名称 160 | void showValue(string name); 161 | 162 | /// \brief 给矩阵重新赋值 163 | /// 164 | /// 输入是float时,矩阵全部赋值为这个值。输入是int时,矩阵每个位置对这个int取余 165 | void reValue(float value); 166 | 167 | void reValue(int value, bool is_div = false); 168 | 169 | Dtype computeNorm(int len); 170 | 171 | void cropMatToNew(Matrix *tar, const int row_start, const int cropped_height, \ 172 | const int col_start, const int cropped_width); 173 | 174 | Dtype 
getPosValue(int pos); 175 | Dtype getFirstPosValue(); 176 | 177 | void savePars(string filename); 178 | void readPars(string filename); 179 | 180 | void subedByUnitMat(); 181 | void subPortion(Matrix* b, const int b_row, \ 182 | const int b_col); 183 | void setValueAt(const int height_idx, \ 184 | const int width_idx, const Dtype value); 185 | }; 186 | 187 | #include "../src/matrix.cu" 188 | 189 | #endif 190 | -------------------------------------------------------------------------------- /dl/include/matrix_kernel.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file matrix_kernel.hpp 3 | /// \brief kernel functions for the Matrix class 4 | 5 | #ifndef MATRIX_KERNEL_H_ 6 | #define MATRIX_KERNEL_H_ 7 | 8 | #include 9 | 10 | #define NUM_BLOCKS_MAX 65535 11 | 12 | #define ADD_BLOCK_SIZE 16 13 | #define COPY_BLOCK_SIZE 16 14 | 15 | #define DIVUP(a, b) (((a) + (b) - 1) / (b)) 16 | 17 | template 18 | __global__ void kTranspose(Dtype* srcData, Dtype* dstData, \ 19 | const int width, const int height); 20 | 21 | /// \brief GPU implementation of addRowVector 22 | /// 23 | /// \param[in] width width and height of the matrix being passed in 24 | template 25 | __global__ void kAddRowVector(Dtype* mat, Dtype* vec, Dtype* tgtMat, \ 26 | const int width, const int height, float scaleVec); 27 | 28 | template 29 | __global__ void kSubtractFromScalar(Dtype* gData, float scalar, Dtype* target, \ 30 | const int width, const int height); 31 | 32 | template 33 | __global__ void kSoftmax(Dtype* gData, Dtype* target, const int width, \ 34 | const int height); 35 | 36 | template 37 | __global__ void kReciprocal(Dtype* gData, Dtype* target, const int width, \ 38 | const int height); 39 | 40 | template 41 | __global__ void kLog(Dtype* gData, Dtype* target, const int width, \ 42 | const int height); 43 | 44 | template 45 | __global__ void kSigmoid(Dtype* gData, Dtype* target, const int width, \ 46 | const int height); 47 | 48 | template 49 | __global__ void kSetUpCurand(curandState *state, const int width, const int height); 50 | 51 | template 52 | __global__ void kDropout(Dtype* gData, Dtype* target, int* record, \ 53 | curandState *state, const int width, const int height); 54 | 55 | template 56 | __global__ void kRelu(Dtype* gData, Dtype* target, int* record, const int length); 57 | 58 | template 59 | __global__ void kReluBack(Dtype* gData, Dtype* target, int* record, const int length); 60 | 61 | template 62 | __global__ void kDumbSumCols(Dtype* mat, Dtype* vec, const int width, \ 63 | const int height); 64 | 65 | template 66 | __global__ void kDumbMaxPosInRow(Dtype* mat, Dtype* vec, const int width, \ 67 | const int height); 68 | 69 | template 70 | __global__ void kMult(Dtype* matA, Dtype* matB, Dtype* tgtMat, \ 71 | const int width, const int height); 72 | 73 | template 74 | __global__ void kAdd(Dtype* matA, Dtype* matB, Dtype* tgtMat, float scaleA, \ 75 | float scaleB, const int width, const int height); 76 | 77 | //dst = (src + [added_value, 0, ..., 0]) * scale 78 | template 79 | __global__ void kComputeHouseholderVec(const Dtype* src, Dtype* dst, \ 80 | Dtype added_value, Dtype scale, const int len); 81 | 82 | template 83 | __global__ void kSubedByUnitMat(Dtype* matA, Dtype* tgtMat, \ 84 | const int width, const int height); 85 | 86 | //B covers only a portion of A; subtract that portion from A 87 | template 88 | __global__ void kSubPortion(Dtype* matA, Dtype* matB, Dtype* tgtMat, \ 89 | const int a_width, const int a_height, \ 90 | const int b_width, const int b_height); 91 | 92 | #include "../src/matrix_kernel.cu" 93 | 94 | 95 | #endif 96 |
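The two headers above declare the GPU matrix type (matrix.hpp) and its CUDA kernels (matrix_kernel.hpp). As orientation only, the following editorial sketch, which is not a file of the repository, shows how the declared API is meant to be combined: a cuBLAS-backed matrix product, a bias add, and a row-wise softmax. The dimensions, the buffers host_a/host_w/host_b and the wrapper function run_softmax_layer are illustrative assumptions; the Matrix/Data member calls themselves are taken from the declarations above, and the sketch assumes a CUDA device and host arrays of matching sizes.

#include "matrix.hpp"

// Illustrative only: y = softmax(a * w + bias), computed on the GPU.
void run_softmax_layer(float* host_a, float* host_w, float* host_b) {
    cublasHandle_t handle;
    cublasCreate(&handle);

    Matrix<float> a(64, 128);     // input activations, 64 samples x 128 features
    Matrix<float> w(128, 10);     // weights
    Matrix<float> bias(1, 10);    // one bias per output column
    Matrix<float> y(64, 10);      // output

    a.copyFromHost(host_a, a.getNumEles());
    w.copyFromHost(host_w, w.getNumEles());
    bias.copyFromHost(host_b, bias.getNumEles());

    y.zeros();
    a.rightMult(&w, 1.0f, &y, handle);   // GEMM via cuBLAS
    y.addRowVector(&bias);               // add the bias row to every row of y
    y.apply(Matrix<float>::SOFTMAX);     // in-place row-wise softmax
    y.showValue("y");                    // print the device matrix for inspection

    cublasDestroy(handle);
}

This mirrors how InnerProductLayer and Logistic later combine rightMult, addRowVector and apply(SOFTMAX) in their computeOutput methods.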
-------------------------------------------------------------------------------- /dl/include/model_component.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file model_component.hpp 3 | /// \brief Describes the components that make up the network model 4 | /// 5 | 6 | 7 | #ifndef MODELCOMPONENT_H_ 8 | #define MODELCOMPONENT_H_ 9 | 10 | #include 11 | #include 12 | #include "matrix.hpp" 13 | #include "param.h" 14 | #include "layer.hpp" 15 | 16 | using namespace std; 17 | 18 | 19 | /// \brief Implements the components of the network, e.g. how many convolution layers and how many fully connected layers it consists of 20 | /// 21 | template 22 | class ModelComponent { 23 | 24 | template 25 | friend class TrainModel; 26 | 27 | template 28 | friend class TrainClassification; 29 | 30 | private: 31 | 32 | long long _num_train; ///>parameters of the model 33 | int _num_valid; 34 | int _minibatch_size; 35 | int _num_train_batch; 36 | int _num_valid_batch; 37 | int _num_epoch; 38 | int _num_layers; 39 | int _num_need_train_layers; 40 | int _img_height; 41 | int _img_width; 42 | int _img_channel; 43 | int _one_img_len; ///>length of a single input image 44 | 45 | vector< Layer* > _layers; ///>pointers to every layer 46 | vector< Layer* > _layers_needed_train; 47 | vector _layers_param; ///>parameters of every layer 48 | vector _layers_need_train_param; 49 | vector _w_len; ///>weight lengths of the trainable layers, used to pass data between processes 50 | vector _bias_len; 51 | vector _w_init_gauss; 52 | vector< Matrix* > _w; ///>weight pointers of the trainable layers 53 | vector< Matrix* > _bias; 54 | 55 | vector< Matrix* > _y; 56 | vector< Matrix* > _dE_dy; 57 | vector< Matrix* > _y_needed_train; 58 | 59 | Matrix* _mini_data; ///> stores the pixel values 60 | Matrix* _mini_label; ///> stores the object class labels 61 | 62 | map _string_map_layertype; 63 | map _string_map_pooltype; 64 | 65 | public: 66 | 67 | ModelComponent(); 68 | ~ModelComponent() {} 69 | 70 | 71 | void setImgHeight(const int img_height){ 72 | _img_height = img_height; 73 | } 74 | void setImgWidth(const int img_width){ 75 | _img_width = img_width; 76 | } 77 | void setImgChannel(const int img_channel){ 78 | _img_channel = img_channel; 79 | } 80 | void setOneImgLen(const int one_img_len){ 81 | _one_img_len = one_img_len; 82 | } 83 | void setNumLayers(const int num_layers){ 84 | _num_layers = num_layers; 85 | } 86 | void setNumNeedTrainLayers(const int num_need_train_layers){ 87 | _num_need_train_layers= num_need_train_layers; 88 | } 89 | void setNumTrain(const long long num_train){ 90 | _num_train = num_train; 91 | } 92 | void setNumValid(const int num_valid){ 93 | _num_valid = num_valid; 94 | } 95 | void setMinibatchSize(const int minibatch_size){ 96 | _minibatch_size = minibatch_size; 97 | } 98 | void setNumTrainBatch(){ 99 | _num_train_batch = _num_train / _minibatch_size; 100 | } 101 | void setNumValidBatch(){ 102 | _num_valid_batch = _num_valid / _minibatch_size; 103 | } 104 | void setEpoch(const int num_epoch){ 105 | _num_epoch = num_epoch; 106 | } 107 | void setLayers(Layer* layer){ 108 | _layers.push_back(layer); 109 | } 110 | void setNeedTrainLayers(Layer* need_train_layer){ 111 | _layers_needed_train.push_back(need_train_layer); 112 | } 113 | void setLayersParam(Param* param){ 114 | _layers_param.push_back(param); 115 | } 116 | void setNeedTrainLayersParam(Param* param){ 117 | _layers_need_train_param.push_back(param); 118 | } 119 | void setWLen(int w_len){ 120 | _w_len.push_back(w_len); 121 | } 122 | void setBiasLen(int bias_len){ 123 | _bias_len.push_back(bias_len); 124 | } 125 | void setW(Matrix *w){ 126 | _w.push_back(w); 127 | } 128 | void setBias(Matrix *bias){ 129 | _bias.push_back(bias); 130 | } 131 | void setY(Matrix *y){ 132 | _y.push_back(y); 133 | } 134 | void setDEDY(Matrix *dE_dy) { 135 |
_dE_dy.push_back(dE_dy); 136 | } 137 | 138 | int getImgHeight(){ 139 | return _img_height; 140 | } 141 | int getImgWidth(){ 142 | return _img_width; 143 | } 144 | int getImgChannel(){ 145 | return _img_channel; 146 | } 147 | int getOneImgLen(){ 148 | return _one_img_len; 149 | } 150 | int getNumLayers(){ 151 | return _num_layers; 152 | } 153 | int getNumNeedTrainLayers(){ 154 | return _num_need_train_layers; 155 | } 156 | long long getNumTrain(){ 157 | return _num_train; 158 | } 159 | int getNumValid(){ 160 | return _num_valid; 161 | } 162 | int getMinibatchSize(){ 163 | return _minibatch_size; 164 | } 165 | int getNumTrainBatch(){ 166 | return _num_train_batch; 167 | } 168 | int getNumValidBatch(){ 169 | return _num_valid_batch; 170 | } 171 | int getNumEpoch(){ 172 | return _num_epoch; 173 | } 174 | vector< Layer* > getLayers(){ 175 | return _layers; 176 | } 177 | vector< Layer* > getNeedTrainLayers(){ 178 | return _layers_needed_train; 179 | } 180 | vector getLayersParam(){ 181 | return _layers_param; 182 | } 183 | vector getNeedTrainLayersParam(){ 184 | return _layers_need_train_param; 185 | } 186 | vector getWLen(){ 187 | return _w_len; 188 | } 189 | vector getBiasLen(){ 190 | return _bias_len; 191 | } 192 | vector< Matrix* > getW(){ 193 | return _w; 194 | } 195 | vector< Matrix* > getBias(){ 196 | return _bias; 197 | } 198 | vector< Matrix* > getY(){ 199 | return _y; 200 | } 201 | vector< Matrix* > getDEDY(){ 202 | return _dE_dy; 203 | } 204 | 205 | }; 206 | 207 | #include "../src/model_component.cpp" 208 | 209 | #endif 210 | -------------------------------------------------------------------------------- /dl/include/param.h: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file param.h 3 | /// 4 | #ifndef PARAM_H_ 5 | #define PARAM_H_ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | #define MAX_THREAD_SIZE 32 14 | #define MAX_NUM_KERNEL 4096 15 | #define MAX_NUM_THREAD 1024 16 | 17 | typedef enum PARAM_CONNECT_TYPE { 18 | PARAM_CONNECT_TYPE_LOCAL = 0, 19 | PARAM_CONNECT_TYPE_FULL = 1 20 | } ConnectType; 21 | 22 | typedef enum POOLING_TYPE { 23 | MAX_POOLING = 0, 24 | AVG_POOLING = 1 25 | } PoolingType; 26 | 27 | typedef enum PARAM_TRAIN_TYPE { 28 | NOTNEED = 0, 29 | NEED = 1 30 | } ParamTrainType; 31 | 32 | typedef enum LAYER_TYPE { 33 | CONVOLUTION = 0, 34 | POOLING = 1, 35 | SIGMOID = 2, 36 | RECTIFIED = 3, 37 | INNERPRODUCT = 4, 38 | SOFTMAX = 5, 39 | DROPOUT = 6, 40 | PREDICTOBJECT = 7, 41 | RECOMMENDSUBSTITUE = 8, 42 | RECOMMENDCOMPATIBLE = 9 43 | } LayerType; 44 | 45 | /// \brief 实现了每一层的参数 46 | /// 47 | class Param { 48 | 49 | public: 50 | Param() { } 51 | 52 | virtual ~Param() { } 53 | 54 | Param(string name, LayerType layer_type) : \ 55 | _name(name), _layer_type(layer_type), \ 56 | _param_train_type(NOTNEED){} 57 | 58 | virtual int getNumOut() {return 0;} 59 | virtual int getOutChannel() {return 0;} 60 | virtual int getOutWidth() {return 0;} 61 | virtual int getOutHeight() {return 0;} 62 | 63 | inline int getMinibatchSize() { 64 | return _minibatch_size; 65 | } 66 | inline string getName(){ 67 | return _name; 68 | } 69 | inline ConnectType getConnectType() { 70 | return type; 71 | } 72 | ParamTrainType getParamTrainType(){ 73 | return _param_train_type; 74 | } 75 | LayerType getLayerType(){ 76 | return _layer_type; 77 | } 78 | virtual void printParam(){ 79 | cout << "\n============"<< _name << "============" \ 80 | << "\nlayer_type: " << _layer_type; 81 | } 82 | static void 
setMinibatchSize(const int minibatch_size){ 83 | _minibatch_size = minibatch_size; 84 | } 85 | 86 | protected: 87 | string _name; ///> 实例化每一层的名字,用来区分不同的层 88 | static int _minibatch_size; 89 | ConnectType type; 90 | ParamTrainType _param_train_type; 91 | LayerType _layer_type; 92 | }; 93 | 94 | /// \brief 实现了需要训练的层参数,主要为了改变权重和调节学习率 95 | class TrainParam : public virtual Param { 96 | public: 97 | TrainParam() { } 98 | 99 | virtual ~TrainParam() { } 100 | 101 | TrainParam(const float w_lr, const float b_lr, \ 102 | const float momentum, const float weight_decay, \ 103 | const float w_gauss) \ 104 | : _w_lr(w_lr), _b_lr(b_lr), _momentum(momentum), \ 105 | _weight_decay(w_lr*weight_decay), _w_gauss(w_gauss){ 106 | this->_param_train_type = NEED; 107 | } 108 | 109 | inline void lrMultiScale(float lr_scale) { 110 | _w_lr *= lr_scale; 111 | _b_lr *= lr_scale; 112 | cout << _w_lr << ":" << _b_lr << endl; 113 | } 114 | inline void lrChangeTo(float new_w, float new_b) { 115 | _w_lr = new_w; 116 | _b_lr = new_b; 117 | } 118 | inline float getWLR() { 119 | return _w_lr; 120 | } 121 | inline float getBiasLR() { 122 | return _b_lr; 123 | } 124 | inline float getMomentum() { 125 | return _momentum; 126 | } 127 | inline float getWeightDecay() { 128 | return _weight_decay; 129 | } 130 | float getWGauss() { 131 | return _w_gauss; 132 | } 133 | void printParam(){ 134 | cout << "\nw_lr: " << _w_lr \ 135 | << "\nb_lr: " << _b_lr \ 136 | << "\nmomentum: " << _momentum \ 137 | << "\nweight_decay: " << _weight_decay \ 138 | << "\nw_gauss: " << _w_gauss ; 139 | } 140 | 141 | protected: 142 | float _w_lr; 143 | float _b_lr; 144 | float _momentum; 145 | float _weight_decay; 146 | float _w_gauss; 147 | 148 | }; 149 | 150 | /// \brief 局部连接层的参数,以图片形式保存数据 151 | class LocalConnectParam : public virtual Param { 152 | public: 153 | 154 | LocalConnectParam() { } 155 | 156 | virtual ~LocalConnectParam() { } 157 | 158 | LocalConnectParam(LayerType layer_type, string name, const int in_height, \ 159 | const int in_width, const int pad_height, const int pad_width, \ 160 | const int stride_height, const int stride_width, \ 161 | const int in_channel, \ 162 | const int filter_height, const int filter_width, const int out_channel) \ 163 | : _in_height(in_height), _in_width(in_width), _stride_height(stride_height), \ 164 | _stride_width(stride_width), _in_channel(in_channel), \ 165 | _pad_height(pad_height), _pad_width(pad_width), \ 166 | _filter_height(filter_height), _filter_width(filter_width), \ 167 | _out_channel(out_channel){ 168 | 169 | this->_layer_type = layer_type; 170 | this->_name = name; 171 | this->type = PARAM_CONNECT_TYPE_LOCAL; 172 | _padded_in_height = in_height + 2 * pad_height; 173 | _padded_in_width = in_width + 2 * pad_width; 174 | _out_height = ceil(((_padded_in_height - filter_height)*1.0f) / stride_height) + 1; 175 | _out_width = ceil(((_padded_in_width - filter_width)*1.0f) / stride_width) + 1; 176 | _box_num_height = ceil((this->getOutHeight() - MAX_THREAD_SIZE) \ 177 | * 1.0f / MAX_THREAD_SIZE) + 1; 178 | _box_num_width = ceil((this->getOutWidth() - MAX_THREAD_SIZE) \ 179 | * 1.0f / MAX_THREAD_SIZE) + 1; 180 | _box_out_height = MAX_THREAD_SIZE > _out_height \ 181 | ? _out_height : MAX_THREAD_SIZE; 182 | _box_out_width = MAX_THREAD_SIZE > _out_width \ 183 | ? 
_out_width : MAX_THREAD_SIZE; 184 | _box_in_height = (_box_out_height - 1) * stride_height + filter_height; 185 | _box_in_width = (_box_out_width - 1) * stride_width + filter_width; 186 | 187 | int pow2Length = _out_height; 188 | if(pow2Length & (pow2Length - 1)){ 189 | while(pow2Length & (pow2Length - 1)){ 190 | pow2Length &= pow2Length - 1; 191 | } 192 | pow2Length *= 2; 193 | } 194 | _thread_height = pow2Length > MAX_THREAD_SIZE \ 195 | ? MAX_THREAD_SIZE : pow2Length; 196 | 197 | pow2Length = _out_width; 198 | if(pow2Length & (pow2Length - 1)){ 199 | while(pow2Length & (pow2Length - 1)){ 200 | pow2Length &= pow2Length - 1; 201 | } 202 | pow2Length *= 2; 203 | } 204 | _thread_width = pow2Length > MAX_THREAD_SIZE \ 205 | ? MAX_THREAD_SIZE : pow2Length; 206 | 207 | _overlap_height = _filter_height - stride_height; 208 | _overlap_width = _filter_width - stride_width; 209 | 210 | } 211 | 212 | LocalConnectParam(LayerType layer_type, string name, \ 213 | const int pad_height, const int pad_width, \ 214 | const int stride_height, const int stride_width, \ 215 | const int filter_height, const int filter_width, const int filter_channel, \ 216 | LocalConnectParam* lc_par) \ 217 | : _in_height(lc_par->getOutHeight()), _in_width(lc_par->getOutWidth()), \ 218 | _stride_height(stride_height), _stride_width(stride_width), \ 219 | _in_channel(lc_par->getOutChannel()), _pad_height(pad_height), _pad_width(pad_width), \ 220 | _filter_height(filter_height), _filter_width(filter_width) { 221 | 222 | this->_layer_type = layer_type; 223 | this->_name = name; 224 | if(filter_channel != 0) 225 | _out_channel = filter_channel; 226 | else 227 | _out_channel = _in_channel; 228 | 229 | this->type = PARAM_CONNECT_TYPE_LOCAL; 230 | 231 | _padded_in_height = _in_height + 2 * pad_height; 232 | _padded_in_width = _in_width + 2 * pad_width; 233 | _out_height = ceil(((_padded_in_height - filter_height)*1.0f) / stride_height) + 1; 234 | _out_width = ceil(((_padded_in_width - filter_width)*1.0f) / stride_width) + 1; 235 | _box_num_height = ceil((this->getOutHeight() - MAX_THREAD_SIZE) \ 236 | * 1.0f / MAX_THREAD_SIZE) + 1; 237 | _box_num_width = ceil((this->getOutWidth() - MAX_THREAD_SIZE) \ 238 | * 1.0f / MAX_THREAD_SIZE) + 1; 239 | 240 | _box_out_height = MAX_THREAD_SIZE > _out_height \ 241 | ? _out_height : MAX_THREAD_SIZE; 242 | _box_out_width = MAX_THREAD_SIZE > _out_width \ 243 | ? _out_width : MAX_THREAD_SIZE; 244 | 245 | _box_in_height = (_box_out_height - 1) * stride_height + filter_height; 246 | _box_in_width = (_box_out_width - 1) * stride_width + filter_width; 247 | 248 | int pow2Length = _out_height; 249 | if(pow2Length & (pow2Length - 1)){ 250 | while(pow2Length & (pow2Length - 1)){ 251 | pow2Length &= pow2Length - 1; 252 | } 253 | pow2Length *= 2; 254 | } 255 | _thread_height = pow2Length > MAX_THREAD_SIZE \ 256 | ? MAX_THREAD_SIZE : pow2Length; 257 | 258 | pow2Length = _out_width; 259 | if(pow2Length & (pow2Length - 1)){ 260 | while(pow2Length & (pow2Length - 1)){ 261 | pow2Length &= pow2Length - 1; 262 | } 263 | pow2Length *= 2; 264 | } 265 | _thread_width = pow2Length > MAX_THREAD_SIZE \ 266 | ?
MAX_THREAD_SIZE : pow2Length; 267 | 268 | _overlap_height = _filter_height - stride_height; 269 | _overlap_width = _filter_width - stride_width; 270 | 271 | 272 | } 273 | 274 | inline int getInHeight() { 275 | return _in_height; 276 | } 277 | inline int getInWidth() { 278 | return _in_width; 279 | } 280 | inline int getInChannel() { 281 | return _in_channel; 282 | } 283 | inline int getOutHeight() { 284 | return _out_height; 285 | } 286 | inline int getOutWidth() { 287 | return _out_width; 288 | } 289 | inline int getFilterHeight() { 290 | return _filter_height; 291 | } 292 | inline int getFilterWidth() { 293 | return _filter_width; 294 | } 295 | inline int getOutChannel() { 296 | return _out_channel; 297 | } 298 | inline int getPaddedInHeight() { 299 | return _padded_in_height; 300 | } 301 | inline int getPaddedInWidth() { 302 | return _padded_in_width; 303 | } 304 | 305 | inline int getStrideHeight(){ 306 | return _stride_height; 307 | } 308 | inline int getStrideWidth(){ 309 | return _stride_width; 310 | } 311 | inline int getPadHeight(){ 312 | return _pad_height; 313 | } 314 | inline int getPadWidth(){ 315 | return _pad_width; 316 | } 317 | int getOverlapHeight(){ 318 | return _overlap_height; 319 | } 320 | int getOverlapWidth(){ 321 | return _overlap_width; 322 | } 323 | int getThreadHeight(){ 324 | return _thread_height; 325 | } 326 | int getThreadWidth(){ 327 | return _thread_width; 328 | } 329 | void printParam(){ 330 | Param::printParam(); 331 | cout << "\nin_height: " << _in_height \ 332 | << "\nin_width: " << _in_width \ 333 | << "\nin_channel: " << _in_channel \ 334 | << "\nfilter_height: " << _filter_height \ 335 | << "\nfilter_width: " << _filter_width \ 336 | << "\nfilter_channel: " << _out_channel \ 337 | << "\npad_height: " << _pad_height \ 338 | << "\npad_width: " << _pad_width \ 339 | << "\nstride_height: " << _stride_height \ 340 | << "\nstride_width: " << _stride_width; 341 | } 342 | inline int getBoxNumHeight(){ 343 | return _box_num_height; 344 | } 345 | inline int getBoxNumWidth(){ 346 | return _box_num_width; 347 | } 348 | inline int getBoxInHeight(){ 349 | return _box_in_height; 350 | } 351 | inline int getBoxInWidth(){ 352 | return _box_in_width; 353 | } 354 | inline int getBoxOutHeight(){ 355 | return _box_out_height; 356 | } 357 | inline int getBoxOutWidth(){ 358 | return _box_out_width; 359 | } 360 | 361 | private: 362 | int _in_height; 363 | int _in_width; 364 | int _pad_height; 365 | int _pad_width; 366 | int _padded_in_height; 367 | int _padded_in_width; 368 | int _stride_height; 369 | int _stride_width; 370 | int _in_channel; 371 | int _filter_height; ///>在卷积中是filter,在pooling中是pool 372 | int _filter_width; ///>在卷积中是filter,在pooling中是pool 373 | int _out_height; 374 | int _out_width; 375 | int _out_channel; 376 | int _box_in_height; ///>用来计算一个box输出的 377 | int _box_in_width; ///>用来计算一个box输出的卷积输入 378 | int _box_out_height; 379 | int _box_out_width; 380 | int _box_num_height; ///>总的box个数的行 381 | int _box_num_width; ///>总的box个数的列 382 | int _thread_height; 383 | int _thread_width; 384 | int _overlap_height; 385 | int _overlap_width; 386 | }; 387 | 388 | /// \brief 全连接层的参数,展开图片为一个矢量保存数据 389 | /// 390 | /// 可以针对每一个值做某种操作,例如Relu、sigmoid、tanh等, 391 | /// 此处不需要训练 392 | class FullConnectParam : public virtual Param { 393 | public: 394 | FullConnectParam() { } 395 | virtual ~FullConnectParam() { } 396 | FullConnectParam(LayerType layer_type, string name, \ 397 | const int num_in, const int num_out) \ 398 | : _num_in(num_in), _num_out(num_out) { 399 | this->_layer_type = 
layer_type; 400 | this->_name = name; 401 | this->type = PARAM_CONNECT_TYPE_FULL; 402 | } 403 | FullConnectParam(LayerType layer_type, string name, \ 404 | const int num_out, Param* par){ 405 | this->_layer_type = layer_type; 406 | this->_name = name; 407 | this->type = PARAM_CONNECT_TYPE_FULL; 408 | 409 | ///由传递进来的层类型决定计算方式 410 | ConnectType ct = par->getConnectType(); 411 | if(ct == PARAM_CONNECT_TYPE_LOCAL) 412 | _num_in = par->getOutHeight()*par->getOutWidth()*par->getOutChannel(); 413 | else if(ct == PARAM_CONNECT_TYPE_FULL) 414 | _num_in = par->getNumOut(); 415 | 416 | if(num_out != 0) 417 | _num_out = num_out; 418 | else 419 | _num_out = _num_in; 420 | } 421 | 422 | 423 | inline int getNumIn() { 424 | return _num_in; 425 | } 426 | 427 | inline int getNumOut() { 428 | return _num_out; 429 | } 430 | void printParam(){ 431 | Param::printParam(); 432 | cout << "\nnum_in: " << _num_in \ 433 | << "\nnum_out: " << _num_out; 434 | } 435 | 436 | private: 437 | int _num_in; 438 | int _num_out; 439 | }; 440 | 441 | class ConvParam : public TrainParam, public LocalConnectParam { 442 | public: 443 | ConvParam(){} 444 | 445 | ~ConvParam(){} 446 | 447 | ConvParam(const LayerType layer_type, const string name, \ 448 | const float w_lr, \ 449 | const float b_lr, const float momentum, \ 450 | const float weight_decay, const float w_gauss, \ 451 | const int in_height, const int in_width, \ 452 | const int pad_height, const int pad_width, \ 453 | const int stride_height, \ 454 | const int stride_width, const int in_channel, \ 455 | const int filter_height, const int filter_width, \ 456 | const int filter_channel) \ 457 | : TrainParam(w_lr, b_lr, momentum, weight_decay, w_gauss), \ 458 | LocalConnectParam(layer_type, name, in_height, in_width, \ 459 | pad_height, pad_width, stride_height, stride_width, \ 460 | in_channel, filter_height, \ 461 | filter_width, filter_channel) {} 462 | 463 | ConvParam(const LayerType layer_type, const string name, const float w_lr, \ 464 | const float b_lr, const float momentum, \ 465 | const float weight_decay, const float w_gauss, \ 466 | const int pad_height, const int pad_width, \ 467 | const int stride_height, const int stride_width, const int filter_height, \ 468 | const int filter_width, \ 469 | const int filter_channel, LocalConnectParam *lc_par) \ 470 | : TrainParam(w_lr, b_lr, momentum, weight_decay, w_gauss), \ 471 | LocalConnectParam(layer_type, name, pad_height, pad_width, stride_height, \ 472 | stride_width, \ 473 | filter_height, filter_width, filter_channel, lc_par) {} 474 | void printParam(){ 475 | LocalConnectParam::printParam(); 476 | TrainParam::printParam(); 477 | } 478 | }; 479 | 480 | class PoolParam : public LocalConnectParam { 481 | public: 482 | PoolParam() {} 483 | ~PoolParam() {} 484 | 485 | PoolParam(const LayerType layer_type, const string name, \ 486 | const int in_height, const int in_width, \ 487 | const int pad_height, const int pad_width, \ 488 | const int stride_height, const int stride_width, \ 489 | const int in_channel, const int filter_height, \ 490 | const int filter_width, \ 491 | const int filter_channel, PoolingType p_type) 492 | : LocalConnectParam(layer_type, name, in_height, in_width, \ 493 | pad_height, pad_width, stride_height, stride_width, \ 494 | in_channel, filter_height, \ 495 | filter_width, filter_channel) , \ 496 | _p_type(p_type) {} 497 | 498 | PoolParam(const LayerType layer_type, const string name, \ 499 | const int pad_height, const int pad_width, \ 500 | const int stride_height, const int stride_width, \ 501 | 
const int filter_height, const int filter_width, \ 502 | const int filter_channel, \ 503 | LocalConnectParam* lc_par, PoolingType p_type) 504 | : LocalConnectParam(layer_type, name, pad_height, \ 505 | pad_width, stride_height, \ 506 | stride_width, \ 507 | filter_height, filter_width, \ 508 | filter_channel, lc_par), _p_type(p_type){} 509 | 510 | inline PoolingType getPoolType(){ 511 | return _p_type; 512 | } 513 | void printParam(){ 514 | LocalConnectParam::printParam(); 515 | } 516 | 517 | 518 | private: 519 | PoolingType _p_type; 520 | }; 521 | 522 | 523 | /// \brief 可以进行训练的全连接层 524 | class InnerParam : public TrainParam, public FullConnectParam { 525 | public: 526 | InnerParam(){} 527 | 528 | ~InnerParam() {} 529 | 530 | InnerParam(const LayerType layer_type, const string name, \ 531 | const float w_lr, const float b_lr, const float momentum, \ 532 | const float weight_decay, const float w_gauss, \ 533 | const int num_in, const int num_out) \ 534 | : TrainParam(w_lr, b_lr, momentum, weight_decay, w_gauss), 535 | FullConnectParam(layer_type, name, num_in, num_out){} 536 | 537 | InnerParam(const LayerType layer_type, const string name, \ 538 | const float w_lr, const float b_lr, \ 539 | const float momentum, const float weight_decay, \ 540 | const float w_gauss, \ 541 | const int num_out, Param* par) \ 542 | : TrainParam(w_lr, b_lr, momentum, weight_decay, w_gauss), \ 543 | FullConnectParam(layer_type, name, num_out, par) {} 544 | void printParam(){ 545 | FullConnectParam::printParam(); 546 | TrainParam::printParam(); 547 | } 548 | }; 549 | 550 | #endif 551 | -------------------------------------------------------------------------------- /dl/include/pooling_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file pooling_layer.cuh 3 | /// @brief 实现了pooling 4 | 5 | #ifndef POOLING_LAYER_H_ 6 | #define POOLING_LAYER_H_ 7 | 8 | #include 9 | #include 10 | #include "layer.hpp" 11 | #include "layer_kernel.cuh" 12 | 13 | template 14 | class PoolingLayer : public Layer { 15 | 16 | public: 17 | PoolingLayer(PoolParam *lcp); 18 | 19 | ~PoolingLayer(); 20 | 21 | void initCuda(); 22 | 23 | void computeOutput(Matrix* x); 24 | 25 | void computeDerivsOfInput(Matrix* dE_dx); 26 | 27 | private: 28 | Matrix* _max_pos; 29 | PoolParam* _lcp; 30 | Matrix* unranged_dE_dx; 31 | int _num_box; 32 | }; 33 | 34 | #include "../src/pooling_layer.cu" 35 | 36 | #endif 37 | 38 | -------------------------------------------------------------------------------- /dl/include/relu_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file relu_layer.cuh 3 | /// @brief 实现了对输入每一个点求relu 4 | 5 | #ifndef RELU_LAYER_H_ 6 | #define RELU_LAYER_H_ 7 | 8 | #include 9 | #include "layer.hpp" 10 | 11 | template 12 | class ReluLayer : public Layer { 13 | 14 | public: 15 | 16 | ReluLayer(Param* fcp); 17 | ~ReluLayer(); 18 | 19 | void initCuda(); 20 | void computeOutput(Matrix* x); 21 | void computeDerivsOfInput(Matrix* dE_dx); 22 | 23 | private: 24 | Param* _p; 25 | Matrix *_record; 26 | }; 27 | 28 | 29 | #include "../src/relu_layer.cu" 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /dl/include/sigmoid_layer.hpp: -------------------------------------------------------------------------------- 1 | /// 
2 | /// \file sigmoid_layer.cuh 3 | /// @brief 实现了对输入每一个点求sigmoid 4 | 5 | #ifndef SIGMOID_LAYER_H_ 6 | #define SIGMOID_LAYER_H_ 7 | 8 | #include 9 | #include "layer.hpp" 10 | 11 | template 12 | class SigmoidLayer : public Layer { 13 | 14 | public: 15 | 16 | SigmoidLayer(Param* fcp); 17 | ~SigmoidLayer(); 18 | 19 | void initCuda(); 20 | void computeOutput(Matrix* x); 21 | void computeDerivsOfInput(Matrix* dE_dx); 22 | 23 | private: 24 | Param* _fcp; 25 | }; 26 | 27 | 28 | #include "../src/sigmoid_layer.cu" 29 | #endif 30 | -------------------------------------------------------------------------------- /dl/include/train_classification.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file train_classification.hpp 3 | /// \brief 4 | /// 5 | 6 | 7 | #ifndef TRAINCLASSIFICATION_H_ 8 | #define TRAINCLASSIFICATION_H_ 9 | 10 | #include "train_model.hpp" 11 | 12 | /// \brief 13 | /// 14 | template 15 | class TrainClassification : public TrainModel { 16 | private: 17 | 18 | public: 19 | TrainClassification(bool has_valid, bool is_test) \ 20 | : TrainModel(has_valid, is_test) {} 21 | ~TrainClassification() {} 22 | 23 | void createPixelAndLabel(); 24 | void parseImgBinary(string train_file, string valid_file); 25 | 26 | void forwardLastLayer(); 27 | void backwardLastLayer(); 28 | virtual void train(); 29 | virtual void test() {} 30 | 31 | }; 32 | 33 | #include "../src/train_classification.cpp" 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /dl/include/train_model.hpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file train_model.hpp 3 | /// \brief 4 | /// 5 | 6 | 7 | #ifndef TRAINMODEL_H_ 8 | #define TRAINMODEL_H_ 9 | 10 | #include "model_component.hpp" 11 | #include "load_layer.hpp" 12 | 13 | using namespace std; 14 | 15 | /// \brief 16 | /// 17 | template 18 | class TrainModel { 19 | protected: 20 | ModelComponent *_model_component; 21 | LoadLayer *_load_layer; 22 | float _likelihood; ///>cost function的输出值 23 | int _error; ///>分类的error个数 24 | //early stopping 25 | float _min_likelihood; ///>early stopping所控制得到的最小cost 26 | vector _strip_likelihood; ///>用来控制early stopping 27 | int _min_epoch; 28 | int _min_error; 29 | int _num_strip; 30 | bool _is_stop; ///>训练是否由于early stopping而中断 31 | bool _has_valid; 32 | bool _is_test; 33 | int _num_data_type; //train是0,valid是1,test是2 34 | 35 | public: 36 | TrainModel(bool has_valid, bool is_test); 37 | virtual ~TrainModel(); 38 | 39 | void parseNetJson(string json_file); 40 | 41 | void createLayer(); 42 | void createYDEDY(); 43 | void createWBias(); 44 | 45 | void initWeightByRandom(); 46 | void initWeightByFile(vector w_file, vector bias_file); 47 | void forwardPropagate(); 48 | void backwardPropagate(); 49 | void computeAndUpdatePars(); 50 | 51 | virtual void forwardLastLayer() {} 52 | virtual void backwardLastLayer() {} 53 | 54 | virtual void train() {} 55 | 56 | //返回是true就停下,返回是false就继续执行 57 | void earlyStopping(int epoch_idx); 58 | 59 | }; 60 | 61 | #include "../src/train_model.cpp" 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /dl/include/utils.cuh: -------------------------------------------------------------------------------- 1 | 2 | #ifndef UTILS_H_ 3 | #define UTILS_H_ 4 | 5 | #include 6 | #include 7 | #include "matrix.hpp" 8 | #include 9 | 10 | using namespace std; 11 | 12 | void printTime(clock_t &t, string s); 13 | 14 | 15 | void 
initW(Matrix* nvMat); 16 | 17 | void gaussRand(Matrix* nvMat, float var = 1, \ 18 | float mean = 0); 19 | 20 | float gaussGen(float var, float mean); 21 | 22 | void gaussRand(float *w, int length, float var = 1, float mean = 0); 23 | 24 | void readData(Matrix* nvData, string filename, \ 25 | bool isData, int addZerosInFront = 0); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /dl/main_src/cifar_classify.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file conv3.cu 3 | /// 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "train_classification.hpp" 10 | #include "convnet.hpp" 11 | 12 | using namespace std; 13 | 14 | int Param::_minibatch_size = 0; 15 | 16 | int main(int argc, char** argv){ 17 | 18 | TrainClassification *cifar_model = new TrainClassification(true, false); 19 | 20 | cifar_model->parseNetJson("script/cifar10.json"); 21 | cout << "done1\n"; 22 | cifar_model->parseImgBinary("", ""); 23 | cifar_model->createLayer(); 24 | cifar_model->createWBias(); 25 | cifar_model->createPixelAndLabel(); 26 | cifar_model->createYDEDY(); 27 | cifar_model->initWeightByRandom(); 28 | cifar_model->train(); 29 | 30 | delete cifar_model; 31 | 32 | 33 | return 0; 34 | } 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /dl/script/.ropeproject/config.py: -------------------------------------------------------------------------------- 1 | # The default ``config.py`` 2 | 3 | 4 | def set_prefs(prefs): 5 | """This function is called before opening the project""" 6 | 7 | # Specify which files and folders to ignore in the project. 8 | # Changes to ignored resources are not added to the history and 9 | # VCSs. Also they are not returned in `Project.get_files()`. 10 | # Note that ``?`` and ``*`` match all characters but slashes. 11 | # '*.pyc': matches 'test.pyc' and 'pkg/test.pyc' 12 | # 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc' 13 | # '.svn': matches 'pkg/.svn' and all of its children 14 | # 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o' 15 | # 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o' 16 | prefs['ignored_resources'] = ['*.pyc', '*~', '.ropeproject', 17 | '.hg', '.svn', '_svn', '.git'] 18 | 19 | # Specifies which files should be considered python files. It is 20 | # useful when you have scripts inside your project. Only files 21 | # ending with ``.py`` are considered to be python files by 22 | # default. 23 | #prefs['python_files'] = ['*.py'] 24 | 25 | # Custom source folders: By default rope searches the project 26 | # for finding source folders (folders that should be searched 27 | # for finding modules). You can add paths to that list. Note 28 | # that rope guesses project source folders correctly most of the 29 | # time; use this if you have any problems. 30 | # The folders should be relative to project root and use '/' for 31 | # separating folders regardless of the platform rope is running on. 32 | # 'src/my_source_folder' for instance. 33 | #prefs.add('source_folders', 'src') 34 | 35 | # You can extend python path for looking up modules 36 | #prefs.add('python_path', '~/python/') 37 | 38 | # Should rope save object information or not. 39 | prefs['save_objectdb'] = True 40 | prefs['compress_objectdb'] = False 41 | 42 | # If `True`, rope analyzes each module when it is being saved. 
43 | prefs['automatic_soa'] = True 44 | # The depth of calls to follow in static object analysis 45 | prefs['soa_followed_calls'] = 0 46 | 47 | # If `False` when running modules or unit tests "dynamic object 48 | # analysis" is turned off. This makes them much faster. 49 | prefs['perform_doa'] = True 50 | 51 | # Rope can check the validity of its object DB when running. 52 | prefs['validate_objectdb'] = True 53 | 54 | # How many undos to hold? 55 | prefs['max_history_items'] = 32 56 | 57 | # Shows whether to save history across sessions. 58 | prefs['save_history'] = True 59 | prefs['compress_history'] = False 60 | 61 | # Set the number spaces used for indenting. According to 62 | # :PEP:`8`, it is best to use 4 spaces. Since most of rope's 63 | # unit-tests use 4 spaces it is more reliable, too. 64 | prefs['indent_size'] = 4 65 | 66 | # Builtin and c-extension modules that are allowed to be imported 67 | # and inspected by rope. 68 | prefs['extension_modules'] = [] 69 | 70 | # Add all standard c-extensions to extension_modules list. 71 | prefs['import_dynload_stdmods'] = True 72 | 73 | # If `True` modules with syntax errors are considered to be empty. 74 | # The default value is `False`; When `False` syntax errors raise 75 | # `rope.base.exceptions.ModuleSyntaxError` exception. 76 | prefs['ignore_syntax_errors'] = False 77 | 78 | # If `True`, rope ignores unresolvable imports. Otherwise, they 79 | # appear in the importing namespace. 80 | prefs['ignore_bad_imports'] = False 81 | 82 | 83 | def project_opened(project): 84 | """This function is called after opening the project""" 85 | # Do whatever you like here! 86 | -------------------------------------------------------------------------------- /dl/script/.ropeproject/globalnames: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/script/.ropeproject/globalnames -------------------------------------------------------------------------------- /dl/script/.ropeproject/history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/script/.ropeproject/history -------------------------------------------------------------------------------- /dl/script/.ropeproject/objectdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/dl/script/.ropeproject/objectdb -------------------------------------------------------------------------------- /dl/script/cifar10.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "CIFAR10net", 3 | "minibatch_size": 100, 4 | "num_epoch": 300, 5 | "img_height": 32, 6 | "img_width": 32, 7 | "img_channel": 3, 8 | "layer": 9 | [ 10 | { 11 | "type": "CONVOLUTION", 12 | "name": "conv1", 13 | "pad_height": 2, 14 | "pad_width": 2, 15 | "stride_height": 1, 16 | "stride_width": 1, 17 | "filter_height": 5, 18 | "filter_width": 5, 19 | "filter_channel": 64, 20 | "w_lr": 0.001, 21 | "bias_lr": 0.002, 22 | "momentum": 0.9, 23 | "weight_decay": 0, 24 | "w_gauss": 0.001 25 | }, 26 | { 27 | "type": "RECTIFIED", 28 | "name": "relu1" 29 | }, 30 | { 31 | "type": "POOLING", 32 | "name": "pool1", 33 | "pool_type": "MAX_POOLING", 34 | "pad_height": 0, 35 | "pad_width": 0, 36 | "stride_height": 2, 37 | 
"stride_width": 2, 38 | "filter_height": 3, 39 | "filter_width": 3 40 | }, 41 | { 42 | "type": "CONVOLUTION", 43 | "name": "conv2", 44 | "pad_height": 2, 45 | "pad_width": 2, 46 | "stride_height": 1, 47 | "stride_width": 1, 48 | "filter_height": 5, 49 | "filter_width": 5, 50 | "filter_channel": 32, 51 | "w_lr": 0.001, 52 | "bias_lr": 0.002, 53 | "momentum": 0.9, 54 | "weight_decay": 0, 55 | "w_gauss": 0.01 56 | }, 57 | { 58 | "type": "RECTIFIED", 59 | "name": "relu2" 60 | }, 61 | { 62 | "type": "POOLING", 63 | "name": "pool2", 64 | "pool_type": "AVG_POOLING", 65 | "pad_height": 0, 66 | "pad_width": 0, 67 | "stride_height": 2, 68 | "stride_width": 2, 69 | "filter_height": 3, 70 | "filter_width": 3 71 | }, 72 | { 73 | "type": "CONVOLUTION", 74 | "name": "conv3", 75 | "pad_height": 2, 76 | "pad_width": 2, 77 | "stride_height": 1, 78 | "stride_width": 1, 79 | "filter_height": 5, 80 | "filter_width": 5, 81 | "filter_channel": 64, 82 | "w_lr": 0.001, 83 | "bias_lr": 0.002, 84 | "momentum": 0.9, 85 | "weight_decay": 0, 86 | "w_gauss": 0.01 87 | }, 88 | { 89 | "type": "RECTIFIED", 90 | "name": "relu3" 91 | }, 92 | { 93 | "type": "POOLING", 94 | "name": "pool3", 95 | "pool_type": "AVG_POOLING", 96 | "pad_height": 0, 97 | "pad_width": 0, 98 | "stride_height": 2, 99 | "stride_width": 2, 100 | "filter_height": 3, 101 | "filter_width": 3 102 | }, 103 | { 104 | "type": "INNERPRODUCT", 105 | "name": "inner1", 106 | "num_out": 64, 107 | "w_lr": 0.001, 108 | "bias_lr": 0.002, 109 | "momentum": 0.9, 110 | "weight_decay": 0, 111 | "w_gauss": 0.1 112 | }, 113 | { 114 | "type": "INNERPRODUCT", 115 | "name": "inner2", 116 | "num_out": 10, 117 | "w_lr": 0.001, 118 | "bias_lr": 0.002, 119 | "momentum": 0.9, 120 | "weight_decay": 0, 121 | "w_gauss": 0.1 122 | }, 123 | { 124 | "type": "SOFTMAX", 125 | "name": "softmax" 126 | } 127 | 128 | ] 129 | } 130 | 131 | 132 | -------------------------------------------------------------------------------- /dl/src/convnet.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file convnet.cu 3 | /// @brief 4 | 5 | 6 | #include 7 | 8 | #include "convnet.hpp" 9 | #include "layer_kernel.cuh" 10 | 11 | using namespace std; 12 | 13 | template 14 | ConvNet::ConvNet(ConvParam* cp) : TrainLayer(cp){ 15 | 16 | this->_cp = cp; 17 | this->_filt_pixs = this->_cp->getFilterHeight()*_cp->getFilterWidth(); 18 | this->_conv_pixs = this->_cp->getOutHeight()*_cp->getOutWidth(); 19 | this->_padded_in_pixs = this->_cp->getPaddedInHeight()*cp->getPaddedInWidth(); 20 | this->_in_pixs = this->_cp->getInHeight()*_cp->getInWidth(); 21 | this->_box_in_pixs = this->_cp->getBoxInHeight()*_cp->getBoxInWidth(); 22 | cublasCreate(&this->handle); 23 | 24 | _num_box = _cp->getBoxNumHeight()*_cp->getBoxNumWidth(); 25 | } 26 | 27 | template 28 | ConvNet::~ConvNet() { 29 | 30 | delete this->_w; 31 | delete this->_w_inc; 32 | delete this->_bias; 33 | delete this->_bias_inc; 34 | 35 | delete this->_y; 36 | delete this->_dE_dy; 37 | delete this->_dE_dw; 38 | delete this->_dE_db; 39 | 40 | delete unfold_x; 41 | delete dE_db_tmp; 42 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0) 43 | delete padded_x; 44 | if((_cp->getOutHeight() > MAX_THREAD_SIZE \ 45 | || _cp->getOutWidth() > MAX_THREAD_SIZE) \ 46 | && (_cp->getOverlapHeight() > 0 || _cp->getOverlapWidth() > 0)) 47 | delete unranged_dE_dx; 48 | if(_cp->getOutHeight() > MAX_THREAD_SIZE || _cp->getOutWidth() > MAX_THREAD_SIZE){ 49 | delete unranged_dE_dw; 50 | delete unfold_dE_db_tmp; 51 | } 52 | 53 | 
cublasDestroy(this->handle); 54 | 55 | } 56 | 57 | template 58 | void ConvNet::initCuda() { 59 | 60 | this->_w = new Matrix(_filt_pixs \ 61 | * this->_cp->getInChannel(), \ 62 | this->_cp->getOutChannel()); 63 | this->_bias = new Matrix(1, this->_cp->getOutChannel()); 64 | this->_y = new Matrix(this->_cp->getMinibatchSize(), \ 65 | this->_cp->getOutChannel() * _conv_pixs); 66 | this->_dE_dy = new Matrix(this->_y); 67 | 68 | this->_dE_dw = new Matrix(this->_w); 69 | this->_dE_db = new Matrix(this->_bias); 70 | 71 | this->_w_inc = new Matrix(this->_w); 72 | this->_bias_inc = new Matrix(this->_bias); 73 | 74 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0) 75 | this->padded_x = new Matrix(this->_cp->getMinibatchSize(), \ 76 | this->_cp->getInChannel() * _padded_in_pixs); 77 | unfold_x = new Matrix(this->_cp->getMinibatchSize(), \ 78 | this->_cp->getInChannel() * _padded_in_pixs); 79 | 80 | if((_cp->getOutHeight() > MAX_THREAD_SIZE \ 81 | || _cp->getOutWidth() > MAX_THREAD_SIZE) \ 82 | && (_cp->getOverlapHeight() > 0 || _cp->getOverlapWidth() > 0)){ 83 | unranged_dE_dx = new Matrix(_cp->getMinibatchSize(), \ 84 | _box_in_pixs*_num_box*_cp->getOutChannel()); 85 | } 86 | unranged_dE_dw = new Matrix(_cp->getMinibatchSize(), \ 87 | _filt_pixs*_cp->getInChannel()* \ 88 | _num_box*_cp->getOutChannel()); 89 | 90 | if(_cp->getOutHeight() > MAX_THREAD_SIZE \ 91 | || _cp->getOutWidth() > MAX_THREAD_SIZE) { 92 | unfold_dE_db_tmp = new Matrix(this->_cp->getMinibatchSize(), \ 93 | this->_cp->getOutChannel()*_num_box); 94 | } 95 | 96 | dE_db_tmp = new Matrix(this->_cp->getMinibatchSize(), \ 97 | this->_cp->getOutChannel()); 98 | 99 | this->_w_inc->zeros(); 100 | this->_bias_inc->zeros(); 101 | } 102 | 103 | template 104 | void ConvNet::computeOutput(Matrix* x){ 105 | 106 | this->_y->zeros(); 107 | 108 | int num_kernel; 109 | int num_block; 110 | 111 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0){ 112 | num_kernel = this->_cp->getMinibatchSize() * _in_pixs \ 113 | * this->_cp->getInChannel(); 114 | num_block = MAX_NUM_KERNEL < (num_kernel / MAX_NUM_THREAD + 1) \ 115 | ? 
MAX_NUM_KERNEL : (num_kernel / MAX_NUM_THREAD + 1); 116 | padded_x->zeros(); 117 | ori_to_padding<<>>(x->getDevData(), \ 118 | padded_x->getDevData(), num_kernel, this->_cp->getInHeight(), \ 119 | _cp->getInWidth(), _cp->getPaddedInHeight(), \ 120 | _cp->getPaddedInWidth(), _cp->getInChannel()); 121 | cudaDeviceSynchronize(); 122 | cudaCheckError(); 123 | }else 124 | padded_x = x; 125 | 126 | dim3 blocks = dim3(_cp->getMinibatchSize(), _cp->getOutChannel()*_num_box); 127 | dim3 threads = dim3(_cp->getThreadWidth(), _cp->getThreadHeight()); 128 | 129 | 130 | forward_convolution<<getInChannel()*_filt_pixs + _box_in_pixs)>>>(\ 132 | padded_x->getDevData(), this->_w->getDevData(), \ 133 | this->_bias->getDevData(), this->_y->getDevData(), \ 134 | _cp->getPaddedInHeight(), _cp->getPaddedInWidth(), \ 135 | _cp->getInChannel(), _cp->getOutHeight(), \ 136 | _cp->getOutWidth(), _cp->getFilterHeight(), \ 137 | _cp->getFilterWidth(), _cp->getOutChannel(), \ 138 | _cp->getStrideHeight(), _cp->getStrideWidth(), \ 139 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth(), \ 140 | _cp->getBoxInHeight(), _cp->getBoxInWidth(), \ 141 | _cp->getBoxOutHeight(), _cp->getBoxOutWidth()); 142 | cudaDeviceSynchronize(); 143 | cudaCheckError(); 144 | } 145 | 146 | template 147 | void ConvNet::computeDerivsOfPars(Matrix* x){ 148 | 149 | dim3 blocks = dim3(_cp->getMinibatchSize() \ 150 | , _num_box \ 151 | *_cp->getFilterHeight()*_cp->getFilterWidth()); 152 | 153 | dim3 threads = dim3(_cp->getThreadWidth(), _cp->getThreadHeight()); 154 | 155 | unranged_dE_dw->zeros(); 156 | 157 | Dtype *dE_db_multi_channel; 158 | if(_cp->getOutHeight() > MAX_THREAD_SIZE \ 159 | || _cp->getOutWidth() > MAX_THREAD_SIZE) { 160 | unfold_dE_db_tmp->zeros(); 161 | dE_db_multi_channel = unfold_dE_db_tmp->getDevData(); 162 | 163 | }else{ 164 | dE_db_tmp->zeros(); 165 | dE_db_multi_channel = dE_db_tmp->getDevData(); 166 | 167 | } 168 | 169 | compute_convolution_derivs<<getBoxOutHeight()*_cp->getBoxOutWidth())>>>( \ 171 | this->_dE_dy->getDevData(), padded_x->getDevData(), \ 172 | unranged_dE_dw->getDevData(), \ 173 | _cp->getBoxOutHeight(), _cp->getBoxOutWidth(), \ 174 | _cp->getOutChannel(), _cp->getInChannel(), \ 175 | _cp->getPaddedInHeight(), _cp->getPaddedInWidth(), \ 176 | _cp->getOutHeight(), _cp->getOutWidth(), \ 177 | _cp->getFilterHeight(), _cp->getFilterWidth(), \ 178 | _cp->getStrideHeight(), _cp->getStrideWidth(), \ 179 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth()); 180 | 181 | cudaDeviceSynchronize(); 182 | cudaCheckError(); 183 | 184 | blocks = dim3(_cp->getMinibatchSize(), _cp->getOutChannel()*_num_box); 185 | compute_derivs_of_bias<<getBoxOutHeight()*_cp->getBoxOutWidth()>>>( \ 187 | this->_dE_dy->getDevData(), dE_db_multi_channel, \ 188 | _cp->getOutHeight(), _cp->getOutWidth(), \ 189 | _cp->getOutChannel(), \ 190 | _cp->getBoxOutHeight(), _cp->getBoxOutWidth(), \ 191 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth()); 192 | 193 | cudaDeviceSynchronize(); 194 | cudaCheckError(); 195 | 196 | blocks = dim3(1, _cp->getInChannel()*_cp->getOutChannel()); 197 | compact_dervis_w<<>>( \ 198 | unranged_dE_dw->getDevData(), this->_dE_dw->getDevData(), \ 199 | _cp->getFilterHeight(), _cp->getFilterWidth(), \ 200 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth(), \ 201 | _cp->getMinibatchSize(), _cp->getInChannel(), _cp->getOutChannel()); 202 | cudaDeviceSynchronize(); 203 | cudaCheckError(); 204 | if(_cp->getOutHeight() > MAX_THREAD_SIZE \ 205 | || _cp->getOutWidth() > MAX_THREAD_SIZE) { 206 | blocks = dim3(_cp->getMinibatchSize(), 
_cp->getOutChannel()); 207 | compute_derivs_of_bias<<>>( \ 208 | unfold_dE_db_tmp->getDevData(), dE_db_tmp->getDevData(), \ 209 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth(), \ 210 | _cp->getOutChannel(), _cp->getBoxNumHeight(), \ 211 | _cp->getBoxNumWidth(), 1, 1); 212 | } 213 | cudaDeviceSynchronize(); 214 | cudaCheckError(); 215 | 216 | dE_db_tmp->sumRow(this->_dE_db); 217 | 218 | } 219 | 220 | template 221 | void ConvNet::computeDerivsOfInput(Matrix* dE_dx){ 222 | 223 | 224 | dim3 blocks = dim3(_cp->getMinibatchSize(), _cp->getInChannel() * _num_box); 225 | dim3 threads = dim3(_cp->getThreadWidth(), _cp->getThreadHeight()); 226 | 227 | int box_in_height = MAX_THREAD_SIZE > _cp->getOutHeight() \ 228 | ? _cp->getPaddedInHeight() : _cp->getBoxInHeight(); 229 | int box_in_width = MAX_THREAD_SIZE > _cp->getOutWidth() \ 230 | ? _cp->getPaddedInWidth() : _cp->getBoxInWidth(); 231 | 232 | Dtype* p_dE_dx; 233 | if((_cp->getOutHeight() > MAX_THREAD_SIZE \ 234 | || _cp->getOutWidth() > MAX_THREAD_SIZE) \ 235 | && (_cp->getOverlapHeight() > 0 || _cp->getOverlapWidth() > 0)){ 236 | unranged_dE_dx->zeros(); 237 | p_dE_dx = unranged_dE_dx->getDevData(); 238 | 239 | }else if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0){ 240 | unfold_x->zeros(); 241 | p_dE_dx = unfold_x->getDevData(); 242 | 243 | }else{ 244 | dE_dx->zeros(); 245 | p_dE_dx = dE_dx->getDevData(); 246 | 247 | } 248 | 249 | backward_convolution<<>>( \ 251 | this->_dE_dy->getDevData(), this->_w->getDevData(), \ 252 | p_dE_dx, box_in_height, box_in_width, \ 253 | _cp->getBoxOutHeight(), _cp->getBoxOutWidth(), \ 254 | _cp->getOutChannel(), _cp->getInChannel(), \ 255 | _cp->getOutHeight(), _cp->getOutWidth(), \ 256 | _cp->getFilterHeight(), _cp->getFilterWidth(), \ 257 | _cp->getStrideHeight(), _cp->getStrideWidth(), \ 258 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth()); 259 | cudaDeviceSynchronize(); 260 | cudaCheckError(); 261 | 262 | if((_cp->getOutHeight() > MAX_THREAD_SIZE \ 263 | || _cp->getOutWidth() > MAX_THREAD_SIZE) \ 264 | && (_cp->getOverlapHeight() > 0 || _cp->getOverlapWidth() > 0)){ 265 | 266 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0){ 267 | unfold_x->zeros(); 268 | p_dE_dx = unfold_x->getDevData(); 269 | 270 | }else{ 271 | dE_dx->zeros(); 272 | p_dE_dx = dE_dx->getDevData(); 273 | 274 | } 275 | 276 | compactOverlap<<<_cp->getMinibatchSize(), _cp->getInChannel()>>>( \ 277 | unranged_dE_dx->getDevData(), p_dE_dx, \ 278 | _cp->getPaddedInHeight(), _cp->getPaddedInWidth(), \ 279 | _cp->getInChannel(), _cp->getOverlapHeight(), _cp->getOverlapWidth(), \ 280 | box_in_height, box_in_width, \ 281 | _cp->getBoxNumHeight(), _cp->getBoxNumWidth()); 282 | cudaDeviceSynchronize(); 283 | cudaCheckError(); 284 | } 285 | 286 | 287 | if(_cp->getPadHeight() > 0 || _cp->getPadWidth() > 0){ 288 | int num_kernel = this->_cp->getMinibatchSize() * _in_pixs \ 289 | * this->_cp->getInChannel(); 290 | int num_block = MAX_NUM_KERNEL < (num_kernel / MAX_NUM_THREAD + 1) \ 291 | ? 
MAX_NUM_KERNEL : (num_kernel / MAX_NUM_THREAD + 1); 292 | pad_to_ori<<>>(dE_dx->getDevData(), \ 293 | p_dE_dx, num_kernel, _cp->getInHeight(), _cp->getInWidth(), \ 294 | _cp->getPaddedInHeight(), _cp->getPaddedInWidth(), \ 295 | _cp->getInChannel()); 296 | cudaDeviceSynchronize(); 297 | cudaCheckError(); 298 | 299 | } 300 | } 301 | -------------------------------------------------------------------------------- /dl/src/data.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file data.cu 3 | /// 4 | 5 | #include "data.hpp" 6 | 7 | using namespace std; 8 | 9 | 10 | template 11 | void Data::copyFromHost(Dtype* data_value_in, const int data_len){ 12 | cudaError_t status = cudaMemcpy(_data_value, data_value_in, \ 13 | sizeof(Dtype) * data_len, cudaMemcpyHostToDevice); 14 | if (status != cudaSuccess) { 15 | cout << stderr, "!!!! device access error (write)\n"; 16 | exit( EXIT_FAILURE ); 17 | } 18 | } 19 | 20 | template 21 | void Data::copyFromDevice(Data* data_in){ 22 | cudaError_t status = cudaMemcpy(_data_value, data_in->getDevData(), \ 23 | sizeof(Dtype) * _amount, cudaMemcpyDeviceToDevice); 24 | if (status != cudaSuccess) { 25 | cout << stderr, "!!!! device access error (write)\n"; 26 | exit( EXIT_FAILURE ); 27 | 28 | } 29 | } 30 | 31 | template 32 | void Data::copyToHost(Dtype* data_value_in, const int data_len){ 33 | // cout << sizeof(Dtype) << ":" << data_len << endl; 34 | cudaError_t status = cudaMemcpy(data_value_in, _data_value, \ 35 | sizeof(Dtype) * data_len, cudaMemcpyDeviceToHost); 36 | if (status != cudaSuccess) { 37 | cout << stderr, "!!!! device access error (write)\n"; 38 | exit( EXIT_FAILURE ); 39 | } 40 | } 41 | 42 | template 43 | void Data::copyToDevice(Data* data_in){ 44 | cudaError_t status = cudaMemcpy(data_in->getDevData(), _data_value, \ 45 | sizeof(Dtype) * _amount, cudaMemcpyDeviceToDevice); 46 | if (status != cudaSuccess) { 47 | cout << stderr, "!!!! 
device access error (write)\n"; 48 | exit( EXIT_FAILURE ); 49 | 50 | } 51 | } 52 | 53 | template 54 | void Data::zeros(){ 55 | cudaMemset(_data_value, 0, _amount * sizeof(Dtype)); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /dl/src/dropout_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file dropout_layer.cu 3 | /// @brief 4 | 5 | 6 | using namespace std; 7 | 8 | template 9 | DropoutLayer::DropoutLayer(Param* p){ 10 | 11 | this->_p = p; 12 | } 13 | 14 | template 15 | DropoutLayer::~DropoutLayer() { 16 | delete this->_y; 17 | delete this->_dE_dy; 18 | delete _drop_record; 19 | delete _drop_rand_probs; 20 | 21 | } 22 | 23 | template 24 | void DropoutLayer::initCuda() { 25 | 26 | 27 | ConnectType ct = this->_p->getConnectType(); 28 | int col; 29 | if(ct == PARAM_CONNECT_TYPE_LOCAL) 30 | col = _p->getOutHeight()*_p->getOutWidth() \ 31 | * this->_p->getOutChannel(); 32 | else if(ct == PARAM_CONNECT_TYPE_FULL) 33 | col = this->_p->getNumOut(); 34 | 35 | this->_y = new Matrix(_p->getMinibatchSize(), col); 36 | this->_dE_dy = new Matrix(this->_y); 37 | _drop_record = new Matrix(_p->getMinibatchSize(), col); 38 | _drop_rand_probs = new Matrix(_p->getMinibatchSize(), col); 39 | _is_set_up = false; 40 | } 41 | 42 | template 43 | void DropoutLayer::computeOutput(Matrix* x){ 44 | 45 | x->applyDropout(this->_y, _drop_record, _drop_rand_probs, _is_set_up); 46 | 47 | if(_is_set_up == false) 48 | _is_set_up = true; 49 | 50 | 51 | } 52 | 53 | template 54 | void DropoutLayer::computeDerivsOfInput(Matrix* dE_dx){ 55 | 56 | this->_dE_dy->applyRelu(dE_dx, _drop_record, false); 57 | 58 | } 59 | 60 | 61 | -------------------------------------------------------------------------------- /dl/src/inner_product_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file inner_product_layer.cu 3 | /// @brief 4 | 5 | #include "inner_product_layer.hpp" 6 | 7 | using namespace std; 8 | 9 | template 10 | InnerProductLayer::InnerProductLayer(InnerParam* fcp) : \ 11 | TrainLayer((TrainParam*)fcp){ 12 | this->_fcp = fcp; 13 | cublasCreate(&this->handle); 14 | } 15 | 16 | template 17 | InnerProductLayer::~InnerProductLayer() { 18 | 19 | delete this->_w; 20 | delete this->_w_inc; 21 | delete this->_bias; 22 | delete this->_bias_inc; 23 | 24 | delete this->_y; 25 | delete this->_dE_dy; 26 | delete this->_dE_db; 27 | delete this->_dE_dw; 28 | 29 | cublasDestroy(this->handle); 30 | } 31 | 32 | template 33 | void InnerProductLayer::initCuda() { 34 | 35 | this->_w = new Matrix(this->_fcp->getNumIn(), this->_fcp->getNumOut()); 36 | this->_bias = new Matrix(1, this->_fcp->getNumOut()); 37 | 38 | this->_y = new Matrix(this->_fcp->getMinibatchSize(), this->_fcp->getNumOut()); 39 | 40 | this->_dE_dy = new Matrix(this->_y); 41 | this->_dE_db = new Matrix(this->_bias); 42 | this->_dE_dw = new Matrix(this->_w); 43 | 44 | this->_w_inc = new Matrix(this->_w); 45 | this->_bias_inc = new Matrix(this->_bias); 46 | 47 | data_T = new Matrix(_fcp->getNumIn(), _fcp->getMinibatchSize()); 48 | w_T = new Matrix(this->_w->getNumCols(), this->_w->getNumRows()); 49 | 50 | this->_w_inc->zeros(); 51 | this->_bias_inc->zeros(); 52 | } 53 | 54 | template 55 | void InnerProductLayer::computeOutput(Matrix* x){ 56 | // x->showValue("data"); 57 | // this->_w->showValue("w"); 58 | 59 | // x->reValue(512); 60 | // this->_w->reValue(1.0f); 61 | 62 | x->rightMult(this->_w, 1, this->_y, this->handle); 63 
| this->_y->addRowVector(this->_bias); 64 | // this->_y->showValue("yj1"); 65 | 66 | } 67 | 68 | 69 | template 70 | void InnerProductLayer::computeDerivsOfPars(Matrix* x){ 71 | 72 | 73 | // x->reValue(512); 74 | // this->_dE_dy->reValue(1.0f); 75 | 76 | x->getTranspose(data_T); 77 | 78 | data_T->rightMult(this->_dE_dy, 1, this->_dE_dw, this->handle); 79 | this->_dE_dy->sumRow(this->_dE_db); 80 | 81 | //this->_dE_dw->showValue("dedwinner"); 82 | //this->_dE_dy->showValue("innerdedy"); 83 | } 84 | 85 | template 86 | void InnerProductLayer::computeDerivsOfInput(Matrix* dE_dx){ 87 | 88 | // this->_w->reValue(1.0f); 89 | // this->_dE_dy->reValue(64); 90 | 91 | this->_w->getTranspose(w_T); 92 | this->_dE_dy->rightMult(w_T, 1, dE_dx, this->handle); 93 | //dE_dx->showValue("innerdedx"); 94 | 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /dl/src/load_layer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * filename: load_layer.cpp 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "load_layer.hpp" 10 | 11 | using namespace std; 12 | 13 | template 14 | void LoadLayer::meanOneImg(Dtype* pixel_ptr, int process_len){ 15 | Dtype avg = 0; 16 | for(int i = 0; i < process_len; i++){ 17 | avg += pixel_ptr[i]; 18 | } 19 | avg /= process_len; 20 | 21 | for(int i = 0; i < process_len; i++){ 22 | pixel_ptr[i] = pixel_ptr[i] - avg; 23 | } 24 | } 25 | 26 | template 27 | void LoadLayer::stdOneImg(Dtype* pixel_ptr, int process_len){ 28 | Dtype std = 0; 29 | for(int i = 0; i < process_len; i++){ 30 | std += pixel_ptr[i] * pixel_ptr[i]; 31 | } 32 | 33 | std /= process_len; 34 | std = sqrt(std); 35 | for(int i = 0; i < process_len; i++){ 36 | pixel_ptr[i] /= std; 37 | } 38 | } 39 | 40 | template 41 | LoadLayer::LoadLayer(const int num_train, const int num_valid, \ 42 | const int num_test, const int img_size, const int img_channel) \ 43 | : _num_train(num_train), _num_test(num_test), _num_valid(num_valid), \ 44 | _img_size(img_size), _img_channel(img_channel){ 45 | _img_sqrt = _img_size * _img_size; 46 | if (img_size > 0 && img_channel > 0) { 47 | if (num_train > 0) { 48 | _train_pixel = new Dtype[_num_train * _img_sqrt * _img_channel]; 49 | _train_label = new int[_num_train]; 50 | _train_pixel_ptr = _train_pixel; 51 | _train_label_ptr = _train_label; 52 | } 53 | if (num_valid > 0) { 54 | _valid_pixel = new Dtype[_num_valid * _img_sqrt * _img_channel]; 55 | _valid_label = new int[_num_valid]; 56 | _valid_pixel_ptr = _valid_pixel; 57 | _valid_label_ptr = _valid_label; 58 | } 59 | if (num_test > 0) { 60 | _test_pixel = new Dtype[_num_test * _img_sqrt * _img_channel]; 61 | _test_label = new int[_num_test]; 62 | _test_pixel_ptr = _test_pixel; 63 | _test_label_ptr = _test_label; 64 | } 65 | } 66 | _is_base_alloc = true; 67 | 68 | } 69 | 70 | template 71 | LoadLayer::~LoadLayer(){ 72 | if (_img_size > 0 && _img_channel > 0 && _is_base_alloc == true) { 73 | if (_num_train > 0) { 74 | delete[] _train_pixel; 75 | delete[] _train_label; 76 | } 77 | if (_num_valid > 0) { 78 | delete[] _valid_pixel; 79 | delete[] _valid_label; 80 | } 81 | if (_num_test > 0) { 82 | delete[] _test_pixel; 83 | delete[] _test_label; 84 | } 85 | } 86 | } 87 | 88 | template 89 | LoadCifar10::LoadCifar10(const int minibatch_size) : \ 90 | LoadLayer(50000, 10000, 0, 32, 3){ 91 | 92 | _minibatch_size = minibatch_size; 93 | 94 | for(int i = 1; i < 6; i++){ 95 | string s; 96 | stringstream ss; 97 | ss << i; 98 | ss >> s; 
99 | string filename = "../../data/cifar-10-batches-bin/data_batch_"+s+".bin"; 100 | loadBinary(filename, this->_train_pixel_ptr, \ 101 | this->_train_label_ptr); 102 | } 103 | loadBinary("../../data/cifar-10-batches-bin/test_batch.bin", \ 104 | this->_valid_pixel_ptr, this->_valid_label_ptr); 105 | 106 | } 107 | 108 | template 109 | void LoadCifar10::loadTrainOneBatch(int batch_idx, \ 110 | Dtype* &mini_pixel, int* &mini_label){ 111 | mini_pixel = this->_train_pixel + batch_idx*_minibatch_size \ 112 | *this->_img_channel*this->_img_sqrt; 113 | mini_label = this->_train_label + batch_idx*_minibatch_size; 114 | } 115 | 116 | 117 | template 118 | void LoadCifar10::loadValidOneBatch(int batch_idx, \ 119 | Dtype* &mini_pixel, int* &mini_label){ 120 | mini_pixel = this->_valid_pixel + batch_idx*_minibatch_size \ 121 | *this->_img_channel*this->_img_sqrt; 122 | mini_label = this->_valid_label + batch_idx*_minibatch_size; 123 | } 124 | 125 | template 126 | void LoadCifar10::loadBinary(string filename, \ 127 | Dtype* &pixel_ptr, int* &label_ptr){ 128 | 129 | ifstream fin(filename.c_str(), ifstream::binary); 130 | if(!fin.is_open()){ 131 | cout << "open file failed\n"; 132 | exit(EXIT_FAILURE); 133 | } 134 | unsigned char tmp; 135 | char buf; 136 | fin.seekg(0, fin.end); 137 | int length = fin.tellg(); 138 | int num = length / (this->_img_sqrt * this->_img_channel + 1); 139 | //numebr of picture in this input file. 140 | fin.seekg(0, fin.beg); 141 | 142 | for(int i = 0; i < num; i++){ 143 | fin.read(&buf, 1); 144 | tmp = buf; 145 | label_ptr[0] = (int)tmp; 146 | for(int j = 0; j < this->_img_channel; j++){ 147 | for(int k = 0; k < this->_img_sqrt; k++){ 148 | fin.read(&buf, 1); 149 | tmp = buf; 150 | pixel_ptr[k] = (int)tmp; 151 | } 152 | this->meanOneImg(pixel_ptr, this->_img_sqrt); 153 | // this->stdOneImg(pixel_ptr, this->_img_sqrt); 154 | if(i != num - 1 || j != this->_img_channel - 1) 155 | pixel_ptr += this->_img_sqrt; 156 | 157 | } 158 | if(i != num - 1){ 159 | label_ptr++; 160 | } 161 | } 162 | fin.close(); 163 | } 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /dl/src/logistic.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file logistic.cu 3 | /// 4 | 5 | #include "logistic.hpp" 6 | 7 | using namespace std; 8 | 9 | template 10 | Logistic::Logistic(FullConnectParam* fcp) { 11 | this->_fcp = fcp; 12 | 13 | } 14 | 15 | template 16 | Logistic::~Logistic() { 17 | 18 | delete this->_y; 19 | delete[] h_labels; 20 | delete[] y_CPU; 21 | delete[] correct_probs; 22 | delete d_max_pos_of_out; 23 | delete[] h_max_pos_of_out; 24 | delete _d_record; 25 | delete[] _h_record; 26 | } 27 | 28 | template 29 | void Logistic::initCuda() { 30 | 31 | this->_y = new Matrix(this->_fcp->getMinibatchSize(), \ 32 | this->_fcp->getNumOut()); 33 | h_labels = new int[this->_fcp->getMinibatchSize()]; 34 | y_CPU = new Dtype[this->_y->getNumEles()]; 35 | correct_probs = new Dtype[this->_y->getNumRows()]; 36 | d_max_pos_of_out = new Matrix(this->_y->getNumRows(), 1); 37 | h_max_pos_of_out = new Dtype[this->_y->getNumRows()]; 38 | 39 | _d_record = new Matrix(this->_y->getNumCols(), this->_y->getNumCols()); 40 | _h_record = new int[this->_y->getNumCols() * this->_y->getNumCols()]; 41 | } 42 | 43 | template 44 | void Logistic::computeOutput(Matrix* x){ 45 | this->_y->zeros(); 46 | x->apply(Matrix::SOFTMAX, this->_y); 47 | } 48 | 49 | template 50 | double 
Logistic::computeError(Matrix* labels, int& num_error){ 51 | 52 | labels->copyToHost(h_labels, labels->getNumEles()); 53 | 54 | this->_y->copyToHost(y_CPU, this->_y->getNumEles()); 55 | 56 | /// 记录找打的最大位置上的likelihood 57 | /// 记录最大位置的下标 58 | this->_y->maxPosInRow(d_max_pos_of_out); 59 | 60 | 61 | d_max_pos_of_out->copyToHost(h_max_pos_of_out, this->_y->getNumRows()); 62 | 63 | for (int c = 0; c < this->_y->getNumRows(); c++) { 64 | int true_label = h_labels[c]; 65 | int predict_label = h_max_pos_of_out[c]; 66 | if(y_CPU[c*this->_y->getNumCols()+true_label] == 0) 67 | correct_probs[c] = -10000; 68 | else 69 | correct_probs[c] = log(y_CPU[c * this->_y->getNumCols() + true_label]); 70 | 71 | if(predict_label != true_label) 72 | num_error++; 73 | _h_record[predict_label * this->_y->getNumCols() + true_label]++ ; 74 | } 75 | double result = 0; 76 | for(int i = 0; i < labels->getNumEles(); i++){ 77 | result -= correct_probs[i]; 78 | } 79 | 80 | 81 | return result; 82 | } 83 | 84 | template 85 | void Logistic::computeDerivsOfInput(Matrix* dE_dx, Matrix* labels){ 86 | assert(labels->getNumRows() == dE_dx->getNumRows()); 87 | dE_dx->zeros(); 88 | 89 | const int num_thread = DIVUP(this->_fcp->getNumOut(), ADD_BLOCK_SIZE) * ADD_BLOCK_SIZE; 90 | compute_dE_dy<<_fcp->getMinibatchSize(), num_thread>>>(this->_y->getDevData(), \ 91 | labels->getDevData(), dE_dx->getDevData(), this->_fcp->getNumOut()); 92 | cudaThreadSynchronize(); 93 | cudaCheckError(); 94 | 95 | } 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /dl/src/matrix.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file matrix.cu 3 | /// \brief 矩阵类源文件 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "matrix.hpp" 11 | #include "matrix_kernel.hpp" 12 | 13 | using namespace std; 14 | 15 | template 16 | Matrix::Matrix(int num_row, int num_col){ 17 | _init(num_row, num_col); 18 | } 19 | 20 | template 21 | Matrix::Matrix(const Matrix* like, bool copy){ 22 | _init(like->getNumRows(), like->getNumCols()); 23 | if (copy) { 24 | copyFromDevice(like); 25 | } 26 | } 27 | 28 | template 29 | Matrix::Matrix(const Matrix* like) { 30 | _init(like->getNumRows(), like->getNumCols()); 31 | } 32 | 33 | template 34 | Matrix::~Matrix(){ 35 | if(this->_is_own_data && this->_amount > 0){ 36 | cudaFree(this->_data_value); 37 | } 38 | } 39 | 40 | template 41 | void Matrix::_init(int num_row, int num_col) { 42 | this->_shape.push_back(num_row); 43 | this->_shape.push_back(num_col); 44 | this->_amount = num_row * num_col; 45 | this->_is_own_data = true; 46 | if (this->_amount > 0) { 47 | cudaError_t status; 48 | status = cudaMalloc((void**) &this->_data_value, \ 49 | this->_amount * sizeof(Dtype)); 50 | /* 51 | else if(a == ALLOC_ON_UNIFIED_MEMORY){ 52 | status = cudaMallocManaged(&this->_data_value, \ 53 | this->_shape[0] * this->_shape[1] * sizeof(Dtype)); 54 | }*/ 55 | if (status != cudaSuccess) { 56 | fprintf(stderr, "!!!! 
device memory allocation error\n"); 57 | exit(EXIT_FAILURE); 58 | } 59 | } 60 | } 61 | 62 | 63 | template 64 | void Matrix::getTranspose(Matrix* target){ 65 | 66 | const int width = this->_shape[1]; 67 | const int height = this->_shape[0]; 68 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 69 | assert(num_blocks_x < NUM_BLOCKS_MAX); 70 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 71 | NUM_BLOCKS_MAX)); 72 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 73 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 74 | 75 | kTranspose<<>>(this->_data_value, \ 76 | target->getDevData(), width, height); 77 | cudaDeviceSynchronize(); 78 | cudaCheckError(); 79 | } 80 | 81 | template 82 | void Matrix::rightMult(Matrix* b, float scale_AB, \ 83 | Matrix *target, cublasHandle_t& handle) { 84 | 85 | clock_t t = clock(); 86 | 87 | int m = this->_shape[0]; 88 | int k = this->_shape[1]; 89 | int n = b->getNumCols(); 90 | float scale_tar = 0; 91 | assert(k == b->getNumRows()); 92 | //列主 93 | cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &scale_AB, \ 94 | b->getDevData(), n, this->_data_value, k, \ 95 | &scale_tar, target->getDevData(), n); 96 | } 97 | 98 | template 99 | void Matrix::addColVector(Matrix* vec){ 100 | addColVector(vec, 1, this); 101 | } 102 | 103 | template 104 | void Matrix::addColVector(Matrix* vec, float scaleVec, Matrix* target){ 105 | 106 | Matrix* ori_trans = new Matrix(this->_shape[1], this->_shape[0]); 107 | this->getTranspose(ori_trans); 108 | ori_trans->addRowVector(vec); 109 | ori_trans->getTranspose(target); 110 | delete ori_trans; 111 | } 112 | 113 | template 114 | void Matrix::addRowVector(Matrix* vec){ 115 | addRowVector(vec, 1, this); 116 | } 117 | 118 | template 119 | void Matrix::addRowVector(Matrix* vec, float scaleVec, Matrix* target){ 120 | assert(vec->getNumRows() == 1 || vec->getNumCols() == 1); 121 | assert(vec->getNumRows() == this->_shape[0] || vec->getNumCols() == this->_shape[1]); 122 | const int width = this->_shape[1]; 123 | const int height = this->_shape[0]; 124 | 125 | //表达成了矩阵的结构,就分开处理算了,block和thread的x维控制列数 126 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 127 | assert(num_blocks_x < NUM_BLOCKS_MAX); 128 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 129 | NUM_BLOCKS_MAX)); 130 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 131 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 132 | 133 | kAddRowVector<<>>(this->_data_value, vec->getDevData(), \ 134 | target->getDevData(), width, height, scaleVec); 135 | cudaDeviceSynchronize(); 136 | cudaCheckError(); 137 | 138 | } 139 | 140 | template 141 | void Matrix::subtractFromScalar(float scalar, Matrix* target) { 142 | 143 | const int width = this->_shape[1]; 144 | const int height = this->_shape[0]; 145 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 146 | assert(num_blocks_x < NUM_BLOCKS_MAX); 147 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 148 | NUM_BLOCKS_MAX)); 149 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 150 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 151 | 152 | kSubtractFromScalar<<>>(this->_data_value, scalar, \ 153 | target->getDevData(), width, height); 154 | cudaDeviceSynchronize(); 155 | cudaCheckError(); 156 | } 157 | 158 | template 159 | void Matrix::subtractFromScalar(float scalar) { 160 | subtractFromScalar(scalar, this); 161 | } 162 | 163 | template 164 | void Matrix::apply(Matrix::FUNCTIONS f, Matrix *target){ 165 | 166 | const int width = this->_shape[1]; 167 | 
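// apply() follows the same launch convention as the rest of this file: a 2D grid
// whose x dimension tiles the matrix columns and whose y dimension tiles the rows,
// with ADD_BLOCK_SIZE x ADD_BLOCK_SIZE threads per block. SOFTMAX is the exception
// and is remapped below to one block per row so the row-wise max/sum reductions
// can run in shared memory.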
const int height = this->_shape[0]; 168 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 169 | assert(num_blocks_x < NUM_BLOCKS_MAX); 170 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 171 | NUM_BLOCKS_MAX)); 172 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 173 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 174 | 175 | if(f == Matrix::SOFTMAX){ 176 | //一个block只计算一行数据 177 | grid_size = dim3(1, height, 1); 178 | block_size = dim3(num_blocks_x * ADD_BLOCK_SIZE, 1, 1); 179 | kSoftmax<<>>(this->_data_value, \ 180 | target->getDevData(), this->_shape[1], this->_shape[0]); 181 | }else if(f == Matrix::RECIPROCAL) { 182 | kReciprocal<<>>(this->_data_value, target->getDevData(), \ 183 | width, height); 184 | }else if(f == Matrix::LOG) { 185 | kLog<<>>(this->_data_value, target->getDevData(), \ 186 | width, height); 187 | }else if(f == Matrix::SIGMOID) { 188 | kSigmoid<<>>(this->_data_value, target->getDevData(), \ 189 | width, height); 190 | } 191 | cudaDeviceSynchronize(); 192 | cudaCheckError(); 193 | } 194 | 195 | template 196 | void Matrix::applyRelu(Matrix *target, Matrix* record, \ 197 | bool direction){ 198 | const int width = this->_shape[1]; 199 | const int height = this->_shape[0]; 200 | const int length = width*height; 201 | 202 | const int num_blocks = DIVUP(length, 1024); 203 | assert(num_blocks < NUM_BLOCKS_MAX); 204 | 205 | if(direction) 206 | kRelu<<>>(this->_data_value, \ 207 | target->getDevData(), record->getDevData(), length); 208 | else 209 | kReluBack<<>>(this->_data_value, \ 210 | target->getDevData(), record->getDevData(), length); 211 | cudaDeviceSynchronize(); 212 | cudaCheckError(); 213 | } 214 | 215 | template 216 | void Matrix::applyDropout(Matrix *target, Matrix* record, \ 217 | Matrix* rand_probs, bool is_set_up){ 218 | 219 | const int width = this->_shape[1]; 220 | const int height = this->_shape[0]; 221 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 222 | assert(num_blocks_x < NUM_BLOCKS_MAX); 223 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 224 | NUM_BLOCKS_MAX)); 225 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 226 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 227 | 228 | if(is_set_up == false){ 229 | kSetUpCurand<<>>(rand_probs->getDevData(), \ 230 | width, height); 231 | cudaDeviceSynchronize(); 232 | cudaCheckError(); 233 | 234 | } 235 | 236 | kDropout<<>>(this->_data_value, \ 237 | target->getDevData(), record->getDevData(), \ 238 | rand_probs->getDevData(), width, height); 239 | cudaDeviceSynchronize(); 240 | cudaCheckError(); 241 | } 242 | 243 | template 244 | void Matrix::apply(Matrix::FUNCTIONS f) { 245 | apply(f, this); 246 | } 247 | 248 | template 249 | void Matrix::sumCol(Matrix* target){ 250 | const int width = this->_shape[1]; 251 | const int height = this->_shape[0]; 252 | 253 | kDumbSumCols<<>>(this->_data_value, \ 254 | target->getDevData(), width, height); 255 | cudaDeviceSynchronize(); 256 | cudaCheckError(); 257 | } 258 | 259 | template 260 | void Matrix::sumRow(Matrix* target){ 261 | Matrix* trans = new Matrix(this->_shape[1], this->_shape[0]); 262 | this->getTranspose(trans); 263 | trans->sumCol(target); 264 | delete trans; 265 | } 266 | 267 | //位置下标从0开始 268 | template 269 | void Matrix::maxPosInRow(Matrix* maxVec){ 270 | const int width = this->_shape[1]; 271 | const int height = this->_shape[0]; 272 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 273 | assert(num_blocks_x < NUM_BLOCKS_MAX); 274 | dim3 grid_size(1, height, 1); 275 | dim3 
block_size(num_blocks_x * ADD_BLOCK_SIZE, 1, 1); 276 | 277 | kDumbMaxPosInRow<<>>(this->_data_value, \ 279 | maxVec->getDevData(), width, height); 280 | cudaDeviceSynchronize(); 281 | cudaCheckError(); 282 | } 283 | 284 | template 285 | void Matrix::eltWiseMult(Matrix* b, Matrix* target) { 286 | 287 | assert(b->getNumCols() == this->_shape[1]); 288 | 289 | const int width = this->_shape[1]; 290 | const int height = this->_shape[0]; 291 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 292 | assert(num_blocks_x < NUM_BLOCKS_MAX); 293 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 294 | NUM_BLOCKS_MAX)); 295 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 296 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 297 | 298 | kMult<<>>(this->_data_value, \ 299 | b->getDevData(), target->getDevData(), width, height); 300 | cudaDeviceSynchronize(); 301 | cudaCheckError(); 302 | } 303 | 304 | template 305 | void Matrix::eltWiseMult(Matrix* b) { 306 | eltWiseMult(b, this); 307 | } 308 | 309 | template 310 | void Matrix::addSum(Matrix* b, Matrix* c, float scaleThis, \ 311 | float scaleB, float scaleC){ 312 | this->add(b, scaleThis, scaleB); 313 | this->add(c, 1, scaleC); 314 | } 315 | 316 | template 317 | void Matrix::add(Matrix* b, float scale_this, float scale_B){ 318 | assert(this->isSameDims(b)); 319 | const int width = this->_shape[1]; 320 | const int height = this->_shape[0]; 321 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 322 | assert(num_blocks_x < NUM_BLOCKS_MAX); 323 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 324 | NUM_BLOCKS_MAX)); 325 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 326 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 327 | 328 | kAdd<<>>(this->getDevData(), b->getDevData(), \ 329 | this->getDevData(), scale_this, scale_B, width, height); 330 | cudaDeviceSynchronize(); 331 | cudaCheckError(); 332 | } 333 | 334 | 335 | template 336 | void Matrix::showValue(string name){ 337 | 338 | Dtype* tmp_yh = new Dtype[this->_amount]; 339 | this->copyToHost(tmp_yh, this->_amount); 340 | cout << "-------------"<< name << "--------------" << endl; 341 | cout << this->_shape[0] << ":" << this->_shape[1] << endl; 342 | for(int i = 0; i < this->_shape[0]; i++){ 343 | for(int j = 0; j < this->_shape[1]; j++){ 344 | cout << tmp_yh[i * this->_shape[1] + j] << "\t"; 345 | if(j != 0 && j % (this->_shape[1]) == this->_shape[1] - 1) 346 | cout << endl; 347 | if(this->_shape[1] == 1) 348 | cout << endl; 349 | } 350 | } 351 | delete[] tmp_yh; 352 | } 353 | 354 | template 355 | void Matrix::reValue(float value){ 356 | int length = this->getNumRows() * this->getNumCols(); 357 | Dtype* tmp_yh = new Dtype[length]; 358 | for(int i = 0; i < length; i++){ 359 | tmp_yh[i] = value; 360 | } 361 | this->copyFromHost(tmp_yh, length); 362 | delete[] tmp_yh; 363 | } 364 | 365 | template 366 | void Matrix::reValue(int value, bool is_div){ 367 | int length = this->getNumRows() * this->getNumCols(); 368 | Dtype* tmp_yh = new Dtype[length]; 369 | for(int i = 0; i < length; i++){ 370 | if(!is_div) 371 | tmp_yh[i] = i % value; 372 | else 373 | tmp_yh[i] = i / value; 374 | } 375 | this->copyFromHost(tmp_yh, length); 376 | delete[] tmp_yh; 377 | } 378 | 379 | template 380 | Dtype Matrix::computeNorm(int len){ 381 | Dtype norm_cpu; 382 | Matrix* norm_gpu = new Matrix(1, 1); 383 | kComputeNorm<<<1, 1024, sizeof(Dtype)*len>>>(this->_data_value, \ 384 | norm_gpu->getDevData(), len); 385 | cudaDeviceSynchronize(); 386 | cudaCheckError(); 387 | 
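// kComputeNorm accumulates the squared entries in shared memory and leaves
// sqrt(sum) in norm_gpu[0], so only that single element is copied back to the host.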
norm_gpu->copyToHost(&norm_cpu, 1); 388 | delete norm_gpu; 389 | return norm_cpu; 390 | } 391 | 392 | template 393 | void Matrix::cropMatToNew(Matrix *tar, const int row_start, \ 394 | const int cropped_height, const int col_start, const int cropped_width){ 395 | kCropImg<<<1, 1024>>>(this->_data_value, tar->getDevData(), row_start, \ 396 | cropped_height, col_start, cropped_width, this->_shape[1]); 397 | cudaDeviceSynchronize(); 398 | cudaCheckError(); 399 | } 400 | 401 | template 402 | Dtype Matrix::getPosValue(int pos){ 403 | Dtype tmp; 404 | cudaMemcpy(&tmp, this->_data_value + pos, sizeof(Dtype), cudaMemcpyDeviceToHost); 405 | return tmp; 406 | } 407 | 408 | template 409 | Dtype Matrix::getFirstPosValue(){ 410 | return getPosValue(0); 411 | } 412 | 413 | template 414 | void Matrix::subedByUnitMat(){ 415 | 416 | const int width = this->_shape[1]; 417 | const int height = this->_shape[0]; 418 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 419 | assert(num_blocks_x < NUM_BLOCKS_MAX); 420 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 421 | NUM_BLOCKS_MAX)); 422 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 423 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 424 | 425 | kSubedByUnitMat<<>>(this->getDevData(), \ 426 | this->getDevData(), width, height); 427 | cudaDeviceSynchronize(); 428 | cudaCheckError(); 429 | } 430 | 431 | template 432 | void Matrix::setValueAt(const int height_idx, \ 433 | const int width_idx, const Dtype value){ 434 | int pos = height_idx*this->_shape[1] + width_idx; 435 | cudaMemcpy(this->_data_value + pos, &value, sizeof(Dtype), \ 436 | cudaMemcpyHostToDevice); 437 | } 438 | 439 | template 440 | void Matrix::subPortion(Matrix* b, const int b_row, \ 441 | const int b_col){ 442 | 443 | const int width = b_col; 444 | const int height = b_row; 445 | const int num_blocks_x = DIVUP(width, ADD_BLOCK_SIZE); 446 | assert(num_blocks_x < NUM_BLOCKS_MAX); 447 | const int num_blocks_y = max(1, min(DIVUP(height, ADD_BLOCK_SIZE), \ 448 | NUM_BLOCKS_MAX)); 449 | dim3 grid_size(num_blocks_x, num_blocks_y, 1); 450 | dim3 block_size(ADD_BLOCK_SIZE, ADD_BLOCK_SIZE, 1); 451 | 452 | kSubPortion<<>>(this->getDevData(), \ 453 | b->getDevData()+b_col, this->getDevData(), this->_shape[1], \ 454 | this->_shape[0], width, height); 455 | cudaDeviceSynchronize(); 456 | cudaCheckError(); 457 | } 458 | 459 | template 460 | void Matrix::readPars(string filename){ 461 | ifstream fin1(filename.c_str(), ios::binary); 462 | int dataLen = this->getNumRows() * this->getNumCols(); 463 | Dtype* tmp = new Dtype[dataLen]; 464 | fin1.read((char*)(tmp), sizeof(Dtype) * dataLen); 465 | cudaMemcpy(this->getDevData(), tmp, sizeof(Dtype)*dataLen, \ 466 | cudaMemcpyHostToDevice); 467 | fin1.close(); 468 | delete tmp; 469 | } 470 | 471 | template 472 | void Matrix::savePars(string filename){ 473 | ofstream fout(filename.c_str(), ios::binary); 474 | int dataLen = this->getNumRows() * this->getNumCols(); 475 | Dtype* tmp = new Dtype[dataLen]; 476 | cudaMemcpy(tmp, this->getDevData(), sizeof(Dtype)*dataLen, \ 477 | cudaMemcpyDeviceToHost); 478 | fout.write((char*)(tmp), sizeof(Dtype) * dataLen); 479 | fout.close(); 480 | delete tmp; 481 | } 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | -------------------------------------------------------------------------------- /dl/src/matrix_kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * filename:nvmatrix_kernel.cu 3 | */ 4 | 5 | #include 6 | #include 7 | 
#include "matrix_kernel.hpp" 8 | 9 | template 10 | __device__ Dtype mySigmoid(Dtype x) { 11 | if(x < -300) 12 | return 0; 13 | else if( x > 300) 14 | return 1; 15 | else 16 | return 1 / (1 + __expf(-x)); 17 | } 18 | 19 | 20 | template 21 | __global__ void kAddRowVector(Dtype* mat, Dtype* vec, Dtype* tgtMat, \ 22 | const int width, const int height, float scaleVec) { 23 | 24 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 25 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 26 | const int idx = idxY * width + idxX; 27 | const int numThreads = blockDim.x * gridDim.x * \ 28 | blockDim.y * gridDim.y; 29 | 30 | //此处控制了线程数要小于行列积 31 | for (int i = idx; i < width * height; i += numThreads) { 32 | tgtMat[idx] = mat[idx] + scaleVec * vec[idx % width]; 33 | 34 | } 35 | } 36 | 37 | template 38 | __global__ void kSoftmax(Dtype* gData, Dtype* target, const int width, \ 39 | const int height) { 40 | 41 | //跟同一个block里面值比较大小取最大值,减去最大值 42 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 43 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 44 | const int idx = idxY * width + idxX; 45 | //数据放入共享内存 46 | //计算离行值最近的2的次方 47 | int pow2Length = width; 48 | if(pow2Length & (pow2Length - 1)){ 49 | while(pow2Length & (pow2Length - 1)){ 50 | pow2Length &= pow2Length - 1; 51 | } 52 | } 53 | extern __shared__ Dtype ori[]; 54 | __shared__ Dtype max; 55 | 56 | if(idxX < width) 57 | ori[idxX] = gData[idx]; 58 | __syncthreads(); 59 | 60 | //先通过reduce来求最大值 61 | if(idxX >= pow2Length && idxX < width) 62 | ori[idxX - pow2Length] = ori[idxX - pow2Length] > ori[idxX] \ 63 | ? ori[idxX - pow2Length] : ori[idxX]; 64 | __syncthreads(); 65 | 66 | for(int activeThreads = (pow2Length >> 1); activeThreads; activeThreads >>= 1){ 67 | if(idxX < activeThreads){ 68 | ori[idxX] = ori[idxX + activeThreads] > ori[idxX] \ 69 | ? 
ori[idxX + activeThreads] : ori[idxX]; 70 | } 71 | __syncthreads(); 72 | 73 | } 74 | if(idxX == 0) 75 | max = ori[0]; 76 | __syncthreads(); 77 | 78 | if(idxX < width) 79 | target[idx] = __expf(gData[idx] - max); 80 | 81 | //reduce求和 82 | if(idxX < width) 83 | ori[idxX] = target[idx]; 84 | __syncthreads(); 85 | 86 | if(idxX >= pow2Length && idxX < width) 87 | ori[idxX - pow2Length] += ori[idxX]; 88 | __syncthreads(); 89 | 90 | for(int activeThreads = (pow2Length >> 1); activeThreads; activeThreads >>= 1){ 91 | if(idxX < activeThreads){ 92 | ori[idxX] += ori[idxX + activeThreads]; 93 | } 94 | __syncthreads(); 95 | } 96 | 97 | if(idxX < width) 98 | target[idx] = target[idx] / ori[0]; 99 | 100 | } 101 | 102 | template 103 | __global__ void kSetUpCurand(curandState *state, const int width, const int height) { 104 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 105 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 106 | const int idx = idxY * width + idxX; 107 | 108 | if(idxY < height && idxX < width){ 109 | curand_init(0, idx, 0, &state[idx]); 110 | } 111 | } 112 | 113 | template 114 | __global__ void kDropout(Dtype* gData, Dtype* target, int* record, \ 115 | curandState *state, const int width, const int height) { 116 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 117 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 118 | const int idx = idxY * width + idxX; 119 | 120 | if(idxY < height && idxX < width){ 121 | curandState local_state = state[idx]; 122 | Dtype local_prob = curand_uniform(&local_state); 123 | 124 | if(local_prob > 0.5){ 125 | target[idx] = gData[idx]; 126 | record[idx] = 1; 127 | }else{ 128 | target[idx] = 0; 129 | record[idx] = 0; 130 | } 131 | state[idx] = local_state; 132 | } 133 | } 134 | 135 | template 136 | __global__ void kRelu(Dtype* gData, Dtype* target, int* record, const int length) { 137 | const int idx = blockIdx.x * blockDim.x + threadIdx.x; 138 | 139 | if(idx < length){ 140 | if(gData[idx] > 0){ 141 | target[idx] = gData[idx]; 142 | record[idx] = 1; 143 | }else{ 144 | target[idx] = 0; 145 | record[idx] = 0; 146 | } 147 | } 148 | } 149 | template 150 | __global__ void kReluBack(Dtype* gData, Dtype* target, int* record, const int length) { 151 | const int idx = blockIdx.x * blockDim.x + threadIdx.x; 152 | 153 | if(idx < length){ 154 | if(record[idx] == 1){ 155 | target[idx] = gData[idx]; 156 | }else{ 157 | target[idx] = 0; 158 | } 159 | } 160 | } 161 | 162 | template 163 | __global__ void kSigmoid(Dtype* gData, Dtype* target, const int width, \ 164 | const int height) { 165 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 166 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 167 | const int idx = idxY * width + idxX; 168 | 169 | if(idxY < height && idxX < width) 170 | target[idx] = mySigmoid(gData[idx]); 171 | } 172 | 173 | template 174 | __global__ void kReciprocal(Dtype* gData, Dtype* target, const int width, \ 175 | const int height) { 176 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 177 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 178 | const int idx = idxY * width + idxX; 179 | 180 | if(idxY < height && idxX < width) 181 | target[idx] = 1 / gData[idx]; 182 | } 183 | 184 | template 185 | __global__ void kLog(Dtype* gData, Dtype* target, const int width, \ 186 | const int height) { 187 | 188 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 189 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 190 | const int idx = idxY * width + idxX; 191 | 192 | if(idxY < height && 
idxX < width){ 193 | double tmp = gData[idx] < 1 - 10e-15 ? gData[idx] : 1 - 10e-15; 194 | tmp = tmp > 10e-15 ? tmp : 10e-15; 195 | target[idx] = __logf(tmp); 196 | } 197 | } 198 | 199 | template <typename Dtype> 200 | __global__ void kCompactCol(const Dtype* ori, Dtype* target, const int interval, \ 201 | const int width, const int height){ 202 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 203 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 204 | const int oriIdx = idxY * width * interval + idxX * interval; 205 | const int tarIdx = idxY * width + idxX; 206 | 207 | if(idxY < height && idxX < width){ 208 | target[tarIdx] = 0; 209 | for(int i = 0; i < interval; i++){ 210 | target[tarIdx] += ori[i + oriIdx]; 211 | } 212 | } 213 | 214 | } 215 | 216 | 217 | template <typename Dtype> 218 | __global__ void kDumbSumCols(Dtype* mat, Dtype* vec, const int width, \ 219 | const int height) { 220 | 221 | extern __shared__ Dtype ori[]; 222 | 223 | //round width down to the nearest power of two 224 | int pow2Length = width; 225 | if(pow2Length & (pow2Length - 1)){ 226 | while(pow2Length & (pow2Length - 1)){ 227 | pow2Length &= pow2Length - 1; 228 | } 229 | } 230 | 231 | 232 | //stage the row in shared memory, then sum it by reduction 233 | int i = threadIdx.x; 234 | while(i < width){ 235 | ori[i] = mat[blockIdx.x * width + i]; 236 | i += blockDim.x; 237 | } 238 | __syncthreads(); 239 | int reduce_len = pow2Length > blockDim.x ? blockDim.x : pow2Length; 240 | 241 | //number of reduce passes needed; one pass covers at most 32*32 elements 242 | int times = width / reduce_len; 243 | 244 | //fold in the tail elements that do not divide evenly first 245 | int idxX = threadIdx.x + reduce_len * times; 246 | if(idxX > (reduce_len * times) && idxX < width) 247 | ori[idxX - reduce_len] += ori[idxX]; 248 | __syncthreads(); 249 | 250 | 251 | for(int j = times - 1; j >= 0; j--){ 252 | idxX = threadIdx.x + j * reduce_len; 253 | if(threadIdx.x == 0 && ((j + 1) * reduce_len) < width) 254 | ori[0] += ori[(j + 1) * reduce_len]; 255 | __syncthreads(); 256 | for(int activeThreads = (reduce_len >> 1); activeThreads; activeThreads >>= 1){ 257 | if(threadIdx.x < activeThreads){ 258 | ori[idxX] += ori[idxX + activeThreads]; 259 | } 260 | __syncthreads(); 261 | } 262 | } 263 | 264 | if(threadIdx.x == 0){ 265 | vec[blockIdx.x] = ori[0]; 266 | } 267 | __syncthreads(); 268 | 269 | } 270 | 271 | 272 | template <typename Dtype> 273 | __global__ void kDumbMaxPosInRow(Dtype* mat, Dtype* vec, const int width, \ 274 | const int height) { 275 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 276 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 277 | const int idx = idxY * width + idxX; 278 | 279 | extern __shared__ Dtype ori[]; 280 | 281 | int pow2Length = width; 282 | if(pow2Length & (pow2Length - 1)){ 283 | while(pow2Length & (pow2Length - 1)){ 284 | pow2Length &= pow2Length - 1; 285 | } 286 | } 287 | 288 | //reduce to find the row maximum 289 | if(idxX < width) 290 | ori[idxX] = mat[idx]; 291 | __syncthreads(); 292 | 293 | if(idxX >= pow2Length && idxX < width) 294 | ori[idxX - pow2Length] = ori[idxX - pow2Length] > ori[idxX] \ 295 | ? ori[idxX - pow2Length] : ori[idxX]; 296 | __syncthreads(); 297 | 298 | for(int activeThreads = (pow2Length >> 1); activeThreads; activeThreads >>= 1){ 299 | if(idxX < activeThreads){ 300 | ori[idxX] = ori[idxX + activeThreads] > ori[idxX] \ 301 | ?
ori[idxX + activeThreads] : ori[idxX]; 302 | } 303 | __syncthreads(); 304 | } 305 | 306 | if(mat[idx] == ori[0] && idxX < width) 307 | vec[idxY] = idxX; 308 | 309 | __syncthreads(); 310 | } 311 | 312 | template 313 | __global__ void kMultByColVector(Dtype* mat, Dtype* vec, Dtype* tgtMat, \ 314 | const int width, const int height) { 315 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 316 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 317 | const int idx = idxY * width + idxX; 318 | 319 | if(idxY < height && idxX < width) 320 | tgtMat[idx] = mat[idx] * vec[idxY]; 321 | } 322 | 323 | template 324 | __global__ void kSubtractFromScalar(Dtype* gData, float scalar, Dtype* target, \ 325 | const int width, const int height) { 326 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 327 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 328 | const int idx = idxY * width + idxX; 329 | 330 | if(idxY < height && idxX < width) 331 | target[idx] = scalar - gData[idx]; 332 | } 333 | 334 | template 335 | __global__ void kMult(Dtype* matA, Dtype* matB, Dtype* tgtMat, \ 336 | const int width, const int height) { 337 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 338 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 339 | const int idx = idxY * width + idxX; 340 | 341 | if(idxY < height && idxX < width) 342 | tgtMat[idx] = matA[idx] * matB[idx]; 343 | } 344 | 345 | template 346 | __global__ void kAdd(Dtype* matA, Dtype* matB, Dtype* tgtMat, float scaleA, \ 347 | float scaleB, const int width, const int height) { 348 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 349 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 350 | const int idx = idxY * width + idxX; 351 | 352 | if(idxY < height && idxX < width) 353 | tgtMat[idx] = scaleA * matA[idx] + scaleB * matB[idx]; 354 | } 355 | 356 | 357 | template 358 | __global__ void kTranspose(Dtype* srcData, Dtype* dstData, \ 359 | const int width, const int height){ 360 | 361 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 362 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 363 | const int srcIdx = idxY * width + idxX; 364 | const int dstIdx = idxX * height + idxY; 365 | 366 | if(idxY < height && idxX < width) 367 | dstData[dstIdx] = srcData[srcIdx]; 368 | 369 | } 370 | 371 | template 372 | __global__ void kComputeNorm(const Dtype* vec, Dtype* norm, const int len){ 373 | //每一个block计算一个模 374 | extern __shared__ Dtype sh_norm[]; 375 | 376 | int pow2_len = len; 377 | if (pow2_len & (pow2_len - 1)) { 378 | while (pow2_len & (pow2_len - 1)){ 379 | pow2_len &= pow2_len - 1; 380 | } 381 | } 382 | 383 | int i = threadIdx.x; 384 | while (i < len) { 385 | sh_norm[i] = vec[i]*vec[i]; 386 | i += blockDim.x; 387 | } 388 | 389 | int reduce_len = pow2_len > blockDim.x ? 
blockDim.x : pow2_len; 390 | int times = len / reduce_len; 391 | 392 | int vec_pos = threadIdx.x + reduce_len * times; 393 | if (vec_pos > (reduce_len * times) && vec_pos < len) { 394 | sh_norm[vec_pos - reduce_len] += sh_norm[vec_pos]; 395 | } 396 | __syncthreads(); 397 | 398 | for (int j = times-1; j >= 0; j--) { 399 | vec_pos = threadIdx.x + j*reduce_len; 400 | if (threadIdx.x == 0 && (j + 1) * reduce_len < len) { 401 | sh_norm[0] += sh_norm[(j + 1) * reduce_len]; 402 | } 403 | __syncthreads(); 404 | for (int active_thread = (reduce_len >> 1); active_thread; active_thread >>= 1) { 405 | if (threadIdx.x < active_thread) { 406 | sh_norm[vec_pos] += sh_norm[vec_pos + active_thread]; 407 | } 408 | __syncthreads(); 409 | } 410 | } 411 | 412 | if (threadIdx.x == 0) { 413 | norm[0] = sqrt(sh_norm[0]); 414 | } 415 | 416 | __syncthreads(); 417 | } 418 | 419 | template 420 | __global__ void kCropImg(const Dtype* ori_img, Dtype* dst_img, \ 421 | const int row_start, const int cropped_height, \ 422 | const int col_start, const int cropped_width, \ 423 | const int ori_width){ 424 | 425 | int idx = threadIdx.x; 426 | 427 | while (idx < cropped_height*cropped_width) { 428 | int ori_row_idx = idx / cropped_width + row_start; 429 | int ori_col_idx = idx % cropped_width + col_start; 430 | dst_img[idx] = ori_img[ori_row_idx*ori_width + ori_col_idx]; 431 | idx += blockDim.x; 432 | } 433 | __syncthreads(); 434 | } 435 | 436 | template 437 | __global__ void kComputeHouseholderVec(const Dtype* src, Dtype* dst, \ 438 | Dtype added_value, Dtype scale, const int len) { 439 | int idx = threadIdx.x; 440 | while (idx < len) { 441 | if (idx == 0) { 442 | dst[idx] = scale * (src[idx] + added_value); 443 | } else 444 | dst[idx] = scale * src[idx]; 445 | idx += blockDim.x; 446 | } 447 | } 448 | 449 | template 450 | __global__ void kSubedByUnitMat(Dtype* matA, Dtype* tgtMat, \ 451 | const int width, const int height) { 452 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 453 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 454 | const int idx = idxY * width + idxX; 455 | 456 | if(idxY < height && idxX < width ){ 457 | if ( idxX == idxY) 458 | tgtMat[idx] = 1 - matA[idx]; 459 | else 460 | tgtMat[idx] = - matA[idx]; 461 | } 462 | 463 | } 464 | 465 | template 466 | __global__ void kSubPortion(Dtype* matA, Dtype* matB, Dtype* tgtMat, \ 467 | const int a_width, const int a_height, \ 468 | const int b_width, const int b_height){ 469 | 470 | const int row_dist = a_height - b_height; 471 | const int col_dist = a_width - b_width; 472 | const int idxY = blockIdx.y * blockDim.y + threadIdx.y; 473 | const int idxX = blockIdx.x * blockDim.x + threadIdx.x; 474 | const int idx = idxY * b_width + idxX; 475 | 476 | const int a_idx = (idxY+row_dist)*a_width + idxX+col_dist; 477 | 478 | if(idxY < b_height && idxX < b_width ){ 479 | tgtMat[a_idx] = matA[a_idx] - matB[idx]; 480 | } 481 | 482 | } 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | -------------------------------------------------------------------------------- /dl/src/model_component.cpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file model_component.cpp 3 | /// @brief 4 | #include "model_component.hpp" 5 | 6 | using namespace std; 7 | 8 | template 9 | ModelComponent::ModelComponent() { 10 | _string_map_layertype["CONVOLUTION"] = CONVOLUTION; 11 | _string_map_layertype["POOLING"] = POOLING; 12 | 
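// These keys must match the "type" strings used in the network description JSON
// (e.g. script/cifar10.json); TrainModel::parseNetJson() resolves each layer's
// type through this map before constructing its Param object.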
_string_map_layertype["SIGMOID"] = SIGMOID; 13 | _string_map_layertype["RECTIFIED"] = RECTIFIED; 14 | _string_map_layertype["INNERPRODUCT"] = INNERPRODUCT; 15 | _string_map_layertype["SOFTMAX"] = SOFTMAX; 16 | _string_map_layertype["DROPOUT"] = DROPOUT; 17 | 18 | _string_map_pooltype["MAX_POOLING"] = MAX_POOLING; 19 | _string_map_pooltype["AVG_POOLING"] = AVG_POOLING; 20 | 21 | 22 | _num_need_train_layers = 0; 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /dl/src/pooling_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file pooling_layer.cu 3 | /// 4 | 5 | #include "pooling_layer.hpp" 6 | 7 | using namespace std; 8 | 9 | template 10 | PoolingLayer::PoolingLayer(PoolParam *lcp){ 11 | this->_lcp = lcp; 12 | _num_box = _lcp->getBoxNumHeight()*_lcp->getBoxNumWidth(); 13 | 14 | cublasCreate(&this->handle); 15 | 16 | } 17 | 18 | template 19 | PoolingLayer::~PoolingLayer() { 20 | 21 | delete this-> _y; 22 | delete this->_dE_dy; 23 | 24 | if(_lcp->getPoolType() == MAX_POOLING ) 25 | delete _max_pos; 26 | if((_lcp->getOutHeight() > MAX_THREAD_SIZE \ 27 | || _lcp->getOutWidth() > MAX_THREAD_SIZE) \ 28 | && (_lcp->getOverlapHeight() > 0 || _lcp->getOverlapWidth() > 0)) 29 | delete unranged_dE_dx; 30 | cublasDestroy(this->handle); 31 | } 32 | 33 | template 34 | void PoolingLayer::initCuda() { 35 | 36 | 37 | this->_y = new Matrix(_lcp->getMinibatchSize(), \ 38 | _lcp->getOutHeight()*_lcp->getOutWidth()* _lcp->getOutChannel()); 39 | 40 | this->_dE_dy = new Matrix(this->_y); 41 | 42 | 43 | if(_lcp->getPoolType() == MAX_POOLING ){ 44 | _max_pos = new Matrix(_lcp->getMinibatchSize(), \ 45 | _lcp->getOutHeight()*_lcp->getOutWidth()* _lcp->getOutChannel()); 46 | 47 | } 48 | if((_lcp->getOutHeight() > MAX_THREAD_SIZE \ 49 | || _lcp->getOutWidth() > MAX_THREAD_SIZE) \ 50 | && (_lcp->getOverlapHeight() > 0 || _lcp->getOverlapWidth() > 0)){ 51 | unranged_dE_dx = new Matrix(_lcp->getMinibatchSize(), \ 52 | _lcp->getBoxInHeight()*_lcp->getBoxInWidth() \ 53 | * _lcp->getBoxNumHeight()*_lcp->getBoxNumWidth() \ 54 | * _lcp->getOutChannel()); 55 | } 56 | 57 | } 58 | 59 | template 60 | void PoolingLayer::computeOutput(Matrix* x){ 61 | 62 | this->_y->zeros(); 63 | 64 | dim3 blocks = dim3(_lcp->getMinibatchSize(), _lcp->getInChannel() * _num_box); 65 | dim3 threads = dim3(_lcp->getThreadWidth(), _lcp->getThreadHeight()); 66 | 67 | if(_lcp->getPoolType() == MAX_POOLING ){ 68 | max_pooling<<>>(x->getDevData(), \ 69 | this->_y->getDevData(), _max_pos->getDevData(), \ 70 | _lcp->getInHeight(), _lcp->getInWidth(), \ 71 | _lcp->getInChannel(), \ 72 | _lcp->getOutHeight(), _lcp->getOutWidth(), \ 73 | _lcp->getFilterHeight(), _lcp->getFilterWidth(), \ 74 | _lcp->getStrideHeight(), _lcp->getStrideWidth(), \ 75 | _lcp->getBoxOutHeight(), _lcp->getBoxOutWidth(), \ 76 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 77 | 78 | }else if(_lcp->getPoolType() == AVG_POOLING){ 79 | avg_pooling<<>>(x->getDevData(), \ 80 | this->_y->getDevData(), \ 81 | _lcp->getInHeight(), _lcp->getInWidth(), \ 82 | _lcp->getInChannel(), \ 83 | _lcp->getOutHeight(), _lcp->getOutWidth(), \ 84 | _lcp->getFilterHeight(), _lcp->getFilterWidth(), \ 85 | _lcp->getStrideHeight(), _lcp->getStrideWidth(), \ 86 | _lcp->getBoxOutHeight(), _lcp->getBoxOutWidth(), \ 87 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 88 | }else{ 89 | cout << "Pooling type is invalid !\n"; 90 | exit(EXIT_FAILURE); 91 | } 92 | 93 | cudaThreadSynchronize(); 94 | 
cudaCheckError(); 95 | 96 | } 97 | 98 | template 99 | void PoolingLayer::computeDerivsOfInput(Matrix* dE_dx){ 100 | 101 | dim3 blocks = dim3(_lcp->getMinibatchSize(), _lcp->getInChannel() * _num_box); 102 | dim3 threads = dim3(_lcp->getThreadWidth(), _lcp->getThreadHeight()); 103 | 104 | int box_in_height = MAX_THREAD_SIZE > _lcp->getOutHeight() \ 105 | ? _lcp->getInHeight() : _lcp->getBoxInHeight(); 106 | int box_in_width = MAX_THREAD_SIZE > _lcp->getOutWidth() \ 107 | ? _lcp->getInWidth() : _lcp->getBoxInWidth(); 108 | 109 | Dtype* p_dE_dx; 110 | if((_lcp->getOutHeight() > MAX_THREAD_SIZE \ 111 | || _lcp->getOutWidth() > MAX_THREAD_SIZE) \ 112 | && (_lcp->getOverlapHeight() > 0 || _lcp->getOverlapWidth() > 0)){ 113 | unranged_dE_dx->zeros(); 114 | p_dE_dx = unranged_dE_dx->getDevData(); 115 | }else{ 116 | dE_dx->zeros(); 117 | p_dE_dx = dE_dx->getDevData(); 118 | } 119 | 120 | if(_lcp->getPoolType() == MAX_POOLING ){ 121 | compute_dE_dy_max<<>>( \ 123 | this->_dE_dy->getDevData(), \ 124 | p_dE_dx, _max_pos->getDevData(), \ 125 | box_in_height, box_in_width, \ 126 | _lcp->getBoxOutHeight(), _lcp->getBoxOutWidth(), \ 127 | _lcp->getInChannel(), \ 128 | _lcp->getOutHeight(), _lcp->getOutWidth(), \ 129 | _lcp->getFilterHeight(), _lcp->getFilterWidth(), \ 130 | _lcp->getStrideHeight(), _lcp->getStrideWidth(), \ 131 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 132 | cudaThreadSynchronize(); 133 | cudaCheckError(); 134 | 135 | 136 | }else if(_lcp->getPoolType() == AVG_POOLING){ 137 | compute_dE_dy_avg<<>>( \ 139 | this->_dE_dy->getDevData(), p_dE_dx, \ 140 | box_in_height, box_in_width, \ 141 | _lcp->getBoxOutHeight(), _lcp->getBoxOutWidth(), \ 142 | _lcp->getInChannel(), \ 143 | _lcp->getOutHeight(), _lcp->getOutWidth(), \ 144 | _lcp->getFilterHeight(), _lcp->getFilterWidth(), \ 145 | _lcp->getStrideHeight(), _lcp->getStrideWidth(), \ 146 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 147 | cudaThreadSynchronize(); 148 | cudaCheckError(); 149 | 150 | }else{ 151 | cout << "Pooling type is invalid !\n"; 152 | exit(EXIT_FAILURE); 153 | } 154 | 155 | if((_lcp->getOutHeight() > MAX_THREAD_SIZE \ 156 | || _lcp->getOutWidth() > MAX_THREAD_SIZE) \ 157 | && (_lcp->getOverlapHeight() > 0 || _lcp->getOverlapWidth() > 0)){ 158 | dE_dx->zeros(); 159 | 160 | compactOverlap<<<_lcp->getMinibatchSize(), _lcp->getInChannel()>>>( \ 161 | unranged_dE_dx->getDevData(), dE_dx->getDevData(), \ 162 | _lcp->getInHeight(), _lcp->getInWidth(), \ 163 | _lcp->getInChannel(), _lcp->getOverlapHeight(), \ 164 | _lcp->getOverlapWidth(), \ 165 | box_in_height, box_in_width, \ 166 | _lcp->getBoxNumHeight(), _lcp->getBoxNumWidth()); 167 | cudaThreadSynchronize(); 168 | cudaCheckError(); 169 | } 170 | } 171 | 172 | 173 | 174 | -------------------------------------------------------------------------------- /dl/src/relu_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file relu_layer.cu 3 | /// @brief 4 | 5 | 6 | using namespace std; 7 | 8 | template 9 | ReluLayer::ReluLayer(Param* p){ 10 | 11 | this->_p = p; 12 | } 13 | 14 | template 15 | ReluLayer::~ReluLayer() { 16 | delete this->_y; 17 | delete this->_dE_dy; 18 | delete _record; 19 | } 20 | 21 | template 22 | void ReluLayer::initCuda() { 23 | 24 | 25 | ConnectType ct = this->_p->getConnectType(); 26 | int col; 27 | if(ct == PARAM_CONNECT_TYPE_LOCAL) 28 | col = _p->getOutHeight()*_p->getOutWidth() \ 29 | * this->_p->getOutChannel(); 30 | else if(ct == PARAM_CONNECT_TYPE_FULL) 31 | col = 
this->_p->getNumOut(); 32 | this->_y = new Matrix(_p->getMinibatchSize(), \ 33 | col); 34 | this->_dE_dy = new Matrix(this->_y); 35 | 36 | _record = new Matrix(_p->getMinibatchSize(), col); 37 | 38 | } 39 | 40 | template 41 | void ReluLayer::computeOutput(Matrix* x){ 42 | 43 | this->_y->zeros(); 44 | x->applyRelu(this->_y, _record); 45 | 46 | } 47 | 48 | template 49 | void ReluLayer::computeDerivsOfInput(Matrix* dE_dx){ 50 | dE_dx->zeros(); 51 | 52 | this->_dE_dy->applyRelu(dE_dx, _record, false); 53 | 54 | } 55 | 56 | 57 | -------------------------------------------------------------------------------- /dl/src/sigmoid_layer.cu: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file sigmoid_layer.cu 3 | /// @brief 4 | 5 | #include "sigmoid_layer.hpp" 6 | 7 | using namespace std; 8 | 9 | template 10 | SigmoidLayer::SigmoidLayer(Param* fcp){ 11 | 12 | this->_fcp = fcp; 13 | } 14 | 15 | template 16 | SigmoidLayer::~SigmoidLayer() { 17 | delete this->_y; 18 | delete this->_dE_dy; 19 | } 20 | 21 | template 22 | void SigmoidLayer::initCuda() { 23 | 24 | 25 | ConnectType ct = this->_fcp->getConnectType(); 26 | int col; 27 | if(ct == PARAM_CONNECT_TYPE_LOCAL) 28 | col = _fcp->getOutHeight()*_fcp->getOutWidth() \ 29 | * this->_fcp->getOutChannel(); 30 | else if(ct == PARAM_CONNECT_TYPE_FULL) 31 | col = this->_fcp->getNumOut(); 32 | this->_y = new Matrix(_fcp->getMinibatchSize(), \ 33 | col); 34 | this->_dE_dy = new Matrix(this->_y); 35 | } 36 | 37 | template 38 | void SigmoidLayer::computeOutput(Matrix* x){ 39 | x->apply(Matrix::SIGMOID, this->_y); 40 | } 41 | 42 | template 43 | void SigmoidLayer::computeDerivsOfInput(Matrix* dE_dx){ 44 | 45 | 46 | this->_y->subtractFromScalar(1, dE_dx); 47 | 48 | dE_dx->eltWiseMult(this->_y); 49 | 50 | dE_dx->eltWiseMult(this->_dE_dy); 51 | 52 | } 53 | 54 | 55 | -------------------------------------------------------------------------------- /dl/src/train_classification.cpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file train_classification.cpp 3 | /// @brief 4 | 5 | 6 | #include 7 | #include 8 | #include 9 | #include "train_classification.hpp" 10 | 11 | using namespace std; 12 | 13 | template 14 | void TrainClassification::createPixelAndLabel(){ 15 | this->_model_component->_mini_data = new Matrix(this->_model_component->_minibatch_size, \ 16 | this->_model_component->_one_img_len); 17 | this->_model_component->_mini_label = new Matrix(this->_model_component->_minibatch_size, 1); 18 | } 19 | 20 | template 21 | void TrainClassification::parseImgBinary(string train_file, string valid_file){ 22 | this->_load_layer = new LoadCifar10(this->_model_component->_minibatch_size); 23 | this->_model_component->_num_train = this->_load_layer->getNumTrain(); 24 | this->_model_component->_num_valid = this->_load_layer->getNumValid(); 25 | this->_model_component->setNumTrainBatch(); 26 | this->_model_component->setNumValidBatch(); 27 | 28 | } 29 | 30 | template 31 | void TrainClassification::forwardLastLayer(){ 32 | 33 | this->_model_component->_layers[this->_model_component->_num_layers-1]->computeOutput(\ 34 | this->_model_component->_y[this->_model_component->_num_layers-1]); 35 | this->_likelihood += dynamic_cast* >( \ 36 | this->_model_component->_layers[this->_model_component->_num_layers-1]) \ 37 | ->computeError(this->_model_component->_mini_label, this->_error); 38 | } 39 | 40 | template 41 | void TrainClassification::backwardLastLayer(){ 42 | Logistic *last_layer = 
dynamic_cast* >( \ 43 | this->_model_component->_layers[this->_model_component->_num_layers-1]); 44 | last_layer->computeDerivsOfInput(this->_model_component->_dE_dy[ \ 45 | this->_model_component->_num_layers-2], \ 46 | this->_model_component->_mini_label); 47 | } 48 | 49 | template 50 | void TrainClassification::train() { 51 | 52 | clock_t t; 53 | t = clock(); 54 | 55 | int pixel_len = this->_model_component->_minibatch_size*this->_model_component->_one_img_len; 56 | int label_len = this->_model_component->_minibatch_size; 57 | Dtype *h_mini_pixel = new Dtype[pixel_len]; //分配在主机内存上 58 | int *h_mini_label = new int[label_len]; 59 | 60 | for (int epoch_idx = 0; epoch_idx < this->_model_component->_num_epoch; \ 61 | epoch_idx++) { 62 | 63 | this->_likelihood = 0; 64 | this->_error = 0; 65 | 66 | Logistic *last_layer = dynamic_cast* >( \ 67 | this->_model_component->_layers[this->_model_component->_num_layers-1]); 68 | last_layer->setRecordToZero(); 69 | 70 | 71 | for(int batch_idx = 0; batch_idx < this->_model_component->_num_train_batch; \ 72 | batch_idx++){ 73 | 74 | this->_load_layer->loadTrainOneBatch(batch_idx, h_mini_pixel, h_mini_label); 75 | this->_model_component->_mini_data->copyFromHost(h_mini_pixel, \ 76 | pixel_len); 77 | this->_model_component->_mini_label->copyFromHost(h_mini_label, \ 78 | label_len); 79 | this->forwardPropagate(); 80 | forwardLastLayer(); 81 | backwardLastLayer(); 82 | this->backwardPropagate(); 83 | 84 | this->computeAndUpdatePars(); 85 | 86 | if(batch_idx == this->_model_component->_num_train_batch-1){ 87 | cout << "----------epoch_idx: " << epoch_idx << "-----------\n"; 88 | cout << "training likelihood: " << this->_likelihood << endl; 89 | cout << "classification training accuarcy: " << 1-(float)this->_error/ \ 90 | (this->_model_component->_num_train_batch \ 91 | *this->_model_component->getMinibatchSize()) << endl; 92 | Matrix* train_record = last_layer->getResultRecord(); 93 | train_record->showValue("train record"); 94 | 95 | this->_likelihood = 0; 96 | this->_error = 0; 97 | 98 | last_layer->setRecordToZero(); 99 | 100 | for(int valid_idx = 0; \ 101 | valid_idx < this->_model_component->_num_valid_batch; \ 102 | valid_idx++){ 103 | 104 | this->_load_layer->loadValidOneBatch( valid_idx, \ 105 | h_mini_pixel, h_mini_label); 106 | this->_model_component->_mini_data->copyFromHost(h_mini_pixel, \ 107 | pixel_len); 108 | this->_model_component->_mini_label->copyFromHost(h_mini_label, \ 109 | label_len); 110 | 111 | this->forwardPropagate(); 112 | forwardLastLayer(); 113 | 114 | } 115 | Matrix* valid_record = last_layer->getResultRecord(); 116 | valid_record->showValue("valid record"); 117 | 118 | cout << "validation likelihood: " << this->_likelihood << endl; 119 | cout << "classification valid accuarcy: " << 1-(float)this->_error/ \ 120 | (this->_model_component->_num_valid_batch \ 121 | *this->_model_component->getMinibatchSize()) << endl; 122 | 123 | 124 | } 125 | } 126 | 127 | t = clock() - t; 128 | cout << ((float)t/CLOCKS_PER_SEC) << "s.\n"; 129 | t = clock(); 130 | 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /dl/src/train_model.cpp: -------------------------------------------------------------------------------- 1 | /// 2 | /// \file train_model.cpp 3 | /// @brief 4 | 5 | 6 | #include 7 | #include 8 | #include 9 | #include "train_model.hpp" 10 | #include "json/json.h" 11 | #include "inner_product_layer.hpp" 12 | #include "logistic.hpp" 13 | #include "sigmoid_layer.hpp" 14 | #include 
"relu_layer.hpp" 15 | #include "convnet.hpp" 16 | #include "pooling_layer.hpp" 17 | #include "dropout_layer.hpp" 18 | 19 | using namespace std; 20 | 21 | template 22 | TrainModel::TrainModel(bool has_valid, bool is_test){ 23 | _model_component = new ModelComponent(); 24 | _likelihood = 0; 25 | _is_stop = false; 26 | _has_valid = has_valid; 27 | _is_test = is_test; 28 | if(has_valid) 29 | _num_data_type = 2; 30 | else 31 | _num_data_type = 1; 32 | } 33 | 34 | template 35 | TrainModel::~TrainModel() { 36 | delete _model_component; 37 | delete _load_layer; 38 | } 39 | 40 | template 41 | void TrainModel::parseNetJson(string json_file) { 42 | Json::Reader reader; 43 | Json::Value root; 44 | ifstream fin(json_file.c_str()); 45 | if (reader.parse(fin, root)) { 46 | _model_component->_minibatch_size = root["minibatch_size"].asInt(); 47 | Param::setMinibatchSize(_model_component->_minibatch_size); 48 | 49 | _model_component->_num_epoch = root["num_epoch"].asInt(); 50 | _model_component->_img_height = root["img_height"].asInt(); 51 | _model_component->_img_width = root["img_width"].asInt(); 52 | _model_component->_img_channel = root["img_channel"].asInt(); 53 | 54 | cout << "\n===========overall==============" \ 55 | << "\nnum_epoch: " << _model_component->_num_epoch \ 56 | << "\nbatchSize: " << _model_component->_minibatch_size; 57 | 58 | 59 | _model_component->_num_layers = root["layer"].size(); 60 | 61 | string layer_type, name; 62 | int pad_height, pad_width, stride_height, stride_width; 63 | int filter_height, filter_width, filter_channel, num_out, num_in; 64 | float w_lr, bias_lr, momentum, weight_decay, w_gauss; 65 | string p_type; 66 | Param* param; 67 | 68 | for (int i = 0; i < _model_component->_num_layers; ++i) { 69 | layer_type = root["layer"][i]["type"].asString(); 70 | name = root["layer"][i]["name"].asString(); 71 | if (!root["layer"][i]["filter_height"].isNull()) { 72 | pad_height = root["layer"][i]["pad_height"].asInt(); 73 | pad_width = root["layer"][i]["pad_width"].asInt(); 74 | stride_height = root["layer"][i]["stride_height"].asInt(); 75 | stride_width = root["layer"][i]["stride_width"].asInt(); 76 | filter_height = root["layer"][i]["filter_height"].asInt(); 77 | filter_width = root["layer"][i]["filter_width"].asInt(); 78 | } 79 | if (!root["layer"][i]["w_lr"].isNull()) { 80 | w_lr = root["layer"][i]["w_lr"].asFloat(); 81 | bias_lr = root["layer"][i]["bias_lr"].asFloat(); 82 | momentum = root["layer"][i]["momentum"].asFloat(); 83 | weight_decay = root["layer"][i]["weight_decay"].asFloat(); 84 | w_gauss = root["layer"][i]["w_gauss"].asFloat(); 85 | } 86 | if (!root["layer"][i]["num_out"].isNull()) { 87 | num_out = root["layer"][i]["num_out"].asInt(); 88 | } 89 | if (!root["layer"][i]["num_in"].isNull()) { 90 | num_in = root["layer"][i]["num_in"].asInt(); 91 | } 92 | if (!root["layer"][i]["pool_type"].isNull()) { 93 | p_type = root["layer"][i]["pool_type"].asString(); 94 | } 95 | if (!root["layer"][i]["filter_channel"].isNull()) { 96 | filter_channel = root["layer"][i]["filter_channel"].asInt(); 97 | }else{ 98 | filter_channel = 0; 99 | } 100 | if (layer_type == "CONVOLUTION") { 101 | if (_model_component->_layers_param.size() == 0) { 102 | param = new ConvParam( \ 103 | _model_component->_string_map_layertype[layer_type], \ 104 | name, w_lr, bias_lr, momentum, weight_decay, w_gauss, \ 105 | _model_component->_img_height, _model_component->_img_width, \ 106 | pad_height, pad_width, stride_height, stride_width, \ 107 | _model_component->_img_channel, filter_height, \ 108 | 
filter_width, filter_channel); 109 | } else{ 110 | param = new ConvParam( \ 111 | _model_component->_string_map_layertype[layer_type], \ 112 | name, w_lr, bias_lr, momentum, weight_decay, w_gauss, \ 113 | pad_height, pad_width, stride_height, stride_width, \ 114 | filter_height, filter_width, filter_channel, \ 115 | dynamic_cast( \ 116 | _model_component->_layers_param.back())); 117 | } 118 | } else if (layer_type == "POOLING") { 119 | param = new PoolParam( \ 120 | _model_component->_string_map_layertype[layer_type], \ 121 | name, pad_height, pad_width, stride_height, stride_width, \ 122 | filter_height, filter_width, 0, \ 123 | dynamic_cast( \ 124 | _model_component->_layers_param.at( \ 125 | _model_component->_layers_param.size() - 2)), \ 126 | _model_component->_string_map_pooltype[p_type]); 127 | } else if (layer_type == "SIGMOID" || layer_type == "RECTIFIED" \ 128 | || layer_type == "SOFTMAX" || layer_type == "DROPOUT") { 129 | param = new FullConnectParam( \ 130 | _model_component->_string_map_layertype[layer_type], \ 131 | name, 0, _model_component->_layers_param.back()); 132 | } else if (layer_type == "INNERPRODUCT" ) { 133 | if (_model_component->_layers_param.size() == 0) { 134 | num_in = _model_component->_img_height \ 135 | * _model_component->_img_width \ 136 | * _model_component->_img_channel; 137 | param = new InnerParam( \ 138 | _model_component->_string_map_layertype[layer_type], \ 139 | name, w_lr, bias_lr, momentum, weight_decay, w_gauss, \ 140 | num_in, num_out); 141 | }else{ 142 | param = new InnerParam( \ 143 | _model_component->_string_map_layertype[layer_type], \ 144 | name, w_lr, bias_lr, momentum, weight_decay, w_gauss, \ 145 | num_out, _model_component->_layers_param.back()); 146 | } 147 | } else if(layer_type == "PREDICTOBJECT"){ 148 | param = new FullConnectParam( \ 149 | _model_component->_string_map_layertype[layer_type], \ 150 | name, 0, _model_component->_layers_param.back()); 151 | } else if(layer_type == "RECOMMENDSUBSTITUE"){ 152 | param = new FullConnectParam( \ 153 | _model_component->_string_map_layertype[layer_type], \ 154 | name, num_out, _model_component->_layers_param.back()); 155 | } else if(layer_type == "RECOMMENDCOMPATIBLE"){ 156 | param = new FullConnectParam( \ 157 | _model_component->_string_map_layertype[layer_type], \ 158 | name, num_out, _model_component->_layers_param.back()); 159 | } 160 | param->printParam(); 161 | _model_component->_layers_param.push_back(param); 162 | 163 | if (param->getParamTrainType() == NEED) { 164 | _model_component->_layers_need_train_param.push_back(param); 165 | _model_component->_num_need_train_layers++; 166 | } 167 | } 168 | } 169 | _model_component->_one_img_len = _model_component->_img_width \ 170 | *_model_component->_img_height \ 171 | *_model_component->_img_channel; 172 | } 173 | 174 | template 175 | void TrainModel::createLayer(){ 176 | cout << _model_component->_num_layers << endl; 177 | for (int i = 0; i < _model_component->_num_layers; ++i){ 178 | Layer *layer; 179 | Param *param = _model_component->_layers_param[i]; 180 | try{ 181 | if (param->getLayerType() == CONVOLUTION) { 182 | LocalConnectParam* lcp = dynamic_cast(param); 183 | if(lcp == NULL) 184 | throw 5; 185 | layer = new ConvNet(dynamic_cast(lcp)); 186 | } else if (param->getLayerType() == POOLING) { 187 | layer = new PoolingLayer(dynamic_cast(param)); 188 | } else if (param->getLayerType() == SIGMOID) { 189 | layer = new SigmoidLayer(dynamic_cast(param)); 190 | } else if (param->getLayerType() == RECTIFIED) { 191 | layer = new 
ReluLayer(dynamic_cast(param)); 192 | } else if (param->getLayerType() == SOFTMAX) { 193 | layer = new Logistic(dynamic_cast(param)); 194 | } else if (param->getLayerType() == DROPOUT) { 195 | layer = new DropoutLayer(dynamic_cast(param)); 196 | } else if (param->getLayerType() == INNERPRODUCT ) { 197 | FullConnectParam* fcp = dynamic_cast(param); 198 | layer = new InnerProductLayer(dynamic_cast(fcp)); 199 | } 200 | }catch(int e){ 201 | cout << "dynamic point is null\n"; 202 | } 203 | 204 | layer->initCuda(); 205 | _model_component->_layers.push_back(layer); 206 | 207 | if (param->getParamTrainType() == NEED) { 208 | _model_component->_layers_needed_train.push_back(layer); 209 | } 210 | } 211 | } 212 | 213 | template 214 | void TrainModel::createWBias() { 215 | for (int i = 0; i < _model_component->getNumNeedTrainLayers(); ++i) { 216 | TrainLayer* tl = dynamic_cast*>( \ 217 | _model_component->_layers_needed_train[i]); 218 | _model_component->_w.push_back(tl->getW()); 219 | _model_component->_bias.push_back(tl->getBias()); 220 | _model_component->_w_len.push_back(tl->getW()->getNumEles()); 221 | _model_component->_bias_len.push_back(tl->getBias()->getNumEles()); 222 | } 223 | } 224 | 225 | template 226 | void TrainModel::createYDEDY() { 227 | _model_component->_y.push_back(_model_component->_mini_data); 228 | _model_component->_y_needed_train.push_back(_model_component->_mini_data); 229 | for (int i = 0; i < _model_component->_num_layers; ++i){ 230 | _model_component->_y.push_back( \ 231 | _model_component->_layers[i]->getY()); 232 | _model_component->_dE_dy.push_back( \ 233 | _model_component->_layers[i]->getDEDY()); 234 | if (_model_component->_layers_param[i]->getParamTrainType() == NEED \ 235 | && i > 0) { 236 | ///> 为了反向对weight和bias求导时要用到 237 | _model_component->_y_needed_train.push_back( \ 238 | _model_component->_layers[i-1]->getY()); 239 | } 240 | } 241 | } 242 | 243 | template 244 | void TrainModel::initWeightByRandom() { 245 | 246 | srand((unsigned)time(NULL)); 247 | for (int k = 0; k < _model_component->_num_need_train_layers; ++k) { 248 | gaussRand(_model_component->_w[k], \ 249 | dynamic_cast( \ 250 | _model_component->_layers_need_train_param[k])->getWGauss()); 251 | cudaMemset(_model_component->_bias[k]->getDevData(), 0, \ 252 | sizeof(float) * _model_component->_bias_len[k]); 253 | } 254 | } 255 | 256 | template 257 | void TrainModel::initWeightByFile(vector w_file, \ 258 | vector bias_file) { 259 | for (int k = 0; k < _model_component->_num_need_train_layers; ++k) { 260 | _model_component->_w[k]->readPars(w_file[k]); 261 | _model_component->_bias[k]->readPars(bias_file[k]); 262 | } 263 | } 264 | 265 | template 266 | void TrainModel::forwardPropagate(){ 267 | for (int k = 0; k < _model_component->_num_layers-1; ++k) { 268 | _model_component->_layers[k]->computeOutput(\ 269 | _model_component->_y[k]); 270 | } 271 | } 272 | 273 | template 274 | void TrainModel::backwardPropagate(){ 275 | for (int k = _model_component->_num_layers-2; k > 0; --k) { 276 | _model_component->_layers[k]->computeDerivsOfInput( \ 277 | _model_component->_dE_dy[k-1]); 278 | } 279 | } 280 | 281 | template 282 | void TrainModel::computeAndUpdatePars(){ 283 | for (int k = _model_component->_num_need_train_layers-1; k >= 0; --k) { 284 | TrainLayer *tl = dynamic_cast< TrainLayer* >( \ 285 | _model_component->_layers_needed_train[k]); 286 | tl->computeDerivsOfPars(_model_component->_y_needed_train[k]); 287 | tl->updatePars(); 288 | } 289 | } 290 | 291 | template 292 | void TrainModel::earlyStopping(int 
epoch_idx) { 293 | if(_strip_likelihood.size() == 0){ 294 | _min_likelihood = _likelihood; 295 | _min_error = _error; 296 | _min_epoch = epoch_idx; 297 | _strip_likelihood.push_back(_likelihood); 298 | }else if(_strip_likelihood.size() < _num_strip){ 299 | _strip_likelihood.push_back(_likelihood); 300 | if(_min_likelihood > _likelihood){ 301 | _min_likelihood = _likelihood; 302 | _min_error = _error; 303 | _min_epoch = epoch_idx; 304 | } 305 | }else if(_strip_likelihood.size() == _num_strip){ 306 | if(_min_likelihood > _likelihood){ 307 | _min_likelihood = _likelihood; 308 | _min_error = _error; 309 | _min_epoch = epoch_idx; 310 | } 311 | _strip_likelihood.erase(_strip_likelihood.begin()); 312 | _strip_likelihood.push_back(_likelihood); 313 | 314 | double tmp = 0; 315 | 316 | vector::iterator min_value = min_element(_strip_likelihood.begin(), _strip_likelihood.end()); 317 | 318 | double generalization_loss = 100*(_likelihood/_min_likelihood - 1); 319 | double progress_loss = 1000 * (tmp / (_num_strip*(*min_value)) - 1); 320 | 321 | cout << generalization_loss << ":" << progress_loss << endl; 322 | 323 | if(generalization_loss / progress_loss > 0.8) 324 | _is_stop = true; 325 | }else{ 326 | cerr << "early Stopping parameters are wrong." << endl; 327 | exit(EXIT_FAILURE); 328 | } 329 | } 330 | 331 | 332 | 333 | 334 | 335 | 336 | -------------------------------------------------------------------------------- /dl/src/utils.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "utils.cuh" 3 | 4 | using namespace std; 5 | 6 | void printTime(clock_t &t, string s){ 7 | t = clock() - t; 8 | cout << "\n"<< s << ": " << ((float)t/CLOCKS_PER_SEC) << " s."; 9 | t = clock(); 10 | } 11 | 12 | void initW(Matrix* nvMat){ 13 | int length = nvMat->getNumRows() * nvMat->getNumCols(); 14 | float* a = new float[length]; 15 | srand((unsigned)time(NULL)); 16 | float bound = sqrt(1.0 / length); 17 | for(int i = 0; i < length; i++){ 18 | int k = rand() % 200; 19 | if(k < 100) 20 | a[i] = (k/100.0)*(-bound); 21 | else 22 | a[i] = ((k - 100)/100.0)*bound; 23 | } 24 | nvMat->copyFromHost(a, length); 25 | delete a; 26 | } 27 | 28 | void gaussRand(Matrix* nvMat, float var, float mean){ 29 | int length = nvMat->getNumRows() * nvMat->getNumCols(); 30 | float* a = new float[length]; 31 | // std::default_random_engine generator; 32 | // std::normal_distribution distribution(mean, var); 33 | 34 | for(int i = 0; i < length; i++){ 35 | // float k = distribution(generator); 36 | if(var == 0) 37 | a[i] = 0.0f; 38 | else 39 | a[i] = gaussGen(var, mean); 40 | } 41 | nvMat->copyFromHost(a, length); 42 | delete a; 43 | } 44 | 45 | void gaussRand(float *w, int length, float var, float mean){ 46 | // std::default_random_engine generator; 47 | // std::normal_distribution distribution(mean, var); 48 | 49 | for(int i = 0; i < length; i++){ 50 | // float k = distribution(generator); 51 | if(var == 0) 52 | w[i] = 0.0f; 53 | else 54 | w[i] = gaussGen(var, mean); 55 | } 56 | } 57 | 58 | float gaussGen(float var, float mean) 59 | { 60 | static float V1, V2, S; 61 | static int phase = 0; 62 | float X; 63 | 64 | if ( phase == 0 ) { 65 | do { 66 | float U1 = (float)rand() / RAND_MAX; 67 | float U2 = (float)rand() / RAND_MAX; 68 | 69 | V1 = 2 * U1 - 1; 70 | V2 = 2 * U2 - 1; 71 | S = V1 * V1 + V2 * V2; 72 | } while(S >= 1 || S == 0); 73 | 74 | X = V1 * sqrt(-2 * log(S) / S); 75 | } else 76 | X = V2 * sqrt(-2 * log(S) / S); 77 | 78 | phase = 1 - phase; 79 | 80 | return (X * var + mean); 81 | } 82 | 
83 | void readData(Matrix* nvData, string filename, \ 84 | bool isData, int addZerosInFront){ 85 | int length = nvData->getNumRows() * nvData->getNumCols(); 86 | ifstream fin(filename.c_str(), ios::binary); 87 | float* data = new float[length]; 88 | char* readData = new char[length]; 89 | fin.read(readData + addZerosInFront, length - addZerosInFront); 90 | for(int i = 0; i < length; i++){ 91 | if(i < addZerosInFront) 92 | readData[i] = 0; 93 | unsigned char tmp = readData[i]; 94 | if(isData){ 95 | data[i] = (int)tmp / 255.0; 96 | } 97 | else 98 | data[i] = (int)tmp; 99 | } 100 | nvData->copyFromHost(data, length); 101 | fin.close(); 102 | delete[] data; 103 | delete[] readData; 104 | } 105 | 106 | -------------------------------------------------------------------------------- /dl/test/test.cu: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include "blob.cuh" 3 | 4 | int main(){ 5 | std::cout << "hi" << std::endl; 6 | } 7 | -------------------------------------------------------------------------------- /guichuideng/12345vs678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/12345vs678.png -------------------------------------------------------------------------------- /guichuideng/1234678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/1234678.png -------------------------------------------------------------------------------- /guichuideng/1234vs5678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/1234vs5678.png -------------------------------------------------------------------------------- /guichuideng/12578vs346.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/12578vs346.png -------------------------------------------------------------------------------- /guichuideng/125vs34678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/125vs34678.png -------------------------------------------------------------------------------- /guichuideng/125vs34vs678.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/125vs34vs678.png -------------------------------------------------------------------------------- /guichuideng/README: -------------------------------------------------------------------------------- 1 | This code treats the prepositions, adverbs and particles that appear in Guichuideng (鬼吹灯) as feature characters, counts how many times each of them occurs in every 10,000 characters, and uses the follow-up analysis to check whether the writing style changed over the course of the Guichuideng series. 2 | 3 | You need to download the Guichuideng txt files yourself and save the 8 books separately in the section folder, named 1_1.txt, 1_2.txt ... 2_4.txt. 4 | 5 | feature_count.py counts how many times each feature character appears in every 10,000 characters; count/1_1_1_feature_count.txt holds the counts for one such 10,000-character chunk. The results are also saved to a binary file for later reading, in the format: which book - which 10,000-character chunk - a vector of length 601. 6 | 7 | reduction.py performs dimensionality reduction with PCA/t-SNE and plots the result. 8 | 9 | anaylse.py directly plots line charts of how many times each feature character appears. 10 | 11 | lr.py implements logistic regression for binary classification. 12 | --------------------------------------------------------------------------------
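A minimal sketch of the counting scheme the README describes (one count vector per 10,000 characters of a book). This is illustrative only: the helper name count_features_per_10k is made up, it counts single characters directly instead of running the jieba segmentation that feature_count.py below uses, and it assumes feature.txt and section/1_1.txt exist as described above.

    # -*- coding: utf-8 -*-
    # Hypothetical sketch of the per-10,000-character feature counting described in the README.
    import codecs

    def count_features_per_10k(text, feature_chars, window=10000):
        # One count vector (one entry per feature character) for every 10,000
        # non-whitespace characters of the book.
        vectors = []
        counts = dict((c, 0) for c in feature_chars)
        seen = 0
        for ch in text:
            if ch in (u'\n', u'\t', u' '):
                continue
            if ch in counts:
                counts[ch] += 1
            seen += 1
            if seen % window == 0:
                vectors.append([counts[c] for c in feature_chars])
                counts = dict((c, 0) for c in feature_chars)
        return vectors

    # Example usage (paths as in the scripts below):
    # features = [c for c in codecs.open('feature.txt', 'r', 'utf-8').read() if not c.isspace()]
    # text = codecs.open('section/1_1.txt', 'r', 'utf-8').read()
    # vectors = count_features_per_10k(text, features)  # len(vectors[0]) == number of feature characters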
/guichuideng/anaylse.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -* 3 | import codecs 4 | import numpy as np 5 | import pickle 6 | import itertools 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | file_read = open('input_features.bin', 'rb') 11 | s = file_read.read() 12 | input_features = pickle.loads(s) 13 | file_read.close() 14 | 15 | X = [] 16 | num_1 = 0 17 | for section_id in input_features: 18 | if section_id < 4: 19 | num_1 += len(input_features[section_id]) 20 | X.append(input_features[section_id]) 21 | 22 | Y = np.array(list(itertools.chain.from_iterable(X))) 23 | 24 | idx = np.linspace(0, len(Y[0])-1, num=len(Y[0]), dtype=np.int) 25 | np.random.shuffle(Y[num_1:]) 26 | 27 | print num_1 28 | 29 | for i in range(10): 30 | plt.plot(idx, Y[i+num_1]) 31 | plt.xlabel('Feature ID') 32 | plt.ylabel('Feature Count') 33 | plt.title('5~8 Feature Appearence Frequency') 34 | plt.show() 35 | 36 | -------------------------------------------------------------------------------- /guichuideng/feature.txt: -------------------------------------------------------------------------------- 1 | 乃 乌 乍 了 一 万 无 不 专 2 | 业 东 且 世 两 习 也 乱 举 3 | 公 共 其 具 勿 匆 决 况 净 4 | 历 分 初 刚 划 列 则 别 刬 5 | 剩 兀 允 光 先 兜 亏 互 亘 6 | 亟 匪 匿 阳 阴 阿 除 陡 险 7 | 都 隐 兹 兼 几 凡 即 却 再 8 | 罔 力 加 务 动 劣 勤 从 今 9 | 会 佥 仅 仍 休 但 何 侪 便 10 | 俄 俪 侵 信 俶 倒 健 俱 倏 11 | 假 偶 偏 偷 偕 傍 傥 傻 全 12 | 单 卒 南 亢 交 亦 亲 亶 讫 13 | 讵 许 识 诚 该 试 询 诮 诺 14 | 谛 谟 又 及 反 取 叠 芴 茀 15 | 苟 苦 荐 莫 蓦 蔑 径 很 徒 16 | 得 微 迄 还 近 连 迭 迥 逆 17 | 适 递 通 造 逐 逼 遂 逾 遽 18 | 寻 将 大 夫 太 奉 奇 奈 奄 19 | 飞 干 平 并 幸 巨 巧 左 差 20 | 弥 强 底 庚 庶 庸 廑 已 希 21 | 常 可 叵 只 合 各 同 向 否 22 | 咋 哪 咸 哿 唯 啻 善 嗣 嘣 23 | 噎 驯 骊 骎 骤 间 阑 阖 宁 24 | 安 定 审 实 宛 宜 害 容 宿 25 | 寔 寝 寡 好 妄 姑 姗 始 委 26 | 娄 犹 独 狠 猝 猛 岂 岗 崭 27 | 尽 层 展 屡 饱 才 扔 扩 挺 28 | 捴 擅 汔 沉 泛 没 浑 活 洒 29 | 洵 浸 浪 混 渐 深 滋 滚 溘 30 | 滥 溜 满 漫 潜 约 纯 终 给 31 | 绝 统 绷 缕 在 坏 坚 均 垂 32 | 填 增 固 多 少 尚 忝 尝 快 33 | 怫 怪 恒 恍 恰 恬 恂 惟 慌 34 | 愣 慎 慢 憬 尤 就 备 复 夐 35 | 子 孔 孛 财 赆 贼 赖 比 毕 36 | 焉 煞 长 较 辄 死 殆 殊 斗 37 | 危 方 旅 旋 风 成 或 所 烂 38 | 既 斩 断 老 毫 本 未 权 杀 39 | 杂 极 条 果 枚 枉 棐 概 横 40 | 特 改 放 故 敢 欻 日 早 时 41 | 昆 明 是 晃 暂 暗 暴 手 拜 42 | 永 毋 必 忒 忽 总 恶 恚 恐 43 | 恣 悉 愈 憙 更 曷 曾 最 朅 44 | 有 肯 朋 胡 胜 胥 脱 腾 臆 45 | 止 正 此 白 的 皆 登 甚 私 46 | 稍 稀 稔 立 竟 端 竭 盍 益 47 | 盛 盗 盖 每 直 相 真 睋 瞥 48 | 痛 生 砀 确 硬 碜 磕 申 畅 49 | 略 率 究 空 窃 突 窘 甫 蚤 50 | 蛮 聊 良 虚 类 粗 精 紧 素 51 | 綦 齐 舒 覃 覆 行 翻 肆 肇 52 | 至 致 笃 第 等 簇 自 臭 重 53 | 身 躬 豫 酣 酷 貌 赵 起 越 54 | 足 跃 踽 非 雅 魆 首 黕 默 55 | 黩 齁 顾 须 颇 顶 顿 频 顺 56 | 裁 57 | 临 乎 与 为 共 冲 到 兜 于 58 | 即 从 以 似 假 去 让 诸 及 59 | 往 迆 连 迎 道 遵 对 导 寻 60 | 将 当 叫 吃 合 同 向 和 问 61 | 如 尽 打 执 把 投 拦 按 捉 62 | 洎 给 维 缘 在 因 惟 就 比 63 | 照 较 方 爿 暨 拿 替 望 朝 64 | 爰 直 由 率 被 用 繇 齐 至 65 | 管 自 起 趁 践 跟 66 | 么 了 与 不 且 之 为 兮 其 67 | 到 云 阿 却 个 以 们 价 似 68 | 讫 诸 取 若 得 逝 将 夫 头 69 | 只 吗 向 吧 呗 呃 呀 员 呵 70 | 呢 哇 咦 哟 哉 啊 哩 啵 唻 71 | 啰 唯 嘛 噬 嚜 家 如 掉 给 72 | 维 圪 在 尔 惟 子 赊 焉 然 73 | 旃 所 见 斯 者 来 欤 是 毋 74 | 曰 的 每 看 着 矣 罢 而 耶 75 | 粤 聿 等 言 越 馨 76 | 77 | -------------------------------------------------------------------------------- /guichuideng/feature_count.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -* 3 | import codecs 4 | import numpy 5 | import jieba 6 | import pickle 7 | 8 | def get_feature_word(): 9 | ''' 10 | 切分feature文件用于生成关键字 11 | ''' 12 | feature_file = codecs.open('feature.txt', 'r') 13 | content = feature_file.read() 14 | feature_file.close() 15 | 16 | segments = [] 17 | segs = jieba.cut(content) 18 | 19 | #保存feature关键字 20 | feature_word = [] 21 | for seg in segs: 22 | if seg != '\n' and seg != '\t' and seg != ' ': 23 | feature_word.append(seg) 24 | 25 
| 26 | return feature_word 27 | 28 | class FeatureCount: 29 | feature_count = {} 30 | def __init__(self, feature_word): 31 | #初始化每个关键字出现次数为0 32 | print len(feature_word) 33 | for i in range(len(feature_word)): 34 | self.feature_count[feature_word[i]] = 0 35 | print len(self.feature_count) 36 | 37 | def clear(self): 38 | for feature in self.feature_count: 39 | self.feature_count[feature] = 0 40 | 41 | def get_input_feature_from_one_section(section_name, fc): 42 | ''' 43 | 切分一部书的内容,同时统计上面的每个关键字出现的次数 44 | ''' 45 | section_file = codecs.open('section/'+section_name+'.txt', 'r') 46 | content = section_file.read() 47 | section_file.close() 48 | 49 | segments = [] 50 | segs = jieba.cut(content) 51 | 52 | #统计每一万个字中每个关键字出现次数 53 | input_feature = [] 54 | #用于计算是否到达一万字 55 | i = 0 56 | j = 0 57 | #用于保存每一万字文本 58 | c = '' 59 | 60 | for seg in segs: 61 | c += seg 62 | j += 1 63 | if seg != '\n' and seg != '\t' and seg != ' ': 64 | if seg in fc.feature_count: 65 | fc.feature_count[seg] += 1 66 | i += 1 67 | 68 | if i % 10000 == 0 or seg == object(): 69 | input_feature.append(fc.feature_count.values()) 70 | 71 | #保存这一万字中关键词出现次数 72 | output = codecs.open('count/'+section_name+'_' 73 | +str(i/10000)+'_feature_count.txt','w','utf-8') 74 | for feature in fc.feature_count: 75 | output.write(feature+'\t'+str(fc.feature_count[feature])+'\n') 76 | fc.clear() 77 | 78 | #保持一万字文本,这里制表符和换行符没有计算在内 79 | output = codecs.open('segment/'+section_name+'_' 80 | +str(i/10000)+'.txt','w','utf-8') 81 | output.write(c) 82 | c = '' 83 | 84 | print section_name, i, j, len(input_feature) 85 | 86 | return input_feature 87 | 88 | feature_word = get_feature_word() 89 | 90 | fc = FeatureCount(feature_word) 91 | 92 | sections = ['1_1', '1_2', '1_3', '1_4', '2_1', '2_2', '2_3', '2_4'] 93 | input_features = {} 94 | for i in range(len(sections)): 95 | input_features[i] = get_input_feature_from_one_section(sections[i], fc) 96 | 97 | #将逻辑回归输入先保存下来 98 | input_file = open('input_features.bin', 'wb') 99 | s = pickle.dumps(input_features) 100 | input_file.write(s) 101 | input_file.close() 102 | 103 | -------------------------------------------------------------------------------- /guichuideng/freq1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/freq1.png -------------------------------------------------------------------------------- /guichuideng/freq2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenrudan/deep-learning/4f3363acffadb7ed5c4a6b2d2454ef76003e5fe9/guichuideng/freq2.png -------------------------------------------------------------------------------- /guichuideng/lr.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # -*- coding: utf-8 -* 4 | import mxnet as mx 5 | import numpy as np 6 | import pickle 7 | import itertools 8 | 9 | def get_train_val(): 10 | 11 | pos_section = np.array([0,1,6,7]) 12 | 13 | file_read = open('input_features.bin', 'rb') 14 | s = file_read.read() 15 | input_features = pickle.loads(s) 16 | file_read.close() 17 | 18 | X = [] 19 | for section_id in input_features: 20 | X.append(input_features[section_id]) 21 | 22 | X = np.array(list(itertools.chain.from_iterable(X))) 23 | 24 | Y = [] 25 | # 26 | for section_id in input_features: 27 | for i in range(len(input_features[section_id])): 28 | if section_id in pos_section: 29 | Y.append(1) 30 | else: 31 
| Y.append(0) 32 | Y = np.array(Y) 33 | 34 | idx = np.linspace(0, len(Y)-1, num=len(Y), dtype=np.int) 35 | np.random.shuffle(idx) 36 | idx = idx[:87] 37 | 38 | 39 | train_label = Y[idx] 40 | train_data = X[idx] 41 | val_label = np.delete(Y, idx) 42 | val_data = np.delete(X, idx, axis=0) 43 | 44 | return train_label, train_data, val_label, val_data 45 | 46 | train_label, train_data, val_label, val_data = get_train_val() 47 | 48 | print 'train_data:', train_data.shape 49 | print 'train_label:', train_label.shape 50 | print 'val_data:', val_data.shape 51 | print 'val_label:', val_label.shape 52 | 53 | batch_size = 3 54 | train_iter = mx.io.NDArrayIter(train_data, train_label, batch_size) 55 | val_iter = mx.io.NDArrayIter(val_data, val_label, batch_size) 56 | 57 | import logging 58 | logging.getLogger().setLevel(logging.DEBUG) 59 | 60 | data = mx.sym.Variable('data') 61 | fc = mx.sym.FullyConnected(data=data, name='fc', num_hidden=2) 62 | lr = mx.sym.SoftmaxOutput(data=fc, name='softmax') 63 | 64 | model = mx.model.FeedForward(symbol=lr, num_epoch=100, 65 | learning_rate=0.01) 66 | 67 | model.fit(X = train_iter, eval_data=val_iter, 68 | batch_end_callback = mx.callback.Speedometer(batch_size, 10)) 69 | i = 0 70 | j = 0 71 | m = 0 72 | n = 0 73 | for k in range(39): 74 | if model.predict(val_data)[k].argmax() == 1 and val_label[k] == 1: 75 | i += 1 76 | elif model.predict(val_data)[k].argmax() == 0 and val_label[k] == 1: 77 | j += 1 78 | elif model.predict(val_data)[k].argmax() == 1 and val_label[k] == 0: 79 | m += 1 80 | elif model.predict(val_data)[k].argmax() == 0 and val_label[k] == 0: 81 | n += 1 82 | print '\tPredict 1\tPredict 0' 83 | print 'True 1\t',i,'\t\t',j 84 | print 'True 0\t',m,'\t\t',n 85 | -------------------------------------------------------------------------------- /guichuideng/reduction.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -* 3 | import numpy as np 4 | from sklearn import decomposition, manifold 5 | import pickle 6 | import itertools 7 | import matplotlib.pyplot as plt 8 | import pylab 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | 12 | file_read = open('input_features.bin', 'rb') 13 | s = file_read.read() 14 | input_features = pickle.loads(s) 15 | file_read.close() 16 | 17 | high_dim_input = [] 18 | for section_id in input_features: 19 | high_dim_input.append(input_features[section_id]) 20 | 21 | high_dim_input = np.array(list(itertools.chain.from_iterable(high_dim_input))) 22 | 23 | labels = [] 24 | for section_id in input_features: 25 | for i in range(len(input_features[section_id])): 26 | labels.append(section_id) 27 | labels = np.array(labels) 28 | 29 | ''' 30 | 进行pca降维 31 | ''' 32 | pca = decomposition.PCA(n_components=2) 33 | #isomap = manifold.TSNE(n_components=2, init='pca', random_state=0) 34 | X_input = pca.fit_transform(high_dim_input) 35 | 36 | print 'Percentage of variance explained by each of the selected components:', pca.explained_variance_ratio_ 37 | 38 | colors = [ 39 | '#FC0E77', '#FC0E77', 40 | 'turquoise', 'turquoise', 41 | 'turquoise', 'turquoise', 42 | '#FC0E77', '#FC0E77' 43 | ] 44 | 45 | colors = ['#48A946', '#E55523', '#E5E223', '#23E5DF', '#F70DB4', '#0D77F7','#CD2E7C', '#F70D80'] 46 | markers = ['1', '2', '3', '4', '5', '6', '7', '8'] 47 | 48 | s = [] 49 | for color, i, marker in zip(colors, [0, 1, 2, 3, 4, 5, 6, 7], markers): 50 | s.append(plt.scatter(X_input[labels == i, 0], X_input[labels == i, 1], 51 | color=color, s=100, marker=r"${}$".format(marker))) 52 | 
plt.legend((s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7]), 53 | ('1_1.txt', '1_2.txt', '1_3.txt', '1_4.txt', 54 | '2_1.txt', '2_2.txt', '2_3.txt', '2_4.txt'), loc='lower left') 55 | plt.title('1278 vs 3456') 56 | plt.show() 57 | -------------------------------------------------------------------------------- /rl/cartpole/policy_gradient.py: -------------------------------------------------------------------------------- 1 | 2 | import gym 3 | import numpy as np 4 | 5 | def generate_episode(env, weight): 6 | episode = [] 7 | pre_observation = env.reset() 8 | 9 | t = 0 10 | #generate 1 episodes for training. 11 | while 1: 12 | #env.render() 13 | pi, action = choose_action(weight, pre_observation) 14 | 15 | observation, reward, done, info = env.step(action) 16 | episode.append([pre_observation, action, pi, reward]) 17 | pre_observation = observation 18 | 19 | t += 1 20 | if done or t > 1000: 21 | break 22 | return episode 23 | 24 | def evaluate_given_parameter_sigmoid(env, weight): 25 | observation = env.reset() 26 | total_reward = 0. 27 | for t in range(1000): 28 | env.render() 29 | weighted_sum = np.dot(weight, observation) 30 | pi = 1 / (1 + np.exp(-weighted_sum)) 31 | if pi > 0.5: 32 | action = 1 33 | else: 34 | action = 0 35 | 36 | observation, reward, done, info = env.step(action) 37 | total_reward += reward 38 | if done: 39 | break 40 | return total_reward 41 | 42 | def monte_carlo_policy_gradient(env): 43 | 44 | learning_rate = -0.0001 45 | best_reward = -100.0 46 | 47 | weight = np.random.rand(4) 48 | 49 | for iiter in xrange(1000): 50 | 51 | cur_episode = generate_episode(env, weight) 52 | for t in range(len(cur_episode)): 53 | 54 | observation, action, pi, reward = cur_episode[t] 55 | 56 | #update theta 57 | weight += learning_rate*(1-pi)*np.transpose(-observation)*reward 58 | 59 | cur_reward = evaluate_given_parameter_sigmoid(env, weight) 60 | print 'Monte-Carlo policy gradient get reward', cur_reward 61 | 62 | def choose_action(weight, observation): 63 | 64 | weighted_sum = np.dot(weight, observation) 65 | pi = 1 / (1 + np.exp(-weighted_sum)) 66 | if pi > 0.5: 67 | action = 1 68 | else: 69 | action = 0 70 | return pi, action 71 | 72 | def actor_critic_policy_gradient(env): 73 | gamma = 1 74 | 75 | p_weight = np.random.rand(4) 76 | 77 | #weight for value function 78 | v_weight = np.random.rand(4) 79 | 80 | p_learning_rate = -0.0001 81 | v_learning_rate = -0.0001 82 | 83 | done = True 84 | 85 | for iiter in xrange(1000): 86 | 87 | t = 0 88 | while 1: 89 | if done: 90 | print 'start new training...' 
91 | print 'p_weight', p_weight 92 | print 'v_weight', v_weight 93 | 94 | pre_observation = env.reset() 95 | pre_pi, pre_action = choose_action(p_weight, pre_observation) 96 | 97 | pre_phi = pre_observation 98 | pre_q = np.dot(v_weight, pre_phi) 99 | 100 | #env.render() 101 | 102 | observation, reward, done, info = env.step(pre_action) 103 | 104 | pi, action = choose_action(p_weight, observation) 105 | 106 | phi = observation 107 | q = np.dot(v_weight, phi) 108 | 109 | delta = reward + gamma*q - pre_q 110 | 111 | p_weight += p_learning_rate*(1-pre_pi)*np.transpose(-pre_observation)*pre_q 112 | 113 | v_weight += v_learning_rate*delta*np.transpose(pre_phi) 114 | 115 | pre_pi = pi 116 | pre_observation = observation 117 | pre_q = q 118 | pre_phi = phi 119 | pre_action = action 120 | 121 | t += 1 122 | if done: 123 | break 124 | 125 | cur_reward = evaluate_given_parameter_sigmoid(env, p_weight) 126 | print 'Actor critic policy gradient get reward', cur_reward 127 | 128 | env = gym.make('CartPole-v0') 129 | 130 | #env.monitor.start('cartpole-hill/', force=True) 131 | actor_critic_policy_gradient(env) 132 | #env.monitor.close() 133 | 134 | monte_carlo_policy_gradient(env) 135 | -------------------------------------------------------------------------------- /rl/cartpole/random_guess_hill_climbing.py: -------------------------------------------------------------------------------- 1 | 2 | import gym 3 | import numpy as np 4 | 5 | def evaluate_given_parameter_by_sign(env, weight): 6 | observation = env.reset() 7 | total_reward = 0. 8 | for t in range(1000): 9 | env.render() 10 | weighted_sum = np.dot(weight, observation) 11 | if weighted_sum >= 0: 12 | action = 1 13 | else: 14 | action = 0 15 | 16 | observation, reward, done, info = env.step(action) 17 | total_reward += reward 18 | if done: 19 | break 20 | return total_reward 21 | 22 | def random_guess(): 23 | env = gym.make('CartPole-v0') 24 | np.random.seed(10) 25 | best_reward = -100.0 26 | 27 | for iiter in xrange(1000): 28 | weight = np.random.rand(4) 29 | 30 | cur_reward = evaluate_given_parameter_by_sign(env, weight) 31 | if cur_reward > best_reward: 32 | best_reward = cur_reward 33 | best_weight = weight 34 | 35 | if best_reward == 1000: 36 | break 37 | 38 | print("Random guess algorithm best reward", best_reward) 39 | print("Random guess algorithm best weight", best_weight) 40 | 41 | def hill_climbing(): 42 | env = gym.make('CartPole-v0') 43 | best_reward = -100.0 44 | np.random.seed(10) 45 | best_weight = np.random.rand(4) 46 | 47 | for iiter in xrange(1000): 48 | weight = best_weight + np.random.normal(0, 0.01, 4) 49 | 50 | cur_reward = evaluate_given_parameter_by_sign(env, weight) 51 | if cur_reward > best_reward: 52 | best_reward = cur_reward 53 | best_weight = weight 54 | 55 | if best_reward == 1000: 56 | break 57 | 58 | print("Hill climbing algorithm best reward", best_reward) 59 | print("Hill climbing algorithm best weight", best_weight) 60 | 61 | random_guess() 62 | hill_climbing() 63 | -------------------------------------------------------------------------------- /rl/cartpole/upload.py: -------------------------------------------------------------------------------- 1 | 2 | import gym 3 | 4 | gym.upload('deep-learning/rl/cartpole-hill') 5 | -------------------------------------------------------------------------------- /tf_autoencoder/README.md: -------------------------------------------------------------------------------- 1 | This autoencoder.py implements the deep autoencoder network. 
Interfaces of the autoencoder are the same as sklearn's Manifold Learning. 2 | 3 | * fit(X) Fit the autoencoder network to data X. 4 | * fit_transform(X) Fit the model from data in X and transform X. 5 | * get_params() Get parameters of this network. 6 | * reconstruction_error(X) Compute the reconstruction error for the data X. 7 | * set_params() Set the parameters which come from the saved file. 8 | * transform(X) Transform X. 9 | 10 | test.py is an example that reduces the 28*28 mnist dataset images into 2 dimensions and visualizes them. 11 | 12 | I adapted some code from [Variational Autoencoder in TensorFlow](https://jmetzen.github.io/2015-11-27/vae.html) into this autoencoder network in tensorflow. 13 | -------------------------------------------------------------------------------- /tf_autoencoder/autoencoder.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import tensorflow as tf 4 | import time 5 | 6 | def xavier_init(fan_in, fan_out, constant=1): 7 | """ Xavier initialization of network weights""" 8 | low = -constant*np.sqrt(6.0/(fan_in + fan_out)) 9 | high = constant*np.sqrt(6.0/(fan_in + fan_out)) 10 | return tf.random_uniform((fan_in, fan_out), 11 | minval=low, maxval=high, 12 | dtype=tf.float32) 13 | 14 | class Autoencoder(object): 15 | """Initialize the autoencoder neural network 16 | 17 | Attributes: 18 | sess: the tensorflow session 19 | network_architecture: number of units in each layer 20 | transfer_fct: activation function, default is sigmoid 21 | lr: learning rate 22 | batch_size: training batch size 23 | """ 24 | def __init__(self, sess, network_architecture, 25 | transfer_fct=tf.nn.sigmoid, 26 | learning_rate=0.001, batch_size=100): 27 | """initialize the parameters and construct the whole network""" 28 | 29 | self.network_architecture = network_architecture 30 | self.transfer_fct = transfer_fct 31 | self.lr = learning_rate 32 | self.batch_size = batch_size 33 | 34 | print "units number in each layer: " + str(self.network_architecture) 35 | print "learning rate: " + str(self.lr) 36 | print "batch size is: " + str(self.batch_size) 37 | 38 | #input of whole network 39 | self.x = tf.placeholder(tf.float32, [None, network_architecture[0]]) 40 | self.W = [] 41 | self.bias = [] 42 | 43 | self._create_network() 44 | 45 | self._create_loss_optimizer() 46 | 47 | init = tf.initialize_all_variables() 48 | 49 | self.sess = sess 50 | self.sess.run(init) 51 | 52 | self.saver = tf.train.Saver(tf.trainable_variables()) 53 | 54 | def _create_network(self): 55 | """according to the number of units in each layer, initialize 56 | weight and bias, then connect the whole net.
57 | 58 | """ 59 | self._create_forward(self.x) 60 | self._create_backward(self.y) 61 | 62 | def _create_forward(self, x): 63 | for i in xrange(1, len(self.network_architecture)): 64 | y = self._create_one_layer(x, self.network_architecture[i], i) 65 | x = y 66 | self.y = y 67 | 68 | def _create_backward(self, x): 69 | for i in xrange(len(self.network_architecture)-2, -1, -1): 70 | y = self._create_one_layer(x, self.network_architecture[i], i, is_encoder=False) 71 | x = y 72 | self.reconstruct_x = y 73 | 74 | def _create_one_layer(self, x, num_out, w_id, is_encoder=True): 75 | """construct one encoder or decoder layer 76 | 77 | Args: 78 | x: input of this layer 79 | num_out: neural number of this layer 80 | w_id: if this layer is decoder, weight of this layer comes from the saved weight list 81 | is_encoder: if True, create the new weight variable 82 | 83 | Returns: 84 | y: output of this layer 85 | """ 86 | if not is_encoder: 87 | weight = tf.transpose(self.W[w_id]) 88 | else: 89 | (batch_size, num_in) = tf.Tensor.get_shape(x).as_list() 90 | weight = tf.Variable(xavier_init(num_in, num_out)) 91 | self.W += [weight] 92 | 93 | bias = tf.Variable(tf.zeros([num_out], dtype=tf.float32)) 94 | self.bias += [bias] 95 | y = self.transfer_fct(tf.add(tf.matmul(x, weight), bias)) 96 | 97 | return y 98 | 99 | def _create_loss_optimizer(self): 100 | """construct the cost function 101 | 102 | reconstruction loss which comes from the cross entropy 103 | 104 | """ 105 | self.cost = -tf.reduce_sum(self.x * tf.log(1e-10 + self.reconstruct_x) 106 | + (1-self.x) * tf.log(1e-10 + 1 - self.reconstruct_x), 1) 107 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.cost) 108 | 109 | def partial_fit(self, X): 110 | """training one batch 111 | 112 | Args: 113 | X: input of this batch 114 | 115 | Returns: 116 | cost: cost of this batch 117 | 118 | """ 119 | opt, cost = self.sess.run((self.optimizer, self.cost), 120 | feed_dict={self.x: X}) 121 | return cost 122 | 123 | def set_params(self): 124 | self.saver.restore(self.sess, "model.ckpt") 125 | 126 | def reconstruction_error(self, X): 127 | return self.sess.run(self.cost, feed_dict={self.x: X}) 128 | 129 | def get_params(self): 130 | return self.W 131 | 132 | def fit(self, X, num_epochs=100): 133 | n_samples = len(X) 134 | total_batch = int(len(X)/self.batch_size) 135 | 136 | t = time.time() 137 | for epoch in xrange(num_epochs): 138 | avg_cost = 0.0 139 | for i in xrange(total_batch): 140 | batch_x = X[i*self.batch_size:(i+1)*self.batch_size] 141 | cost = self.partial_fit(batch_x) 142 | avg_cost += cost 143 | avg_cost = avg_cost / n_samples 144 | 145 | print "Epoch:" + str(epoch) + " cost=" + str(np.mean(avg_cost)) 146 | 147 | if epoch % 10 is 0: 148 | print 'until current epoch ' + str(epoch) +' cost: ' + str(time.time()-t) + ' s.' 
149 | 150 | 151 | #save the net parameters 152 | self.saver.save(self.sess, "model.ckpt") 153 | 154 | def fit_transform(self, X, num_epochs=100): 155 | self.fit(X, num_epochs) 156 | return self.transform(X) 157 | 158 | def transform(self, X): 159 | """transform X 160 | """ 161 | return self.sess.run(self.y, feed_dict={self.x: X}) 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /tf_autoencoder/test.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | import tensorflow as tf 4 | import numpy as np 5 | import autoencoder 6 | 7 | 8 | from tensorflow.examples.tutorials.mnist import input_data 9 | mnist = input_data.read_data_sets('MNIST_data', one_hot=True) 10 | n_samples = mnist.train.num_examples 11 | 12 | sess = tf.Session() 13 | 14 | np.random.seed(0) 15 | tf.set_random_seed(0) 16 | 17 | batch_size=100 18 | num_epochs = 50 19 | display_step=2 20 | network_architecture = [784, 500, 500, 2] 21 | 22 | ae = autoencoder.Autoencoder(sess, network_architecture, batch_size=batch_size) 23 | ae.fit(mnist.train.images, num_epochs) 24 | 25 | x_sample, y_sample = mnist.test.next_batch(5000) 26 | z_mu = ae.transform(x_sample) 27 | plt.figure(figsize=(8, 6)) 28 | plt.scatter(z_mu[:, 0], z_mu[:, 1], c=np.argmax(y_sample, 1)) 29 | plt.colorbar() 30 | plt.savefig('test_ae.png') 31 | 32 | 33 | 34 | --------------------------------------------------------------------------------
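For reference, a minimal usage sketch (not part of the repository) for the two interface methods listed in the README that test.py does not exercise: set_params(), which restores the weights that fit() saved to model.ckpt, and reconstruction_error(X), which returns one cross-entropy value per input row. It assumes the autoencoder.py above, the same MNIST setup as test.py, and that fit() has already been run so model.ckpt exists.

    import numpy as np
    import tensorflow as tf
    import autoencoder
    from tensorflow.examples.tutorials.mnist import input_data

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    sess = tf.Session()
    network_architecture = [784, 500, 500, 2]

    # Rebuild the same graph, then load the weights saved by a previous fit().
    ae = autoencoder.Autoencoder(sess, network_architecture, batch_size=100)
    ae.set_params()

    # Score held-out data; the cost is summed per example, so this is a vector.
    x_sample, _ = mnist.test.next_batch(1000)
    errors = ae.reconstruction_error(x_sample)
    print "mean reconstruction error: " + str(np.mean(errors))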