├── README.md ├── beginer_tutorial.py ├── beginner_tensorboard.py ├── expert_tutorial.py ├── extend_dataset.py ├── increase_picture.py ├── make_train_data_from_directory.py └── resize_photo.py /README.md: -------------------------------------------------------------------------------- 1 | # deeplearning_tool 2 | 3 | ## increase_picture.py 4 | tool for increaseing pictures for CNN classification accuracy 5 | -------------------------------------------------------------------------------- /beginer_tutorial.py: -------------------------------------------------------------------------------- 1 | from tensorflow.examples.tutorials.mnist import input_data 2 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | x = tf.placeholder(tf.float32, [None, 784]) 6 | 7 | W = tf.Variable(tf.zeros([784, 10])) 8 | b = tf.Variable(tf.zeros([10])) 9 | 10 | y = tf.nn.softmax(tf.matmul(x, W)+b) 11 | 12 | y_ = tf.placeholder(tf.float32, [None, 10]) 13 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) 14 | 15 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 16 | 17 | init = tf.initialize_all_variables() 18 | sess = tf.Session() 19 | sess.run(init) 20 | 21 | for i in range(1000): 22 | batch_xs, batch_ys = mnist.train.next_batch(100) 23 | sess.run(train_step, feed_dict={x:batch_xs, y_:batch_ys}) 24 | 25 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) 26 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float32")) 27 | 28 | print "train" + str(sess.run(accuracy, feed_dict={x:batch_xs, y_:batch_ys})) 29 | print "test" + str(sess.run(accuracy, feed_dict={x:mnist.test.images, y_:mnist.test.labels})) 30 | 31 | summary_writer = tf.train.SummaryWriter('data', graph_def=sess.graph_def) 32 | -------------------------------------------------------------------------------- /beginner_tensorboard.py: -------------------------------------------------------------------------------- 1 | from tensorflow.examples.tutorials.mnist import input_data 2 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 3 | 4 | import tensorflow as tf 5 | x = tf.placeholder(tf.float32, [None, 784]) 6 | W = tf.Variable(tf.zeros([784, 10])) 7 | b = tf.Variable(tf.zeros([10])) 8 | y = tf.nn.softmax(tf.matmul(x, W)+b) 9 | y_ = tf.placeholder(tf.float32, [None, 10]) 10 | 11 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) 12 | 13 | # tensorboard 14 | w_hist = tf.histogram_summary("weights", W) 15 | b_hist = tf.histogram_summary("biases", b) 16 | y_hist = tf.histogram_summary("y", y) 17 | 18 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) 19 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float32")) 20 | 21 | with tf.name_scope("train") as scope: 22 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 23 | acc_summary_train = tf.scalar_summary("train_acc", accuracy) 24 | loss_summary_train = tf.scalar_summary("cross_entropy_train", cross_entropy) 25 | 26 | 27 | with tf.name_scope("val") as scope: 28 | acc_summary_val = tf.scalar_summary("val_acc", accuracy) 29 | loss_summary_val = tf.scalar_summary("cross_entropy_val", cross_entropy) 30 | 31 | 32 | init = tf.initialize_all_variables() 33 | sess = tf.Session() 34 | sess.run(init) 35 | 36 | merged = tf.merge_all_summaries() 37 | writer = tf.train.SummaryWriter("data", sess.graph_def) 38 | 39 | for i in range(1000): 40 | batch_xs, batch_ys = mnist.train.next_batch(100) 41 | sess.run(train_step, feed_dict={x:batch_xs, y_:batch_ys}) 42 | 43 | if i%10==0: 44 | train_list = [accuracy, acc_summary_train, loss_summary_train, w_hist, b_hist, y_hist] 45 | result = sess.run(train_list, feed_dict={x:batch_xs, y_:batch_ys}) 46 | for j in range(1,len(result)): 47 | writer.add_summary(result[j], i) 48 | print("Train accuracy at step %s: %s" % (i, result[0])) 49 | 50 | val_list = [accuracy,acc_summary_val, loss_summary_val] 51 | result = sess.run(val_list, feed_dict={x:mnist.validation.images, y_:mnist.validation.labels}) 52 | for j in range(1,len(result)): 53 | writer.add_summary(result[j], i) 54 | 55 | print("Validation accuracy at step %s: %s" % (i, result[0])) 56 | 57 | 58 | #print "test" + str(sess.run(accuracy2, feed_dict={x:mnist.test.images, y_:mnist.test.labels})) 59 | 60 | -------------------------------------------------------------------------------- /expert_tutorial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from tensorflow.examples.tutorials.mnist import input_data 4 | mnist = input_data.read_data_sets('MNIST_data', one_hot=True) 5 | 6 | import tensorflow as tf 7 | 8 | def weight_variable(shape): 9 | # 初期値が標準偏差0.1のテンソルを作成 10 | initial = tf.truncated_normal(shape, stddev=0.1) 11 | return tf.Variable(initial) 12 | 13 | def bias_variable(shape): 14 | initial = tf.constant(0.1, shape=shape) 15 | return tf.Variable(initial) 16 | 17 | def conv2d(x, W): 18 | return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME') 19 | 20 | def max_pool_2x2(x): 21 | return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1],padding='SAME') 22 | 23 | 24 | sess = tf.InteractiveSession() 25 | 26 | x = tf.placeholder(tf.float32, shape=[None, 784]) 27 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 28 | 29 | 30 | W_conv1 = weight_variable([5,5,1,32]) 31 | b_conv1 = bias_variable([32]) 32 | 33 | x_image = tf.reshape(x,[-1,28,28,1]) 34 | 35 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1)+b_conv1) 36 | h_pool1 = max_pool_2x2(h_conv1) 37 | 38 | W_conv2 = weight_variable([5,5,32,64]) 39 | b_conv2 = bias_variable([64]) 40 | 41 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) +b_conv2) 42 | h_pool2 = max_pool_2x2(h_conv2) 43 | 44 | W_fc1 = weight_variable([7*7*64,1024]) 45 | b_fc1 = bias_variable([1024]) 46 | 47 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64]) 48 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) +b_fc1) 49 | 50 | keep_prob = tf.placeholder(tf.float32) 51 | h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) 52 | 53 | W_fc2 = weight_variable([1024,10]) 54 | b_fc2 = bias_variable([10]) 55 | 56 | y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) 57 | 58 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1])) 59 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 60 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1)) 61 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 62 | sess.run(tf.initialize_all_variables()) 63 | 64 | for i in range(20000): 65 | batch = mnist.train.next_batch(50) 66 | if i%10 ==0: 67 | train_accuracy = accuracy.eval(feed_dict={x:batch[0], y_:batch[1], keep_prob:1.0}) 68 | print("step %d, training accuracy %g" %(i, train_accuracy)) 69 | train_step.run(feed_dict={x: batch[0], y_:batch[1], keep_prob:0.5}) 70 | 71 | print ("test accuracy %g" %accuracy.eval(feed_dict={x:mnist.test.images, y_:mnist.test.labels, keep_prob:1.0})) 72 | 73 | -------------------------------------------------------------------------------- /extend_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # usage: ./increase_picture.py image_directory distnation_directory 5 | # 6 | 7 | import cv2 8 | import numpy as np 9 | import sys 10 | import os 11 | import argparse 12 | import glob 13 | 14 | # ヒストグラム均一化 15 | def equalizeHistRGB(src): 16 | 17 | RGB = cv2.split(src) 18 | Blue = RGB[0] 19 | Green = RGB[1] 20 | Red = RGB[2] 21 | for i in range(3): 22 | cv2.equalizeHist(RGB[i]) 23 | 24 | img_hist = cv2.merge([RGB[0],RGB[1], RGB[2]]) 25 | return img_hist 26 | 27 | # ガウシアンノイズ 28 | def addGaussianNoise(src): 29 | row,col,ch= src.shape 30 | mean = 0 31 | var = 0.1 32 | sigma = 15 33 | gauss = np.random.normal(mean,sigma,(row,col,ch)) 34 | gauss = gauss.reshape(row,col,ch) 35 | noisy = src + gauss 36 | 37 | return noisy 38 | 39 | # salt&pepperノイズ 40 | def addSaltPepperNoise(src): 41 | row,col,ch = src.shape 42 | s_vs_p = 0.5 43 | amount = 0.004 44 | out = src.copy() 45 | # Salt mode 46 | num_salt = np.ceil(amount * src.size * s_vs_p) 47 | coords = [np.random.randint(0, i-1 , int(num_salt)) 48 | for i in src.shape] 49 | out[coords[:-1]] = (255,255,255) 50 | 51 | # Pepper mode 52 | num_pepper = np.ceil(amount* src.size * (1. - s_vs_p)) 53 | coords = [np.random.randint(0, i-1 , int(num_pepper)) 54 | for i in src.shape] 55 | out[coords[:-1]] = (0,0,0) 56 | return out 57 | 58 | def increasePicture(img_path, dist_dir): 59 | # ルックアップテーブルの生成 60 | min_table = 50 61 | max_table = 205 62 | diff_table = max_table - min_table 63 | gamma1 = 0.75 64 | gamma2 = 1.5 65 | 66 | LUT_HC = np.arange(256, dtype = 'uint8' ) 67 | LUT_LC = np.arange(256, dtype = 'uint8' ) 68 | LUT_G1 = np.arange(256, dtype = 'uint8' ) 69 | LUT_G2 = np.arange(256, dtype = 'uint8' ) 70 | 71 | LUTs = [] 72 | 73 | # 平滑化用 74 | average_square = (10,10) 75 | 76 | # ハイコントラストLUT作成 77 | for i in range(0, min_table): 78 | LUT_HC[i] = 0 79 | 80 | for i in range(min_table, max_table): 81 | LUT_HC[i] = 255 * (i - min_table) / diff_table 82 | 83 | for i in range(max_table, 255): 84 | LUT_HC[i] = 255 85 | 86 | # その他LUT作成 87 | for i in range(256): 88 | LUT_LC[i] = min_table + i * (diff_table) / 255 89 | LUT_G1[i] = 255 * pow(float(i) / 255, 1.0 / gamma1) 90 | LUT_G2[i] = 255 * pow(float(i) / 255, 1.0 / gamma2) 91 | 92 | LUTs.append(LUT_HC) 93 | LUTs.append(LUT_LC) 94 | LUTs.append(LUT_G1) 95 | LUTs.append(LUT_G2) 96 | 97 | # 画像の読み込み 98 | print("Processing " + str(img_path)) 99 | img_src = cv2.imread(img_path, 1) 100 | trans_img = [] 101 | trans_img.append(img_src) 102 | 103 | # LUT変換 104 | for i, LUT in enumerate(LUTs): 105 | trans_img.append( cv2.LUT(img_src, LUT)) 106 | 107 | # 平滑化 108 | trans_img.append(cv2.blur(img_src, average_square)) 109 | 110 | # ヒストグラム均一化 111 | trans_img.append(equalizeHistRGB(img_src)) 112 | 113 | # ノイズ付加 114 | trans_img.append(addGaussianNoise(img_src)) 115 | trans_img.append(addSaltPepperNoise(img_src)) 116 | 117 | # 反転 118 | flip_img = [] 119 | for img in trans_img: 120 | flip_img.append(cv2.flip(img, 1)) 121 | trans_img.extend(flip_img) 122 | 123 | # 保存 124 | if not os.path.exists(dist_dir): 125 | os.mkdir(dist_dir) 126 | 127 | base = os.path.splitext(os.path.basename(img_path))[0] + "_" 128 | for i, img in enumerate(trans_img): 129 | # 比較用 130 | # cv2.imwrite("trans_images/" + base + str(i) + ".jpg" ,cv2.hconcat([img_src.astype(np.float64), img.astype(np.float64)])) 131 | cv2.imwrite(dist_dir + "/" + base + str(i) + ".jpg" ,img) 132 | 133 | if __name__ == '__main__': 134 | parser = argparse.ArgumentParser( 135 | description='Extend dataset pictures by many ways') 136 | parser.add_argument('root', help='Path to original image directory') 137 | parser.add_argument('dist', help='Path to distnation directory') 138 | args = parser.parse_args() 139 | 140 | img_list = glob.glob(args.root + "/*.jpg") 141 | for img in img_list: 142 | increasePicture(img, args.dist) 143 | 144 | -------------------------------------------------------------------------------- /increase_picture.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # usage: ./increase_picture.py hogehoge.jpg 5 | # 6 | 7 | import cv2 8 | import numpy as np 9 | import sys 10 | import os 11 | 12 | # ヒストグラム均一化 13 | def equalizeHistRGB(src): 14 | 15 | RGB = cv2.split(src) 16 | Blue = RGB[0] 17 | Green = RGB[1] 18 | Red = RGB[2] 19 | for i in range(3): 20 | cv2.equalizeHist(RGB[i]) 21 | 22 | img_hist = cv2.merge([RGB[0],RGB[1], RGB[2]]) 23 | return img_hist 24 | 25 | # ガウシアンノイズ 26 | def addGaussianNoise(src): 27 | row,col,ch= src.shape 28 | mean = 0 29 | var = 0.1 30 | sigma = 15 31 | gauss = np.random.normal(mean,sigma,(row,col,ch)) 32 | gauss = gauss.reshape(row,col,ch) 33 | noisy = src + gauss 34 | 35 | return noisy 36 | 37 | # salt&pepperノイズ 38 | def addSaltPepperNoise(src): 39 | row,col,ch = src.shape 40 | s_vs_p = 0.5 41 | amount = 0.004 42 | out = src.copy() 43 | # Salt mode 44 | num_salt = np.ceil(amount * src.size * s_vs_p) 45 | coords = [np.random.randint(0, i-1 , int(num_salt)) 46 | for i in src.shape] 47 | out[coords[:-1]] = (255,255,255) 48 | 49 | # Pepper mode 50 | num_pepper = np.ceil(amount* src.size * (1. - s_vs_p)) 51 | coords = [np.random.randint(0, i-1 , int(num_pepper)) 52 | for i in src.shape] 53 | out[coords[:-1]] = (0,0,0) 54 | return out 55 | 56 | if __name__ == '__main__': 57 | # ルックアップテーブルの生成 58 | min_table = 50 59 | max_table = 205 60 | diff_table = max_table - min_table 61 | gamma1 = 0.75 62 | gamma2 = 1.5 63 | 64 | LUT_HC = np.arange(256, dtype = 'uint8' ) 65 | LUT_LC = np.arange(256, dtype = 'uint8' ) 66 | LUT_G1 = np.arange(256, dtype = 'uint8' ) 67 | LUT_G2 = np.arange(256, dtype = 'uint8' ) 68 | 69 | LUTs = [] 70 | 71 | # 平滑化用 72 | average_square = (10,10) 73 | 74 | # ハイコントラストLUT作成 75 | for i in range(0, min_table): 76 | LUT_HC[i] = 0 77 | 78 | for i in range(min_table, max_table): 79 | LUT_HC[i] = 255 * (i - min_table) / diff_table 80 | 81 | for i in range(max_table, 255): 82 | LUT_HC[i] = 255 83 | 84 | # その他LUT作成 85 | for i in range(256): 86 | LUT_LC[i] = min_table + i * (diff_table) / 255 87 | LUT_G1[i] = 255 * pow(float(i) / 255, 1.0 / gamma1) 88 | LUT_G2[i] = 255 * pow(float(i) / 255, 1.0 / gamma2) 89 | 90 | LUTs.append(LUT_HC) 91 | LUTs.append(LUT_LC) 92 | LUTs.append(LUT_G1) 93 | LUTs.append(LUT_G2) 94 | 95 | # 画像の読み込み 96 | img_src = cv2.imread(sys.argv[1], 1) 97 | trans_img = [] 98 | trans_img.append(img_src) 99 | 100 | # LUT変換 101 | for i, LUT in enumerate(LUTs): 102 | trans_img.append( cv2.LUT(img_src, LUT)) 103 | 104 | # 平滑化 105 | trans_img.append(cv2.blur(img_src, average_square)) 106 | 107 | # ヒストグラム均一化 108 | trans_img.append(equalizeHistRGB(img_src)) 109 | 110 | # ノイズ付加 111 | trans_img.append(addGaussianNoise(img_src)) 112 | trans_img.append(addSaltPepperNoise(img_src)) 113 | 114 | # 反転 115 | flip_img = [] 116 | for img in trans_img: 117 | flip_img.append(cv2.flip(img, 1)) 118 | trans_img.extend(flip_img) 119 | 120 | # 保存 121 | if not os.path.exists("trans_images"): 122 | os.mkdir("trans_images") 123 | 124 | base = os.path.splitext(os.path.basename(sys.argv[1]))[0] + "_" 125 | img_src.astype(np.float64) 126 | for i, img in enumerate(trans_img): 127 | # 比較用 128 | # cv2.imwrite("trans_images/" + base + str(i) + ".jpg" ,cv2.hconcat([img_src.astype(np.float64), img.astype(np.float64)])) 129 | cv2.imwrite("trans_images/" + base + str(i) + ".jpg" ,img) 130 | 131 | -------------------------------------------------------------------------------- /make_train_data_from_directory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding:utf-8 3 | 4 | # discription: 5 | # 6 | # generate dataset text data such as train.txt, test.txt, and label.txt for learning images 7 | # 8 | # waring: jpg only 9 | # 10 | import sys 11 | import commands 12 | import subprocess 13 | import glob 14 | import os 15 | import shutil 16 | import argparse 17 | import random 18 | 19 | def getdirs(path): 20 | dirs=[] 21 | for item in os.listdir(path): 22 | if os.path.isdir(os.path.join(path,item)): 23 | dirs.append(item) 24 | return dirs 25 | 26 | parser = argparse.ArgumentParser(description='script for generating dataset text') 27 | parser.add_argument('src_path', help= "directry root path") 28 | parser.add_argument('-l', '--limit', help= 'maximum number of photo in each class') 29 | args = parser.parse_args() 30 | 31 | 32 | #labels 33 | dirs = getdirs(args.src_path) 34 | print dirs 35 | labels = dirs[:] 36 | 37 | #make directries 38 | if not os.path.exists(args.src_path + "/images") : 39 | os.mkdir(args.src_path + "/images") 40 | 41 | #copy images and make train.txt 42 | imageDir = args.src_path +"/images" 43 | train = open( args.src_path + 'train.txt','w') 44 | test = open( args.src_path + 'test.txt','w') 45 | labelsTxt = open( args.src_path + 'labels.txt','w') 46 | resize = open( args.src_path + 'resize.txt','w') 47 | 48 | classNo=0 49 | cnt = 0 50 | #label = labels[classNo] 51 | for label in labels: 52 | workdir = args.src_path + label 53 | images = glob.glob(workdir + '/*.jpg') 54 | print(label) 55 | labelsTxt.write(label+"\n") 56 | startCnt=cnt 57 | if (args.limit): 58 | length = int(args.limit) 59 | else: 60 | length = len(images) 61 | prob = float(length) / float(len(images)) 62 | print "prob=" + str(prob) 63 | 64 | for image in images: 65 | if (prob < 1.0 and random.random() >= prob): 66 | continue 67 | savepath = imageDir+"/image%07d" %cnt +".jpg" 68 | shortpath = "images/image%07d" %cnt +".jpg" 69 | shutil.copyfile(image, savepath) 70 | if cnt-startCnt < length*0.75: 71 | train.write(shortpath+" %d\n" % classNo) 72 | else: 73 | test.write(shortpath+" %d\n" % classNo) 74 | resize.write(shortpath+" %d\n" % classNo) 75 | cnt += 1 76 | 77 | classNo += 1 78 | 79 | train.close() 80 | test.close() 81 | labelsTxt.close() 82 | -------------------------------------------------------------------------------- /resize_photo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # crop pistures from imagenet style list (eg. hoge/hoge/test.jpg 2) 5 | # 6 | # usage: ./crop_csv.py list_path destination_directory_path 7 | # 8 | import cv2 9 | import argparse 10 | import os 11 | import numpy 12 | import csv 13 | import numpy as np 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("list_file") 17 | parser.add_argument("target_dir") 18 | parser.add_argument('--root', '-r', default='', help='Root directory path of image files') 19 | parser.add_argument('--size', '-s', default=256, help='Root directory path of image files') 20 | 21 | args = parser.parse_args() 22 | 23 | target_shape = (args.size, args.size) 24 | 25 | # ファイルパスを配列に入れて返す 26 | def load_image_list(path): 27 | file_list = [] 28 | for line in open(path, 'r'): 29 | vals = line.strip().split(' ') 30 | file_list.append(vals[0]) 31 | print vals[0] 32 | return file_list 33 | 34 | image_list = load_image_list(args.list_file) 35 | 36 | for image in image_list: 37 | source_imgpath = image 38 | src = cv2.imread(args.root + source_imgpath) 39 | if (src == None) : 40 | print "fail" 41 | continue 42 | # resize image 43 | height, width, depth = src.shape 44 | new_height = target_shape[1] 45 | new_width = target_shape[0] 46 | if height > width: 47 | new_height = target_shape[1] * height / width 48 | else: 49 | new_width = target_shape[0] * width / height 50 | resized_img = cv2.resize(src, (new_width, new_height)) 51 | height_offset = (new_height - target_shape[1]) / 2 52 | width_offset = (new_width - target_shape[0]) / 2 53 | cropped_img = resized_img[height_offset:height_offset + target_shape[1], 54 | width_offset:width_offset + target_shape[0]] 55 | if os.path.isdir(args.target_dir) == False : 56 | os.makedirs(args.target_dir) 57 | cv2.imwrite(args.target_dir+"/"+ os.path.basename(source_imgpath), cropped_img) 58 | print(args.target_dir+"/"+ os.path.basename(source_imgpath)) 59 | --------------------------------------------------------------------------------