├── README.md
├── checkpoint
│   └── help.md
├── inference.py
├── lbtoolbox.py
├── models
│   ├── __init__.py
│   └── hccr_cnnnet.py
├── test.py
└── train.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# HCCR-HWDB-tensorflow
Reproduction of the paper:
  [Building fast and compact convolutional neural networks for offline handwritten Chinese character recognition](https://arxiv.org/abs/1702.07975)

--------------------------------------------------------------------------------
/checkpoint/help.md:
--------------------------------------------------------------------------------
A model trained for 70,000 iterations.
Click [here](https://pan.baidu.com/s/1Msu_5299KDyUc0eqUr9hkQ) to download it (Baidu Netdisk).
**Password**: ljvj

--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
#-*- coding=utf-8 -*-
import tensorflow as tf

from models import hccr_cnnnet

model_path='/.../.../checkpoint' # directory of the saved model checkpoint
inf_pic='/.../.../input.jpg'     # path of the image to run inference on

def inference(model_path,inf_pic):
    channels=1
    img_size=[96,96]

    def _parse_function(filename):
        image_decoded = tf.image.decode_jpeg(tf.read_file(filename),channels=channels)
        image_decoded = tf.image.resize_images(image_decoded, img_size)
        return image_decoded

    with tf.Graph().as_default() as g:

        image_batch = tf.expand_dims(_parse_function(inf_pic),0)
        logits = hccr_cnnnet(image_batch,train=False,regularizer=None,channels=channels)
        label_pre = tf.argmax(logits, 1)
        saver=tf.train.Saver()

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(model_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                label = sess.run(label_pre)
            else:
                print('No checkpoint file found!')
                return None
    return label

result = inference(model_path=model_path,inf_pic=inf_pic)
print(result)

--------------------------------------------------------------------------------
/lbtoolbox.py:
--------------------------------------------------------------------------------
import signal

# Based on an original idea by https://gist.github.com/nonZero/2907502 and heavily modified.
class Uninterrupt(object):
    """
    Use as:
    with Uninterrupt() as u:
        while not u.interrupted:
            # train
    """
    def __init__(self, sigs=(signal.SIGINT,), verbose=False):
        self.sigs = sigs
        self.verbose = verbose
        self.interrupted = False
        self.orig_handlers = None

    def __enter__(self):
        if self.orig_handlers is not None:
            raise ValueError("Can only enter `Uninterrupt` once!")

        self.interrupted = False
        self.orig_handlers = [signal.getsignal(sig) for sig in self.sigs]

        def handler(signum, frame):
            self.release()
            self.interrupted = True
            if self.verbose:
                print("\nInterruption scheduled...", flush=True)

        for sig in self.sigs:
            signal.signal(sig, handler)

        return self

    def __exit__(self, type_, value, tb):
        self.release()

    def release(self):
        if self.orig_handlers is not None:
            for sig, orig in zip(self.sigs, self.orig_handlers):
                signal.signal(sig, orig)
            self.orig_handlers = None

--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
from .hccr_cnnnet import hccr_cnnnet

--------------------------------------------------------------------------------
/models/hccr_cnnnet.py:
--------------------------------------------------------------------------------
# -*- coding=utf-8 -*-
import tensorflow as tf

NUM_LABELS=3755
stddev=0.01
prob=0.5 # keep probability for dropout

def parametric_relu(_x):
    alphas = tf.get_variable('alpha', _x.get_shape()[-1],
                             initializer=tf.constant_initializer(0.0),
                             dtype=tf.float32)
    pos = tf.nn.relu(_x)
    neg = alphas * (_x - abs(_x)) * 0.5
    return pos + neg

def hccr_cnnnet(input_tensor,train,regularizer,channels):
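    # Architecture (as defined below): batch normalization on the input, then seven
    # 3x3 convolutional layers, each followed by batch normalization and PReLU, with
    # five 3x3/stride-2 max-pooling layers in between; a 1024-unit fully connected
    # layer (with dropout during training) and a 3755-way linear output layer.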
    conv1_deep=96
    conv2_deep=128
    conv3_deep=160
    conv4_deep=256
    conv5_deep=256
    conv6_deep=384
    conv7_deep=384
    fc1_num=1024

    with tf.variable_scope('layer0-bn'):
        bn0 = tf.layers.batch_normalization(input_tensor,training=train,name='bn0')

    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable("weight", [3, 3, channels, conv1_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv1_biases = tf.get_variable("bias", [conv1_deep], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(bn0, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        bn_conv1 = tf.layers.batch_normalization(tf.nn.bias_add(conv1, conv1_biases),training=train,name='bn_conv1')
        prelu1 = parametric_relu(bn_conv1)

    with tf.name_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(prelu1, ksize = [1, 3, 3, 1],strides=[1, 2, 2, 1],padding="SAME")

    with tf.variable_scope("layer3-conv2"):
        conv2_weights = tf.get_variable("weight", [3,3,conv1_deep,conv2_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv2_biases = tf.get_variable("bias", [conv2_deep], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        bn_conv2 = tf.layers.batch_normalization(tf.nn.bias_add(conv2, conv2_biases),training=train,name='bn_conv2')
        prelu2 = parametric_relu(bn_conv2)

    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(prelu2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

    with tf.variable_scope("layer5-conv3"):
        conv3_weights = tf.get_variable("weight", [3,3,conv2_deep,conv3_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv3_biases = tf.get_variable("bias", [conv3_deep], initializer=tf.constant_initializer(0.0))
        conv3 = tf.nn.conv2d(pool2, conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
        bn_conv3 = tf.layers.batch_normalization(tf.nn.bias_add(conv3, conv3_biases),training=train,name='bn_conv3')
        prelu3 = parametric_relu(bn_conv3)

    with tf.name_scope("layer6-pool3"):
        pool3 = tf.nn.max_pool(prelu3, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

    with tf.variable_scope("layer7-conv4"):
        conv4_weights = tf.get_variable("weight", [3,3,conv3_deep,conv4_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv4_biases = tf.get_variable("bias", [conv4_deep], initializer=tf.constant_initializer(0.0))
        conv4 = tf.nn.conv2d(pool3, conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
        bn_conv4 = tf.layers.batch_normalization(tf.nn.bias_add(conv4, conv4_biases),training=train,name='bn_conv4')
        prelu4 = parametric_relu(bn_conv4)

    with tf.variable_scope("layer8-conv5"):
        conv5_weights = tf.get_variable("weight", [3,3,conv4_deep,conv5_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv5_biases = tf.get_variable("bias", [conv5_deep], initializer=tf.constant_initializer(0.0))
        conv5 = tf.nn.conv2d(prelu4, conv5_weights, strides=[1, 1, 1, 1], padding='SAME')
        bn_conv5 = tf.layers.batch_normalization(tf.nn.bias_add(conv5, conv5_biases),training=train,name='bn_conv5')
        prelu5 = parametric_relu(bn_conv5)

    with tf.name_scope("layer9-pool4"):
        pool4 = tf.nn.max_pool(prelu5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

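    # With 96x96 inputs (img_size in train.py / test.py), the feature map is 6x6 at
    # this point after the four stride-2 pools above; conv6 and conv7 keep that size
    # and pool5 reduces it to 3x3 before the fully connected layers.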
    with tf.variable_scope("layer10-conv6"):
        conv6_weights = tf.get_variable("weight", [3,3,conv5_deep,conv6_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv6_biases = tf.get_variable("bias", [conv6_deep], initializer=tf.constant_initializer(0.0))
        conv6 = tf.nn.conv2d(pool4, conv6_weights, strides=[1, 1, 1, 1], padding='SAME')
        bn_conv6 = tf.layers.batch_normalization(tf.nn.bias_add(conv6, conv6_biases),training=train,name='bn_conv6')
        prelu6 = parametric_relu(bn_conv6)

    with tf.variable_scope("layer11-conv7"):
        conv7_weights = tf.get_variable("weight", [3,3,conv6_deep,conv7_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv7_biases = tf.get_variable("bias", [conv7_deep], initializer=tf.constant_initializer(0.0))
        conv7 = tf.nn.conv2d(prelu6, conv7_weights, strides=[1, 1, 1, 1], padding='SAME')
        bn_conv7 = tf.layers.batch_normalization(tf.nn.bias_add(conv7, conv7_biases),training=train,name='bn_conv7')
        prelu7 = parametric_relu(bn_conv7)

    with tf.name_scope("layer12-pool5"):
        pool5 = tf.nn.max_pool(prelu7, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

    pool_shape = pool5.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool5, [-1, nodes])

    with tf.variable_scope('layer13-fc1'):
        fc1_weights = tf.get_variable("weight", [nodes, fc1_num],initializer=tf.truncated_normal_initializer(stddev=stddev))
        if regularizer != None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable("bias", [fc1_num], initializer=tf.constant_initializer(0.1))
        bn_fc1=tf.layers.batch_normalization(tf.matmul(reshaped, fc1_weights) + fc1_biases,training=train,name='bn_fc1')
        fc1 = parametric_relu(bn_fc1)
        if train:
            fc1 = tf.nn.dropout(fc1, prob)

    with tf.variable_scope('layer14-output'):
        fc2_weights = tf.get_variable("weight", [fc1_num, NUM_LABELS],initializer=tf.truncated_normal_initializer(stddev=stddev))
        if regularizer != None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit

--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
#-*- coding=utf-8 -*-
import os
import tensorflow as tf

from models import hccr_cnnnet

gpunum='0'

batch_size = 64
img_size=[96,96]
channels=1

save_path='/.../.../checkpoint' # directory of the saved model checkpoint
test_dir='/.../.../test'        # directory of test images

files=[]
labels=[]

os.environ['CUDA_VISIBLE_DEVICES']=gpunum

def _parse_function(filename, label):
    image_decoded = tf.image.decode_jpeg(tf.read_file(filename),channels=channels)
    image_decoded = tf.image.resize_images(image_decoded, img_size)
    image_decoded = tf.cast(image_decoded , tf.float32)
    label = tf.cast(label,tf.int32)
    return image_decoded, label

with tf.Graph().as_default() as g:

    for label_name in os.listdir(test_dir):
        for file_name in os.listdir(test_dir+'/'+label_name):
            files.append(test_dir + '/'+label_name+'/'+file_name)
            labels.append(int(label_name))

    files=tf.constant(files)
    labels=tf.constant(labels)

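    # NOTE: tf.contrib.data.Dataset is the pre-TF-1.4 input-pipeline API; on TF 1.4
    # and later the same pipeline can be built with tf.data.Dataset.from_tensor_slices.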
    dataset = tf.contrib.data.Dataset.from_tensor_slices((files, labels))
    dataset = dataset.map(_parse_function)#,num_parallel_calls=4)
    dataset = dataset.batch(batch_size)

    image_batch,label_batch= dataset.make_one_shot_iterator().get_next()

    logits=hccr_cnnnet(image_batch,train=False,regularizer=None,channels=channels)

    prob_batch = tf.nn.softmax(logits)
    accuracy_top1_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 1), tf.float32))
    accuracy_top5_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 5), tf.float32))
    accuracy_top10_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 10), tf.float32))
    '''
    variable_ave = tf.train.ExponentialMovingAverage(0.99)
    variables_to_restore = variable_ave.variables_to_restore()
    '''
    saver=tf.train.Saver()

    with tf.Session() as sess:

        ckpt = tf.train.get_checkpoint_state(save_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            iternum=0
            top1sum=0
            top5sum=0
            top10sum=0

            while True:
                try:
                    top1,top5,top10 = sess.run([accuracy_top1_batch,accuracy_top5_batch,accuracy_top10_batch])
                    iternum=iternum+1
                    top1sum=top1sum+top1
                    top5sum=top5sum+top5
                    top10sum=top10sum+top10
                    if iternum%500==0:
                        print("The current test accuracy (over %d pics) = top1: %g, top5: %g, top10: %g." % (iternum*batch_size,top1sum/iternum,top5sum/iternum,top10sum/iternum))
                except tf.errors.OutOfRangeError:
                    print("The final test accuracy (over %d pics) = top1: %g, top5: %g, top10: %g." % (iternum*batch_size,top1sum/iternum,top5sum/iternum,top10sum/iternum))
                    print('Test finished...')
                    break
        else:
            print('No checkpoint file found!')

--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
#-*- coding=utf-8 -*-
import os
import numpy as np
import tensorflow as tf
from signal import SIGINT, SIGTERM

import lbtoolbox as lb
from models import hccr_cnnnet

gpunum='0'
lr_base=0.1
lr_decay=0.1
momentum=0.9
lr_steps=7000
save_steps=7000
print_steps=100
train_nums=30000
buffer_size=100000
regular_rate=0.0005

batch_size = 128
img_size=[96,96]
channels=1

save_path='/.../.../checkpoint' # directory where checkpoints are saved
train_dir='/.../.../train'      # directory of training images
log_dir = '/.../.../log'        # directory where logs are saved

aug=False    # apply image augmentation?
resume=False # resume training from an existing checkpoint?
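
# NOTE: the directory walk below expects train_dir to contain one sub-directory per
# character class, named with an integer label (e.g. train_dir/0/, train_dir/1/, ...);
# the class label is read from the directory name. test.py assumes the same layout.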

file_and_label=[]
files=[]
labels=[]
'''
losslist = []
accuracy = []
'''
os.environ['CUDA_VISIBLE_DEVICES']=gpunum


def data_augmentation(images):
    images = tf.image.random_brightness(images, max_delta=0.3)
    images = tf.image.random_contrast(images, 0.8, 1.2)
    return images

def _parse_function(filename, label):
    image_decoded = tf.image.decode_jpeg(tf.read_file(filename),channels=channels)
    image_decoded = tf.image.resize_images(image_decoded, img_size)
    image_decoded = tf.cast(image_decoded , tf.float32)
    if aug:
        image_decoded = data_augmentation(image_decoded)
    label = tf.cast(label,tf.int32)
    return image_decoded, label

for label_name in os.listdir(train_dir):
    for file_name in os.listdir(train_dir+'/'+label_name):
        file_and_label.append([label_name,train_dir + '/'+label_name+'/'+file_name])

file_and_label=np.array(file_and_label)
np.random.shuffle(file_and_label)
labels=list(map(int,file_and_label[:,0]))
files=list(file_and_label[:,1])

files=tf.constant(files)
labels=tf.constant(labels)

dataset = tf.contrib.data.Dataset.from_tensor_slices((files, labels))
dataset = dataset.map(_parse_function)
dataset = dataset.shuffle(buffer_size=buffer_size).batch(batch_size).repeat()

image_batch,label_batch = dataset.make_one_shot_iterator().get_next()

regularizer=tf.contrib.layers.l2_regularizer(regular_rate)

logits=hccr_cnnnet(image_batch,train=True,regularizer=regularizer,channels=channels)

global_step=tf.Variable(0,trainable=False)

prob_batch = tf.nn.softmax(logits)
accuracy_top1_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 1), tf.float32))
accuracy_top5_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 5), tf.float32))
accuracy_top10_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 10), tf.float32))

update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

#variable_ave = tf.train.ExponentialMovingAverage(0.99,global_step)
#ave_op = variable_ave.apply(tf.trainable_variables())

cross_entropy_mean = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label_batch))
if regularizer==None:
    loss=cross_entropy_mean
else:
    loss=cross_entropy_mean+tf.add_n(tf.get_collection('losses'))

lr=tf.train.exponential_decay(lr_base,global_step,lr_steps,lr_decay,staircase=True)
train_step = tf.train.MomentumOptimizer(learning_rate=lr,momentum=momentum)

with tf.control_dependencies(update_op):
    grads = train_step.compute_gradients(loss)
    train_op = train_step.apply_gradients(grads, global_step=global_step)

var_list = tf.trainable_variables()
if global_step is not None:
    var_list.append(global_step)
g_list = tf.global_variables()
bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
var_list += bn_moving_vars
saver = tf.train.Saver(var_list=var_list)

with tf.Session() as sess:
    tf.global_variables_initializer().run()

    if resume:
        last_checkpoint = tf.train.latest_checkpoint(save_path)
        saver.restore(sess, last_checkpoint)
        start_step = sess.run(global_step)
        print('Resuming training ... starting from step %d / %d.'%(start_step,train_nums))
        resume=False
    else:
        start_step = 0

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u:
        for i in range(start_step,train_nums):

            _,loss_value,step=sess.run([train_op,loss,global_step])

            if i % print_steps == 0:
                top1,top5,top10=sess.run([accuracy_top1_batch,accuracy_top5_batch,accuracy_top10_batch])
                print("After %d training step(s), loss on training batch is %g. The batch accuracy (top1/top5/top10) = %g, %g, %g."%(i,loss_value,top1,top5,top10))
                '''
                losslist.append([step,loss_value])
                accuracy.append([step,top1])
                '''
            if (i!=0 and i % save_steps == 0):
                model_name="trainnum_%d_"%train_nums
                saver.save(sess, os.path.join(save_path, model_name), global_step=global_step)

            if u.interrupted:
                print("Interrupted on request...")
                break

    '''
    file1=open(log_dir+'/loss.txt','a')
    for loss in losslist:
        loss = str(loss).strip('[').strip(']').replace(',','')
        file1.write(loss+'\n')
    file1.close()

    file2=open(log_dir+'/accu.txt','a')
    for acc in accuracy:
        acc = str(acc).strip('[').strip(']').replace(',','')
        file2.write(acc+'\n')
    file2.close()
    '''

    model_name="trainnum_%d_"%train_nums
    saver.save(sess,os.path.join(save_path,model_name),global_step=global_step)
    print('Train finished...')

    coord.request_stop()
    coord.join(threads)

--------------------------------------------------------------------------------