├── README.md
├── checkpoint
│   └── help.md
├── inference.py
├── lbtoolbox.py
├── models
│   ├── __init__.py
│   └── hccr_cnnnet.py
├── test.py
└── train.py
/README.md:
--------------------------------------------------------------------------------
1 | # HCCR-HWDB-tensorflow
2 | Reproduction of the paper:
3 | [Building fast and compact convolutional neural networks for offline handwritten Chinese character recognition](https://arxiv.org/abs/1702.07975)
4 |
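5 | ## Usage (sketch)
6 | The scripts take no command-line arguments; set the placeholder paths at the top of
7 | `train.py`, `test.py` and `inference.py` (`save_path`, `train_dir`, `test_dir`, `model_path`, `inf_pic`),
8 | then run them directly, e.g. `python train.py`, `python test.py`, `python inference.py`.
9 | A minimal sketch of how the network in `models/hccr_cnnnet.py` is built, assuming TensorFlow 1.x
10 | (the input pipeline and checkpoint handling of the scripts are omitted here):
11 | ```python
12 | import tensorflow as tf
13 | from models import hccr_cnnnet
14 |
15 | # 96x96 grayscale input, the size used by train.py / test.py / inference.py
16 | images = tf.placeholder(tf.float32, [None, 96, 96, 1])
17 | logits = hccr_cnnnet(images, train=False, regularizer=None, channels=1)
18 | print(logits.shape)  # (?, 3755) -- one logit per character class
19 | ```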
--------------------------------------------------------------------------------
/checkpoint/help.md:
--------------------------------------------------------------------------------
1 | A model trained for 70,000 iterations.
2 | Download it [here](https://pan.baidu.com/s/1Msu_5299KDyUc0eqUr9hkQ) (Baidu Netdisk).
3 | **Password**: ljvj
4 |
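5 | To use the downloaded checkpoint with the scripts (a sketch; the directory is whatever
6 | `save_path` in `test.py` / `model_path` in `inference.py` points to), place the unpacked
7 | files in that directory and check that TensorFlow can find them:
8 | ```python
9 | import tensorflow as tf
10 |
11 | ckpt = tf.train.get_checkpoint_state('/path/to/checkpoint')  # same directory as save_path / model_path
12 | print(ckpt.model_checkpoint_path if ckpt else 'No checkpoint file found!')
13 | ```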
--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
1 | #-*- coding=utf-8 -*-
2 | import tensorflow as tf
3 |
4 | from models import hccr_cnnnet
5 |
6 | model_path='/.../.../checkpoint' #path where the trained model checkpoints are saved
7 | inf_pic='/.../.../input.jpg' #path of the image to classify
8 |
9 | def inference(model_path,inf_pic):
10 | label = None  # stays None if no checkpoint is found
11 | channels=1
12 | img_size=[96,96]
13 |
14 | def _parse_function(filename):
15 | image_decoded = tf.image.decode_jpeg(tf.read_file(filename),channels=channels)
16 | image_decoded = tf.image.resize_images(image_decoded, img_size)
17 | return image_decoded
18 |
19 | with tf.Graph().as_default() as g:
20 |
21 | image_batch = tf.expand_dims(_parse_function(inf_pic),0)
22 | logits = hccr_cnnnet(image_batch,train=False,regularizer=None,channels=channels)
23 | label_pre = tf.argmax(logits, 1)
24 | saver=tf.train.Saver()
25 |
26 | with tf.Session() as sess:
27 | ckpt = tf.train.get_checkpoint_state(model_path)
28 | if ckpt and ckpt.model_checkpoint_path:
29 | saver.restore(sess, ckpt.model_checkpoint_path)
30 | label = sess.run(label_pre)
31 | else:
32 | print('No checkpoint file found !')
33 | return label
34 |
35 | result = inference(model_path=model_path, inf_pic=inf_pic)
36 | print(result)
37 |
--------------------------------------------------------------------------------
/lbtoolbox.py:
--------------------------------------------------------------------------------
1 | import signal
2 |
3 | # Based on an original idea by https://gist.github.com/nonZero/2907502 and heavily modified.
4 | class Uninterrupt(object):
5 | """
6 | Use as:
7 | with Uninterrupt() as u:
8 | while not u.interrupted:
9 | # train
10 | """
11 | def __init__(self, sigs=(signal.SIGINT,), verbose=False):
12 | self.sigs = sigs
13 | self.verbose = verbose
14 | self.interrupted = False
15 | self.orig_handlers = None
16 |
17 | def __enter__(self):
18 | if self.orig_handlers is not None:
19 | raise ValueError("Can only enter `Uninterrupt` once!")
20 |
21 | self.interrupted = False
22 | self.orig_handlers = [signal.getsignal(sig) for sig in self.sigs]
23 |
24 | def handler(signum, frame):
25 | self.release()
26 | self.interrupted = True
27 | if self.verbose:
28 | print("\nInterruption scheduled...", flush=True)
29 |
30 | for sig in self.sigs:
31 | signal.signal(sig, handler)
32 |
33 | return self
34 |
35 | def __exit__(self, type_, value, tb):
36 | self.release()
37 |
38 | def release(self):
39 | if self.orig_handlers is not None:
40 | for sig, orig in zip(self.sigs, self.orig_handlers):
41 | signal.signal(sig, orig)
42 | self.orig_handlers = None
43 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .hccr_cnnnet import hccr_cnnnet
2 |
--------------------------------------------------------------------------------
/models/hccr_cnnnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding=utf-8 -*-
2 | import tensorflow as tf
3 |
4 | NUM_LABELS=3755
5 | stddev=0.01
6 | prob=0.5 #dropout keep probability
7 |
8 | def parametric_relu(_x):
9 | alphas = tf.get_variable('alpha', _x.get_shape()[-1],
10 | initializer=tf.constant_initializer(0.0),
11 | dtype=tf.float32)
12 | pos = tf.nn.relu(_x)
13 | neg = alphas * (_x - abs(_x)) * 0.5  # alpha * min(0, x): the PReLU negative part
14 | return pos + neg
15 |
16 | def hccr_cnnnet(input_tensor,train,regularizer,channels):
17 |
18 | conv1_deep=96
19 | conv2_deep=128
20 | conv3_deep=160
21 | conv4_deep=256
22 | conv5_deep=256
23 | conv6_deep=384
24 | conv7_deep=384
25 | fc1_num=1024
26 |
27 | with tf.variable_scope('layer0-bn'):
28 | bn0 = tf.layers.batch_normalization(input_tensor,training=train,name='bn0')
29 |
30 | with tf.variable_scope('layer1-conv1'):
31 | conv1_weights = tf.get_variable("weight", [3, 3, channels, conv1_deep],
32 | initializer=tf.truncated_normal_initializer(stddev=stddev))
33 | conv1_biases = tf.get_variable("bias", [conv1_deep], initializer=tf.constant_initializer(0.0))
34 | conv1 = tf.nn.conv2d(bn0, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
35 | bn_conv1 = tf.layers.batch_normalization(tf.nn.bias_add(conv1, conv1_biases),training=train,name='bn_conv1')
36 | prelu1 = parametric_relu(bn_conv1)
37 |
38 | with tf.name_scope("layer2-pool1"):
39 | pool1 = tf.nn.max_pool(prelu1, ksize = [1, 3, 3, 1],strides=[1, 2, 2, 1],padding="SAME")
40 |
41 | with tf.variable_scope("layer3-conv2"):
42 | conv2_weights = tf.get_variable("weight", [3,3,conv1_deep,conv2_deep],
43 | initializer=tf.truncated_normal_initializer(stddev=stddev))
44 | conv2_biases = tf.get_variable("bias", [conv2_deep], initializer=tf.constant_initializer(0.0))
45 | conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
46 | bn_conv2 = tf.layers.batch_normalization(tf.nn.bias_add(conv2, conv2_biases),training=train,name='bn_conv2')
47 | prelu2 = parametric_relu(bn_conv2)
48 |
49 | with tf.name_scope("layer4-pool2"):
50 | pool2 = tf.nn.max_pool(prelu2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
51 |
52 | with tf.variable_scope("layer5-conv3"):
53 | conv3_weights = tf.get_variable("weight", [3,3,conv2_deep,conv3_deep],
54 | initializer=tf.truncated_normal_initializer(stddev=stddev))
55 | conv3_biases = tf.get_variable("bias", [conv3_deep], initializer=tf.constant_initializer(0.0))
56 | conv3 = tf.nn.conv2d(pool2, conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
57 | bn_conv3 = tf.layers.batch_normalization(tf.nn.bias_add(conv3, conv3_biases),training=train,name='bn_conv3')
58 | prelu3 = parametric_relu(bn_conv3)
59 |
60 | with tf.name_scope("layer6-pool3"):
61 | pool3 = tf.nn.max_pool(prelu3, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
62 |
63 | with tf.variable_scope("layer7-conv4"):
64 | conv4_weights = tf.get_variable("weight", [3,3,conv3_deep,conv4_deep],
65 | initializer=tf.truncated_normal_initializer(stddev=stddev))
66 | conv4_biases = tf.get_variable("bias", [conv4_deep], initializer=tf.constant_initializer(0.0))
67 | conv4 = tf.nn.conv2d(pool3, conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
68 | bn_conv4 = tf.layers.batch_normalization(tf.nn.bias_add(conv4, conv4_biases),training=train,name='bn_conv4')
69 | prelu4 = parametric_relu(bn_conv4)
70 |
71 | with tf.variable_scope("layer8-conv5"):
72 | conv5_weights = tf.get_variable("weight", [3,3,conv4_deep,conv5_deep],
73 | initializer=tf.truncated_normal_initializer(stddev=stddev))
74 | conv5_biases = tf.get_variable("bias", [conv5_deep], initializer=tf.constant_initializer(0.0))
75 | conv5 = tf.nn.conv2d(prelu4, conv5_weights, strides=[1, 1, 1, 1], padding='SAME')
76 | bn_conv5 = tf.layers.batch_normalization(tf.nn.bias_add(conv5, conv5_biases),training=train,name='bn_conv5')
77 | prelu5 = parametric_relu(bn_conv5)
78 |
79 | with tf.name_scope("layer9-pool4"):
80 | pool4 = tf.nn.max_pool(prelu5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
81 |
82 | with tf.variable_scope("layer10-conv6"):
83 | conv6_weights = tf.get_variable("weight", [3,3,conv5_deep,conv6_deep],
84 | initializer=tf.truncated_normal_initializer(stddev=stddev))
85 | conv6_biases = tf.get_variable("bias", [conv6_deep], initializer=tf.constant_initializer(0.0))
86 | conv6 = tf.nn.conv2d(pool4, conv6_weights, strides=[1, 1, 1, 1], padding='SAME')
87 | bn_conv6 = tf.layers.batch_normalization(tf.nn.bias_add(conv6, conv6_biases),training=train,name='bn_conv6')
88 | prelu6 = parametric_relu(bn_conv6)
89 |
90 | with tf.variable_scope("layer11-conv7"):
91 | conv7_weights = tf.get_variable("weight", [3,3,conv6_deep,conv7_deep],
92 | initializer=tf.truncated_normal_initializer(stddev=stddev))
93 | conv7_biases = tf.get_variable("bias", [conv7_deep], initializer=tf.constant_initializer(0.0))
94 | conv7 = tf.nn.conv2d(prelu6, conv7_weights, strides=[1, 1, 1, 1], padding='SAME')
95 | bn_conv7 = tf.layers.batch_normalization(tf.nn.bias_add(conv7, conv7_biases),training=train,name='bn_conv7')
96 | prelu7 = parametric_relu(bn_conv7)
97 |
98 | with tf.name_scope("layer12-pool5"):
99 | pool5 = tf.nn.max_pool(prelu7, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
100 |
101 | pool_shape = pool5.get_shape().as_list()
102 | nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
103 | reshaped = tf.reshape(pool5, [-1, nodes])
104 |
105 | with tf.variable_scope('layer13-fc1'):
106 | fc1_weights = tf.get_variable("weight", [nodes, fc1_num],initializer=tf.truncated_normal_initializer(stddev=stddev))
107 | if regularizer is not None:
108 | tf.add_to_collection('losses', regularizer(fc1_weights))
109 | fc1_biases = tf.get_variable("bias", [fc1_num], initializer=tf.constant_initializer(0.1))
110 | bn_fc1=tf.layers.batch_normalization(tf.matmul(reshaped, fc1_weights) + fc1_biases,training=train,name='bn_fc1')
111 | fc1 = parametric_relu(bn_fc1)
112 | if train:
113 | fc1 = tf.nn.dropout(fc1, prob)
114 |
115 | with tf.variable_scope('layer14-output'):
116 | fc2_weights = tf.get_variable("weight", [fc1_num, NUM_LABELS],initializer=tf.truncated_normal_initializer(stddev=stddev))
117 | if regularizer is not None:
118 | tf.add_to_collection('losses', regularizer(fc2_weights))
119 | fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
120 | logit = tf.matmul(fc1, fc2_weights) + fc2_biases
121 |
122 | return logit
123 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | #-*- coding=utf-8 -*-
2 | import os
3 | import tensorflow as tf
4 |
5 | from models import hccr_cnnnet
6 |
7 | gpunum='0'
8 |
9 | batch_size = 64
10 | img_size=[96,96]
11 | channels=1
12 |
13 | save_path='/.../.../checkpoint' #path where the trained model checkpoints are saved
14 | test_dir='/.../.../test' #path to the test images
15 |
16 | files=[]
17 | labels=[]
18 |
19 | os.environ['CUDA_VISIBLE_DEVICES']=gpunum
20 |
21 | def _parse_function(filename, label):
22 | image_decoded = tf.image.decode_jpeg(tf.read_file(filename),channels=channels)
23 | image_decoded = tf.image.resize_images(image_decoded, img_size)
24 | image_decoded = tf.cast(image_decoded , tf.float32)
25 | label = tf.cast(label,tf.int32)
26 | return image_decoded, label
27 |
28 | with tf.Graph().as_default() as g:
29 |
30 | for label_name in os.listdir(test_dir):
31 | for file_name in os.listdir(test_dir+'/'+label_name):
32 | files.append(test_dir + '/'+label_name+'/'+file_name)
33 | labels.append(int(label_name))
34 |
35 | files=tf.constant(files)
36 | labels=tf.constant(labels)
37 |
38 | dataset = tf.contrib.data.Dataset.from_tensor_slices((files, labels))
39 | dataset = dataset.map(_parse_function)#,num_parallel_calls=4)
40 | dataset = dataset.batch(batch_size)
41 |
42 | image_batch,label_batch= dataset.make_one_shot_iterator().get_next()
43 |
44 | logits=hccr_cnnnet(image_batch,train=False,regularizer=None,channels=channels)
45 |
46 | prob_batch = tf.nn.softmax(logits)
47 | accuracy_top1_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 1), tf.float32))
48 | accuracy_top5_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 5), tf.float32))
49 | accuracy_top10_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 10), tf.float32))
50 | '''
51 | variable_ave = tf.train.ExponentialMovingAverage(0.99)
52 | variables_to_restore = variable_ave.variables_to_restore()
53 | '''
54 | saver=tf.train.Saver()
55 |
56 | with tf.Session() as sess:
57 |
58 | ckpt = tf.train.get_checkpoint_state(save_path)
59 | if ckpt and ckpt.model_checkpoint_path:
60 | saver.restore(sess, ckpt.model_checkpoint_path)
61 | iternum=0
62 | top1sum=0
63 | top5sum=0
64 | top10sum=0
65 |
66 | while True:
67 | try:
68 | top1,top5,top10 = sess.run([accuracy_top1_batch,accuracy_top5_batch,accuracy_top10_batch])
69 | iternum=iternum+1
70 | top1sum=top1sum+top1
71 | top5sum=top5sum+top5
72 | top10sum=top10sum+top10
73 | if iternum%500==0:
74 | print("The current test accuracy (in %d pics) = top1: %g , top5: %g ,top10: %g." % (iternum*batch_size,top1sum/iternum,top5sum/iternum,top10sum/iternum))
75 | except tf.errors.OutOfRangeError:
76 | print("The final test accuracy (in %d pics) = top1: %g , top5: %g ,top10: %g." % (iternum*batch_size,top1sum/iternum,top5sum/iternum,top10sum/iternum))
77 | print('Test finished...')
78 | break
79 | else:
80 | print('No checkpoint file found !')
81 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | #-*- coding=utf-8 -*-
2 | import os
3 | import numpy as np
4 | import tensorflow as tf
5 | from signal import SIGINT, SIGTERM
6 |
7 | import lbtoolbox as lb
8 | from models import hccr_cnnnet
9 |
10 | gpunum='0'
11 | lr_base=0.1
12 | lr_decay=0.1
13 | momentum=0.9
14 | lr_steps=7000
15 | save_steps=7000
16 | print_steps=100
17 | train_nums=30000
18 | buffer_size=100000
19 | regular_rate=0.0005
20 |
21 | batch_size = 128
22 | img_size=[96,96]
23 | channels=1
24 |
25 | save_path='/.../.../checkpoint' #path where model checkpoints are saved
26 | train_dir='/.../.../train' #path to the training images
27 | log_dir = '/.../.../log' #path where log files are saved
28 |
29 | aug=False #apply image augmentation during training?
30 | resume=False #resume training from the latest checkpoint?
31 |
32 | file_and_label=[]
33 | files=[]
34 | labels=[]
35 | '''
36 | losslist = []
37 | accuracy = []
38 | '''
39 | os.environ['CUDA_VISIBLE_DEVICES']=gpunum
40 |
41 |
42 | def data_augmentation(images):
43 | images = tf.image.random_brightness(images, max_delta=0.3)
44 | images = tf.image.random_contrast(images, 0.8, 1.2)
45 | return images
46 |
47 | def _parse_function(filename, label):
48 | image_decoded = tf.image.decode_jpeg(tf.read_file(filename),channels=channels)
49 | image_decoded = tf.image.resize_images(image_decoded, img_size)
50 | image_decoded = tf.cast(image_decoded , tf.float32)
51 | if aug:
52 | image_decoded = data_augmentation(image_decoded)
53 | label = tf.cast(label,tf.int32)
54 | return image_decoded, label
55 |
56 | for label_name in os.listdir(train_dir):
57 | for file_name in os.listdir(train_dir+'/'+label_name):
58 | file_and_label.append([label_name,train_dir + '/'+label_name+'/'+file_name])
59 |
60 | file_and_label=np.array(file_and_label)
61 | np.random.shuffle(file_and_label)
62 | labels=list(map(int,file_and_label[:,0]))
63 | files=list(file_and_label[:,1])
64 |
65 | files=tf.constant(files)
66 | labels=tf.constant(labels)
67 |
68 | dataset = tf.contrib.data.Dataset.from_tensor_slices((files, labels))
69 | dataset = dataset.map(_parse_function)
70 | dataset = dataset.shuffle(buffer_size=buffer_size).batch(batch_size).repeat()
71 |
72 | image_batch,label_batch = dataset.make_one_shot_iterator().get_next()
73 |
74 | regularizer=tf.contrib.layers.l2_regularizer(regular_rate)
75 |
76 | logits=hccr_cnnnet(image_batch,train=True,regularizer=regularizer,channels=channels)
77 |
78 | global_step=tf.Variable(0,trainable=False)
79 |
80 | prob_batch = tf.nn.softmax(logits)
81 | accuracy_top1_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 1), tf.float32))
82 | accuracy_top5_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 5), tf.float32))
83 | accuracy_top10_batch = tf.reduce_mean(tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 10), tf.float32))
84 |
85 | update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
86 |
87 | #variable_ave = tf.train.ExponentialMovingAverage(0.99,global_step)
88 | #ave_op = variable_ave.apply(tf.trainable_variables())
89 |
90 | cross_entropy_mean = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label_batch))
91 | if regularizer is None:
92 | loss=cross_entropy_mean
93 | else:
94 | loss=cross_entropy_mean+tf.add_n(tf.get_collection('losses'))
95 |
96 | lr=tf.train.exponential_decay(lr_base,global_step,lr_steps,lr_decay,staircase=True)
97 | train_step = tf.train.MomentumOptimizer(learning_rate=lr,momentum=momentum)
98 |
99 | with tf.control_dependencies(update_op):
100 | grads = train_step.compute_gradients(loss)
101 | train_op = train_step.apply_gradients(grads, global_step=global_step)
102 |
103 | var_list = tf.trainable_variables()
104 | if global_step is not None:
105 | var_list.append(global_step)
106 | g_list = tf.global_variables()
107 | bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
108 | bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
109 | var_list += bn_moving_vars
110 | saver = tf.train.Saver(var_list=var_list)
111 |
112 | with tf.Session() as sess:
113 | tf.global_variables_initializer().run()
114 |
115 | if resume:
116 | last_checkpoint = tf.train.latest_checkpoint(save_path)
117 | saver.restore(sess, last_checkpoint)
118 | start_step = sess.run(global_step)
119 | print('Resume training ... Start from step %d / %d .'%(start_step,train_nums))
120 | resume=False
121 | else:
122 | start_step = 0
123 |
124 | coord = tf.train.Coordinator()
125 | threads = tf.train.start_queue_runners(sess=sess, coord=coord)
126 |
127 | with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u:
128 | for i in range(start_step,train_nums):
129 |
130 | _,loss_value,step=sess.run([train_op,loss,global_step])
131 |
132 | if i % print_steps == 0:
133 | top1,top5,top10=sess.run([accuracy_top1_batch,accuracy_top5_batch,accuracy_top10_batch])
134 | print("After %d training step(s),loss on training batch is %g.The batch test accuracy = %g , %g ,%g."%(i,loss_value,top1,top5,top10))
135 | '''
136 | losslist.append([step,loss_value])
137 | accuracy.append([step,top1])
138 | '''
139 | if (i!=0 and i % save_steps == 0):
140 | model_name="trainnum_%d_"%train_nums
141 | saver.save(sess, os.path.join(save_path, model_name), global_step=global_step)
142 |
143 | if u.interrupted:
144 | print("Interrupted on request...")
145 | break
146 |
147 | '''
148 | file1=open(log_dir+'/loss.txt','a')
149 | for loss in losslist:
150 | loss = str(loss).strip('[').strip(']').replace(',','')
151 | file1.write(loss+'\n')
152 | file1.close()
153 |
154 | file2=open(log_dir+'/accu.txt','a')
155 | for acc in accuracy:
156 | acc = str(acc).strip('[').strip(']').replace(',','')
157 | file2.write(acc+'\n')
158 | file2.close()
159 | '''
160 |
161 | model_name="trainnum_%d_"%train_nums
162 | saver.save(sess,os.path.join(save_path,model_name),global_step=global_step)
163 | print('Train finished...')
164 |
165 | coord.request_stop()
166 | coord.join(threads)
167 |
--------------------------------------------------------------------------------