├── README.md
├── CNN
│   ├── simple_example.py
│   ├── BatchNormalization.py
│   ├── Layers.py
│   ├── VGG16.py
│   ├── ResNet.py
│   └── ResNet_CIFAR.py
├── data
│   └── dataset.py
└── RNN
    ├── simple_example.py
    ├── tokenizer.py
    └── LSTM.py

/README.md:
--------------------------------------------------------------------------------
1 | Tensorflow 2.0
2 | 
3 | The tutorial is available at
4 | [https://zhuanlan.zhihu.com/c_1183794276223385600](https://zhuanlan.zhihu.com/c_1183794276223385600)
5 | 
6 | Please let me know if an English tutorial is needed.
--------------------------------------------------------------------------------
/CNN/simple_example.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | from tensorflow.keras import models
5 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Reshape
6 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
7 | 
8 | def load_MNIST(PATH):
9 |     train_images = np.load(PATH + '/x_train.npy')
10 |     train_labels = np.load(PATH + '/y_train.npy')
11 |     test_images = np.load(PATH + '/x_test.npy')
12 |     test_labels = np.load(PATH + '/y_test.npy')
13 |     return train_images, train_labels, test_images, test_labels
14 | 
15 | def Model():
16 |     model = models.Sequential()
17 |     model.add(Reshape((28, 28, 1), input_shape=(28, 28)))
18 |     model.add(Conv2D(32, (3, 3), activation='relu'))
19 |     model.add(MaxPooling2D((2, 2)))
20 |     model.add(Conv2D(64, (3, 3), activation='relu'))
21 |     model.add(MaxPooling2D((2, 2)))
22 |     model.add(Conv2D(64, (3, 3), activation='relu'))
23 |     model.add(Flatten())
24 |     model.add(Dense(64, activation='relu'))
25 |     model.add(Dense(10, activation='softmax'))
26 |     return model
27 | 
28 | if __name__ == '__main__':
29 |     # gpu config
30 |     physical_devices = tf.config.experimental.list_physical_devices('GPU')
31 |     tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)
32 | 
33 |     # load data
34 |     # (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
35 |     (train_images, train_labels, test_images, test_labels) = load_MNIST('/home/user/Documents/dataset/MNIST')
36 | 
37 |     # get CNN model
38 |     model = Model()
39 | 
40 |     # show
41 |     model.summary()
42 | 
43 |     # train
44 |     model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
45 |     model.fit(train_images,
46 |               train_labels,
47 |               batch_size=128,
48 |               epochs=5)
49 | 
50 |     # test
51 |     test_loss, test_acc = model.evaluate(test_images, test_labels)
52 |     print(test_acc)
--------------------------------------------------------------------------------
/data/dataset.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | 
4 | def add_noise(x, y):
5 |     x += np.random.uniform(0.0, 0.01)
6 |     return x, y
7 | 
8 | def one_hot(x, y):
9 |     if y == 0:
10 |         return x, np.array([1, 0])
11 |     else:
12 |         return x, np.array([0, 1])
13 | 
14 | if __name__ == '__main__':
15 |     data = np.array([0.1, 0.4, 0.6, 0.2, 0.8, 0.8, 0.4, 0.9, 0.3, 0.2])
16 |     print(data)
17 |     label = np.array([0, 0, 1, 0, 1, 1, 0, 1, 0, 0])
18 |     print(label)
19 | 
20 |     dataset = tf.data.Dataset.from_tensor_slices((data, label))
21 | 
22 |     print('traverse all data')
23 |     for x, y in dataset:
24 |         print(x, y)
25 | 
26 |     print('use iterator')
27 |     dataset = dataset.repeat()
28 |     it = dataset.__iter__()
29 |     for i in range(20):
30 |         x, y = it.next()
31 |         print(x, y)
32 | 
33 |     print('random shuffle')
34 | 
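    # Dataset.shuffle draws samples uniformly from a buffer of `buffer_size` elements,
    # refilling the buffer as elements are consumed; a buffer_size at least as large as
    # the dataset (10 here) gives a full uniform shuffle.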
dataset = dataset.shuffle(buffer_size=10) 35 | it = dataset.__iter__() 36 | for i in range(10): 37 | x, y = it.next() 38 | print(x, y) 39 | 40 | print('batch') 41 | dataset_batch = dataset.batch(batch_size=5) 42 | it = dataset_batch.__iter__() 43 | for i in range(2): 44 | x, y = it.next() 45 | print(x, y) 46 | 47 | print('one hot') 48 | dataset_one_hot = dataset.map(one_hot) 49 | it = dataset_one_hot.__iter__() 50 | for i in range(10): 51 | x, y = it.next() 52 | print(x, y) 53 | 54 | print('add noise') 55 | dataset_add_noise = dataset.map(add_noise) 56 | it = dataset_add_noise.__iter__() 57 | for i in range(10): 58 | x, y = it.next() 59 | print(x, y) 60 | 61 | print('add noise') 62 | dataset_add_noise = dataset.map(lambda x, y: tf.py_function(add_noise, inp=[x, y], Tout=[tf.float64, tf.int64]), 63 | num_parallel_calls=tf.data.experimental.AUTOTUNE) 64 | it = dataset_add_noise.__iter__() 65 | for i in range(10): 66 | x, y = it.next() 67 | print(x, y) 68 | -------------------------------------------------------------------------------- /CNN/BatchNormalization.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class BatchNormalization(tf.keras.layers.Layer): 4 | def __init__(self, decay=0.9, epsilon=1e-5, **kwargs): 5 | self.decay = decay 6 | self.epsilon = epsilon 7 | super(BatchNormalization, self).__init__(**kwargs) 8 | 9 | def build(self, input_shape): 10 | self.gamma = self.add_weight(name='gamma', 11 | shape=[input_shape[-1], ], 12 | initializer=tf.initializers.ones, 13 | trainable=True) 14 | self.beta = self.add_weight(name='beta', 15 | shape=[input_shape[-1], ], 16 | initializer=tf.initializers.zeros, 17 | trainable=True) 18 | self.moving_mean = self.add_weight(name='moving_mean', 19 | shape=[input_shape[-1], ], 20 | initializer=tf.initializers.zeros, 21 | trainable=False) 22 | self.moving_variance = self.add_weight(name='moving_variance', 23 | shape=[input_shape[-1], ], 24 | initializer=tf.initializers.ones, 25 | trainable=False) 26 | super(BatchNormalization, self).build(input_shape) 27 | 28 | def assign_moving_average(self, variable, value): 29 | """ 30 | variable = variable * decay + value * (1 - decay) 31 | """ 32 | delta = variable * self.decay + value * (1 - self.decay) 33 | return variable.assign(delta) 34 | 35 | @tf.function 36 | def call(self, inputs, training): 37 | if training: 38 | batch_mean, batch_variance = tf.nn.moments(inputs, list(range(len(inputs.shape) - 1))) 39 | mean_update = self.assign_moving_average(self.moving_mean, batch_mean) 40 | variance_update = self.assign_moving_average(self.moving_variance, batch_variance) 41 | self.add_update(mean_update) 42 | self.add_update(variance_update) 43 | mean, variance = batch_mean, batch_variance 44 | else: 45 | mean, variance = self.moving_mean, self.moving_variance 46 | output = tf.nn.batch_normalization(inputs, 47 | mean=mean, 48 | variance=variance, 49 | offset=self.beta, 50 | scale=self.gamma, 51 | variance_epsilon=self.epsilon) 52 | return output 53 | 54 | def compute_output_shape(self, input_shape): 55 | return input_shape -------------------------------------------------------------------------------- /RNN/simple_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.keras.preprocessing.sequence import pad_sequences 6 | from tensorflow.keras.layers import Dense, GlobalAveragePooling1D, Embedding 7 | 
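# IMDB sentiment classification: pretrained GloVe vectors initialize an Embedding layer,
# followed by GlobalAveragePooling1D and two Dense layers.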
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
8 | 
9 | """
10 | Download link for imdb dataset and GLoVe weights.
11 | https://s3.amazonaws.com/text-datasets/imdb.npz
12 | https://s3.amazonaws.com/text-datasets/imdb_word_index.json
13 | https://nlp.stanford.edu/projects/glove/
14 | """
15 | 
16 | data_path = '/home1/dataset/IMDB/imdb.npz'
17 | word_index_path = '/home1/dataset/IMDB/imdb_word_index.json'
18 | GLoVe_path = '/home1/dataset/GLoVe/glove.6B.100d.txt'
19 | word_num = 10000
20 | max_len = 256
21 | embedding_dim = 100
22 | 
23 | def get_embedding_weight(weight_path, word_index):
24 |     # embedding_weight = np.zeros([word_num, embedding_dim])
25 |     embedding_weight = np.random.uniform(-0.05, 0.05, size=[word_num, embedding_dim])
26 |     cnt = 0
27 |     with open(weight_path, 'r') as f:
28 |         for line in f:
29 |             values = line.split()
30 |             word = values[0]
31 |             if word in word_index.keys() and word_index[word] + 3 < word_num:
32 |                 """
33 |                 In tf.keras.datasets.imdb.load_data(), there are 4 special tokens:
34 |                 <PAD>: 0
35 |                 <START>: 1
36 |                 <UNK>: 2
37 |                 <UNUSED>: 3
38 |                 So the word_index loaded from the official file, "imdb_word_index.json", needs to be shifted by +3.
39 |                 """
40 |                 weight = np.asarray(values[1:], dtype='float32')
41 |                 embedding_weight[word_index[word] + 3] = weight
42 |                 cnt += 1
43 |     print('word num: {}, matched num: {}'.format(len(word_index), cnt))
44 |     return embedding_weight
45 | 
46 | def Model():
47 |     with open(word_index_path, 'r') as f:
48 |         word_index = json.load(f)
49 |     embedding_weight = get_embedding_weight(GLoVe_path, word_index)
50 | 
51 |     model = tf.keras.Sequential()
52 |     model.add(Embedding(word_num, embedding_dim, weights=[embedding_weight]))
53 |     model.add(GlobalAveragePooling1D())
54 |     model.add(Dense(128, activation=tf.nn.relu))
55 |     model.add(Dense(2, activation='softmax'))
56 |     return model
57 | 
58 | if __name__ == '__main__':
59 |     # gpu config
60 |     physical_devices = tf.config.experimental.list_physical_devices('GPU')
61 |     tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)
62 | 
63 |     # load data
64 |     imdb = tf.keras.datasets.imdb
65 |     (train_sequences, train_labels), (test_sequences, test_labels) = imdb.load_data(data_path, num_words=word_num)
66 | 
67 |     train_sequences = pad_sequences(train_sequences, maxlen=max_len)
68 |     test_sequences = pad_sequences(test_sequences, maxlen=max_len)
69 | 
70 |     # get model
71 |     model = Model()
72 |     model.summary()
73 | 
74 |     # train
75 |     model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
76 |     model.fit(train_sequences,
77 |               train_labels,
78 |               batch_size=512,
79 |               epochs=10)
80 | 
81 |     # test
82 |     test_loss, test_acc = model.evaluate(test_sequences, test_labels)
83 |     print(test_acc)
--------------------------------------------------------------------------------
/CNN/Layers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tensorflow as tf
3 | from tensorflow.keras import models
4 | from tensorflow.keras.layers import BatchNormalization, Flatten, Dense, Input, AveragePooling2D, Activation
5 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
6 | 
7 | class Conv2D(tf.keras.layers.Layer):
8 |     def __init__(self, output_dim, kernel_size=(3, 3), strides=(1, 1, 1, 1), **kwargs):
9 |         self.output_dim = output_dim
10 |         self.kernel_size = kernel_size
11 |         self.strides = strides
12 |         super(Conv2D, self).__init__(**kwargs)
13 | 
14 |     def build(self, input_shape):
15 |         kernel_shape = tf.TensorShape((self.kernel_size[0], self.kernel_size[1], input_shape[-1], self.output_dim))
16
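        # tf.nn.conv2d expects the kernel in HWIO layout:
        # [filter_height, filter_width, in_channels, out_channels].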
| self.kernel = self.add_weight(name='kernel', 17 | shape=kernel_shape, 18 | initializer=tf.initializers.he_normal()) 19 | 20 | def call(self, inputs): 21 | output = tf.nn.conv2d(inputs, filters=self.kernel, strides=self.strides, padding='SAME') 22 | return output 23 | 24 | def compute_output_shape(self, input_shape): 25 | shape = tf.TensorShape(input_shape).as_list() 26 | shape[-1] = self.output_dim 27 | return tf.TensorShape(shape) 28 | 29 | class ResBlock(tf.keras.layers.Layer): 30 | def __init__(self, output_dim, strides=(1, 1, 1, 1), **kwargs): 31 | self.strides = strides 32 | if strides != (1, 1, 1, 1): 33 | self.shortcut = Conv2D(output_dim, kernel_size=(1, 1), strides=self.strides) 34 | self.conv_0 = Conv2D(output_dim, strides=self.strides) 35 | self.conv_1 = Conv2D(output_dim) 36 | self.bn_0 = BatchNormalization(momentum=0.9, epsilon=1e-5) 37 | self.bn_1 = BatchNormalization(momentum=0.9, epsilon=1e-5) 38 | super(ResBlock, self).__init__(**kwargs) 39 | 40 | def call(self, inputs, training): 41 | net = self.bn_0(inputs, training=training) 42 | net = tf.nn.relu(net) 43 | 44 | if self.strides != (1, 1, 1, 1): 45 | shortcut = self.shortcut(net) 46 | else: 47 | shortcut = inputs 48 | 49 | net = self.conv_0(net) 50 | net = self.bn_1(net, training=training) 51 | net = tf.nn.relu(net) 52 | net = self.conv_1(net) 53 | 54 | output = net + shortcut 55 | return output 56 | 57 | def ResNet(inputs): 58 | net = Conv2D(16)(inputs) 59 | 60 | for i in range(stack_n): 61 | net = ResBlock(16)(net) 62 | 63 | net = ResBlock(32, strides=(1, 2, 2, 1))(net) 64 | for i in range(stack_n - 1): 65 | net = ResBlock(32)(net) 66 | 67 | net = ResBlock(64, strides=(1, 2, 2, 1))(net) 68 | for i in range(stack_n - 1): 69 | net = ResBlock(64)(net) 70 | 71 | net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net) 72 | net = Activation('relu')(net) 73 | net = AveragePooling2D(8, 8)(net) 74 | net = Flatten()(net) 75 | net = Dense(10, activation='softmax')(net) 76 | return net 77 | 78 | if __name__ == '__main__': 79 | # gpu config 80 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 81 | tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True) 82 | 83 | # get model 84 | stack_n = 3 85 | img_input = Input(shape=(32, 32, 3)) 86 | output = ResNet(img_input) 87 | model = models.Model(img_input, output) 88 | 89 | # show 90 | model.summary() 91 | -------------------------------------------------------------------------------- /CNN/VGG16.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import pickle as p 4 | import os 5 | from tensorflow.keras import models, optimizers, regularizers 6 | from tensorflow.keras.callbacks import LearningRateScheduler 7 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense 8 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 9 | 10 | weight_decay = 5e-4 11 | batch_size = 128 12 | learning_rate = 1e-2 13 | dropout_rate = 0.5 14 | epoch_num = 50 15 | 16 | def load_CIFAR_batch(filename): 17 | """ load single batch of cifar """ 18 | with open(filename, 'rb')as f: 19 | datadict = p.load(f, encoding='iso-8859-1') 20 | X = datadict['data'] 21 | Y = datadict['labels'] 22 | X = X.reshape(10000, 3, 32, 32) 23 | Y = np.array(Y) 24 | return X, Y 25 | 26 | 27 | def load_CIFAR(Foldername): 28 | train_data = np.zeros([50000, 32, 32, 3], dtype=np.float32) 29 | train_label = np.zeros([50000, 10], dtype=np.float32) 30 | test_data = np.zeros([10000, 32, 32, 
3], dtype=np.float32) 31 | test_label = np.zeros([10000, 10], dtype=np.float32) 32 | 33 | for sample in range(5): 34 | X, Y = load_CIFAR_batch(Foldername + "/data_batch_" + str(sample + 1)) 35 | 36 | for i in range(3): 37 | train_data[10000 * sample:10000 * (sample + 1), :, :, i] = X[:, i, :, :] 38 | for i in range(10000): 39 | train_label[i + 10000 * sample][Y[i]] = 1 40 | 41 | X, Y = load_CIFAR_batch(Foldername + "/test_batch") 42 | for i in range(3): 43 | test_data[:, :, :, i] = X[:, i, :, :] 44 | for i in range(10000): 45 | test_label[i][Y[i]] = 1 46 | 47 | return train_data, train_label, test_data, test_label 48 | 49 | def VGG16(): 50 | model = models.Sequential() 51 | model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3), kernel_regularizer=regularizers.l2(weight_decay))) 52 | model.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 53 | model.add(MaxPooling2D((2, 2))) 54 | 55 | model.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 56 | model.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 57 | model.add(MaxPooling2D((2, 2))) 58 | 59 | model.add(Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 60 | model.add(Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 61 | model.add(Conv2D(256, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 62 | model.add(MaxPooling2D((2, 2))) 63 | 64 | model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 65 | model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 66 | model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 67 | model.add(MaxPooling2D((2, 2))) 68 | 69 | model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 70 | model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 71 | model.add(Conv2D(512, (3, 3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))) 72 | 73 | model.add(Flatten()) # 2*2*512 74 | model.add(Dense(4096, activation='relu')) 75 | model.add(Dropout(0.5)) 76 | model.add(Dense(4096, activation='relu')) 77 | model.add(Dropout(0.5)) 78 | model.add(Dense(10, activation='softmax')) 79 | 80 | return model 81 | 82 | 83 | def scheduler(epoch): 84 | if epoch < epoch_num * 0.4: 85 | return learning_rate 86 | if epoch < epoch_num * 0.8: 87 | return learning_rate * 0.1 88 | return learning_rate * 0.01 89 | 90 | 91 | if __name__ == '__main__': 92 | # gpu config 93 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 94 | tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True) 95 | 96 | # load data 97 | # (train_images, train_labels, test_images, test_labels) = load_CIFAR('/home/user/Documents/dataset/Cifar-10') 98 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data() 99 | train_labels = tf.keras.utils.to_categorical(train_labels, 10) 100 | test_labels = tf.keras.utils.to_categorical(test_labels, 10) 101 | 102 | # get model 103 | model = VGG16() 104 | 105 
|     # show
106 |     model.summary()
107 | 
108 |     # train
109 |     sgd = optimizers.SGD(lr=learning_rate, momentum=0.9, nesterov=True)
110 |     change_lr = LearningRateScheduler(scheduler)
111 |     model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
112 |     model.fit(train_images, train_labels,
113 |               batch_size=batch_size,
114 |               epochs=epoch_num,
115 |               callbacks=[change_lr],
116 |               validation_data=(test_images, test_labels))
--------------------------------------------------------------------------------
/CNN/ResNet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.layers import Conv2D, GlobalAvgPool2D, BatchNormalization, Dense
3 | 
4 | # network config
5 | block_type = {18: 'basic block',
6 |               34: 'basic block',
7 |               50: 'bottleneck block',
8 |               101: 'bottleneck block',
9 |               152: 'bottleneck block'}
10 | 
11 | block_num = {18: (2, 2, 2, 2),
12 |              34: (3, 4, 6, 3),
13 |              50: (3, 4, 6, 3),
14 |              101: (3, 4, 23, 3),
15 |              152: (3, 4, 36, 3)}
16 | 
17 | filter_num = (64, 128, 256, 512)
18 | 
19 | class BasicBlock(tf.keras.layers.Layer):
20 |     def __init__(self, filters, strides=(1, 1), **kwargs):
21 |         self.strides = strides
22 |         if strides != (1, 1):
23 |             self.shortcut = Conv2D(filters, (1, 1), name='projection', padding='same', use_bias=False)
24 | 
25 |         self.conv_0 = Conv2D(filters, (3, 3), name='conv_0', strides=strides, padding='same', use_bias=False)
26 |         self.conv_1 = Conv2D(filters, (3, 3), name='conv_1', padding='same', use_bias=False)
27 |         self.bn_0 = BatchNormalization(name='bn_0', momentum=0.9, epsilon=1e-5)
28 |         self.bn_1 = BatchNormalization(name='bn_1', momentum=0.9, epsilon=1e-5)
29 |         super(BasicBlock, self).__init__(**kwargs)
30 | 
31 |     def call(self, inputs, training):
32 |         net = self.bn_0(inputs, training=training)
33 |         net = tf.nn.relu(net)
34 | 
35 |         if self.strides != (1, 1):
36 |             shortcut = tf.nn.avg_pool2d(net, ksize=(2, 2), strides=(2, 2), padding='SAME')
37 |             shortcut = self.shortcut(shortcut)
38 |         else:
39 |             shortcut = inputs
40 | 
41 |         net = self.conv_0(net)
42 |         net = self.bn_1(net, training=training)
43 |         net = tf.nn.relu(net)
44 |         net = self.conv_1(net)
45 | 
46 |         output = net + shortcut
47 |         return output
48 | 
49 | class BottleneckBlock(tf.keras.layers.Layer):
50 |     def __init__(self, filters, strides=(1, 1), projection=False, **kwargs):
51 |         self.strides = strides
52 |         self.projection = projection
53 |         if projection or strides != (1, 1):
54 |             self.shortcut = Conv2D(filters * 4, (1, 1), name='projection', padding='same', use_bias=False)
55 | 
56 |         self.conv_0 = Conv2D(filters, (1, 1), name='conv_0', padding='same', use_bias=False)
57 |         self.conv_1 = Conv2D(filters, (3, 3), name='conv_1', strides=strides, padding='same', use_bias=False)
58 |         self.conv_2 = Conv2D(filters * 4, (1, 1), name='conv_2', padding='same', use_bias=False)
59 |         self.bn_0 = BatchNormalization(name='bn_0', momentum=0.9, epsilon=1e-5)
60 |         self.bn_1 = BatchNormalization(name='bn_1', momentum=0.9, epsilon=1e-5)
61 |         self.bn_2 = BatchNormalization(name='bn_2', momentum=0.9, epsilon=1e-5)
62 |         super(BottleneckBlock, self).__init__(**kwargs)
63 | 
64 |     def call(self, inputs, training):
65 |         net = self.bn_0(inputs, training=training)
66 |         net = tf.nn.relu(net)
67 | 
68 |         if self.projection:
69 |             shortcut = self.shortcut(net)
70 |         elif self.strides != (1, 1):
71 |             shortcut = tf.nn.avg_pool2d(net, ksize=(2, 2), strides=(2, 2), padding='SAME')
72 |             shortcut = self.shortcut(shortcut)
73 |         else:
74 |             shortcut = inputs
75 | 
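        # Pre-activation residual branch: BN -> ReLU precedes each conv, and the branch is
        # 1x1 reduce -> 3x3 (possibly strided) -> 1x1 expand before the shortcut is added.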
76 | net = self.conv_0(net) 77 | net = self.bn_1(net, training=training) 78 | net = tf.nn.relu(net) 79 | net = self.conv_1(net) 80 | net = self.bn_2(net, training=training) 81 | net = tf.nn.relu(net) 82 | net = self.conv_2(net) 83 | 84 | output = net + shortcut 85 | return output 86 | 87 | 88 | class ResNet(tf.keras.models.Model): 89 | def __init__(self, layer_num, **kwargs): 90 | super(ResNet, self).__init__(**kwargs) 91 | if block_type[layer_num] == 'basic block': 92 | self.block = BasicBlock 93 | else: 94 | self.block = BottleneckBlock 95 | 96 | self.conv0 = Conv2D(64, (7, 7), strides=(2, 2), name='conv0', padding='same', use_bias=False) 97 | 98 | self.block_collector = [] 99 | for layer_index, (b, f) in enumerate(zip(block_num[layer_num], filter_num), start=1): 100 | if layer_index == 1: 101 | if block_type[layer_num] == 'basic block': 102 | self.block_collector.append(self.block(f, name='conv1_0')) 103 | else: 104 | self.block_collector.append(self.block(f, projection=True, name='conv1_0')) 105 | else: 106 | self.block_collector.append(self.block(f, strides=(2, 2), name='conv{}_0'.format(layer_index))) 107 | 108 | for block_index in range(1, b): 109 | self.block_collector.append(self.block(f, name='conv{}_{}'.format(layer_index, block_index))) 110 | 111 | self.bn = BatchNormalization(name='bn', momentum=0.9, epsilon=1e-5) 112 | self.global_average_pooling = GlobalAvgPool2D() 113 | self.fc = Dense(1000, name='fully_connected', activation='softmax', use_bias=False) 114 | 115 | def call(self, inputs, training): 116 | net = self.conv0(inputs) 117 | print('input', inputs.shape) 118 | print('conv0', net.shape) 119 | net = tf.nn.max_pool2d(net, ksize=(3, 3), strides=(2, 2), padding='SAME') 120 | print('max-pooling', net.shape) 121 | 122 | for block in self.block_collector: 123 | net = block(net, training) 124 | print(block.name, net.shape) 125 | net = self.bn(net, training) 126 | net = tf.nn.relu(net) 127 | 128 | net = self.global_average_pooling(net) 129 | print('global average-pooling', net.shape) 130 | net = self.fc(net) 131 | print('fully connected', net.shape) 132 | return net 133 | 134 | if __name__ == '__main__': 135 | model = ResNet(152) 136 | model.build(input_shape=(None, 224, 224, 3)) -------------------------------------------------------------------------------- /RNN/tokenizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | from tqdm import tqdm 5 | from tensorflow.keras import optimizers 6 | from tensorflow.keras.preprocessing.text import Tokenizer 7 | from tensorflow.keras.preprocessing.sequence import pad_sequences 8 | from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Embedding, GlobalAveragePooling1D 9 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 10 | 11 | train_data_path = '/home/user/Documents/dataset/20_newsgroup/20news-bydate-train' 12 | test_data_path = '/home/user/Documents/dataset/20_newsgroup/20news-bydate-test' 13 | GLoVe_path = '/home1/dataset/GLoVe/glove.6B.100d.txt' 14 | 15 | word_num = 20000 16 | max_len = 1024 17 | embedding_dim = 100 18 | 19 | # training config 20 | batch_size = 128 21 | # train_num = 11314 22 | train_num = 11270 23 | iterations_per_epoch = int(train_num / batch_size) 24 | epoch_num = 20 25 | 26 | # test config 27 | test_batch_size = 128 28 | # test_num = 7532 29 | test_num = 7503 30 | test_iterations = int(test_num / test_batch_size) 31 | 32 | def load_data(path): 33 | texts = [] 34 | labels = [] 35 | 36 | for i, label in 
enumerate(sorted(os.listdir(path))): 37 | for file_name in os.listdir(os.path.join(path, label)): 38 | file_path = os.path.join(path, label, file_name) 39 | try: 40 | with open(file_path, 'r') as f: 41 | texts.append(f.read()) 42 | labels.append(i) 43 | except: 44 | print('can not decode:', file_path) 45 | return texts, labels 46 | 47 | class Model(tf.keras.models.Model): 48 | def __init__(self, **kwargs): 49 | super(Model, self).__init__(**kwargs) 50 | 51 | embedding_weight = self.get_embedding_weight(GLoVe_path, word_index) 52 | self.embedding = Embedding(word_num, embedding_dim, weights=[embedding_weight]) 53 | self.conv1 = Conv1D(128, 5, activation='relu') 54 | self.pooling1 = MaxPooling1D(5) 55 | self.conv2 = Conv1D(128, 5, activation='relu') 56 | self.pooling2 = MaxPooling1D(5) 57 | self.conv3 = Conv1D(128, 5, activation='relu') 58 | self.global_pooling = GlobalAveragePooling1D() 59 | self.fc1 = Dense(128, activation='relu') 60 | self.fc2 = Dense(20, activation='softmax') 61 | 62 | def call(self, inputs): 63 | net = self.embedding(inputs) 64 | net = self.conv1(net) 65 | net = self.pooling1(net) 66 | net = self.conv2(net) 67 | net = self.pooling2(net) 68 | net = self.conv3(net) 69 | net = self.global_pooling(net) 70 | net = self.fc1(net) 71 | net = self.fc2(net) 72 | return net 73 | 74 | def get_embedding_weight(self, weight_path, word_index): 75 | # embedding_weight = np.zeros([word_num, embedding_dim]) 76 | embedding_weight = np.random.uniform(-0.05, 0.05, size=[word_num, embedding_dim]) 77 | cnt = 0 78 | with open(weight_path, 'r') as f: 79 | for line in f: 80 | values = line.split() 81 | word = values[0] 82 | if word in word_index.keys() and word_index[word]< word_num: 83 | weight = np.asarray(values[1:], dtype='float32') 84 | embedding_weight[word_index[word]] = weight 85 | cnt += 1 86 | print('word num: {}, matched num: {}'.format(len(word_index), cnt)) 87 | return embedding_weight 88 | 89 | def cross_entropy(y_true, y_pred): 90 | cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred) 91 | return tf.reduce_mean(cross_entropy) 92 | 93 | def accuracy(y_true, y_pred): 94 | correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1)) 95 | accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32)) 96 | return accuracy 97 | 98 | @tf.function 99 | def train_step(model, optimizer, x, y): 100 | with tf.GradientTape() as tape: 101 | prediction = model(x, training=True) 102 | loss = cross_entropy(y, prediction) 103 | gradients = tape.gradient(loss, model.trainable_variables) 104 | optimizer.apply_gradients(zip(gradients, model.trainable_variables)) 105 | return loss, prediction 106 | 107 | @tf.function 108 | def test_step(model, x, y): 109 | prediction = model(x, training=False) 110 | loss = cross_entropy(y, prediction) 111 | return loss, prediction 112 | 113 | def train(model, optimizer, sequences, labels): 114 | sum_loss = 0 115 | sum_accuracy = 0 116 | 117 | # random shuffle 118 | seed = np.random.randint(0, 65536) 119 | np.random.seed(seed) 120 | np.random.shuffle(sequences) 121 | np.random.seed(seed) 122 | np.random.shuffle(labels) 123 | 124 | for i in tqdm(range(iterations_per_epoch)): 125 | x = sequences[i * batch_size: (i + 1) * batch_size, :] 126 | y = labels[i * batch_size: (i + 1) * batch_size] 127 | y = tf.keras.utils.to_categorical(y, 20) 128 | 129 | loss, prediction = train_step(model, optimizer, x, y) 130 | sum_loss += loss 131 | sum_accuracy += accuracy(y, prediction) 132 | 133 | print('ce_loss:%f, accuracy:%f' % 134 | (sum_loss / 
iterations_per_epoch, sum_accuracy / iterations_per_epoch)) 135 | 136 | def test(model, sequences, labels): 137 | sum_loss = 0 138 | sum_accuracy = 0 139 | 140 | for i in tqdm(range(test_iterations)): 141 | x = sequences[i * test_batch_size: (i + 1) * test_batch_size, :] 142 | y = labels[i * test_batch_size: (i + 1) * test_batch_size] 143 | y = tf.keras.utils.to_categorical(y, 20) 144 | 145 | loss, prediction = test_step(model, x, y) 146 | sum_loss += loss 147 | sum_accuracy += accuracy(y, prediction) 148 | 149 | print('test, loss:%f, accuracy:%f' % 150 | (sum_loss / test_iterations, sum_accuracy / test_iterations)) 151 | 152 | 153 | if __name__ == '__main__': 154 | # gpu config 155 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 156 | tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True) 157 | 158 | train_texts, train_labels = load_data(train_data_path) 159 | test_texts, test_labels = load_data(test_data_path) 160 | print('train num: {}, test num: {}'.format(len(train_texts), len(test_texts))) 161 | 162 | tokenizer = Tokenizer(num_words=word_num) 163 | tokenizer.fit_on_texts(train_texts) 164 | 165 | print('most common words:\nword rank') 166 | word_index = tokenizer.word_index 167 | for w, c in word_index.items(): 168 | if c < 5: 169 | print(w, c) 170 | 171 | train_sequences = tokenizer.texts_to_sequences(train_texts) 172 | test_sequences = tokenizer.texts_to_sequences(test_texts) 173 | 174 | train_sequences = pad_sequences(train_sequences, maxlen=max_len) 175 | test_sequences = pad_sequences(test_sequences, maxlen=max_len) 176 | 177 | # get model 178 | model = Model() 179 | model.build(input_shape=(batch_size, max_len)) 180 | 181 | # show 182 | model.summary() 183 | 184 | # train 185 | optimizer = optimizers.Adam() 186 | for epoch in range(epoch_num): 187 | print('epoch %d' % epoch) 188 | train(model, optimizer, train_sequences, train_labels) 189 | test(model, test_sequences, test_labels) -------------------------------------------------------------------------------- /RNN/LSTM.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import json 4 | from tqdm import tqdm 5 | import os 6 | from tensorflow.keras import optimizers 7 | from tensorflow.keras.preprocessing.sequence import pad_sequences 8 | from tensorflow.keras.layers import Dense, Embedding 9 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 10 | 11 | data_path = '/home1/dataset/IMDB/imdb.npz' 12 | word_index_path = '/home1/dataset/IMDB/imdb_word_index.json' 13 | GLoVe_path = '/home1/dataset/GLoVe/glove.6B.100d.txt' 14 | word_num = 10000 15 | max_len = 256 16 | embedding_dim = 100 17 | 18 | # training config 19 | batch_size = 512 20 | train_num = 25000 21 | iterations_per_epoch = int(train_num / batch_size) 22 | epoch_num = 10 23 | 24 | # test config 25 | test_batch_size = 500 26 | test_num = 25000 27 | test_iterations = int(test_num / test_batch_size) 28 | 29 | class LSTMCell(tf.keras.layers.Layer): 30 | def __init__(self, output_dim, activation=tf.nn.tanh, forget_bias=1.0, **kwargs): 31 | self.output_dim = output_dim 32 | self.activation = activation 33 | self.forget_bias = forget_bias 34 | super(LSTMCell, self).__init__(**kwargs) 35 | 36 | def build(self, input_shape): 37 | self.kernel = self.add_weight(name='kernel', 38 | shape=(input_shape[-1] + self.output_dim, self.output_dim * 4), 39 | initializer=tf.initializers.glorot_uniform) 40 | self.bias = self.add_weight(name='bias', 41 | 
                                    shape=(self.output_dim * 4,),
42 |                                     initializer=tf.initializers.zeros)
43 | 
44 |     def call(self, inputs, state):
45 |         c, h = state
46 |         net = tf.concat([inputs, h], axis=-1)
47 |         net = tf.matmul(net, self.kernel) + self.bias
48 | 
49 |         i, j, f, o = tf.split(net, num_or_size_splits=4, axis=-1)
50 |         new_c = (c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * self.activation(j))
51 |         new_h = self.activation(new_c) * tf.sigmoid(o)
52 | 
53 |         new_state = (new_c, new_h)
54 |         return new_h, new_state
55 | 
56 | class LSTM(tf.keras.layers.Layer):
57 |     def __init__(self, output_dim, **kwargs):
58 |         self.output_dim = output_dim
59 |         self.cell = LSTMCell(output_dim)
60 |         super(LSTM, self).__init__(**kwargs)
61 | 
62 |     def call(self, inputs):
63 |         inputs = tf.transpose(inputs, [1, 0, 2])
64 |         # zero initial state
65 |         state = (tf.constant(0.0, shape=[inputs.shape[1], self.output_dim]),
66 |                  tf.constant(0.0, shape=[inputs.shape[1], self.output_dim]))
67 | 
68 |         output = []
69 |         inputs = tf.unstack(inputs, axis=0)
70 |         for i in range(len(inputs)):
71 |             h, state = self.cell(inputs[i], state)
72 |             output.append(h)
73 |         output = tf.stack(output, axis=0)
74 |         output = tf.transpose(output, [1, 0, 2])
75 | 
76 |         return output
77 | 
78 | class Model(tf.keras.models.Model):
79 |     def __init__(self, word_index, **kwargs):
80 |         super(Model, self).__init__(**kwargs)
81 | 
82 |         embedding_weight = self.get_embedding_weight(GLoVe_path, word_index)
83 |         self.embedding = Embedding(word_num, embedding_dim, weights=[embedding_weight])
84 |         self.LSTM = LSTM(128)
85 |         self.fc = Dense(2, activation='softmax')
86 | 
87 |     def call(self, inputs):
88 |         net = self.embedding(inputs)
89 |         net = self.LSTM(net)
90 |         net = self.fc(net[:, -1, :])
91 |         return net
92 | 
93 |     def get_embedding_weight(self, weight_path, word_index):
94 |         # embedding_weight = np.zeros([word_num, embedding_dim])
95 |         embedding_weight = np.random.uniform(-0.05, 0.05, size=[word_num, embedding_dim])
96 |         cnt = 0
97 |         with open(weight_path, 'r') as f:
98 |             for line in f:
99 |                 values = line.split()
100 |                 word = values[0]
101 |                 if word in word_index.keys() and word_index[word] + 3 < word_num:
102 |                     """
103 |                     In tf.keras.datasets.imdb.load_data(), there are 4 special tokens:
104 |                     <PAD>: 0
105 |                     <START>: 1
106 |                     <UNK>: 2
107 |                     <UNUSED>: 3
108 |                     So the word_index loaded from the official file, "imdb_word_index.json", needs to be shifted by +3.
109 | """ 110 | weight = np.asarray(values[1:], dtype='float32') 111 | embedding_weight[word_index[word] + 3] = weight 112 | cnt += 1 113 | print('word num: {}, matched num: {}'.format(len(word_index), cnt)) 114 | return embedding_weight 115 | 116 | def cross_entropy(y_true, y_pred): 117 | cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred) 118 | return tf.reduce_mean(cross_entropy) 119 | 120 | def accuracy(y_true, y_pred): 121 | correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1)) 122 | accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32)) 123 | return accuracy 124 | 125 | @tf.function 126 | def train_step(model, optimizer, x, y): 127 | with tf.GradientTape() as tape: 128 | prediction = model(x, training=True) 129 | loss = cross_entropy(y, prediction) 130 | gradients = tape.gradient(loss, model.trainable_variables) 131 | optimizer.apply_gradients(zip(gradients, model.trainable_variables)) 132 | return loss, prediction 133 | 134 | @tf.function 135 | def test_step(model, x, y): 136 | prediction = model(x, training=False) 137 | loss = cross_entropy(y, prediction) 138 | return loss, prediction 139 | 140 | def train(model, optimizer, sequences, labels): 141 | sum_loss = 0 142 | sum_accuracy = 0 143 | 144 | # random shuffle 145 | seed = np.random.randint(0, 65536) 146 | np.random.seed(seed) 147 | np.random.shuffle(sequences) 148 | np.random.seed(seed) 149 | np.random.shuffle(labels) 150 | 151 | for i in tqdm(range(iterations_per_epoch)): 152 | x = sequences[i * batch_size: (i + 1) * batch_size, :] 153 | y = labels[i * batch_size: (i + 1) * batch_size] 154 | y = tf.keras.utils.to_categorical(y, 2) 155 | 156 | loss, prediction = train_step(model, optimizer, x, y) 157 | sum_loss += loss 158 | sum_accuracy += accuracy(y, prediction) 159 | 160 | print('ce_loss:%f, accuracy:%f' % 161 | (sum_loss / iterations_per_epoch, sum_accuracy / iterations_per_epoch)) 162 | 163 | def test(model, sequences, labels): 164 | sum_loss = 0 165 | sum_accuracy = 0 166 | 167 | for i in tqdm(range(test_iterations)): 168 | x = sequences[i * test_batch_size: (i + 1) * test_batch_size, :] 169 | y = labels[i * test_batch_size: (i + 1) * test_batch_size] 170 | y = tf.keras.utils.to_categorical(y, 2) 171 | 172 | loss, prediction = test_step(model, x, y) 173 | sum_loss += loss 174 | sum_accuracy += accuracy(y, prediction) 175 | 176 | print('test, loss:%f, accuracy:%f' % 177 | (sum_loss / test_iterations, sum_accuracy / test_iterations)) 178 | 179 | 180 | if __name__ == '__main__': 181 | # gpu config 182 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 183 | tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True) 184 | 185 | # load data 186 | imdb = tf.keras.datasets.imdb 187 | (train_sequences, train_labels), (test_sequences, test_labels) = imdb.load_data(data_path, num_words=word_num) 188 | with open(word_index_path, 'r') as f: 189 | word_index = json.load(f) 190 | 191 | train_sequences = pad_sequences(train_sequences, maxlen=max_len) 192 | test_sequences = pad_sequences(test_sequences, maxlen=max_len) 193 | 194 | # get model 195 | model = Model(word_index) 196 | model.build(input_shape=(batch_size, max_len)) 197 | 198 | # show 199 | model.summary() 200 | 201 | # train 202 | optimizer = optimizers.Adam() 203 | for epoch in range(epoch_num): 204 | print('epoch %d' % epoch) 205 | train(model, optimizer, train_sequences, train_labels) 206 | test(model, test_sequences, test_labels) 207 | 208 | 
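A minimal shape check for the custom LSTM defined above (a sketch, not part of the repository; it assumes the LSTMCell and LSTM classes from RNN/LSTM.py are in scope):

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.randn(4, 10, 8), dtype=tf.float32)  # batch=4, time steps=10, features=8
lstm = LSTM(128)                                               # custom layer from RNN/LSTM.py
out = lstm(x)
print(out.shape)                                               # (4, 10, 128)

Model.call only feeds the last time step, net[:, -1, :], into the final Dense layer, so the classifier sees a single 128-dimensional vector per review.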
-------------------------------------------------------------------------------- /CNN/ResNet_CIFAR.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import pickle as p 4 | from tqdm import tqdm 5 | import os 6 | import cv2 7 | import time 8 | from tensorflow.keras import models, optimizers, regularizers 9 | from tensorflow.keras.layers import Conv2D, AveragePooling2D, BatchNormalization, Flatten, Dense, Input, add, Activation 10 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 11 | 12 | # network config 13 | stack_n = 18 # layers = stack_n * 6 + 2 14 | weight_decay = 1e-4 15 | 16 | # training config 17 | batch_size = 128 18 | train_num = 50000 19 | iterations_per_epoch = int(train_num / batch_size) 20 | learning_rate = [0.1, 0.01, 0.001] 21 | boundaries = [80 * iterations_per_epoch, 120 * iterations_per_epoch] 22 | epoch_num = 200 23 | 24 | # test config 25 | test_batch_size = 200 26 | test_num = 10000 27 | test_iterations = int(test_num / test_batch_size) 28 | 29 | def load_CIFAR_batch(filename): 30 | """ load single batch of cifar """ 31 | with open(filename, 'rb')as f: 32 | datadict = p.load(f, encoding='iso-8859-1') 33 | X = datadict['data'] 34 | Y = datadict['labels'] 35 | X = X.reshape(10000, 3, 32, 32) 36 | Y = np.array(Y) 37 | return X, Y 38 | 39 | 40 | def load_CIFAR(Foldername): 41 | train_data = np.zeros([50000, 32, 32, 3], dtype=np.float32) 42 | train_label = np.zeros([50000, 10], dtype=np.float32) 43 | test_data = np.zeros([10000, 32, 32, 3], dtype=np.float32) 44 | test_label = np.zeros([10000, 10], dtype=np.float32) 45 | 46 | for sample in range(5): 47 | X, Y = load_CIFAR_batch(Foldername + "/data_batch_" + str(sample + 1)) 48 | 49 | for i in range(3): 50 | train_data[10000 * sample:10000 * (sample + 1), :, :, i] = X[:, i, :, :] 51 | for i in range(10000): 52 | train_label[i + 10000 * sample][Y[i]] = 1 53 | 54 | X, Y = load_CIFAR_batch(Foldername + "/test_batch") 55 | for i in range(3): 56 | test_data[:, :, :, i] = X[:, i, :, :] 57 | for i in range(10000): 58 | test_label[i][Y[i]] = 1 59 | 60 | return train_data, train_label, test_data, test_label 61 | 62 | def color_normalize(train_images, test_images): 63 | mean = [np.mean(train_images[:, :, :, i]) for i in range(3)] # [125.307, 122.95, 113.865] 64 | std = [np.std(train_images[:, :, :, i]) for i in range(3)] # [62.9932, 62.0887, 66.7048] 65 | for i in range(3): 66 | train_images[:, :, :, i] = (train_images[:, :, :, i] - mean[i]) / std[i] 67 | test_images[:, :, :, i] = (test_images[:, :, :, i] - mean[i]) / std[i] 68 | return train_images, test_images 69 | 70 | def images_augment(images): 71 | output = [] 72 | for img in images: 73 | img = cv2.copyMakeBorder(img, 4, 4, 4, 4, cv2.BORDER_CONSTANT, value=[0, 0, 0]) 74 | x = np.random.randint(0, 8) 75 | y = np.random.randint(0, 8) 76 | if np.random.randint(0, 2): 77 | img = cv2.flip(img, 1) 78 | output.append(img[x: x+32, y:y+32, :]) 79 | return np.ascontiguousarray(output, dtype=np.float32) 80 | 81 | 82 | def residual_block(inputs, channels, strides=(1, 1)): 83 | net = BatchNormalization(momentum=0.9, epsilon=1e-5)(inputs) 84 | net = Activation('relu')(net) 85 | 86 | if strides == (1, 1): 87 | shortcut = inputs 88 | else: 89 | shortcut = Conv2D(channels, (1, 1), strides=strides)(net) 90 | 91 | net = Conv2D(channels, (3, 3), padding='same', strides=strides)(net) 92 | net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net) 93 | net = Activation('relu')(net) 94 | net = Conv2D(channels, (3, 3), 
padding='same')(net) 95 | 96 | net = add([net, shortcut]) 97 | return net 98 | 99 | def ResNet(inputs): 100 | net = Conv2D(16, (3, 3), padding='same')(inputs) 101 | 102 | for i in range(stack_n): 103 | net = residual_block(net, 16) 104 | 105 | net = residual_block(net, 32, strides=(2, 2)) 106 | for i in range(stack_n - 1): 107 | net = residual_block(net, 32) 108 | 109 | net = residual_block(net, 64, strides=(2, 2)) 110 | for i in range(stack_n - 1): 111 | net = residual_block(net, 64) 112 | 113 | net = BatchNormalization(momentum=0.9, epsilon=1e-5)(net) 114 | net = Activation('relu')(net) 115 | net = AveragePooling2D(8, 8)(net) 116 | net = Flatten()(net) 117 | net = Dense(10, activation='softmax')(net) 118 | return net 119 | 120 | def cross_entropy(y_true, y_pred): 121 | cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred) 122 | return tf.reduce_mean(cross_entropy) 123 | 124 | def l2_loss(model, weights=weight_decay): 125 | variable_list = [] 126 | for v in model.trainable_variables: 127 | if 'kernel' in v.name: 128 | variable_list.append(tf.nn.l2_loss(v)) 129 | return tf.add_n(variable_list) * weights 130 | 131 | def accuracy(y_true, y_pred): 132 | correct_num = tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1)) 133 | accuracy = tf.reduce_mean(tf.cast(correct_num, dtype=tf.float32)) 134 | return accuracy 135 | 136 | @tf.function 137 | def train_step(model, optimizer, x, y): 138 | with tf.GradientTape() as tape: 139 | prediction = model(x, training=True) 140 | ce = cross_entropy(y, prediction) 141 | l2 = l2_loss(model) 142 | loss = ce + l2 143 | gradients = tape.gradient(loss, model.trainable_variables) 144 | optimizer.apply_gradients(zip(gradients, model.trainable_variables)) 145 | return ce, prediction 146 | 147 | @tf.function 148 | def test_step(model, x, y): 149 | prediction = model(x, training=False) 150 | ce = cross_entropy(y, prediction) 151 | return ce, prediction 152 | 153 | def train(model, optimizer, images, labels): 154 | sum_loss = 0 155 | sum_accuracy = 0 156 | 157 | # random shuffle 158 | seed = np.random.randint(0, 65536) 159 | np.random.seed(seed) 160 | np.random.shuffle(images) 161 | np.random.seed(seed) 162 | np.random.shuffle(labels) 163 | 164 | for i in tqdm(range(iterations_per_epoch)): 165 | x = images[i * batch_size: (i + 1) * batch_size, :, :, :] 166 | y = labels[i * batch_size: (i + 1) * batch_size, :] 167 | x = images_augment(x) 168 | 169 | loss, prediction = train_step(model, optimizer, x, y) 170 | sum_loss += loss 171 | sum_accuracy += accuracy(y, prediction) 172 | 173 | print('ce_loss:%f, l2_loss:%f, accuracy:%f' % 174 | (sum_loss / iterations_per_epoch, l2_loss(model), sum_accuracy / iterations_per_epoch)) 175 | 176 | def test(model, images, labels): 177 | sum_loss = 0 178 | sum_accuracy = 0 179 | 180 | for i in tqdm(range(test_iterations)): 181 | x = images[i * test_batch_size: (i + 1) * test_batch_size, :, :, :] 182 | y = labels[i * test_batch_size: (i + 1) * test_batch_size, :] 183 | 184 | loss, prediction = test_step(model, x, y) 185 | sum_loss += loss 186 | sum_accuracy += accuracy(y, prediction) 187 | 188 | print('test, loss:%f, accuracy:%f' % 189 | (sum_loss / test_iterations, sum_accuracy / test_iterations)) 190 | 191 | 192 | if __name__ == '__main__': 193 | # gpu config 194 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 195 | tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True) 196 | 197 | # load data 198 | # (train_images, train_labels, test_images, test_labels) = 
load_CIFAR('/home/user/Documents/dataset/Cifar-10') 199 | (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data() 200 | train_labels = tf.keras.utils.to_categorical(train_labels, 10) 201 | test_labels = tf.keras.utils.to_categorical(test_labels, 10) 202 | 203 | train_images = np.array(train_images, dtype=np.float32) 204 | test_images = np.array(test_images, dtype=np.float32) 205 | train_images, test_images = color_normalize(train_images, test_images) 206 | 207 | # get model 208 | img_input = Input(shape=(32, 32, 3)) 209 | output = ResNet(img_input) 210 | model = models.Model(img_input, output) 211 | 212 | # show 213 | model.summary() 214 | 215 | # train 216 | learning_rate_schedules = optimizers.schedules.PiecewiseConstantDecay(boundaries, learning_rate) 217 | optimizer = optimizers.SGD(learning_rate=learning_rate_schedules, momentum=0.9, nesterov=True) 218 | 219 | for epoch in range(epoch_num): 220 | print('epoch %d' % epoch) 221 | train(model, optimizer, train_images, train_labels) 222 | test(model, test_images, test_labels) 223 | 224 | --------------------------------------------------------------------------------
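For reference, a small sketch (not part of the repository) of how the piecewise-constant learning-rate schedule in CNN/ResNet_CIFAR.py behaves; it restates the constants defined in that file:

from tensorflow.keras import optimizers

iterations_per_epoch = int(50000 / 128)
boundaries = [80 * iterations_per_epoch, 120 * iterations_per_epoch]
learning_rate = [0.1, 0.01, 0.001]

schedule = optimizers.schedules.PiecewiseConstantDecay(boundaries, learning_rate)
for epoch in [0, 79, 81, 119, 121, 199]:
    step = epoch * iterations_per_epoch
    print(epoch, float(schedule(step)))  # 0.1 for the first ~80 epochs, 0.01 until ~epoch 120, then 0.001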