├── images ├── AK │ ├── AK1.jpg │ └── AK2.jpg └── SK │ ├── SK1.jpg │ └── SK2.jpg ├── __pycache__ ├── paths.cpython-35.pyc └── load_datasets.cpython-35.pyc ├── log └── events.out.tfevents.1557201695.room ├── paths.py ├── load_train_test_data.py ├── eval.py ├── predict.py ├── README.md ├── load_datasets.py └── train.py /images/AK/AK1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/images/AK/AK1.jpg -------------------------------------------------------------------------------- /images/AK/AK2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/images/AK/AK2.jpg -------------------------------------------------------------------------------- /images/SK/SK1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/images/SK/SK1.jpg -------------------------------------------------------------------------------- /images/SK/SK2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/images/SK/SK2.jpg -------------------------------------------------------------------------------- /__pycache__/paths.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/__pycache__/paths.cpython-35.pyc -------------------------------------------------------------------------------- /log/events.out.tfevents.1557201695.room: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/log/events.out.tfevents.1557201695.room -------------------------------------------------------------------------------- /__pycache__/load_datasets.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/__pycache__/load_datasets.cpython-35.pyc -------------------------------------------------------------------------------- /paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import inspect 3 | 4 | def mkdir_if_not_exist(dir_list): 5 | for directory in dir_list: 6 | if not os.path.exists(directory): 7 | os.makedirs(directory) 8 | 9 | curr_filename = inspect.getfile(inspect.currentframe()) 10 | root_dir = os.path.dirname(os.path.abspath(curr_filename)) 11 | 12 | -------------------------------------------------------------------------------- /load_train_test_data.py: -------------------------------------------------------------------------------- 1 | from sklearn.model_selection import train_test_split 2 | 3 | from load_datasets import load_datasets 4 | import numpy as np 5 | 6 | X, y = load_datasets() 7 | 8 | def load_test_data(): 9 | X_test = X[650:] 10 | y_test = y[650:] 11 | return X_test, y_test 12 | 13 | def load_train_valid_data(test_split): 14 | X_tmp = X[:650] 15 | y_tmp = y[:650] 16 | X_train, X_valid, y_train, y_valid = train_test_split(X_tmp, y_tmp, test_size=test_split, random_state=1) 17 | 18 | return X_train, X_valid, y_train, y_valid 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | from load_train_test_data import load_test_data 3 | from paths import root_dir 4 | from keras.models import load_model 5 | import numpy as np 6 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score 7 | 8 | # 指定使用的GPU 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "8" 10 | 11 | model_path = os.path.join(root_dir, 'model_data', 'model.h5') 12 | 13 | if __name__ == '__main__': 14 | # 加载测试数据 15 | X_test, y_test = load_test_data() 16 | 17 | # 导入模型 18 | model = load_model(model_path) 19 | 20 | # 预测 21 | y_pred = model.predict(X_test) 22 | 23 | # one-hot ==> 标签 24 | y_test = np.argmax(y_test, axis=1) 25 | y_pred = np.argmax(y_pred, axis=1) 26 | 27 | # 计算准确率、精确率、召回率、F1 28 | accuracy = accuracy_score(y_test, y_pred) 29 | precision = precision_score(y_test, y_pred) 30 | recall = recall_score(y_test, y_pred) 31 | f1 = f1_score(y_test, y_pred) 32 | 33 | print("accuracy_score = %.2f" % accuracy) 34 | print("precision_score = %.2f" % precision) 35 | print("recall_score = %.2f" % recall) 36 | print("f1_score = %.2f" % f1) 37 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | from skimage import io 3 | from paths import root_dir 4 | import numpy as np 5 | from keras.preprocessing import image 6 | from keras.models import load_model 7 | 8 | # 指定使用的GPU 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "8" 10 | 11 | images_dir = os.path.join(root_dir, 'images') 12 | model_path = os.path.join(root_dir, 'model_data', 'model.h5') 13 | class_name = {0: 'AK', 1: 'SK'} 14 | 15 | if __name__ == '__main__': 16 | # 导入模型 17 | model = load_model(model_path) 18 | 19 | for AK_or_SK in os.listdir(images_dir): 20 | for picture_name in os.listdir(os.path.join(images_dir, AK_or_SK)): 21 | # 读取图片 22 | img_path = os.path.join(images_dir, AK_or_SK, picture_name) 23 | img = image.load_img(img_path, target_size=(224, 224)) # 通道3默认 24 | img = image.img_to_array(img) # 变为numpy数组 25 | img = np.expand_dims(img, axis=0) # 扩充维度 26 | 27 | # 预测 28 | preds = model.predict(img) 29 | 30 | # 打印图片类别 31 | # print(preds) 32 | y_pred = np.argmax(preds, axis=1) 33 | 34 | label = class_name[y_pred[0]] #y_pred[0]解释:打印出来类似于[0] [1] ,所以取列表的第一个元素,即索引[0] 35 | 36 | print(picture_name, '的预测概率是:') 37 | print(preds, ' --> ', label) 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 问题描述 2 | 3 | 要解决的是一个医学图像的二分类问题,有`AK`和`SK`两种病症,根据一定量数据,进行训练,对图像进行预测。 4 | 5 | **给定图片数据的格式:** 6 | 7 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g3hceviwbvj20ab0eqgm5.jpg) 8 | 9 | 10 | 11 | ### 解决思路 12 | 13 | 整体上采用迁移学习来训练神经网络,使用InceptionV3结构,框架采用keras. 14 | 15 | **具体思路:** 16 | 17 | 1. 读取图片数据,保存成`.npy`格式,方便后续加载 18 | 2. 标签采用one-hot形式,由于标签隐藏在文件夹命名中,所以需要自行添加标签,并保存到`.npy`文件中,方便后续加载 19 | 3. 将数据分为训练集、验证集、测试集 20 | 4. 使用keras建立InceptionV3基本模型,不包括顶层,使用预训练权重,在基本模型的基础上自定义几层神经网络,得到最后的模型,对模型进行训练 21 | 5. 优化模型,调整超参数,提高准确率 22 | 6. 在测试集上对模型进行评估,使用精确率、召回率 23 | 7. 对单张图片进行预测,并输出每种类别的概率 24 | 25 | 26 | 27 | ### 代码结构 28 | 29 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g2sv3ux8klj20uq0fgmyf.jpg) 30 | 31 | 32 | 33 | ### 运行结果 34 | 35 | **1. 训练结果** 36 | 37 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g2svdxnpamj217v0fbjso.jpg) 38 | 39 | **2. 评估结果** 40 | 41 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g2svg4hlb4j20lq07i748.jpg) 42 | 43 | **3. 预测结果** 44 | 45 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g2svk9htyij20di07eaa8.jpg) 46 | 47 | 48 | 49 | ### 知识点总结 50 | 51 | 1. 如何加载实际数据,如何保存成npy文件,如何打乱数据,如何划分数据,如何进行交叉验证 52 | 2. 如何使用keras进行迁移学习 53 | 3. keras中数据增强、回调函数的使用,回调函数涉及:学习速率调整、保存最好模型、tensorboard可视化 54 | 4. 如何使用sklearn计算准确率,精确率,召回率,F1_score 55 | 5. 如何对单张图片进行预测,并打印分类概率 56 | 6. 如何指定特定GPU训练,如何指定使用GPU的内存情况 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /load_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from tqdm import tqdm 4 | from skimage import io 5 | from skimage import transform 6 | from paths import root_dir, mkdir_if_not_exist 7 | from sklearn.utils import shuffle 8 | 9 | import matplotlib.pyplot as plt # 画图 10 | 11 | datasets_dir = os.path.join(root_dir, 'datasets') 12 | cached_dir = os.path.join(root_dir, 'cache') 13 | mkdir_if_not_exist(dir_list=[cached_dir]) # paths.py文件处理 14 | 15 | 16 | def process_data(): 17 | images = [] 18 | labels = [] 19 | 20 | for AK_or_SK_dir in tqdm(os.listdir(datasets_dir)): 21 | # AK ==> [1,0] Sk ==> [0,1] 22 | if 'AK' in AK_or_SK_dir: 23 | label = [1, 0] 24 | elif 'SK' in AK_or_SK_dir: 25 | label = [0, 1] 26 | else: 27 | print('AK_or_SK_dir is error!') 28 | for person_name_dir in tqdm(os.listdir(os.path.join(datasets_dir, AK_or_SK_dir))): # 给路径,而不是文件名 29 | for image_name in os.listdir(os.path.join(datasets_dir, AK_or_SK_dir, person_name_dir)): 30 | img_path = os.path.join(datasets_dir, AK_or_SK_dir, person_name_dir, image_name) 31 | image = io.imread(img_path) 32 | image = transform.resize(image, (224, 224), 33 | order=1, mode='constant', 34 | cval=0, clip=True, 35 | preserve_range=True, 36 | anti_aliasing=True) 37 | image = image.astype(np.uint8) 38 | images.append(image) 39 | labels.append(label) 40 | return images, labels 41 | 42 | 43 | def load_datasets(): 44 | images_npy_filename = os.path.join(cached_dir, 'images_data.npy') 45 | labels_npy_filename = os.path.join(cached_dir, 'labels.npy') 46 | 47 | if os.path.exists(images_npy_filename) and os.path.exists(labels_npy_filename): 48 | images = np.load(images_npy_filename) 49 | labels = np.load(labels_npy_filename) 50 | else: 51 | images, labels = process_data() 52 | # 打乱后保存 53 | images, labels = shuffle(images, labels) 54 | np.save(images_npy_filename, images) 55 | np.save(labels_npy_filename, labels) 56 | 57 | return images, labels 58 | 59 | 60 | 61 | if __name__ == '__main__': 62 | 63 | X, y = load_datasets() 64 | plt.imshow(X[7]) #画在画布上 65 | plt.show() #显示 66 | print(X.shape) 67 | print(y.shape) 68 | print(len(X)) 69 | 70 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # 导包 2 | import os 3 | 4 | from load_train_test_data import load_train_valid_data 5 | from paths import root_dir, mkdir_if_not_exist 6 | 7 | from keras_preprocessing.image import ImageDataGenerator 8 | from keras.applications.inception_v3 import InceptionV3 9 | from keras.layers import Dense, GlobalAveragePooling2D, Dropout 10 | from keras.models import Model 11 | 12 | from keras.callbacks import TensorBoard, ReduceLROnPlateau,ModelCheckpoint 13 | 14 | from keras import regularizers 15 | from keras.optimizers import Adam 16 | 17 | # 指定使用的GPU 18 | os.environ["CUDA_VISIBLE_DEVICES"] = "8" 19 | 20 | file_name = os.path.join(root_dir, 'model_data','model.h5') 21 | 22 | # 超参数 23 | num_classes = 2 24 | batch_size = 64 25 | epochs = 30 26 | dropout_rate = 0.25 27 | reg = regularizers.l1(1e-4) 28 | test_split = 0.2 29 | lr = 1e-4 30 | 31 | # 数据增强超参数 32 | horizontal_flip = True 33 | vertical_flip = True 34 | rotation_angle = 180 35 | width_shift_range = 0.1 36 | height_shift_range = 0.1 37 | 38 | 39 | def build_model(): 40 | base_model = InceptionV3(weights='imagenet', include_top=False) 41 | 42 | x = base_model.output 43 | x = GlobalAveragePooling2D(name='GAP')(x) #全局平均池化 44 | x = Dropout(rate=dropout_rate)(x) 45 | 46 | x = Dense(256, activation='elu', name='FC1',kernel_regularizer=reg)(x) 47 | x = Dropout(rate=dropout_rate)(x) 48 | 49 | x = Dense(128, activation='elu',name='FC2', kernel_regularizer=reg)(x) 50 | x = Dropout(rate=dropout_rate)(x) 51 | 52 | outputs = Dense(num_classes, activation='softmax',name='Pre')(x) 53 | 54 | model = Model(inputs=base_model.input, outputs=outputs) 55 | model.compile(optimizer=Adam(lr = lr), loss='categorical_crossentropy', metrics=['acc', ]) 56 | model.summary() # 打印网络结构 57 | return model 58 | 59 | 60 | def train_model(model, X_train, y_train, X_valid, y_valid): 61 | 62 | tensorboard = TensorBoard(log_dir='./log', write_graph=False, 63 | write_grads=True, 64 | write_images=True) 65 | 66 | change_lr = ReduceLROnPlateau(monitor='val_loss', 67 | factor=0.25, 68 | patience=2, 69 | verbose=1, 70 | mode='auto', 71 | min_lr=1e-7) 72 | checkpoint = ModelCheckpoint(filepath=file_name, monitor='val_acc', mode='auto', save_best_only='True') 73 | 74 | callback_lists = [tensorboard, change_lr, checkpoint] 75 | 76 | datagen = ImageDataGenerator(rotation_range=rotation_angle, 77 | horizontal_flip=horizontal_flip, 78 | vertical_flip=vertical_flip, 79 | width_shift_range=width_shift_range, 80 | height_shift_range=height_shift_range, 81 | ) 82 | 83 | model.fit_generator(generator=datagen.flow(X_train, y_train, batch_size=batch_size), 84 | steps_per_epoch=X_train.shape[0] // batch_size * 2, 85 | epochs=epochs, 86 | initial_epoch=0, # 为啥要有这个参数 87 | verbose=1, 88 | validation_data=(X_valid, y_valid), 89 | callbacks=callback_lists 90 | ) 91 | 92 | if __name__ == '__main__': 93 | # 加载数据 94 | X_train, X_valid, y_train, y_valid = load_train_valid_data(test_split) 95 | 96 | # 建立模型 97 | model = build_model() 98 | 99 | # 训练模型 100 | train_model(model, X_train, y_train, X_valid, y_valid) 101 | 102 | 103 | --------------------------------------------------------------------------------