├── images
    ├── AK
    │   ├── AK1.jpg
    │   └── AK2.jpg
    └── SK
    │   ├── SK1.jpg
    │   └── SK2.jpg
├── __pycache__
    ├── paths.cpython-35.pyc
    └── load_datasets.cpython-35.pyc
├── log
    └── events.out.tfevents.1557201695.room
├── paths.py
├── load_train_test_data.py
├── eval.py
├── predict.py
├── README.md
├── load_datasets.py
└── train.py


/images/AK/AK1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/images/AK/AK1.jpg


--------------------------------------------------------------------------------
/images/AK/AK2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/images/AK/AK2.jpg


--------------------------------------------------------------------------------
/images/SK/SK1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/images/SK/SK1.jpg


--------------------------------------------------------------------------------
/images/SK/SK2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/images/SK/SK2.jpg


--------------------------------------------------------------------------------
/__pycache__/paths.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/__pycache__/paths.cpython-35.pyc


--------------------------------------------------------------------------------
/log/events.out.tfevents.1557201695.room:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/log/events.out.tfevents.1557201695.room


--------------------------------------------------------------------------------
/__pycache__/load_datasets.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jinghaiting/binary_classification_keras/HEAD/__pycache__/load_datasets.cpython-35.pyc


--------------------------------------------------------------------------------
/paths.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import inspect
 3 | 
 4 | def mkdir_if_not_exist(dir_list):
 5 |     for directory in dir_list:
 6 |         if not os.path.exists(directory):
 7 |             os.makedirs(directory)
 8 | 
 9 | curr_filename = inspect.getfile(inspect.currentframe())
10 | root_dir = os.path.dirname(os.path.abspath(curr_filename))
11 | 
12 | 


--------------------------------------------------------------------------------
/load_train_test_data.py:
--------------------------------------------------------------------------------
 1 | from sklearn.model_selection import train_test_split
 2 | 
 3 | from load_datasets import load_datasets
 4 | import numpy as np
 5 | 
 6 | X, y = load_datasets()
 7 | 
 8 | def load_test_data():
 9 |     X_test = X[650:]
10 |     y_test = y[650:]
11 |     return X_test, y_test
12 | 
13 | def load_train_valid_data(test_split):
14 |     X_tmp = X[:650]
15 |     y_tmp = y[:650]
16 |     X_train, X_valid, y_train, y_valid = train_test_split(X_tmp, y_tmp, test_size=test_split, random_state=1)
17 | 
18 |     return  X_train, X_valid, y_train,  y_valid
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from load_train_test_data import load_test_data
 3 | from paths import root_dir
 4 | from keras.models import load_model
 5 | import numpy as np
 6 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 7 | 
 8 | # 指定使用的GPU
 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "8"
10 | 
11 | model_path = os.path.join(root_dir, 'model_data', 'model.h5')
12 | 
13 | if __name__ == '__main__':
14 |     # 加载测试数据
15 |     X_test, y_test = load_test_data()
16 | 
17 |     # 导入模型
18 |     model = load_model(model_path)
19 | 
20 |     # 预测
21 |     y_pred = model.predict(X_test)
22 | 
23 |     # one-hot ==> 标签
24 |     y_test = np.argmax(y_test, axis=1)
25 |     y_pred = np.argmax(y_pred, axis=1)
26 | 
27 |     # 计算准确率、精确率、召回率、F1
28 |     accuracy = accuracy_score(y_test, y_pred)
29 |     precision = precision_score(y_test, y_pred)
30 |     recall = recall_score(y_test, y_pred)
31 |     f1 = f1_score(y_test, y_pred)
32 | 
33 |     print("accuracy_score = %.2f" % accuracy)
34 |     print("precision_score = %.2f" % precision)
35 |     print("recall_score = %.2f" % recall)
36 |     print("f1_score = %.2f" % f1)
37 | 


--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from skimage import io
 3 | from paths import root_dir
 4 | import numpy as np
 5 | from keras.preprocessing import image
 6 | from keras.models import load_model
 7 | 
 8 | # 指定使用的GPU
 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "8"
10 | 
11 | images_dir = os.path.join(root_dir, 'images')
12 | model_path = os.path.join(root_dir, 'model_data', 'model.h5')
13 | class_name = {0: 'AK', 1: 'SK'}
14 | 
15 | if __name__ == '__main__':
16 |     # 导入模型
17 |     model = load_model(model_path)
18 | 
19 |     for AK_or_SK in os.listdir(images_dir):
20 |         for picture_name in os.listdir(os.path.join(images_dir, AK_or_SK)):
21 |             # 读取图片
22 |             img_path = os.path.join(images_dir, AK_or_SK, picture_name)
23 |             img = image.load_img(img_path, target_size=(224, 224))  # 通道3默认
24 |             img = image.img_to_array(img)  # 变为numpy数组
25 |             img = np.expand_dims(img, axis=0)  # 扩充维度
26 | 
27 |             # 预测
28 |             preds = model.predict(img)
29 | 
30 |             # 打印图片类别
31 |             # print(preds)
32 |             y_pred = np.argmax(preds, axis=1)
33 | 
34 |             label = class_name[y_pred[0]]   #y_pred[0]解释：打印出来类似于[0]  [1] ,所以取列表的第一个元素，即索引[0]
35 | 
36 |             print(picture_name, '的预测概率是：')
37 |             print(preds, ' --> ', label)
38 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### 问题描述
 2 | 
 3 | 要解决的是一个医学图像的二分类问题，有`AK`和`SK`两种病症，根据一定量数据，进行训练，对图像进行预测。
 4 | 
 5 | **给定图片数据的格式：**
 6 | 
 7 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g3hceviwbvj20ab0eqgm5.jpg)
 8 | 
 9 | 
10 | 
11 | ### 解决思路
12 | 
13 | 整体上采用迁移学习来训练神经网络，网络结构使用 InceptionV3，框架采用 Keras。
14 | 
15 | **具体思路：**
16 | 
17 | 1. 读取图片数据，保存成`.npy`格式，方便后续加载
18 | 2. 标签采用one-hot形式，由于标签隐藏在文件夹命名中，所以需要自行添加标签，并保存到`.npy`文件中，方便后续加载
19 | 3. 将数据分为训练集、验证集、测试集
20 | 4. 使用keras建立InceptionV3基本模型，不包括顶层，使用预训练权重，在基本模型的基础上自定义几层神经网络，得到最后的模型，对模型进行训练
21 | 5. 优化模型，调整超参数，提高准确率
22 | 6. 在测试集上对模型进行评估，使用准确率、精确率、召回率和 F1 值
23 | 7. 对单张图片进行预测，并输出每种类别的概率
24 | 
25 | 
26 | 
27 | ### 代码结构
28 | 
29 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g2sv3ux8klj20uq0fgmyf.jpg)
30 | 
31 | 
32 | 
33 | ### 运行结果
34 | 
35 | **1. 训练结果**
36 | 
37 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g2svdxnpamj217v0fbjso.jpg)
38 | 
39 | **2. 评估结果**
40 | 
41 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g2svg4hlb4j20lq07i748.jpg)
42 | 
43 | **3. 预测结果**
44 | 
45 | ![](http://ww1.sinaimg.cn/large/e52819eagy1g2svk9htyij20di07eaa8.jpg)
46 | 
47 | 
48 | 
49 | ### 知识点总结
50 | 
51 | 1. 如何加载实际数据，如何保存成npy文件，如何打乱数据，如何划分数据，如何进行交叉验证
52 | 2. 如何使用keras进行迁移学习
53 | 3. keras中数据增强、回调函数的使用，回调函数涉及：学习速率调整、保存最好模型、tensorboard可视化
54 | 4. 如何使用sklearn计算准确率，精确率，召回率，F1_score
55 | 5. 如何对单张图片进行预测，并打印分类概率
56 | 6. 如何指定特定 GPU 进行训练，如何限制 GPU 的显存占用
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/load_datasets.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | from tqdm import tqdm
 4 | from skimage import io
 5 | from skimage import transform
 6 | from paths import root_dir, mkdir_if_not_exist
 7 | from sklearn.utils import shuffle
 8 | 
 9 | import matplotlib.pyplot as plt  # 画图
10 | 
# Raw images are read from <root>/datasets; the processed arrays are cached
# under <root>/cache.
datasets_dir = os.path.join(root_dir, 'datasets')
cached_dir = os.path.join(root_dir, 'cache')
# Ensure the cache directory exists (helper imported from paths.py).
mkdir_if_not_exist([cached_dir])
14 | 
15 | 
def process_data():
    """Read every image under ``datasets_dir`` and pair it with a one-hot label.

    The directory layout walked here is
    ``datasets_dir/<class dir>/<person dir>/<image file>``. A class directory
    whose name contains 'AK' is labelled ``[1, 0]``; one whose name contains
    'SK' is labelled ``[0, 1]``. Any other entry is reported and skipped.

    Returns:
        (images, labels): two parallel lists. Each image is a uint8 array
        resized to 224x224; each label is a two-element one-hot list.
    """
    images = []
    labels = []

    for AK_or_SK_dir in tqdm(os.listdir(datasets_dir)):
        # AK ==> [1, 0]   SK ==> [0, 1]
        if 'AK' in AK_or_SK_dir:
            label = [1, 0]
        elif 'SK' in AK_or_SK_dir:
            label = [0, 1]
        else:
            print('AK_or_SK_dir is error!')
            # There is no valid label for this entry. Falling through would
            # reuse the previous directory's label (or hit an unbound
            # ``label``), so skip it entirely.
            continue

        class_dir = os.path.join(datasets_dir, AK_or_SK_dir)
        for person_name_dir in tqdm(os.listdir(class_dir)):
            person_dir = os.path.join(class_dir, person_name_dir)
            for image_name in os.listdir(person_dir):
                img_path = os.path.join(person_dir, image_name)
                image = io.imread(img_path)
                # preserve_range=True keeps the original value range so the
                # cast to uint8 below does not collapse the pixel values.
                image = transform.resize(image, (224, 224),
                                         order=1, mode='constant',
                                         cval=0, clip=True,
                                         preserve_range=True,
                                         anti_aliasing=True)
                image = image.astype(np.uint8)
                images.append(image)
                labels.append(label)
    return images, labels
41 | 
42 | 
def load_datasets():
    """Return the image and label arrays, building the cache on first use.

    If both ``images_data.npy`` and ``labels.npy`` exist in ``cached_dir``
    they are loaded directly. Otherwise the raw data is processed, shuffled
    once, saved to those files and returned.

    Returns:
        (images, labels): NumPy arrays on both the cached and the uncached
        path, so callers can rely on ``.shape`` and array indexing.
    """
    images_npy_filename = os.path.join(cached_dir, 'images_data.npy')
    labels_npy_filename = os.path.join(cached_dir, 'labels.npy')

    if os.path.exists(images_npy_filename) and os.path.exists(labels_npy_filename):
        images = np.load(images_npy_filename)
        labels = np.load(labels_npy_filename)
    else:
        images, labels = process_data()
        # process_data() returns plain lists. Convert them before shuffling
        # so the first run returns the same array types as a later run that
        # reads the cache.
        images = np.asarray(images)
        labels = np.asarray(labels)
        # Shuffle once, then persist the shuffled order.
        images, labels = shuffle(images, labels)
        np.save(images_npy_filename, images)
        np.save(labels_npy_filename, labels)

    return images, labels
58 | 
59 | 
60 | 
if __name__ == '__main__':
    # Manual check: show one sample, then report the dataset dimensions.
    X, y = load_datasets()

    plt.imshow(X[7])  # draw the sample on the current figure
    plt.show()        # display it
    for summary in (X.shape, y.shape, len(X)):
        print(summary)
69 | 
70 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | # 导包
  2 | import os
  3 | 
  4 | from load_train_test_data import load_train_valid_data
  5 | from paths import root_dir, mkdir_if_not_exist
  6 | 
  7 | from keras_preprocessing.image import ImageDataGenerator
  8 | from keras.applications.inception_v3 import InceptionV3
  9 | from keras.layers import Dense, GlobalAveragePooling2D, Dropout
 10 | from keras.models import Model
 11 | 
 12 | from keras.callbacks import TensorBoard, ReduceLROnPlateau,ModelCheckpoint
 13 | 
 14 | from keras import regularizers
 15 | from keras.optimizers import Adam
 16 | 
 17 | # 指定使用的GPU
 18 | os.environ["CUDA_VISIBLE_DEVICES"] = "8"
 19 | 
 20 | file_name = os.path.join(root_dir, 'model_data','model.h5')
 21 | 
 22 | # 超参数
 23 | num_classes = 2
 24 | batch_size = 64
 25 | epochs = 30
 26 | dropout_rate = 0.25
 27 | reg = regularizers.l1(1e-4)
 28 | test_split = 0.2
 29 | lr = 1e-4
 30 | 
 31 | # 数据增强超参数
 32 | horizontal_flip = True
 33 | vertical_flip = True
 34 | rotation_angle = 180
 35 | width_shift_range = 0.1
 36 | height_shift_range = 0.1
 37 | 
 38 | 
 39 | def build_model():
 40 |     base_model = InceptionV3(weights='imagenet', include_top=False)
 41 | 
 42 |     x = base_model.output
 43 |     x = GlobalAveragePooling2D(name='GAP')(x)    #全局平均池化
 44 |     x = Dropout(rate=dropout_rate)(x)
 45 | 
 46 |     x = Dense(256, activation='elu', name='FC1',kernel_regularizer=reg)(x)
 47 |     x = Dropout(rate=dropout_rate)(x)
 48 | 
 49 |     x = Dense(128, activation='elu',name='FC2', kernel_regularizer=reg)(x)
 50 |     x = Dropout(rate=dropout_rate)(x)
 51 | 
 52 |     outputs = Dense(num_classes, activation='softmax',name='Pre')(x)
 53 | 
 54 |     model = Model(inputs=base_model.input, outputs=outputs)
 55 |     model.compile(optimizer=Adam(lr = lr), loss='categorical_crossentropy', metrics=['acc', ])
 56 |     model.summary()  # 打印网络结构
 57 |     return model
 58 | 
 59 | 
 60 | def train_model(model, X_train, y_train, X_valid, y_valid):
 61 | 
 62 |     tensorboard = TensorBoard(log_dir='./log', write_graph=False,
 63 |                  write_grads=True,
 64 |                  write_images=True)
 65 | 
 66 |     change_lr = ReduceLROnPlateau(monitor='val_loss',
 67 |                       factor=0.25,
 68 |                       patience=2,
 69 |                       verbose=1,
 70 |                       mode='auto',
 71 |                       min_lr=1e-7)
 72 |     checkpoint = ModelCheckpoint(filepath=file_name, monitor='val_acc', mode='auto', save_best_only='True')
 73 | 
 74 |     callback_lists = [tensorboard, change_lr, checkpoint]
 75 | 
 76 |     datagen = ImageDataGenerator(rotation_range=rotation_angle,
 77 |                                  horizontal_flip=horizontal_flip,
 78 |                                  vertical_flip=vertical_flip,
 79 |                                  width_shift_range=width_shift_range,
 80 |                                  height_shift_range=height_shift_range,
 81 |                                  )
 82 | 
 83 |     model.fit_generator(generator=datagen.flow(X_train, y_train, batch_size=batch_size),
 84 |                         steps_per_epoch=X_train.shape[0] // batch_size * 2,
 85 |                         epochs=epochs,
 86 |                         initial_epoch=0,  # 为啥要有这个参数
 87 |                         verbose=1,
 88 |                         validation_data=(X_valid, y_valid),
 89 |                         callbacks=callback_lists
 90 |                         )
 91 | 
 92 | if __name__ == '__main__':
 93 |     # 加载数据
 94 |     X_train, X_valid, y_train, y_valid = load_train_valid_data(test_split)
 95 | 
 96 |     # 建立模型
 97 |     model = build_model()
 98 | 
 99 |     # 训练模型
100 |     train_model(model, X_train, y_train, X_valid, y_valid)
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------