├── README.md ├── main.py └── pre_processing.py /README.md: -------------------------------------------------------------------------------- 1 | # Dog_Classification 2 | 百度西交大宠物狗识别比赛baseline【keras】 3 | 整体是按照杨培文大佬的开源做的:https://ypw.io/ 4 | 没有用Resnet50,因为用了发现效果不好 5 | 训练集进行了数据增强2倍,测试集进行了2倍增强得到三个测试集,分别预测最后取平均结果 6 | 线上成绩0.1867 7 | 机器配置:笔记本,显卡M1050ti,cpuI5 8 | 目的:为了保存代码 9 | 结尾:感谢大队长一路的帮助 10 | 感想:第一次接触深度学习,第一次接触keras,欠缺的东西太多了,还需要修炼 11 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from keras.models import * 2 | from keras.layers import * 3 | from keras.applications import * 4 | from keras.preprocessing.image import * 5 | import h5py 6 | ''' 7 | 训练集和验证集生成特征文件write_gap() 8 | ''' 9 | def write_gap(MODEL, gap_name, image_size, lambda_func=None): 10 | width = image_size[0] 11 | height = image_size[1] 12 | input_tensor = Input((height, width, 3)) 13 | x = input_tensor 14 | if lambda_func: 15 | x = Lambda(lambda_func)(x) 16 | base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False) 17 | model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output)) 18 | gen = ImageDataGenerator() 19 | train_generator = gen.flow_from_directory("E:/PyData/BaiDuDog/train_class", image_size, shuffle=False, batch_size=32) 20 | val_generator = gen.flow_from_directory("E:/PyData/BaiDuDog/val_class", image_size, shuffle=False, batch_size=32) 21 | train = model.predict_generator(train_generator, train_generator.samples//32+1,verbose=1) 22 | valid = model.predict_generator(val_generator, val_generator.samples//32+1, verbose=1) 23 | with h5py.File(gap_name) as h: 24 | h.create_dataset("train", data=train) 25 | h.create_dataset("valid", data=valid) 26 | h.create_dataset("y_train", data=train_generator.classes) 27 | h.create_dataset("y_valid", data=val_generator.classes) 28 | return train_generator.class_indices 29 | class_dict = write_gap(InceptionV3, "gap_t_v_InceptionV3.h5",(299, 299), inception_v3.preprocess_input) 30 | class_dict = write_gap(Xception, "gap_t_v_Xception.h5", (299, 299),xception.preprocess_input) 31 | ''' 32 | 测试集生成特征文件test_write_gap() 33 | ''' 34 | def test_write_gap(MODEL, gap_name, path, image_size, lambda_func=None): 35 | width = image_size[0] 36 | height = image_size[1] 37 | input_tensor = Input((height, width, 3)) 38 | x = input_tensor 39 | if lambda_func: 40 | x = Lambda(lambda_func)(x) 41 | base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False) 42 | model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output)) 43 | gen = ImageDataGenerator() 44 | test_generator = gen.flow_from_directory(path, image_size, shuffle=False, batch_size=32, class_mode=None) 45 | test = model.predict_generator(test_generator, test_generator.samples//32+1,verbose=1) 46 | with h5py.File(gap_name) as h: 47 | h.create_dataset("test", data=test) 48 | test_write_gap(InceptionV3, "gap_test_InceptionV3.h5","E:/PyData/BaiDuDog/test",(299, 299), inception_v3.preprocess_input) 49 | test_write_gap(Xception, "gap_test_Xception.h5","E:/PyData/BaiDuDog/test",(299, 299), xception.preprocess_input) 50 | test_write_gap(InceptionV3, "gap_test1_InceptionV3.h5","E:/PyData/BaiDuDog/test1",(299, 299), inception_v3.preprocess_input) 51 | test_write_gap(Xception, "gap_test1_Xception.h5","E:/PyData/BaiDuDog/test1",(299, 299), xception.preprocess_input) 52 | test_write_gap(InceptionV3, "gap_test2_InceptionV3.h5","E:/PyData/BaiDuDog/test2",(299, 299), inception_v3.preprocess_input) 53 | test_write_gap(Xception, "gap_test2_Xception.h5","E:/PyData/BaiDuDog/test2",(299, 299), xception.preprocess_input) 54 | ''' 55 | 载入特征向量load_feature() 56 | ''' 57 | import h5py 58 | import numpy as np 59 | import keras 60 | from sklearn.utils import shuffle 61 | np.random.seed(2017) 62 | 63 | X_train = [] 64 | X_valid = [] 65 | for filename in ["gap_t_v_InceptionV3.h5", "gap_t_v_Xception.h5"]: 66 | with h5py.File(filename, 'r') as h: 67 | X_train.append(np.array(h['train'])) 68 | y_train = np.array(h['y_train']) 69 | X_valid.append(np.array(h['valid'])) 70 | y_valid = np.array(h['y_valid']) 71 | X_train = np.concatenate(X_train, axis=1) 72 | y_train = keras.utils.to_categorical(y_train, 100) 73 | X_train, y_train = shuffle(X_train, y_train) 74 | X_valid = np.concatenate(X_valid, axis=1) 75 | y_valid = keras.utils.to_categorical(y_valid, 100) 76 | X_valid, y_valid = shuffle(X_valid, y_valid) 77 | X_test = [] 78 | for filename in ["gap_test_InceptionV3.h5", "gap_test_Xception.h5"]: 79 | with h5py.File(filename, 'r') as h: 80 | X_test.append(np.array(h['test'])) 81 | X_test = np.concatenate(X_test, axis=1) 82 | X_test1 = [] 83 | for filename in ["gap_test1_InceptionV3.h5", "gap_test1_Xception.h5"]: 84 | with h5py.File(filename, 'r') as h: 85 | X_test1.append(np.array(h['test'])) 86 | X_test1 = np.concatenate(X_test1, axis=1) 87 | X_test2 = [] 88 | for filename in ["gap_test2_InceptionV3.h5", "gap_test2_Xception.h5"]: 89 | with h5py.File(filename, 'r') as h: 90 | X_test2.append(np.array(h['test'])) 91 | X_test2 = np.concatenate(X_test2, axis=1) 92 | print(X_train.shape,y_train.shape,X_valid.shape,y_valid.shape,X_test.shape,X_test1.shape,X_test2.shape) 93 | ''' 94 | 构建模型 95 | ''' 96 | from keras.models import * 97 | from keras.layers import * 98 | np.random.seed(2017) 99 | 100 | input_tensor = Input(X_train.shape[1:]) 101 | x = Dropout(0.5)(input_tensor) 102 | x = Dense(100, activation='softmax')(x) 103 | model = Model(input_tensor, x) 104 | model.compile(optimizer='Adadelta',loss='categorical_crossentropy',metrics=['accuracy']) 105 | ''' 106 | 训练 107 | ''' 108 | model.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_valid,y_valid)) 109 | ''' 110 | 预测 111 | ''' 112 | y_pred0 = model.predict(X_test, verbose=1) 113 | ''' 114 | 预测 115 | ''' 116 | y_pred1 = model.predict(X_test1, verbose=1) 117 | ''' 118 | 预测 119 | ''' 120 | y_pred2 = model.predict(X_test2, verbose=1) 121 | y_pred = (y_pred0+y_pred1+y_pred2)/3 122 | ''' 123 | 取每行概率最大索引 124 | ''' 125 | label_index = [] 126 | for i in y_pred: 127 | label_index.append(list(i).index(max(list(i)))) 128 | ''' 129 | 取图片id 130 | ''' 131 | test_ID = [] 132 | gen = ImageDataGenerator() 133 | test_generator = gen.flow_from_directory("E:/PyData/BaiDuDog/test", (224, 224), shuffle=False, batch_size=16, class_mode=None) 134 | for i,fname in enumerate(test_generator.filenames): 135 | test_ID.append(fname[fname.rfind('\\')+1:fname.rfind('.')]) 136 | ''' 137 | 写入txt 138 | ''' 139 | file = open('E:/PyData/BaiDuDog/sample_sub.txt','w') 140 | for i in range(10593): 141 | file.writelines(list(class_dict.keys())[list(class_dict.values()).index(label_index[i])]) 142 | file.writelines('\t') 143 | file.writelines(test_ID[i]) 144 | file.writelines('\n') 145 | file.close() 146 | -------------------------------------------------------------------------------- /pre_processing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | ''' 5 | 删除训练txt中重复的delete_train_txt(),先print,手动删除。。。 6 | ''' 7 | def delete_train_txt(): 8 | file = open('E:/PyData/BaiDuDog/train.txt') 9 | lines = file.readlines() 10 | pic_id = [] 11 | for line in lines: 12 | pic_id.append(line.split(' ')[0]) 13 | print(len(pic_id)) 14 | pic_id_1 = list(set(pic_id)) 15 | print(len(pic_id_1)) 16 | for i in pic_id_1: 17 | pic_id.remove(i) 18 | print(pic_id) 19 | file.close() 20 | ''' 21 | 训练数据分类train_devide_class() 22 | ''' 23 | def train_devide_class(): 24 | train_path = 'E:/PyData/BaiDuDog/train' 25 | save_path = 'E:/PyData/BaiDuDog/train_class' 26 | file = open('E:/PyData/BaiDuDog/train.txt') 27 | lines = file.readlines() 28 | for line in lines: 29 | pic_id = line.split(' ')[0] + '.jpg' 30 | label = line.split(' ')[1].split(' ')[0] 31 | if (os.path.exists(save_path + '/' + label) == False): 32 | os.makedirs(save_path + '/' + label) 33 | shutil.copy(train_path + '/' + pic_id, save_path + '/' + label + '/') 34 | ''' 35 | 验证数据分类valid_devide_class() 36 | ''' 37 | def valid_devide_class(): 38 | train_path = 'E:/PyData/BaiDuDog/valid' 39 | save_path = 'E:/PyData/BaiDuDog/valid_class' 40 | file = open('E:/PyData/BaiDuDog/val.txt') 41 | lines = file.readlines() 42 | for line in lines: 43 | pic_id = line.split(' ')[0] + '.jpg' 44 | label = line.split(' ')[1].split(' ')[0] 45 | if (os.path.exists(save_path + '/' + label) == False): 46 | os.makedirs(save_path + '/' + label) 47 | shutil.copy(train_path + '/' + pic_id, save_path + '/' + label + '/') 48 | ''' 49 | 构建验证集set_val(),从分类好的验证集里每个类别拿出18张构建本地验证集,然后把剩下的验证集复制到训练集中去,都进行训练 50 | ''' 51 | def set_val(): 52 | valid_path = 'E:/PyData/BaiDuDog/valid_class' 53 | save_path = 'E:/PyData/BaiDuDog/val_class' 54 | for classname in os.listdir(valid_path): 55 | tmp_path = valid_path + '/' + classname 56 | i = 0 57 | for pic in os.listdir(tmp_path): 58 | if i < 18 : 59 | if (os.path.exists(save_path+'/'+classname) == False): 60 | os.makedirs(save_path+'/'+classname) 61 | shutil.move(tmp_path+'/'+pic,save_path+'/'+classname+'/'+pic) 62 | i += 1 63 | ''' 64 | 训练集数据扩增DataAugmentation() 65 | ''' 66 | from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img 67 | def DataAugmentation(path): 68 | datagen = ImageDataGenerator( 69 | rotation_range=40, 70 | shear_range=0.2, 71 | zoom_range=0.2, 72 | horizontal_flip=True, 73 | fill_mode='nearest') 74 | j = 0 75 | for class_name in os.listdir(path): 76 | if j%20 == 0: 77 | print('leave:',100-j) 78 | j += 1 79 | tmp_path = path + '/' + class_name 80 | for pic in os.listdir(tmp_path): 81 | img = load_img(tmp_path + '/' + pic) 82 | x = img_to_array(img) 83 | x = x.reshape((1,) + x.shape) 84 | i = 0 85 | for batch in datagen.flow(x, batch_size=1,save_to_dir=tmp_path, save_prefix=pic.split('.')[0], save_format='jpg'): 86 | i += 1 87 | if i > 1: 88 | break 89 | ''' 90 | 测试集数据扩增testAugmentation(),测试集要增强2次,分开独立保存。每次要修改save_to_dir='E:/PyData/BaiDuDog/test_kz_2'分别为test_kz_1和test_kz_2 91 | ''' 92 | from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img 93 | 94 | def testAugmentation(path): 95 | datagen = ImageDataGenerator( 96 | rotation_range=30, 97 | shear_range=0.2, 98 | zoom_range=0.2, 99 | horizontal_flip=True, 100 | fill_mode='nearest') 101 | for pic in os.listdir(path): 102 | img = load_img(path + '/' + pic) 103 | x = img_to_array(img) 104 | x = x.reshape((1,) + x.shape) 105 | i = 0 106 | for batch in datagen.flow(x, batch_size=1,save_to_dir='E:/PyData/BaiDuDog/test_kz_2', save_prefix=pic.split('.')[0], save_format='jpg'): 107 | i += 1 108 | if i > 0: 109 | break 110 | --------------------------------------------------------------------------------