├── README.md
├── main.py
└── pre_processing.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Dog_Classification
 2 | 百度西交大宠物狗识别比赛baseline【keras】  
 3 | 整体是按照杨培文大佬的开源做的：https://ypw.io/  
 4 | 没有用Resnet50，因为用了发现效果不好  
 5 | 训练集进行了数据增强2倍，测试集进行了2倍增强得到三个测试集，分别预测最后取平均结果  
 6 | 线上成绩0.1867  
 7 | 机器配置：笔记本，显卡M1050ti，cpuI5  
 8 | 目的：为了保存代码  
 9 | 结尾：感谢大队长一路的帮助   
10 | 感想：第一次接触深度学习，第一次接触keras，欠缺的东西太多了，还需要修炼
11 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | from keras.models import *
  2 | from keras.layers import *
  3 | from keras.applications import *
  4 | from keras.preprocessing.image import *
  5 | import h5py
  6 | '''
  7 | 训练集和验证集生成特征文件write_gap()
  8 | '''
  9 | def write_gap(MODEL, gap_name, image_size, lambda_func=None):
 10 | 	width = image_size[0]
 11 | 	height = image_size[1]
 12 | 	input_tensor = Input((height, width, 3))
 13 | 	x = input_tensor
 14 | 	if lambda_func:
 15 | 		x = Lambda(lambda_func)(x)
 16 | 	base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
 17 | 	model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))
 18 | 	gen = ImageDataGenerator()
 19 | 	train_generator = gen.flow_from_directory("E:/PyData/BaiDuDog/train_class", image_size, shuffle=False, batch_size=32)
 20 | 	val_generator = gen.flow_from_directory("E:/PyData/BaiDuDog/val_class", image_size, shuffle=False, batch_size=32)
 21 | 	train = model.predict_generator(train_generator, train_generator.samples//32+1,verbose=1)
 22 | 	valid = model.predict_generator(val_generator, val_generator.samples//32+1, verbose=1)
 23 | 	with h5py.File(gap_name) as h:
 24 | 		h.create_dataset("train", data=train)
 25 | 		h.create_dataset("valid", data=valid)
 26 | 		h.create_dataset("y_train", data=train_generator.classes)
 27 | 		h.create_dataset("y_valid", data=val_generator.classes)
 28 | 	return train_generator.class_indices
 29 |   class_dict = write_gap(InceptionV3, "gap_t_v_InceptionV3.h5",(299, 299), inception_v3.preprocess_input)
 30 |   class_dict = write_gap(Xception, "gap_t_v_Xception.h5", (299, 299),xception.preprocess_input)
 31 |   '''
 32 | 测试集生成特征文件test_write_gap()
 33 | '''
 34 | def test_write_gap(MODEL, gap_name, path, image_size, lambda_func=None):
 35 | 	width = image_size[0]
 36 | 	height = image_size[1]
 37 | 	input_tensor = Input((height, width, 3))
 38 | 	x = input_tensor
 39 | 	if lambda_func:
 40 | 		x = Lambda(lambda_func)(x)
 41 | 	base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
 42 | 	model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))
 43 | 	gen = ImageDataGenerator()
 44 | 	test_generator = gen.flow_from_directory(path, image_size, shuffle=False, batch_size=32, class_mode=None)
 45 | 	test = model.predict_generator(test_generator, test_generator.samples//32+1,verbose=1)
 46 | 	with h5py.File(gap_name) as h:
 47 | 		h.create_dataset("test", data=test)
 48 |  test_write_gap(InceptionV3, "gap_test_InceptionV3.h5","E:/PyData/BaiDuDog/test",(299, 299), inception_v3.preprocess_input)
 49 |  test_write_gap(Xception, "gap_test_Xception.h5","E:/PyData/BaiDuDog/test",(299, 299), xception.preprocess_input)
 50 |  test_write_gap(InceptionV3, "gap_test1_InceptionV3.h5","E:/PyData/BaiDuDog/test1",(299, 299), inception_v3.preprocess_input)
 51 |  test_write_gap(Xception, "gap_test1_Xception.h5","E:/PyData/BaiDuDog/test1",(299, 299), xception.preprocess_input)
 52 |  test_write_gap(InceptionV3, "gap_test2_InceptionV3.h5","E:/PyData/BaiDuDog/test2",(299, 299), inception_v3.preprocess_input)
 53 |  test_write_gap(Xception, "gap_test2_Xception.h5","E:/PyData/BaiDuDog/test2",(299, 299), xception.preprocess_input)
 54 |  '''
 55 | 载入特征向量load_feature()
 56 | '''
 57 | import h5py
 58 | import numpy as np
 59 | import keras
 60 | from sklearn.utils import shuffle
 61 | np.random.seed(2017)
 62 | 
 63 | X_train = []
 64 | X_valid = []
 65 | for filename in ["gap_t_v_InceptionV3.h5", "gap_t_v_Xception.h5"]:
 66 | 	with h5py.File(filename, 'r') as h:
 67 | 		X_train.append(np.array(h['train']))
 68 | 		y_train = np.array(h['y_train'])
 69 | 		X_valid.append(np.array(h['valid']))
 70 | 		y_valid = np.array(h['y_valid'])
 71 | X_train = np.concatenate(X_train, axis=1)
 72 | y_train = keras.utils.to_categorical(y_train, 100)
 73 | X_train, y_train = shuffle(X_train, y_train)
 74 | X_valid = np.concatenate(X_valid, axis=1)
 75 | y_valid = keras.utils.to_categorical(y_valid, 100)
 76 | X_valid, y_valid = shuffle(X_valid, y_valid)
 77 | X_test = []
 78 | for filename in ["gap_test_InceptionV3.h5", "gap_test_Xception.h5"]:
 79 | 	with h5py.File(filename, 'r') as h:
 80 | 		X_test.append(np.array(h['test']))
 81 | X_test = np.concatenate(X_test, axis=1)
 82 | X_test1 = []
 83 | for filename in ["gap_test1_InceptionV3.h5", "gap_test1_Xception.h5"]:
 84 | 	with h5py.File(filename, 'r') as h:
 85 | 		X_test1.append(np.array(h['test']))
 86 | X_test1 = np.concatenate(X_test1, axis=1)
 87 | X_test2 = []
 88 | for filename in ["gap_test2_InceptionV3.h5", "gap_test2_Xception.h5"]:
 89 | 	with h5py.File(filename, 'r') as h:
 90 | 		X_test2.append(np.array(h['test']))
 91 | X_test2 = np.concatenate(X_test2, axis=1)
 92 | print(X_train.shape,y_train.shape,X_valid.shape,y_valid.shape,X_test.shape,X_test1.shape,X_test2.shape)
 93 | '''
 94 | 构建模型
 95 | '''
 96 | from keras.models import *
 97 | from keras.layers import *
 98 | np.random.seed(2017)
 99 | 
100 | input_tensor = Input(X_train.shape[1:])
101 | x = Dropout(0.5)(input_tensor)
102 | x = Dense(100, activation='softmax')(x)
103 | model = Model(input_tensor, x)
104 | model.compile(optimizer='Adadelta',loss='categorical_crossentropy',metrics=['accuracy'])
105 | '''
106 | 训练
107 | '''
108 | model.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_valid,y_valid))
109 | '''
110 | 预测
111 | '''
112 | y_pred0 = model.predict(X_test, verbose=1)
113 | '''
114 | 预测
115 | '''
116 | y_pred1 = model.predict(X_test1, verbose=1)
117 | '''
118 | 预测
119 | '''
120 | y_pred2 = model.predict(X_test2, verbose=1)
121 | y_pred = (y_pred0+y_pred1+y_pred2)/3
122 | '''
123 | 取每行概率最大索引
124 | '''
import os

# Index of the highest probability in every averaged prediction row
# (the first one wins on ties).
label_index = np.argmax(y_pred, axis=1).tolist()

# Picture ids: the test file names with any directory part and the
# extension removed. os.path copes with both Windows and POSIX separators
# and with names that contain no dot.
gen = ImageDataGenerator()
test_generator = gen.flow_from_directory("E:/PyData/BaiDuDog/test", (224, 224), shuffle=False, batch_size=16, class_mode=None)
test_ID = [
    os.path.splitext(os.path.basename(fname))[0]
    for fname in test_generator.filenames
]

# Refuse to write a misaligned submission instead of relying on a
# hard-coded row count.
if len(test_ID) != len(label_index):
    raise ValueError(
        'number of predictions (%d) does not match number of test images (%d)'
        % (len(label_index), len(test_ID)))

# Write the submission file: "<class name>\t<picture id>" per line.
index_to_class = {index: name for name, index in class_dict.items()}
with open('E:/PyData/BaiDuDog/sample_sub.txt', 'w') as submission:
    for class_index, picture_id in zip(label_index, test_ID):
        submission.write(index_to_class[class_index] + '\t' + picture_id + '\n')
146 | 


--------------------------------------------------------------------------------
/pre_processing.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import shutil
  3 | 
  4 | '''
  5 | 删除训练txt中重复的delete_train_txt()，先print，手动删除。。。
  6 | '''
  7 | def delete_train_txt():
  8 | 	file = open('E:/PyData/BaiDuDog/train.txt')
  9 | 	lines = file.readlines()
 10 | 	pic_id = []
 11 | 	for line in lines:
 12 | 		pic_id.append(line.split(' ')[0])
 13 | 	print(len(pic_id))
 14 | 	pic_id_1 = list(set(pic_id))
 15 | 	print(len(pic_id_1))
 16 | 	for i in pic_id_1:
 17 | 		pic_id.remove(i)
 18 | 	print(pic_id)
 19 | 	file.close()
 20 | '''
 21 | 训练数据分类train_devide_class()
 22 | '''
 23 | def train_devide_class():
 24 | 	train_path = 'E:/PyData/BaiDuDog/train'
 25 | 	save_path = 'E:/PyData/BaiDuDog/train_class'
 26 | 	file = open('E:/PyData/BaiDuDog/train.txt')
 27 | 	lines = file.readlines()
 28 | 	for line in lines:
 29 | 		pic_id = line.split(' ')[0] + '.jpg'
 30 | 		label = line.split(' ')[1].split(' ')[0]
 31 | 		if (os.path.exists(save_path + '/' + label) == False):
 32 | 			os.makedirs(save_path + '/' + label)
 33 | 		shutil.copy(train_path + '/' + pic_id,  save_path + '/' + label + '/')
 34 | '''
 35 | 验证数据分类valid_devide_class()
 36 | '''
 37 | def valid_devide_class():
 38 | 	train_path = 'E:/PyData/BaiDuDog/valid'
 39 | 	save_path = 'E:/PyData/BaiDuDog/valid_class'
 40 | 	file = open('E:/PyData/BaiDuDog/val.txt')
 41 | 	lines = file.readlines()
 42 | 	for line in lines:
 43 | 		pic_id = line.split(' ')[0] + '.jpg'
 44 | 		label = line.split(' ')[1].split(' ')[0]
 45 | 		if (os.path.exists(save_path + '/' + label) == False):
 46 | 			os.makedirs(save_path + '/' + label)
 47 | 		shutil.copy(train_path + '/' + pic_id,  save_path + '/' + label + '/')
 48 | '''
 49 | 构建验证集set_val()，从分类好的验证集里每个类别拿出18张构建本地验证集，然后把剩下的验证集复制到训练集中去，都进行训练
 50 | '''
 51 | def set_val():
 52 | 	valid_path = 'E:/PyData/BaiDuDog/valid_class'
 53 | 	save_path = 'E:/PyData/BaiDuDog/val_class'
 54 | 	for classname in os.listdir(valid_path):
 55 | 		tmp_path = valid_path + '/' + classname
 56 | 		i = 0
 57 | 		for pic in os.listdir(tmp_path):
 58 | 			if i < 18 :
 59 | 				if (os.path.exists(save_path+'/'+classname) == False):
 60 | 					os.makedirs(save_path+'/'+classname)
 61 | 				shutil.move(tmp_path+'/'+pic,save_path+'/'+classname+'/'+pic)
 62 | 				i += 1
 63 | '''
 64 | 训练集数据扩增DataAugmentation()
 65 | '''
 66 | from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
 67 | def DataAugmentation(path):
 68 | 	datagen = ImageDataGenerator(
 69 |         rotation_range=40,
 70 |         shear_range=0.2,
 71 |         zoom_range=0.2,
 72 |         horizontal_flip=True,
 73 |         fill_mode='nearest')
 74 | 	j = 0
 75 | 	for class_name in os.listdir(path):
 76 | 		if j%20 == 0:
 77 | 			print('leave:',100-j)
 78 | 		j += 1
 79 | 		tmp_path = path + '/' + class_name
 80 | 		for pic in os.listdir(tmp_path):
 81 | 			img = load_img(tmp_path + '/' + pic)
 82 | 			x = img_to_array(img)
 83 | 			x = x.reshape((1,) + x.shape)
 84 | 			i = 0
 85 | 			for batch in datagen.flow(x, batch_size=1,save_to_dir=tmp_path, save_prefix=pic.split('.')[0], save_format='jpg'):
 86 | 				i += 1
 87 | 				if i > 1:
 88 | 					break
 89 | '''
 90 | 测试集数据扩增testAugmentation()，测试集要增强2次，分开独立保存。每次要修改save_to_dir='E:/PyData/BaiDuDog/test_kz_2'分别为test_kz_1和test_kz_2
 91 | '''
 92 | from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
 93 | 
 94 | def testAugmentation(path):
 95 | 	datagen = ImageDataGenerator(
 96 |         rotation_range=30,
 97 |         shear_range=0.2,
 98 |         zoom_range=0.2,
 99 |         horizontal_flip=True,
100 |         fill_mode='nearest')
101 | 	for pic in os.listdir(path):
102 | 		img = load_img(path + '/' + pic)
103 | 		x = img_to_array(img)
104 | 		x = x.reshape((1,) + x.shape)
105 | 		i = 0
106 | 		for batch in datagen.flow(x, batch_size=1,save_to_dir='E:/PyData/BaiDuDog/test_kz_2', save_prefix=pic.split('.')[0], save_format='jpg'):
107 | 			i += 1
108 | 			if i > 0:
109 | 				break
110 | 


--------------------------------------------------------------------------------