├── images ├── 0_out.png ├── 10_out.png ├── 11_out.png ├── 12_out.png ├── 13_out.png ├── 14_out.png ├── 15_out.png ├── 16_out.png ├── 17_out.png ├── 18_out.png ├── 19_out.png ├── 1_out.png ├── 2_out.png ├── 3_out.png ├── 4_out.png ├── 5_out.png ├── 6_out.png ├── 7_out.png ├── 8_out.png ├── 9_out.png └── dataset.png ├── history └── 2018-5-20.png ├── .gitignore ├── .idea └── vcs.xml ├── config.py ├── template.py ├── model.py ├── utils.py ├── results.json ├── LICENSE ├── scene_classes.csv ├── evaluate.py ├── demo.py ├── README.t ├── README.md ├── custom_layers └── scale_layer.py ├── pre-process.py ├── hp_search.py └── train.py /images/0_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/0_out.png -------------------------------------------------------------------------------- /images/10_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/10_out.png -------------------------------------------------------------------------------- /images/11_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/11_out.png -------------------------------------------------------------------------------- /images/12_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/12_out.png -------------------------------------------------------------------------------- /images/13_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/13_out.png -------------------------------------------------------------------------------- /images/14_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/14_out.png -------------------------------------------------------------------------------- /images/15_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/15_out.png -------------------------------------------------------------------------------- /images/16_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/16_out.png -------------------------------------------------------------------------------- /images/17_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/17_out.png -------------------------------------------------------------------------------- /images/18_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/18_out.png -------------------------------------------------------------------------------- /images/19_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/19_out.png -------------------------------------------------------------------------------- /images/1_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/1_out.png -------------------------------------------------------------------------------- /images/2_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/2_out.png -------------------------------------------------------------------------------- /images/3_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/3_out.png -------------------------------------------------------------------------------- /images/4_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/4_out.png -------------------------------------------------------------------------------- /images/5_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/5_out.png -------------------------------------------------------------------------------- /images/6_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/6_out.png -------------------------------------------------------------------------------- /images/7_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/7_out.png -------------------------------------------------------------------------------- /images/8_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/8_out.png -------------------------------------------------------------------------------- /images/9_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/9_out.png -------------------------------------------------------------------------------- /images/dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/images/dataset.png -------------------------------------------------------------------------------- /history/2018-5-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foamliu/Scene-Classification/HEAD/history/2018-5-20.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | data/ 3 | models/ 4 | __pycache__/ 5 | custom_layers/__pycache__/ 6 | label_dict.txt 7 | logs/ 8 | 9 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | img_width, img_height = 299, 299 2 | num_channels = 3 3 | train_data = 'data/train' 4 | valid_data = 'data/valid' 5 | num_classes = 80 6 | num_train_samples = 53879 7 | num_valid_samples = 7120 8 | verbose = 1 9 | batch_size = 32 10 | num_epochs = 1000 11 | patience = 50 12 | best_model = 'model.11-0.6262.hdf5' -------------------------------------------------------------------------------- /template.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import json 3 | 4 | if __name__ == '__main__': 5 | with open('README.t', 'r', encoding="utf-8") as file: 6 | text = file.readlines() 7 | text = ''.join(text) 8 | 9 | with open('results.json', 'r', encoding="utf-8") as file: 10 | results = json.load(file) 11 | 12 | for i in range(20): 13 | text = text.replace('$(result_{})'.format(i), '{}, prob: {}'.format(results[i]['label'], results[i]['prob'])) 14 | 15 | with open('README.md', 'w', encoding="utf-8") as file: 16 | file.write(text) 17 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | from keras.applications.inception_resnet_v2 import InceptionResNetV2 2 | from keras.layers import Dense, GlobalAveragePooling2D 3 | from keras.models import Model 4 | 5 | from config import num_classes 6 | 7 | 8 | def build_model(): 9 | base_model = InceptionResNetV2(weights='imagenet', include_top=False) 10 | x = base_model.output 11 | x = GlobalAveragePooling2D()(x) 12 | x = Dense(1024, activation='relu')(x) 13 | predictions = Dense(num_classes, activation='softmax')(x) 14 | model = Model(inputs=base_model.input, outputs=predictions) 15 | return model 16 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | 3 | import cv2 as cv 4 | from tensorflow.python.client import device_lib 5 | 6 | 7 | # getting the number of GPUs 8 | def get_available_gpus(): 9 | local_device_protos = device_lib.list_local_devices() 10 | return [x.name for x in local_device_protos if x.device_type == 'GPU'] 11 | 12 | 13 | # getting the number of CPUs 14 | def get_available_cpus(): 15 | return multiprocessing.cpu_count() 16 | 17 | 18 | def draw_str(dst, target, s): 19 | x, y = target 20 | cv.putText(dst, s, (x + 1, y + 1), cv.FONT_HERSHEY_PLAIN, 0.8, (0, 0, 0), thickness=2, lineType=cv.LINE_AA) 21 | cv.putText(dst, s, (x, y), cv.FONT_HERSHEY_PLAIN, 0.8, (255, 255, 255), lineType=cv.LINE_AA) 22 | -------------------------------------------------------------------------------- /results.json: -------------------------------------------------------------------------------- 1 | [{"label": "\u6559\u5ba4", "prob": "0.751"}, {"label": "\u4fee\u7406\u5e97", "prob": "0.4876"}, {"label": "\u6c99\u6f20", "prob": "0.9402"}, {"label": "\u9152\u5427", "prob": "0.8236"}, {"label": "\u5bab\u6bbf", "prob": "0.6837"}, {"label": "\u535a\u7269\u9986", "prob": "0.6911"}, {"label": "\u4f4f\u5b85", "prob": "0.5338"}, {"label": "\u4f1a\u8bae\u5ba4", "prob": "0.9461"}, {"label": "\u96c6\u5e02", "prob": "0.9636"}, {"label": "\u6865", "prob": "0.571"}, {"label": "\u822a\u7ad9\u697c", "prob": "0.9362"}, {"label": "\u6e38\u4e50\u573a", "prob": "0.5429"}, {"label": "\u4fdd\u9f84\u7403\u9986", "prob": "0.9995"}, {"label": "\u6f02\u6d41", "prob": "0.998"}, {"label": "\u6c34\u65cf\u9986", "prob": "0.9898"}, {"label": "\u505c\u673a\u576a", "prob": "0.9965"}, {"label": "\u8dd1\u9a6c\u573a", "prob": "0.9966"}, {"label": "\u5b9e\u9a8c\u5ba4", "prob": "0.8698"}, {"label": "\u6ed1\u96ea\u573a", "prob": "0.8024"}, {"label": "\u4f1a\u8bae\u5ba4", "prob": "0.6975"}] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 刘杨 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scene_classes.csv: -------------------------------------------------------------------------------- 1 | 0,航站楼,airport_terminal 2 | 1,停机坪,landing_field 3 | 2,机舱,airplane_cabin 4 | 3,游乐场,amusement_park 5 | 4,冰场,skating_rink 6 | 5,舞台,arena/performance 7 | 6,艺术室,art_room 8 | 7,流水线,assembly_line 9 | 8,棒球场,baseball_field 10 | 9,橄榄球场,football_field 11 | 10,足球场,soccer_field 12 | 11,排球场,volleyball_court 13 | 12,高尔夫球场,golf_course 14 | 13,田径场,athletic_field 15 | 14,滑雪场,ski_slope 16 | 15,篮球馆(场),basketball_court 17 | 16,健身房,gymnasium 18 | 17,保龄球馆,bowling_alley 19 | 18,游泳池,swimming_pool 20 | 19,拳击场,boxing_ring 21 | 20,跑马场,racecourse 22 | 21,田地/农场,farm/farm_field 23 | 22,果园菜园,orchard/vegetable 24 | 23,牧场,pasture 25 | 24,乡村,countryside 26 | 25,温室,greenhouse 27 | 26,电视台(演播室),television_studio 28 | 27,亚洲寺庙,temple/east_asia 29 | 28,亭子,pavilion 30 | 29,塔,tower 31 | 30,宫殿,palace 32 | 31,西式教堂,church 33 | 32,街道,street 34 | 33,餐厅食堂,dining_room 35 | 34,咖啡厅,coffee_shop 36 | 35,厨房,kitchen 37 | 36,广场,plaza 38 | 37,实验室,laboratory 39 | 38,酒吧,bar 40 | 39,会议室,conference_room 41 | 40,办公室,office 42 | 41,医院,hospital 43 | 42,售票处,ticket_booth 44 | 43,露营地(帐篷),campsite 45 | 44,音乐工作室,music_studio 46 | 45,电梯/楼梯,elevator/staircase 47 | 46,公园/花园,garden 48 | 47,建筑工地,construction_site 49 | 48,大型综合超市,general_store 50 | 49,商店,clothing_store 51 | 50,集市,bazaar 52 | 51,图书馆/书店,library/bookstore 53 | 52,教室,classroom 54 | 53,海洋沙滩,ocean/beach 55 | 54,消防,firefighting 56 | 55,加油站,gas_station 57 | 56,垃圾场,landfill 58 | 57,阳台,balcony 59 | 58,游戏/棋牌室,recreation_room 60 | 59,舞厅,discotheque 61 | 60,博物馆,museum 62 | 61,沙漠,desert/sand 63 | 62,漂流,raft 64 | 63,树林,forest 65 | 64,桥,bridge 66 | 65,住宅,residential_neighborhood 67 | 66,汽车展厅,auto_showroom 68 | 67,河流湖泊,lake/river 69 | 68,水族馆,aquarium 70 | 69,沟渠,aqueduct 71 | 70,宴会厅,banquet_hall 72 | 71,卧室,bedchamber 73 | 72,山,mountain 74 | 73,站台,station/platform 75 | 74,草地,lawn 76 | 75,育儿室,nursery 77 | 76,美容/美发店,beauty_salon 78 | 77,修理店,repair_shop 79 | 78,斗牛场,rodeo 80 | 79,雪屋/冰雕(山),igloo/ice_engraving 81 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | import cv2 as cv 6 | import keras.backend as K 7 | import numpy as np 8 | from keras.applications.inception_resnet_v2 import preprocess_input 9 | from tqdm import tqdm 10 | 11 | from config import img_width, img_height, best_model 12 | from model import build_model 13 | 14 | if __name__ == '__main__': 15 | # Parse arguments 16 | ap = argparse.ArgumentParser() 17 | ap.add_argument("-t", "--testsuite", help="name of test suite (e.g. test_a or test_b") 18 | args = vars(ap.parse_args()) 19 | test_suite = args["testsuite"] 20 | 21 | model = build_model() 22 | model_weights_path = os.path.join('models', best_model) 23 | model.load_weights(model_weights_path) 24 | 25 | test_a = 'data/ai_challenger_scene_{}_20180103'.format(test_suite) 26 | image_folder = 'data/ai_challenger_scene_{0}_20180103/scene_{0}_images_20180103'.format(test_suite) 27 | annotations = 'data/ai_challenger_scene_{0}_20180103/scene_{0}_annotations_20180103.json'.format(test_suite) 28 | with open(annotations, 'r') as f: 29 | data = json.load(f) 30 | 31 | num_samples = len(data) 32 | print('num_samples: ' + str(num_samples)) 33 | 34 | num_correct = 0 35 | for i in tqdm(range(num_samples)): 36 | image_id = data[i]['image_id'] 37 | label_id = int(data[i]['label_id']) 38 | filename = os.path.join(image_folder, image_id) 39 | image = cv.imread(filename) 40 | image = cv.resize(image, (img_width, img_height), cv.INTER_CUBIC) 41 | rgb_img = cv.cvtColor(image, cv.COLOR_BGR2RGB) 42 | rgb_img = np.expand_dims(rgb_img, 0).astype(np.float32) 43 | rgb_img = preprocess_input(rgb_img) 44 | preds = model.predict(rgb_img) 45 | top3 = np.argsort(preds)[0][::-1][:3] 46 | if label_id in top3: 47 | num_correct += 1 48 | 49 | print(num_correct / num_samples) 50 | K.clear_session() 51 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # import the necessary packages 2 | import csv 3 | import json 4 | import os 5 | import random 6 | 7 | import cv2 as cv 8 | import keras.backend as K 9 | import numpy as np 10 | from keras.applications.inception_resnet_v2 import preprocess_input 11 | 12 | from config import best_model 13 | from model import build_model 14 | 15 | if __name__ == '__main__': 16 | model = build_model() 17 | model_weights_path = os.path.join('models', best_model) 18 | model.load_weights(model_weights_path) 19 | 20 | with open('scene_classes.csv') as file: 21 | reader = csv.reader(file) 22 | scene_classes_list = list(reader) 23 | 24 | scene_classes_dict = dict() 25 | for item in scene_classes_list: 26 | scene_classes_dict[int(item[0])] = item[1] 27 | 28 | test_path = 'data/test_a/' 29 | test_images = [f for f in os.listdir(test_path) if 30 | os.path.isfile(os.path.join(test_path, f)) and f.endswith('.jpg')] 31 | num_samples = 20 32 | samples = random.sample(test_images, num_samples) 33 | 34 | if not os.path.exists('images'): 35 | os.makedirs('images') 36 | 37 | results = [] 38 | for i in range(len(samples)): 39 | image_name = samples[i] 40 | filename = os.path.join(test_path, image_name) 41 | print('Start processing image: {}'.format(filename)) 42 | image = cv.imread(filename) 43 | rgb_img = cv.cvtColor(image, cv.COLOR_BGR2RGB) 44 | rgb_img = np.expand_dims(rgb_img, 0).astype(np.float32) 45 | rgb_img = preprocess_input(rgb_img) 46 | preds = model.predict(rgb_img) 47 | prob = np.max(preds) 48 | class_id = np.argmax(preds) 49 | print(scene_classes_dict[class_id]) 50 | results.append({'label': scene_classes_dict[class_id], 'prob': '{:.4}'.format(prob)}) 51 | cv.imwrite('images/{}_out.png'.format(i), image) 52 | 53 | print(results) 54 | with open('results.json', 'w') as file: 55 | json.dump(results, file) 56 | 57 | K.clear_session() 58 | -------------------------------------------------------------------------------- /README.t: -------------------------------------------------------------------------------- 1 | # 场景分类 2 | 3 | 微调 Inception-ResNet-V2, 解决 AI Challenger 2017 场景分类问题。 4 | 5 | 6 | ## 依赖 7 | 8 | - [NumPy](http://docs.scipy.org/doc/numpy-1.10.1/user/install.html) 9 | - [Tensorflow](https://www.tensorflow.org/versions/r0.8/get_started/os_setup.html) 10 | - [Keras](https://keras.io/#installation) 11 | - [OpenCV](https://opencv-python-tutroals.readthedocs.io/en/latest/) 12 | 13 | ## 数据集 14 | 15 | 我们使用AI Challenger 2017中的场景分类数据集,其中包含80,900种场景的60,999张图像。 数据分为53,879个训练图像和7,120个测试图像。 16 | 17 | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/dataset.png) 18 | 19 | 你可以从中得到它 [Scene Classification Dataset](https://challenger.ai/datasets/scene): 20 | 21 | ### 性能 22 | 用14118张测试图片计算平均准确率(mAP),结果如下: 23 | 24 | | |Test A|Test B| 25 | |---|---|---| 26 | |图片数|7040|7078| 27 | |Top3准确度|0.94346|0.91212| 28 | 29 | ## 用法 30 | 31 | ### 数据预处理 32 | 提取60,999个训练图像,并将它们分开(53,879个用于训练,7,120个用于验证): 33 | ```bash 34 | $ python pre-process.py 35 | ``` 36 | 37 | ### 训练 38 | ```bash 39 | $ python train.py 40 | ``` 41 | 42 | 如果想在培训期间进行可视化,请在终端中运行: 43 | ```bash 44 | $ tensorboard --logdir path_to_current_dir/logs 45 | ``` 46 | 47 | ### Demo 48 | 下载 [pre-trained model](https://github.com/foamliu/Scene-Classification/releases/download/v1.0/model.11-0.6262.hdf5) 放在 models 目录然后执行: 49 | 50 | ```bash 51 | $ python demo.py 52 | ``` 53 | 54 | 1 | 2 | 3 | 4 | 55 | |---|---|---|---| 56 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/0_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/1_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/2_out.png)| ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/3_out.png) | 57 | |$(result_0)|$(result_1)|$(result_2)|$(result_3)| 58 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/4_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/5_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/6_out.png)| ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/7_out.png) | 59 | |$(result_4)|$(result_5)|$(result_6)|$(result_7)| 60 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/8_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/9_out.png) |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/10_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/11_out.png)| 61 | |$(result_8)|$(result_9)|$(result_10)|$(result_11)| 62 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/12_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/13_out.png) |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/14_out.png)| ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/15_out.png)| 63 | |$(result_12)|$(result_13)|$(result_14)|$(result_15)| 64 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/16_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/17_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/18_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/19_out.png) | 65 | |$(result_16)|$(result_17)|$(result_18)|$(result_19)| 66 | 67 | 68 | ### 性能评估 69 | ```bash 70 | $ python evaluate.py 71 | ``` 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 场景分类 2 | 3 | 微调 Inception-ResNet-V2, 解决 AI Challenger 2017 场景分类问题。 4 | 5 | 6 | ## 依赖 7 | 8 | - [NumPy](http://docs.scipy.org/doc/numpy-1.10.1/user/install.html) 9 | - [Tensorflow](https://www.tensorflow.org/versions/r0.8/get_started/os_setup.html) 10 | - [Keras](https://keras.io/#installation) 11 | - [OpenCV](https://opencv-python-tutroals.readthedocs.io/en/latest/) 12 | 13 | ## 数据集 14 | 15 | 我们使用AI Challenger 2017中的场景分类数据集,其中包含80,900种场景的60,999张图像。 数据分为53,879个训练图像和7,120个测试图像。 16 | 17 | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/dataset.png) 18 | 19 | 你可以从中得到它 [Scene Classification Dataset](https://challenger.ai/datasets/scene): 20 | 21 | ### 性能 22 | 用14118张测试图片计算平均准确率(mAP),结果如下: 23 | 24 | | |Test A|Test B| 25 | |---|---|---| 26 | |图片数|7040|7078| 27 | |Top3准确度|0.94346|0.91212| 28 | 29 | ## 用法 30 | 31 | ### 数据预处理 32 | 提取60,999个训练图像,并将它们分开(53,879个用于训练,7,120个用于验证): 33 | ```bash 34 | $ python pre-process.py 35 | ``` 36 | 37 | ### 训练 38 | ```bash 39 | $ python train.py 40 | ``` 41 | 42 | 如果想在培训期间进行可视化,请在终端中运行: 43 | ```bash 44 | $ tensorboard --logdir path_to_current_dir/logs 45 | ``` 46 | 47 | ### Demo 48 | 下载 [pre-trained model](https://github.com/foamliu/Scene-Classification/releases/download/v1.0/model.11-0.6262.hdf5) 放在 models 目录然后执行: 49 | 50 | ```bash 51 | $ python demo.py 52 | ``` 53 | 54 | 1 | 2 | 3 | 4 | 55 | |---|---|---|---| 56 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/0_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/1_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/2_out.png)| ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/3_out.png) | 57 | |教室, prob: 0.751|修理店, prob: 0.4876|沙漠, prob: 0.9402|酒吧, prob: 0.8236| 58 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/4_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/5_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/6_out.png)| ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/7_out.png) | 59 | |宫殿, prob: 0.6837|博物馆, prob: 0.6911|住宅, prob: 0.5338|会议室, prob: 0.9461| 60 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/8_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/9_out.png) |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/10_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/11_out.png)| 61 | |集市, prob: 0.9636|桥, prob: 0.571|航站楼, prob: 0.9362|游乐场, prob: 0.5429| 62 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/12_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/13_out.png) |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/14_out.png)| ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/15_out.png)| 63 | |保龄球馆, prob: 0.9995|漂流, prob: 0.998|水族馆, prob: 0.9898|停机坪, prob: 0.9965| 64 | |![image](https://github.com/foamliu/Scene-Classification/raw/master/images/16_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/17_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/18_out.png) | ![image](https://github.com/foamliu/Scene-Classification/raw/master/images/19_out.png) | 65 | |跑马场, prob: 0.9966|实验室, prob: 0.8698|滑雪场, prob: 0.8024|会议室, prob: 0.6975| 66 | 67 | 68 | ### 性能评估 69 | ```bash 70 | $ python evaluate.py 71 | ``` 72 | -------------------------------------------------------------------------------- /custom_layers/scale_layer.py: -------------------------------------------------------------------------------- 1 | from keras.layers.core import Layer 2 | from keras.engine import InputSpec 3 | from keras import backend as K 4 | try: 5 | from keras import initializations 6 | except ImportError: 7 | from keras import initializers as initializations 8 | 9 | class Scale(Layer): 10 | '''Learns a set of weights and biases used for scaling the input data. 11 | the output consists simply in an element-wise multiplication of the input 12 | and a sum of a set of constants: 13 | 14 | out = in * gamma + beta, 15 | 16 | where 'gamma' and 'beta' are the weights and biases larned. 17 | 18 | # Arguments 19 | axis: integer, axis along which to normalize in mode 0. For instance, 20 | if your input tensor has shape (samples, channels, rows, cols), 21 | set axis to 1 to normalize per feature map (channels axis). 22 | momentum: momentum in the computation of the 23 | exponential average of the mean and standard deviation 24 | of the data, for feature-wise normalization. 25 | weights: Initialization weights. 26 | List of 2 Numpy arrays, with shapes: 27 | `[(input_shape,), (input_shape,)]` 28 | beta_init: name of initialization function for shift parameter 29 | (see [initializations](../initializations.md)), or alternatively, 30 | Theano/TensorFlow function to use for weights initialization. 31 | This parameter is only relevant if you don't pass a `weights` argument. 32 | gamma_init: name of initialization function for scale parameter (see 33 | [initializations](../initializations.md)), or alternatively, 34 | Theano/TensorFlow function to use for weights initialization. 35 | This parameter is only relevant if you don't pass a `weights` argument. 36 | ''' 37 | def __init__(self, weights=None, axis=-1, momentum = 0.9, beta_init='zero', gamma_init='one', **kwargs): 38 | self.momentum = momentum 39 | self.axis = axis 40 | self.beta_init = initializations.get(beta_init) 41 | self.gamma_init = initializations.get(gamma_init) 42 | self.initial_weights = weights 43 | super(Scale, self).__init__(**kwargs) 44 | 45 | def build(self, input_shape): 46 | self.input_spec = [InputSpec(shape=input_shape)] 47 | shape = (int(input_shape[self.axis]),) 48 | 49 | # Compatibility with TensorFlow >= 1.0.0 50 | self.gamma = K.variable(self.gamma_init(shape), name='{}_gamma'.format(self.name)) 51 | self.beta = K.variable(self.beta_init(shape), name='{}_beta'.format(self.name)) 52 | #self.gamma = self.gamma_init(shape, name='{}_gamma'.format(self.name)) 53 | #self.beta = self.beta_init(shape, name='{}_beta'.format(self.name)) 54 | self.trainable_weights = [self.gamma, self.beta] 55 | 56 | if self.initial_weights is not None: 57 | self.set_weights(self.initial_weights) 58 | del self.initial_weights 59 | 60 | def call(self, x, mask=None): 61 | input_shape = self.input_spec[0].shape 62 | broadcast_shape = [1] * len(input_shape) 63 | broadcast_shape[self.axis] = input_shape[self.axis] 64 | 65 | out = K.reshape(self.gamma, broadcast_shape) * x + K.reshape(self.beta, broadcast_shape) 66 | return out 67 | 68 | def get_config(self): 69 | config = {"momentum": self.momentum, "axis": self.axis} 70 | base_config = super(Scale, self).get_config() 71 | return dict(list(base_config.items()) + list(config.items())) 72 | -------------------------------------------------------------------------------- /pre-process.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import zipfile 4 | 5 | import cv2 as cv 6 | from tqdm import tqdm 7 | 8 | from config import img_height, img_width 9 | 10 | 11 | def ensure_folder(folder): 12 | if not os.path.exists(folder): 13 | os.makedirs(folder) 14 | 15 | 16 | def extract(usage, package, image_path, json_path): 17 | filename = 'data/{}.zip'.format(package) 18 | print('Extracting {}...'.format(filename)) 19 | with zipfile.ZipFile(filename, 'r') as zip_ref: 20 | zip_ref.extractall('data') 21 | 22 | if not os.path.exists('data/{}'.format(usage)): 23 | os.makedirs('data/{}'.format(usage)) 24 | with open('data/{}/{}'.format(package, json_path)) as json_data: 25 | data = json.load(json_data) 26 | num_samples = len(data) 27 | print("num_samples: " + str(num_samples)) 28 | for i in tqdm(range(num_samples)): 29 | item = data[i] 30 | image_name = item['image_id'] 31 | label_id = item['label_id'] 32 | src_folder = 'data/{}/{}'.format(package, image_path) 33 | src_path = os.path.join(src_folder, image_name) 34 | dst_folder = 'data/{}'.format(usage) 35 | label = "%02d" % (int(label_id),) 36 | dst_path = os.path.join(dst_folder, label) 37 | if not os.path.exists(dst_path): 38 | os.makedirs(dst_path) 39 | dst_path = os.path.join(dst_path, image_name) 40 | src_image = cv.imread(src_path) 41 | dst_image = cv.resize(src_image, (img_height, img_width), cv.INTER_CUBIC) 42 | cv.imwrite(dst_path, dst_image) 43 | 44 | 45 | def extract_test(usage, package, image_path, json_path): 46 | filename = 'data/{}.zip'.format(package) 47 | print('Extracting {}...'.format(filename)) 48 | with zipfile.ZipFile(filename, 'r') as zip_ref: 49 | zip_ref.extractall('data') 50 | 51 | if not os.path.exists('data/{}'.format(usage)): 52 | os.makedirs('data/{}'.format(usage)) 53 | with open('data/{}/{}'.format(package, json_path)) as json_data: 54 | data = json.load(json_data) 55 | num_samples = len(data) 56 | print("num_samples: " + str(num_samples)) 57 | label_dict = dict() 58 | for i in tqdm(range(num_samples)): 59 | item = data[i] 60 | image_name = item['image_id'] 61 | label_id = item['label_id'] 62 | src_folder = 'data/{}/{}'.format(package, image_path) 63 | src_path = os.path.join(src_folder, image_name) 64 | dst_folder = 'data/{}'.format(usage) 65 | label = "%02d" % (int(label_id),) 66 | label_dict[image_name] = label 67 | dst_path = os.path.join(dst_folder, image_name) 68 | src_image = cv.imread(src_path) 69 | dst_image = cv.resize(src_image, (img_height, img_width), cv.INTER_CUBIC) 70 | cv.imwrite(dst_path, dst_image) 71 | with open('label_dict.txt', 'w') as outfile: 72 | json.dump(label_dict, outfile, indent=4, sort_keys=True) 73 | 74 | 75 | if __name__ == '__main__': 76 | # parameters 77 | ensure_folder('data') 78 | 79 | extract('train', 'ai_challenger_scene_train_20170904', 'scene_train_images_20170904', 80 | 'scene_train_annotations_20170904.json') 81 | 82 | extract('valid', 'ai_challenger_scene_validation_20170908', 'scene_validation_images_20170908', 83 | 'scene_validation_annotations_20170908.json') 84 | 85 | extract_test('test_a', 'ai_challenger_scene_test_a_20180103', 'scene_test_a_images_20180103', 86 | 'scene_test_a_annotations_20180103.json') 87 | 88 | extract_test('test_b', 'ai_challenger_scene_test_b_20180103', 'scene_test_b_images_20180103', 89 | 'scene_test_b_annotations_20180103.json') 90 | -------------------------------------------------------------------------------- /hp_search.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | from math import log 5 | 6 | import keras 7 | from hyperas import optim 8 | from hyperas.distributions import loguniform 9 | from hyperas.distributions import uniform 10 | from hyperopt import Trials, STATUS_OK, tpe 11 | from keras import regularizers 12 | from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input 13 | from keras.layers import GlobalAveragePooling2D 14 | from keras.layers.core import Dense, Dropout 15 | from keras.models import Model 16 | from keras.preprocessing.image import ImageDataGenerator 17 | 18 | from config import img_width, img_height, num_classes, batch_size, train_data, valid_data, num_train_samples, \ 19 | num_valid_samples, best_model 20 | 21 | 22 | def data(): 23 | train_datagen = ImageDataGenerator(shear_range=0.2, 24 | rotation_range=20., 25 | width_shift_range=0.3, 26 | height_shift_range=0.3, 27 | zoom_range=0.2, 28 | horizontal_flip=True, 29 | preprocessing_function=preprocess_input) 30 | test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input) 31 | 32 | train_generator = train_datagen.flow_from_directory(train_data, (img_width, img_height), batch_size=batch_size, 33 | class_mode='categorical', shuffle=True) 34 | validation_generator = test_datagen.flow_from_directory(valid_data, (img_width, img_height), batch_size=batch_size, 35 | class_mode='categorical', shuffle=True) 36 | 37 | return train_generator, validation_generator 38 | 39 | 40 | def create_model(train_generator, validation_generator): 41 | l2_reg = regularizers.l2({{loguniform(log(1e-6), log(1e-2))}}) 42 | base_model = InceptionResNetV2(weights='imagenet', include_top=False) 43 | x = base_model.output 44 | x = GlobalAveragePooling2D()(x) 45 | x = Dropout({{uniform(0, 1)}})(x) 46 | x = Dense(1024, activation='relu', kernel_regularizer=l2_reg, activity_regularizer=l2_reg)(x) 47 | x = Dropout({{uniform(0, 1)}})(x) 48 | predictions = Dense(num_classes, activation='softmax', kernel_regularizer=l2_reg, activity_regularizer=l2_reg)(x) 49 | model = Model(inputs=base_model.input, outputs=predictions) 50 | 51 | model_weights_path = os.path.join('models', best_model) 52 | model.load_weights(model_weights_path) 53 | 54 | for i in range(int(len(base_model.layers) * {{uniform(0, 1)}})): 55 | layer = base_model.layers[i] 56 | layer.trainable = False 57 | 58 | adam = keras.optimizers.Adam(lr={{loguniform(log(1e-6), log(1e-3))}}) 59 | model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=adam) 60 | 61 | # print(model.summary()) 62 | 63 | model.fit_generator( 64 | train_generator, 65 | steps_per_epoch=num_train_samples // batch_size, 66 | validation_data=validation_generator, 67 | validation_steps=num_valid_samples // batch_size) 68 | 69 | score, acc = model.evaluate_generator(validation_generator) 70 | print('Test accuracy:', acc) 71 | return {'loss': -acc, 'status': STATUS_OK, 'model': model} 72 | 73 | 74 | if __name__ == '__main__': 75 | train_generator, validation_generator = data() 76 | best_run, best_model = optim.minimize(model=create_model, 77 | data=data, 78 | algo=tpe.suggest, 79 | max_evals=15, 80 | trials=Trials()) 81 | 82 | print("Evalutation of best performing model:") 83 | print(best_model.evaluate_generator(validation_generator)) 84 | print("Best performing model chosen hyper-parameters:") 85 | print(best_run) 86 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import keras 4 | import tensorflow as tf 5 | from keras.applications.inception_resnet_v2 import preprocess_input 6 | from keras.callbacks import ModelCheckpoint, EarlyStopping 7 | from keras.callbacks import ReduceLROnPlateau 8 | from keras.optimizers import SGD 9 | from keras.preprocessing.image import ImageDataGenerator 10 | from keras.utils import multi_gpu_model 11 | 12 | from config import img_height, img_width, batch_size, patience, train_data, valid_data, \ 13 | num_train_samples, num_valid_samples, num_epochs, verbose 14 | from model import build_model 15 | from utils import get_available_gpus, get_available_cpus 16 | 17 | if __name__ == '__main__': 18 | # Parse arguments 19 | ap = argparse.ArgumentParser() 20 | ap.add_argument("-p", "--pretrained", help="path to save pretrained model files") 21 | args = vars(ap.parse_args()) 22 | pretrained_path = args["pretrained"] 23 | 24 | # prepare data augmentation configuration 25 | train_data_gen = ImageDataGenerator(shear_range=0.2, 26 | rotation_range=20., 27 | width_shift_range=0.3, 28 | height_shift_range=0.3, 29 | zoom_range=0.2, 30 | horizontal_flip=True, 31 | preprocessing_function=preprocess_input) 32 | valid_data_gen = ImageDataGenerator(preprocessing_function=preprocess_input) 33 | 34 | # generators 35 | train_generator = train_data_gen.flow_from_directory(train_data, (img_width, img_height), batch_size=batch_size, 36 | class_mode='categorical', shuffle=True) 37 | valid_generator = valid_data_gen.flow_from_directory(valid_data, (img_width, img_height), batch_size=batch_size, 38 | class_mode='categorical', shuffle=True) 39 | 40 | 41 | class MyCbk(keras.callbacks.Callback): 42 | def __init__(self, model): 43 | keras.callbacks.Callback.__init__(self) 44 | self.model_to_save = model 45 | 46 | def on_epoch_end(self, epoch, logs=None): 47 | fmt = 'models/model.%02d-%.4f.hdf5' 48 | self.model_to_save.save(fmt % (epoch, logs['val_acc'])) 49 | 50 | 51 | # Callbacks 52 | tensor_board = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=True) 53 | # early_stop = EarlyStopping('val_acc', patience=patience) 54 | # reduce_lr = ReduceLROnPlateau('val_acc', factor=0.5, patience=int(patience / 4), verbose=1) 55 | trained_models_path = 'models/model' 56 | model_names = trained_models_path + '.{epoch:02d}-{val_loss:.4f}.hdf5' 57 | model_checkpoint = ModelCheckpoint(model_names, monitor='val_acc', verbose=1, save_best_only=True) 58 | 59 | num_gpu = len(get_available_gpus()) 60 | if num_gpu >= 2: 61 | with tf.device("/cpu:0"): 62 | model = build_model() 63 | if pretrained_path is not None: 64 | model.load_weights(pretrained_path) 65 | 66 | new_model = multi_gpu_model(model, gpus=num_gpu) 67 | # rewrite the callback: saving through the original model and not the multi-gpu model. 68 | model_checkpoint = MyCbk(model) 69 | else: 70 | new_model = build_model() 71 | if pretrained_path is not None: 72 | new_model.load_weights(pretrained_path) 73 | 74 | adam = keras.optimizers.Adam(lr=1e-6) 75 | new_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy']) 76 | 77 | callbacks = [tensor_board, model_checkpoint] 78 | 79 | # fine tune the model 80 | new_model.fit_generator( 81 | train_generator, 82 | steps_per_epoch=num_train_samples / batch_size, 83 | validation_data=valid_generator, 84 | validation_steps=num_valid_samples / batch_size, 85 | shuffle=True, 86 | epochs=num_epochs, 87 | callbacks=callbacks, 88 | verbose=verbose, 89 | use_multiprocessing=True, 90 | workers=int(get_available_cpus() * 0.80) 91 | ) 92 | --------------------------------------------------------------------------------