├── utils ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── file_util.cpython-35.pyc │ └── result_util.cpython-35.pyc ├── file_util.py ├── result_util.py ├── model_util.py └── img_preprocessing.py ├── .gitattributes ├── .idea ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── ai challenger.iml └── workspace.xml ├── README.md ├── input └── mapping.txt ├── make_submission.py ├── train_resnet50.py ├── train_resnet50_with_dropout.py ├── train_vgg19.py ├── train_vgg16.py ├── train_inceptionv3.py ├── train_inception_renset_v2.py └── predict_single_picture.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangfchen/AIChallenger_SceneClassification_ZJUGIVE/HEAD/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/file_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangfchen/AIChallenger_SceneClassification_ZJUGIVE/HEAD/utils/__pycache__/file_util.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/result_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fangfchen/AIChallenger_SceneClassification_ZJUGIVE/HEAD/utils/__pycache__/result_util.cpython-35.pyc -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AIChallenger_SceneClassification_ZJUGIVE 2 | TOP5 code for 2017 AI Challenger (Competition of Scene Classification) 3 | Team: ZJUGIVE 4 | Link: [https://challenger.ai/competition/scene](https://challenger.ai/competition/scene) 5 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/ai challenger.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /input/mapping.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 10 4 | 11 5 | 12 6 | 13 7 | 14 8 | 15 9 | 16 10 | 17 11 | 18 12 | 19 13 | 2 14 | 20 15 | 21 16 | 22 17 | 23 18 | 24 19 | 25 20 | 26 21 | 27 22 | 28 23 | 29 24 | 3 25 | 30 26 | 31 27 | 32 28 | 33 29 | 34 30 | 35 31 | 36 32 | 37 33 | 38 34 | 39 35 | 4 36 | 40 37 | 41 38 | 42 39 | 43 40 | 44 41 | 45 42 | 46 43 | 47 44 | 48 45 | 49 46 | 5 47 | 50 48 | 51 49 | 52 50 | 53 51 | 54 52 | 55 53 | 56 54 | 57 55 | 58 56 | 59 57 | 6 58 | 60 59 | 61 60 | 62 61 | 63 62 | 64 63 | 65 64 | 66 65 | 67 66 | 68 67 | 69 68 | 7 69 | 70 70 | 71 71 | 72 72 | 73 73 | 74 74 | 75 75 | 76 76 | 77 77 | 78 78 | 79 79 | 8 80 | 9 81 | -------------------------------------------------------------------------------- /utils/file_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import json 4 | from tqdm import tqdm 5 | 6 | def write_result2df_json(result, df_test, df_filename, json_filename): 7 | if result.shape[0] == 1: 8 | result = result[0] 9 | top3_result = np.argsort(-result) 10 | top1 = [] 11 | top2 = [] 12 | top3 = [] 13 | json_list = [] 14 | for i in tqdm(range(len(df_test))): 15 | top1.append(int(top3_result[i, 0])) 16 | top2.append(int(top3_result[i, 1])) 17 | top3.append(int(top3_result[i, 2])) 18 | json_list.append({"image_id": df_test.iloc[i, 0] + ".jpg", 19 | "label_id": [int(top3_result[i, 0]), int(top3_result[i, 1]), int(top3_result[i, 2])]}) 20 | pd.DataFrame({"image_id": df_test.iloc[:, 0].tolist(), "top1": top1, "top2": top2, "top3": top3}).\ 21 | to_csv(df_filename, index=None) 22 | 23 | with open(json_filename, "w") as f: 24 | f.write(json.dumps(json_list, sort_keys=True)) 25 | -------------------------------------------------------------------------------- /utils/result_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def get_top3_acc(pred_result, is_train): 5 | if not is_train: 6 | df_true = pd.read_csv("../output/valid_label.csv", header=0) # ordered valid labels with filename order ImageDataGenerator().filenames 7 | else: 8 | df_true = pd.read_csv("../output/train_label.csv", header=0) # ordered train labels with filename order ImageDataGenerator().filenames 9 | result_args = np.argsort(-pred_result) 10 | pred = pd.DataFrame(result_args) 11 | cnt = 0 12 | print(len(pred)) 13 | print(len(df_true)) 14 | for i in range(len(pred)): 15 | if df_true.iloc[i].lable_id.tolist() in pred.iloc[i, :3].tolist(): 16 | cnt += 1 17 | return 1.0 * cnt / len(pred) 18 | 19 | def map_result(result): 20 | li = pd.read_csv("../input/mapping.txt", header=None, names=["mapping"]).mapping.tolist() 21 | for i in range(result.shape[0]): 22 | for j in range(result.shape[1]): 23 | result[i][j] = int(li[result[i][j]]) 24 | return result -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 14 | 15 | 16 | 1512800247146 17 | 21 | 22 | 23 | 24 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /make_submission.py: -------------------------------------------------------------------------------- 1 | __author__ = "ben" 2 | 3 | import pandas as pd 4 | import os 5 | import numpy as np 6 | from tqdm import tqdm 7 | import json 8 | from utils.img_preprocessing import get_img_gen 9 | 10 | test_dir = os.path.join("..", "input", "test_b", "test_b") 11 | 12 | input_size = 512 13 | batch_size = 32 14 | # model = load_inception_resnetv2(512) 15 | 16 | mapping = pd.read_csv("../input/mapping.txt",header=None, names=["maps"]).maps.tolist() 17 | 18 | def write_result2df_json(result, df_test, df_filename, json_filename): 19 | if result.shape[0] == 1: 20 | result = result[0] 21 | top3_result = np.argsort(-result) 22 | top1 = [] 23 | top2 = [] 24 | top3 = [] 25 | json_list = [] 26 | for i in tqdm(range(len(df_test))): 27 | top1.append(int(top3_result[i, 0])) 28 | top2.append(int(top3_result[i, 1])) 29 | top3.append(int(top3_result[i, 2])) 30 | json_list.append({"image_id": df_test.iloc[i, 0], 31 | "label_id": [int(top3_result[i, 0]), int(top3_result[i, 1]), int(top3_result[i, 2])]}) 32 | pd.DataFrame({"image_id": df_test.iloc[:, 0].tolist(), "top1": top1, "top2": top2, "top3": top3}).\ 33 | to_csv(df_filename, index=None) 34 | 35 | with open(json_filename, "w") as f: 36 | f.write(json.dumps(json_list, sort_keys=True)) 37 | 38 | 39 | if __name__ == "__main__": 40 | 41 | # copy all the result file into one folder named result/total_result 42 | result_dirs = "../result/total_result" 43 | total_result = [] 44 | predict_gen = get_img_gen()[1].\ 45 | flow_from_directory("../input/test/test", shuffle=False, class_mode=None, 46 | target_size=(input_size, input_size), batch_size=batch_size) 47 | for result_filename in os.listdir(result_dirs): 48 | total_result.append(np.load(os.path.join(result_dirs, result_filename))) 49 | p_test = total_result[0] 50 | for i in range(1, len(total_result)): 51 | p_test += total_result[i] 52 | 53 | p_test /= len(total_result) 54 | filenames = [x.split("/")[1].split(".")[0] for x in predict_gen.filenames] 55 | df_result_file = "../result/test/fianl_submission.csv" 56 | josn_result_file = "../result/test/final_submission.json" 57 | df_test = pd.DataFrame({"image_id": filenames}) 58 | 59 | write_result2df_json(p_test, df_test=df_test, df_filename=df_result_file, 60 | json_filename=josn_result_file) # save result to file 61 | -------------------------------------------------------------------------------- /train_resnet50.py: -------------------------------------------------------------------------------- 1 | __author__ = "ben" 2 | 3 | import os 4 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2" 5 | 6 | from utils.model_util import load_resnet50, train_model_imagedatagen 7 | from utils.img_preprocessing import get_img_gen 8 | import pandas as pd 9 | import numpy as np 10 | from utils.file_util import write_result2df_json 11 | 12 | input_size = 512 13 | input_channels = 3 14 | batch_size = 40 15 | epochs = 50 16 | n_classes = 80 17 | gpu_count = 3 18 | kfold = 5 19 | 20 | df_train_all = pd.read_csv("../output/train_label.csv") 21 | df_valid = pd.read_csv("../output/validation_label.csv") 22 | df_test = pd.read_csv("../output/sample_submission.csv") 23 | model_save_file = "weight/resnet50" 24 | result_save_dir = "../result/resnet50" 25 | 26 | train_gen = get_img_gen() 27 | 28 | def _predict(result, pred_filenames): 29 | converted_result = np.zeros_like(result) # real probability after converted 30 | mapping = pd.read_csv("../input/mapping.txt", header=None, names=["maps"]).maps.tolist() 31 | print(mapping) 32 | for j in range(80): 33 | print(mapping[j]) 34 | converted_result[:, mapping[j]] = result[:, j] 35 | print(converted_result.shape) 36 | pd.DataFrame({"filename": pred_filenames}).to_csv("../result/resnet50/test_filename.csv", index=None) 37 | np.save("../result/resnet50/real_test_2_result.npy", result) 38 | 39 | write_result2df_json(result, df_test=pd.DataFrame({"filename": pred_filenames}), 40 | df_filename="../result/resnet50/real_test_2_result.csv", 41 | json_filename="../result/resnet50/real_test_2_result.json") 42 | 43 | model = load_resnet50(input_size=input_size) 44 | 45 | y_full_test = [] 46 | 47 | for fold in range(kfold): 48 | train_dir = os.path.join("../input/train", "train_fold_"+str(fold)) 49 | valid_dir = os.path.join("../input/valid", "valid_fold_"+str(fold)) 50 | checkpoint_file = os.path.join("weights", "resnet50", "fold_"+str(fold))+".hdf5" 51 | len_train, len_valid = 0, 0 52 | for dir in os.listdir(train_dir): 53 | len_train += len(os.listdir(os.path.join(train_dir, dir))) 54 | for dir in os.listdir(valid_dir): 55 | len_valid += len(os.listdir(os.path.join(valid_dir, dir))) 56 | train_model_imagedatagen(model=model, batch_size = batch_size, checkpoint_file=checkpoint_file, 57 | train_gen=train_gen[0].flow_from_directory(train_dir, shuffle=True, 58 | target_size=(input_size, input_size), batch_size=batch_size), 59 | valid_gen=train_gen[1].flow_from_directory(valid_dir, shuffle=True, 60 | target_size=(input_size, input_size), batch_size=batch_size), 61 | len_train=len(df_train_all), len_valid=len(df_valid), epochs=epochs) 62 | model.save_weights(os.path.join(model_save_file, str(fold) + ".hdf5")) 63 | predict_gen = train_gen[1].flow_from_directory("../input/test/test", shuffle=False, class_mode=None, 64 | target_size=(input_size, input_size), batch_size=batch_size) 65 | pred_filenames = [x.split("/")[1].split(".")[0] for x in predict_gen.filenames] 66 | result = model.predict_generator(generator=predict_gen, steps=(len(df_train_all) - 1) // batch_size + 1, verbose=1) 67 | np.save(os.path.join(result_save_dir, "result_" + str(fold) + ".npy")) 68 | y_full_test.append(result) 69 | 70 | p_test = y_full_test[0] 71 | for i in range(1, kfold): 72 | p_test += y_full_test 73 | 74 | p_test /= kfold 75 | 76 | _predict(p_test, pred_filenames) 77 | -------------------------------------------------------------------------------- /train_resnet50_with_dropout.py: -------------------------------------------------------------------------------- 1 | __author__ = "ben" 2 | 3 | import os 4 | import warnings 5 | 6 | warnings.filterwarnings("ignore") 7 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2" 8 | 9 | from utils.model_util import load_resnet50_with_dropout, train_model_imagedatagen 10 | from utils.img_preprocessing import get_img_gen 11 | import pandas as pd 12 | import numpy as np 13 | from utils.file_util import write_result2df_json 14 | 15 | input_size = 512 16 | input_channels = 3 17 | batch_size = 15 18 | epochs = 50 19 | kfold = 5 20 | 21 | df_train_all = pd.read_csv("../output/train_label.csv") 22 | df_valid = pd.read_csv("../output/validation_label.csv") 23 | train_path = "../input/train" 24 | valid_path = "../input/valid" 25 | model_save_file = "weight/resnet50_with_dropout" 26 | result_save_dir = "../result/resnet50_with_dropout" 27 | 28 | train_gen = get_img_gen() 29 | 30 | def _predict(result, pred_filenames): 31 | converted_result = np.zeros_like(result) # real probability after converted 32 | mapping = pd.read_csv("../input/mapping.txt", header=None, names=["maps"]).maps.tolist() 33 | print(mapping) 34 | for j in range(80): 35 | print(mapping[j]) 36 | converted_result[:, mapping[j]] = result[:, j] 37 | print(converted_result.shape) 38 | pd.DataFrame({"filename": pred_filenames}).to_csv("../result/resnet50_with_dropout/test_filename.csv", index=None) 39 | np.save("../result/resnet50_with_dropout/real_test_2_result.npy", result) 40 | 41 | write_result2df_json(result, df_test=pd.DataFrame({"filename": pred_filenames}), 42 | df_filename="../result/resnet50_with_dropout/real_test_2_result.csv", 43 | json_filename="../result/resnet50_with_dropout/real_test_2_result.json") 44 | 45 | model = load_resnet50_with_dropout(input_size=input_size) 46 | y_full_test = [] 47 | 48 | for fold in range(kfold): 49 | train_dir = os.path.join("../input/train", "train_fold_"+str(fold)) 50 | valid_dir = os.path.join("../input/valid", "valid_fold_"+str(fold)) 51 | checkpoint_file = os.path.join("weights", "resnet50_with_dropout", "fold_"+str(fold))+".hdf5" 52 | len_train, len_valid = 0, 0 53 | for dir in os.listdir(train_dir): 54 | len_train += len(os.listdir(os.path.join(train_dir, dir))) 55 | for dir in os.listdir(valid_dir): 56 | len_valid += len(os.listdir(os.path.join(valid_dir, dir))) 57 | train_model_imagedatagen(model=model, batch_size = batch_size, checkpoint_file=checkpoint_file, 58 | train_gen=train_gen[0].flow_from_directory(train_dir, shuffle=True, 59 | target_size=(input_size, input_size), batch_size=batch_size), 60 | valid_gen=train_gen[1].flow_from_directory(valid_dir, shuffle=True, 61 | target_size=(input_size, input_size), batch_size=batch_size), 62 | len_train=len_train, len_valid=len_valid, epochs=epochs) 63 | model.save_weights(os.path.join(model_save_file, str(fold) + ".hdf5")) 64 | predict_gen = train_gen[1].flow_from_directory("../input/test/test", shuffle=False, class_mode=None, 65 | target_size=(input_size, input_size), batch_size=batch_size) 66 | pred_filenames = [x.split("/")[1].split(".")[0] for x in predict_gen.filenames] 67 | result = model.predict_generator(generator=predict_gen, steps=(len(df_train_all) - 1) // batch_size + 1, verbose=1) 68 | np.save(os.path.join(result_save_dir, "result_" + str(fold) + ".npy")) 69 | y_full_test.append(result) 70 | 71 | p_test = y_full_test[0] 72 | for i in range(1, kfold): 73 | p_test += y_full_test 74 | 75 | p_test /= kfold 76 | 77 | _predict(p_test, pred_filenames) 78 | -------------------------------------------------------------------------------- /train_vgg19.py: -------------------------------------------------------------------------------- 1 | __author__ = "ben" 2 | 3 | import os 4 | os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2" 5 | 6 | from utils.model_util import train_model_imagedatagen 7 | from utils.img_preprocessing import get_img_gen 8 | import pandas as pd 9 | from keras.optimizers import Adam, SGD 10 | from utils.model_util import load_vgg19 11 | from keras.utils.training_utils import multi_gpu_model 12 | from utils.file_util import write_result2df_json 13 | import numpy as np 14 | 15 | kfold = 5 16 | input_size = 512 17 | input_channels = 3 18 | batch_size = 15 19 | epochs = 50 20 | gpus = 3 21 | 22 | df_train_all = pd.read_csv("../output/train_label.csv") 23 | df_valid = pd.read_csv("../output/validation_label.csv") 24 | model_save_file = "weight/vgg16" 25 | result_save_dir = "../result/vgg16" 26 | 27 | def _predict(result, pred_filenames): 28 | converted_result = np.zeros_like(result) # real probability after converted 29 | mapping = pd.read_csv("../input/mapping.txt", header=None, names=["maps"]).maps.tolist() 30 | print(mapping) 31 | for j in range(80): 32 | print(mapping[j]) 33 | converted_result[:, mapping[j]] = result[:, j] 34 | print(converted_result.shape) 35 | pd.DataFrame({"filename": pred_filenames}).to_csv("../result/vgg19/test_filename.csv", index=None) 36 | np.save("../result/vgg19/real_test_2_result.npy", result) 37 | 38 | write_result2df_json(result, df_test=pd.DataFrame({"filename": pred_filenames}), 39 | df_filename="../result/vgg19/real_test_2_result.csv", 40 | json_filename="../result/vgg19/real_test_2_result.json") 41 | 42 | model = load_vgg19(input_size=input_size) 43 | 44 | adam_optimizer = Adam(lr=1e-4) 45 | sgd_optimizer = SGD(lr=5*1e-4, momentum=0.8, decay=1e-6) 46 | 47 | train_gen = get_img_gen() 48 | y_full_test = [] 49 | 50 | for fold in range(kfold): 51 | train_dir = os.path.join("../input/train", "train_fold_" + str(fold)) 52 | valid_dir = os.path.join("../input/valid", "valid_fold_" + str(fold)) 53 | checkpoint_file = os.path.join("weights", "vgg16", "fold_" + str(fold)) + ".hdf5" 54 | len_train, len_valid = 0, 0 55 | for dir in os.listdir(train_dir): 56 | len_train += len(os.listdir(os.path.join(train_dir, dir))) 57 | for dir in os.listdir(valid_dir): 58 | len_valid += len(os.listdir(os.path.join(valid_dir, dir))) 59 | train_model_imagedatagen( 60 | model=multi_gpu_model(model, gpus=gpus), 61 | optimizer=sgd_optimizer, 62 | checkpoint_file=checkpoint_file, 63 | train_gen=train_gen[0].flow_from_directory(train_dir, 64 | shuffle=True,target_size=(input_size, input_size), batch_size=batch_size), 65 | valid_gen = train_gen[1].flow_from_directory(valid_dir, 66 | shuffle=True,target_size=(input_size,input_size), batch_size=batch_size), 67 | len_train=len_train, len_valid=len_valid, batch_size=batch_size, epochs=50) 68 | model.save_weights(os.path.join(model_save_file, str(fold) + ".hdf5")) 69 | predict_gen = train_gen[1].flow_from_directory("../input/test/test", shuffle=False, class_mode=None, 70 | target_size=(input_size, input_size), batch_size=batch_size) 71 | pred_filenames = [x.split("/")[1].split(".")[0] for x in predict_gen.filenames] 72 | result = model.predict_generator(generator=predict_gen, steps=(len(df_train_all) - 1) // batch_size + 1, verbose=1) 73 | np.save(os.path.join(result_save_dir, "result_" + str(fold) + ".npy")) 74 | y_full_test.append(result) 75 | 76 | p_test = y_full_test[0] 77 | for i in range(1, kfold): 78 | p_test += y_full_test 79 | 80 | p_test /= kfold 81 | 82 | _predict(p_test, pred_filenames) -------------------------------------------------------------------------------- /train_vgg16.py: -------------------------------------------------------------------------------- 1 | __author__ = "ben" 2 | 3 | import pandas as pd 4 | from utils.model_util import load_vgg16 5 | from keras.optimizers import SGD, Adam 6 | from utils.model_util import train_model_imagedatagen 7 | from utils.img_preprocessing import get_img_gen 8 | from keras.utils.training_utils import multi_gpu_model 9 | from utils.file_util import write_result2df_json 10 | import numpy as np 11 | import os 12 | 13 | input_size = 512 14 | input_channel = 3 15 | batch_size = 15 16 | gpus = 3 17 | n_classes = 80 18 | kfold = 5 19 | epochs = 50 20 | 21 | df_train_all = pd.read_csv("../output/train_label.csv") 22 | df_valid = pd.read_csv("../output/validation_label.csv") 23 | df_test = pd.read_csv("../output/sample_submission.csv") 24 | model_save_file = "weights/vgg16" 25 | result_save_dir = "../result/vgg16" 26 | 27 | def _predict(result, pred_filenames): 28 | converted_result = np.zeros_like(result) # real probability after converted 29 | mapping = pd.read_csv("../input/mapping.txt", header=None, names=["maps"]).maps.tolist() 30 | print(mapping) 31 | for j in range(80): 32 | print(mapping[j]) 33 | converted_result[:, mapping[j]] = result[:, j] 34 | print(converted_result.shape) 35 | pd.DataFrame({"filename": pred_filenames}).to_csv("../result/vgg16/test_filename.csv", index=None) 36 | np.save("../result/vgg16/real_test_2_result.npy", result) 37 | 38 | write_result2df_json(result, df_test=pd.DataFrame({"filename": pred_filenames}), 39 | df_filename="../result/vgg16/real_test_2_result.csv", 40 | json_filename="../result/vgg16/real_test_2_result.json") 41 | 42 | model = load_vgg16(input_size) 43 | adam_optimizer = Adam(lr=1e-4) 44 | sgd_optimizer = SGD(lr=1e-4, momentum=0.9, decay=1e-6) 45 | train_gen = get_img_gen() 46 | y_full_test = [] 47 | 48 | for fold in range(kfold): 49 | train_dir = os.path.join("../input/train", "train_fold_" + str(fold)) 50 | valid_dir = os.path.join("../input/valid", "valid_fold_" + str(fold)) 51 | checkpoint_file = os.path.join("weights", "vgg16", "fold_" + str(fold)) + ".hdf5" 52 | len_train, len_valid = 0, 0 53 | for dir in os.listdir(train_dir): 54 | len_train += len(os.listdir(os.path.join(train_dir, dir))) 55 | for dir in os.listdir(valid_dir): 56 | len_valid += len(os.listdir(os.path.join(valid_dir, dir))) 57 | train_model_imagedatagen( 58 | model=multi_gpu_model(model, gpus=gpus), 59 | optimizer=sgd_optimizer, 60 | checkpoint_file=checkpoint_file, 61 | train_gen=train_gen[0].flow_from_directory(train_dir, shuffle=True, 62 | target_size=(input_size, input_size), batch_size=batch_size), 63 | valid_gen=get_img_gen()[1].flow_from_directory(valid_dir, shuffle=True, 64 | target_size=(input_size, input_size), batch_size=batch_size), 65 | len_train=len_train, len_valid=len_valid, batch_size=batch_size, epochs=epochs) 66 | model.save_weights(os.path.join(model_save_file, str(fold) + ".hdf5")) 67 | predict_gen = train_gen[1].flow_from_directory("../input/test/test", shuffle=False, class_mode=None, 68 | target_size=(input_size, input_size), batch_size=batch_size) 69 | pred_filenames = [x.split("/")[1].split(".")[0] for x in predict_gen.filenames] 70 | result = model.predict_generator(generator=predict_gen, steps=(len(df_train_all) - 1) // batch_size + 1, verbose=1) 71 | np.save(os.path.join(result_save_dir, "result_" + str(fold) + ".npy")) 72 | y_full_test.append(result) 73 | 74 | p_test = y_full_test[0] 75 | for i in range(1, kfold): 76 | p_test += y_full_test 77 | 78 | p_test /= kfold 79 | 80 | _predict(p_test, pred_filenames) -------------------------------------------------------------------------------- /train_inceptionv3.py: -------------------------------------------------------------------------------- 1 | __author__ = "ben" 2 | 3 | from utils.model_util import train_model_imagedatagen 4 | from utils.model_util import load_inceptionV3_model 5 | from utils.img_preprocessing import get_img_gen 6 | import pandas as pd 7 | import numpy as np 8 | import os 9 | from keras.optimizers import Adam, SGD 10 | from utils.file_util import write_result2df_json 11 | from keras.utils.training_utils import multi_gpu_model 12 | 13 | input_size = 512 14 | input_channels = 3 15 | batch_size = 15 16 | epochs = 50 17 | gpus = 3 18 | kfold = 5 # five fold cross validation 19 | 20 | df_train_all = pd.read_csv("../output/filtered_train.csv") 21 | df_valid = pd.read_csv("../output/validation_label.csv") 22 | df_test = pd.read_csv("../output/sample_submission.csv") 23 | model_save_file = "weight/inceptionV3" 24 | result_save_dir = "../result/inceptionV3" 25 | 26 | train_gen = get_img_gen() 27 | 28 | def _predict(result, pred_filenames): 29 | converted_result = np.zeros_like(result) # real probability after converted 30 | mapping = pd.read_csv("../input/mapping.txt", header=None, names=["maps"]).maps.tolist() 31 | print(mapping) 32 | for j in range(80): 33 | print(mapping[j]) 34 | converted_result[:, mapping[j]] = result[:, j] 35 | print(converted_result.shape) 36 | pd.DataFrame({"filename": pred_filenames}).to_csv("../result/inceptionV3/test_filename.csv", index=None) 37 | np.save("../result/inceptionV3/real_test_2_result.npy", result) 38 | 39 | write_result2df_json(result, df_test=pd.DataFrame({"filename": pred_filenames}), 40 | df_filename="../result/inceptionV3/real_test_2_result.csv", 41 | json_filename="../result/inceptionV3/real_test_2_result.json") 42 | 43 | model = load_inceptionV3_model(input_size=input_size) 44 | 45 | adam_optimizer = Adam(lr=5*1e-4) 46 | sgd_optimizer = SGD(lr=1e-4, decay=1e-6, momentum=0.8) 47 | y_full_test = [] 48 | 49 | for fold in range(kfold): 50 | train_dir = os.path.join("../input/train", "train_fold_"+str(fold)) 51 | valid_dir = os.path.join("../input/valid", "valid_fold_"+str(fold)) 52 | checkpoint_file = os.path.join("weights", "inceptionV3", "fold_"+str(fold))+".hdf5" 53 | len_train, len_valid = 0, 0 54 | for dir in os.listdir(train_dir): 55 | len_train += len(os.listdir(os.path.join(train_dir, dir))) 56 | for dir in os.listdir(valid_dir): 57 | len_valid += len(os.listdir(os.path.join(valid_dir, dir))) 58 | train_model_imagedatagen(model=multi_gpu_model(model, gpus=gpus), batch_size = batch_size, 59 | checkpoint_file=checkpoint_file, 60 | train_gen=train_gen[0].flow_from_directory(train_dir, shuffle=True, 61 | target_size=(input_size, input_size), batch_size=batch_size), 62 | valid_gen=train_gen[1].flow_from_directory(valid_dir, shuffle=True, 63 | target_size=(input_size, input_size), batch_size=batch_size), 64 | len_train=len(df_train_all), len_valid=len(df_valid), epochs=epochs, optimizer=sgd_optimizer) 65 | model.save_weights(os.path.join(model_save_file, str(fold)+".hdf5")) 66 | predict_gen = train_gen[1].flow_from_directory("../input/test/test", shuffle=False, class_mode=None, 67 | target_size=(input_size, input_size), batch_size=batch_size) 68 | pred_filenames = [x.split("/")[1].split(".")[0] for x in predict_gen.filenames] 69 | result = model.predict_generator(generator=predict_gen, steps=(len(df_train_all) - 1) // batch_size + 1, verbose=1) 70 | np.save(os.path.join(result_save_dir, "result_"+str(fold)+".npy")) 71 | y_full_test.append(result) 72 | 73 | p_test = y_full_test[0] 74 | for i in range(1, kfold): 75 | p_test += y_full_test 76 | 77 | p_test /= kfold 78 | 79 | _predict(p_test, pred_filenames) 80 | -------------------------------------------------------------------------------- /train_inception_renset_v2.py: -------------------------------------------------------------------------------- 1 | __author__ = "ben" 2 | 3 | import os 4 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2" 5 | 6 | from utils.img_preprocessing import get_img_gen 7 | import pandas as pd 8 | import numpy as np 9 | from keras.optimizers import SGD,Adam 10 | from utils.file_util import write_result2df_json 11 | from utils.model_util import load_inception_resnetv2, train_model_imagedatagen 12 | from keras.utils.training_utils import multi_gpu_model 13 | 14 | input_size = 512 15 | input_channels = 3 16 | batch_size = 10 17 | epochs = 30 18 | gpus = 3 19 | kfold = 5 20 | 21 | df_train_all = pd.read_csv("../output/train_label.csv") 22 | df_valid = pd.read_csv("../output/validation_label.csv") 23 | df_test = pd.read_csv("../output/sample_submission.csv") 24 | train_path = "../input/train" 25 | valid_path = "../input/valid" 26 | mapping = pd.read_csv("../input/mapping.txt",header=None, names=["maps"]).maps.tolist() 27 | model_save_file = "weight/inception_resnet_v2" 28 | result_save_dir = "../result/inception_resnet_v2" 29 | 30 | train_gen = get_img_gen() 31 | 32 | def _predict(result, pred_filenames): 33 | converted_result = np.zeros_like(result) # real probability after converted 34 | mapping = pd.read_csv("../input/mapping.txt", header=None, names=["maps"]).maps.tolist() 35 | print(mapping) 36 | for j in range(80): 37 | print(mapping[j]) 38 | converted_result[:, mapping[j]] = result[:, j] 39 | print(converted_result.shape) 40 | pd.DataFrame({"filename": pred_filenames}).to_csv("../result/inception_resnet_v2/test_filename.csv", index=None) 41 | np.save("../result/inception_resnet_v2/test_result.npy", converted_result) 42 | 43 | write_result2df_json(converted_result, df_test=pd.DataFrame({"filename": pred_filenames}), 44 | df_filename="../result/inception_resnet_v2/test_result.csv", 45 | json_filename="../result/inception_resnet_v2/test_result.json") 46 | 47 | model = load_inception_resnetv2(input_size=input_size) 48 | 49 | # p_full = [] 50 | # for i in range(6): 51 | # model.evaluate_generator(train_gen[0].flow_from_directory("../input/valid_img", shuffle=True, 52 | # target_size=(input_size, input_size), batch_size=batch_size)) 53 | 54 | adam_optimizer = Adam(lr=1e-3) 55 | sgd_optimizer = SGD(lr=3*1e-5, decay=1e-6, momentum=0.8) 56 | y_full_test = [] 57 | 58 | for fold in range(kfold): 59 | train_dir = os.path.join("../input/train", "train_fold_"+str(fold)) 60 | valid_dir = os.path.join("../input/valid", "valid_fold_"+str(fold)) 61 | checkpoint_file = os.path.join("weights", "inception_resnet_v2", "fold_"+str(fold))+".hdf5" 62 | len_train, len_valid = 0, 0 63 | for dir in os.listdir(train_dir): 64 | len_train += len(os.listdir(os.path.join(train_dir, dir))) 65 | for dir in os.listdir(valid_dir): 66 | len_valid += len(os.listdir(os.path.join(valid_dir, dir))) 67 | train_model_imagedatagen(model=multi_gpu_model(model, gpus=gpus), optimizer=sgd_optimizer, batch_size = batch_size, 68 | checkpoint_file=checkpoint_file, 69 | train_gen=train_gen[1].flow_from_direcstory(train_dir, shuffle=True, 70 | target_size=(input_size, input_size), batch_size=batch_size), 71 | valid_gen=train_gen[1].flow_from_directory(valid_dir, shuffle=True, 72 | target_size=(input_size, input_size), batch_size=batch_size), 73 | len_train= len_train, len_valid=len_valid, epochs=epochs) 74 | 75 | model.save_weights(os.path.join(model_save_file, str(fold)+".hdf5")) 76 | predict_gen = train_gen[1].flow_from_directory("../input/test/test", shuffle=False, class_mode=None, 77 | target_size=(input_size, input_size), batch_size=batch_size) 78 | pred_filenames = [x.split("/")[1].split(".")[0] for x in predict_gen.filenames] 79 | result = model.predict_generator(generator=predict_gen, steps=(len(df_train_all) - 1) // batch_size + 1, verbose=1) 80 | np.save(os.path.join(result_save_dir, "result_"+str(fold)+".npy")) 81 | y_full_test.append(result) 82 | 83 | p_test = y_full_test[0] 84 | for i in range(1, kfold): 85 | p_test += y_full_test 86 | 87 | p_test /= kfold 88 | 89 | _predict(p_test, pred_filenames) 90 | -------------------------------------------------------------------------------- /predict_single_picture.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils.model_util import * 3 | from utils.result_util import map_result 4 | import os 5 | import pandas as pd 6 | from PIL import Image as pil_img 7 | from tqdm import tqdm 8 | 9 | input_size = 512 10 | input_channel = 3 11 | 12 | # return five rectangular crops for each picture 13 | def crop_1(img, rate=0.8): 14 | batch_imgs = [] 15 | height = np.asarray(img).shape[0] 16 | width = np.asarray(img).shape[1] 17 | 18 | # upper left 19 | batch_imgs.append( 20 | np.asarray(img.crop(box=(0, 0, int(width * rate), int(height * rate))). 21 | resize((input_size, input_size), resample=pil_img.BICUBIC)) 22 | ) 23 | 24 | # upper right 25 | batch_imgs.append( 26 | np.asarray(img.crop(box=(0, int(height * (1 - rate)), int(width * rate), height)). 27 | resize((input_size, input_size), resample=pil_img.BICUBIC)) 28 | ) 29 | 30 | # buttom left 31 | batch_imgs.append( 32 | np.asarray(img.crop(box=(int(width * (1 - rate)), 0, width, int(height * rate))). 33 | resize((input_size, input_size), resample=pil_img.BICUBIC)) 34 | ) 35 | 36 | # buttom right 37 | batch_imgs.append( 38 | np.asarray(img.crop(box=(int(width * (1 - rate)), int(height * (1 - rate)), width, height)). 39 | resize((input_size, input_size), resample=pil_img.BICUBIC)) 40 | ) 41 | 42 | # center 43 | batch_imgs.append( 44 | np.asarray(img.crop(box=( 45 | int(width * rate / 2), int(height * rate / 2), int(width * (1 - rate / 2)), int(height * (1 - rate / 2)))). 46 | resize((input_size, input_size), resample=pil_img.BICUBIC)) 47 | ) 48 | print(np.array(batch_imgs).shape) 49 | return np.array(batch_imgs) 50 | 51 | # return a square crop with edge length equals the shorter length of origional image 52 | def crop_2(img): 53 | src_array = np.asarray(img) 54 | height = src_array.shape[0] 55 | width = src_array.shape[1] 56 | min_len = min(height, width) 57 | if height > width: 58 | left = np.random.randint(0, height - width) 59 | return img.crop(box=(0, left, min_len, min_len + left)) 60 | elif height < width: 61 | right = np.random.randint(0, width - height) 62 | return img.crop(box=(right, 0, min_len + right, min_len)) 63 | else: 64 | return img 65 | 66 | img_prefix = "../input/valid_img/" 67 | dirs = os.listdir(img_prefix) 68 | 69 | origin_cnt = 0 70 | later_cnt = 0 71 | 72 | # vgg16, vgg19, resnet50, resnet50_with_dropout, inceptionV3, inception_resnet_v2 73 | model = load_inception_resnetv2(input_size=input_size) 74 | real_img_lable = pd.read_csv("../output/ordered_valid_result.csv", header=0) 75 | 76 | for dir in dirs: 77 | print("============================"+dir+"============================================") 78 | for _img in tqdm(os.listdir(os.path.join(img_prefix, dir))): 79 | img_name = os.path.join(img_prefix,dir,_img) 80 | img_id = img_name.split("_")[-1].split(".")[0] 81 | p_img = pil_img.open(img_name) 82 | 83 | img1 = np.asarray( 84 | np.expand_dims(p_img.resize((input_size, input_size), resample=pil_img.BICUBIC), axis=0) 85 | ) 86 | 87 | threshold = 0.5 # this threshold needs to change in order to get a higher accuracy in valid data 88 | result = model.predict(img1) 89 | if result[0, np.argsort(-result)[0, 0]] < threshold: 90 | croped_batch = crop_1(p_img, rate=0.6) # resized array, the two crop methods will be used together according to the accuracy 91 | result_1 = model.predict(croped_batch) 92 | mean_result = result_1[0] 93 | for i in range(1, result_1.shape[0]): 94 | mean_result += result_1[i] 95 | maped_mean = map_result(np.expand_dims(np.argsort(-mean_result), axis=0)) 96 | mean_result /= result_1.shape[0] 97 | 98 | arg_res1 = map_result(np.argsort(-mean_result)) 99 | else: 100 | arg_res = map_result(np.argsort(-result)) 101 | 102 | if real_img_lable.loc[real_img_lable.image_id == img_id, "lable_id"].tolist()[0] in list(maped_mean[0][:3]): 103 | later_cnt += 1 104 | if real_img_lable.loc[real_img_lable.image_id == img_id, "lable_id"].tolist()[0] in list(arg_res[0][:3]): 105 | origin_cnt += 1 106 | else: 107 | print("=====================" + img_name + "========================================") 108 | print(arg_res) 109 | # print(arg_res1[:, :3]) 110 | # for i in range(result_1.shape[0]): 111 | # print(result_1[i][false_result[i][0]], result_1[i][false_result[i][1]], result_1[i][false_result[i][2]]) 112 | print(map_result(np.expand_dims(np.argsort(-mean_result), axis=0))) 113 | 114 | print("origin cnt:",origin_cnt, "later cnt:",later_cnt) 115 | -------------------------------------------------------------------------------- /utils/model_util.py: -------------------------------------------------------------------------------- 1 | from keras.metrics import top_k_categorical_accuracy 2 | from keras.models import Model, Sequential 3 | from keras.applications.resnet50 import ResNet50 4 | from keras.applications.inception_resnet_v2 import InceptionResNetV2 5 | from keras.applications.inception_v3 import InceptionV3 6 | from keras.applications.vgg19 import VGG19 7 | from keras.applications.vgg16 import VGG16 8 | from keras.layers import Flatten, Dense, GlobalAveragePooling2D, BatchNormalization, Dropout 9 | from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint 10 | from keras.optimizers import Adam 11 | 12 | n_classes = 80 13 | input_channels = 3 14 | 15 | def top_3_accuracy(y_true, y_pred): 16 | return top_k_categorical_accuracy(y_true, y_pred, k=3) 17 | 18 | def load_vgg16(input_size): 19 | base_model = VGG16(include_top=False, input_shape=(input_size, input_size, input_channels), weights="imagenet") 20 | model = Sequential() 21 | model.add(BatchNormalization(input_shape=(input_size,input_size,input_channels))) 22 | model.add(base_model) 23 | model.add(Flatten()) 24 | model.add(Dense(n_classes, activation="softmax")) 25 | for layer in base_model.layers: 26 | layer.trainable = False # turn trainable flag to true when doing fine tuning 27 | return model 28 | 29 | def load_vgg19(input_size): 30 | base_model = VGG19(include_top=False, weights="imagenet", input_shape=(input_size, input_size, input_channels)) 31 | model = Sequential() 32 | model.add(BatchNormalization(input_shape=(input_size,input_size,input_channels))) 33 | model.add(base_model) 34 | model.add(Flatten()) 35 | model.add(Dense(n_classes, activation="softmax")) 36 | for layer in base_model.layers: 37 | layer.trainable = False # turn trainable flag to true when doing fine tuning 38 | model.summary() 39 | return model 40 | 41 | def load_inceptionV3_model(input_size): 42 | base_model = InceptionV3(include_top=False, weights="imagenet", 43 | input_shape=(input_size, input_size, input_channels)) 44 | x = GlobalAveragePooling2D()(base_model.output) 45 | predictions = Dense(n_classes, activation='softmax')(x) 46 | model = Model(input=base_model.input, output=predictions) 47 | for layer in base_model.layers: 48 | layer.trainable = False # turn trainable flag to true when doing fine tuning 49 | # for layer in base_model.layers: 50 | # layer.trainable = True 51 | print("training with inceptionV3") 52 | return model 53 | 54 | def load_inception_resnetv2(input_size): 55 | model = Sequential() 56 | model.add(BatchNormalization(input_shape=(input_size, input_size, input_channels))) 57 | base_model = InceptionResNetV2(include_top=False, weights="imagenet", input_shape=(input_size, input_size, input_channels)) 58 | model.add(base_model) 59 | model.add(GlobalAveragePooling2D()) 60 | model.add(Dense(n_classes, activation="softmax")) 61 | 62 | # change the trainable to True when doing fine tuning 63 | # for layer in base_model.layers: 64 | # layer.trainable = True 65 | 66 | for layer in base_model.layers: 67 | layer.trainable = False # turn trainable flag to true when doing fine tuning 68 | model.summary() 69 | print("training with inception_resnetv2") 70 | return model 71 | 72 | def load_resnet50(input_size): 73 | base_model = ResNet50(include_top=False, weights="imagenet", input_shape=(input_size,input_size,input_channels)) 74 | model = Sequential() 75 | model.add(BatchNormalization(input_shape=(input_size, input_size, input_channels))) 76 | model.add(base_model) 77 | model.add(Flatten()) 78 | model.add(Dense(80, activation="softmax")) 79 | 80 | # when doing fine tuning, change the trainable to True 81 | # for layer in base_model.layers: 82 | # layer.trainable = True 83 | 84 | for layer in base_model.layers: 85 | layer.trainable = False # turn trainable flag to true when doing fine tuning 86 | model.summary() 87 | return model 88 | 89 | def load_resnet50_with_dropout(input_size): 90 | model = Sequential() 91 | model.add(BatchNormalization(input_shape=(input_size, input_size, input_channels))) 92 | base_model = ResNet50(include_top=False, weights="imagenet", input_shape=(input_size, input_size, input_channels)) 93 | model.add(base_model) 94 | model.add(Flatten()) 95 | model.add(Dense(2048, activation="relu")) 96 | model.add(Dropout(0.5)) # add a drop out layer 97 | model.add(Dense(80,activation="softmax")) 98 | for layer in base_model.layers: 99 | layer.trainable = False # turn trainable flag to true when doing fine tuning 100 | model.summary() 101 | print("training with resnet50 with dropout") 102 | return model 103 | 104 | def train_model_imagedatagen(model, checkpoint_file, train_gen, len_train, len_valid, epochs, 105 | batch_size, valid_gen, optimizer=Adam(lr=1e-4), initial_epoch=0): 106 | callbacks = [ 107 | EarlyStopping(monitor="val_loss", patience=10, verbose=1, min_delta=1e-4), 108 | ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=2, cooldown=2, verbose=1), 109 | ModelCheckpoint(filepath=checkpoint_file, save_best_only=True, save_weights_only=True) 110 | ] 111 | model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy", top_3_accuracy]) 112 | model.fit_generator(generator = train_gen, steps_per_epoch =((len_train - 1) // batch_size) + 1, epochs=epochs, 113 | callbacks=callbacks, validation_data=valid_gen, 114 | validation_steps=((len_valid - 1) // batch_size) + 1, verbose=1, initial_epoch=initial_epoch) 115 | 116 | return model -------------------------------------------------------------------------------- /utils/img_preprocessing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -* 2 | 3 | from keras.preprocessing.image import ImageDataGenerator 4 | from keras.preprocessing import image 5 | import numpy as np 6 | import cv2 7 | import math 8 | 9 | # keras.preprocessing.image源码简单修改，需要在keras.preprocessing.image.load_img函数中进行rescale之前先进行crop 10 | def preprocess_pointer(src): 11 | src_array = np.asarray(src) 12 | rate = np.random.randint(60,85) / 100 13 | 14 | # rate = 0.6 15 | height = src_array.shape[0] 16 | width = src_array.shape[1] 17 | 18 | # height_random = np.random.randint(0, int(height*(1-rate))) 19 | # width_random = np.random.randint(0, int(width*(1-rate))) 20 | # print(width_random, height_random, width_random+int(width*rate), height_random+int(height*rate)) 21 | # left, upper, right, lower 22 | # return src.crop(box=(width_random, height_random, width_random+int(width*rate), height_random+int(height*rate))) 23 | min_len = min(height, width) 24 | if height > width: 25 | left = np.random.randint(0, height-width) 26 | return src.crop(box=(0, left, min_len, min_len+left)) 27 | elif height < width: 28 | right = np.random.randint(0, width-height) 29 | return src.crop(box=(right, 0, min_len+right, min_len)) 30 | else: 31 | return src 32 | 33 | def get_img_gen(): 34 | 35 | ## crop操作修改 keras.preprocessing.image, 在里面加上preprocess_pointer函数。 36 | datagen1 = ImageDataGenerator( 37 | rotation_range=20, 38 | width_shift_range=0.2, 39 | height_shift_range=0.2, 40 | shear_range=0.1, 41 | zoom_range=0.1, 42 | horizontal_flip=True, 43 | vertical_flip=True, 44 | fill_mode='reflect' 45 | ) 46 | datagen2 = ImageDataGenerator() 47 | 48 | # set data augmentation parameters here 49 | datagen3 = ImageDataGenerator( 50 | rotation_range=50, 51 | width_shift_range=.25, 52 | height_shift_range=.25, 53 | shear_range=0.2, 54 | zoom_range=0.2, 55 | horizontal_flip=True, 56 | vertical_flip=True, 57 | fill_mode="reflect", 58 | channel_shift_range=50 59 | ) 60 | 61 | # normalization neccessary for correct image input to VGG16 62 | # datagen4.mean = np.array([103.939, 116.779, 123.68], dtype=np.float32).reshape(1, 1, 3) 63 | 64 | # no data augmentation for validation and test set 65 | # validgen = ImageDataGenerator(rescale=1., featurewise_center=True) 66 | # validgen.mean = np.array([103.939, 116.779, 123.68], dtype=np.float32).reshape(1, 1, 3) 67 | 68 | return datagen1, datagen2, datagen3 69 | 70 | 71 | def shift(x, wshift, hshift, row_axis=0, col_axis=1, channel_axis=2, fill_mode='nearest', cval=0.): 72 | h, w = x.shape[row_axis], x.shape[col_axis] 73 | tx = hshift * h 74 | ty = wshift * w 75 | translation_matrix = np.array([[1, 0, tx], 76 | [0, 1, ty], 77 | [0, 0, 1]]) 78 | transform_matrix = translation_matrix # no need to do offset 79 | x = image.apply_transform(x, transform_matrix, channel_axis, fill_mode, cval) 80 | return x 81 | 82 | def random_shift(img, mask, w_limit=(-0.1, 0.1), h_limit=(-0.1, 0.1), u=0.5): 83 | if np.random.random() < u: 84 | wshift = np.random.uniform(w_limit[0], w_limit[1]) 85 | hshift = np.random.uniform(h_limit[0], h_limit[1]) 86 | img = shift(img, wshift, hshift) 87 | mask = shift(mask, wshift, hshift) 88 | return img, mask 89 | 90 | 91 | def zoom(x, zx, zy, row_axis=0, col_axis=1, channel_axis=2, fill_mode='nearest', cval=0.): 92 | zoom_matrix = np.array([[zx, 0, 0], 93 | [0, zy, 0], 94 | [0, 0, 1]]) 95 | h, w = x.shape[row_axis], x.shape[col_axis] 96 | transform_matrix = image.transform_matrix_offset_center(zoom_matrix, h, w) 97 | x = image.apply_transform(x, transform_matrix, channel_axis, fill_mode, cval) 98 | return x 99 | 100 | 101 | def random_channel_shift(x, limit, channel_axis=2): 102 | x = np.rollaxis(x, channel_axis, 0) 103 | min_x, max_x = np.min(x), np.max(x) 104 | channel_images = [np.clip(x_ch + np.random.uniform(-limit, limit), min_x, max_x) for x_ch in x] 105 | x = np.stack(channel_images, axis=0) 106 | x = np.rollaxis(x, 0, channel_axis + 1) 107 | return x 108 | 109 | 110 | def random_gray(img, u=0.5): 111 | if np.random.random() < u: 112 | coef = np.array([[[0.114, 0.587, 0.299]]]) # rgb to gray (YCbCr) 113 | gray = np.sum(img * coef, axis=2) 114 | img = np.dstack((gray, gray, gray)) 115 | return img 116 | 117 | 118 | def random_contrast(img, limit=(-0.3, 0.3), u=0.5): 119 | if np.random.random() < u: 120 | alpha = 1.0 + np.random.uniform(limit[0], limit[1]) 121 | coef = np.array([[[0.114, 0.587, 0.299]]]) # rgb to gray (YCbCr) 122 | gray = img * coef 123 | gray = (3.0 * (1.0 - alpha) / gray.size) * np.sum(gray) 124 | img = alpha * img + gray 125 | img = np.clip(img, 0., 1.) 126 | return img 127 | 128 | 129 | def random_brightness(img, limit=(-0.3, 0.3), u=0.5): 130 | if np.random.random() < u: 131 | alpha = 1.0 + np.random.uniform(limit[0], limit[1]) 132 | img = alpha * img 133 | img = np.clip(img, 0., 1.) 134 | return img 135 | 136 | 137 | def random_saturation(img, limit=(-0.3, 0.3), u=0.5): 138 | if np.random.random() < u: 139 | alpha = 1.0 + np.random.uniform(limit[0], limit[1]) 140 | coef = np.array([[[0.114, 0.587, 0.299]]]) 141 | gray = img * coef 142 | gray = np.sum(gray, axis=2, keepdims=True) 143 | img = alpha * img + (1. - alpha) * gray 144 | img = np.clip(img, 0., 1.) 145 | return img 146 | 147 | 148 | def brightness_augment(img, factor=0.5): 149 | hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) #convert to hsv 150 | hsv = np.array(hsv, dtype=np.float64) 151 | hsv[:, :, 2] = hsv[:, :, 2] * (factor + np.random.uniform()) #scale channel V uniformly 152 | hsv[:, :, 2][hsv[:, :, 2] > 255] = 255 #reset out of range values 153 | rgb = cv2.cvtColor(np.array(hsv, dtype=np.uint8), cv2.COLOR_HSV2RGB) 154 | return rgb 155 | 156 | 157 | def color_normalize(img, meanstd): 158 | """ 159 | img为输入图像，meanstd为map{"mean": [m1, m2, m3], std: [s1, s2, s3]} 160 | :param img: 161 | :param meanstd: 162 | :return: 163 | """ 164 | dst = img 165 | for i in range(dst.shape[2]): 166 | dst[:, :, i] -= meanstd.mean[i] 167 | dst[:, :, i] /= meanstd.std[i] 168 | return dst 169 | 170 | 171 | def scale(img, size, interpolation=cv2.INTER_CUBIC): 172 | """ 173 | 用双立方插值法将原图像按照等比例缩放，小边的长度为size 174 | :param img: 175 | :param size: 176 | :param interpolation: 177 | :return: 178 | """ 179 | h, w = img.shape[0], img.shape[1] 180 | if (w <= h and w == size) or (h <= w and h == size): 181 | return img 182 | if w < h: 183 | return cv2.resize(img, dsize=(size, h/w * size), interpolation=interpolation) 184 | else: 185 | return cv2.resize(img, dsize=(w/h * size, size), interpolation=interpolation) 186 | 187 | def center_crop(img, size): 188 | """ 189 | 居中截取 190 | :param img: 191 | :param size: 192 | :return: 193 | """ 194 | w1 = math.ceil((img.shape[1]-size)/2) 195 | h1 = math.ceil((img.shape[0]-size)/2) 196 | return img[w1:w1+size, h1:h1+size, :] 197 | 198 | def random_crop(img, size, padding=0): 199 | """ 200 | 随机截取一个(size, size)大小的图片 201 | :param img: 202 | :param size: 203 | :param padding: 204 | :return: 205 | """ 206 | if padding > 0: 207 | temp = np.zeros((img.shape[0]+2*padding, img.shape[1]+2*padding, 3)) 208 | temp[padding+1:img.shape[0], padding+1:img.shape[1], :] = img 209 | img = temp 210 | 211 | h, w = img.shape[0], img.shape[1] 212 | if h == size and h == size: 213 | return img 214 | 215 | x1, y1 = np.random.randint(0, w-size), np.random.randint(0, h-size) 216 | output = img[y1:y1+size, x1:x1+size, :] 217 | return output 218 | 219 | def ten_crop(img, size): 220 | """ 221 | 截取4个角落和正中央的一部分，然后水平翻转和再次截取，每张图片截取10次返回 222 | :param img: 223 | :param size: 224 | :return: 225 | """ 226 | cen_crop = center_crop(img, size) 227 | w, h = img.shape[2], img.shape[2] 228 | output = [] 229 | for _img in [img, cv2.flip(img, flipCode=1)]: 230 | output.append(_img) 231 | output.append(_img[0:size, 0:size, :]) 232 | output.append(_img[w-size:w, 0:size, :]) 233 | output.append(_img[0:size, h-size:h, :]) 234 | output.append(_img[w-size:w, h-size:h, :]) 235 | 236 | return output.append(cen_crop) 237 | 238 | 239 | def randomScale(img, minSize, maxSize): 240 | """ 241 | 保留小边为targetSz，大边变为targetSz * 倍数，即等比例缩放 242 | :param img: 243 | :param minSize: 244 | :param maxSize: 245 | :return: 246 | """ 247 | w, h = img.shape[2], img.shape[1] 248 | targetSz = np.random.randint(minSize, maxSize) 249 | targetW, targetH = targetSz, targetSz 250 | if w < h: 251 | targetH = np.round(h / w * targetW) 252 | else: 253 | targetW = np.round(w / h * targetH) 254 | 255 | return cv2.resize(img, dsize=(targetW, targetH), interpolation=cv2.INTER_CUBIC) 256 | --------------------------------------------------------------------------------