├── IFE_1 ├── bin │ ├── gen_feat_test.sh │ ├── gen_feat_test001.sh │ ├── gen_feat_train_val.sh │ ├── gen_feat_train_val001.sh │ ├── preprocess.sh │ └── train.sh ├── conf │ ├── __pycache__ │ │ └── model001.cpython-36.pyc │ ├── model001.py │ └── model002.py ├── input │ └── readme.txt └── src │ ├── cnn │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── factory.cpython-36.pyc │ │ └── main.cpython-36.pyc │ ├── dataset │ │ ├── __pycache__ │ │ │ └── custom_dataset.cpython-36.pyc │ │ └── custom_dataset.py │ ├── factory.py │ ├── main.py │ ├── transforms │ │ ├── __pycache__ │ │ │ └── transforms.cpython-36.pyc │ │ └── transforms.py │ └── utils │ │ ├── __pycache__ │ │ ├── config.cpython-36.pyc │ │ ├── logger.cpython-36.pyc │ │ └── util.cpython-36.pyc │ │ ├── config.py │ │ ├── logger.py │ │ └── util.py │ ├── postprocess │ ├── __pycache__ │ │ ├── analyse_features.cpython-36.pyc │ │ ├── analyse_features3d.cpython-36.pyc │ │ └── make_submission.cpython-36.pyc │ ├── analyse_features.py │ ├── analyse_features3d.py │ └── make_submission.py │ ├── preprocess │ ├── __pycache__ │ │ ├── create_dataset.cpython-36.pyc │ │ ├── dicom_to_dataframe.cpython-36.pyc │ │ └── make_folds.cpython-36.pyc │ ├── create_dataset.py │ ├── dicom_to_dataframe.py │ └── make_folds.py │ └── utils │ ├── __pycache__ │ ├── mappings.cpython-36.pyc │ └── misc.cpython-36.pyc │ ├── mappings.py │ └── misc.py ├── IFE_2 ├── bin │ ├── gen_feat_test.sh │ ├── gen_feat_test001.sh │ ├── gen_feat_train_val.sh │ ├── gen_feat_train_val001.sh │ └── train.sh ├── cache ├── conf │ ├── __pycache__ │ │ ├── model001.cpython-36.pyc │ │ ├── model002.cpython-36.pyc │ │ └── model003.cpython-36.pyc │ └── model001.py ├── input └── src │ ├── cnn │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── factory.cpython-36.pyc │ │ └── main.cpython-36.pyc │ ├── dataset │ │ ├── __pycache__ │ │ │ └── custom_dataset.cpython-36.pyc │ │ └── custom_dataset.py │ ├── factory.py │ ├── main.py │ ├── transforms │ │ 
├── __pycache__ │ │ │ └── transforms.cpython-36.pyc │ │ └── transforms.py │ └── utils │ │ ├── __pycache__ │ │ ├── config.cpython-36.pyc │ │ ├── logger.cpython-36.pyc │ │ └── util.cpython-36.pyc │ │ ├── config.py │ │ ├── logger.py │ │ └── util.py │ ├── postprocess │ ├── __pycache__ │ │ ├── analyse_features.cpython-36.pyc │ │ ├── analyse_features3d.cpython-36.pyc │ │ ├── analyse_features_test.cpython-36.pyc │ │ └── make_submission.cpython-36.pyc │ ├── analyse_features.py │ ├── analyse_features3d.py │ └── make_submission.py │ ├── preprocess │ ├── __pycache__ │ │ ├── create_dataset.cpython-36.pyc │ │ ├── dicom_to_dataframe.cpython-36.pyc │ │ └── make_folds.cpython-36.pyc │ ├── create_dataset.py │ ├── dicom_to_dataframe.py │ └── make_folds.py │ └── utils │ ├── __pycache__ │ ├── mappings.cpython-36.pyc │ └── misc.cpython-36.pyc │ ├── mappings.py │ └── misc.py ├── IFE_3 ├── bin │ ├── gen_feat_test.sh │ ├── gen_feat_test001.sh │ ├── gen_feat_train_val.sh │ ├── gen_feat_train_val001.sh │ └── train.sh ├── cache ├── conf │ ├── .model001.py.swp │ ├── __pycache__ │ │ └── model001.cpython-36.pyc │ └── model001.py ├── input └── src │ ├── cnn │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── factory.cpython-36.pyc │ │ └── main.cpython-36.pyc │ ├── dataset │ │ ├── __pycache__ │ │ │ └── custom_dataset.cpython-36.pyc │ │ └── custom_dataset.py │ ├── factory.py │ ├── main.py │ ├── transforms │ │ ├── __pycache__ │ │ │ └── transforms.cpython-36.pyc │ │ └── transforms.py │ └── utils │ │ ├── __pycache__ │ │ ├── config.cpython-36.pyc │ │ ├── logger.cpython-36.pyc │ │ └── util.cpython-36.pyc │ │ ├── config.py │ │ ├── logger.py │ │ └── util.py │ ├── postprocess │ ├── __pycache__ │ │ ├── analyse_features3d.cpython-36.pyc │ │ └── make_submission.cpython-36.pyc │ ├── analyse_features.py │ ├── analyse_features3d.py │ └── make_submission.py │ ├── preprocess │ ├── __pycache__ │ │ ├── create_dataset.cpython-36.pyc │ │ ├── 
dicom_to_dataframe.cpython-36.pyc │ │ └── make_folds.cpython-36.pyc │ ├── create_dataset.py │ ├── dicom_to_dataframe.py │ └── make_folds.py │ └── utils │ ├── __pycache__ │ ├── mappings.cpython-36.pyc │ └── misc.cpython-36.pyc │ ├── mappings.py │ └── misc.py ├── README.md ├── cls_1 ├── bin │ ├── .train001_1.sh.swp │ ├── predict.sh │ └── train.sh ├── cache ├── conf │ ├── __pycache__ │ │ ├── model001.cpython-36.pyc │ │ ├── model001_0.cpython-36.pyc │ │ ├── model001_1.cpython-36.pyc │ │ ├── model001_2.cpython-36.pyc │ │ ├── model001_3.cpython-36.pyc │ │ └── model001_4.cpython-36.pyc │ ├── model001_0.py │ ├── model001_1.py │ ├── model001_2.py │ ├── model001_3.py │ ├── model001_4.py │ ├── model002_0.py │ ├── model002_1.py │ ├── model002_2.py │ ├── model002_3.py │ └── model002_4.py ├── input3d_model001 ├── input3d_model002 ├── input_model001 ├── input_model002 └── src │ ├── cnn │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── factory.cpython-36.pyc │ │ └── main.cpython-36.pyc │ ├── dataset │ │ ├── __pycache__ │ │ │ └── custom_dataset.cpython-36.pyc │ │ └── custom_dataset.py │ ├── factory.py │ ├── main.py │ ├── models │ │ ├── __pycache__ │ │ │ └── model.cpython-36.pyc │ │ └── model.py │ ├── transforms │ │ ├── __pycache__ │ │ │ └── transforms.cpython-36.pyc │ │ └── transforms.py │ └── utils │ │ ├── __pycache__ │ │ ├── config.cpython-36.pyc │ │ ├── logger.cpython-36.pyc │ │ └── util.cpython-36.pyc │ │ ├── config.py │ │ ├── logger.py │ │ └── util.py │ ├── postprocess │ ├── __pycache__ │ │ └── make_submission.cpython-36.pyc │ └── make_submission.py │ ├── preprocess │ ├── __pycache__ │ │ ├── create_dataset.cpython-36.pyc │ │ ├── dicom_to_dataframe.cpython-36.pyc │ │ └── make_folds.cpython-36.pyc │ ├── create_dataset.py │ ├── dicom_to_dataframe.py │ └── make_folds.py │ └── utils │ ├── __pycache__ │ ├── mappings.cpython-36.pyc │ └── misc.cpython-36.pyc │ ├── mappings.py │ └── misc.py ├── cls_2 ├── bin │ ├── predict.sh │ └── train.sh ├── cache ├── conf │ 
├── __pycache__ │ │ ├── model001.cpython-36.pyc │ │ ├── model001_0.cpython-36.pyc │ │ ├── model001_1.cpython-36.pyc │ │ ├── model001_2.cpython-36.pyc │ │ ├── model001_3.cpython-36.pyc │ │ └── model001_4.cpython-36.pyc │ ├── model001_0.py │ ├── model001_1.py │ ├── model001_2.py │ ├── model001_3.py │ └── model001_4.py ├── input └── src │ ├── cnn │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── factory.cpython-36.pyc │ │ └── main.cpython-36.pyc │ ├── dataset │ │ ├── __pycache__ │ │ │ └── custom_dataset.cpython-36.pyc │ │ └── custom_dataset.py │ ├── factory.py │ ├── main.py │ ├── models │ │ ├── __pycache__ │ │ │ └── model.cpython-36.pyc │ │ └── model.py │ ├── transforms │ │ ├── __pycache__ │ │ │ └── transforms.cpython-36.pyc │ │ └── transforms.py │ └── utils │ │ ├── __pycache__ │ │ ├── config.cpython-36.pyc │ │ ├── logger.cpython-36.pyc │ │ └── util.cpython-36.pyc │ │ ├── config.py │ │ ├── logger.py │ │ └── util.py │ ├── postprocess │ ├── __pycache__ │ │ └── make_submission.cpython-36.pyc │ └── make_submission.py │ ├── preprocess │ ├── __pycache__ │ │ ├── create_dataset.cpython-36.pyc │ │ ├── dicom_to_dataframe.cpython-36.pyc │ │ └── make_folds.cpython-36.pyc │ ├── create_dataset.py │ ├── dicom_to_dataframe.py │ └── make_folds.py │ └── utils │ ├── __pycache__ │ ├── mappings.cpython-36.pyc │ └── misc.cpython-36.pyc │ ├── mappings.py │ └── misc.py ├── cls_3 ├── bin │ ├── .train001_1.sh.swp │ ├── predict.sh │ └── train.sh ├── cache ├── conf │ ├── __pycache__ │ │ ├── model001.cpython-36.pyc │ │ ├── model001_0.cpython-36.pyc │ │ ├── model001_1.cpython-36.pyc │ │ ├── model001_2.cpython-36.pyc │ │ ├── model001_3.cpython-36.pyc │ │ └── model001_4.cpython-36.pyc │ ├── model001_0.py │ ├── model001_1.py │ ├── model001_2.py │ ├── model001_3.py │ └── model001_4.py ├── input3d_model001 ├── input_model001 └── src │ ├── cnn │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── factory.cpython-36.pyc │ │ └── main.cpython-36.pyc │ ├── 
dataset │ │ ├── __pycache__ │ │ │ └── custom_dataset.cpython-36.pyc │ │ └── custom_dataset.py │ ├── factory.py │ ├── main.py │ ├── models │ │ ├── __pycache__ │ │ │ └── model.cpython-36.pyc │ │ └── model.py │ ├── transforms │ │ ├── __pycache__ │ │ │ └── transforms.cpython-36.pyc │ │ └── transforms.py │ └── utils │ │ ├── __pycache__ │ │ ├── config.cpython-36.pyc │ │ ├── logger.cpython-36.pyc │ │ └── util.cpython-36.pyc │ │ ├── config.py │ │ ├── logger.py │ │ └── util.py │ ├── postprocess │ ├── __pycache__ │ │ └── make_submission.cpython-36.pyc │ └── make_submission.py │ ├── preprocess │ ├── __pycache__ │ │ ├── create_dataset.cpython-36.pyc │ │ ├── dicom_to_dataframe.cpython-36.pyc │ │ └── make_folds.cpython-36.pyc │ ├── create_dataset.py │ ├── dicom_to_dataframe.py │ └── make_folds.py │ └── utils │ ├── __pycache__ │ ├── mappings.cpython-36.pyc │ └── misc.cpython-36.pyc │ ├── mappings.py │ └── misc.py ├── libs ├── P00_ensemble_csv.py └── ensemble.sh └── requirement.txt /IFE_1/bin/gen_feat_test.sh: -------------------------------------------------------------------------------- 1 | gpu=0 2 | ep=best 3 | tta=5 4 | 5 | 6 | for model in model001 model002 7 | do 8 | for fold in 0 1 2 3 4 9 | do 10 | sh ./bin/gen_feat_test001.sh ${model} ${fold} ${ep} ${tta} 11 | done 12 | done 13 | 14 | 15 | -------------------------------------------------------------------------------- /IFE_1/bin/gen_feat_test001.sh: -------------------------------------------------------------------------------- 1 | model=$1 2 | gpu=0 3 | fold=$2 4 | ep=$3 5 | tta=$4 6 | clip=1e-6 7 | conf=./conf/${model}.py 8 | 9 | snapshot=./model/${model}/fold${fold}_${ep}.pt 10 | test=./model/${model}/fold${fold}_${ep}_test_tta${tta}.pkl 11 | 12 | python -m src.cnn.main test ${conf} --snapshot ${snapshot} --output ${test} --n-tta ${tta} --fold ${fold} --gpu ${gpu} --genfeat 1 13 | 14 | 15 | 16 | output=./features/${model} 17 | output3d=./features3d/${model} 18 | mkdir ./features 19 | mkdir ./features3d 20 | mkdir 
#!/bin/bash
# Generate validation features for one (model fold, data fold) pair.
#
# Usage: ./bin/gen_feat_train_val001.sh <modelfold> <datafold> <ep> <model>
#   modelfold  fold index of the trained snapshot to load
#   datafold   fold index forwarded to `src.cnn.main valid --fold`
#   ep         epoch tag used in the snapshot file name (the driver passes "best")
#   model      config name; resolved to ./conf/<model>.py
model=$4
gpu=0
modelfold=$1
datafold=$2
ep=$3
tta=1
clip=1e-6   # not referenced anywhere below; kept from the original script
conf=./conf/${model}.py

snapshot=./model/${model}/fold${modelfold}_${ep}.pt
valid=./model/${model}/fold${modelfold}_${ep}_datafold${datafold}_tta${tta}.pkl
output=./features/${model}
output3d=./features3d/${model}

# -p creates missing parents and is silent when the directory already exists.
# gen_feat_train_val.sh calls this script in a nested loop, so after the first
# call a plain `mkdir` would print a "File exists" error every time.
mkdir -p "${output}" "${output3d}"

# Step 1: run the CNN in `valid` mode and dump its outputs to ${valid}.
python -m src.cnn.main valid "${conf}" --snapshot "${snapshot}" --output "${valid}" --n-tta "${tta}" --fold "${datafold}" --gpu "${gpu}" --genfeat 1
# Step 2: post-process the dumped pickle into the feature directories.
python -m src.postprocess.analyse_features3d "${conf}" --pkl "${valid}" --output "${output}" --fold "${modelfold}" --datafold "${datafold}" --output3d "${output3d}"
./cache/train_raw.pkl --imgdir ./input/stage_2_train_images 5 | python -m src.preprocess.create_dataset --input ./cache/train_raw.pkl --output ./cache/train.pkl 6 | 7 | for seed in 10 25 50 75 100 8 | do 9 | python -m src.preprocess.make_folds --input ./cache/train.pkl --output ./cache/train_folds_s${seed}.pkl --n-fold 5 --seed ${seed} 10 | done 11 | 12 | ln -s ../IFE_1/cache ../IFE_2/ 13 | ln -s ../IFE_1/cache ../IFE_3/ 14 | ln -s ../IFE_1/cache ../cls_1/ 15 | ln -s ../IFE_1/cache ../cls_2/ 16 | ln -s ../IFE_1/cache ../cls_3/ 17 | 18 | # test 19 | python -m src.preprocess.dicom_to_dataframe --input ./input/stage_2_sample_submission.csv --output ./cache/test_raw.pkl --imgdir ./input/stage_2_test_images 20 | python -m src.preprocess.create_dataset --input ./cache/test_raw.pkl --output ./cache/test.pkl 21 | -------------------------------------------------------------------------------- /IFE_1/bin/train.sh: -------------------------------------------------------------------------------- 1 | for model in model001 model002 2 | do 3 | conf=./conf/${model}.py 4 | for fold in 0 1 2 3 4 5 | do 6 | for epoch in 25 30 35 40 45 7 | do 8 | python -m src.cnn.main train ${conf} --fold ${fold} --gpu 0 --epoch ${epoch} 9 | done 10 | done 11 | done 12 | 13 | -------------------------------------------------------------------------------- /IFE_1/conf/__pycache__/model001.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/conf/__pycache__/model001.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/input/readme.txt: -------------------------------------------------------------------------------- 1 | This folder contains the raw data provided by the competition. 2 | 3 | In stage 1, the raw data includes: 4 | 1. stage_1_train_images/ 5 | 2. stage_1_test_images/ 6 | 3. 
stage_1_train.csv 7 | 4. stage_1_sample_submission.csv 8 | 9 | 10 | -------------------------------------------------------------------------------- /IFE_1/src/cnn/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | cv2.setNumThreads(0) # fix potential pytorch worker issues 3 | -------------------------------------------------------------------------------- /IFE_1/src/cnn/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/cnn/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/cnn/__pycache__/factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/cnn/__pycache__/factory.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/cnn/__pycache__/main.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/cnn/__pycache__/main.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/cnn/dataset/__pycache__/custom_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/cnn/dataset/__pycache__/custom_dataset.cpython-36.pyc -------------------------------------------------------------------------------- 
def resized_crop(image, height, width, x_min, y_min, x_max, y_max):
    """Crop the box (x_min, y_min, x_max, y_max) out of `image`, then resize it.

    The crop is delegated to albumentations' functional `crop`; the resize goes
    through `cv2.resize`, which is given the target size as (width, height).
    """
    cropped = F.crop(image, x_min, y_min, x_max, y_max)
    return cv2.resize(cropped, (width, height))


class RandomResizedCrop(ImageOnlyTransform):
    """Image-only transform: random crop of random area/aspect ratio, resized.

    Args:
        height, width: size of the returned image.
        scale: (low, high) bounds of the crop area as a fraction of the
            source image area.
        ratio: (low, high) bounds from which the crop aspect ratio is drawn.
        always_apply, p: forwarded unchanged to `ImageOnlyTransform`.
    """

    def __init__(self, height, width, scale=(0.08, 1.0), ratio=(3/4, 4/3), always_apply=False, p=1.0):
        super().__init__(always_apply, p)
        self.height = height
        self.width = width
        self.scale = scale
        self.ratio = ratio

    def apply(self, image, **params):
        """Return a randomly cropped and resized copy of `image`.

        Up to 15 candidate boxes are sampled; the first one that fits inside
        the image is used. If none fits, a random square crop whose side is
        the shorter image side is used instead.
        """
        src_h, src_w = image.shape[:2]
        src_area = src_h * src_w

        for _ in range(15):
            target_area = random.uniform(*self.scale) * src_area
            aspect_ratio = random.uniform(*self.ratio)

            crop_w = int(round(math.sqrt(target_area * aspect_ratio)))
            crop_h = int(round(math.sqrt(target_area / aspect_ratio)))

            # With probability 0.5, transpose the box, but only when the
            # transposed aspect ratio is still inside the allowed range.
            if random.random() < 0.5 and min(self.ratio) <= (crop_h / crop_w) <= max(self.ratio):
                crop_w, crop_h = crop_h, crop_w

            # Candidate does not fit inside the image: draw another one.
            if crop_w > src_w or crop_h > src_h:
                continue

            left = random.randint(0, src_w - crop_w)
            top = random.randint(0, src_h - crop_h)
            return resized_crop(image, self.height, self.width, left, top, left+crop_w, top+crop_h)

        # Fallback: random square crop with side = shorter image dimension.
        side = min(src_h, src_w)
        left = random.randint(0, src_w - side)
        top = random.randint(0, src_h - side)
        return resized_crop(image, self.height, self.width, left, top, left+side, top+side)
def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch with the same value."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)


def get_lr(optim):
    """Return the learning rate of the optimizer's first parameter group.

    Returns 0 when `optim` is falsy (e.g. None).
    """
    return optim.param_groups[0]['lr'] if optim else 0


def save_model(model, optim, detail, fold, dirname):
    """Save model and optimizer state plus `detail` to `dirname`.

    The file is named ``fold<fold>_ep<detail['epoch']>.pt``; the saved path is
    logged afterwards.
    """
    filename = 'fold%d_ep%d.pt' % (fold, detail['epoch'])
    path = os.path.join(dirname, filename)
    checkpoint = {
        'model': model.state_dict(),
        'optim': optim.state_dict(),
        'detail': detail,
    }
    torch.save(checkpoint, path)
    log('saved model to %s' % path)


def load_model(path, model, optim=None):
    """Restore a checkpoint written by `save_model` and return its `detail`.

    Args:
        path: checkpoint location (converted with `str` before loading).
        model: module that receives the saved 'model' state dict; it is moved
            to CUDA afterwards.
        optim: optional optimizer; when truthy, the saved 'optim' state is
            loaded into it.
    """
    # remap everything onto CPU
    state = torch.load(str(path), map_location=lambda storage, location: storage)

    model.load_state_dict(state['model'])
    if not optim:
        log('not loading optim')
    else:
        log('loading optim too')
        optim.load_state_dict(state['optim'])

    model.cuda()

    detail = state['detail']
    log('loaded model from %s' % path)

    return detail
https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/postprocess/__pycache__/analyse_features.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/postprocess/__pycache__/analyse_features3d.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/postprocess/__pycache__/analyse_features3d.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/postprocess/__pycache__/make_submission.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/postprocess/__pycache__/make_submission.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/postprocess/make_submission.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import argparse 4 | import pickle 5 | import time 6 | 7 | import pandas as pd 8 | import numpy as np 9 | 10 | from ..utils import mappings 11 | 12 | 13 | def get_args(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--input') 16 | parser.add_argument('--inputs', help='for ensembling. 
def avg_predictions(results):
    """Average the 'outputs' of several prediction dicts element-wise.

    The returned dict carries the 'ids' of the first element of `results`.
    """
    stacked = np.array([entry['outputs'] for entry in results])
    mean_outputs = stacked.mean(axis=0)
    return {'ids': results[0]['ids'], 'outputs': mean_outputs}


def read_prediction(path):
    """Unpickle a list of prediction dicts from `path` and average them."""
    print('loading %s...' % path)
    with open(path, 'rb') as fh:
        loaded = pickle.load(fh)
    return avg_predictions(loaded)


def parse_inputs(inputs):
    """Average predictions described by a (possibly nested) list of paths.

    Every nested list is averaged recursively first; every other element is
    treated as a pickle path and read with `read_prediction`.
    """
    results = [
        parse_inputs(elem) if type(elem) is list else read_prediction(elem)
        for elem in inputs
    ]
    return avg_predictions(results)


def main():
    """Build a submission CSV from one prediction file or a nested ensemble."""
    args = get_args()

    if args.input:
        result = read_prediction(args.input)
    else:
        # NOTE(review): eval() runs arbitrary Python taken from the command
        # line. Acceptable only while --inputs is typed by a trusted operator.
        result = parse_inputs(eval(args.inputs))

    sub = pd.read_csv(args.sample_submission)

    # Map "<id>_<label>" row keys to the predicted value for that label.
    value_by_row_id = {}
    for id, outputs in zip(result['ids'], result['outputs']):
        for i, output in enumerate(outputs):
            row_id = '%s_%s' % (id, mappings.num_to_label[i])
            value_by_row_id[row_id] = output

    sub['Label'] = sub.ID.map(value_by_row_id)
    # Rows with no prediction receive the smallest predicted value.
    sub.loc[sub.Label.isnull(),'Label'] = sub.Label.min()
    if args.clip:
        print('clip values by %e' % args.clip)
        sub['Label'] = np.clip(sub.Label, args.clip, 1-args.clip)

    sub.to_csv(args.output, index=False)
    print(sub.tail())
    print('saved to %s' % args.output)
/IFE_1/src/preprocess/__pycache__/create_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/preprocess/__pycache__/create_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/preprocess/__pycache__/dicom_to_dataframe.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/preprocess/__pycache__/dicom_to_dataframe.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/preprocess/__pycache__/make_folds.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_1/src/preprocess/__pycache__/make_folds.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_1/src/preprocess/create_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import collections 4 | import pickle 5 | from pprint import pprint 6 | 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | from ..utils import misc 11 | 12 | 13 | def get_args(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--input') 16 | parser.add_argument('--output') 17 | return parser.parse_args() 18 | 19 | 20 | def show_distribution(dataset): 21 | counter = collections.defaultdict(int) 22 | for row in dataset.itertuples(): 23 | for label in row.labels.split(): 24 | counter[label] += 1 25 | if not row.labels: 26 | counter['negative'] += 1 27 | counter['all'] += 1 28 | pprint(counter) 29 | 30 | 
def parse_position(df):
    """Expand `ImagePositionPatient` into Position1..Position3 columns.

    Each cell of `ImagePositionPatient` is turned into a `pd.Series`; the three
    resulting columns are appended to a copy of `df`.
    """
    expanded = df.ImagePositionPatient.apply(lambda x: pd.Series(x))
    expanded.columns = ['Position1', 'Position2', 'Position3']
    return pd.concat([df, expanded], axis=1)


def parse_orientation(df):
    """Expand `ImageOrientationPatient` into Orient1..Orient6 columns.

    Same mechanism as `parse_position`, for a six-element column.
    """
    expanded = df.ImageOrientationPatient.apply(lambda x: pd.Series(x))
    expanded.columns = ['Orient1', 'Orient2', 'Orient3', 'Orient4', 'Orient5', 'Orient6']
    return pd.concat([df, expanded], axis=1)


def add_adjacent_labels(df):
    """Attach the labels of the neighbouring slices to every row.

    Rows are sorted by `PositionOrd` and grouped by `StudyInstanceUID`. Within
    each study, `LeftLabel` is the `labels` value of the previous row and
    `RightLabel` that of the next row. The first row of a study gets an empty
    `LeftLabel`, the last row an empty `RightLabel`.

    Args:
        df: frame with at least `ID`, `labels`, `StudyInstanceUID` and
            `PositionOrd` columns.

    Returns:
        `df` sorted by `PositionOrd`, merged on `ID` with the new `LeftLabel`
        and `RightLabel` columns.
    """
    df = df.sort_values('PositionOrd')

    records = []
    print('making adjacent labels...')
    for _, group in tqdm(df.groupby('StudyInstanceUID')):
        labels = list(group.labels)
        last = len(labels) - 1
        for j, slice_id in enumerate(group.ID):
            records.append({
                # The first slice has no left neighbour. Indexing labels[j-1]
                # at j == 0 would wrap round to the last slice of the study.
                'LeftLabel': labels[j - 1] if j > 0 else '',
                'RightLabel': labels[j + 1] if j < last else '',
                'ID': slice_id,
            })

    # Explicit columns keep the 'ID' merge key present even with no records.
    neighbours = pd.DataFrame(records, columns=['LeftLabel', 'RightLabel', 'ID'])
    return pd.merge(df, neighbours, on='ID')
def get_args():
    """Parse the command-line options of the DICOM-to-dataframe step."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', help='provided by kaggle, stage_1_train.csv for stage1')
    parser.add_argument('--output')
    parser.add_argument('--imgdir')
    parser.add_argument('--n-pool', default=4, type=int)
    parser.add_argument('--nrows', default=None, type=int)
    return parser.parse_args()


def group_id_by_label(df):
    """Collect, per image id, the labels whose `Label` value is 1.

    Each `ID` cell is split on '_' into (prefix, id, label); the image key is
    '<prefix>_<id>'. Images with no positive label map to an empty list.
    """
    ids = {}
    for row in tqdm(df.itertuples(), total=len(df)):
        prefix, image_id, label = row.ID.split('_')
        key = '%s_%s' % (prefix, image_id)
        positives = ids.setdefault(key, [])
        if row.Label == 1:
            positives.append(label)
    return ids


def remove_corrupted_images(ids):
    """Return a copy of `ids` without the hard-coded corrupted image id."""
    remaining = ids.copy()
    for bad_id in ['ID_6431af929']:
        if bad_id in remaining:
            del remaining[bad_id]
            print('removed %s' % bad_id)
        else:
            print('%s not found' % bad_id)

    return remaining


def create_record(item, dirname):
    """Read one DICOM file and build its metadata and pixel-statistics record.

    Args:
        item: (id, labels) pair, as produced by iterating `ids.items()`.
        dirname: directory that contains '<id>.dcm'.

    Returns:
        dict with 'ID', 'labels' (space-joined), 'n_label', whatever
        `misc.get_dicom_raw` returns, and max/min/mean/diff statistics for the
        raw pixels, the window taken from the DICOM header ('doctor') and a
        fixed 40/80 window ('custom').
    """
    image_id, labels = item

    dicom = pydicom.dcmread('%s/%s.dcm' % (dirname, image_id))

    record = {
        'ID': image_id,
        'labels': ' '.join(labels),
        'n_label': len(labels),
    }
    record.update(misc.get_dicom_raw(dicom))

    raw = dicom.pixel_array
    slope = float(record['RescaleSlope'])
    intercept = float(record['RescaleIntercept'])
    center = misc.get_dicom_value(record['WindowCenter'])
    width = misc.get_dicom_value(record['WindowWidth'])

    image = misc.rescale_image(raw, slope, intercept)
    doctor = misc.apply_window(image, center, width)
    custom = misc.apply_window(image, 40, 80)

    # Same four statistics for each pixel array, keyed '<prefix>_<stat>'.
    for prefix, pixels in (('raw', raw), ('doctor', doctor), ('custom', custom)):
        highest = pixels.max()
        lowest = pixels.min()
        record[prefix + '_max'] = highest
        record[prefix + '_min'] = lowest
        record[prefix + '_mean'] = pixels.mean()
        record[prefix + '_diff'] = highest - lowest
    return record


def create_df(ids, args):
    """Build one record per image in a process pool; return them sorted by ID."""
    print('making records...')
    worker = functools.partial(create_record, dirname=args.imgdir)
    with Pool(args.n_pool) as pool:
        # imap_unordered yields in completion order; sorting by ID below
        # makes the final frame order independent of that.
        records = list(tqdm(
            iterable=pool.imap_unordered(worker, ids.items()),
            total=len(ids),
        ))
    return pd.DataFrame(records).sort_values('ID').reset_index(drop=True)


def main():
    """Read the label CSV, build the per-image dataframe and pickle it."""
    args = get_args()

    df_input = pd.read_csv(args.input, nrows=args.nrows)
    print('read %s (%d records)' % (args.input, len(df_input)))

    ids = remove_corrupted_images(group_id_by_label(df_input))

    df_output = create_df(ids, args)

    with open(args.output, 'wb') as f:
        pickle.dump(df_output, f)

    print('converted dicom to dataframe (%d records)' % len(df_output))
    print('saved to %s' % args.output)
def get_args():
    """Parse the command-line options of the fold-assignment step."""
    parser = argparse.ArgumentParser()
    for flag, extra in (
        ('--input', {}),
        ('--output', {}),
        ('--n-fold', dict(type=int, default=5)),
        ('--seed', dict(type=int, default=10)),
    ):
        parser.add_argument(flag, **extra)
    return parser.parse_args()


def _make_folds(df, n_fold, seed):
    """Assign every PatientID to one of ``n_fold`` folds, balancing rare labels.

    For each patient the label with the smallest dataset-wide count among the
    patient's labels is looked up, and the patient goes to a fold that so far
    holds the fewest occurrences of that label (ties broken with ``random``,
    seeded through the global ``random.seed(seed)``). Patients without any
    label are treated as having the single label ``''``.

    Returns:
        dict mapping patient id to fold index.
    """
    # Dataset-wide number of occurrences of every label.
    label_totals = collections.defaultdict(int)
    for label_list in df.labels.str.split():
        for name in label_list:
            label_totals[name] += 1

    # (fold, label) -> number of occurrences already placed in that fold.
    per_fold = collections.Counter()

    assignment = {}
    random.seed(seed)
    patients = df.groupby('PatientID')
    print('making %d folds...' % n_fold)
    for patient_id, rows in tqdm(patients, total=len(patients)):

        patient_labels = [
            name
            for row in rows.itertuples()
            for name in row.labels.split()
        ]
        if len(patient_labels) == 0:
            patient_labels = ['']

        totals = [label_totals[name] for name in patient_labels]
        rarest = patient_labels[np.argmin(totals)]
        loads = [per_fold[(k, rarest)] for k in range(n_fold)]
        lowest = min(loads)
        candidates = [k for k, load in enumerate(loads) if load == lowest]
        chosen = random.choice(candidates)
        assignment[patient_id] = chosen

        for name in patient_labels:
            per_fold[(chosen, name)] += 1

    pprint(per_fold)

    return assignment


def main():
    """Load the pickled dataframe, add a ``fold`` column and pickle it again."""
    args = get_args()
    with open(args.input, 'rb') as src:
        df = pickle.load(src)

    patient_to_fold = _make_folds(df, args.n_fold, args.seed)
    df['fold'] = df.PatientID.map(patient_to_fold)
    with open(args.output, 'wb') as dst:
        pickle.dump(df, dst)

    print('saved to %s' % args.output)
def get_dicom_value(x, cast=int):
    """Convert a DICOM element value to a scalar.

    Multi-valued elements (pydicom ``MultiValue`` or a tuple, which is what
    ``cast()`` below turns them into) contribute their first item only.

    Args:
        x: scalar or multi-valued element value.
        cast: callable applied to the selected value (defaults to ``int``).
    """
    # The tuple check comes first so plain tuples never touch pydicom.
    if isinstance(x, tuple) or isinstance(x, pydicom.multival.MultiValue):
        return cast(x[0])
    return cast(x)


def cast(value):
    """Turn a pydicom ``MultiValue`` into a tuple; return anything else as is.

    ``MultiValue`` is referenced through ``pydicom.multival`` -- the same path
    ``get_dicom_value`` uses -- rather than ``pydicom.valuerep``, where it is
    not defined and is presumably only visible as an incidental import.
    """
    if isinstance(value, pydicom.multival.MultiValue):
        return tuple(value)
    return value


def get_dicom_raw(dicom):
    """Return every capitalised attribute of ``dicom`` except ``PixelData``.

    Attribute names come from ``dir(dicom)``; values go through ``cast`` so
    multi-valued elements become tuples.
    """
    return {
        attr: cast(getattr(dicom, attr))
        for attr in dir(dicom)
        if attr[0].isupper() and attr != 'PixelData'
    }


def rescale_image(image, slope, intercept):
    """Apply the linear rescale ``image * slope + intercept``."""
    return image * slope + intercept


def apply_window(image, center, width):
    """Clamp a copy of ``image`` to ``[center - width // 2, center + width // 2]``.

    The input array is left untouched. Note the floor division: an odd
    ``width`` yields a window one unit narrower than ``width``.
    """
    image = image.copy()
    min_value = center - width // 2
    max_value = center + width // 2
    image[image < min_value] = min_value
    image[image > max_value] = max_value
    return image


def get_dicom_meta(dicom):
    """Return the subset of DICOM header fields used for grouping and windowing."""
    return {
        'PatientID': dicom.PatientID, # can be grouped (20-548)
        'StudyInstanceUID': dicom.StudyInstanceUID, # can be grouped (20-60)
        'SeriesInstanceUID': dicom.SeriesInstanceUID, # can be grouped (20-60)
        'WindowWidth': get_dicom_value(dicom.WindowWidth),
        'WindowCenter': get_dicom_value(dicom.WindowCenter),
        'RescaleIntercept': float(dicom.RescaleIntercept),
        'RescaleSlope': float(dicom.RescaleSlope), # all same (1.0)
    }
#!/bin/sh
# Run one trained fold model over one data fold and dump its features.
#
# Usage: gen_feat_train_val001.sh MODELFOLD DATAFOLD EPOCH [MODEL]
#   MODELFOLD  fold index of the snapshot to load
#   DATAFOLD   fold index of the data to run it on
#   EPOCH      snapshot tag used in the file name (e.g. "best")
#   MODEL      config/model name; optional, defaults to model001
#
# gen_feat_train_val.sh already passes the model name as the 4th argument;
# it used to be ignored because the name was hard-coded here.
model=${4:-model001}
gpu=0
modelfold=$1
datafold=$2
ep=$3
tta=1
clip=1e-6   # not used by this script
conf=./conf/${model}.py

snapshot=./model/${model}/fold${modelfold}_${ep}.pt
valid=./model/${model}/fold${modelfold}_${ep}_datafold${datafold}_tta${tta}.pkl
output=./features/${model}

# -p: this script is called once per (modelfold, datafold) pair, so the
# directories already exist on every call but the first.
mkdir -p "${output}"

python -m src.cnn.main valid "${conf}" --snapshot "${snapshot}" --output "${valid}" --n-tta "${tta}" --fold "${datafold}" --gpu "${gpu}" --genfeat 1
python -m src.postprocess.analyse_features "${conf}" --pkl "${valid}" --output "${output}" --fold "${modelfold}" --datafold "${datafold}"
# Configuration for model001: EfficientNet-B0, 6 outputs, 512x512 inputs.

workdir = './model/model001'
seed = 100
apex = True

n_fold = 5
epoch = 10
resume_from = None

batch_size = 28
num_workers = 4
imgsize = (512, 512) #(height, width)
#imgsize = (448, 448) #(height, width)

loss = dict(
    name='BCEWithLogitsLoss',
    params=dict(),
)

optim = dict(
    name='Adam',
    params=dict(
        lr=6e-4,
    ),
)

model = dict(
    name='efficientnetB0',
    pretrained='imagenet',
    n_output=6,
)

# Learning rate is multiplied by 2/3 after each of the first six epochs.
scheduler = dict(
    name='MultiStepLR',
    params=dict(
        milestones=[1,2,3,4,5,6],
        gamma=2/3,
    ),
)

#normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],}
normalize = None

# Transform specs: `name` selects the transform class and `params` is passed
# to its constructor as keyword arguments (see factory.get_transforms).
crop = dict(name='RandomResizedCrop', params=dict(height=imgsize[0], width=imgsize[1], scale=(0.7,1.0), p=1.0))
resize = dict(name='Resize', params=dict(height=imgsize[0], width=imgsize[1]))
hflip = dict(name='HorizontalFlip', params=dict(p=0.5,))
vflip = dict(name='VerticalFlip', params=dict(p=0.5,))
contrast = dict(name='RandomBrightnessContrast', params=dict(brightness_limit=0.08, contrast_limit=0.08, p=0.5))
totensor = dict(name='ToTensor', params=dict(normalize=normalize))
# p belongs inside params: only `params` reaches the transform constructor,
# so a sibling `p=0.7` key was silently ignored and the library default
# probability was used instead.
rotate = dict(name='Rotate', params=dict(limit=30, border_mode=0, p=0.7))

window_policy = 2

# valid and test reuse the random training transforms -- presumably on
# purpose for test-time augmentation (the scripts pass --n-tta); confirm
# before changing.
data = dict(
    train=dict(
        dataset_type='CustomDataset',
        annotations='./cache/train_folds_s10.pkl',
        imgdir='./input/stage_1_train_images',
        imgsize=imgsize,
        n_grad_acc=1,
        loader=dict(
            shuffle=True,
            batch_size=batch_size,
            drop_last=True,
            num_workers=num_workers,
            pin_memory=False,
        ),
        transforms=[crop, hflip, rotate, contrast, totensor],
        dataset_policy='all',
        window_policy=window_policy,
    ),
    valid = dict(
        dataset_type='CustomDataset',
        annotations='./cache/train_folds_s10.pkl',
        imgdir='./input/stage_1_train_images',
        imgsize=imgsize,
        loader=dict(
            shuffle=False,
            batch_size=batch_size,
            drop_last=False,
            num_workers=num_workers,
            pin_memory=False,
        ),
        transforms=[crop, hflip, rotate, contrast, totensor],
        dataset_policy='all',
        window_policy=window_policy,
    ),
    test = dict(
        dataset_type='CustomDataset',
        annotations='./cache/test.pkl',
        imgdir='./input/stage_1_test_images',
        imgsize=imgsize,
        loader=dict(
            shuffle=False,
            batch_size=batch_size,
            drop_last=False,
            num_workers=num_workers,
            pin_memory=False,
        ),
        transforms=[crop, hflip, rotate, contrast, totensor],
        dataset_policy='all',
        window_policy=window_policy,
    ),
)
def apply_window_policy(image, row, policy):
    """Build a 3-channel image from three intensity windows of ``image``.

    Policy 1 uses center/width (40, 80), (80, 200) and the window stored on
    ``row`` (``WindowCenter`` / ``WindowWidth``); policy 2 uses (40, 80),
    (80, 200) and (40, 380). Each channel is scaled, mean-centred and the
    result is stacked channel-last.

    Raises:
        ValueError: for any other ``policy``. (This used to be a bare
            ``raise``, which fails with an unrelated "No active exception to
            reraise" RuntimeError.)
    """
    if policy == 1:
        image1 = misc.apply_window(image, 40, 80) # brain
        image2 = misc.apply_window(image, 80, 200) # subdural
        image3 = misc.apply_window(image, row.WindowCenter, row.WindowWidth)
        image1 = (image1 - 0) / 80
        image2 = (image2 - (-20)) / 200
        # NOTE(review): divides by zero when the windowed image is constant.
        image3 = (image3 - image3.min()) / (image3.max()-image3.min())
    elif policy == 2:
        image1 = misc.apply_window(image, 40, 80) # brain
        image2 = misc.apply_window(image, 80, 200) # subdural
        image3 = misc.apply_window(image, 40, 380) # bone
        image1 = (image1 - 0) / 80
        image2 = (image2 - (-20)) / 200
        image3 = (image3 - (-150)) / 380
    else:
        raise ValueError('unknown window_policy: %r' % (policy,))

    # Stack as (3, H, W), then move the channel axis last.
    return np.array([
        image1 - image1.mean(),
        image2 - image2.mean(),
        image3 - image3.mean(),
    ]).transpose(1,2,0)


def apply_dataset_policy(df, policy):
    """Optionally rebalance ``df`` according to ``policy``.

    ``'all'`` keeps every row. ``'pos==neg'`` keeps all rows with a non-empty
    ``labels`` value plus an equally sized random sample of the rows with an
    empty one (``DataFrame.sample`` without replacement, so it needs at least
    as many negatives as positives).

    Raises:
        ValueError: for any other ``policy`` (previously a bare ``raise``).
    """
    if policy == 'all':
        pass
    elif policy == 'pos==neg':
        df_positive = df[df.labels != '']
        df_negative = df[df.labels == '']
        df_sampled = df_negative.sample(len(df_positive))
        df = pd.concat([df_positive, df_sampled], sort=False)
    else:
        raise ValueError('unknown dataset_policy: %r' % (policy,))
    log('applied dataset_policy %s (%d records)' % (policy, len(df)))

    return df


class CustomDataset(torch.utils.data.Dataset):
    """Dataset that reads DICOM slices listed in a pickled annotation frame."""

    def __init__(self, cfg, folds):
        """Load ``cfg.annotations`` and keep only the rows of the given folds.

        Args:
            cfg: per-split config; this class reads ``annotations``,
                ``imgdir``, ``dataset_policy`` and ``window_policy`` from it.
            folds: collection of fold ids to keep; a falsy value keeps all.
        """
        self.cfg = cfg

        log(f'dataset_policy: {self.cfg.dataset_policy}')
        log(f'window_policy: {self.cfg.window_policy}')

        self.transforms = factory.get_transforms(self.cfg)
        # NOTE: pickle.load runs arbitrary code from the file; only point
        # cfg.annotations at files produced by this project.
        with open(cfg.annotations, 'rb') as f:
            self.df = pickle.load(f)

        if folds:
            self.df = self.df[self.df.fold.isin(folds)]
        log('read dataset (%d records)' % len(self.df))

        self.df = apply_dataset_policy(self.df, self.cfg.dataset_policy)
        #self.df = self.df.sample(560)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target, ID)`` for the ``idx``-th row.

        ``image`` is the output of the configured transforms; ``target`` is a
        ``torch.FloatTensor`` with one slot per entry of
        ``mappings.label_to_num``, set to 1.0 for every label on the row.
        """
        row = self.df.iloc[idx]

        path = '%s/%s.dcm' % (self.cfg.imgdir, row.ID)

        dicom = pydicom.dcmread(path)
        image = dicom.pixel_array
        image = misc.rescale_image(image, row.RescaleSlope, row.RescaleIntercept)
        image = apply_window_policy(image, row, self.cfg.window_policy)

        image = self.transforms(image=image)['image']

        target = np.zeros(len(mappings.label_to_num))
        for label in row.labels.split():
            target[mappings.label_to_num[label]] = 1.0

        # NOTE(review): the guard tests `spread_diagnosis` but the weight is
        # read from `propagate_diagnosis`; a config defining only the former
        # raises AttributeError here. Left as is because the intended
        # attribute name cannot be determined from this file.
        if hasattr(self.cfg, 'spread_diagnosis'):
            for label in row.LeftLabel.split() + row.RightLabel.split():
                cls = mappings.label_to_num[label]
                target[cls] += self.cfg.propagate_diagnosis
        target = np.clip(target, 0.0, 1.0)

        return image, torch.FloatTensor(target), row.ID
def get_loss(cfg):
    """Instantiate ``torch.nn.<cfg.loss.name>`` with a fixed per-class weight.

    The weight ``[2,1,1,1,1,1]`` is created on the GPU, so this requires CUDA
    and a 6-output model. ``cfg.loss.params`` must not contain ``weight``
    itself.
    """
    #loss = getattr(nn, cfg.loss.name)(**cfg.loss.params)
    class_weight = torch.FloatTensor([2,1,1,1,1,1]).cuda()
    loss = getattr(nn, cfg.loss.name)(weight=class_weight, **cfg.loss.params)
    log('loss: %s' % cfg.loss.name)
    return loss


def get_dataloader(cfg, folds=None):
    """Build a ``DataLoader`` over ``CustomDataset(cfg, folds)`` using ``cfg.loader``."""
    dataset = CustomDataset(cfg, folds)
    log('use default(random) sampler')
    loader = DataLoader(dataset, **cfg.loader)
    return loader


def get_transforms(cfg):
    """Compose the albumentations pipeline described by ``cfg.transforms``.

    Each entry needs ``name`` and ``params``; only ``params`` is forwarded to
    the transform constructor, any other key on the entry is ignored.
    """
    def get_object(transform):
        if hasattr(A, transform.name):
            return getattr(A, transform.name)
        else:
            # Falls back to names imported into this module (e.g. ToTensor,
            # RandomResizedCrop).
            # NOTE(review): eval() executes whatever string the config holds;
            # acceptable only while configs are trusted local files.
            return eval(transform.name)
    transforms = [get_object(transform)(**transform.params) for transform in cfg.transforms]
    return A.Compose(transforms)


def get_model(cfg):
    """Create the network named by ``cfg.model.name`` with ``cfg.model.n_output`` outputs.

    Raises:
        ValueError: if the name is not one of the supported models. The
            function used to fall off the end and return ``None``, which only
            surfaced later as an AttributeError in the caller.
    """

    log(f'model: {cfg.model.name}')
    log(f'pretrained: {cfg.model.pretrained}')
    # NOTE(review): cfg.model.pretrained is only logged; neither branch below
    # reads it.

    if cfg.model.name in ['resnext101_32x8d_wsl']:
        model = torch.hub.load('facebookresearch/WSL-Images', cfg.model.name)
        model.fc = torch.nn.Linear(2048, cfg.model.n_output)
        return model

    if cfg.model.name in ['efficientnetB0']:
        model = EfficientNet.from_pretrained('efficientnet-b0')
        model._fc = torch.nn.Linear(1280,cfg.model.n_output)
        return model

    raise ValueError('unknown model name: %s' % cfg.model.name)


def get_optim(cfg, parameters):
    """Instantiate ``torch.optim.<cfg.optim.name>`` over ``parameters``."""
    optim = getattr(torch.optim, cfg.optim.name)(parameters, **cfg.optim.params)
    log(f'optim: {cfg.optim.name}')
    return optim
def resized_crop(image, height, width, x_min, y_min, x_max, y_max):
    """Crop the given box out of ``image`` and resize it to ``(height, width)``."""
    cropped = F.crop(image, x_min, y_min, x_max, y_max)
    return cv2.resize(cropped, (width, height))


class RandomResizedCrop(ImageOnlyTransform):
    """Crop a random region of random area/aspect ratio and resize it.

    ``scale`` bounds the crop area as a fraction of the source area and
    ``ratio`` bounds the sampled aspect ratio. The output always has size
    ``(height, width)``.
    """

    def __init__(self, height, width, scale=(0.08, 1.0), ratio=(3/4, 4/3), always_apply=False, p=1.0):
        super().__init__(always_apply, p)
        self.height, self.width = height, width
        self.scale, self.ratio = scale, ratio

    def apply(self, image, **params):
        src_h, src_w = image.shape[:2]
        src_area = src_h * src_w

        # Up to 15 attempts at sampling a box that fits inside the image.
        for _ in range(15):
            crop_area = random.uniform(*self.scale) * src_area
            shape_ratio = random.uniform(*self.ratio)

            crop_w = int(round(math.sqrt(crop_area * shape_ratio)))
            crop_h = int(round(math.sqrt(crop_area / shape_ratio)))

            # Half of the time, swap the sides when the swapped shape is
            # still within the allowed ratio range.
            if random.random() < 0.5:
                if min(self.ratio) <= (crop_h / crop_w) <= max(self.ratio):
                    crop_w, crop_h = crop_h, crop_w

            if crop_w > src_w or crop_h > src_h:
                continue

            left = random.randint(0, src_w - crop_w)
            top = random.randint(0, src_h - crop_h)
            return resized_crop(image, self.height, self.width, left, top, left+crop_w, top+crop_h)

        # No sampled box fitted: use a randomly placed square whose side is
        # the shorter image side.
        side = min(src_h, src_w)
        left = random.randint(0, src_w - side)
        top = random.randint(0, src_h - side)
        return resized_crop(image, self.height, self.width, left, top, left+side, top+side)
def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch with the same value."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)


def get_lr(optim):
    """Return the learning rate of the first param group, or 0 for a falsy ``optim``."""
    return optim.param_groups[0]['lr'] if optim else 0


def save_model(model, optim, detail, fold, dirname):
    """Write model/optimizer state and ``detail`` to ``<dirname>/fold<fold>_ep<epoch>.pt``.

    The epoch in the file name is taken from ``detail['epoch']``.
    """
    filename = 'fold%d_ep%d.pt' % (fold, detail['epoch'])
    path = os.path.join(dirname, filename)
    checkpoint = {
        'model': model.state_dict(),
        'optim': optim.state_dict(),
        'detail': detail,
    }
    torch.save(checkpoint, path)
    log('saved model to %s' % path)


def load_model(path, model, optim=None):
    """Restore a checkpoint written by ``save_model`` and move the model to the GPU.

    The optimizer state is restored only when ``optim`` is given.

    Returns:
        the ``detail`` object stored in the checkpoint.
    """

    # keep every tensor on the CPU while deserialising
    checkpoint = torch.load(str(path), map_location=lambda storage, location: storage)

    model.load_state_dict(checkpoint['model'])
    if not optim:
        log('not loading optim')
    else:
        log('loading optim too')
        optim.load_state_dict(checkpoint['optim'])

    model.cuda()

    detail = checkpoint['detail']
    log('loaded model from %s' % path)

    return detail
def avg_predictions(results):
    """Average the ``'outputs'`` of several prediction dicts element-wise.

    Args:
        results: sequence of dicts, each holding ``'ids'`` and ``'outputs'``.
            The ``'outputs'`` entries are stacked with ``np.array`` and
            averaged over the first (per-result) axis.

    Returns:
        dict with ``'ids'`` taken from the first result and ``'outputs'``
        holding the mean array.
    """
    stacked = np.array([entry['outputs'] for entry in results])
    mean_outputs = stacked.mean(axis=0)
    # ids are taken from the first entry only; nothing here checks that the
    # other entries list the same ids in the same order.
    return dict(ids=results[0]['ids'], outputs=mean_outputs)
sub.to_csv(args.output, index=False) 77 | print(sub.tail()) 78 | print('saved to %s' % args.output) 79 | 80 | 81 | if __name__ == '__main__': 82 | print(sys.argv) 83 | main() 84 | -------------------------------------------------------------------------------- /IFE_2/src/preprocess/__pycache__/create_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_2/src/preprocess/__pycache__/create_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_2/src/preprocess/__pycache__/dicom_to_dataframe.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_2/src/preprocess/__pycache__/dicom_to_dataframe.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_2/src/preprocess/__pycache__/make_folds.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_2/src/preprocess/__pycache__/make_folds.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_2/src/preprocess/create_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import collections 4 | import pickle 5 | from pprint import pprint 6 | 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | from ..utils import misc 11 | 12 | 13 | def get_args(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--input') 16 | parser.add_argument('--output') 17 | return parser.parse_args() 18 | 19 | 20 | def 
def add_adjacent_labels(df):
    """Attach the labels of the neighbouring rows to every row.

    Rows are sorted by ``PositionOrd`` and grouped by ``StudyInstanceUID``.
    Within each group, a row's ``LeftLabel`` is the ``labels`` value of the
    previous row and its ``RightLabel`` is that of the next row. The first
    row of a group has an empty ``LeftLabel`` and the last row an empty
    ``RightLabel``.

    Args:
        df: DataFrame with ``ID``, ``labels``, ``StudyInstanceUID`` and
            ``PositionOrd`` columns.

    Returns:
        ``df`` (sorted by ``PositionOrd``) merged on ``ID`` with the new
        ``LeftLabel`` / ``RightLabel`` columns.
    """
    df = df.sort_values('PositionOrd')

    records = []
    print('making adjacent labels...')
    for _, group in tqdm(df.groupby('StudyInstanceUID')):
        labels = list(group['labels'])
        last = len(labels) - 1
        for j, image_id in enumerate(group['ID']):
            records.append({
                # Only rows after the first have a left neighbour. The previous
                # test (`j == 0`) was inverted: it read labels[-1] for the first
                # row and left every other row empty.
                'LeftLabel': labels[j - 1] if j > 0 else '',
                'RightLabel': labels[j + 1] if j < last else '',
                'ID': image_id,
            })

    # Explicit columns keep the merge key present even when `records` is empty.
    adjacent = pd.DataFrame(records, columns=['LeftLabel', 'RightLabel', 'ID'])
    return pd.merge(df, adjacent, on='ID')
def remove_corrupted_images(ids, corrupted_ids=('ID_6431af929',)):
    """Return a copy of ``ids`` without the entries listed as corrupted.

    Args:
        ids: dict keyed by image id. It is not modified; a shallow copy is
            filtered and returned.
        corrupted_ids: iterable of image ids to drop. Defaults to the single
            id that was previously hard-coded in this function.

    Returns:
        A new dict with the corrupted ids removed. A message is printed for
        every id, saying whether it was removed or was not present.
    """
    ids = ids.copy()
    for image_id in corrupted_ids:
        if image_id in ids:
            del ids[image_id]
            print('removed %s' % image_id)
        else:
            print('%s not found' % image_id)

    return ids
def create_record(item, dirname):
    """Build one metadata/statistics record for a single DICOM file.

    Args:
        item: ``(id, labels)`` pair; ``labels`` is a sequence of label names.
        dirname: directory containing ``<id>.dcm``.

    Returns:
        dict with the id, the space-joined labels, the label count, every
        attribute returned by ``misc.get_dicom_raw`` and max/min/mean/diff
        statistics for three views of the pixel data: raw, windowed with the
        file's own WindowCenter/WindowWidth ("doctor"), and windowed with
        center 40 / width 80 ("custom").
    """
    image_id, labels = item

    path = '%s/%s.dcm' % (dirname, image_id)
    dicom = pydicom.dcmread(path)

    record = {
        'ID': image_id,
        'labels': ' '.join(labels),
        'n_label': len(labels),
    }
    record.update(misc.get_dicom_raw(dicom))

    raw = dicom.pixel_array
    slope = float(record['RescaleSlope'])
    intercept = float(record['RescaleIntercept'])
    center = misc.get_dicom_value(record['WindowCenter'])
    width = misc.get_dicom_value(record['WindowWidth'])

    image = misc.rescale_image(raw, slope, intercept)
    doctor = misc.apply_window(image, center, width)
    custom = misc.apply_window(image, 40, 80)

    raw_max, raw_min = raw.max(), raw.min()
    doctor_max, doctor_min = doctor.max(), doctor.min()
    custom_max, custom_min = custom.max(), custom.min()

    record.update({
        'raw_max': raw_max,
        'raw_min': raw_min,
        'raw_mean': raw.mean(),
        # Subtract as Python ints: the raw extrema keep the pixel dtype, and a
        # fixed-width integer difference can wrap around when the value range
        # exceeds what that dtype can hold.
        'raw_diff': int(raw_max) - int(raw_min),
        'doctor_max': doctor_max,
        'doctor_min': doctor_min,
        'doctor_mean': doctor.mean(),
        'doctor_diff': doctor_max - doctor_min,
        'custom_max': custom_max,
        'custom_min': custom_min,
        'custom_mean': custom.mean(),
        'custom_diff': custom_max - custom_min,
    })
    return record
records)' % len(df_output)) 121 | print('saved to %s' % args.output) 122 | 123 | 124 | if __name__ == '__main__': 125 | print(sys.argv) 126 | main() 127 | -------------------------------------------------------------------------------- /IFE_2/src/preprocess/make_folds.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import collections 4 | import pickle 5 | from pprint import pprint 6 | import random 7 | 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | 12 | def get_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--input') 15 | parser.add_argument('--output') 16 | parser.add_argument('--n-fold', type=int, default=5) 17 | parser.add_argument('--seed', type=int, default=10) 18 | return parser.parse_args() 19 | 20 | 21 | def _make_folds(df, n_fold, seed): 22 | 23 | counter_gt = collections.defaultdict(int) 24 | for labels in df.labels.str.split(): 25 | for label in labels: 26 | counter_gt[label] += 1 27 | 28 | counter_folds = collections.Counter() 29 | 30 | folds = {} 31 | random.seed(seed) 32 | groups = df.groupby('PatientID') 33 | print('making %d folds...' 
% n_fold) 34 | for patient_id, group in tqdm(groups, total=len(groups)): 35 | 36 | labels = [] 37 | for row in group.itertuples(): 38 | for label in row.labels.split(): 39 | labels.append(label) 40 | if not labels: 41 | labels = [''] 42 | 43 | count_labels = [counter_gt[label] for label in labels] 44 | min_label = labels[np.argmin(count_labels)] 45 | count_folds = [(f, counter_folds[(f, min_label)]) for f in range(n_fold)] 46 | min_count = min([count for f,count in count_folds]) 47 | fold = random.choice([f for f,count in count_folds if count == min_count]) 48 | folds[patient_id] = fold 49 | 50 | for label in labels: 51 | counter_folds[(fold,label)] += 1 52 | 53 | pprint(counter_folds) 54 | 55 | return folds 56 | 57 | 58 | def main(): 59 | args = get_args() 60 | with open(args.input, 'rb') as f: 61 | df = pickle.load(f) 62 | 63 | folds = _make_folds(df, args.n_fold, args.seed) 64 | df['fold'] = df.PatientID.map(folds) 65 | with open(args.output, 'wb') as f: 66 | pickle.dump(df, f) 67 | 68 | print('saved to %s' % args.output) 69 | 70 | 71 | if __name__ == '__main__': 72 | print(sys.argv) 73 | main() 74 | -------------------------------------------------------------------------------- /IFE_2/src/utils/__pycache__/mappings.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_2/src/utils/__pycache__/mappings.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_2/src/utils/__pycache__/misc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_2/src/utils/__pycache__/misc.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_2/src/utils/mappings.py: 
def apply_window(image, center, width):
    """Clamp a copy of ``image`` to ``center ± width // 2``.

    Args:
        image: array supporting ``.copy()`` and boolean-mask assignment.
        center: middle of the window.
        width: full window width; half of it (floor division) is applied on
            each side of ``center``.

    Returns:
        A new array with values below the window raised to the lower bound
        and values above it lowered to the upper bound. The input is left
        untouched.
    """
    half_width = width // 2
    lower, upper = center - half_width, center + half_width

    windowed = image.copy()
    windowed[windowed < lower] = lower
    windowed[windowed > upper] = upper
    return windowed
# Generate test-set predictions for one model/fold/epoch and convert them
# into per-TTA feature files.
#
# Usage: sh ./bin/gen_feat_test001.sh <model> <fold> <epoch> <n_tta>
model=$1
gpu=0
fold=$2
ep=$3
tta=$4
clip=1e-6
conf=./conf/${model}.py

snapshot=./model/${model}/fold${fold}_${ep}.pt
test=./model/${model}/fold${fold}_${ep}_test_tta${tta}.pkl

python -m src.cnn.main test "${conf}" --snapshot "${snapshot}" --output "${test}" --n-tta "${tta}" --fold "${fold}" --gpu "${gpu}" --genfeat 1

output=./features/${model}
output3d=./features3d/${model}
# -p creates the parent directories and does not fail when they already
# exist; this script is run once per fold, so they usually do.
mkdir -p "${output}" "${output3d}"

# NOTE(review): the TTA ids are fixed at 0..4, which matches the tta=5 passed
# by gen_feat_test.sh. Confirm before calling this with another <n_tta>.
for tta_id in 0 1 2 3 4
do
    python -m src.postprocess.analyse_features3d "${conf}" --pkl "${test}" --output "${output}" --ttaid "${tta_id}" --istest 1 --fold "${fold}" --output3d "${output3d}"
done
# Run validation for one (model fold, data fold) pair and convert the
# resulting predictions into feature files.
#
# Usage: ./bin/gen_feat_train_val001.sh <modelfold> <datafold> <epoch> <model>
model=$4
gpu=0
modelfold=$1
datafold=$2
ep=$3
tta=1
clip=1e-6
conf=./conf/${model}.py

snapshot=./model/${model}/fold${modelfold}_${ep}.pt
valid=./model/${model}/fold${modelfold}_${ep}_datafold${datafold}_tta${tta}.pkl
output=./features/${model}
output3d=./features3d/${model}

# -p creates the parent directories and does not fail when they already
# exist; the driver script calls this once per fold pair.
mkdir -p "${output}" "${output3d}"

python -m src.cnn.main valid "${conf}" --snapshot "${snapshot}" --output "${valid}" --n-tta "${tta}" --fold "${datafold}" --gpu "${gpu}" --genfeat 1
python -m src.postprocess.analyse_features3d "${conf}" --pkl "${valid}" --output "${output}" --fold "${modelfold}" --datafold "${datafold}" --output3d "${output3d}"
https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/conf/.model001.py.swp -------------------------------------------------------------------------------- /IFE_3/conf/__pycache__/model001.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/conf/__pycache__/model001.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_3/input: -------------------------------------------------------------------------------- 1 | ../IFE_1/input -------------------------------------------------------------------------------- /IFE_3/src/cnn/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | cv2.setNumThreads(0) # fix potential pytorch worker issues 3 | -------------------------------------------------------------------------------- /IFE_3/src/cnn/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/src/cnn/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_3/src/cnn/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/src/cnn/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /IFE_3/src/cnn/__pycache__/factory.cpython-36.pyc: -------------------------------------------------------------------------------- 
class RandomResizedCrop(ImageOnlyTransform):
    """Crop a random region of the image and resize it to a fixed size.

    Args:
        height: output height passed to ``resized_crop``.
        width: output width passed to ``resized_crop``.
        scale: ``(low, high)`` range for the crop area, as a fraction of the
            input image area.
        ratio: ``(low, high)`` range the crop's aspect ratio is sampled from.
        always_apply, p: forwarded unchanged to ``ImageOnlyTransform``.
    """

    def __init__(self, height, width, scale=(0.08, 1.0), ratio=(3/4, 4/3), always_apply=False, p=1.0):
        super().__init__(always_apply, p)
        self.height = height
        self.width = width
        self.scale = scale
        self.ratio = ratio

    def apply(self, image, **params):
        """Return a randomly cropped and resized version of ``image``.

        Up to 15 candidate crops are sampled. The first one that fits inside
        the image is used. If none fits, a randomly placed square whose side
        is the shorter image side is used instead.
        """
        height, width = image.shape[:2]
        area = height * width
        ratio_lo, ratio_hi = min(self.ratio), max(self.ratio)

        for _ in range(15):
            target_area = random.uniform(*self.scale) * area
            aspect_ratio = random.uniform(*self.ratio)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            # Always draw the coin flip so the random stream is consumed the
            # same way as before. The `w > 0` guard avoids dividing by zero
            # when the candidate width rounds to 0 on very small images.
            flip = random.random() < 0.5
            if flip and w > 0 and ratio_lo <= (h / w) <= ratio_hi:
                w, h = h, w

            # Skip candidates with an empty side; they cannot be cropped.
            if 0 < w <= width and 0 < h <= height:
                x_min = random.randint(0, width - w)
                y_min = random.randint(0, height - h)
                return resized_crop(image, self.height, self.width, x_min, y_min, x_min+w, y_min+h)

        min_side = min(height, width)
        x_min = random.randint(0, width - min_side)
        y_min = random.randint(0, height - min_side)
        return resized_crop(image, self.height, self.width, x_min, y_min, x_min+min_side, y_min+min_side)
class Logger(object):
    """Thin wrapper that configures the root logger to write to a log file."""

    def __init__(self):
        # logging.getLogger() with no name returns the root logger.
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.DEBUG)
        #self.logger.addHandler(logging.StreamHandler())

    def setup(self, dirname, name):
        """Attach ``<dirname>/<name>.log`` as a log file and write a header.

        The directory is created if needed and the file is opened in append
        mode. Calling ``setup`` again for the same file reuses the existing
        handler instead of attaching a second one, which would write every
        message twice.

        Args:
            dirname: directory for the log file.
            name: log file name without the ``.log`` extension.
        """
        os.makedirs(dirname, exist_ok=True)

        path = f'{dirname}/{name}.log'
        # FileHandler stores its target as an absolute path in `baseFilename`.
        target = os.path.abspath(path)
        already_attached = any(
            isinstance(handler, logging.FileHandler) and handler.baseFilename == target
            for handler in self.logger.handlers
        )
        if not already_attached:
            self.logger.addHandler(logging.FileHandler(path, 'a'))

        log('')
        log('----- %s -----' % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        log(' '.join(sys.argv))
        log('logpath: %s' % path)
def load_model(path, model, optim=None):
    """Restore model (and optionally optimizer) state from a checkpoint.

    The checkpoint is expected to be a dict with ``'model'``, ``'optim'`` and
    ``'detail'`` entries, as written by ``save_model``.

    Args:
        path: checkpoint file path.
        model: module that receives the saved state dict. It is moved to the
            GPU when CUDA is available and left on its current device
            otherwise.
        optim: optional optimizer; when given, its saved state is restored.

    Returns:
        The ``'detail'`` entry stored in the checkpoint.
    """
    # Remap everything onto CPU so the checkpoint loads on any machine.
    state = torch.load(str(path), map_location=lambda storage, location: storage)

    model.load_state_dict(state['model'])

    # Move the model before restoring the optimizer, so the optimizer state is
    # loaded against parameters that are already on their final device. The
    # availability check keeps CPU-only hosts from failing here.
    if torch.cuda.is_available():
        model.cuda()
    else:
        log('cuda is not available, keeping model on its current device')

    if optim is not None:
        log('loading optim too')
        optim.load_state_dict(state['optim'])
    else:
        log('not loading optim')

    detail = state['detail']
    log('loaded model from %s' % path)

    return detail
def main():
    """Turn pickled predictions into a submission CSV.

    Reads either a single prediction file (``--input``) or a nested list of
    prediction files to average (``--inputs``). Every
    ``<image id>_<label name>`` row of the sample submission is mapped to its
    predicted value, and rows without a prediction are filled with the
    minimum predicted value. Values are optionally clipped to
    ``[clip, 1 - clip]`` and the result is written to ``--output``.
    """
    import ast

    args = get_args()

    if args.input:
        result = read_prediction(args.input)
    else:
        # --inputs is a (possibly nested) list literal of file paths.
        # literal_eval parses that without executing arbitrary code, which
        # eval() would do with whatever is on the command line.
        result = parse_inputs(ast.literal_eval(args.inputs))

    sub = pd.read_csv(args.sample_submission)
    predictions = {}
    for image_id, outputs in zip(result['ids'], result['outputs']):
        for i, output in enumerate(outputs):
            label = mappings.num_to_label[i]
            predictions['%s_%s' % (image_id, label)] = output

    sub['Label'] = sub.ID.map(predictions)
    # Rows with no prediction fall back to the smallest predicted value.
    sub.loc[sub.Label.isnull(), 'Label'] = sub.Label.min()
    if args.clip:
        print('clip values by %e' % args.clip)
        sub['Label'] = np.clip(sub.Label, args.clip, 1-args.clip)

    sub.to_csv(args.output, index=False)
    print(sub.tail())
    print('saved to %s' % args.output)
-------------------------------------------------------------------------------- /IFE_3/src/preprocess/__pycache__/create_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/src/preprocess/__pycache__/create_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_3/src/preprocess/__pycache__/dicom_to_dataframe.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/src/preprocess/__pycache__/dicom_to_dataframe.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_3/src/preprocess/__pycache__/make_folds.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/src/preprocess/__pycache__/make_folds.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_3/src/preprocess/create_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import collections 4 | import pickle 5 | from pprint import pprint 6 | 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | from ..utils import misc 11 | 12 | 13 | def get_args(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--input') 16 | parser.add_argument('--output') 17 | return parser.parse_args() 18 | 19 | 20 | def show_distribution(dataset): 21 | counter = collections.defaultdict(int) 22 | for row in dataset.itertuples(): 23 | for label in row.labels.split(): 24 | counter[label] += 1 25 | if not row.labels: 26 | 
counter['negative'] += 1 27 | counter['all'] += 1 28 | pprint(counter) 29 | 30 | 31 | def parse_position(df): 32 | expanded = df.ImagePositionPatient.apply(lambda x: pd.Series(x)) 33 | expanded.columns = ['Position1', 'Position2', 'Position3'] 34 | return pd.concat([df, expanded], axis=1) 35 | 36 | 37 | def parse_orientation(df): 38 | expanded = df.ImageOrientationPatient.apply(lambda x: pd.Series(x)) 39 | expanded.columns = ['Orient1', 'Orient2', 'Orient3', 'Orient4', 'Orient5', 'Orient6'] 40 | return pd.concat([df, expanded], axis=1) 41 | 42 | 43 | def add_adjacent_labels(df): 44 | df = df.sort_values('PositionOrd') 45 | 46 | records = [] 47 | print('making adjacent labels...') 48 | for index,group in tqdm(df.groupby('StudyInstanceUID')): 49 | 50 | labels = list(group.labels) 51 | for j,id in enumerate(group.ID): 52 | if j == 0: 53 | left = labels[j-1] 54 | else: 55 | left = '' 56 | if j+1 == len(labels): 57 | right = '' 58 | else: 59 | right = labels[j+1] 60 | 61 | records.append({ 62 | 'LeftLabel': left, 63 | 'RightLabel': right, 64 | 'ID': id, 65 | }) 66 | return pd.merge(df, pd.DataFrame(records), on='ID') 67 | 68 | 69 | def main(): 70 | args = get_args() 71 | 72 | with open(args.input, 'rb') as f: 73 | df = pickle.load(f) 74 | print('read %s (%d records)' % (args.input, len(df))) 75 | 76 | show_distribution(df) 77 | 78 | df = df[df.custom_diff > 60] 79 | print('removed records by custom_diff (%d records)' % len(df)) 80 | 81 | df = parse_position(df) 82 | 83 | df['WindowCenter'] = df.WindowCenter.apply(lambda x: misc.get_dicom_value(x)) 84 | df['WindowWidth'] = df.WindowWidth.apply(lambda x: misc.get_dicom_value(x)) 85 | df['PositionOrd'] = df.groupby('SeriesInstanceUID')[['Position3']].rank() / df.groupby('SeriesInstanceUID')[['Position3']].transform('count') 86 | 87 | df = add_adjacent_labels(df) 88 | df = df[['ID', 'labels', 'PatientID', 'WindowCenter', 'WindowWidth', 'RescaleIntercept', 'RescaleSlope', 'Position3', 'PositionOrd', 'LeftLabel', 
'RightLabel']] 89 | 90 | df = df.sort_values('ID') 91 | with open(args.output, 'wb') as f: 92 | pickle.dump(df, f) 93 | 94 | show_distribution(df) 95 | 96 | print('created dataset (%d records)' % len(df)) 97 | print('saved to %s' % args.output) 98 | 99 | 100 | if __name__ == '__main__': 101 | print(sys.argv) 102 | main() 103 | -------------------------------------------------------------------------------- /IFE_3/src/preprocess/make_folds.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import collections 4 | import pickle 5 | from pprint import pprint 6 | import random 7 | 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | 12 | def get_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--input') 15 | parser.add_argument('--output') 16 | parser.add_argument('--n-fold', type=int, default=5) 17 | parser.add_argument('--seed', type=int, default=10) 18 | return parser.parse_args() 19 | 20 | 21 | def _make_folds(df, n_fold, seed): 22 | 23 | counter_gt = collections.defaultdict(int) 24 | for labels in df.labels.str.split(): 25 | for label in labels: 26 | counter_gt[label] += 1 27 | 28 | counter_folds = collections.Counter() 29 | 30 | folds = {} 31 | random.seed(seed) 32 | groups = df.groupby('PatientID') 33 | print('making %d folds...' 
% n_fold) 34 | for patient_id, group in tqdm(groups, total=len(groups)): 35 | 36 | labels = [] 37 | for row in group.itertuples(): 38 | for label in row.labels.split(): 39 | labels.append(label) 40 | if not labels: 41 | labels = [''] 42 | 43 | count_labels = [counter_gt[label] for label in labels] 44 | min_label = labels[np.argmin(count_labels)] 45 | count_folds = [(f, counter_folds[(f, min_label)]) for f in range(n_fold)] 46 | min_count = min([count for f,count in count_folds]) 47 | fold = random.choice([f for f,count in count_folds if count == min_count]) 48 | folds[patient_id] = fold 49 | 50 | for label in labels: 51 | counter_folds[(fold,label)] += 1 52 | 53 | pprint(counter_folds) 54 | 55 | return folds 56 | 57 | 58 | def main(): 59 | args = get_args() 60 | with open(args.input, 'rb') as f: 61 | df = pickle.load(f) 62 | 63 | folds = _make_folds(df, args.n_fold, args.seed) 64 | df['fold'] = df.PatientID.map(folds) 65 | with open(args.output, 'wb') as f: 66 | pickle.dump(df, f) 67 | 68 | print('saved to %s' % args.output) 69 | 70 | 71 | if __name__ == '__main__': 72 | print(sys.argv) 73 | main() 74 | -------------------------------------------------------------------------------- /IFE_3/src/utils/__pycache__/mappings.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/src/utils/__pycache__/mappings.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_3/src/utils/__pycache__/misc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/IFE_3/src/utils/__pycache__/misc.cpython-36.pyc -------------------------------------------------------------------------------- /IFE_3/src/utils/mappings.py: 
-------------------------------------------------------------------------------- 1 | counter = { 2 | 'all': 674258, 3 | 'negative': 577155, 4 | 5 | 'any': 97103, 6 | 'epidural': 2761, 7 | 'subdural': 42496, 8 | 'subarachnoid': 32122, 9 | 'intraventricular': 23766, 10 | 'intraparenchymal': 32564, 11 | } 12 | 13 | 14 | label_to_num = { 15 | 'any': 0, 16 | 'epidural': 1, 17 | 'subdural': 2, 18 | 'subarachnoid': 3, 19 | 'intraventricular': 4, 20 | 'intraparenchymal': 5, 21 | } 22 | num_to_label = {v:k for k,v in label_to_num.items()} 23 | -------------------------------------------------------------------------------- /IFE_3/src/utils/misc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pydicom 3 | 4 | 5 | def get_dicom_value(x, cast=int): 6 | if type(x) in [pydicom.multival.MultiValue, tuple]: 7 | return cast(x[0]) 8 | else: 9 | return cast(x) 10 | 11 | 12 | def cast(value): 13 | if type(value) is pydicom.valuerep.MultiValue: 14 | return tuple(value) 15 | return value 16 | 17 | 18 | def get_dicom_raw(dicom): 19 | return {attr:cast(getattr(dicom,attr)) for attr in dir(dicom) if attr[0].isupper() and attr not in ['PixelData']} 20 | 21 | 22 | def rescale_image(image, slope, intercept): 23 | return image * slope + intercept 24 | 25 | 26 | def apply_window(image, center, width): 27 | image = image.copy() 28 | min_value = center - width // 2 29 | max_value = center + width // 2 30 | image[image < min_value] = min_value 31 | image[image > max_value] = max_value 32 | return image 33 | 34 | 35 | def get_dicom_meta(dicom): 36 | return { 37 | 'PatientID': dicom.PatientID, # can be grouped (20-548) 38 | 'StudyInstanceUID': dicom.StudyInstanceUID, # can be grouped (20-60) 39 | 'SeriesInstanceUID': dicom.SeriesInstanceUID, # can be grouped (20-60) 40 | 'WindowWidth': get_dicom_value(dicom.WindowWidth), 41 | 'WindowCenter': get_dicom_value(dicom.WindowCenter), 42 | 'RescaleIntercept': 
float(dicom.RescaleIntercept), 43 | 'RescaleSlope': float(dicom.RescaleSlope), # all same (1.0) 44 | } 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Hello! 2 | 3 | Below you can find an outline of how to reproduce my solution for the RSNA Intracranial Hemorrhage Detection competition.
4 | 5 | Visit the Kaggle forum for a solution overview: [Kaggle RSNA Intracranial Hemorrhage Detection: 4th Place Solution](https://www.kaggle.com/c/rsna-intracranial-hemorrhage-detection/discussion/118249#latest-678438) 6 | 7 | Our code is based on Appian's repo: https://github.com/appian42/kaggle-rsna-intracranial-hemorrhage 8 | 9 | # HARDWARE 10 | 11 | * Ubuntu 16.04 12 | * NVIDIA 2080Ti 13 | 14 | # SOFTWARE 15 | (Python packages are detailed separately in `requirements.txt`) 16 | * Python 3.6.7 17 | * CUDA 10.1 18 | * CUDNN 7501 19 | * NVIDIA Drivers 418.67 20 | 21 | # START 22 | 1. Set up the environment
23 | 2. Place the raw data into ./IFE_1/input folder.
24 | 1. The test data correspond to the test data provided in Stage 2 of the competition.
25 | 2. Use stage 2 training data to train the model.
26 | 3. cd IFE_1, run ./bin/preprocess.sh to preprocess the training and test images and split the training data into five folds.
27 | 4. To train:
28 | 1. Train feature extraction models
29 | * Go to IFE_1, IFE_2, IFE_3, run ./bin/train.sh to train five fold models. Models are saved in /model/. Best models are saved as foldx_best.pt.
30 | * It will take about 24 ~ 48 hours to train each model for one fold.
31 | 2. Extract features
32 | * Go to IFE_1, IFE_2, IFE_3, run ./bin/gen_feat_train_val.sh and ./bin/gen_feat_test.sh to generate 1D (and 3D) features. Use the best models generated from step 4.1.1.
33 | * It will take around 5 hours to extract one feature set (train/test TTA5).
34 | 3. Train classification models.
35 | * Go to folder cls_1, cls_2, cls_3, run ./bin/train.sh, train five fold models for each folder.
36 | * It will take around 3 hours to train 1D+3D model (single model), and around 1.5 hours to train 1D model (single model).
37 | 5. To infer:
38 | 1. Extract test features.
39 | * Go to folder IFE_1, IFE_2, IFE_3, run ./bin/gen_feat_test.sh to extract test features.
40 | 2. Predict classification probabilities
41 | * Go to folder cls_1, cls_2, cls_3, run ./bin/predict.sh to predict result using extracted features.
42 | 3. Ensemble
43 | * run ./libs/ensemble.sh to ensemble all the predictions.
44 | 6. Models and features are generated in sequence. If one follows the above-mentioned steps in order, all the softlinks should be valid by the time they are referenced.
45 | -------------------------------------------------------------------------------- /cls_1/bin/.train001_1.sh.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/bin/.train001_1.sh.swp -------------------------------------------------------------------------------- /cls_1/bin/predict.sh: -------------------------------------------------------------------------------- 1 | ep=best 2 | gpu=0 3 | tta=1 4 | clip=1e-6 5 | 6 | for model in model001 model002 7 | do 8 | for fold in 0 1 2 3 4 9 | do 10 | conf=./conf/${model}_${fold}.py 11 | snapshot=./model/${model}/fold${fold}_${ep}.pt 12 | 13 | for tta_id in 0 1 2 3 4 14 | do 15 | output=./model/${model}/fold${fold}_${ep}_test_tta${tta}_${tta_id}.pkl 16 | submission=./data/submission/${model}_fold${fold}_${ep}_test_tta${tta}_${tta_id}.csv 17 | 18 | python -m src.cnn.main test ${conf} --snapshot ${snapshot} --output ${output} --n-tta ${tta} --fold ${fold} --gpu ${gpu} --ttaid ${tta_id} 19 | python -m src.postprocess.make_submission --input ${output} --output ${submission} --clip ${clip} --sample_submission ../IFE_1/input/stage_2_sample_submission.csv 20 | done 21 | 22 | done 23 | done 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /cls_1/bin/train.sh: -------------------------------------------------------------------------------- 1 | for model in model001 model002 2 | do 3 | for fold in 0 1 2 3 4 4 | do 5 | conf=./conf/${model}_${fold}.py 6 | for epoch in 25 30 35 40 45 7 | do 8 | python -m src.cnn.main train ${conf} --fold ${fold} --gpu 0 --epoch ${epoch} 9 | done 10 | done 11 | done 12 | 13 | -------------------------------------------------------------------------------- /cls_1/cache: -------------------------------------------------------------------------------- 1 | ../IFE_1/cache 
-------------------------------------------------------------------------------- /cls_1/conf/__pycache__/model001.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/conf/__pycache__/model001.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/conf/__pycache__/model001_0.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/conf/__pycache__/model001_0.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/conf/__pycache__/model001_1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/conf/__pycache__/model001_1.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/conf/__pycache__/model001_2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/conf/__pycache__/model001_2.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/conf/__pycache__/model001_3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/conf/__pycache__/model001_3.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/conf/__pycache__/model001_4.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/conf/__pycache__/model001_4.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/conf/model001_0.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_0' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model001/fold0_train' 5 | testdir = './input_model001/fold0_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s10.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s10.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | 
batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model001_1.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_1' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model001/fold1_train' 5 | testdir = './input_model001/fold1_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s10.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | 
epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s10.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model001_2.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_2' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model001/fold2_train' 5 | testdir = './input_model001/fold2_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s10.pkl', 
51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s10.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model001_3.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_3' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model001/fold3_train' 5 | testdir = './input_model001/fold3_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 
0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s10.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s10.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model001_4.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_4' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model001/fold4_train' 5 | testdir = './input_model001/fold4_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | 
name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s10.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s10.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model002_0.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model002_0' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model002/fold0_train' 5 | testdir = './input_model002/fold0_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 
11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s25.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s25.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model002_1.py: 
-------------------------------------------------------------------------------- 1 | workdir = './model/model002_1' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model002/fold1_train' 5 | testdir = './input_model002/fold1_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s25.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s25.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 
| drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model002_2.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model002_2' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model002/fold2_train' 5 | testdir = './input_model002/fold2_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s25.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s25.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 
| transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model002_3.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model002_3' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model002/fold3_train' 5 | testdir = './input_model002/fold3_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s25.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | 
dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s25.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/conf/model002_4.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model002_4' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model002/fold4_train' 5 | testdir = './input_model002/fold4_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s25.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 
56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s25.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_1/input3d_model001: -------------------------------------------------------------------------------- 1 | ../IFE_1/features3d/model001 -------------------------------------------------------------------------------- /cls_1/input3d_model002: -------------------------------------------------------------------------------- 1 | ../IFE_1/features3d/model002 -------------------------------------------------------------------------------- /cls_1/input_model001: -------------------------------------------------------------------------------- 1 | ../IFE_1/features/model001 -------------------------------------------------------------------------------- /cls_1/input_model002: -------------------------------------------------------------------------------- 1 | ../IFE_1/features/model002 -------------------------------------------------------------------------------- /cls_1/src/cnn/__init__.py: 
-------------------------------------------------------------------------------- 1 | import cv2 2 | cv2.setNumThreads(0) # fix potential pytorch worker issues 3 | -------------------------------------------------------------------------------- /cls_1/src/cnn/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/src/cnn/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/src/cnn/__pycache__/factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/src/cnn/__pycache__/factory.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/src/cnn/__pycache__/main.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/src/cnn/__pycache__/main.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/src/cnn/dataset/__pycache__/custom_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/src/cnn/dataset/__pycache__/custom_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/src/cnn/models/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- 
class RandomResizedCrop(ImageOnlyTransform):
    """Crop a random sub-rectangle of the image and resize it to a fixed size.

    Args:
        height: output height passed on to ``resized_crop``.
        width: output width passed on to ``resized_crop``.
        scale: (min, max) fraction of the input area that the crop may cover.
        ratio: (min, max) aspect ratio sampled for the crop.
        always_apply, p: forwarded unchanged to ``ImageOnlyTransform``.
    """

    def __init__(self, height, width, scale=(0.08, 1.0), ratio=(3/4, 4/3), always_apply=False, p=1.0):
        super().__init__(always_apply, p)
        self.height = height
        self.width = width
        self.scale = scale
        self.ratio = ratio

    def apply(self, image, **params):
        """Return a randomly cropped and resized copy of ``image``.

        Up to 15 random crop boxes are sampled; the first one that fits inside
        the image is used.  If none fits, a random square whose side equals the
        shorter image side is used instead.
        """
        height, width = image.shape[:2]
        area = height * width

        for _ in range(15):
            target_area = random.uniform(*self.scale) * area
            aspect_ratio = random.uniform(*self.ratio)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            # A tiny image or a very small scale can round a side down to 0,
            # which would divide by zero below or produce an empty crop.
            if w <= 0 or h <= 0:
                continue

            # Randomly transpose the box, but only if the transposed aspect
            # ratio is still inside the allowed range.
            if random.random() < 0.5 and min(self.ratio) <= (h / w) <= max(self.ratio):
                w, h = h, w

            if w <= width and h <= height:
                x_min = random.randint(0, width - w)
                y_min = random.randint(0, height - h)
                return resized_crop(image, self.height, self.width, x_min, y_min, x_min+w, y_min+h)

        # Fallback: random square crop with the largest side that fits.
        min_side = min(height, width)
        x_min = random.randint(0, width - min_side)
        y_min = random.randint(0, height - min_side)
        return resized_crop(image, self.height, self.width, x_min, y_min, x_min+min_side, y_min+min_side)
def load_model(path, model, optim=None):
    """Restore model (and optionally optimizer) state from a snapshot file.

    The snapshot is expected to be a dict with the keys ``'model'``,
    ``'optim'`` and ``'detail'``.

    Args:
        path: location of the snapshot; converted with ``str()`` before loading.
        model: module whose ``load_state_dict`` receives ``state['model']``.
        optim: optional optimizer; when truthy its state is restored from
            ``state['optim']``.

    Returns:
        The ``'detail'`` entry stored in the snapshot.
    """
    # remap everything onto CPU so the file can be read regardless of the
    # device it was saved from
    state = torch.load(str(path), map_location=lambda storage, location: storage)

    model.load_state_dict(state['model'])
    if optim:
        log('loading optim too')
        optim.load_state_dict(state['optim'])
    else:
        log('not loading optim')

    # Only move to the GPU when one exists; an unconditional .cuda() raises on
    # CPU-only hosts and makes snapshots unusable there.
    if torch.cuda.is_available():
        model.cuda()
    else:
        log('CUDA is not available, keeping model on CPU')

    detail = state['detail']
    log('loaded model from %s' % path)

    return detail
def avg_predictions(results):
    """Average the ``'outputs'`` of several prediction dicts element-wise.

    Args:
        results: non-empty sequence of dicts, each with an ``'ids'`` entry and
            an ``'outputs'`` entry; all ``'outputs'`` must stack into one array.

    Returns:
        A dict with the ``'ids'`` of the first result and the mean of all
        ``'outputs'`` along the stacking axis.

    Raises:
        ValueError: if ``results`` is empty or the results do not list the
            same ids in the same order.
    """
    if len(results) == 0:
        raise ValueError('avg_predictions needs at least one result')

    ids = results[0]['ids']
    reference = list(ids)
    # Rows are averaged by position, so every result must describe the same
    # ids in the same order; otherwise unrelated rows would be mixed silently.
    for index, result in enumerate(results[1:], start=1):
        if list(result['ids']) != reference:
            raise ValueError('ids of result %d do not match ids of result 0' % index)

    outputs_all = np.array([result['outputs'] for result in results])
    outputs = outputs_all.mean(axis=0)
    return {
        'ids': ids,
        'outputs': outputs,
    }
% path) 38 | with open(path, 'rb') as f: 39 | results = pickle.load(f) 40 | return avg_predictions(results) 41 | 42 | 43 | def parse_inputs(inputs): 44 | results = [] 45 | for elem in inputs: 46 | if type(elem) is list: 47 | result = parse_inputs(elem) 48 | else: 49 | result = read_prediction(elem) 50 | results.append(result) 51 | return avg_predictions(results) 52 | 53 | 54 | def main(): 55 | args = get_args() 56 | 57 | if args.input: 58 | result = read_prediction(args.input) 59 | else: 60 | result = parse_inputs(eval(args.inputs)) 61 | 62 | sub = pd.read_csv(args.sample_submission) 63 | IDs = {} 64 | for id, outputs in zip(result['ids'], result['outputs']): 65 | for i, output in enumerate(outputs): 66 | label = mappings.num_to_label[i] 67 | ID = '%s_%s' % (id, label) 68 | IDs[ID] = output 69 | 70 | sub['Label'] = sub.ID.map(IDs) 71 | sub.loc[sub.Label.isnull(),'Label'] = sub.Label.min() 72 | if args.clip: 73 | print('clip values by %e' % args.clip) 74 | sub['Label'] = np.clip(sub.Label, args.clip, 1-args.clip) 75 | 76 | sub.to_csv(args.output, index=False) 77 | print(sub.tail()) 78 | print('saved to %s' % args.output) 79 | 80 | 81 | if __name__ == '__main__': 82 | print(sys.argv) 83 | main() 84 | -------------------------------------------------------------------------------- /cls_1/src/preprocess/__pycache__/create_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_1/src/preprocess/__pycache__/create_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /cls_1/src/preprocess/__pycache__/dicom_to_dataframe.cpython-36.pyc: -------------------------------------------------------------------------------- 
def add_adjacent_labels(df):
    """Attach the labels of the neighbouring slices to every row.

    Rows are ordered by ``PositionOrd`` and grouped by ``StudyInstanceUID``.
    Within each group, ``LeftLabel`` is the ``labels`` value of the previous
    row and ``RightLabel`` that of the next row; the first row has an empty
    ``LeftLabel`` and the last row an empty ``RightLabel``.

    Args:
        df: DataFrame with the columns ``ID``, ``labels``, ``PositionOrd`` and
            ``StudyInstanceUID``.

    Returns:
        ``df`` sorted by ``PositionOrd`` and merged on ``ID`` with the new
        ``LeftLabel`` / ``RightLabel`` columns.
    """
    df = df.sort_values('PositionOrd')

    records = []
    print('making adjacent labels...')
    for _, group in tqdm(df.groupby('StudyInstanceUID')):

        labels = list(group.labels)
        last = len(labels) - 1
        for j, id in enumerate(group.ID):
            # Only rows after the first have a left neighbour. Indexing
            # labels[j-1] at j == 0 would wrap around to the last slice.
            left = labels[j-1] if j > 0 else ''
            right = labels[j+1] if j < last else ''

            records.append({
                'LeftLabel': left,
                'RightLabel': right,
                'ID': id,
            })

    # Explicit columns keep the merge key present even when df has no rows.
    adjacent = pd.DataFrame(records, columns=['LeftLabel', 'RightLabel', 'ID'])
    return pd.merge(df, adjacent, on='ID')
pickle 5 | from pprint import pprint 6 | import random 7 | 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | 12 | def get_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--input') 15 | parser.add_argument('--output') 16 | parser.add_argument('--n-fold', type=int, default=5) 17 | parser.add_argument('--seed', type=int, default=10) 18 | return parser.parse_args() 19 | 20 | 21 | def _make_folds(df, n_fold, seed): 22 | 23 | counter_gt = collections.defaultdict(int) 24 | for labels in df.labels.str.split(): 25 | for label in labels: 26 | counter_gt[label] += 1 27 | 28 | counter_folds = collections.Counter() 29 | 30 | folds = {} 31 | random.seed(seed) 32 | groups = df.groupby('PatientID') 33 | print('making %d folds...' % n_fold) 34 | for patient_id, group in tqdm(groups, total=len(groups)): 35 | 36 | labels = [] 37 | for row in group.itertuples(): 38 | for label in row.labels.split(): 39 | labels.append(label) 40 | if not labels: 41 | labels = [''] 42 | 43 | count_labels = [counter_gt[label] for label in labels] 44 | min_label = labels[np.argmin(count_labels)] 45 | count_folds = [(f, counter_folds[(f, min_label)]) for f in range(n_fold)] 46 | min_count = min([count for f,count in count_folds]) 47 | fold = random.choice([f for f,count in count_folds if count == min_count]) 48 | folds[patient_id] = fold 49 | 50 | for label in labels: 51 | counter_folds[(fold,label)] += 1 52 | 53 | pprint(counter_folds) 54 | 55 | return folds 56 | 57 | 58 | def main(): 59 | args = get_args() 60 | with open(args.input, 'rb') as f: 61 | df = pickle.load(f) 62 | 63 | folds = _make_folds(df, args.n_fold, args.seed) 64 | df['fold'] = df.PatientID.map(folds) 65 | with open(args.output, 'wb') as f: 66 | pickle.dump(df, f) 67 | 68 | print('saved to %s' % args.output) 69 | 70 | 71 | if __name__ == '__main__': 72 | print(sys.argv) 73 | main() 74 | -------------------------------------------------------------------------------- 
def cast(value):
    """Convert a pydicom multi-valued element into a plain tuple.

    Any value that is not a ``pydicom.multival.MultiValue`` is returned
    unchanged.
    """
    # Use the class from pydicom.multival, matching get_dicom_value in this
    # module, rather than reaching it through pydicom.valuerep.
    # isinstance also accepts subclasses, which an exact type() check rejects.
    if isinstance(value, pydicom.multival.MultiValue):
        return tuple(value)
    return value
# Run test inference for every fold and TTA pass of one model, then convert
# each prediction pickle into a submission CSV.
ep=best
gpu=0
tta=1
clip=1e-6

model=model001
for fold in 0 1 2 3 4
do
    conf=./conf/${model}_${fold}.py
    snapshot=./model/${model}/fold${fold}_${ep}.pt

    for tta_id in 0 1 2 3 4
    do
        output=./model/${model}/fold${fold}_${ep}_test_tta${tta}_${tta_id}.pkl
        submission=./data/submission/${model}_fold${fold}_${ep}_test_tta${tta}_${tta_id}.csv

        python -m src.cnn.main test ${conf} --snapshot ${snapshot} --output ${output} --n-tta ${tta} --fold ${fold} --gpu ${gpu} --ttaid ${tta_id}
        # --input must be the pickle written by the command above (${output});
        # an undefined variable here expands to nothing and argparse rejects it.
        python -m src.postprocess.make_submission --input ${output} --output ${submission} --clip ${clip} --sample_submission ../IFE_1/input/stage_2_sample_submission.csv
    done

done
model002 2 | do 3 | for fold in 0 1 2 3 4 4 | do 5 | conf=./conf/${model}_${fold}.py 6 | for epoch in 25 30 35 40 45 7 | do 8 | python -m src.cnn.main train ${conf} --fold ${fold} --gpu 0 --epoch ${epoch} 9 | done 10 | done 11 | done 12 | 13 | -------------------------------------------------------------------------------- /cls_2/cache: -------------------------------------------------------------------------------- 1 | ../IFE_1/cache -------------------------------------------------------------------------------- /cls_2/conf/__pycache__/model001.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/conf/__pycache__/model001.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/conf/__pycache__/model001_0.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/conf/__pycache__/model001_0.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/conf/__pycache__/model001_1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/conf/__pycache__/model001_1.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/conf/__pycache__/model001_2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/conf/__pycache__/model001_2.cpython-36.pyc 
-------------------------------------------------------------------------------- /cls_2/conf/__pycache__/model001_3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/conf/__pycache__/model001_3.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/conf/__pycache__/model001_4.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/conf/__pycache__/model001_4.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/conf/model001_0.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_0' 2 | seed = 20 3 | apex = True 4 | traindir = './input/fold0_train' 5 | testdir = './input/fold0_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s10.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 
54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=3600, 63 | epoch_size_precisebn=500, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s10.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_2/conf/model001_1.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_1' 2 | seed = 20 3 | apex = True 4 | traindir = './input/fold1_train' 5 | testdir = './input/fold1_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | 
# Training configuration for fold 2 of model001 (cls_2).
workdir = './model/model001_2'
seed = 20
apex = True
traindir = './input/fold2_train'
testdir = './input/fold2_test'

n_fold = 5
epoch = 30
resume_from = None

batch_size = 28
num_workers = 8
imgsize = (60,)
loss = {
    'name': 'BCEWithLogitsLoss',
    'params': {},
}

optim = {
    'name': 'Adam',
    'params': {
        'lr': 6e-5,
    },
}

# Two schedules are declared; `scheduler` below selects the one in use.
scheduler1 = {
    'name': 'MultiStepLR',
    'params': {
        'milestones': [5, 10],
        'gamma': 2/3,
    },
}
scheduler2 = {
    'name': 'CosineAnnealingLR',
    'params': {
        'T_max': epoch,
    },
}

scheduler = scheduler2

#normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],}
normalize = None

# The same transform dict object is shared by the train, valid and test splits.
totensor = {'name': 'ToTensor', 'params': {'normalize': normalize}}

data = {
    'train': {
        'dataset_type': 'CustomDataset',
        'annotations': './cache/train_folds.pkl',
        'imgdir': traindir,
        'imgsize': imgsize,
        'n_grad_acc': 1,
        'loader': {
            'shuffle': True,
            'batch_size': batch_size,
            'drop_last': True,
            'num_workers': num_workers,
            'pin_memory': True,
        },
        'dataset_policy': 'all',
        'epoch_size': 3600,
        'epoch_size_precisebn': 500,
        'transforms': [totensor],
        'log_size': 100,
    },
    'valid': {
        'dataset_type': 'CustomDataset',
        'annotations': './cache/train_folds.pkl',
        'imgdir': traindir,
        'imgsize': imgsize,
        'loader': {
            'shuffle': False,
            'batch_size': batch_size,
            'drop_last': False,
            'num_workers': num_workers,
            'pin_memory': False,
        },
        'transforms': [totensor],
        'dataset_policy': 'all',
    },
    'test': {
        'dataset_type': 'CustomDataset',
        'annotations': './cache/test.pkl',
        'imgdir': testdir,
        'imgsize': imgsize,
        'loader': {
            'shuffle': False,
            'batch_size': batch_size,
            'drop_last': False,
            'num_workers': num_workers,
            'pin_memory': False,
        },
        'transforms': [totensor],
        'dataset_policy': 'all',
    },
}
# Training configuration for fold 4 of model001 (cls_2).
workdir = './model/model001_4'
seed = 20
apex = True
traindir = './input/fold4_train'
testdir = './input/fold4_test'

n_fold = 5
epoch = 30
resume_from = None

batch_size = 28
num_workers = 8
imgsize = (60,)
loss = {
    'name': 'BCEWithLogitsLoss',
    'params': {},
}

optim = {
    'name': 'Adam',
    'params': {
        'lr': 6e-5,
    },
}

# Two schedules are declared; `scheduler` below selects the one in use.
scheduler1 = {
    'name': 'MultiStepLR',
    'params': {
        'milestones': [5, 10],
        'gamma': 2/3,
    },
}
scheduler2 = {
    'name': 'CosineAnnealingLR',
    'params': {
        'T_max': epoch,
    },
}

scheduler = scheduler2

#normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],}
normalize = None

# The same transform dict object is shared by the train, valid and test splits.
totensor = {'name': 'ToTensor', 'params': {'normalize': normalize}}

data = {
    'train': {
        'dataset_type': 'CustomDataset',
        'annotations': './cache/train_folds.pkl',
        'imgdir': traindir,
        'imgsize': imgsize,
        'n_grad_acc': 1,
        'loader': {
            'shuffle': True,
            'batch_size': batch_size,
            'drop_last': True,
            'num_workers': num_workers,
            'pin_memory': True,
        },
        'dataset_policy': 'all',
        'epoch_size': 3600,
        'epoch_size_precisebn': 500,
        'transforms': [totensor],
        'log_size': 100,
    },
    'valid': {
        'dataset_type': 'CustomDataset',
        'annotations': './cache/train_folds.pkl',
        'imgdir': traindir,
        'imgsize': imgsize,
        'loader': {
            'shuffle': False,
            'batch_size': batch_size,
            'drop_last': False,
            'num_workers': num_workers,
            'pin_memory': False,
        },
        'transforms': [totensor],
        'dataset_policy': 'all',
    },
    'test': {
        'dataset_type': 'CustomDataset',
        'annotations': './cache/test.pkl',
        'imgdir': testdir,
        'imgsize': imgsize,
        'loader': {
            'shuffle': False,
            'batch_size': batch_size,
            'drop_last': False,
            'num_workers': num_workers,
            'pin_memory': False,
        },
        'transforms': [totensor],
        'dataset_policy': 'all',
    },
}
-------------------------------------------------------------------------------- /cls_2/input: -------------------------------------------------------------------------------- 1 | ../IFE_2/features/model001 -------------------------------------------------------------------------------- /cls_2/src/cnn/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | cv2.setNumThreads(0) # fix potential pytorch worker issues 3 | -------------------------------------------------------------------------------- /cls_2/src/cnn/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/src/cnn/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/src/cnn/__pycache__/factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/src/cnn/__pycache__/factory.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/src/cnn/__pycache__/main.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/src/cnn/__pycache__/main.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/src/cnn/dataset/__pycache__/custom_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- 
class SELayer(nn.Module):
    """Squeeze-and-excitation style channel gate for rank-3 tensors.

    Each channel is averaged over the last axis, passed through a two-layer
    bottleneck ending in a sigmoid, and the resulting per-channel weight
    rescales the input.

    Args:
        channel: number of channels (size of axis 1 of the input).
        reduction: bottleneck divisor; the hidden width is
            ``channel // reduction``.
    """

    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ELU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise; ``x`` must have three axes."""
        b, c, _ = x.size()
        y = self.avg_pool(x).view(b, c)   # one averaged value per channel
        y = self.fc(y).view(b, c, 1)      # per-channel gate in (0, 1)
        return x * y.expand_as(x)


class SimpleNet(nn.Module):
    """Stack of five 1-D convolutions producing per-position class logits.

    Every convolution uses ``kernel_size=3, stride=1, padding=1``, so the
    last axis of the input is preserved: an input with ``in_channel``
    channels yields an output with ``num_classes`` channels and the same
    length.

    Args:
        in_channel: number of input channels.
        num_classes: number of output channels of the final convolution.
            The original code accepted this argument but always built six
            output channels; it is now honoured (the default of 6 keeps
            existing checkpoints and callers unchanged).
    """

    def __init__(self,in_channel=1296, num_classes=6):
        super(SimpleNet, self).__init__()

        p = 0.3   # dropout probability before the final convolution
        m = 4     # width multiplier for the hidden convolutions
        self.conv1 = nn.Conv1d(in_channels=in_channel, out_channels=100*m, kernel_size=3, stride=1, padding=1)
        self.elu1 = nn.ELU()

        self.conv2 = nn.Conv1d(in_channels=100*m, out_channels=100*m, kernel_size=3, stride=1, padding=1)
        self.elu2 = nn.ELU()

        self.conv3 = nn.Conv1d(in_channels=100*m, out_channels=200*m, kernel_size=3, stride=1, padding=1)
        self.elu3 = nn.ELU()

        self.conv4 = nn.Conv1d(in_channels=200*m, out_channels=200*m, kernel_size=3, stride=1, padding=1)
        self.se4 = SELayer(200*m)
        self.elu4 = nn.ELU()
        self.dropout = nn.Dropout(p)

        # Was hard-coded to out_channels=6, which ignored `num_classes`.
        self.conv5 = nn.Conv1d(in_channels=200*m, out_channels=num_classes, kernel_size=3, stride=1, padding=1)

    def forward(self, input):
        """Return raw (un-activated) outputs of the final convolution."""
        output = self.elu1(self.conv1(input))
        output = self.elu2(self.conv2(output))
        output = self.elu3(self.conv3(output))

        output = self.conv4(output)
        output = self.se4(output)
        output = self.elu4(output)
        output = self.dropout(output)

        return self.conv5(output)
ratio=(3/4, 4/3), always_apply=False, p=1.0): 18 | super().__init__(always_apply, p) 19 | self.height = height 20 | self.width = width 21 | self.scale = scale 22 | self.ratio = ratio 23 | 24 | def apply(self, image, **params): 25 | 26 | height, width = image.shape[:2] 27 | area = height * width 28 | 29 | for attempt in range(15): 30 | target_area = random.uniform(*self.scale) * area 31 | aspect_ratio = random.uniform(*self.ratio) 32 | 33 | w = int(round(math.sqrt(target_area * aspect_ratio))) 34 | h = int(round(math.sqrt(target_area / aspect_ratio))) 35 | 36 | if random.random() < 0.5 and min(self.ratio) <= (h / w) <= max(self.ratio): 37 | w, h = h, w 38 | 39 | if w <= width and h <= height: 40 | x_min = random.randint(0, width - w) 41 | y_min = random.randint(0, height - h) 42 | return resized_crop(image, self.height, self.width, x_min, y_min, x_min+w, y_min+h) 43 | 44 | min_side = min(height, width) 45 | x_min = random.randint(0, width - min_side) 46 | y_min = random.randint(0, height - min_side) 47 | return resized_crop(image, self.height, self.width, x_min, y_min, x_min+min_side, y_min+min_side) 48 | 49 | -------------------------------------------------------------------------------- /cls_2/src/cnn/utils/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/src/cnn/utils/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /cls_2/src/cnn/utils/__pycache__/logger.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_2/src/cnn/utils/__pycache__/logger.cpython-36.pyc -------------------------------------------------------------------------------- 
class Logger(object):
    """Thin wrapper around the root ``logging`` logger, set to DEBUG level."""

    def __init__(self):
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        self.logger = root

    def setup(self, dirname, name):
        """Attach an appending file handler at ``<dirname>/<name>.log``.

        Creates ``dirname`` if needed, then writes a short header (blank
        line, timestamp, command line, log path) through ``log``.
        """
        os.makedirs(dirname, exist_ok=True)

        path = f'{dirname}/{name}.log'
        self.logger.addHandler(logging.FileHandler(path, 'a'))

        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        header = (
            '',
            '----- %s -----' % stamp,
            ' '.join(sys.argv),
            'logpath: %s' % path,
        )
        for line in header:
            log(line)


def log(msg):
    """Print ``msg`` and also record it at INFO level on the shared logger."""
    print(msg)
    logger.logger.info(msg)


# Module-level singleton used by `log`.
logger = Logger()
def load_model(path, model, optim=None):
    """Restore a checkpoint into ``model`` (and optionally ``optim``).

    The checkpoint is a dict with the keys ``'model'``, ``'optim'`` and
    ``'detail'``. After loading, ``model.cuda()`` is called
    unconditionally.

    Args:
        path: checkpoint location; converted with ``str`` before loading.
        model: object exposing ``load_state_dict`` and ``cuda``.
        optim: optional optimizer; its state is restored only when truthy.

    Returns:
        The ``'detail'`` entry stored in the checkpoint.
    """
    # remap everything onto CPU while deserialising
    checkpoint = torch.load(str(path), map_location=lambda storage, location: storage)

    model.load_state_dict(checkpoint['model'])
    if not optim:
        log('not loading optim')
    else:
        log('loading optim too')
        optim.load_state_dict(checkpoint['optim'])

    model.cuda()

    detail = checkpoint['detail']
    log('loaded model from %s' % path)
    return detail
def avg_predictions(results):
    """Average the ``'outputs'`` of several result dicts element-wise.

    Returns a dict with the ``'ids'`` of the first result and the mean of
    all ``'outputs'`` along the stacking axis.
    """
    stacked = np.array([entry['outputs'] for entry in results])
    mean_outputs = stacked.mean(axis=0)
    return {
        'ids': results[0]['ids'],
        'outputs': mean_outputs,
    }


def read_prediction(path):
    """Unpickle a list of result dicts from ``path`` and average them."""
    print('loading %s...' % path)
    with open(path, 'rb') as handle:
        return avg_predictions(pickle.load(handle))


def parse_inputs(inputs):
    """Average a (possibly nested) list of prediction file paths.

    A list element is averaged recursively first; any other element is
    treated as a path and read with ``read_prediction``.
    """
    averaged = [
        parse_inputs(item) if type(item) is list else read_prediction(item)
        for item in inputs
    ]
    return avg_predictions(averaged)


def main():
    """Build a submission CSV from one prediction file or an ensemble."""
    args = get_args()

    if args.input:
        result = read_prediction(args.input)
    else:
        # NOTE(review): eval() executes whatever expression is passed via
        # --inputs. Only run this with trusted command lines; consider
        # ast.literal_eval if only literal nested lists are ever supplied.
        result = parse_inputs(eval(args.inputs))

    sub = pd.read_csv(args.sample_submission)

    # Map "<id>_<label>" -> predicted value for every id / class index pair.
    predictions = {
        '%s_%s' % (image_id, mappings.num_to_label[idx]): score
        for image_id, scores in zip(result['ids'], result['outputs'])
        for idx, score in enumerate(scores)
    }

    sub['Label'] = sub.ID.map(predictions)
    # Rows without a prediction fall back to the smallest predicted value.
    fallback = sub.Label.min()
    sub.loc[sub.Label.isnull(),'Label'] = fallback
    if args.clip:
        print('clip values by %e' % args.clip)
        sub['Label'] = np.clip(sub.Label, args.clip, 1-args.clip)

    sub.to_csv(args.output, index=False)
    print(sub.tail())
    print('saved to %s' % args.output)
def get_args():
    """Parse the ``--input`` and ``--output`` command-line options."""
    parser = argparse.ArgumentParser()
    for flag in ('--input', '--output'):
        parser.add_argument(flag)
    return parser.parse_args()


def show_distribution(dataset):
    """Pretty-print how often each label occurs in ``dataset``.

    ``dataset`` is iterated with ``itertuples``; each row's ``labels``
    attribute is a whitespace-separated string. Rows with an empty string
    are counted under ``'negative'``, and every row increments ``'all'``.
    """
    tally = collections.defaultdict(int)
    for record in dataset.itertuples():
        text = record.labels
        for name in text.split():
            tally[name] += 1
        if not text:
            tally['negative'] += 1
        tally['all'] += 1
    pprint(tally)
def parse_position(df):
    """Append ``Position1..Position3`` columns from ``ImagePositionPatient``.

    Each ``ImagePositionPatient`` value is expanded into one column per
    component; exactly three components are required because three column
    names are assigned.
    """
    expanded = df.ImagePositionPatient.apply(lambda x: pd.Series(x))
    expanded.columns = ['Position1', 'Position2', 'Position3']
    return pd.concat([df, expanded], axis=1)


def parse_orientation(df):
    """Append ``Orient1..Orient6`` columns from ``ImageOrientationPatient``.

    Each value must expand to exactly six components.
    """
    expanded = df.ImageOrientationPatient.apply(lambda x: pd.Series(x))
    expanded.columns = ['Orient1', 'Orient2', 'Orient3', 'Orient4', 'Orient5', 'Orient6']
    return pd.concat([df, expanded], axis=1)


def add_adjacent_labels(df):
    """Attach the labels of the previous and next slice of the same study.

    Rows are sorted by ``PositionOrd`` and grouped by ``StudyInstanceUID``.
    For every row, ``LeftLabel`` is the ``labels`` value of the preceding
    row in its group and ``RightLabel`` that of the following row; the
    first row has an empty ``LeftLabel`` and the last row an empty
    ``RightLabel``.

    The original condition was inverted (``if j == 0: left = labels[j-1]``),
    which gave the first slice the label of the LAST slice and left every
    other slice with an empty left label.

    Returns:
        ``df`` (sorted by ``PositionOrd``) merged on ``ID`` with the new
        ``LeftLabel`` / ``RightLabel`` columns.
    """
    df = df.sort_values('PositionOrd')

    records = []
    print('making adjacent labels...')
    for index,group in tqdm(df.groupby('StudyInstanceUID')):

        labels = list(group.labels)
        last = len(labels) - 1
        for j,id in enumerate(group.ID):
            records.append({
                'LeftLabel': labels[j-1] if j > 0 else '',
                'RightLabel': labels[j+1] if j < last else '',
                'ID': id,
            })

    # Explicit columns keep the merge key present even when `df` is empty.
    neighbours = pd.DataFrame(records, columns=['LeftLabel', 'RightLabel', 'ID'])
    return pd.merge(df, neighbours, on='ID')
def get_args():
    """Parse ``--input``, ``--output``, ``--n-fold`` and ``--seed``."""
    parser = argparse.ArgumentParser()
    for flag in ('--input', '--output'):
        parser.add_argument(flag)
    for flag, default in (('--n-fold', 5), ('--seed', 10)):
        parser.add_argument(flag, type=int, default=default)
    return parser.parse_args()


def _make_folds(df, n_fold, seed):
    """Assign every ``PatientID`` to one of ``n_fold`` folds.

    For each patient the label with the smallest overall count among that
    patient's labels is picked, and the patient goes to a fold that
    currently holds the fewest occurrences of that label; ties are broken
    with ``random.choice`` after seeding ``random`` with ``seed``.
    Patients without any label are tracked under the empty-string label.

    Returns:
        dict mapping patient id to fold index.
    """
    label_totals = collections.defaultdict(int)
    for tokens in df.labels.str.split():
        for token in tokens:
            label_totals[token] += 1

    per_fold = collections.Counter()

    assignment = {}
    random.seed(seed)
    patients = df.groupby('PatientID')
    print('making %d folds...' % n_fold)
    for patient_id, rows in tqdm(patients, total=len(patients)):

        patient_labels = [
            token
            for row in rows.itertuples()
            for token in row.labels.split()
        ]
        if not patient_labels:
            patient_labels = ['']

        # Balance on the label that is rarest over the whole dataset.
        totals = [label_totals[name] for name in patient_labels]
        rarest = patient_labels[np.argmin(totals)]

        loads = [per_fold[(f, rarest)] for f in range(n_fold)]
        lightest = min(loads)
        candidates = [f for f, load in enumerate(loads) if load == lightest]
        chosen = random.choice(candidates)
        assignment[patient_id] = chosen

        for name in patient_labels:
            per_fold[(chosen,name)] += 1

    pprint(per_fold)

    return assignment


def main():
    """Load a pickled frame, add a ``fold`` column, and pickle it back."""
    args = get_args()
    with open(args.input, 'rb') as src:
        df = pickle.load(src)

    df['fold'] = df.PatientID.map(_make_folds(df, args.n_fold, args.seed))
    with open(args.output, 'wb') as dst:
        pickle.dump(df, dst)

    print('saved to %s' % args.output)
def get_dicom_value(x, cast=int):
    """Convert a DICOM value to a scalar with ``cast``.

    When ``x`` is exactly a ``pydicom.multival.MultiValue`` or a ``tuple``,
    only its first element is converted; any other value is converted
    as-is.
    """
    if type(x) in [pydicom.multival.MultiValue, tuple]:
        return cast(x[0])
    else:
        return cast(x)


def cast(value):
    """Turn a pydicom ``MultiValue`` into a plain tuple; pass others through.

    Uses ``pydicom.multival.MultiValue`` (where the class is defined and
    the same path ``get_dicom_value`` uses) instead of relying on the name
    also being reachable through ``pydicom.valuerep``.
    """
    if type(value) is pydicom.multival.MultiValue:
        return tuple(value)
    return value


def get_dicom_raw(dicom):
    """Return ``{attribute: value}`` for the capitalised attributes of ``dicom``.

    Attributes whose name starts with an upper-case letter are included,
    except ``PixelData``; each value is passed through ``cast``.
    """
    return {attr:cast(getattr(dicom,attr)) for attr in dir(dicom) if attr[0].isupper() and attr not in ['PixelData']}


def rescale_image(image, slope, intercept):
    """Apply the linear transform ``image * slope + intercept``."""
    return image * slope + intercept


def apply_window(image, center, width):
    """Clamp a copy of ``image`` to ``center`` plus or minus ``width // 2``.

    The input is not modified. The half-width uses floor division, so an
    odd integer ``width`` yields a window one unit narrower than ``width``.
    """
    image = image.copy()
    min_value = center - width // 2
    max_value = center + width // 2
    image[image < min_value] = min_value
    image[image > max_value] = max_value
    return image
# Run test-time inference for every fold / TTA id of one model and turn each
# prediction pickle into a submission CSV.
ep=best
gpu=0
tta=1
clip=1e-6

model=model001

# make_submission writes its CSV here; create the directory up front.
mkdir -p ./data/submission

for fold in 0 1 2 3 4
do
    conf=./conf/${model}_${fold}.py
    # NOTE(review): the per-fold configs set workdir to ./model/${model}_${fold};
    # confirm that snapshots really live under ./model/${model}/.
    snapshot=./model/${model}/fold${fold}_${ep}.pt

    for tta_id in 0 1 2 3 4
    do
        output=./model/${model}/fold${fold}_${ep}_test_tta${tta}_${tta_id}.pkl
        submission=./data/submission/${model}_fold${fold}_${ep}_test_tta${tta}_${tta_id}.csv

        python -m src.cnn.main test "${conf}" --snapshot "${snapshot}" --output "${output}" --n-tta ${tta} --fold ${fold} --gpu ${gpu} --ttaid ${tta_id}
        # Was `--input ${test}`: `test` is never defined, so --input received
        # no value. The predictions just written are in ${output}.
        python -m src.postprocess.make_submission --input "${output}" --output "${submission}" --clip ${clip} --sample_submission ../IFE_1/input/stage_2_sample_submission.csv
    done

done
-------------------------------------------------------------------------------- /cls_3/conf/__pycache__/model001.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/conf/__pycache__/model001.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/conf/__pycache__/model001_0.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/conf/__pycache__/model001_0.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/conf/__pycache__/model001_1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/conf/__pycache__/model001_1.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/conf/__pycache__/model001_2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/conf/__pycache__/model001_2.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/conf/__pycache__/model001_3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/conf/__pycache__/model001_3.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/conf/__pycache__/model001_4.cpython-36.pyc: 
# Training / inference configuration for model001, fold 0.
# The sibling files model001_1.py .. model001_4.py differ only in the fold
# number embedded in workdir / traindir / testdir.

workdir = './model/model001_0'
seed = 20
apex = True  # NOTE(review): presumably enables NVIDIA apex mixed precision - confirm in src/cnn/main.py
traindir = './input_model001/fold0_train'
testdir = './input_model001/fold0_test'

n_fold = 5
epoch = 30
resume_from = None

batch_size = 28
num_workers = 8
imgsize = (60,)  # one-element tuple; NOTE(review): meaning is defined by CustomDataset - confirm there
loss = dict(
    name='BCEWithLogitsLoss',
    params=dict(),
)

optim = dict(
    name='Adam',
    params=dict(
        lr=6e-5,
    ),
)

# Two scheduler definitions are kept side by side; only the one assigned to
# `scheduler` below is selected by this file.
scheduler1 = dict(
    name='MultiStepLR',
    params=dict(
        milestones=[5,10],
        gamma=2/3,
    ),
)
scheduler2 = dict(
    name='CosineAnnealingLR',
    params=dict(
        T_max=epoch
    ),
)

scheduler = scheduler2

# Normalisation is disabled; the commented-out line holds an alternative
# mean/std setting.
#normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],}
normalize = None

totensor = dict(name='ToTensor', params=dict(normalize=normalize))

data = dict(
    train=dict(
        dataset_type='CustomDataset',
        annotations='./cache/train_folds_s50.pkl',
        imgdir=traindir,
        imgsize=imgsize,
        n_grad_acc=1,
        loader=dict(
            shuffle=True,
            batch_size=batch_size,
            drop_last=True,
            num_workers=num_workers,
            pin_memory=True,
        ),
        dataset_policy='all',
        epoch_size=600,
        epoch_size_precisebn=0,
        transforms=[totensor],
        log_size=100
    ),
    # Validation uses the same annotation file and the same directory
    # (traindir) as training.
    valid = dict(
        dataset_type='CustomDataset',
        annotations='./cache/train_folds_s50.pkl',
        imgdir=traindir,
        imgsize=imgsize,
        loader=dict(
            shuffle=False,
            batch_size=batch_size,
            drop_last=False,
            num_workers=num_workers,
            pin_memory=False,
        ),
        transforms=[totensor],
        dataset_policy='all',
    ),
    # Test split: separate annotation pickle and testdir.
    test = dict(
        dataset_type='CustomDataset',
        annotations='./cache/test.pkl',
        imgdir=testdir,
        imgsize=imgsize,
        loader=dict(
            shuffle=False,
            batch_size=batch_size,
            drop_last=False,
            num_workers=num_workers,
            pin_memory=False,
        ),
        transforms=[totensor],
        dataset_policy='all',
    ),
)
epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s50.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_3/conf/model001_2.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_2' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model001/fold2_train' 5 | testdir = './input_model001/fold2_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s50.pkl', 
51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s50.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_3/conf/model001_3.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_3' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model001/fold3_train' 5 | testdir = './input_model001/fold3_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 
0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s50.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s50.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_3/conf/model001_4.py: -------------------------------------------------------------------------------- 1 | workdir = './model/model001_4' 2 | seed = 20 3 | apex = True 4 | traindir = './input_model001/fold4_train' 5 | testdir = './input_model001/fold4_test' 6 | 7 | n_fold = 5 8 | epoch = 30 9 | resume_from = None 10 | 11 | batch_size = 28 12 | num_workers = 8 13 | imgsize = (60,) 14 | loss = dict( 15 | name='BCEWithLogitsLoss', 16 | params=dict(), 17 | ) 18 | 19 | optim = dict( 20 | name='Adam', 21 | params=dict( 22 | lr=6e-5, 23 | ), 24 | ) 25 | 26 | scheduler1 = dict( 27 | 
name='MultiStepLR', 28 | params=dict( 29 | milestones=[5,10], 30 | gamma=2/3, 31 | ), 32 | ) 33 | scheduler2 = dict( 34 | name='CosineAnnealingLR', 35 | params=dict( 36 | T_max=epoch 37 | ), 38 | ) 39 | 40 | scheduler = scheduler2 41 | 42 | #normalize = {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],} 43 | normalize = None 44 | 45 | totensor = dict(name='ToTensor', params=dict(normalize=normalize)) 46 | 47 | data = dict( 48 | train=dict( 49 | dataset_type='CustomDataset', 50 | annotations='./cache/train_folds_s50.pkl', 51 | imgdir=traindir, 52 | imgsize=imgsize, 53 | n_grad_acc=1, 54 | loader=dict( 55 | shuffle=True, 56 | batch_size=batch_size, 57 | drop_last=True, 58 | num_workers=num_workers, 59 | pin_memory=True, 60 | ), 61 | dataset_policy='all', 62 | epoch_size=600, 63 | epoch_size_precisebn=0, 64 | transforms=[totensor], 65 | log_size=100 66 | ), 67 | valid = dict( 68 | dataset_type='CustomDataset', 69 | annotations='./cache/train_folds_s50.pkl', 70 | imgdir=traindir, 71 | imgsize=imgsize, 72 | loader=dict( 73 | shuffle=False, 74 | batch_size=batch_size, 75 | drop_last=False, 76 | num_workers=num_workers, 77 | pin_memory=False, 78 | ), 79 | transforms=[totensor], 80 | dataset_policy='all', 81 | ), 82 | test = dict( 83 | dataset_type='CustomDataset', 84 | annotations='./cache/test.pkl', 85 | imgdir=testdir, 86 | imgsize=imgsize, 87 | loader=dict( 88 | shuffle=False, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | num_workers=num_workers, 92 | pin_memory=False, 93 | ), 94 | transforms=[totensor], 95 | dataset_policy='all', 96 | ), 97 | ) 98 | -------------------------------------------------------------------------------- /cls_3/input3d_model001: -------------------------------------------------------------------------------- 1 | ../IFE_3/features3d/model001 -------------------------------------------------------------------------------- /cls_3/input_model001: -------------------------------------------------------------------------------- 
1 | ../IFE_3/features/model001 -------------------------------------------------------------------------------- /cls_3/src/cnn/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | cv2.setNumThreads(0) # fix potential pytorch worker issues 3 | -------------------------------------------------------------------------------- /cls_3/src/cnn/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/src/cnn/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/src/cnn/__pycache__/factory.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/src/cnn/__pycache__/factory.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/src/cnn/__pycache__/main.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/src/cnn/__pycache__/main.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/src/cnn/dataset/__pycache__/custom_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XUXUSSS/kaggle_rsna2019_4th_solution/d753f9e71b408ffd7df3ccd3c58b3a32dd207bcd/cls_3/src/cnn/dataset/__pycache__/custom_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /cls_3/src/cnn/models/__pycache__/model.cpython-36.pyc: 
def resized_crop(image, height, width, x_min, y_min, x_max, y_max):
    """Crop ``image`` to the given box, then resize the crop to (height, width)."""
    cropped = F.crop(image, x_min, y_min, x_max, y_max)
    # cv2.resize takes its target size as (width, height).
    return cv2.resize(cropped, (width, height))


class RandomResizedCrop(ImageOnlyTransform):
    """Crop a random region of the image and resize it to a fixed output size.

    Args:
        height, width: size of the returned image.
        scale: range from which the crop area is drawn, as a fraction of the
            source image area.
        ratio: range from which the crop aspect ratio is drawn.
        always_apply, p: forwarded unchanged to ``ImageOnlyTransform``.
    """

    def __init__(self, height, width, scale=(0.08, 1.0), ratio=(3/4, 4/3), always_apply=False, p=1.0):
        super().__init__(always_apply, p)
        self.height, self.width = height, width
        self.scale, self.ratio = scale, ratio

    def apply(self, image, **params):
        """Return a randomly cropped and resized copy of ``image``.

        Up to 15 candidate boxes are sampled; the first one that fits inside
        the image is used. If none fits, a random square with side
        min(height, width) is used instead.
        """
        src_h, src_w = image.shape[:2]
        src_area = src_h * src_w

        for _ in range(15):
            crop_area = random.uniform(*self.scale) * src_area
            aspect = random.uniform(*self.ratio)

            crop_w = int(round(math.sqrt(crop_area * aspect)))
            crop_h = int(round(math.sqrt(crop_area / aspect)))

            # The coin is always tossed; the ratio check only runs when it
            # comes up "swap".
            want_swap = random.random() < 0.5
            if want_swap and min(self.ratio) <= (crop_h / crop_w) <= max(self.ratio):
                crop_w, crop_h = crop_h, crop_w

            if crop_w > src_w or crop_h > src_h:
                continue  # candidate does not fit, draw another one

            left = random.randint(0, src_w - crop_w)
            top = random.randint(0, src_h - crop_h)
            return resized_crop(image, self.height, self.width,
                                left, top, left + crop_w, top + crop_h)

        # Fallback: random square crop spanning the shorter side.
        side = min(src_h, src_w)
        left = random.randint(0, src_w - side)
        top = random.randint(0, src_h - side)
        return resized_crop(image, self.height, self.width,
                            left, top, left + side, top + side)
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)


def get_lr(optim):
    """Return the learning rate of the first param group, or 0 if ``optim`` is falsy."""
    return optim.param_groups[0]['lr'] if optim else 0


def save_model(model, optim, detail, fold, dirname):
    """Write model/optimizer state and ``detail`` to ``fold<fold>_ep<epoch>.pt``.

    The epoch number in the file name is taken from ``detail['epoch']``.
    """
    filename = 'fold%d_ep%d.pt' % (fold, detail['epoch'])
    path = os.path.join(dirname, filename)
    checkpoint = {
        'model': model.state_dict(),
        'optim': optim.state_dict(),
        'detail': detail,
    }
    torch.save(checkpoint, path)
    log('saved model to %s' % path)


def load_model(path, model, optim=None):
    """Restore ``model`` (and ``optim`` if given) from ``path``.

    Returns the ``detail`` entry stored in the checkpoint. The model is moved
    to CUDA after its weights are loaded.
    """

    def _keep_storage(storage, location):
        # Return the storage untouched so tensors are deserialised on CPU.
        return storage

    checkpoint = torch.load(str(path), map_location=_keep_storage)
    model.load_state_dict(checkpoint['model'])

    if not optim:
        log('not loading optim')
    else:
        log('loading optim too')
        optim.load_state_dict(checkpoint['optim'])

    model.cuda()

    detail = checkpoint['detail']
    log('loaded model from %s' % path)
    return detail
def get_args():
    """Parse command-line options; one of --input / --inputs must be given."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input')
    parser.add_argument('--inputs', help='for ensembling. can be recursively nested for averaging.')
    parser.add_argument('--output', required=True)
    parser.add_argument('--sample_submission', default='./input/stage_1_sample_submission.csv')
    parser.add_argument('--clip', type=float, default=1e-6)

    args = parser.parse_args()
    # An `assert` would vanish under `python -O`; report a usage error instead.
    if not (args.input or args.inputs):
        parser.error('one of --input or --inputs is required')

    return args


def avg_predictions(results):
    """Average the 'outputs' arrays of several prediction dicts.

    Args:
        results: sequence of dicts with 'ids' and 'outputs' entries.

    Returns:
        dict with the 'ids' of the first result and the element-wise mean of
        all 'outputs'. The ids of the remaining results are not checked.
    """
    outputs_all = np.array([result['outputs'] for result in results])
    outputs = outputs_all.mean(axis=0)
    return {
        'ids': results[0]['ids'],
        'outputs': outputs,
    }


def read_prediction(path):
    """Load a pickled list of prediction dicts and return their average.

    NOTE(review): pickle.load executes arbitrary code embedded in the file;
    only use prediction files produced by this pipeline.
    """
    print('loading %s...' % path)
    with open(path, 'rb') as f:
        results = pickle.load(f)
    return avg_predictions(results)


def parse_inputs(inputs):
    """Recursively average a nested list/tuple of prediction-file paths.

    Each nested sequence is averaged first and then counts as a single
    element of its parent.
    """
    results = []
    for elem in inputs:
        if isinstance(elem, (list, tuple)):
            result = parse_inputs(elem)
        else:
            result = read_prediction(elem)
        results.append(result)
    return avg_predictions(results)


def main():
    """Build a submission CSV from one prediction file or a nested ensemble."""
    import ast  # local import: only needed to parse --inputs

    args = get_args()

    if args.input:
        result = read_prediction(args.input)
    else:
        # --inputs is a Python literal (nested lists of path strings).
        # literal_eval parses exactly that and, unlike eval(), cannot execute
        # arbitrary expressions passed on the command line.
        spec = ast.literal_eval(args.inputs)
        if isinstance(spec, str):
            spec = [spec]  # a bare path is treated as a one-element ensemble
        result = parse_inputs(spec)

    sub = pd.read_csv(args.sample_submission)
    IDs = {}
    for id, outputs in zip(result['ids'], result['outputs']):
        for i, output in enumerate(outputs):
            label = mappings.num_to_label[i]
            ID = '%s_%s' % (id, label)
            IDs[ID] = output

    sub['Label'] = sub.ID.map(IDs)
    # Rows without a prediction are filled with the smallest predicted value.
    sub.loc[sub.Label.isnull(),'Label'] = sub.Label.min()
    if args.clip:
        print('clip values by %e' % args.clip)
        sub['Label'] = np.clip(sub.Label, args.clip, 1-args.clip)

    sub.to_csv(args.output, index=False)
    print(sub.tail())
    print('saved to %s' % args.output)


if __name__ == '__main__':
    print(sys.argv)
    main()
def get_args():
    """Parse the --input / --output pickle paths."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input')
    parser.add_argument('--output')
    return parser.parse_args()


def show_distribution(dataset):
    """Pretty-print how many rows carry each label.

    'negative' counts rows whose ``labels`` string is empty and 'all' counts
    every row.
    """
    counter = collections.defaultdict(int)
    for row in dataset.itertuples():
        for label in row.labels.split():
            counter[label] += 1
        if not row.labels:
            counter['negative'] += 1
        counter['all'] += 1
    pprint(counter)


def parse_position(df):
    """Append Position1..3 columns expanded from ``ImagePositionPatient``."""
    expanded = df.ImagePositionPatient.apply(lambda x: pd.Series(x))
    expanded.columns = ['Position1', 'Position2', 'Position3']
    return pd.concat([df, expanded], axis=1)


def parse_orientation(df):
    """Append Orient1..6 columns expanded from ``ImageOrientationPatient``."""
    expanded = df.ImageOrientationPatient.apply(lambda x: pd.Series(x))
    expanded.columns = ['Orient1', 'Orient2', 'Orient3', 'Orient4', 'Orient5', 'Orient6']
    return pd.concat([df, expanded], axis=1)


def add_adjacent_labels(df):
    """Attach the labels of the neighbouring slices to every row.

    Rows are ordered by ``PositionOrd`` and grouped by ``StudyInstanceUID``.
    Within each group ``LeftLabel`` is the label string of the previous row
    and ``RightLabel`` that of the next row; the first row has an empty
    ``LeftLabel`` and the last row an empty ``RightLabel``.

    Returns:
        ``df`` (sorted by ``PositionOrd``) merged on ``ID`` with the two new
        columns.
    """
    df = df.sort_values('PositionOrd')

    records = []
    print('making adjacent labels...')
    for _, group in tqdm(df.groupby('StudyInstanceUID')):

        labels = list(group.labels)
        last = len(labels) - 1
        for j, id in enumerate(group.ID):
            records.append({
                # The first slice has no left neighbour. The previous code had
                # this test inverted: it gave slice 0 the label of the LAST
                # slice and left every other slice without a left label.
                'LeftLabel': labels[j - 1] if j > 0 else '',
                'RightLabel': labels[j + 1] if j < last else '',
                'ID': id,
            })

    # Explicit columns keep the merge key present even when `records` is empty.
    adjacent = pd.DataFrame(records, columns=['LeftLabel', 'RightLabel', 'ID'])
    return pd.merge(df, adjacent, on='ID')


def main():
    """Filter, enrich and re-pickle the DICOM metadata frame."""
    args = get_args()

    with open(args.input, 'rb') as f:
        df = pickle.load(f)
    print('read %s (%d records)' % (args.input, len(df)))

    show_distribution(df)

    df = df[df.custom_diff > 60]
    # The number printed here is the count of rows that remain.
    print('removed records by custom_diff (%d records)' % len(df))

    df = parse_position(df)

    df['WindowCenter'] = df.WindowCenter.apply(lambda x: misc.get_dicom_value(x))
    df['WindowWidth'] = df.WindowWidth.apply(lambda x: misc.get_dicom_value(x))
    # Rank of Position3 within its series divided by the series size.
    df['PositionOrd'] = df.groupby('SeriesInstanceUID')[['Position3']].rank() / df.groupby('SeriesInstanceUID')[['Position3']].transform('count')

    df = add_adjacent_labels(df)
    df = df[['ID', 'labels', 'PatientID', 'StudyInstanceUID', 'SeriesInstanceUID' ,'WindowCenter', 'WindowWidth', 'RescaleIntercept', 'RescaleSlope', 'Position3', 'PositionOrd', 'LeftLabel', 'RightLabel']]

    df = df.sort_values('ID')
    with open(args.output, 'wb') as f:
        pickle.dump(df, f)

    show_distribution(df)

    print('created dataset (%d records)' % len(df))
    print('saved to %s' % args.output)


if __name__ == '__main__':
    print(sys.argv)
    main()
pickle 5 | from pprint import pprint 6 | import random 7 | 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | 12 | def get_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--input') 15 | parser.add_argument('--output') 16 | parser.add_argument('--n-fold', type=int, default=5) 17 | parser.add_argument('--seed', type=int, default=10) 18 | return parser.parse_args() 19 | 20 | 21 | def _make_folds(df, n_fold, seed): 22 | 23 | counter_gt = collections.defaultdict(int) 24 | for labels in df.labels.str.split(): 25 | for label in labels: 26 | counter_gt[label] += 1 27 | 28 | counter_folds = collections.Counter() 29 | 30 | folds = {} 31 | random.seed(seed) 32 | groups = df.groupby('PatientID') 33 | print('making %d folds...' % n_fold) 34 | for patient_id, group in tqdm(groups, total=len(groups)): 35 | 36 | labels = [] 37 | for row in group.itertuples(): 38 | for label in row.labels.split(): 39 | labels.append(label) 40 | if not labels: 41 | labels = [''] 42 | 43 | count_labels = [counter_gt[label] for label in labels] 44 | min_label = labels[np.argmin(count_labels)] 45 | count_folds = [(f, counter_folds[(f, min_label)]) for f in range(n_fold)] 46 | min_count = min([count for f,count in count_folds]) 47 | fold = random.choice([f for f,count in count_folds if count == min_count]) 48 | folds[patient_id] = fold 49 | 50 | for label in labels: 51 | counter_folds[(fold,label)] += 1 52 | 53 | pprint(counter_folds) 54 | 55 | return folds 56 | 57 | 58 | def main(): 59 | args = get_args() 60 | with open(args.input, 'rb') as f: 61 | df = pickle.load(f) 62 | 63 | folds = _make_folds(df, args.n_fold, args.seed) 64 | df['fold'] = df.PatientID.map(folds) 65 | with open(args.output, 'wb') as f: 66 | pickle.dump(df, f) 67 | 68 | print('saved to %s' % args.output) 69 | 70 | 71 | if __name__ == '__main__': 72 | print(sys.argv) 73 | main() 74 | -------------------------------------------------------------------------------- 
# Per-label counts. 'all' equals 'negative' + 'any' (577155 + 97103).
# NOTE(review): presumably the number of training images per label - confirm
# against the dataset these numbers were taken from.
counter = {
    'all': 674258,
    'negative': 577155,

    'any': 97103,
    'epidural': 2761,
    'subdural': 42496,
    'subarachnoid': 32122,
    'intraventricular': 23766,
    'intraparenchymal': 32564,
}


# Label name -> numeric index.
label_to_num = {
    'any': 0,
    'epidural': 1,
    'subdural': 2,
    'subarachnoid': 3,
    'intraventricular': 4,
    'intraparenchymal': 5,
}
# Inverse lookup: numeric index -> label name.
num_to_label = {v:k for k,v in label_to_num.items()}
def get_args():
    """Parse command-line options for the CSV ensembler."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input',help='Input submission list')
    parser.add_argument('--output',help='Output submission csv')
    # --score is accepted but not used anywhere in this script.
    parser.add_argument('--score',default=0.001,type=float)
    # Exponent p of the power mean (mean(x**p))**(1/p); when left at its
    # falsy default a plain arithmetic mean is used.
    parser.add_argument('--gmean',default=False, type=float)
    return parser.parse_args()


def main():
    """Average the 'Label' column of every CSV listed in the --input file.

    The list file holds one CSV path per line; blank lines are ignored. The
    result is written to --output using the last CSV read as the template for
    all other columns.

    Raises:
        ValueError: if the list names no CSV, or if the CSVs carry 'ID'
            columns that do not match row for row.
    """
    args = get_args()
    list_path = args.input
    with open(list_path,'r') as f:
        lines = f.readlines()

    cells = []
    reference_ids = None
    df = None
    for line in lines:
        csv_path = line.strip()
        if not csv_path:
            continue  # tolerate blank lines in the list file
        df = pd.read_csv(csv_path)

        # Labels are combined by row position, so every file must list the
        # same IDs in the same order; otherwise the average is meaningless.
        if 'ID' in df.columns:
            ids = df['ID'].to_numpy()
            if reference_ids is None:
                reference_ids = ids
            elif not np.array_equal(ids, reference_ids):
                raise ValueError('ID column of %s does not match the first submission' % csv_path)

        cells.append(df['Label'].to_numpy())

    if not cells:
        raise ValueError('no submission CSVs listed in %s' % list_path)

    if not args.gmean:
        df['Label'] = np.mean(cells,axis=0)
    else:
        df['Label'] = np.power(np.mean(np.power(np.array(cells),args.gmean), axis=0),1/args.gmean)
    #out_path = "{}_merge.csv".format(csv_path[:-4])
    out_path = args.output
    print(out_path)

    df.to_csv(out_path,index=False)


if __name__ == '__main__':
    main()
requests==2.22.0 51 | requests-oauthlib==1.3.0 52 | scikit-image==0.16.2 53 | scikit-learn==0.21.3 54 | scipy==1.3.2 55 | six==1.13.0 56 | sklearn==0.0 57 | SQLAlchemy==1.3.11 58 | tensorboardX==1.9 59 | torch==1.2.0 60 | torchvision==0.4.0 61 | tqdm==4.38.0 62 | traitlets==4.3.3 63 | transaction==2.4.0 64 | translationstring==1.3 65 | urllib3==1.25.7 66 | velruse==1.1.1 67 | venusian==3.0.0 68 | wcwidth==0.1.7 69 | WebOb==1.8.5 70 | WTForms==2.2.1 71 | wtforms-recaptcha==0.3.2 72 | zope.deprecation==4.4.0 73 | zope.interface==4.7.1 74 | zope.sqlalchemy==1.2 75 | --------------------------------------------------------------------------------