├── images ├── detection_example1.png ├── detection_example2.png ├── detection_example3.png ├── detection_example4.png ├── ssd300-vgg16-v0.1.png ├── ssd300-vgg16-v0.2.png └── ssd224-resnet101-v0.1.png ├── .gitignore ├── analysis ├── dataset_utils.py ├── class_occurrences.py └── visualize_bbox_sizes.py ├── setting-environment.sh ├── merge_dasatet.sh ├── test_utils.py ├── LICENSE ├── demo.py ├── showreport.py ├── utils.py ├── README.md ├── ssd_resnet101.py ├── train_extractor.py ├── road_damage_dataset.py └── train_detector.py /images/detection_example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IshitaTakeshi/RoadDamageDetector/HEAD/images/detection_example1.png -------------------------------------------------------------------------------- /images/detection_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IshitaTakeshi/RoadDamageDetector/HEAD/images/detection_example2.png -------------------------------------------------------------------------------- /images/detection_example3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IshitaTakeshi/RoadDamageDetector/HEAD/images/detection_example3.png -------------------------------------------------------------------------------- /images/detection_example4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IshitaTakeshi/RoadDamageDetector/HEAD/images/detection_example4.png -------------------------------------------------------------------------------- /images/ssd300-vgg16-v0.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IshitaTakeshi/RoadDamageDetector/HEAD/images/ssd300-vgg16-v0.1.png -------------------------------------------------------------------------------- /images/ssd300-vgg16-v0.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IshitaTakeshi/RoadDamageDetector/HEAD/images/ssd300-vgg16-v0.2.png -------------------------------------------------------------------------------- /images/ssd224-resnet101-v0.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IshitaTakeshi/RoadDamageDetector/HEAD/images/ssd224-resnet101-v0.1.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | .ipynb_checkpoints 4 | 5 | ._RoadDamageDataset 6 | RoadDamageDataset 7 | RoadDamageDataset.tar.gz 8 | 9 | nohup.out 10 | -------------------------------------------------------------------------------- /analysis/dataset_utils.py: -------------------------------------------------------------------------------- 1 | def load_labels_and_bboxes(dataset, indices=None): 2 | if indices is None: 3 | indices = range(len(dataset)) 4 | 5 | bboxes = [] 6 | labels = [] 7 | for i in indices: 8 | img, bbox, label = dataset.get_example(i) 9 | if len(bbox) == 0: 10 | continue 11 | labels.append(label) 12 | bboxes.append(bbox) 13 | return labels, bboxes 14 | -------------------------------------------------------------------------------- /setting-environment.sh: -------------------------------------------------------------------------------- 1 | # install basic tools (considering running on docker) 2 | apt update; apt upgrade -y 3 | apt install -y sudo git vim zsh python3 zip wget 4 | 5 | # install pip 6 | wget -c https://bootstrap.pypa.io/get-pip.py 7 | python3 get-pip.py 8 | rm get-pip.py 9 | 10 | # install chainercv and other tools 11 | apt install -y python3-dev 12 | pip3 install -U chainercv==v0.9.0 13 | 14 | apt install -y python3-tk libglib2.0-0 libsm-dev 15 | pip3 install -U matplotlib opencv-python 16 | 17 | # download and setup dataset 18 | wget -c https://mycityreport.s3-ap-northeast-1.amazonaws.com/02_RoadDamageDataset/RoadDamageDataset.tar.gz 19 | tar xvf RoadDamageDataset.tar.gz 20 | ./merge_dasatet.sh 21 | -------------------------------------------------------------------------------- /analysis/class_occurrences.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from matplotlib import pyplot as plt 5 | 6 | from road_damage_dataset import RoadDamageDataset 7 | from utils import roaddamage_label_names 8 | from dataset_utils import load_labels_and_bboxes 9 | 10 | dataset_dir = os.path.join("RoadDamageDataset", "All") 11 | dataset = RoadDamageDataset(dataset_dir, split="trainval") 12 | 13 | labels, bboxes = load_labels_and_bboxes(dataset) 14 | 15 | bboxes = np.vstack(bboxes) 16 | labels = np.concatenate(labels) 17 | 18 | n, bins, patches = plt.hist( 19 | labels, 20 | bins=range(len(roaddamage_label_names) + 1), 21 | rwidth=0.8 22 | ) 23 | 24 | positions = np.arange(len(roaddamage_label_names)) + 0.5 25 | plt.ylabel("Occurrences") 26 | plt.xticks(positions, roaddamage_label_names) 27 | plt.show() 28 | -------------------------------------------------------------------------------- /merge_dasatet.sh: -------------------------------------------------------------------------------- 1 | DATASET_DIR="RoadDamageDataset" 2 | MERGED_DIR="All" 3 | 4 | imageset_textfiles="train_train.txt train_val.txt train_trainval.txt 5 | D00_train.txt D01_train.txt D10_train.txt 6 | D11_train.txt D20_train.txt D40_train.txt 7 | D43_train.txt D44_train.txt train.txt 8 | D00_val.txt D01_val.txt D10_val.txt 9 | D11_val.txt D20_val.txt D40_val.txt 10 | D43_val.txt D44_val.txt val.txt 11 | D00_trainval.txt D01_trainval.txt D10_trainval.txt 12 | D11_trainval.txt D20_trainval.txt D40_trainval.txt 13 | D43_trainval.txt D44_trainval.txt trainval.txt" 14 | 15 | 16 | for dir in Annotations JPEGImages labels ImageSets/Main 17 | do 18 | mkdir -p $DATASET_DIR/$MERGED_DIR/$dir 19 | done 20 | 21 | for filename in $imageset_textfiles 22 | do 23 | cat $DATASET_DIR/*/ImageSets/Main/$filename > $DATASET_DIR/$MERGED_DIR/ImageSets/Main/$filename 24 | done 25 | 26 | 27 | for dir in Annotations JPEGImages labels 28 | do 29 | cp $DATASET_DIR/*/$dir/* $DATASET_DIR/$MERGED_DIR/$dir/ 30 | done 31 | -------------------------------------------------------------------------------- /test_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from utils import are_overlapping, generate_background_bbox 3 | 4 | 5 | class TestUtils(unittest.TestCase): 6 | def test_are_overlapping(self): 7 | # Bounding box is represented by a list 8 | # in the form [ymin, xmin, ymax, xmax] 9 | self.assertTrue(are_overlapping([2, 1, 7, 6], [5, 4, 11, 8])) 10 | self.assertFalse(are_overlapping([2, 1, 7, 6], [8, 6, 13, 9])) 11 | 12 | def test_generate_background(self): 13 | existing_bboxes = [[3, 4, 8, 9], [13, 1, 16, 5]] 14 | bbox_shape = (5, 5) 15 | image_shape = (23, 10) 16 | 17 | def test(): 18 | try: 19 | bbox = generate_background_bbox( 20 | image_shape, 21 | bbox_shape, 22 | existing_bboxes 23 | ) 24 | except RuntimeError: 25 | return 26 | 27 | for existing_bbox in existing_bboxes: 28 | self.assertFalse(are_overlapping(bbox, existing_bbox)) 29 | 30 | for i in range(10): 31 | test() 32 | 33 | 34 | if __name__ == "__main__": 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 Yusuke Niitani, Takeshi Ishita, Pasona Tech Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /analysis/visualize_bbox_sizes.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import random 4 | 5 | import numpy as np 6 | 7 | from matplotlib import pyplot as plt 8 | from matplotlib import rcParams 9 | from matplotlib.lines import Line2D 10 | 11 | from road_damage_dataset import RoadDamageDataset 12 | from utils import roaddamage_label_names 13 | from dataset_utils import load_labels_and_bboxes 14 | 15 | rcParams['figure.figsize'] = 14, 18 16 | rcParams['figure.dpi'] = 240 17 | 18 | dataset_dir = os.path.join("RoadDamageDataset", "All") 19 | dataset = RoadDamageDataset(dataset_dir, split="trainval") 20 | 21 | indices = np.arange(len(dataset)) 22 | np.random.shuffle(indices) 23 | N = 600 24 | 25 | labels, bboxes = load_labels_and_bboxes(dataset, indices[:N]) 26 | 27 | bboxes = np.vstack(bboxes) 28 | labels = np.concatenate(labels) 29 | 30 | color = labels / labels.max() 31 | 32 | label_names = [roaddamage_label_names[label] for label in labels] 33 | 34 | H = bboxes[:, 2] - bboxes[:, 0] 35 | W = bboxes[:, 3] - bboxes[:, 1] 36 | 37 | fig, axes = plt.subplots(1) 38 | 39 | axes.set_xlim([0, 610]) 40 | axes.set_ylim([0, 610]) 41 | 42 | axes.set_aspect(1) 43 | 44 | axes.set_title("Distribution of bounding box sizes") 45 | axes.set_xlabel("width") 46 | axes.set_xlabel("height") 47 | 48 | uniques = np.unique(labels) 49 | for i, label in enumerate(uniques): 50 | axes.scatter(W[labels==label], H[labels==label], s=100, 51 | marker=Line2D.filled_markers[i % len(uniques)], 52 | label=roaddamage_label_names[label]) 53 | axes.legend() 54 | plt.show() 55 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from matplotlib import pyplot as plt 3 | 4 | import chainer 5 | from chainer.serializers import load_npz 6 | 7 | import chainercv 8 | from chainercv import utils 9 | from chainercv.visualizations import vis_bbox 10 | 11 | import ssd_resnet101 12 | from utils import roaddamage_label_names 13 | 14 | 15 | def main(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--gpu', type=int, default=-1) 18 | parser.add_argument('--base-network', choices=('vgg16', 'resnet101'), 19 | default='vgg16', help='Base network') 20 | parser.add_argument('--pretrained-model', required=True) 21 | parser.add_argument('image') 22 | args = parser.parse_args() 23 | 24 | if args.base_network == 'vgg16': 25 | model = chainercv.links.SSD300( 26 | n_fg_class=len(roaddamage_label_names), 27 | pretrained_model=args.pretrained_model) 28 | elif args.base_network == 'resnet101': 29 | model = ssd_resnet101.SSD224( 30 | n_fg_class=len(roaddamage_label_names), 31 | pretrained_model=args.pretrained_model) 32 | else: 33 | raise ValueError('Invalid base network') 34 | 35 | if args.gpu >= 0: 36 | chainer.cuda.get_device_from_id(args.gpu).use() 37 | model.to_gpu() 38 | 39 | img = utils.read_image(args.image, color=True) 40 | bboxes, labels, scores = model.predict([img]) 41 | bbox, label, score = bboxes[0], labels[0], scores[0] 42 | 43 | vis_bbox( 44 | img, bbox, label, score, label_names=roaddamage_label_names) 45 | plt.axis('off') 46 | plt.show() 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /showreport.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import random 4 | 5 | from os.path import basename 6 | 7 | import numpy as np 8 | from matplotlib import pyplot as plt 9 | from matplotlib import rcParams 10 | 11 | rcParams['figure.figsize'] = 16, 20 12 | rcParams['figure.dpi'] = 240 13 | rcParams['font.size'] = 18 14 | 15 | linestyles = ['-', '--', ':', '-.'] 16 | 17 | 18 | 19 | def extract(logs, keys): 20 | def _extract(log): 21 | items = [] 22 | for key in keys: 23 | try: 24 | item = log[key] 25 | except: 26 | item = None 27 | items.append(item) 28 | return items 29 | L = list(zip(*[_extract(log) for log in logs])) 30 | return np.array(L, dtype=np.double) 31 | 32 | 33 | if len(sys.argv) < 2: 34 | print("Usage $python3 showreport.py ") 35 | exit(0) 36 | 37 | 38 | logfile_path = sys.argv[1] 39 | with open(logfile_path, "r") as f: 40 | logs = json.load(f) 41 | 42 | L = extract(logs, keys=[ 43 | "iteration", 44 | "main/loss", 45 | "main/loss/conf", 46 | "main/loss/loc" 47 | ]) 48 | 49 | labels = ["confidence loss", "location loss", "overall loss"] 50 | 51 | iteration, loss = L[0], L[1:] 52 | 53 | ax1 = plt.subplot(211) 54 | 55 | for loss_, label in zip(loss, labels): 56 | plt.plot(iteration, loss_, label=label) 57 | plt.legend(prop={'size': 16}) 58 | 59 | keys = [ 60 | 'validation/main/ap/D00', 61 | 'validation/main/ap/D01', 62 | 'validation/main/ap/D10', 63 | 'validation/main/ap/D11', 64 | 'validation/main/ap/D20', 65 | 'validation/main/ap/D40', 66 | 'validation/main/ap/D43', 67 | 'validation/main/ap/D44', 68 | 'validation/main/map' 69 | ] 70 | 71 | L = extract(logs, keys=["iteration"] + keys) 72 | 73 | labels = [basename(key) for key in keys] 74 | 75 | iteration, aps = L[0], L[1:] 76 | 77 | plt.subplot(212, sharex=ax1) 78 | plt.ylim([0, 1]) 79 | for ap, label in zip(aps, labels): 80 | masks = np.logical_not(np.isnan(ap)) 81 | plt.plot(iteration[masks], ap[masks], linestyle=random.choice(linestyles), label=label) 82 | 83 | plt.xlabel("iteration") 84 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.815), numpoints=1) 85 | 86 | plt.show() 87 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from numpy.random import randint 2 | 3 | 4 | roaddamage_label_names = ( 5 | 'D00', 'D01', 'D10', 'D11', 6 | 'D20', 'D40', 'D43', 'D44' 7 | ) 8 | 9 | 10 | def are_overlapping(bbox1, bbox2): 11 | ymin1, xmin1, ymax1, xmax1 = bbox1 12 | ymin2, xmin2, ymax2, xmax2 = bbox2 13 | 14 | vertexes = [ 15 | (xmin1, ymin1), 16 | (xmin1, ymax1), 17 | (xmax1, ymin1), 18 | (xmax1, ymax1) 19 | ] 20 | 21 | for (x, y) in vertexes: 22 | if xmin2 < x < xmax2 and ymin2 < y < ymax2: 23 | return True 24 | return False 25 | 26 | 27 | def generate_background_bbox(image_shape, bbox_shape, existing_bboxes, 28 | n_attempts=10): 29 | """ 30 | Generate a bounding box that does not overlap with any `existing_bboxes`. 31 | The function tries generating a bounding box at most `n_attempts` times. 32 | Raises `RuntimeError` if a bounding box that doesn't overlap with any 33 | existing bounding boxes cannot be generated. 34 | 35 | Args: 36 | image_shape (tuple): The shape of the original image in the format of 37 | (height, width). Bounding boxes are generated to fit within 38 | `image_shape`. 39 | bbox_shape (tuple): The shape of a bounding box to be generated. 40 | existing_bboxes (list of tuples): Existing bounding boxes. The 41 | generated bounding box should not overlap with any 42 | `existing_bboxes`. 43 | n_attempts (int): The number of attempts to generate a bounding box 44 | """ 45 | 46 | def generate_candidate(): 47 | xmin = randint(0, image_shape[0] - bbox_shape[0] + 1) 48 | ymin = randint(0, image_shape[1] - bbox_shape[1] + 1) 49 | xmax = xmin + bbox_shape[0] 50 | ymax = ymin + bbox_shape[1] 51 | return (ymin, xmin, ymax, xmax) 52 | 53 | def at_least_one_overlapping(candidate, bboxes): 54 | """ 55 | Whether there is at least one bbox that overlaps with the candidate 56 | """ 57 | for bbox in bboxes: 58 | if are_overlapping(candidate, bbox): 59 | return True 60 | return False 61 | 62 | for i in range(n_attempts): 63 | candidate = generate_candidate() 64 | if at_least_one_overlapping(candidate, existing_bboxes): 65 | continue 66 | # return if there is no existing bounding box 67 | # that overlaps with the candidate 68 | return candidate 69 | 70 | raise RuntimeError("Background could not be generated") 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | RoadDamageDetector in Chainer 2 | ============================= 3 | 4 | [RoadDamageDetector](https://github.com/sekilab/RoadDamageDetector)を参考にした道路損傷検出モデルのChainerによる実装 5 | 6 | 道路のひび割れやセンターラインのかすれなどを検出することができる. 7 | 8 | 9 | 10 | 11 | 12 | データセットは[CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/deed.en)ライセンスのもとで配布されている. 13 | Creative Commons License 14 | 15 |
16 | 17 | 詳細は以下を参照 18 | 19 | * [Qiita](https://qiita.com/IshitaTakeshi/items/915de731d8081e711ae5) 20 | * [GitHub wiki](https://github.com/IshitaTakeshi/RoadDamageDetector/wiki) 21 | 22 | # 環境設定 23 | 24 | 依存パッケージのダウンロードやデータのダウンロード・展開は全て自動で行われる. 25 | GPUを用いる場合はCuPyを[手順](https://docs-cupy.chainer.org/en/stable/install.html)にしたがってインストールすることをおすすめする. 26 | 27 | ``` 28 | git clone https://github.com/IshitaTakeshi/RoadDamageDetector.git 29 | cd RoadDamageDetector 30 | ./setting-environment.sh 31 | ``` 32 | 33 | # 学習 34 | 35 | ## 識別器の学習 36 | 37 | 基本的には以下のコマンドですぐに学習が行えるようになっている. 38 | 39 | ``` 40 | python3 train_detector.py --base-network --gpu 41 | ``` 42 | 43 | GPU IDに負の値を指定すればCPUで実行することもできる. 44 | 45 | その他のオプションは 46 | 47 | ``` 48 | python3 train_detector.py -h 49 | ``` 50 | 51 | で確認できる. 52 | 53 | 54 | ### ResNet-101を用いる場合 55 | ベースネットワークとしてResNet-101を用いる場合は,Caffeの学習済みモデルを自分でダウンロードする必要がある.   56 | 方法は[Chainerのドキュメント](https://docs.chainer.org/en/stable/reference/generated/chainer.links.ResNet101Layers.html)に記述されている. 57 | 58 | ## ベースネットワークの学習 (ResNet-101のみ) 59 | 精度向上の試みとしてベースネットワークそのものを事前に学習することができる.この機能は現在ResNet-101のみに対して有効である. 60 | 61 | ``` 62 | python3 train_extractor.py --gpu 63 | ``` 64 | 65 | 得られたモデル`model-extractor.npz`を`--pretrained-extractor`オプションで指定することで,学習済みのベースネットワークをSSDに組み込むことができる. 66 | 67 | ``` 68 | python3 train_detector.py --base-network resnet101 \ 69 | --gpu \ 70 | --pretrained-extractor model-extractor.npz 71 | ``` 72 | 73 | # 実行 74 | 学習済みモデルを用いる場合はモデルファイル([link](https://drive.google.com/drive/u/0/folders/1T_LwA8sjK_yoE7Z7Hv22Dz20G-GNxn1Z))をダウンロードしておく. 75 | 76 | ``` 77 | python3 demo.py --base-network \ 78 | --gpu \ 79 | --pretrained_model models/ssd300-vgg16-v0.1/model.npz 80 | ``` 81 | 82 | 学習時と同様に,GPU IDに負の値を指定すればCPUで実行することもできる. 83 | 84 | ## データ 85 | データの詳細は[wiki](https://github.com/IshitaTakeshi/RoadDamageDetector/wiki/Road-Damage-Dataset)に書かれている. 86 | RoadDamageDatasetの全ての地区のデータをマージし,学習と評価に用いている. 87 | 学習には全地区のtrainをマージしたものを,評価には全地区のvalデータをマージしたものを用いている. 88 | -------------------------------------------------------------------------------- /ssd_resnet101.py: -------------------------------------------------------------------------------- 1 | import chainer 2 | from chainer.links import Linear, ResNet101Layers 3 | from chainer import functions as F 4 | from chainercv.links.model.ssd.ssd_vgg16 import (_load_npz, _imagenet_mean) 5 | from chainercv.links.model.ssd import Multibox, SSD 6 | 7 | 8 | class ResNet101FineTuning(chainer.Chain): 9 | def __init__(self, n_class, pretrained_model='auto'): 10 | super(ResNet101FineTuning, self).__init__() 11 | 12 | with self.init_scope(): 13 | self.base = ResNet101Layers(pretrained_model) 14 | self.fc6 = Linear(2048, n_class) 15 | 16 | def __call__(self, x): 17 | activations = self.base(x, layers=["pool5"]) 18 | h = activations["pool5"] 19 | return F.softmax(self.fc6(h)) 20 | 21 | 22 | class ResNet101Extractor(ResNet101Layers): 23 | insize = 224 24 | grids = (56, 28, 14, 7) 25 | 26 | def __init__(self, pretrained_model='auto'): 27 | super(ResNet101Extractor, self).__init__(pretrained_model) 28 | 29 | def __call__(self, x): 30 | layers = ["res2", "res3", "res4", "res5"] 31 | activations = super(ResNet101Extractor, self).__call__(x, layers) 32 | return [activations[layer] for layer in layers] 33 | 34 | 35 | class SSD224(SSD): 36 | """Single Shot Multibox Detector with 224x224 inputs. 37 | 38 | This is a model of Single Shot Multibox Detector [#]_. 39 | This model uses :class:`ResNet101Extractor` as its feature extractor. 40 | 41 | .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, 42 | Scott Reed, Cheng-Yang Fu, Alexander C. Berg. 43 | SSD: Single Shot MultiBox Detector. ECCV 2016. 44 | 45 | Args: 46 | n_fg_class (int): The number of classes excluding the background. 47 | pretrained_model (str): The weight file to be loaded. 48 | The default value is :obj:`None`. 49 | * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ 50 | must be specified properly. 51 | * :obj:`None`: Do not load weights. 52 | pretrained_extractor (str): The `npz` weight file of `ResNet101Layers`. 53 | If this argument is specified as `auto`, it automatically loads and 54 | converts the caffemodel. 55 | """ 56 | 57 | def __init__(self, n_fg_class=None, 58 | pretrained_extractor='auto', 59 | pretrained_model=None): 60 | 61 | super(SSD224, self).__init__( 62 | extractor=ResNet101Extractor(pretrained_extractor), 63 | multibox=Multibox( 64 | n_class=n_fg_class + 1, 65 | aspect_ratios=((2, 3), (2, 3), (2, 3), (2, 3))), 66 | steps=(4, 8, 16, 32), 67 | sizes=(15, 30, 60, 120, 244), 68 | mean=_imagenet_mean) 69 | 70 | if pretrained_model: 71 | _load_npz(pretrained_model, self) 72 | -------------------------------------------------------------------------------- /train_extractor.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import chainer 4 | from chainer import serializers 5 | from chainer import training 6 | from chainer.training import extensions 7 | from chainer import links as L 8 | from ssd_resnet101 import ResNet101FineTuning 9 | from road_damage_dataset import (roaddamage_label_names, 10 | RoadDamageClassificationDataset) 11 | 12 | 13 | if __name__ == "__main__": 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--batchsize', type=int, default=32, 16 | help='Learning minibatch size') 17 | parser.add_argument('--val-batchsize', '-b', type=int, default=250, 18 | help='Validation minibatch size') 19 | parser.add_argument('--epoch', type=int, default=10, 20 | help='Number of epochs to train') 21 | parser.add_argument('--gpu', type=int, default=-1, 22 | help='GPU ID (negative value indicates CPU') 23 | parser.add_argument('--loaderjob', type=int, 24 | help='Number of parallel data loading processes') 25 | parser.add_argument('--resume', default='', 26 | help='Initialize the trainer from given file') 27 | parser.add_argument('--out', default='result-classification') 28 | parser.add_argument('--test', action='store_true') 29 | parser.set_defaults(test=False) 30 | 31 | args = parser.parse_args() 32 | 33 | resnet_fine_tuning = ResNet101FineTuning( 34 | n_class=len(roaddamage_label_names) + 1 35 | ) 36 | 37 | model = L.Classifier(resnet_fine_tuning) 38 | 39 | if args.gpu >= 0: 40 | chainer.cuda.get_device_from_id(args.gpu).use() # Make the GPU current 41 | model.to_gpu() 42 | 43 | # Load the datasets and mean file 44 | train = RoadDamageClassificationDataset( 45 | "RoadDamageDataset/All", split='train') 46 | val = RoadDamageClassificationDataset( 47 | "RoadDamageDataset/All", split='val') 48 | # These iterators load the images with subprocesses running in parallel to 49 | # the training/validation. 50 | train_iter = chainer.iterators.MultiprocessIterator( 51 | train, args.batchsize, n_processes=args.loaderjob) 52 | val_iter = chainer.iterators.MultiprocessIterator( 53 | val, args.val_batchsize, repeat=False, n_processes=args.loaderjob) 54 | 55 | # Set up an optimizer 56 | optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9) 57 | optimizer.setup(model) 58 | 59 | # Set up a trainer 60 | updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) 61 | trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out) 62 | 63 | val_interval = (10 if args.test else 1000), 'iteration' 64 | log_interval = (10 if args.test else 1000), 'iteration' 65 | 66 | trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu), 67 | trigger=val_interval) 68 | trainer.extend(extensions.dump_graph('main/loss')) 69 | trainer.extend(extensions.snapshot(), trigger=val_interval) 70 | trainer.extend(extensions.snapshot_object( 71 | model, 'model_iter_{.updater.iteration}'), trigger=val_interval) 72 | # Be careful to pass the interval directly to LogReport 73 | # (it determines when to emit log rather than when to read observations) 74 | trainer.extend(extensions.LogReport(trigger=log_interval)) 75 | trainer.extend(extensions.observe_lr(), trigger=log_interval) 76 | trainer.extend(extensions.PrintReport([ 77 | 'epoch', 'iteration', 'main/loss', 'validation/main/loss', 78 | 'main/accuracy', 'validation/main/accuracy', 'lr' 79 | ]), trigger=log_interval) 80 | trainer.extend(extensions.ProgressBar(update_interval=10)) 81 | 82 | if args.resume: 83 | chainer.serializers.load_npz(args.resume, trainer) 84 | 85 | trainer.run() 86 | 87 | model.to_cpu() 88 | serializers.save_npz( 89 | "model-extractor.npz", 90 | resnet_fine_tuning.base) 91 | 92 | -------------------------------------------------------------------------------- /road_damage_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import xml.etree.ElementTree as ET 4 | 5 | import chainer 6 | from chainercv.utils import read_image 7 | from chainercv.transforms import random_flip 8 | from chainercv.links.model.ssd import random_distort 9 | 10 | from chainer.links.model.vision import resnet 11 | from utils import roaddamage_label_names, generate_background_bbox 12 | 13 | 14 | class RoadDamageDataset(chainer.dataset.DatasetMixin): 15 | 16 | """Bounding box dataset for RoadDamageDataset. 17 | 18 | The index corresponds to each image. 19 | 20 | When queried by an index, if :obj:`return_difficult == False`, 21 | this dataset returns a corresponding 22 | :obj:`img, bbox, label`, a tuple of an image, bounding boxes and labels. 23 | This is the default behaviour. 24 | If :obj:`return_difficult == True`, this dataset returns corresponding 25 | :obj:`img, bbox, label, difficult`. :obj:`difficult` is a boolean array 26 | that indicates whether bounding boxes are labeled as difficult or not. 27 | 28 | The bounding boxes are packed into a two dimensional tensor of shape 29 | :math:`(R, 4)`, where :math:`R` is the number of bounding boxes in 30 | the image. The second axis represents attributes of the bounding box. 31 | They are :math:`(y_{min}, x_{min}, y_{max}, x_{max})`, where the 32 | four attributes are coordinates of the top left and the bottom right 33 | vertices. 34 | 35 | The labels are packed into a one dimensional tensor of shape :math:`(R,)`. 36 | :math:`R` is the number of bounding boxes in the image. 37 | The class name of the label :math:`l` is :math:`l` th element of 38 | :obj:`roadddamage_label_names`. 39 | 40 | The array :obj:`difficult` is a one dimensional boolean array of shape 41 | :math:`(R,)`. :math:`R` is the number of bounding boxes in the image. 42 | If :obj:`use_difficult` is :obj:`False`, this array is 43 | a boolean array with all :obj:`False`. 44 | 45 | The type of the image, the bounding boxes and the labels are as follows. 46 | 47 | * :obj:`img.dtype == numpy.float32` 48 | * :obj:`bbox.dtype == numpy.float32` 49 | * :obj:`label.dtype == numpy.int32` 50 | * :obj:`difficult.dtype == numpy.bool` 51 | 52 | Args: 53 | data_dir (string): Path to the root of the training data. If this is 54 | :obj:`auto`, this class will automatically download data for you 55 | under :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/voc`. 56 | split ({'train', 'val', 'trainval', 'test'}): Select a split of the 57 | dataset. :obj:`test` split is only available for 58 | 2007 dataset. 59 | """ 60 | 61 | def __init__(self, data_dir, split='train'): 62 | 63 | if split not in ['train', 'trainval', 'val']: 64 | raise ValueError( 65 | "split must be either of 'train', 'traival', or 'val'" 66 | ) 67 | 68 | id_list_file = os.path.join( 69 | data_dir, 'ImageSets/Main/{0}.txt'.format(split)) 70 | 71 | self.ids = [id_.strip() for id_ in open(id_list_file)] 72 | 73 | self.data_dir = data_dir 74 | 75 | def __len__(self): 76 | return len(self.ids) 77 | 78 | def get_example(self, i): 79 | """Returns the i-th example. 80 | 81 | Returns a color image and bounding boxes. The image is in CHW format. 82 | The returned image is RGB. 83 | 84 | Args: 85 | i (int): The index of the example. 86 | 87 | Returns: 88 | tuple of an image and bounding boxes 89 | 90 | """ 91 | id_ = self.ids[i] 92 | anno = ET.parse( 93 | os.path.join(self.data_dir, 'Annotations', id_ + '.xml')) 94 | bbox = [] 95 | label = [] 96 | for obj in anno.findall('object'): 97 | bndbox_anno = obj.find('bndbox') 98 | 99 | # Ignore if the label is not listed 100 | name = obj.find('name').text.strip() 101 | 102 | if name not in roaddamage_label_names: 103 | continue 104 | 105 | label.append(roaddamage_label_names.index(name)) 106 | 107 | # subtract 1 to make pixel indexes 0-based 108 | bbox.append([ 109 | int(bndbox_anno.find(tag).text) - 1 110 | for tag in ('ymin', 'xmin', 'ymax', 'xmax')]) 111 | 112 | bbox = np.array(bbox).astype(np.int32) 113 | label = np.array(label).astype(np.int32) 114 | 115 | # Load an image 116 | img_file = os.path.join(self.data_dir, 'JPEGImages', id_ + '.jpg') 117 | img = read_image(img_file, color=True) 118 | return img, bbox, label 119 | 120 | 121 | class RoadDamageClassificationDataset(RoadDamageDataset): 122 | 123 | def __init__(self, data_dir, split, background_probability=None): 124 | """ 125 | Generates images for road damage classification. 126 | This dataset returns :obj:`image, label`, a tuple of an image and its 127 | label. The image is basically of a damage part, but in a certain 128 | probability which can be specified by `background_probability`, 129 | a random background image is returned. 130 | 131 | Args: 132 | background_probability (float64): Probability to generate 133 | a background image. 134 | The default value is 1 / (number of damage categories + 1). 135 | """ 136 | super(RoadDamageClassificationDataset, self).__init__( 137 | data_dir, split) 138 | 139 | self.background_probability = background_probability 140 | if background_probability is None: 141 | self.background_probability = 1 / (len(roaddamage_label_names) + 1) 142 | 143 | def _generate_damage(self, image, bboxes, labels): 144 | index = np.random.randint(len(labels)) 145 | 146 | label = labels[index] 147 | ymin, xmin, ymax, xmax = bboxes[index] 148 | damage = image[:, ymin:ymax, xmin:xmax] 149 | 150 | background = np.zeros(image.shape) 151 | background[:, ymin:ymax, xmin:xmax] = damage 152 | 153 | image = resnet.prepare(background) 154 | return image, label 155 | 156 | def _generate_background(self, image, bboxes): 157 | _, H, W = image.shape 158 | bbox = generate_background_bbox((H, W), (224, 224), bboxes) 159 | ymin, xmin, ymax, xmax = bbox 160 | image = resnet.prepare(image[:, ymin:ymax, xmin:xmax]) 161 | label = len(roaddamage_label_names) + 1 162 | return image, label 163 | 164 | def _data_augumentation(self, image): 165 | image = random_distort(image) 166 | image = random_flip(image, x_random=True) 167 | return image 168 | 169 | def get_example(self, i): 170 | 171 | image, bboxes, labels =\ 172 | super(RoadDamageClassificationDataset, self).get_example(i) 173 | 174 | if len(labels) == 0 or np.random.rand() < self.background_probability: 175 | # generate_background 176 | try: 177 | image, label = self._generate_background(image, bboxes) 178 | image = self._data_augumentation(image) 179 | return image, label 180 | except RuntimeError: 181 | # return damage if failed to generate background 182 | image, label = self._generate_damage(image, bboxes, labels) 183 | image = self._data_augumentation(image) 184 | return image, label 185 | 186 | image, label = self._generate_damage(image, bboxes, labels) 187 | image = self._data_augumentation(image) 188 | return image, label 189 | -------------------------------------------------------------------------------- /train_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import copy 4 | import warnings 5 | 6 | import numpy as np 7 | 8 | import chainer 9 | from chainer.datasets import TransformDataset 10 | from chainer.optimizer import WeightDecay 11 | from chainer import serializers 12 | from chainer import training 13 | from chainer.training import extensions 14 | from chainer.training import triggers 15 | from chainer.links.model.vision import resnet 16 | 17 | import chainercv 18 | from chainercv.extensions import DetectionVOCEvaluator 19 | from chainercv.links.model.ssd import GradientScaling 20 | from chainercv.links.model.ssd import multibox_loss 21 | from chainercv import transforms 22 | 23 | from chainercv.links.model.ssd import random_crop_with_bbox_constraints 24 | from chainercv.links.model.ssd import random_distort 25 | from chainercv.links.model.ssd import resize_with_random_interpolation 26 | 27 | import ssd_resnet101 28 | from road_damage_dataset import RoadDamageDataset, roaddamage_label_names 29 | 30 | 31 | class MultiboxTrainChain(chainer.Chain): 32 | 33 | def __init__(self, model, alpha=1, k=3): 34 | super(MultiboxTrainChain, self).__init__() 35 | with self.init_scope(): 36 | self.model = model 37 | self.alpha = alpha 38 | self.k = k 39 | 40 | def __call__(self, imgs, gt_mb_locs, gt_mb_labels): 41 | mb_locs, mb_confs = self.model(imgs) 42 | loc_loss, conf_loss = multibox_loss( 43 | mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, self.k) 44 | loss = loc_loss * self.alpha + conf_loss 45 | 46 | chainer.reporter.report( 47 | {'loss': loss, 'loss/loc': loc_loss, 'loss/conf': conf_loss}, 48 | self) 49 | 50 | return loss 51 | 52 | 53 | class MeanSubtraction(object): 54 | def __init__(self, mean): 55 | self.mean = mean.astype(np.float32) 56 | 57 | def __call__(self, in_data): 58 | img = in_data[0] 59 | img = img - self.mean 60 | return (img, *in_data[1:]) 61 | 62 | 63 | class ResNetPreparation(object): 64 | def __init__(self, size): 65 | self.size = size 66 | 67 | def __call__(self, in_data): 68 | img = in_data[0] 69 | img = resnet.prepare(img, (self.size, self.size)) 70 | return (img, *in_data[1:]) 71 | 72 | 73 | class Transform(object): 74 | 75 | def __init__(self, coder, size, mean): 76 | # to send cpu, make a copy 77 | self.coder = copy.copy(coder) 78 | self.coder.to_cpu() 79 | 80 | self.size = size 81 | self.mean = mean 82 | 83 | def __call__(self, in_data): 84 | # There are five data augmentation steps 85 | # 1. Color augmentation 86 | # 2. Random expansion 87 | # 3. Random cropping 88 | # 4. Resizing with random interpolation 89 | # 5. Random horizontal flipping 90 | 91 | img, bbox, label = in_data 92 | 93 | bbox = np.array(bbox).astype(np.float32) 94 | 95 | if len(bbox) == 0: 96 | warnings.warn("No bounding box detected", RuntimeWarning) 97 | img = resize_with_random_interpolation(img, (self.size, self.size)) 98 | mb_loc, mb_label = self.coder.encode(bbox, label) 99 | return img, mb_loc, mb_label 100 | 101 | # 1. Color augmentation 102 | img = random_distort(img) 103 | 104 | # 2. Random expansion 105 | if np.random.randint(2): 106 | img, param = transforms.random_expand( 107 | img, fill=self.mean, return_param=True) 108 | bbox = transforms.translate_bbox( 109 | bbox, y_offset=param['y_offset'], x_offset=param['x_offset']) 110 | 111 | # 3. Random cropping 112 | img, param = random_crop_with_bbox_constraints( 113 | img, bbox, return_param=True) 114 | bbox, param = transforms.crop_bbox( 115 | bbox, y_slice=param['y_slice'], x_slice=param['x_slice'], 116 | allow_outside_center=False, return_param=True) 117 | label = label[param['index']] 118 | 119 | # 4. Resizing with random interpolatation 120 | _, H, W = img.shape 121 | img = resize_with_random_interpolation(img, (self.size, self.size)) 122 | bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size)) 123 | 124 | # 5. Random horizontal flipping 125 | img, params = transforms.random_flip( 126 | img, x_random=True, return_param=True) 127 | bbox = transforms.flip_bbox( 128 | bbox, (self.size, self.size), x_flip=params['x_flip']) 129 | 130 | mb_loc, mb_label = self.coder.encode(bbox, label) 131 | return img, mb_loc, mb_label 132 | 133 | 134 | def main(): 135 | parser = argparse.ArgumentParser() 136 | parser.add_argument('--data-dir', type=str, 137 | default=os.path.join("RoadDamageDataset", "All")) 138 | parser.add_argument('--batchsize', type=int, default=32, 139 | help='Learning minibatch size') 140 | parser.add_argument('--gpu', type=int, default=-1, 141 | help='GPU ID (negative value indicates CPU') 142 | parser.add_argument('--base-network', choices=('vgg16', 'resnet101'), 143 | default='vgg16', help='Base network') 144 | parser.add_argument('--pretrained-model', default=None, 145 | help='Pretrained SSD model') 146 | parser.add_argument('--pretrained-extractor', default='auto', 147 | help='Pretrained CNN model to extract feature maps') 148 | parser.add_argument('--out', default='result-detection', 149 | help='Directory to output the result') 150 | parser.add_argument('--resume', default=None, 151 | help='Initialize the trainer from given file') 152 | 153 | args = parser.parse_args() 154 | 155 | print("Data directory : {}".format(args.data_dir)) 156 | print("Batchsize : {}".format(args.batchsize)) 157 | print("GPU ID : {}".format(args.gpu)) 158 | print("Base network : {}".format(args.base_network)) 159 | print("Pretrained extractor : {}".format(args.pretrained_extractor)) 160 | print("Pretrained model : {}".format(args.pretrained_model)) 161 | print("Output directory : {}".format(args.out)) 162 | print("Resume from : {}".format(args.resume)) 163 | 164 | 165 | if args.base_network == 'vgg16': 166 | # pretrained_extractor is currently not available for this class 167 | model = chainercv.links.SSD300( 168 | n_fg_class=len(roaddamage_label_names), 169 | pretrained_model=args.pretrained_model) 170 | preprocessing = MeanSubtraction(model.mean) 171 | elif args.base_network == 'resnet101': 172 | model = ssd_resnet101.SSD224( 173 | n_fg_class=len(roaddamage_label_names), 174 | pretrained_extractor=args.pretrained_extractor, 175 | pretrained_model=args.pretrained_model) 176 | preprocessing = ResNetPreparation(model.insize) 177 | else: 178 | raise ValueError('Invalid base network') 179 | 180 | model.use_preset('evaluate') 181 | train_chain = MultiboxTrainChain(model) 182 | if args.gpu >= 0: 183 | chainer.cuda.get_device_from_id(args.gpu).use() 184 | model.to_gpu() 185 | 186 | train = TransformDataset( 187 | RoadDamageDataset(args.data_dir, split='train'), 188 | Transform(model.coder, model.insize, model.mean) 189 | ) 190 | 191 | train = TransformDataset(train, preprocessing) 192 | 193 | train_iter = chainer.iterators.SerialIterator(train, args.batchsize) 194 | 195 | test = RoadDamageDataset(args.data_dir, split='val') 196 | test_iter = chainer.iterators.SerialIterator( 197 | test, args.batchsize, repeat=False, shuffle=False) 198 | 199 | # initial lr is set to 3e-4 by ExponentialShift 200 | optimizer = chainer.optimizers.MomentumSGD() 201 | optimizer.setup(train_chain) 202 | for param in train_chain.params(): 203 | if param.name == 'b': 204 | param.update_rule.add_hook(GradientScaling(2)) 205 | else: 206 | param.update_rule.add_hook(WeightDecay(0.0005)) 207 | 208 | updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) 209 | trainer = training.Trainer(updater, (120000, 'iteration'), args.out) 210 | trainer.extend( 211 | extensions.ExponentialShift('lr', 0.1, init=3e-4), 212 | trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration')) 213 | 214 | trainer.extend( 215 | DetectionVOCEvaluator( 216 | test_iter, model, use_07_metric=True, 217 | label_names=roaddamage_label_names), 218 | trigger=(4000, 'iteration')) 219 | 220 | log_interval = 10, 'iteration' 221 | trainer.extend(extensions.LogReport(trigger=log_interval)) 222 | trainer.extend(extensions.observe_lr(), trigger=log_interval) 223 | trainer.extend(extensions.PrintReport( 224 | ['epoch', 'iteration', 'lr', 225 | 'main/loss', 'main/loss/loc', 'main/loss/conf', 226 | 'validation/main/map']), 227 | trigger=log_interval) 228 | 229 | # trainer.extend(extensions.ProgressBar()) 230 | 231 | trainer.extend(extensions.snapshot(), trigger=(4000, 'iteration')) 232 | trainer.extend( 233 | extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'), 234 | trigger=(4000, 'iteration')) 235 | 236 | if args.resume: 237 | serializers.load_npz(args.resume, trainer) 238 | 239 | print("setup finished") 240 | trainer.run() 241 | 242 | model.to_cpu() 243 | serializers.save_npz("model-detector.npz", model) 244 | 245 | 246 | if __name__ == '__main__': 247 | main() 248 | --------------------------------------------------------------------------------