├── .dockerignore ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── configs └── cascade_rcnn_dconv_c3-c5_r50_fpn.py ├── detection ├── __init__.py ├── convert.py ├── eval.py ├── prepare_solution.py ├── prepare_test.py ├── prune.py ├── test.py ├── train.py └── utils.py ├── figures ├── solution.pdf └── title.png ├── flags.txt ├── mmdetection ├── LICENSE ├── compile.sh ├── mmdet │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ ├── env.py │ │ ├── inference.py │ │ └── train.py │ ├── core │ │ ├── __init__.py │ │ ├── anchor │ │ │ ├── __init__.py │ │ │ ├── anchor_generator.py │ │ │ └── anchor_target.py │ │ ├── bbox │ │ │ ├── __init__.py │ │ │ ├── assign_sampling.py │ │ │ ├── assigners │ │ │ │ ├── __init__.py │ │ │ │ ├── assign_result.py │ │ │ │ ├── base_assigner.py │ │ │ │ └── max_iou_assigner.py │ │ │ ├── bbox_target.py │ │ │ ├── geometry.py │ │ │ ├── samplers │ │ │ │ ├── __init__.py │ │ │ │ ├── base_sampler.py │ │ │ │ ├── combined_sampler.py │ │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ │ ├── ohem_sampler.py │ │ │ │ ├── pseudo_sampler.py │ │ │ │ ├── random_sampler.py │ │ │ │ └── sampling_result.py │ │ │ └── transforms.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── bbox_overlaps.py │ │ │ ├── class_names.py │ │ │ ├── coco_utils.py │ │ │ ├── eval_hooks.py │ │ │ ├── mean_ap.py │ │ │ └── recall.py │ │ ├── loss │ │ │ ├── __init__.py │ │ │ └── losses.py │ │ ├── mask │ │ │ ├── __init__.py │ │ │ ├── mask_target.py │ │ │ └── utils.py │ │ ├── post_processing │ │ │ ├── __init__.py │ │ │ ├── bbox_nms.py │ │ │ └── merge_augs.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── dist_utils.py │ │ │ └── misc.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── custom.py │ │ ├── extra_aug.py │ │ ├── loader │ │ │ ├── __init__.py │ │ │ ├── build_loader.py │ │ │ └── sampler.py │ │ ├── repeat_dataset.py │ │ ├── transforms.py │ │ ├── utils.py │ │ ├── voc.py │ │ └── xml_style.py │ ├── models │ │ ├── __init__.py │ │ ├── anchor_heads │ │ │ ├── __init__.py │ │ │ ├── anchor_head.py │ │ │ ├── retina_head.py │ │ │ ├── rpn_head.py │ │ │ └── ssd_head.py │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── resnet.py │ │ │ ├── resnext.py │ │ │ └── ssd_vgg.py │ │ ├── bbox_heads │ │ │ ├── __init__.py │ │ │ ├── bbox_head.py │ │ │ └── convfc_bbox_head.py │ │ ├── builder.py │ │ ├── detectors │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── cascade_rcnn.py │ │ │ ├── fast_rcnn.py │ │ │ ├── faster_rcnn.py │ │ │ ├── mask_rcnn.py │ │ │ ├── retinanet.py │ │ │ ├── rpn.py │ │ │ ├── single_stage.py │ │ │ ├── test_mixins.py │ │ │ └── two_stage.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ └── fcn_mask_head.py │ │ ├── necks │ │ │ ├── __init__.py │ │ │ └── fpn.py │ │ ├── registry.py │ │ ├── roi_extractors │ │ │ ├── __init__.py │ │ │ └── single_level.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── conv_module.py │ │ │ ├── norm.py │ │ │ └── weight_init.py │ └── ops │ │ ├── __init__.py │ │ ├── dcn │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ └── deform_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ └── deform_pool.py │ │ ├── setup.py │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ │ ├── nms │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── cpu_nms.pyx │ │ ├── cpu_soft_nms.pyx │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── nms_kernel.cu │ │ ├── 
nms_wrapper.py │ │ └── setup.py │ │ ├── roi_align │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── gradcheck.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── setup.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ │ └── roi_pool │ │ ├── __init__.py │ │ ├── functions │ │ ├── __init__.py │ │ └── roi_pool.py │ │ ├── gradcheck.py │ │ ├── modules │ │ ├── __init__.py │ │ └── roi_pool.py │ │ ├── setup.py │ │ └── src │ │ ├── roi_pool_cuda.cpp │ │ └── roi_pool_kernel.cu └── setup.py ├── reid ├── __init__.py ├── annotation_converter.py ├── fit_nmslib.py ├── insightface │ ├── __init__.py │ ├── model.py │ ├── mtcnn.py │ └── utils.py ├── knn.py ├── predict_embeddings.py ├── predict_nmslib.py └── prepare_solution.py ├── test.sh ├── test_detector.sh ├── test_knn.sh ├── train.sh ├── train_detector.sh └── train_knn.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | # custom: 2 | 3 | .git/* 4 | ipynb/* 5 | .idea/* 6 | 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | venv 3 | data/* 4 | weights/* 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject 
date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.0-cudnn7-devel 2 | 3 | RUN apt-get update -y && apt-get install -y \ 4 | git \ 5 | wget \ 6 | curl \ 7 | cmake \ 8 | unzip \ 9 | yasm \ 10 | pkg-config \ 11 | libblas-dev \ 12 | liblapack-dev \ 13 | libatlas-base-dev \ 14 | gfortran \ 15 | build-essential \ 16 | libsm6 \ 17 | libxext6 \ 18 | libfontconfig1 \ 19 | libxrender1 \ 20 | libswscale-dev \ 21 | libtbb2 \ 22 | libtbb-dev \ 23 | libjpeg-dev \ 24 | libpng-dev \ 25 | libtiff-dev \ 26 | libjasper-dev \ 27 | libavformat-dev \ 28 | libpq-dev \ 29 | libboost-dev \ 30 | libboost-system-dev \ 31 | libboost-filesystem-dev \ 32 | software-properties-common \ 33 | libturbojpeg 34 | 35 | RUN add-apt-repository -y ppa:jonathonf/python-3.6 \ 36 | && apt-get update -y \ 37 | && apt-get install -y python3.6 python3.6-dev \ 38 | && ln -sfn /usr/bin/python3.6 /usr/local/bin/python \ 39 | && ln -sfn /usr/bin/python3.6 /usr/bin/python3 \ 40 | && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \ 41 | && python get-pip.py \ 42 | && rm get-pip.py \ 43 | && apt-get clean \ 44 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 45 | 46 | RUN pip install --upgrade --no-cache-dir pip && pip install --no-cache-dir \ 47 | cycler==0.10.0 \ 48 | dill==0.2.8.2 \ 49 | h5py==2.7.1 \ 50 | imgaug==0.2.5 \ 51 | matplotlib==2.2.2 \ 52 | numpy==1.14.3 \ 53 | opencv-contrib-python==3.4.2.17 \ 54 | pandas==0.23.0 \ 55 | Pillow==5.1.0 \ 56 | scikit-image==0.13.1 \ 57 | scikit-learn==0.19.1 \ 58 | scipy==1.1.0 \ 59 | setuptools==39.1.0 \ 60 | six==1.11.0 \ 61 | tqdm==4.23.4 \ 62 | ipython==7.3.0 \ 63 | ipdb==0.12 \ 64 | ninja==1.9.0 \ 65 | yacs==0.1.6 \ 66 | albumentations==0.2.2 \ 67 | click==7.0 \ 68 | pytest-runner==4.4 \ 69 | jpeg4py==0.1.4 \ 70 | cython==0.29.6 \ 71 | nmslib==1.7.3.6 \ 72 | mxnet==1.3.1 73 | 74 | RUN pip install --upgrade --no-cache-dir cython && pip install --no-cache-dir pycocotools==2.0.0 mmcv==0.2.5 75 | RUN pip install --no-cache-dir torch==1.0.0 torchvision==0.2.2 76 | 77 | COPY . 
/code 78 | 79 | WORKDIR /code/weights 80 | RUN wget https://www.dropbox.com/s/09xiyd4nukpsexz/my_best_checkpoint.pth 81 | RUN wget https://www.dropbox.com/s/tj96fsm6t6rq8ye/model-r100-arcface-ms1m-refine-v2.zip && \ 82 | unzip model-r100-arcface-ms1m-refine-v2.zip 83 | RUN wget https://www.dropbox.com/s/63t3lakuygyfqsl/mtcnn-model.zip && \ 84 | unzip mtcnn-model.zip 85 | RUN wget https://www.dropbox.com/s/rqn0v6kk9v3bst2/knn.bin 86 | ADD https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/dcn/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-dfa53166.pth . 87 | 88 | WORKDIR /code/mmdetection 89 | RUN bash compile.sh && \ 90 | python setup.py develop 91 | 92 | WORKDIR /code 93 | ENV TOPCODER_ROOT /code 94 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Miras Amir 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | APP_NAME=amirassov/topcoder-facial-detection 2 | CONTAINER_NAME=code 3 | 4 | # HELP 5 | .PHONY: help 6 | 7 | help: ## This help. 8 | @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) 9 | 10 | build: ## Build the container 11 | nvidia-docker build -t $(APP_NAME) .
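# Note: any target documented with a trailing "## comment" is picked up
# automatically by the awk pattern in the help target above, so it will
# show up in the output of `make help`.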
12 | 13 | run: ## Run container in dgx 14 | nvidia-docker run \ 15 | -it \ 16 | --ipc=host \ 17 | --name=$(CONTAINER_NAME) \ 18 | -v $(shell pwd):/code \ 19 | -v /raid/data_share/topcoder/data/:/data \ 20 | -v /raid/data_share/amirassov/detection/code:/wdata $(APP_NAME) 21 | 22 | run-omen: ## Run container in omen 23 | nvidia-docker run \ 24 | -it \ 25 | --ipc=host \ 26 | -v $(shell pwd):/topcoder-facial-detection \ 27 | -v /home/videoanalytics/data/topcoder:/data \ 28 | -v /home/videoanalytics/data/dumps:/wdata \ 29 | --name=$(CONTAINER_NAME) $(APP_NAME) 30 | 31 | exec: ## Run a bash in a running container 32 | nvidia-docker exec -it $(CONTAINER_NAME) bash 33 | 34 | stop: ## Stop and remove a running container 35 | docker stop $(CONTAINER_NAME); docker rm $(CONTAINER_NAME) 36 | 37 | download: ## Download pretrained weights (pass the URL as WEIGHTS=<url>) 38 | wget -O /wdata/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-dfa53166.pth $(WEIGHTS) --no-check-certificate 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Topcoder: Neptune - Facial Detection and Face Re-identification Marathon Match 2 | 3 | ## Problems 4 | * [Facial Detection](https://topcoder.com/challenges/30086997) 5 | * [Face Re-identification](https://topcoder.com/challenges/30086998) 6 | ![ensemble](figures/title.png) 7 | 8 | ## Solution 9 | [[presentation]](figures/solution.pdf) [[video]](https://youtu.be/pdIf4qy3ypE) 10 | 11 | ## Build 12 | ```bash 13 | make build 14 | make run 15 | ``` 16 | 17 | ## How to run? 18 | ```bash 19 | bash train.sh /data/training/ 20 | bash test.sh /data/test/ solution.csv 21 | ``` 22 | 23 | ## References 24 | * https://github.com/open-mmlab/mmdetection -------------------------------------------------------------------------------- /detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/detection/__init__.py -------------------------------------------------------------------------------- /detection/convert.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle 4 | from functools import partial 5 | from multiprocessing import Pool 6 | 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | from detection.utils import prepare_bboxes_labels 11 | import jpeg4py as jpeg 12 | import cv2 13 | 14 | HEIGHT = 2048 15 | WIDTH = 3072 16 | 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('--annotation', type=str) 21 | parser.add_argument('--root', type=str) 22 | parser.add_argument('--output', type=str) 23 | parser.add_argument('--output_root', type=str, default=None) 24 | parser.add_argument('--n_jobs', type=int, default=40) 25 | parser.add_argument('--n_samples', type=int, default=-1) 26 | return parser.parse_args() 27 | 28 | 29 | def convert(group: tuple, root: str, output_root=None) -> dict: 30 | image_name, group = group 31 | image = jpeg.JPEG(os.path.join(root, image_name)).decode() 32 | h, w = image.shape[:2] 33 | if 'FACE_X' in group: 34 | bboxes, labels = prepare_bboxes_labels(group) 35 | else: 36 | bboxes, labels = None, None 37 | if output_root is not None: 38 | cv2.imwrite(os.path.join(output_root, image_name), cv2.resize(image, (WIDTH, HEIGHT))[:, :, ::-1]) 39 | bboxes[:, [0, 2]] *= WIDTH / w # NOTE: assumes bboxes is not None here, i.e. the annotation has FACE_X columns; guard both scalings before using --output_root on unannotated test data 40 | 
bboxes[:, [1, 3]] *= HEIGHT / h 41 | 42 | return {'filename': image_name, 'width': WIDTH, 'height': HEIGHT, 'ann': {'bboxes': bboxes, 'labels': labels}} 43 | 44 | 45 | def main(): 46 | args = parse_args() 47 | annotation = pd.read_csv(args.annotation) 48 | files = sorted(os.listdir(args.root)) 49 | if args.n_samples != -1: 50 | files = files[:args.n_samples] 51 | annotation = annotation.loc[annotation['FILE'].isin(set(files))] 52 | print(len(annotation), len(set(annotation['FILE']))) 53 | partial_convert = partial(convert, root=args.root, output_root=args.output_root) 54 | groups = list(annotation.groupby('FILE')) 55 | 56 | with Pool(args.n_jobs) as p: 57 | samples = list(tqdm(iterable=p.imap_unordered(partial_convert, groups), total=len(groups))) 58 | 59 | with open(args.output, 'wb') as f: 60 | pickle.dump(samples, f) 61 | 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /detection/eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def evaluate(result_file, dataset, iou_thresholds): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate( 21 | [np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool)] 23 | ) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = None 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | dataset_name = 'voc07' 33 | else: 34 | dataset_name = dataset.CLASSES 35 | 36 | mean_aps = [] 37 | for iou_thr in iou_thresholds: 38 | mean_ap, _ = eval_map( 39 | det_results, 40 | gt_bboxes, 41 | gt_labels, 42 | gt_ignore=gt_ignore, 43 | scale_ranges=None, 44 | iou_thr=iou_thr, 45 | dataset=dataset_name, 46 | print_summary=False 47 | ) 48 | mean_aps.append(mean_ap) 49 | print(f'MAP: {np.mean(mean_aps)}') 50 | 51 | 52 | def main(): 53 | parser = ArgumentParser(description='Evaluation') 54 | parser.add_argument('result', help='result file path') 55 | parser.add_argument('config', help='config file path') 56 | parser.add_argument('--ann_file', default=None, type=str) 57 | parser.add_argument('--img_prefix', default=None, type=str) 58 | args = parser.parse_args() 59 | cfg = mmcv.Config.fromfile(args.config) 60 | if args.ann_file is not None: 61 | cfg.data.test.ann_file = args.ann_file 62 | if args.img_prefix is not None: 63 | cfg.data.test.img_prefix = args.img_prefix 64 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 65 | evaluate(args.result, test_dataset, np.arange(0.5, 1.0, 0.05)) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /detection/prepare_solution.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from multiprocessing import Pool 3 | 4 | import mmcv 5 | import pandas as pd 6 | from tqdm import tqdm 7 | 8 | from detection.utils import 
test_submission 9 | 10 | COLUMNS = ['ImageId', 'FACE_X', 'FACE_Y', 'W', 'H', 'Confidence'] 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--predictions', type=str) 16 | parser.add_argument('--annotation', type=str) 17 | parser.add_argument('--output', type=str) 18 | parser.add_argument('--n_jobs', type=int, default=80) 19 | parser.add_argument('--identification', action='store_true') 20 | return parser.parse_args() 21 | 22 | 23 | def convert(prediction_annotation: tuple) -> pd.DataFrame: 24 | prediction, annotation = prediction_annotation 25 | prediction = prediction[0] 26 | solution = pd.DataFrame() 27 | solution['FACE_X'] = prediction[:, 0] 28 | solution['FACE_Y'] = prediction[:, 1] 29 | solution['W'] = prediction[:, 2] - solution['FACE_X'] 30 | solution['H'] = prediction[:, 3] - solution['FACE_Y'] 31 | solution['ImageId'] = annotation['filename'] 32 | solution['Confidence'] = prediction[:, 4] 33 | return solution[COLUMNS] 34 | 35 | 36 | def main(): 37 | args = parse_args() 38 | predictions = mmcv.load(args.predictions) 39 | annotation = mmcv.load(args.annotation) 40 | print(len(predictions)) 41 | with Pool(args.n_jobs) as p: 42 | samples = list(tqdm(iterable=p.imap(convert, zip(predictions, annotation)), total=len(predictions))) 43 | 44 | submission = pd.concat(samples) 45 | test_submission(submission) 46 | if args.identification: 47 | submission['SubjectId'] = range(len(submission)) 48 | submission.to_csv(args.output, index=False) 49 | 50 | 51 | if __name__ == '__main__': 52 | main() 53 | -------------------------------------------------------------------------------- /detection/prepare_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import pandas as pd 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--root', type=str) 10 | parser.add_argument('--output', type=str) 11 | return parser.parse_args() 12 | 13 | 14 | def main(): 15 | args = parse_args() 16 | output = pd.DataFrame() 17 | output['FILE'] = sorted(os.listdir(args.root)) 18 | output.to_csv(args.output, index=False) 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /detection/prune.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--weights', type=str) 9 | parser.add_argument('--output', type=str) 10 | return parser.parse_args() 11 | 12 | 13 | def main(): 14 | args = parse_args() 15 | weights = torch.load(args.weights) 16 | weights['state_dict'] = {k: v for k, v in weights['state_dict'].items() if not k.startswith('bbox_head')} 17 | torch.save(weights, args.output) 18 | 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /detection/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import mmcv 4 | import torch 5 | from mmcv.parallel import scatter, collate, MMDataParallel 6 | from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict 7 | from mmdet import datasets 8 | from mmdet.datasets import build_dataloader 9 | from mmdet.models import build_detector, detectors 10 | 11 | 12 | def single_test(model, data_loader, show=False): 13 | 
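# Single-GPU inference: iterate the loader without gradients and collect one
# detection result per image; boxes are rescaled back to the original image
# size unless the results are being visualized (show=True).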
model.eval() 14 | results = [] 15 | dataset = data_loader.dataset 16 | prog_bar = mmcv.ProgressBar(len(dataset)) 17 | for i, data in enumerate(data_loader): 18 | with torch.no_grad(): 19 | result = model(return_loss=False, rescale=not show, **data) 20 | results.append(result) 21 | 22 | if show: 23 | model.module.show_result(data, result, dataset.img_norm_cfg, dataset=dataset.CLASSES) 24 | 25 | batch_size = data['img'][0].size(0) 26 | for _ in range(batch_size): 27 | prog_bar.update() 28 | return results 29 | 30 | 31 | def _data_func(data, device_id): 32 | data = scatter(collate([data], samples_per_gpu=1), [device_id])[0] 33 | return dict(return_loss=False, rescale=True, **data) 34 | 35 | 36 | def parse_args(): 37 | parser = argparse.ArgumentParser(description='MMDet test detector') 38 | parser.add_argument('config', help='test config file path') 39 | parser.add_argument('checkpoint', help='checkpoint file') 40 | parser.add_argument('--ann_file', default=None, type=str) 41 | parser.add_argument('--img_prefix', default=None, type=str) 42 | parser.add_argument('--gpus', default=1, type=int, help='GPU number used for testing') 43 | parser.add_argument('--proc_per_gpu', default=1, type=int, help='Number of processes per GPU') 44 | parser.add_argument('--out', help='output result file') 45 | parser.add_argument('--show', action='store_true', help='show results') 46 | args = parser.parse_args() 47 | return args 48 | 49 | 50 | def main(): 51 | args = parse_args() 52 | 53 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): 54 | raise ValueError('The output file must be a pkl file.') 55 | 56 | cfg = mmcv.Config.fromfile(args.config) 57 | # set cudnn_benchmark 58 | if cfg.get('cudnn_benchmark', False): 59 | torch.backends.cudnn.benchmark = True 60 | cfg.model.pretrained = None 61 | cfg.data.test.test_mode = True 62 | 63 | if args.ann_file is not None: 64 | cfg.data.test.ann_file = args.ann_file 65 | if args.img_prefix is not None: 66 | cfg.data.test.img_prefix = args.img_prefix 67 | 68 | dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True)) 69 | if args.gpus == 1: 70 | model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) 71 | load_checkpoint(model, args.checkpoint) 72 | model = MMDataParallel(model, device_ids=[0]) 73 | 74 | data_loader = build_dataloader( 75 | dataset, imgs_per_gpu=1, workers_per_gpu=cfg.data.workers_per_gpu, num_gpus=1, dist=False, shuffle=False 76 | ) 77 | outputs = single_test(model, data_loader, args.show) 78 | else: 79 | model_args = cfg.model.copy() 80 | model_args.update(train_cfg=None, test_cfg=cfg.test_cfg) 81 | model_type = getattr(detectors, model_args.pop('type')) 82 | outputs = parallel_test( 83 | model_type, 84 | model_args, 85 | args.checkpoint, 86 | dataset, 87 | _data_func, 88 | range(args.gpus), 89 | workers_per_gpu=args.proc_per_gpu 90 | ) 91 | 92 | if args.out: 93 | print('writing results to {}'.format(args.out)) 94 | mmcv.dump(outputs, args.out) 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /detection/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | 6 | from mmdet import __version__ 7 | from mmdet.datasets import get_dataset 8 | from mmdet.apis import (train_detector, init_dist, get_root_logger, set_random_seed) 9 | from mmdet.models import build_detector 10 | import torch 11 | 12 
| 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Train a detector') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument('--work_dir', help='the dir to save logs and models') 17 | parser.add_argument('--resume_from', help='the checkpoint file to resume from') 18 | parser.add_argument('--validate', action='store_true', help='whether to evaluate the checkpoint during training') 19 | parser.add_argument( 20 | '--gpus', type=int, default=1, help='number of gpus to use ' 21 | '(only applicable to non-distributed training)' 22 | ) 23 | parser.add_argument('--seed', type=int, default=None, help='random seed') 24 | parser.add_argument('--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'], default='none', help='job launcher') 25 | parser.add_argument('--local_rank', type=int, default=0) 26 | parser.add_argument('--ann_file', default=None, type=str) 27 | parser.add_argument('--img_prefix', default=None, type=str) 28 | args = parser.parse_args() 29 | 30 | return args 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | cfg = Config.fromfile(args.config) 36 | # set cudnn_benchmark 37 | if cfg.get('cudnn_benchmark', False): 38 | torch.backends.cudnn.benchmark = True 39 | # update configs according to CLI args 40 | if args.ann_file is not None: 41 | cfg.data.train.ann_file = args.ann_file 42 | if args.img_prefix is not None: 43 | cfg.data.train.img_prefix = args.img_prefix 44 | 45 | if args.work_dir is not None: 46 | cfg.work_dir = args.work_dir 47 | if args.resume_from is not None: 48 | cfg.resume_from = args.resume_from 49 | cfg.gpus = args.gpus 50 | if cfg.checkpoint_config is not None: 51 | # save mmdet version in checkpoints as meta data 52 | cfg.checkpoint_config.meta = dict(mmdet_version=__version__, config=cfg.text) 53 | 54 | # init distributed env first, since logger depends on the dist info. 
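# (With --launcher pytorch, this script is typically started through
# `python -m torch.distributed.launch --nproc_per_node=<num_gpus> ...`,
# which passes --local_rank and sets the RANK environment variable read
# by _init_dist_pytorch in mmdet/apis/env.py; with the default launcher
# 'none', training stays single-process.)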
55 | if args.launcher == 'none': 56 | distributed = False 57 | else: 58 | distributed = True 59 | init_dist(args.launcher, **cfg.dist_params) 60 | 61 | # init logger before other steps 62 | logger = get_root_logger(cfg.log_level) 63 | logger.info('Distributed training: {}'.format(distributed)) 64 | 65 | # set random seeds 66 | if args.seed is not None: 67 | logger.info('Set random seed to {}'.format(args.seed)) 68 | set_random_seed(args.seed) 69 | 70 | model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 71 | 72 | train_dataset = get_dataset(cfg.data.train) 73 | train_detector(model, train_dataset, cfg, distributed=distributed, validate=args.validate, logger=logger) 74 | 75 | 76 | if __name__ == '__main__': 77 | main() 78 | -------------------------------------------------------------------------------- /detection/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def prepare_bboxes_labels(group, is_test=False): 5 | x_min = group['FACE_X'].values 6 | y_min = group['FACE_Y'].values 7 | 8 | x_max = x_min + group[('FACE_WIDTH', 'W')[is_test]].values 9 | y_max = y_min + group[('FACE_HEIGHT', 'H')[is_test]].values 10 | 11 | bboxes = np.stack([x_min, y_min, x_max, y_max], axis=-1) 12 | labels = np.ones(len(bboxes)) 13 | return np.array(bboxes, dtype=np.float32), np.array(labels, dtype=np.int) 14 | 15 | 16 | def test_submission(submission): 17 | assert max(submission['Confidence']) <= 1.0 and min(submission['Confidence']) >= 0.0 18 | assert np.all(submission[['FACE_X', 'FACE_Y', 'W', 'H']] >= 0) 19 | -------------------------------------------------------------------------------- /figures/solution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/figures/solution.pdf -------------------------------------------------------------------------------- /figures/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/figures/title.png -------------------------------------------------------------------------------- /flags.txt: -------------------------------------------------------------------------------- 1 | --ipc=host -------------------------------------------------------------------------------- /mmdetection/compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | echo "Building roi align op..." 6 | cd mmdet/ops/roi_align 7 | if [ -d "build" ]; then 8 | rm -r build 9 | fi 10 | $PYTHON setup.py build_ext --inplace 11 | 12 | echo "Building roi pool op..." 13 | cd ../roi_pool 14 | if [ -d "build" ]; then 15 | rm -r build 16 | fi 17 | $PYTHON setup.py build_ext --inplace 18 | 19 | echo "Building nms op..." 20 | cd ../nms 21 | make clean 22 | make PYTHON=${PYTHON} 23 | 24 | echo "Building dcn..." 
25 | cd ../dcn 26 | if [ -d "build" ]; then 27 | rm -r build 28 | fi 29 | $PYTHON setup.py build_ext --inplace 30 | -------------------------------------------------------------------------------- /mmdetection/mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_detector 3 | from .inference import inference_detector, show_result 4 | 5 | __all__ = [ 6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 7 | 'inference_detector', 'show_result' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdetection/mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | import torch.distributed as dist 8 | import torch.multiprocessing as mp 9 | from mmcv.runner import get_dist_info 10 | 11 | 12 | def init_dist(launcher, backend='nccl', **kwargs): 13 | if mp.get_start_method(allow_none=True) is None: 14 | mp.set_start_method('spawn') 15 | if launcher == 'pytorch': 16 | _init_dist_pytorch(backend, **kwargs) 17 | elif launcher == 'mpi': 18 | _init_dist_mpi(backend, **kwargs) 19 | elif launcher == 'slurm': 20 | _init_dist_slurm(backend, **kwargs) 21 | else: 22 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 23 | 24 | 25 | def _init_dist_pytorch(backend, **kwargs): 26 | # TODO: use local_rank instead of rank % num_gpus 27 | rank = int(os.environ['RANK']) 28 | num_gpus = torch.cuda.device_count() 29 | torch.cuda.set_device(rank % num_gpus) 30 | dist.init_process_group(backend=backend, **kwargs) 31 | 32 | 33 | def _init_dist_mpi(backend, **kwargs): 34 | raise NotImplementedError 35 | 36 | 37 | def _init_dist_slurm(backend, **kwargs): 38 | raise NotImplementedError 39 | 40 | 41 | def set_random_seed(seed): 42 | random.seed(seed) 43 | np.random.seed(seed) 44 | torch.manual_seed(seed) 45 | torch.cuda.manual_seed_all(seed) 46 | 47 | 48 | def get_root_logger(log_level=logging.INFO): 49 | logger = logging.getLogger() 50 | if not logger.hasHandlers(): 51 | logging.basicConfig( 52 | format='%(asctime)s - %(levelname)s - %(message)s', 53 | level=log_level) 54 | rank, _ = get_dist_info() 55 | if rank != 0: 56 | logger.setLevel('ERROR') 57 | return logger 58 | -------------------------------------------------------------------------------- /mmdetection/mmdet/apis/inference.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | 5 | from mmdet.datasets import to_tensor 6 | from mmdet.datasets.transforms import ImageTransform 7 | from mmdet.core import get_classes 8 | 9 | 10 | def _prepare_data(img, img_transform, cfg, device): 11 | ori_shape = img.shape 12 | img, img_shape, pad_shape, scale_factor = img_transform( 13 | img, 14 | scale=cfg.data.test.img_scale, 15 | keep_ratio=cfg.data.test.get('resize_keep_ratio', True)) 16 | img = to_tensor(img).to(device).unsqueeze(0) 17 | img_meta = [ 18 | dict( 19 | ori_shape=ori_shape, 20 | img_shape=img_shape, 21 | pad_shape=pad_shape, 22 | 
scale_factor=scale_factor, 23 | flip=False) 24 | ] 25 | return dict(img=[img], img_meta=[img_meta]) 26 | 27 | 28 | def _inference_single(model, img, img_transform, cfg, device): 29 | img = mmcv.imread(img) 30 | data = _prepare_data(img, img_transform, cfg, device) 31 | with torch.no_grad(): 32 | result = model(return_loss=False, rescale=True, **data) 33 | return result 34 | 35 | 36 | def _inference_generator(model, imgs, img_transform, cfg, device): 37 | for img in imgs: 38 | yield _inference_single(model, img, img_transform, cfg, device) 39 | 40 | 41 | def inference_detector(model, imgs, cfg, device='cuda:0'): 42 | img_transform = ImageTransform( 43 | size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg) 44 | model = model.to(device) 45 | model.eval() 46 | 47 | if not isinstance(imgs, list): 48 | return _inference_single(model, imgs, img_transform, cfg, device) 49 | else: 50 | return _inference_generator(model, imgs, img_transform, cfg, device) 51 | 52 | 53 | def show_result(img, result, dataset='coco', score_thr=0.3): 54 | class_names = get_classes(dataset) 55 | labels = [ 56 | np.full(bbox.shape[0], i, dtype=np.int32) 57 | for i, bbox in enumerate(result) 58 | ] 59 | labels = np.concatenate(labels) 60 | bboxes = np.vstack(result) 61 | img = mmcv.imread(img) 62 | mmcv.imshow_det_bboxes( 63 | img.copy(), 64 | bboxes, 65 | labels, 66 | class_names=class_names, 67 | score_thr=score_thr) 68 | -------------------------------------------------------------------------------- /mmdetection/mmdet/apis/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from collections import OrderedDict 4 | 5 | import torch 6 | from mmcv.runner import Runner, DistSamplerSeedHook 7 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 8 | 9 | from mmdet.core import (DistOptimizerHook, DistEvalmAPHook, 10 | CocoDistEvalRecallHook, CocoDistEvalmAPHook) 11 | from mmdet.datasets import build_dataloader 12 | from mmdet.models import RPN 13 | from .env import get_root_logger 14 | 15 | 16 | def parse_losses(losses): 17 | log_vars = OrderedDict() 18 | for loss_name, loss_value in losses.items(): 19 | if isinstance(loss_value, torch.Tensor): 20 | log_vars[loss_name] = loss_value.mean() 21 | elif isinstance(loss_value, list): 22 | log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) 23 | else: 24 | raise TypeError( 25 | '{} is not a tensor or list of tensors'.format(loss_name)) 26 | 27 | loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) 28 | 29 | log_vars['loss'] = loss 30 | for name in log_vars: 31 | log_vars[name] = log_vars[name].item() 32 | 33 | return loss, log_vars 34 | 35 | 36 | def batch_processor(model, data, train_mode): 37 | losses = model(**data) 38 | loss, log_vars = parse_losses(losses) 39 | 40 | outputs = dict( 41 | loss=loss, log_vars=log_vars, num_samples=len(data['img'].data)) 42 | 43 | return outputs 44 | 45 | 46 | def train_detector(model, 47 | dataset, 48 | cfg, 49 | distributed=False, 50 | validate=False, 51 | logger=None): 52 | if logger is None: 53 | logger = get_root_logger(cfg.log_level) 54 | 55 | # start training 56 | if distributed: 57 | _dist_train(model, dataset, cfg, validate=validate) 58 | else: 59 | _non_dist_train(model, dataset, cfg, validate=validate) 60 | 61 | 62 | def _dist_train(model, dataset, cfg, validate=False): 63 | # prepare data loaders 64 | data_loaders = [ 65 | build_dataloader( 66 | dataset, 67 | cfg.data.imgs_per_gpu, 68 | 
cfg.data.workers_per_gpu, 69 | dist=True) 70 | ] 71 | # put model on gpus 72 | model = MMDistributedDataParallel(model.cuda()) 73 | # build runner 74 | runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, 75 | cfg.log_level) 76 | # register hooks 77 | optimizer_config = DistOptimizerHook(**cfg.optimizer_config) 78 | runner.register_training_hooks(cfg.lr_config, optimizer_config, 79 | cfg.checkpoint_config, cfg.log_config) 80 | runner.register_hook(DistSamplerSeedHook()) 81 | # register eval hooks 82 | if validate: 83 | if isinstance(model.module, RPN): 84 | # TODO: implement recall hooks for other datasets 85 | runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) 86 | else: 87 | if cfg.data.val.type == 'CocoDataset': 88 | runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) 89 | else: 90 | runner.register_hook(DistEvalmAPHook(cfg.data.val)) 91 | 92 | if cfg.resume_from: 93 | runner.resume(cfg.resume_from) 94 | elif cfg.load_from: 95 | runner.load_checkpoint(cfg.load_from) 96 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs) 97 | 98 | 99 | def _non_dist_train(model, dataset, cfg, validate=False): 100 | # prepare data loaders 101 | data_loaders = [ 102 | build_dataloader( 103 | dataset, 104 | cfg.data.imgs_per_gpu, 105 | cfg.data.workers_per_gpu, 106 | cfg.gpus, 107 | dist=False) 108 | ] 109 | # put model on gpus 110 | model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() 111 | # build runner 112 | runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, 113 | cfg.log_level) 114 | runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, 115 | cfg.checkpoint_config, cfg.log_config) 116 | 117 | if cfg.resume_from: 118 | runner.resume(cfg.resume_from) 119 | elif cfg.load_from: 120 | runner.load_checkpoint(cfg.load_from) 121 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs) 122 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .mask import * # noqa: F401, F403 4 | from .loss import * # noqa: F401, F403 5 | from .evaluation import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_target 3 | 4 | __all__ = ['AnchorGenerator', 'anchor_target'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | 6 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 7 | self.base_size = base_size 8 | self.scales = torch.Tensor(scales) 9 | self.ratios = torch.Tensor(ratios) 10 | self.scale_major = scale_major 11 | self.ctr = ctr 12 | self.base_anchors = self.gen_base_anchors() 13 | 14 | @property 15 | def num_base_anchors(self): 16 | return self.base_anchors.size(0) 17 | 18 | def gen_base_anchors(self): 19 | w = self.base_size 20 | h = self.base_size 21 | if self.ctr is None: 22 | x_ctr = 
0.5 * (w - 1) 23 | y_ctr = 0.5 * (h - 1) 24 | else: 25 | x_ctr, y_ctr = self.ctr 26 | 27 | h_ratios = torch.sqrt(self.ratios) 28 | w_ratios = 1 / h_ratios 29 | if self.scale_major: 30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 32 | else: 33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 35 | 36 | base_anchors = torch.stack( 37 | [ 38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 40 | ], 41 | dim=-1).round() 42 | 43 | return base_anchors 44 | 45 | def _meshgrid(self, x, y, row_major=True): 46 | xx = x.repeat(len(y)) 47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 48 | if row_major: 49 | return xx, yy 50 | else: 51 | return yy, xx 52 | 53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 54 | base_anchors = self.base_anchors.to(device) 55 | 56 | feat_h, feat_w = featmap_size 57 | shift_x = torch.arange(0, feat_w, device=device) * stride 58 | shift_y = torch.arange(0, feat_h, device=device) * stride 59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 61 | shifts = shifts.type_as(base_anchors) 62 | # first feat_w elements correspond to the first row of shifts 63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 64 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 65 | 66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 67 | all_anchors = all_anchors.view(-1, 4) 68 | # first A rows correspond to A anchors of (0, 0) in feature map, 69 | # then (0, 1), (0, 2), ... 70 | return all_anchors 71 | 72 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 73 | feat_h, feat_w = featmap_size 74 | valid_h, valid_w = valid_size 75 | assert valid_h <= feat_h and valid_w <= feat_w 76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 78 | valid_x[:valid_w] = 1 79 | valid_y[:valid_h] = 1 80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 81 | valid = valid_xx & valid_yy 82 | valid = valid[:, None].expand( 83 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 84 | return valid 85 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler, 4 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 5 | CombinedSampler, SamplingResult) 6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) 9 | from .bbox_target import bbox_target 10 | 11 | __all__ = [ 12 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 13 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 14 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 15 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 16 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 17 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target' 18 | ] 19 | 
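The exported `bbox2delta`/`delta2bbox` pair implements the standard R-CNN box parameterization that `bbox_target.py` below consumes through `target_means`/`target_stds`. As a minimal sketch of the encoding direction (not part of this repository; `encode_delta` is a hypothetical helper, assuming mmdet's usual parameterization and the same `+ 1` width/height convention as `geometry.py`):

import numpy as np

def encode_delta(proposal, gt, means=(0., 0., 0., 0.), stds=(1., 1., 1., 1.)):
    # Box centers and sizes under the "+ 1" pixel convention.
    px, py = (proposal[0] + proposal[2]) / 2., (proposal[1] + proposal[3]) / 2.
    pw, ph = proposal[2] - proposal[0] + 1., proposal[3] - proposal[1] + 1.
    gx, gy = (gt[0] + gt[2]) / 2., (gt[1] + gt[3]) / 2.
    gw, gh = gt[2] - gt[0] + 1., gt[3] - gt[1] + 1.
    # Translation offsets are normalized by the proposal size, size offsets
    # are log-ratios; both are then standardized with the means/stds.
    deltas = np.array([(gx - px) / pw, (gy - py) / ph,
                       np.log(gw / pw), np.log(gh / ph)])
    return (deltas - np.asarray(means)) / np.asarray(stds)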
-------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict( 11 | cfg, assigners, default_args=kwargs) 12 | else: 13 | raise TypeError('Invalid type {} for building an assigner'.format( 14 | type(cfg))) 15 | 16 | 17 | def build_sampler(cfg, **kwargs): 18 | if isinstance(cfg, samplers.BaseSampler): 19 | return cfg 20 | elif isinstance(cfg, dict): 21 | return mmcv.runner.obj_from_dict( 22 | cfg, samplers, default_args=kwargs) 23 | else: 24 | raise TypeError('Invalid type {} for building a sampler'.format( 25 | type(cfg))) 26 | 27 | 28 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 29 | bbox_assigner = build_assigner(cfg.assigner) 30 | bbox_sampler = build_sampler(cfg.sampler) 31 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 32 | gt_labels) 33 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 34 | gt_labels) 35 | return assign_result, sampling_result 36 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_assigner import BaseAssigner 2 | from .max_iou_assigner import MaxIoUAssigner 3 | from .assign_result import AssignResult 4 | 5 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] 6 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .transforms import bbox2delta 4 | from ..utils import multi_apply 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | 
target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | if reg_classes > 1: 61 | bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights, 62 | labels, reg_classes) 63 | 64 | return labels, label_weights, bbox_targets, bbox_weights 65 | 66 | 67 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 68 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), 69 | 4 * num_classes)) 70 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), 71 | 4 * num_classes)) 72 | for i in torch.nonzero(labels > 0).squeeze(-1): 73 | start, end = labels[i] * 4, (labels[i] + 1) * 4 74 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 75 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 76 | return bbox_targets_expand, bbox_weights_expand 77 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two set of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or iof (intersection over 16 | foreground). 
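For example, under this module's "+ 1" pixel convention, boxes [0, 0, 9, 9] and [5, 5, 14, 14] each have area 10 * 10 = 100 and overlap in a 5 * 5 = 25 region, so iou = 25 / (100 + 100 - 25) ≈ 0.143 while iof = 25 / 100 = 0.25.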
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .combined_sampler import CombinedSampler 7 | from .ohem_sampler import OHEMSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 
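For example, with num=512 and pos_fraction=0.25, at most int(512 * 0.25) = 128 positive boxes are sampled and the remaining budget is filled with negatives, optionally capped by neg_pos_ub.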
41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 64 | # (may be a bug of PyTorch) 65 | pos_inds = pos_inds.unique() 66 | num_sampled_pos = pos_inds.numel() 67 | num_expected_neg = self.num - num_sampled_pos 68 | if self.neg_pos_ub >= 0: 69 | _pos = max(1, num_sampled_pos) 70 | neg_upper_bound = int(self.neg_pos_ub * _pos) 71 | if num_expected_neg > neg_upper_bound: 72 | num_expected_neg = neg_upper_bound 73 | neg_inds = self.neg_sampler._sample_neg( 74 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 75 | neg_inds = neg_inds.unique() 76 | 77 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 78 | assign_result, gt_flags) 79 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from ..assign_sampling import build_sampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 
33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class IoUBalancedNegSampler(RandomSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | hard_thr=0.1, 13 | hard_fraction=0.5, 14 | **kwargs): 15 | super(IoUBalancedNegSampler, self).__init__(num, pos_fraction, 16 | **kwargs) 17 | assert hard_thr > 0 18 | assert 0 < hard_fraction < 1 19 | self.hard_thr = hard_thr 20 | self.hard_fraction = hard_fraction 21 | 22 | def _sample_neg(self, assign_result, num_expected, **kwargs): 23 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 24 | if neg_inds.numel() != 0: 25 | neg_inds = neg_inds.squeeze(1) 26 | if len(neg_inds) <= num_expected: 27 | return neg_inds 28 | else: 29 | max_overlaps = assign_result.max_overlaps.cpu().numpy() 30 | # balance sampling for negative samples 31 | neg_set = set(neg_inds.cpu().numpy()) 32 | easy_set = set( 33 | np.where( 34 | np.logical_and(max_overlaps >= 0, 35 | max_overlaps < self.hard_thr))[0]) 36 | hard_set = set(np.where(max_overlaps >= self.hard_thr)[0]) 37 | easy_neg_inds = list(easy_set & neg_set) 38 | hard_neg_inds = list(hard_set & neg_set) 39 | 40 | num_expected_hard = int(num_expected * self.hard_fraction) 41 | if len(hard_neg_inds) > num_expected_hard: 42 | sampled_hard_inds = self.random_choice(hard_neg_inds, 43 | num_expected_hard) 44 | else: 45 | sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int) 46 | num_expected_easy = num_expected - len(sampled_hard_inds) 47 | if len(easy_neg_inds) > num_expected_easy: 48 | sampled_easy_inds = self.random_choice(easy_neg_inds, 49 | num_expected_easy) 50 | else: 51 | sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int) 52 | sampled_inds = np.concatenate((sampled_easy_inds, 53 | sampled_hard_inds)) 54 | if len(sampled_inds) < num_expected: 55 | num_extra = num_expected - len(sampled_inds) 56 | extra_inds = np.array(list(neg_set - set(sampled_inds))) 57 | if len(extra_inds) > num_extra: 58 | extra_inds = self.random_choice(extra_inds, num_extra) 59 | sampled_inds = np.concatenate((sampled_inds, extra_inds)) 60 | sampled_inds = torch.from_numpy(sampled_inds).long().to( 61 | assign_result.gt_inds.device) 62 | return sampled_inds 63 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from ..transforms import bbox2roi 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | self.bbox_roi_extractor = context.bbox_roi_extractor 
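# `context` is the detector module that owns this sampler; its RoI
# extractor and bbox head are reused in hard_mining() below to rank
# candidates by classification loss (higher loss = harder example).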
19 | self.bbox_head = context.bbox_head 20 | 21 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 22 | with torch.no_grad(): 23 | rois = bbox2roi([bboxes]) 24 | bbox_feats = self.bbox_roi_extractor( 25 | feats[:self.bbox_roi_extractor.num_inputs], rois) 26 | cls_score, _ = self.bbox_head(bbox_feats) 27 | loss = self.bbox_head.loss( 28 | cls_score=cls_score, 29 | bbox_pred=None, 30 | labels=labels, 31 | label_weights=cls_score.new_ones(cls_score.size(0)), 32 | bbox_targets=None, 33 | bbox_weights=None, 34 | reduce=False)['loss_cls'] 35 | _, topk_loss_inds = loss.topk(num_expected) 36 | return inds[topk_loss_inds] 37 | 38 | def _sample_pos(self, 39 | assign_result, 40 | num_expected, 41 | bboxes=None, 42 | feats=None, 43 | **kwargs): 44 | # Sample some hard positive samples 45 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 46 | if pos_inds.numel() != 0: 47 | pos_inds = pos_inds.squeeze(1) 48 | if pos_inds.numel() <= num_expected: 49 | return pos_inds 50 | else: 51 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 52 | assign_result.labels[pos_inds], feats) 53 | 54 | def _sample_neg(self, 55 | assign_result, 56 | num_expected, 57 | bboxes=None, 58 | feats=None, 59 | **kwargs): 60 | # Sample some hard negative samples 61 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 62 | if neg_inds.numel() != 0: 63 | neg_inds = neg_inds.squeeze(1) 64 | if len(neg_inds) <= num_expected: 65 | return neg_inds 66 | else: 67 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 68 | assign_result.labels[neg_inds], feats) 69 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Randomly select some elements from the gallery. 21 | 22 | PyTorch's implementation seems slower than numpy's, so we use numpy 23 | to permute the indices.
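        Example (illustrative sketch; the selected values are random, and
        the result keeps the gallery's type):
            >>> RandomSampler.random_choice(torch.arange(10), 3)
            tensor([7, 2, 5])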
24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious 
between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import (weighted_nll_loss, weighted_cross_entropy, 2 | weighted_binary_cross_entropy, sigmoid_focal_loss, 3 | weighted_sigmoid_focal_loss, mask_cross_entropy, 4 | smooth_l1_loss, weighted_smoothl1, accuracy) 5 | 6 | __all__ = [ 7 | 'weighted_nll_loss', 'weighted_cross_entropy', 8 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', 9 | 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss', 10 | 'weighted_smoothl1', 'accuracy' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/loss/losses.py: -------------------------------------------------------------------------------- 1 | # TODO merge naive and weighted loss. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def weighted_nll_loss(pred, label, weight, avg_factor=None): 7 | if avg_factor is None: 8 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 9 | raw = F.nll_loss(pred, label, reduction='none') 10 | return torch.sum(raw * weight)[None] / avg_factor 11 | 12 | 13 | def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=True): 14 | if avg_factor is None: 15 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 16 | raw = F.cross_entropy(pred, label, reduction='none') 17 | if reduce: 18 | return torch.sum(raw * weight)[None] / avg_factor 19 | else: 20 | return raw * weight / avg_factor 21 | 22 | 23 | def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None): 24 | if avg_factor is None: 25 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 
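    # Default normalizer: the number of non-zero weights, clamped to at
    # least 1, so the summed BCE below becomes a mean over labeled samples.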
26 | return F.binary_cross_entropy_with_logits( 27 | pred, label.float(), weight.float(), 28 | reduction='sum')[None] / avg_factor 29 | 30 | 31 | def sigmoid_focal_loss(pred, 32 | target, 33 | weight, 34 | gamma=2.0, 35 | alpha=0.25, 36 | reduction='mean'): 37 | pred_sigmoid = pred.sigmoid() 38 | target = target.type_as(pred) 39 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 40 | weight = (alpha * target + (1 - alpha) * (1 - target)) * weight 41 | weight = weight * pt.pow(gamma) 42 | loss = F.binary_cross_entropy_with_logits( 43 | pred, target, reduction='none') * weight 44 | reduction_enum = F._Reduction.get_enum(reduction) 45 | # none: 0, mean:1, sum: 2 46 | if reduction_enum == 0: 47 | return loss 48 | elif reduction_enum == 1: 49 | return loss.mean() 50 | elif reduction_enum == 2: 51 | return loss.sum() 52 | 53 | 54 | def weighted_sigmoid_focal_loss(pred, 55 | target, 56 | weight, 57 | gamma=2.0, 58 | alpha=0.25, 59 | avg_factor=None, 60 | num_classes=80): 61 | if avg_factor is None: 62 | avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6 63 | return sigmoid_focal_loss( 64 | pred, target, weight, gamma=gamma, alpha=alpha, 65 | reduction='sum')[None] / avg_factor 66 | 67 | 68 | def mask_cross_entropy(pred, target, label): 69 | num_rois = pred.size()[0] 70 | inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) 71 | pred_slice = pred[inds, label].squeeze(1) 72 | return F.binary_cross_entropy_with_logits( 73 | pred_slice, target, reduction='mean')[None] 74 | 75 | 76 | def smooth_l1_loss(pred, target, beta=1.0, reduction='mean'): 77 | assert beta > 0 78 | assert pred.size() == target.size() and target.numel() > 0 79 | diff = torch.abs(pred - target) 80 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 81 | diff - 0.5 * beta) 82 | reduction_enum = F._Reduction.get_enum(reduction) 83 | # none: 0, mean:1, sum: 2 84 | if reduction_enum == 0: 85 | return loss 86 | elif reduction_enum == 1: 87 | return loss.sum() / pred.numel() 88 | elif reduction_enum == 2: 89 | return loss.sum() 90 | 91 | 92 | def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None): 93 | if avg_factor is None: 94 | avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6 95 | loss = smooth_l1_loss(pred, target, beta, reduction='none') 96 | return torch.sum(loss * weight)[None] / avg_factor 97 | 98 | 99 | def accuracy(pred, target, topk=1): 100 | if isinstance(topk, int): 101 | topk = (topk, ) 102 | return_single = True 103 | else: 104 | return_single = False 105 | 106 | maxk = max(topk) 107 | _, pred_label = pred.topk(maxk, 1, True, True) 108 | pred_label = pred_label.t() 109 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 110 | 111 | res = [] 112 | for k in topk: 113 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 114 | res.append(correct_k.mul_(100.0 / pred.size(0))) 115 | return res[0] if return_single else res 116 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 
| import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 22 | for i in range(num_pos): 23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 24 | bbox = proposals_np[i, :].astype(np.int32) 25 | x1, y1, x2, y2 = bbox 26 | w = np.maximum(x2 - x1 + 1, 1) 27 | h = np.maximum(y2 - y1 + 1, 1) 28 | # mask is uint8 both before and after resizing 29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 30 | (mask_size, mask_size)) 31 | mask_targets.append(target) 32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 33 | pos_proposals.device) 34 | else: 35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) 36 | return mask_targets 37 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 
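    For example (an illustrative case): poly_lens = [[6, 8]] with
    polys_per_mask = [[2]] turns a single 14-element tensor into one mask
    built from two polys of lengths 6 and 8.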
10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly lengths 14 | polys_per_mask (list): a list (length = image num) of the number of 15 | polys in each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, 3 | merge_aug_scores, merge_aug_masks) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1): 7 | """NMS for multi-class bboxes. 8 | 9 | Args: 10 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 11 | multi_scores (Tensor): shape (n, #class) 12 | score_thr (float): bbox threshold, bboxes with scores lower than it 13 | will not be considered. 14 | nms_cfg (dict): NMS config; its 'type' key selects the op from nms_wrapper, remaining keys are passed to that op. 15 | max_num (int): if there are more than max_num bboxes after NMS, 16 | only top max_num will be kept. 17 | 18 | Returns: 19 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels 20 | are 0-based.
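    Example (a minimal, illustrative sketch; `dets` and `cls_scores` are
    hypothetical tensors, and the cfg assumes the classic 'nms' op that
    takes an `iou_thr` argument):
        >>> bboxes, labels = multiclass_nms(
        ...     dets, cls_scores, score_thr=0.05,
        ...     nms_cfg=dict(type='nms', iou_thr=0.5), max_num=100)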
21 | """ 22 | num_classes = multi_scores.shape[1] 23 | bboxes, labels = [], [] 24 | nms_cfg_ = nms_cfg.copy() 25 | nms_type = nms_cfg_.pop('type', 'nms') 26 | nms_op = getattr(nms_wrapper, nms_type) 27 | for i in range(1, num_classes): 28 | cls_inds = multi_scores[:, i] > score_thr 29 | if not cls_inds.any(): 30 | continue 31 | # get bboxes and scores of this class 32 | if multi_bboxes.shape[1] == 4: 33 | _bboxes = multi_bboxes[cls_inds, :] 34 | else: 35 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 36 | _scores = multi_scores[cls_inds, i] 37 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 38 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 39 | cls_labels = multi_bboxes.new_full( 40 | (cls_dets.shape[0], ), i - 1, dtype=torch.long) 41 | bboxes.append(cls_dets) 42 | labels.append(cls_labels) 43 | if bboxes: 44 | bboxes = torch.cat(bboxes) 45 | labels = torch.cat(labels) 46 | if bboxes.shape[0] > max_num: 47 | _, inds = bboxes[:, -1].sort(descending=True) 48 | inds = inds[:max_num] 49 | bboxes = bboxes[inds] 50 | labels = labels[inds] 51 | else: 52 | bboxes = multi_bboxes.new_zeros((0, 5)) 53 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 54 | 55 | return bboxes, labels 56 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | 5 | from mmdet.ops import nms 6 | from ..bbox import bbox_mapping_back 7 | 8 | 9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 10 | """Merge augmented proposals (multiscale, flip, etc.) 11 | 12 | Args: 13 | aug_proposals (list[Tensor]): proposals from different testing 14 | schemes, shape (n, 5). Note that they are not rescaled to the 15 | original image size. 16 | img_metas (list[dict]): image info including "shape_scale" and "flip". 17 | rpn_test_cfg (dict): rpn test config. 18 | 19 | Returns: 20 | Tensor: shape (n, 4), proposals corresponding to original image scale. 21 | """ 22 | recovered_proposals = [] 23 | for proposals, img_info in zip(aug_proposals, img_metas): 24 | img_shape = img_info['img_shape'] 25 | scale_factor = img_info['scale_factor'] 26 | flip = img_info['flip'] 27 | _proposals = proposals.clone() 28 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 29 | scale_factor, flip) 30 | recovered_proposals.append(_proposals) 31 | aug_proposals = torch.cat(recovered_proposals, dim=0) 32 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) 33 | scores = merged_proposals[:, 4] 34 | _, order = scores.sort(0, descending=True) 35 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 36 | order = order[:num] 37 | merged_proposals = merged_proposals[order, :] 38 | return merged_proposals 39 | 40 | 41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 42 | """Merge augmented detection bboxes and scores. 43 | 44 | Args: 45 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 46 | aug_scores (list[Tensor] or None): shape (n, #class) 47 | img_shapes (list[Tensor]): shape (3, ). 48 | rcnn_test_cfg (dict): rcnn test config. 
49 | 50 | Returns: 51 | tuple: (bboxes, scores), or bboxes alone if aug_scores is None. 52 | """ 53 | recovered_bboxes = [] 54 | for bboxes, img_info in zip(aug_bboxes, img_metas): 55 | img_shape = img_info[0]['img_shape'] 56 | scale_factor = img_info[0]['scale_factor'] 57 | flip = img_info[0]['flip'] 58 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 59 | recovered_bboxes.append(bboxes) 60 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 61 | if aug_scores is None: 62 | return bboxes 63 | else: 64 | scores = torch.stack(aug_scores).mean(dim=0) 65 | return bboxes, scores 66 | 67 | 68 | def merge_aug_scores(aug_scores): 69 | """Merge augmented bbox scores.""" 70 | if isinstance(aug_scores[0], torch.Tensor): 71 | return torch.mean(torch.stack(aug_scores), dim=0) 72 | else: 73 | return np.mean(aug_scores, axis=0) 74 | 75 | 76 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 77 | """Merge augmented mask prediction. 78 | 79 | Args: 80 | aug_masks (list[ndarray]): shape (n, #class, h, w) 81 | img_metas (list[list[dict]]): image info including "flip". 82 | rcnn_test_cfg (dict): rcnn test config. 83 | 84 | Returns: 85 | ndarray: merged masks, same shape as each element of aug_masks. 86 | """ 87 | recovered_masks = [ 88 | mask if not img_info[0]['flip'] else mask[..., ::-1] 89 | for mask, img_info in zip(aug_masks, img_metas) 90 | ] 91 | if weights is None: 92 | merged_masks = np.mean(recovered_masks, axis=0) 93 | else: 94 | merged_masks = np.average( 95 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 96 | return merged_masks 97 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import allreduce_grads, DistOptimizerHook 2 | from .misc import tensor2imgs, unmap, multi_apply 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 5 | _take_tensors) 6 | from mmcv.runner import OptimizerHook 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in model.parameters() 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 |
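    # A drop-in OptimizerHook for distributed training: after the backward
    # pass it all-reduces gradients across workers (optionally coalesced
    # into buckets of `bucket_size_mb`) before clipping and stepping.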
def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 55 | if self.grad_clip is not None: 56 | self.clip_grads(runner.model.parameters()) 57 | runner.optimizer.step() 58 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """Unmap a subset of items (data) back to the original set of items 29 | (of size count).""" 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill) 32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .xml_style import XMLDataset 3 | from .coco import CocoDataset 4 | from .voc import VOCDataset 5 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 6 | from .utils import to_tensor, random_scale, show_ann, get_dataset 7 | from .concat_dataset import ConcatDataset 8 | from .repeat_dataset import RepeatDataset 9 | from .extra_aug import ExtraAugmentation 10 | 11 | __all__ = [ 12 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler', 13 | 'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale', 14 | 'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset', 15 | 'ExtraAugmentation' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | 5 | class ConcatDataset(_ConcatDataset): 6 | """A wrapper of concatenated dataset. 7 | 8 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 9 | also concatenates the group flags used for aspect-ratio grouping. 10 | 11 | Args: 12 | datasets (list[:obj:`Dataset`]): A list of datasets.
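    Example (sketch; `dset_a` and `dset_b` are hypothetical datasets):
        >>> dset = ConcatDataset([dset_a, dset_b])
        >>> len(dset) == len(dset_a) + len(dset_b)
        True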
13 | """ 14 | 15 | def __init__(self, datasets): 16 | super(ConcatDataset, self).__init__(datasets) 17 | self.CLASSES = datasets[0].CLASSES 18 | if hasattr(datasets[0], 'flag'): 19 | flags = [] 20 | for i in range(0, len(datasets)): 21 | flags.append(datasets[i].flag) 22 | self.flag = np.concatenate(flags) 23 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/extra_aug.py: -------------------------------------------------------------------------------- 1 | import albumentations as A 2 | import mmcv 3 | import numpy as np 4 | from mmcv.runner import obj_from_dict 5 | from numpy import random 6 | from . import transforms 7 | 8 | 9 | class PhotoMetricDistortion(object): 10 | 11 | def __init__(self, 12 | brightness_delta=32, 13 | contrast_range=(0.5, 1.5), 14 | saturation_range=(0.5, 1.5), 15 | hue_delta=18): 16 | self.brightness_delta = brightness_delta 17 | self.contrast_lower, self.contrast_upper = contrast_range 18 | self.saturation_lower, self.saturation_upper = saturation_range 19 | self.hue_delta = hue_delta 20 | 21 | def __call__(self, img, boxes, labels): 22 | # random brightness 23 | if random.randint(2): 24 | delta = random.uniform(-self.brightness_delta, 25 | self.brightness_delta) 26 | img += delta 27 | 28 | # mode == 0 --> do random contrast first 29 | # mode == 1 --> do random contrast last 30 | mode = random.randint(2) 31 | if mode == 1: 32 | if random.randint(2): 33 | alpha = random.uniform(self.contrast_lower, 34 | self.contrast_upper) 35 | img *= alpha 36 | 37 | # convert color from BGR to HSV 38 | img = mmcv.bgr2hsv(img) 39 | 40 | # random saturation 41 | if random.randint(2): 42 | img[..., 1] *= random.uniform(self.saturation_lower, 43 | self.saturation_upper) 44 | 45 | # random hue 46 | if random.randint(2): 47 | img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta) 48 | img[..., 0][img[..., 0] > 360] -= 360 49 | img[..., 0][img[..., 0] < 0] += 360 50 | 51 | # convert color from HSV to BGR 52 | img = mmcv.hsv2bgr(img) 53 | 54 | # random contrast 55 | if mode == 0: 56 | if random.randint(2): 57 | alpha = random.uniform(self.contrast_lower, 58 | self.contrast_upper) 59 | img *= alpha 60 | 61 | # randomly swap channels 62 | if random.randint(2): 63 | img = img[..., random.permutation(3)] 64 | 65 | return img, boxes, labels 66 | 67 | 68 | class Expand(object): 69 | 70 | def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)): 71 | if to_rgb: 72 | self.mean = mean[::-1] 73 | else: 74 | self.mean = mean 75 | self.min_ratio, self.max_ratio = ratio_range 76 | 77 | def __call__(self, img, boxes, labels): 78 | if random.randint(2): 79 | return img, boxes, labels 80 | 81 | h, w, c = img.shape 82 | ratio = random.uniform(self.min_ratio, self.max_ratio) 83 | expand_img = np.full((int(h * ratio), int(w * ratio), c), 84 | self.mean).astype(img.dtype) 85 | left = int(random.uniform(0, w * ratio - w)) 86 | top = int(random.uniform(0, h * ratio - h)) 87 | expand_img[top:top + h, left:left + w] = img 88 | img = expand_img 89 | boxes += np.tile((left, top), 2) 90 | return img, boxes, labels 91 | 92 | 93 | class ExtraAugmentation(object): 94 | 95 | def __init__(self, **kwargs): 96 | self.transform = self.transform_from_dict(**kwargs) 97 | 98 | def transform_from_dict(self, **kwargs): 99 | if 'transforms' in kwargs: 100 | kwargs['transforms'] = [self.transform_from_dict(**transform) for transform in kwargs['transforms']] 101 | try: 102 | return obj_from_dict(kwargs, transforms) 103 | except AttributeError: 104 | 
return obj_from_dict(kwargs, A) 105 | 106 | def __call__(self, img, bboxes, labels): 107 | data = self.transform(image=img, bboxes=bboxes, labels=labels) 108 | return data['image'], np.array(data['bboxes'], dtype=np.float32), np.array(data['labels'], dtype=np.int) 109 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = [ 5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 6 | ] 7 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | **kwargs): 21 | if dist: 22 | rank, world_size = get_dist_info() 23 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, 24 | rank) 25 | batch_size = imgs_per_gpu 26 | num_workers = workers_per_gpu 27 | else: 28 | if not kwargs.get('shuffle', True): 29 | sampler = None 30 | else: 31 | sampler = GroupSampler(dataset, imgs_per_gpu) 32 | batch_size = num_gpus * imgs_per_gpu 33 | num_workers = num_gpus * workers_per_gpu 34 | 35 | data_loader = DataLoader( 36 | dataset, 37 | batch_size=batch_size, 38 | sampler=sampler, 39 | num_workers=num_workers, 40 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 41 | pin_memory=False, 42 | **kwargs) 43 | 44 | return data_loader 45 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/loader/sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import math 4 | import torch 5 | import numpy as np 6 | 7 | from torch.distributed import get_world_size, get_rank 8 | from torch.utils.data.sampler import Sampler 9 | 10 | 11 | class GroupSampler(Sampler): 12 | 13 | def __init__(self, dataset, samples_per_gpu=1): 14 | assert hasattr(dataset, 'flag') 15 | self.dataset = dataset 16 | self.samples_per_gpu = samples_per_gpu 17 | self.flag = dataset.flag.astype(np.int64) 18 | self.group_sizes = np.bincount(self.flag) 19 | self.num_samples = 0 20 | for i, size in enumerate(self.group_sizes): 21 | self.num_samples += int(np.ceil( 22 | size / self.samples_per_gpu)) * self.samples_per_gpu 23 | 24 | def __iter__(self): 25 | indices = [] 26 | for i, size in enumerate(self.group_sizes): 27 | if size == 0: 28 | continue 29 | indice = np.where(self.flag == i)[0] 30 | assert len(indice) == size 31 | np.random.shuffle(indice) 32 | num_extra = int(np.ceil(size / self.samples_per_gpu) 33 | ) * self.samples_per_gpu - len(indice) 34 | indice = np.concatenate([indice, indice[:num_extra]]) 35 | indices.append(indice) 36 | indices = np.concatenate(indices) 37 | indices = [ 38 | indices[i * 
self.samples_per_gpu:(i + 1) * self.samples_per_gpu] 39 | for i in np.random.permutation( 40 | range(len(indices) // self.samples_per_gpu)) 41 | ] 42 | indices = np.concatenate(indices) 43 | indices = torch.from_numpy(indices).long() 44 | assert len(indices) == self.num_samples 45 | return iter(indices) 46 | 47 | def __len__(self): 48 | return self.num_samples 49 | 50 | 51 | class DistributedGroupSampler(Sampler): 52 | """Sampler that restricts data loading to a subset of the dataset. 53 | It is especially useful in conjunction with 54 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 55 | process can pass a DistributedSampler instance as a DataLoader sampler, 56 | and load a subset of the original dataset that is exclusive to it. 57 | .. note:: 58 | Dataset is assumed to be of constant size. 59 | Arguments: 60 | dataset: Dataset used for sampling. 61 | num_replicas (optional): Number of processes participating in 62 | distributed training. 63 | rank (optional): Rank of the current process within num_replicas. 64 | """ 65 | 66 | def __init__(self, 67 | dataset, 68 | samples_per_gpu=1, 69 | num_replicas=None, 70 | rank=None): 71 | if num_replicas is None: 72 | num_replicas = get_world_size() 73 | if rank is None: 74 | rank = get_rank() 75 | self.dataset = dataset 76 | self.samples_per_gpu = samples_per_gpu 77 | self.num_replicas = num_replicas 78 | self.rank = rank 79 | self.epoch = 0 80 | 81 | assert hasattr(self.dataset, 'flag') 82 | self.flag = self.dataset.flag 83 | self.group_sizes = np.bincount(self.flag) 84 | 85 | self.num_samples = 0 86 | for i, j in enumerate(self.group_sizes): 87 | self.num_samples += int( 88 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 89 | self.num_replicas)) * self.samples_per_gpu 90 | self.total_size = self.num_samples * self.num_replicas 91 | 92 | def __iter__(self): 93 | # deterministically shuffle based on epoch 94 | g = torch.Generator() 95 | g.manual_seed(self.epoch) 96 | 97 | indices = [] 98 | for i, size in enumerate(self.group_sizes): 99 | if size > 0: 100 | indice = np.where(self.flag == i)[0] 101 | assert len(indice) == size 102 | indice = indice[list(torch.randperm(int(size), 103 | generator=g))].tolist() 104 | extra = int( 105 | math.ceil( 106 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 107 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 108 | indice += indice[:extra] 109 | indices += indice 110 | 111 | assert len(indices) == self.total_size 112 | 113 | indices = [ 114 | indices[j] for i in list( 115 | torch.randperm( 116 | len(indices) // self.samples_per_gpu, generator=g)) 117 | for j in range(i * self.samples_per_gpu, (i + 1) * 118 | self.samples_per_gpu) 119 | ] 120 | 121 | # subsample 122 | offset = self.num_samples * self.rank 123 | indices = indices[offset:offset + self.num_samples] 124 | assert len(indices) == self.num_samples 125 | 126 | return iter(indices) 127 | 128 | def __len__(self): 129 | return self.num_samples 130 | 131 | def set_epoch(self, epoch): 132 | self.epoch = epoch 133 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | self.flag = np.tile(self.dataset.flag, times) 12 
| 13 | self._ori_len = len(self.dataset) 14 | 15 | def __getitem__(self, idx): 16 | return self.dataset[idx % self._ori_len] 17 | 18 | def __len__(self): 19 | return self.times * self._ori_len 20 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from collections import Sequence 3 | 4 | import mmcv 5 | from mmcv.runner import obj_from_dict 6 | import torch 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | from .concat_dataset import ConcatDataset 11 | from .repeat_dataset import RepeatDataset 12 | from .. import datasets 13 | 14 | 15 | def to_tensor(data): 16 | """Convert objects of various python types to :obj:`torch.Tensor`. 17 | 18 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, 19 | :class:`Sequence`, :class:`int` and :class:`float`. 20 | """ 21 | if isinstance(data, torch.Tensor): 22 | return data 23 | elif isinstance(data, np.ndarray): 24 | return torch.from_numpy(data) 25 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 26 | return torch.tensor(data) 27 | elif isinstance(data, int): 28 | return torch.LongTensor([data]) 29 | elif isinstance(data, float): 30 | return torch.FloatTensor([data]) 31 | else: 32 | raise TypeError('type {} cannot be converted to tensor.'.format( 33 | type(data))) 34 | 35 | 36 | def random_scale(img_scales, mode='range'): 37 | """Randomly select a scale from a list of scales or scale ranges. 38 | 39 | Args: 40 | img_scales (list[tuple]): Image scale or scale range. 41 | mode (str): "range" or "value". 42 | 43 | Returns: 44 | tuple: Sampled image scale. 45 | """ 46 | num_scales = len(img_scales) 47 | if num_scales == 1: # fixed scale is specified 48 | img_scale = img_scales[0] 49 | elif num_scales == 2: # randomly sample a scale 50 | if mode == 'range': 51 | img_scale_long = [max(s) for s in img_scales] 52 | img_scale_short = [min(s) for s in img_scales] 53 | long_edge = np.random.randint( 54 | min(img_scale_long), 55 | max(img_scale_long) + 1) 56 | short_edge = np.random.randint( 57 | min(img_scale_short), 58 | max(img_scale_short) + 1) 59 | img_scale = (long_edge, short_edge) 60 | elif mode == 'value': 61 | img_scale = img_scales[np.random.randint(num_scales)] 62 | else: 63 | if mode != 'value': 64 | raise ValueError( 65 | 'Only "value" mode supports more than 2 image scales') 66 | img_scale = img_scales[np.random.randint(num_scales)] 67 | return img_scale 68 | 69 | 70 | def show_ann(coco, img, ann_info): 71 | plt.imshow(mmcv.bgr2rgb(img)) 72 | plt.axis('off') 73 | coco.showAnns(ann_info) 74 | plt.show() 75 | 76 | 77 | def get_dataset(data_cfg): 78 | if data_cfg['type'] == 'RepeatDataset': 79 | return RepeatDataset( 80 | get_dataset(data_cfg['dataset']), data_cfg['times']) 81 | 82 | if isinstance(data_cfg['ann_file'], (list, tuple)): 83 | ann_files = data_cfg['ann_file'] 84 | num_dset = len(ann_files) 85 | else: 86 | ann_files = [data_cfg['ann_file']] 87 | num_dset = 1 88 | 89 | if 'proposal_file' in data_cfg.keys(): 90 | if isinstance(data_cfg['proposal_file'], (list, tuple)): 91 | proposal_files = data_cfg['proposal_file'] 92 | else: 93 | proposal_files = [data_cfg['proposal_file']] 94 | else: 95 | proposal_files = [None] * num_dset 96 | assert len(proposal_files) == num_dset 97 | 98 | if isinstance(data_cfg['img_prefix'], (list, tuple)): 99 | img_prefixes = data_cfg['img_prefix'] 100 | else: 101 | img_prefixes = [data_cfg['img_prefix']] 
* num_dset 102 | assert len(img_prefixes) == num_dset 103 | 104 | dsets = [] 105 | for i in range(num_dset): 106 | data_info = copy.deepcopy(data_cfg) 107 | data_info['ann_file'] = ann_files[i] 108 | data_info['proposal_file'] = proposal_files[i] 109 | data_info['img_prefix'] = img_prefixes[i] 110 | dset = obj_from_dict(data_info, datasets) 111 | dsets.append(dset) 112 | if len(dsets) > 1: 113 | dset = ConcatDataset(dsets) 114 | else: 115 | dset = dsets[0] 116 | return dset 117 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | 11 | def __init__(self, **kwargs): 12 | super(VOCDataset, self).__init__(**kwargs) 13 | if 'VOC2007' in self.img_prefix: 14 | self.year = 2007 15 | elif 'VOC2012' in self.img_prefix: 16 | self.year = 2012 17 | else: 18 | raise ValueError('Cannot infer dataset year from img_prefix') 19 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | 9 | 10 | class XMLDataset(CustomDataset): 11 | 12 | def __init__(self, **kwargs): 13 | super(XMLDataset, self).__init__(**kwargs) 14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 15 | 16 | def load_annotations(self, ann_file): 17 | img_infos = [] 18 | img_ids = mmcv.list_from_file(ann_file) 19 | for img_id in img_ids: 20 | filename = 'JPEGImages/{}.jpg'.format(img_id) 21 | xml_path = osp.join(self.img_prefix, 'Annotations', 22 | '{}.xml'.format(img_id)) 23 | tree = ET.parse(xml_path) 24 | root = tree.getroot() 25 | size = root.find('size') 26 | width = int(size.find('width').text) 27 | height = int(size.find('height').text) 28 | img_infos.append( 29 | dict(id=img_id, filename=filename, width=width, height=height)) 30 | return img_infos 31 | 32 | def get_ann_info(self, idx): 33 | img_id = self.img_infos[idx]['id'] 34 | xml_path = osp.join(self.img_prefix, 'Annotations', 35 | '{}.xml'.format(img_id)) 36 | tree = ET.parse(xml_path) 37 | root = tree.getroot() 38 | bboxes = [] 39 | labels = [] 40 | bboxes_ignore = [] 41 | labels_ignore = [] 42 | for obj in root.findall('object'): 43 | name = obj.find('name').text 44 | label = self.cat2label[name] 45 | difficult = int(obj.find('difficult').text) 46 | bnd_box = obj.find('bndbox') 47 | bbox = [ 48 | int(bnd_box.find('xmin').text), 49 | int(bnd_box.find('ymin').text), 50 | int(bnd_box.find('xmax').text), 51 | int(bnd_box.find('ymax').text) 52 | ] 53 | if difficult: 54 | bboxes_ignore.append(bbox) 55 | labels_ignore.append(label) 56 | else: 57 | bboxes.append(bbox) 58 | labels.append(label) 59 | if not bboxes: 60 | bboxes = np.zeros((0, 4)) 61 | labels = np.zeros((0, )) 62 | else: 63 | bboxes = np.array(bboxes, ndmin=2) - 1 64 | labels = np.array(labels) 65 | if not bboxes_ignore: 66 | bboxes_ignore = np.zeros((0, 4)) 67 | labels_ignore = np.zeros((0, )) 68 | else: 69 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) 
- 1 70 | labels_ignore = np.array(labels_ignore) 71 | ann = dict( 72 | bboxes=bboxes.astype(np.float32), 73 | labels=labels.astype(np.int64), 74 | bboxes_ignore=bboxes_ignore.astype(np.float32), 75 | labels_ignore=labels_ignore.astype(np.int64)) 76 | return ann 77 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .anchor_heads import * # noqa: F401,F403 5 | from .bbox_heads import * # noqa: F401,F403 6 | from .mask_heads import * # noqa: F401,F403 7 | from .detectors import * # noqa: F401,F403 8 | from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS 9 | from .builder import (build_backbone, build_neck, build_roi_extractor, 10 | build_head, build_detector) 11 | 12 | __all__ = [ 13 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'HEADS', 'DETECTORS', 14 | 'build_backbone', 'build_neck', 'build_roi_extractor', 'build_head', 15 | 'build_detector' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | from .retina_head import RetinaHead 4 | from .ssd_head import SSDHead 5 | 6 | __all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead'] 7 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | **kwargs): 20 | self.stacked_convs = stacked_convs 21 | self.octave_base_scale = octave_base_scale 22 | self.scales_per_octave = scales_per_octave 23 | octave_scales = np.array( 24 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 25 | anchor_scales = octave_scales * octave_base_scale 26 | super(RetinaHead, self).__init__( 27 | num_classes, 28 | in_channels, 29 | anchor_scales=anchor_scales, 30 | use_sigmoid_cls=True, 31 | use_focal_loss=True, 32 | **kwargs) 33 | 34 | def _init_layers(self): 35 | self.relu = nn.ReLU(inplace=True) 36 | self.cls_convs = nn.ModuleList() 37 | self.reg_convs = nn.ModuleList() 38 | for i in range(self.stacked_convs): 39 | chn = self.in_channels if i == 0 else self.feat_channels 40 | self.cls_convs.append( 41 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 42 | self.reg_convs.append( 43 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 44 | self.retina_cls = nn.Conv2d( 45 | self.feat_channels, 46 | self.num_anchors * self.cls_out_channels, 47 | 3, 48 | padding=1) 49 | self.retina_reg = nn.Conv2d( 50 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 51 | 52 | def init_weights(self): 53 | for m in self.cls_convs: 54 | normal_init(m, std=0.01) 55 | for m in self.reg_convs: 56 | normal_init(m, std=0.01) 
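        # Per the RetinaNet paper, initialize the classifier bias so every
        # anchor starts with ~1% foreground probability; this keeps the
        # focal loss from blowing up in the earliest iterations.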
57 | bias_cls = bias_init_with_prob(0.01) 58 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 59 | normal_init(self.retina_reg, std=0.01) 60 | 61 | def forward_single(self, x): 62 | cls_feat = x 63 | reg_feat = x 64 | for cls_conv in self.cls_convs: 65 | cls_feat = self.relu(cls_conv(cls_feat)) 66 | for reg_conv in self.reg_convs: 67 | reg_feat = self.relu(reg_conv(reg_feat)) 68 | cls_score = self.retina_cls(cls_feat) 69 | bbox_pred = self.retina_reg(reg_feat) 70 | return cls_score, bbox_pred 71 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/anchor_heads/rpn_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn import normal_init 5 | 6 | from mmdet.core import delta2bbox 7 | from mmdet.ops import nms 8 | from .anchor_head import AnchorHead 9 | from ..registry import HEADS 10 | 11 | 12 | @HEADS.register_module 13 | class RPNHead(AnchorHead): 14 | 15 | def __init__(self, in_channels, **kwargs): 16 | super(RPNHead, self).__init__(2, in_channels, **kwargs) 17 | 18 | def _init_layers(self): 19 | self.rpn_conv = nn.Conv2d( 20 | self.in_channels, self.feat_channels, 3, padding=1) 21 | self.rpn_cls = nn.Conv2d(self.feat_channels, 22 | self.num_anchors * self.cls_out_channels, 1) 23 | self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1) 24 | 25 | def init_weights(self): 26 | normal_init(self.rpn_conv, std=0.01) 27 | normal_init(self.rpn_cls, std=0.01) 28 | normal_init(self.rpn_reg, std=0.01) 29 | 30 | def forward_single(self, x): 31 | x = self.rpn_conv(x) 32 | x = F.relu(x, inplace=True) 33 | rpn_cls_score = self.rpn_cls(x) 34 | rpn_bbox_pred = self.rpn_reg(x) 35 | return rpn_cls_score, rpn_bbox_pred 36 | 37 | def loss(self, 38 | cls_scores, 39 | bbox_preds, 40 | gt_bboxes, 41 | img_metas, 42 | cfg, 43 | gt_bboxes_ignore=None): 44 | losses = super(RPNHead, self).loss( 45 | cls_scores, 46 | bbox_preds, 47 | gt_bboxes, 48 | None, 49 | img_metas, 50 | cfg, 51 | gt_bboxes_ignore=gt_bboxes_ignore) 52 | return dict( 53 | loss_rpn_cls=losses['loss_cls'], loss_rpn_reg=losses['loss_reg']) 54 | 55 | def get_bboxes_single(self, 56 | cls_scores, 57 | bbox_preds, 58 | mlvl_anchors, 59 | img_shape, 60 | scale_factor, 61 | cfg, 62 | rescale=False): 63 | mlvl_proposals = [] 64 | for idx in range(len(cls_scores)): 65 | rpn_cls_score = cls_scores[idx] 66 | rpn_bbox_pred = bbox_preds[idx] 67 | assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:] 68 | anchors = mlvl_anchors[idx] 69 | rpn_cls_score = rpn_cls_score.permute(1, 2, 0) 70 | if self.use_sigmoid_cls: 71 | rpn_cls_score = rpn_cls_score.reshape(-1) 72 | scores = rpn_cls_score.sigmoid() 73 | else: 74 | rpn_cls_score = rpn_cls_score.reshape(-1, 2) 75 | scores = rpn_cls_score.softmax(dim=1)[:, 1] 76 | rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4) 77 | if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre: 78 | _, topk_inds = scores.topk(cfg.nms_pre) 79 | rpn_bbox_pred = rpn_bbox_pred[topk_inds, :] 80 | anchors = anchors[topk_inds, :] 81 | scores = scores[topk_inds] 82 | proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means, 83 | self.target_stds, img_shape) 84 | if cfg.min_bbox_size > 0: 85 | w = proposals[:, 2] - proposals[:, 0] + 1 86 | h = proposals[:, 3] - proposals[:, 1] + 1 87 | valid_inds = torch.nonzero((w >= cfg.min_bbox_size) & 88 | (h >= cfg.min_bbox_size)).squeeze() 89 | proposals = 
proposals[valid_inds, :] 90 | scores = scores[valid_inds] 91 | proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1) 92 | proposals, _ = nms(proposals, cfg.nms_thr) 93 | proposals = proposals[:cfg.nms_post, :] 94 | mlvl_proposals.append(proposals) 95 | proposals = torch.cat(mlvl_proposals, 0) 96 | if cfg.nms_across_levels: 97 | proposals, _ = nms(proposals, cfg.nms_thr) 98 | proposals = proposals[:cfg.max_num, :] 99 | else: 100 | scores = proposals[:, 4] 101 | num = min(cfg.max_num, proposals.shape[0]) 102 | _, topk_inds = scores.topk(num) 103 | proposals = proposals[topk_inds, :] 104 | return proposals 105 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet 2 | from .resnext import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | 5 | __all__ = ['ResNet', 'ResNeXt', 'SSDVGG'] 6 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/backbones/ssd_vgg.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from mmcv.cnn import (VGG, xavier_init, constant_init, kaiming_init, 7 | normal_init) 8 | from mmcv.runner import load_checkpoint 9 | from ..registry import BACKBONES 10 | 11 | 12 | @BACKBONES.register_module 13 | class SSDVGG(VGG): 14 | extra_setting = { 15 | 300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256), 16 | 512: (256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128), 17 | } 18 | 19 | def __init__(self, 20 | input_size, 21 | depth, 22 | with_last_pool=False, 23 | ceil_mode=True, 24 | out_indices=(3, 4), 25 | out_feature_indices=(22, 34), 26 | l2_norm_scale=20.): 27 | super(SSDVGG, self).__init__( 28 | depth, 29 | with_last_pool=with_last_pool, 30 | ceil_mode=ceil_mode, 31 | out_indices=out_indices) 32 | assert input_size in (300, 512) 33 | self.input_size = input_size 34 | 35 | self.features.add_module( 36 | str(len(self.features)), 37 | nn.MaxPool2d(kernel_size=3, stride=1, padding=1)) 38 | self.features.add_module( 39 | str(len(self.features)), 40 | nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)) 41 | self.features.add_module( 42 | str(len(self.features)), nn.ReLU(inplace=True)) 43 | self.features.add_module( 44 | str(len(self.features)), nn.Conv2d(1024, 1024, kernel_size=1)) 45 | self.features.add_module( 46 | str(len(self.features)), nn.ReLU(inplace=True)) 47 | self.out_feature_indices = out_feature_indices 48 | 49 | self.inplanes = 1024 50 | self.extra = self._make_extra_layers(self.extra_setting[input_size]) 51 | self.l2_norm = L2Norm( 52 | self.features[out_feature_indices[0] - 1].out_channels, 53 | l2_norm_scale) 54 | 55 | def init_weights(self, pretrained=None): 56 | if isinstance(pretrained, str): 57 | logger = logging.getLogger() 58 | load_checkpoint(self, pretrained, strict=False, logger=logger) 59 | elif pretrained is None: 60 | for m in self.features.modules(): 61 | if isinstance(m, nn.Conv2d): 62 | kaiming_init(m) 63 | elif isinstance(m, nn.BatchNorm2d): 64 | constant_init(m, 1) 65 | elif isinstance(m, nn.Linear): 66 | normal_init(m, std=0.01) 67 | else: 68 | raise TypeError('pretrained must be a str or None') 69 | 70 | for m in self.extra.modules(): 71 | if isinstance(m, nn.Conv2d): 72 | xavier_init(m, distribution='uniform') 73 | 74 | 
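        # Start the learnable L2Norm scale at its configured constant
        # (20 by default, following the original SSD/ParseNet recipe).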
constant_init(self.l2_norm, self.l2_norm.scale) 75 | 76 | def forward(self, x): 77 | outs = [] 78 | for i, layer in enumerate(self.features): 79 | x = layer(x) 80 | if i in self.out_feature_indices: 81 | outs.append(x) 82 | for i, layer in enumerate(self.extra): 83 | x = F.relu(layer(x), inplace=True) 84 | if i % 2 == 1: 85 | outs.append(x) 86 | outs[0] = self.l2_norm(outs[0]) 87 | if len(outs) == 1: 88 | return outs[0] 89 | else: 90 | return tuple(outs) 91 | 92 | def _make_extra_layers(self, outplanes): 93 | layers = [] 94 | kernel_sizes = (1, 3) 95 | num_layers = 0 96 | outplane = None 97 | for i in range(len(outplanes)): 98 | if self.inplanes == 'S': 99 | self.inplanes = outplane 100 | continue 101 | k = kernel_sizes[num_layers % 2] 102 | if outplanes[i] == 'S': 103 | outplane = outplanes[i + 1] 104 | conv = nn.Conv2d( 105 | self.inplanes, outplane, k, stride=2, padding=1) 106 | else: 107 | outplane = outplanes[i] 108 | conv = nn.Conv2d( 109 | self.inplanes, outplane, k, stride=1, padding=0) 110 | layers.append(conv) 111 | self.inplanes = outplanes[i] 112 | num_layers += 1 113 | if self.input_size == 512: 114 | layers.append(nn.Conv2d(self.inplanes, 256, 4, padding=1)) 115 | 116 | return nn.Sequential(*layers) 117 | 118 | 119 | class L2Norm(nn.Module): 120 | 121 | def __init__(self, n_dims, scale=20., eps=1e-10): 122 | super(L2Norm, self).__init__() 123 | self.n_dims = n_dims 124 | self.weight = nn.Parameter(torch.Tensor(self.n_dims)) 125 | self.eps = eps 126 | self.scale = scale 127 | 128 | def forward(self, x): 129 | norm = x.pow(2).sum(1, keepdim=True).sqrt() + self.eps 130 | return self.weight[None, :, None, None].expand_as(x) * x / norm 131 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS 5 | 6 | 7 | def _build_module(cfg, registry, default_args): 8 | assert isinstance(cfg, dict) and 'type' in cfg 9 | assert isinstance(default_args, dict) or default_args is None 10 | args = cfg.copy() 11 | obj_type = args.pop('type') 12 | if mmcv.is_str(obj_type): 13 | if obj_type not in registry.module_dict: 14 | raise KeyError('{} is not in the {} registry'.format( 15 | obj_type, registry.name)) 16 | obj_type = registry.module_dict[obj_type] 17 | elif not isinstance(obj_type, type): 18 | raise TypeError('type must be a str or valid type, but got {}'.format( 19 | type(obj_type))) 20 | if default_args is not None: 21 | for name, value in default_args.items(): 22 | args.setdefault(name, value) 23 | return obj_type(**args) 24 | 25 | 26 | def build(cfg, registry, default_args=None): 27 | if isinstance(cfg, list): 28 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 29 | return nn.Sequential(*modules) 30 | else: 31 | return _build_module(cfg, registry, default_args) 32 | 33 | 34 | def build_backbone(cfg): 35 | return build(cfg, BACKBONES) 36 | 37 | 38 | def build_neck(cfg): 39 | return build(cfg, NECKS) 40 | 41 | 42 | def 
build_roi_extractor(cfg): 43 | return build(cfg, ROI_EXTRACTORS) 44 | 45 | 46 | def build_head(cfg): 47 | return build(cfg, HEADS) 48 | 49 | 50 | def build_detector(cfg, train_cfg=None, test_cfg=None): 51 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 52 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .retinanet import RetinaNet 10 | 11 | __all__ = [ 12 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 13 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABCMeta, abstractmethod 3 | 4 | import mmcv 5 | import numpy as np 6 | import torch.nn as nn 7 | import pycocotools.mask as maskUtils 8 | 9 | from mmdet.core import tensor2imgs, get_classes 10 | 11 | 12 | class BaseDetector(nn.Module): 13 | """Base class for detectors""" 14 | 15 | __metaclass__ = ABCMeta 16 | 17 | def __init__(self): 18 | super(BaseDetector, self).__init__() 19 | 20 | @property 21 | def with_neck(self): 22 | return hasattr(self, 'neck') and self.neck is not None 23 | 24 | @property 25 | def with_bbox(self): 26 | return hasattr(self, 'bbox_head') and self.bbox_head is not None 27 | 28 | @property 29 | def with_mask(self): 30 | return hasattr(self, 'mask_head') and self.mask_head is not None 31 | 32 | @abstractmethod 33 | def extract_feat(self, imgs): 34 | pass 35 | 36 | def extract_feats(self, imgs): 37 | assert isinstance(imgs, list) 38 | for img in imgs: 39 | yield self.extract_feat(img) 40 | 41 | @abstractmethod 42 | def forward_train(self, imgs, img_metas, **kwargs): 43 | pass 44 | 45 | @abstractmethod 46 | def simple_test(self, img, img_meta, **kwargs): 47 | pass 48 | 49 | @abstractmethod 50 | def aug_test(self, imgs, img_metas, **kwargs): 51 | pass 52 | 53 | def init_weights(self, pretrained=None): 54 | if pretrained is not None: 55 | logger = logging.getLogger() 56 | logger.info('load model from: {}'.format(pretrained)) 57 | 58 | def forward_test(self, imgs, img_metas, **kwargs): 59 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 60 | if not isinstance(var, list): 61 | raise TypeError('{} must be a list, but got {}'.format( 62 | name, type(var))) 63 | 64 | num_augs = len(imgs) 65 | if num_augs != len(img_metas): 66 | raise ValueError( 67 | 'num of augmentations ({}) != num of image meta ({})'.format( 68 | len(imgs), len(img_metas))) 69 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 70 | imgs_per_gpu = imgs[0].size(0) 71 | assert imgs_per_gpu == 1 72 | 73 | if num_augs == 1: 74 | return self.simple_test(imgs[0], img_metas[0], **kwargs) 75 | else: 76 | return self.aug_test(imgs, img_metas, **kwargs) 77 | 78 | def forward(self, img, img_meta, return_loss=True, **kwargs): 79 | if return_loss: 80 | return self.forward_train(img, img_meta, **kwargs) 81 | else: 82 | return self.forward_test(img, 
img_meta, **kwargs) 83 | 84 | def show_result(self, 85 | data, 86 | result, 87 | img_norm_cfg, 88 | dataset='coco', 89 | score_thr=0.3): 90 | if isinstance(result, tuple): 91 | bbox_result, segm_result = result 92 | else: 93 | bbox_result, segm_result = result, None 94 | 95 | img_tensor = data['img'][0] 96 | img_metas = data['img_meta'][0].data[0] 97 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 98 | assert len(imgs) == len(img_metas) 99 | 100 | if isinstance(dataset, str): 101 | class_names = get_classes(dataset) 102 | elif isinstance(dataset, (list, tuple)) or dataset is None: 103 | class_names = dataset 104 | else: 105 | raise TypeError( 106 | 'dataset must be a valid dataset name or a sequence' 107 | ' of class names, not {}'.format(type(dataset))) 108 | 109 | for img, img_meta in zip(imgs, img_metas): 110 | h, w, _ = img_meta['img_shape'] 111 | img_show = img[:h, :w, :] 112 | 113 | bboxes = np.vstack(bbox_result) 114 | # draw segmentation masks 115 | if segm_result is not None: 116 | segms = mmcv.concat_list(segm_result) 117 | inds = np.where(bboxes[:, -1] > score_thr)[0] 118 | for i in inds: 119 | color_mask = np.random.randint( 120 | 0, 256, (1, 3), dtype=np.uint8) 121 | mask = maskUtils.decode(segms[i]).astype(np.bool) 122 | img_show[mask] = img_show[mask] * 0.5 + color_mask * 0.5 123 | # draw bounding boxes 124 | labels = [ 125 | np.full(bbox.shape[0], i, dtype=np.int32) 126 | for i, bbox in enumerate(bbox_result) 127 | ] 128 | labels = np.concatenate(labels) 129 | mmcv.imshow_det_bboxes( 130 | img_show, 131 | bboxes, 132 | labels, 133 | class_names=class_names, 134 | score_thr=score_thr) 135 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | mask_roi_extractor=None, 16 | mask_head=None, 17 | pretrained=None): 18 | super(FastRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | bbox_roi_extractor=bbox_roi_extractor, 22 | bbox_head=bbox_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | mask_roi_extractor=mask_roi_extractor, 26 | mask_head=mask_head, 27 | pretrained=pretrained) 28 | 29 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 30 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 31 | if not isinstance(var, list): 32 | raise TypeError('{} must be a list, but got {}'.format( 33 | name, type(var))) 34 | 35 | num_augs = len(imgs) 36 | if num_augs != len(img_metas): 37 | raise ValueError( 38 | 'num of augmentations ({}) != num of image meta ({})'.format( 39 | len(imgs), len(img_metas))) 40 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 41 | imgs_per_gpu = imgs[0].size(0) 42 | assert imgs_per_gpu == 1 43 | 44 | if num_augs == 1: 45 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 46 | **kwargs) 47 | else: 48 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 49 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import 
DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | train_cfg, 15 | test_cfg, 16 | pretrained=None): 17 | super(FasterRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | bbox_roi_extractor=bbox_roi_extractor, 22 | bbox_head=bbox_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | pretrained=pretrained) 26 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | mask_roi_extractor, 15 | mask_head, 16 | train_cfg, 17 | test_cfg, 18 | pretrained=None): 19 | super(MaskRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | mask_roi_extractor=mask_roi_extractor, 26 | mask_head=mask_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. 
import builder 7 | from ..registry import DETECTORS 8 | 9 | 10 | @DETECTORS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_train(self, 42 | img, 43 | img_meta, 44 | gt_bboxes=None, 45 | gt_bboxes_ignore=None): 46 | if self.train_cfg.rpn.get('debug', False): 47 | self.rpn_head.debug_imgs = tensor2imgs(img) 48 | 49 | x = self.extract_feat(img) 50 | rpn_outs = self.rpn_head(x) 51 | 52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 53 | losses = self.rpn_head.loss( 54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | return losses 56 | 57 | def simple_test(self, img, img_meta, rescale=False): 58 | x = self.extract_feat(img) 59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 60 | if rescale: 61 | for proposals, meta in zip(proposal_list, img_meta): 62 | proposals[:, :4] /= meta['scale_factor'] 63 | # TODO: remove this restriction 64 | return proposal_list[0].cpu().numpy() 65 | 66 | def aug_test(self, imgs, img_metas, rescale=False): 67 | proposal_list = self.aug_test_rpn( 68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 69 | if not rescale: 70 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 71 | img_shape = img_meta['img_shape'] 72 | scale_factor = img_meta['scale_factor'] 73 | flip = img_meta['flip'] 74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 75 | scale_factor, flip) 76 | # TODO: remove this restriction 77 | return proposal_list[0].cpu().numpy() 78 | 79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): 80 | """Show RPN proposals on the image. 81 | 82 | Although we assume batch size is 1, this method supports arbitrary 83 | batch size. 84 | """ 85 | img_tensor = data['img'][0] 86 | img_metas = data['img_meta'][0].data[0] 87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 88 | assert len(imgs) == len(img_metas) 89 | for img, img_meta in zip(imgs, img_metas): 90 | h, w, _ = img_meta['img_shape'] 91 | img_show = img[:h, :w, :] 92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 93 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from .. 
import builder 5 | from ..registry import DETECTORS 6 | from mmdet.core import bbox2result 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_train(self, 46 | img, 47 | img_metas, 48 | gt_bboxes, 49 | gt_labels, 50 | gt_bboxes_ignore=None): 51 | x = self.extract_feat(img) 52 | outs = self.bbox_head(x) 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 54 | losses = self.bbox_head.loss( 55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 56 | return losses 57 | 58 | def simple_test(self, img, img_meta, rescale=False): 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 63 | bbox_results = [ 64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 65 | for det_bboxes, det_labels in bbox_list 66 | ] 67 | return bbox_results[0] 68 | 69 | def aug_test(self, imgs, img_metas, rescale=False): 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | 3 | __all__ = ['FCNMaskHead'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | 3 | __all__ = ['FPN'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/necks/fpn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import xavier_init 4 | 5 | from ..utils import ConvModule 6 | from ..registry import NECKS 7 | 8 | 9 | @NECKS.register_module 10 | class FPN(nn.Module): 11 | 12 | def __init__(self, 13 | in_channels, 14 | out_channels, 15 | num_outs, 16 | start_level=0, 17 | end_level=-1, 18 | add_extra_convs=False, 19 | normalize=None, 20 | activation=None): 21 | super(FPN, self).__init__() 22 | assert isinstance(in_channels, list) 23 | self.in_channels = in_channels 24 | self.out_channels = out_channels 25 | self.num_ins = len(in_channels) 26 | self.num_outs = num_outs 27 | self.activation = activation 28 | self.with_bias = normalize is None 29 | 30 | if end_level == -1: 31 | self.backbone_end_level = 
self.num_ins 32 | assert num_outs >= self.num_ins - start_level 33 | else: 34 | # if end_level < inputs, no extra level is allowed 35 | self.backbone_end_level = end_level 36 | assert end_level <= len(in_channels) 37 | assert num_outs == end_level - start_level 38 | self.start_level = start_level 39 | self.end_level = end_level 40 | self.add_extra_convs = add_extra_convs 41 | 42 | self.lateral_convs = nn.ModuleList() 43 | self.fpn_convs = nn.ModuleList() 44 | 45 | for i in range(self.start_level, self.backbone_end_level): 46 | l_conv = ConvModule( 47 | in_channels[i], 48 | out_channels, 49 | 1, 50 | normalize=normalize, 51 | bias=self.with_bias, 52 | activation=self.activation, 53 | inplace=False) 54 | fpn_conv = ConvModule( 55 | out_channels, 56 | out_channels, 57 | 3, 58 | padding=1, 59 | normalize=normalize, 60 | bias=self.with_bias, 61 | activation=self.activation, 62 | inplace=False) 63 | 64 | self.lateral_convs.append(l_conv) 65 | self.fpn_convs.append(fpn_conv) 66 | 67 | # lvl_id = i - self.start_level 68 | # setattr(self, 'lateral_conv{}'.format(lvl_id), l_conv) 69 | # setattr(self, 'fpn_conv{}'.format(lvl_id), fpn_conv) 70 | 71 | # add extra conv layers (e.g., RetinaNet) 72 | extra_levels = num_outs - self.backbone_end_level + self.start_level 73 | if add_extra_convs and extra_levels >= 1: 74 | for i in range(extra_levels): 75 | in_channels = (self.in_channels[self.backbone_end_level - 1] 76 | if i == 0 else out_channels) 77 | extra_fpn_conv = ConvModule( 78 | in_channels, 79 | out_channels, 80 | 3, 81 | stride=2, 82 | padding=1, 83 | normalize=normalize, 84 | bias=self.with_bias, 85 | activation=self.activation, 86 | inplace=False) 87 | self.fpn_convs.append(extra_fpn_conv) 88 | 89 | # default init_weights for conv(msra) and norm in ConvModule 90 | def init_weights(self): 91 | for m in self.modules(): 92 | if isinstance(m, nn.Conv2d): 93 | xavier_init(m, distribution='uniform') 94 | 95 | def forward(self, inputs): 96 | assert len(inputs) == len(self.in_channels) 97 | 98 | # build laterals 99 | laterals = [ 100 | lateral_conv(inputs[i + self.start_level]) 101 | for i, lateral_conv in enumerate(self.lateral_convs) 102 | ] 103 | 104 | # build top-down path 105 | used_backbone_levels = len(laterals) 106 | for i in range(used_backbone_levels - 1, 0, -1): 107 | laterals[i - 1] += F.interpolate( 108 | laterals[i], scale_factor=2, mode='nearest') 109 | 110 | # build outputs 111 | # part 1: from original levels 112 | outs = [ 113 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 114 | ] 115 | # part 2: add extra levels 116 | if self.num_outs > len(outs): 117 | # use max pool to get more levels on top of outputs 118 | # (e.g., Faster R-CNN, Mask R-CNN) 119 | if not self.add_extra_convs: 120 | for i in range(self.num_outs - used_backbone_levels): 121 | outs.append(F.max_pool2d(outs[-1], 1, stride=2)) 122 | # add conv layers on top of original feature maps (RetinaNet) 123 | else: 124 | orig = inputs[self.backbone_end_level - 1] 125 | outs.append(self.fpn_convs[used_backbone_levels](orig)) 126 | for i in range(used_backbone_levels + 1, self.num_outs): 127 | # BUG: we should add relu before each extra conv 128 | outs.append(self.fpn_convs[i](outs[-1])) 129 | return tuple(outs) 130 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 
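# A Registry is a named mapping from class name to class: modules register themselves through the register_module decorator below, and builder._build_module looks them up by the 'type' string of a config dict.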
7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module. 20 | 21 | Args: 22 | module (:obj:`nn.Module`): Module to be registered. 23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module must be a child of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | NECKS = Registry('neck') 41 | ROI_EXTRACTORS = Registry('roi_extractor') 42 | HEADS = Registry('head') 43 | DETECTORS = Registry('detector') 44 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from ..registry import ROI_EXTRACTORS 8 | 9 | 10 | @ROI_EXTRACTORS.register_module 11 | class SingleRoIExtractor(nn.Module): 12 | """Extract RoI features from a single level feature map. 13 | 14 | If there are mulitple input feature levels, each RoI is mapped to a level 15 | according to its scale. 16 | 17 | Args: 18 | roi_layer (dict): Specify RoI layer type and arguments. 19 | out_channels (int): Output channels of RoI layers. 20 | featmap_strides (int): Strides of input feature maps. 21 | finest_scale (int): Scale threshold of mapping to level 0. 22 | """ 23 | 24 | def __init__(self, 25 | roi_layer, 26 | out_channels, 27 | featmap_strides, 28 | finest_scale=56): 29 | super(SingleRoIExtractor, self).__init__() 30 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 31 | self.out_channels = out_channels 32 | self.featmap_strides = featmap_strides 33 | self.finest_scale = finest_scale 34 | 35 | @property 36 | def num_inputs(self): 37 | """int: Input feature map levels.""" 38 | return len(self.featmap_strides) 39 | 40 | def init_weights(self): 41 | pass 42 | 43 | def build_roi_layers(self, layer_cfg, featmap_strides): 44 | cfg = layer_cfg.copy() 45 | layer_type = cfg.pop('type') 46 | assert hasattr(ops, layer_type) 47 | layer_cls = getattr(ops, layer_type) 48 | roi_layers = nn.ModuleList( 49 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 50 | return roi_layers 51 | 52 | def map_roi_levels(self, rois, num_levels): 53 | """Map rois to corresponding feature levels by scales. 54 | 55 | - scale < finest_scale: level 0 56 | - finest_scale <= scale < finest_scale * 2: level 1 57 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 58 | - scale >= finest_scale * 4: level 3 59 | 60 | Args: 61 | rois (Tensor): Input RoIs, shape (k, 5). 62 | num_levels (int): Total level number. 
63 | 64 | Returns: 65 | Tensor: Level index (0-based) of each RoI, shape (k, ) 66 | """ 67 | scale = torch.sqrt( 68 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 69 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 70 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 71 | return target_lvls 72 | 73 | def forward(self, feats, rois): 74 | if len(feats) == 1: 75 | return self.roi_layers[0](feats[0], rois) 76 | 77 | out_size = self.roi_layers[0].out_size 78 | num_levels = len(feats) 79 | target_lvls = self.map_roi_levels(rois, num_levels) 80 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, 81 | out_size, out_size).fill_(0) 82 | for i in range(num_levels): 83 | inds = target_lvls == i 84 | if inds.any(): 85 | rois_ = rois[inds, :] 86 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 87 | roi_feats[inds] += roi_feats_t 88 | return roi_feats 89 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule 2 | from .norm import build_norm_layer 3 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 4 | bias_init_with_prob) 5 | 6 | __all__ = [ 7 | 'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init', 8 | 'uniform_init', 'kaiming_init', 'bias_init_with_prob' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/utils/conv_module.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import kaiming_init, constant_init 5 | 6 | from .norm import build_norm_layer 7 | 8 | 9 | class ConvModule(nn.Module): 10 | 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | kernel_size, 15 | stride=1, 16 | padding=0, 17 | dilation=1, 18 | groups=1, 19 | bias=True, 20 | normalize=None, 21 | activation='relu', 22 | inplace=True, 23 | activate_last=True): 24 | super(ConvModule, self).__init__() 25 | self.with_norm = normalize is not None 26 | self.with_activatation = activation is not None 27 | self.with_bias = bias 28 | self.activation = activation 29 | self.activate_last = activate_last 30 | 31 | if self.with_norm and self.with_bias: 32 | warnings.warn('ConvModule has norm and bias at the same time') 33 | 34 | self.conv = nn.Conv2d( 35 | in_channels, 36 | out_channels, 37 | kernel_size, 38 | stride, 39 | padding, 40 | dilation, 41 | groups, 42 | bias=bias) 43 | 44 | self.in_channels = self.conv.in_channels 45 | self.out_channels = self.conv.out_channels 46 | self.kernel_size = self.conv.kernel_size 47 | self.stride = self.conv.stride 48 | self.padding = self.conv.padding 49 | self.dilation = self.conv.dilation 50 | self.transposed = self.conv.transposed 51 | self.output_padding = self.conv.output_padding 52 | self.groups = self.conv.groups 53 | 54 | if self.with_norm: 55 | norm_channels = out_channels if self.activate_last else in_channels 56 | self.norm_name, norm = build_norm_layer(normalize, norm_channels) 57 | self.add_module(self.norm_name, norm) 58 | 59 | if self.with_activatation: 60 | assert activation in ['relu'], 'Only ReLU supported.' 
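# Only 'relu' is wired up below; activate_last (used in forward) controls whether the conv runs before norm and activation or after them.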
61 | if self.activation == 'relu': 62 | self.activate = nn.ReLU(inplace=inplace) 63 | 64 | # Default using msra init 65 | self.init_weights() 66 | 67 | @property 68 | def norm(self): 69 | return getattr(self, self.norm_name) 70 | 71 | def init_weights(self): 72 | nonlinearity = 'relu' if self.activation is None else self.activation 73 | kaiming_init(self.conv, nonlinearity=nonlinearity) 74 | if self.with_norm: 75 | constant_init(self.norm, 1, bias=0) 76 | 77 | def forward(self, x, activate=True, norm=True): 78 | if self.activate_last: 79 | x = self.conv(x) 80 | if norm and self.with_norm: 81 | x = self.norm(x) 82 | if activate and self.with_activatation: 83 | x = self.activate(x) 84 | else: 85 | if norm and self.with_norm: 86 | x = self.norm(x) 87 | if activate and self.with_activatation: 88 | x = self.activate(x) 89 | x = self.conv(x) 90 | return x 91 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | norm_cfg = { 5 | # format: layer_type: (abbreviation, module) 6 | 'BN': ('bn', nn.BatchNorm2d), 7 | 'SyncBN': ('bn', None), 8 | 'GN': ('gn', nn.GroupNorm), 9 | # and potentially 'SN' 10 | } 11 | 12 | 13 | def build_norm_layer(cfg, num_features, postfix=''): 14 | """ Build normalization layer 15 | 16 | Args: 17 | cfg (dict): cfg should contain: 18 | type (str): identify norm layer type. 19 | layer args: args needed to instantiate a norm layer. 20 | frozen (bool): [optional] whether stop gradient updates 21 | of norm layer, it is helpful to set frozen mode 22 | in backbone's norms. 23 | num_features (int): number of channels from input 24 | postfix (int, str): appended into norm abbreation to 25 | create named layer. 
26 | 27 | Returns: 28 | name (str): abbreviation + postfix 29 | layer (nn.Module): created norm layer 30 | """ 31 | assert isinstance(cfg, dict) and 'type' in cfg 32 | cfg_ = cfg.copy() 33 | 34 | layer_type = cfg_.pop('type') 35 | if layer_type not in norm_cfg: 36 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 37 | else: 38 | abbr, norm_layer = norm_cfg[layer_type] 39 | if norm_layer is None: 40 | raise NotImplementedError 41 | 42 | assert isinstance(postfix, (int, str)) 43 | name = abbr + str(postfix) 44 | 45 | frozen = cfg_.pop('frozen', False) 46 | cfg_.setdefault('eps', 1e-5) 47 | if layer_type != 'GN': 48 | layer = norm_layer(num_features, **cfg_) 49 | else: 50 | assert 'num_groups' in cfg_ 51 | layer = norm_layer(num_channels=num_features, **cfg_) 52 | 53 | if frozen: 54 | for param in layer.parameters(): 55 | param.requires_grad = False 56 | 57 | return name, layer 58 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias'): 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias'): 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias'): 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias'): 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn import (DeformConv, DeformRoIPooling, DeformRoIPoolingPack, 2 | ModulatedDeformRoIPoolingPack, ModulatedDeformConv, 3 | ModulatedDeformConvPack, deform_conv, modulated_deform_conv, 4 | deform_roi_pooling) 5 | from .nms import nms, soft_nms 6 | from .roi_align import RoIAlign, roi_align 7 | from .roi_pool import RoIPool, roi_pool 8 | 9 | __all__ = [ 10 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 11 | 'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack', 12 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 13 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 14 | 'deform_roi_pooling' 15 | ] 16 | --------------------------------------------------------------------------------
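For orientation, a minimal usage sketch of the two NMS entry points exported here; it matches the nms_wrapper.py signatures shown further below, and the boxes and thresholds are purely illustrative:

import torch
from mmdet.ops import nms, soft_nms

# Detections are rows of (x1, y1, x2, y2, score); CPU tensors use cpu_nms,
# CUDA tensors dispatch to gpu_nms.
dets = torch.tensor([[10., 10., 50., 50., 0.9],
                     [12., 12., 52., 52., 0.8],
                     [100., 100., 150., 150., 0.7]])
kept, inds = nms(dets, iou_thr=0.5)  # hard NMS: the overlapping 0.8 box is dropped
new_dets, new_inds = soft_nms(dets, iou_thr=0.5, method='linear')  # decays scores instead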
/mmdetection/mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 11 | 'ModulatedDeformConvPack', 'deform_conv', 12 | 'modulated_deform_conv', 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/mmdetection/mmdet/ops/dcn/functions/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/functions/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. import deform_pool_cuda 5 | 6 | 7 | class DeformRoIPoolingFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, 11 | data, 12 | rois, 13 | offset, 14 | spatial_scale, 15 | out_size, 16 | out_channels, 17 | no_trans, 18 | group_size=1, 19 | part_size=None, 20 | sample_per_part=4, 21 | trans_std=.0): 22 | ctx.spatial_scale = spatial_scale 23 | ctx.out_size = out_size 24 | ctx.out_channels = out_channels 25 | ctx.no_trans = no_trans 26 | ctx.group_size = group_size 27 | ctx.part_size = out_size if part_size is None else part_size 28 | ctx.sample_per_part = sample_per_part 29 | ctx.trans_std = trans_std 30 | 31 | assert 0.0 <= ctx.trans_std <= 1.0 32 | if not data.is_cuda: 33 | raise NotImplementedError 34 | 35 | n = rois.shape[0] 36 | output = data.new_empty(n, out_channels, out_size, out_size) 37 | output_count = data.new_empty(n, out_channels, out_size, out_size) 38 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 39 | data, rois, offset, output, output_count, ctx.no_trans, 40 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 41 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 42 | 43 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 44 | ctx.save_for_backward(data, rois, offset) 45 | ctx.output_count = output_count 46 | 47 | return output 48 | 49 | @staticmethod 50 | def backward(ctx, grad_output): 51 | if not grad_output.is_cuda: 52 | raise NotImplementedError 53 | 54 | data, rois, offset = ctx.saved_tensors 55 | output_count = ctx.output_count 56 | grad_input = torch.zeros_like(data) 57 | grad_rois = None 58 | grad_offset = torch.zeros_like(offset) 59 | 60 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 61 | grad_output, data, rois, offset, output_count, grad_input, 62 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 63 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 64 | ctx.trans_std) 65 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 66 | None, None, None, None) 67 | 68 | 69 | deform_roi_pooling = DeformRoIPoolingFunction.apply 70 | -------------------------------------------------------------------------------- 
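A hedged calling sketch for the functional interface defined above; the shapes are hypothetical, the op is CUDA-only (CPU input raises NotImplementedError), and autograd Functions accept positional arguments only:

import torch
from mmdet.ops import deform_roi_pooling

feats = torch.randn(2, 256, 32, 32, device='cuda')
# RoIs are rows of (batch_idx, x1, y1, x2, y2)
rois = torch.tensor([[0., 4., 4., 20., 20.]], device='cuda')
offset = feats.new_empty(0)  # placeholder: the offset is unused when no_trans is True
# positional args after offset: spatial_scale, out_size, out_channels, no_trans
out = deform_roi_pooling(feats, rois, offset, 1. / 8, 7, 256, True)
# out: (num_rois, 256, 7, 7)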
/mmdetection/mmdet/ops/dcn/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/mmdetection/mmdet/ops/dcn/modules/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'detection/deform_conv_cuda.cpp', 9 | 'detection/deform_conv_cuda_kernel.cu', 10 | ]), 11 | CUDAExtension('deform_pool_cuda', [ 12 | 'detection/deform_pool_cuda.cpp', 'detection/deform_pool_cuda_kernel.cu' 13 | ]), 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include <torch/extension.h> 9 | 10 | #include <cmath> 11 | #include <vector> 12 | 13 | void DeformablePSROIPoolForward( 14 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 15 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 16 | const int height, const int width, const int num_bbox, 17 | const int channels_trans, const int no_trans, const float spatial_scale, 18 | const int output_dim, const int group_size, const int pooled_size, 19 | const int part_size, const int sample_per_part, const float trans_std); 20 | 21 | void DeformablePSROIPoolBackwardAcc( 22 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 23 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 24 | at::Tensor trans_grad, const int batch, const int channels, 25 | const int height, const int width, const int num_bbox, 26 | const int channels_trans, const int no_trans, const float spatial_scale, 27 | const int output_dim, const int group_size, const int pooled_size, 28 | const int part_size, const int sample_per_part, const float trans_std); 29 | 30 | void deform_psroi_pooling_cuda_forward( 31 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 32 | at::Tensor top_count, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std) { 35 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 36 | 37 | const int batch = input.size(0); 38 | const int channels = input.size(1); 39 | const int height = input.size(2); 40 | const int width = input.size(3); 41 | const int channels_trans = no_trans ?
2 : trans.size(1); 42 | 43 | const int num_bbox = bbox.size(0); 44 | if (num_bbox != out.size(0)) 45 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 46 | out.size(0), num_bbox); 47 | 48 | DeformablePSROIPoolForward( 49 | input, bbox, trans, out, top_count, batch, channels, height, width, 50 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 51 | pooled_size, part_size, sample_per_part, trans_std); 52 | } 53 | 54 | void deform_psroi_pooling_cuda_backward( 55 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 56 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 57 | const int no_trans, const float spatial_scale, const int output_dim, 58 | const int group_size, const int pooled_size, const int part_size, 59 | const int sample_per_part, const float trans_std) { 60 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 61 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 62 | 63 | const int batch = input.size(0); 64 | const int channels = input.size(1); 65 | const int height = input.size(2); 66 | const int width = input.size(3); 67 | const int channels_trans = no_trans ? 2 : trans.size(1); 68 | 69 | const int num_bbox = bbox.size(0); 70 | if (num_bbox != out_grad.size(0)) 71 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 72 | out_grad.size(0), num_bbox); 73 | 74 | DeformablePSROIPoolBackwardAcc( 75 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 76 | channels, height, width, num_bbox, channels_trans, no_trans, 77 | spatial_scale, output_dim, group_size, pooled_size, part_size, 78 | sample_per_part, trans_std); 79 | } 80 | 81 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 82 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, 83 | "deform psroi pooling forward(CUDA)"); 84 | m.def("deform_psroi_pooling_cuda_backward", 85 | &deform_psroi_pooling_cuda_backward, 86 | "deform psroi pooling backward(CUDA)"); 87 | } -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.cpp 2 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/Makefile: -------------------------------------------------------------------------------- 1 | PYTHON=${PYTHON:-python} 2 | 3 | all: 4 | echo "Compiling nms kernels..." 
5 | $(PYTHON) setup.py build_ext --inplace 6 | 7 | clean: 8 | rm -f *.so 9 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # cython: language_level=3, boundscheck=False 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 14 | return a if a >= b else b 15 | 16 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 17 | return a if a <= b else b 18 | 19 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 20 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 21 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 22 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 23 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 24 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 25 | 26 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 27 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 28 | 29 | cdef int ndets = dets.shape[0] 30 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 31 | np.zeros((ndets), dtype=np.int) 32 | 33 | # nominal indices 34 | cdef int _i, _j 35 | # sorted indices 36 | cdef int i, j 37 | # temp variables for box i's (the box currently under consideration) 38 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 39 | # variables for computing overlap with box j (lower scoring box) 40 | cdef np.float32_t xx1, yy1, xx2, yy2 41 | cdef np.float32_t w, h 42 | cdef np.float32_t inter, ovr 43 | 44 | keep = [] 45 | for _i in range(ndets): 46 | i = order[_i] 47 | if suppressed[i] == 1: 48 | continue 49 | keep.append(i) 50 | ix1 = x1[i] 51 | iy1 = y1[i] 52 | ix2 = x2[i] 53 | iy2 = y2[i] 54 | iarea = areas[i] 55 | for _j in range(_i + 1, ndets): 56 | j = order[_j] 57 | if suppressed[j] == 1: 58 | continue 59 | xx1 = max(ix1, x1[j]) 60 | yy1 = max(iy1, y1[j]) 61 | xx2 = min(ix2, x2[j]) 62 | yy2 = min(iy2, y2[j]) 63 | w = max(0.0, xx2 - xx1 + 1) 64 | h = max(0.0, yy2 - yy1 + 1) 65 | inter = w * h 66 | ovr = inter / (iarea + areas[j] - inter) 67 | if ovr >= thresh: 68 | suppressed[j] = 1 69 | 70 | return keep 71 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/cpu_soft_nms.pyx: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------- 2 | # Soft-NMS: Improving Object Detection With One Line of Code 3 | # Copyright (c) University of Maryland, College Park 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Navaneeth Bodla and Bharat Singh 6 | # Modified by Kai Chen 7 | # ---------------------------------------------------------- 8 | 9 | # cython: language_level=3, boundscheck=False 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | 15 
| cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 16 | return a if a >= b else b 17 | 18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 19 | return a if a <= b else b 20 | 21 | 22 | def cpu_soft_nms( 23 | np.ndarray[float, ndim=2] boxes_in, 24 | float iou_thr, 25 | unsigned int method=1, 26 | float sigma=0.5, 27 | float min_score=0.001, 28 | ): 29 | boxes = boxes_in.copy() 30 | cdef unsigned int N = boxes.shape[0] 31 | cdef float iw, ih, box_area 32 | cdef float ua 33 | cdef int pos = 0 34 | cdef float maxscore = 0 35 | cdef int maxpos = 0 36 | cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov 37 | inds = np.arange(N) 38 | 39 | for i in range(N): 40 | maxscore = boxes[i, 4] 41 | maxpos = i 42 | 43 | tx1 = boxes[i, 0] 44 | ty1 = boxes[i, 1] 45 | tx2 = boxes[i, 2] 46 | ty2 = boxes[i, 3] 47 | ts = boxes[i, 4] 48 | ti = inds[i] 49 | 50 | pos = i + 1 51 | # get max box 52 | while pos < N: 53 | if maxscore < boxes[pos, 4]: 54 | maxscore = boxes[pos, 4] 55 | maxpos = pos 56 | pos = pos + 1 57 | 58 | # add max box as a detection 59 | boxes[i, 0] = boxes[maxpos, 0] 60 | boxes[i, 1] = boxes[maxpos, 1] 61 | boxes[i, 2] = boxes[maxpos, 2] 62 | boxes[i, 3] = boxes[maxpos, 3] 63 | boxes[i, 4] = boxes[maxpos, 4] 64 | inds[i] = inds[maxpos] 65 | 66 | # swap ith box with position of max box 67 | boxes[maxpos, 0] = tx1 68 | boxes[maxpos, 1] = ty1 69 | boxes[maxpos, 2] = tx2 70 | boxes[maxpos, 3] = ty2 71 | boxes[maxpos, 4] = ts 72 | inds[maxpos] = ti 73 | 74 | tx1 = boxes[i, 0] 75 | ty1 = boxes[i, 1] 76 | tx2 = boxes[i, 2] 77 | ty2 = boxes[i, 3] 78 | ts = boxes[i, 4] 79 | 80 | pos = i + 1 81 | # NMS iterations, note that N changes if detection boxes fall below 82 | # threshold 83 | while pos < N: 84 | x1 = boxes[pos, 0] 85 | y1 = boxes[pos, 1] 86 | x2 = boxes[pos, 2] 87 | y2 = boxes[pos, 3] 88 | s = boxes[pos, 4] 89 | 90 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 91 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 92 | if iw > 0: 93 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 94 | if ih > 0: 95 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 96 | ov = iw * ih / ua # iou between max box and detection box 97 | 98 | if method == 1: # linear 99 | if ov > iou_thr: 100 | weight = 1 - ov 101 | else: 102 | weight = 1 103 | elif method == 2: # gaussian 104 | weight = np.exp(-(ov * ov) / sigma) 105 | else: # original NMS 106 | if ov > iou_thr: 107 | weight = 0 108 | else: 109 | weight = 1 110 | 111 | boxes[pos, 4] = weight * boxes[pos, 4] 112 | 113 | # if box score falls below threshold, discard the box by 114 | # swapping with last box update N 115 | if boxes[pos, 4] < min_score: 116 | boxes[pos, 0] = boxes[N-1, 0] 117 | boxes[pos, 1] = boxes[N-1, 1] 118 | boxes[pos, 2] = boxes[N-1, 2] 119 | boxes[pos, 3] = boxes[N-1, 3] 120 | boxes[pos, 4] = boxes[N-1, 4] 121 | inds[pos] = inds[N - 1] 122 | N = N - 1 123 | pos = pos - 1 124 | 125 | pos = pos + 1 126 | 127 | return boxes[:N], inds[:N] 128 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id, size_t base); 3 | size_t nms_Malloc(); 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/gpu_nms.pyx: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # cython: language_level=3, boundscheck=False 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | assert sizeof(int) == sizeof(np.int32_t) 14 | 15 | cdef extern from "gpu_nms.hpp": 16 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int, size_t) nogil 17 | size_t nms_Malloc() nogil 18 | 19 | memory_pool = {} 20 | 21 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 22 | np.int32_t device_id=0): 23 | cdef int boxes_num = dets.shape[0] 24 | cdef int boxes_dim = 5 25 | cdef int num_out 26 | cdef size_t base 27 | cdef np.ndarray[np.int32_t, ndim=1] \ 28 | keep = np.zeros(boxes_num, dtype=np.int32) 29 | cdef np.ndarray[np.float32_t, ndim=1] \ 30 | scores = dets[:, 4] 31 | cdef np.ndarray[np.int_t, ndim=1] \ 32 | order = scores.argsort()[::-1] 33 | cdef np.ndarray[np.float32_t, ndim=2] \ 34 | sorted_dets = dets[order, :5] 35 | cdef float cthresh = thresh 36 | if device_id not in memory_pool: 37 | with nogil: 38 | base = nms_Malloc() 39 | memory_pool[device_id] = base 40 | # print "malloc", base 41 | base = memory_pool[device_id] 42 | with nogil: 43 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, cthresh, device_id, base) 44 | keep = keep[:num_out] 45 | return list(order[keep]) 46 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .gpu_nms import gpu_nms 5 | from .cpu_nms import cpu_nms 6 | from .cpu_soft_nms import cpu_soft_nms 7 | 8 | 9 | def nms(dets, iou_thr, device_id=None): 10 | """Dispatch to either CPU or GPU NMS implementations.""" 11 | if isinstance(dets, torch.Tensor): 12 | is_tensor = True 13 | if dets.is_cuda: 14 | device_id = dets.get_device() 15 | dets_np = dets.detach().cpu().numpy() 16 | elif isinstance(dets, np.ndarray): 17 | is_tensor = False 18 | dets_np = dets 19 | else: 20 | raise TypeError( 21 | 'dets must be either a Tensor or numpy array, but got {}'.format( 22 | type(dets))) 23 | 24 | if dets_np.shape[0] == 0: 25 | inds = [] 26 | else: 27 | inds = (gpu_nms(dets_np, iou_thr, device_id=device_id) 28 | if device_id is not None else cpu_nms(dets_np, iou_thr)) 29 | 30 | if is_tensor: 31 | inds = dets.new_tensor(inds, dtype=torch.long) 32 | else: 33 | inds = np.array(inds, dtype=np.int64) 34 | return dets[inds, :], inds 35 | 36 | 37 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 38 | if isinstance(dets, torch.Tensor): 39 | is_tensor = True 40 | dets_np = dets.detach().cpu().numpy() 41 | elif isinstance(dets, np.ndarray): 42 | is_tensor = False 43 | dets_np = dets 44 | else: 45 | raise TypeError( 46 | 'dets must be either a Tensor or numpy array, but got {}'.format( 47 | type(dets))) 48 | 49 | method_codes = {'linear': 1, 'gaussian': 2} 50 | if method not in method_codes: 51 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 52 | new_dets, inds = cpu_soft_nms( 53 | dets_np, 54 | iou_thr, 55 | method=method_codes[method], 56 | sigma=sigma, 57 | min_score=min_score) 58 | 59 | if is_tensor: 60 | return 
dets.new_tensor(new_dets), dets.new_tensor( 61 | inds, dtype=torch.long) 62 | else: 63 | return new_dets.astype(np.float32), inds.astype(np.int64) 64 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/setup.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from distutils.core import setup, Extension 3 | 4 | import numpy as np 5 | from Cython.Build import cythonize 6 | from Cython.Distutils import build_ext 7 | 8 | # extensions 9 | ext_args = dict( 10 | include_dirs=[np.get_include()], 11 | language='c++', 12 | extra_compile_args={ 13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'], 14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'], 15 | }, 16 | ) 17 | 18 | extensions = [ 19 | Extension('cpu_nms', ['cpu_nms.pyx'], **ext_args), 20 | Extension('cpu_soft_nms', ['cpu_soft_nms.pyx'], **ext_args), 21 | Extension('gpu_nms', ['gpu_nms.pyx', 'nms_kernel.cu'], **ext_args), 22 | ] 23 | 24 | 25 | def customize_compiler_for_nvcc(self): 26 | """Inject deep into distutils to customize how the dispatch 27 | to cc/nvcc works. 28 | If you subclass UnixCCompiler, it's not trivial to get your subclass 29 | injected in, and still have the right customizations (i.e. 30 | distutils.sysconfig.customize_compiler) run on it. So instead of going 31 | the OO route, I have this. Note, it's kind of like a weird functional 32 | subclassing going on.""" 33 | 34 | # tell the compiler it can process .cu 35 | self.src_extensions.append('.cu') 36 | 37 | # save references to the default compiler_so and _compile methods 38 | default_compiler_so = self.compiler_so 39 | super = self._compile 40 | 41 | # now redefine the _compile method. This gets executed for each 42 | # object but distutils doesn't have the ability to change compilers 43 | # based on source extension: we add it.
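# The replacement _compile defined below switches compiler_so to nvcc for
# '.cu' sources, picks the matching subset of extra_postargs ('nvcc' vs 'cc'),
# and then delegates to the saved default implementation.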
44 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 45 | if osp.splitext(src)[1] == '.cu': 46 | # use nvcc for .cu files 47 | self.set_executable('compiler_so', 'nvcc') 48 | # use only a subset of the extra_postargs, which are 1-1 translated 49 | # from the extra_compile_args in the Extension class 50 | postargs = extra_postargs['nvcc'] 51 | else: 52 | postargs = extra_postargs['cc'] 53 | 54 | super(obj, src, ext, cc_args, postargs, pp_opts) 55 | # reset the default compiler_so, which we might have changed for cuda 56 | self.compiler_so = default_compiler_so 57 | 58 | # inject our redefined _compile method into the class 59 | self._compile = _compile 60 | 61 | 62 | # run the customize_compiler 63 | class custom_build_ext(build_ext): 64 | 65 | def build_extensions(self): 66 | customize_compiler_for_nvcc(self.compiler) 67 | build_ext.build_extensions(self) 68 | 69 | 70 | setup( 71 | name='nms', 72 | cmdclass={'build_ext': custom_build_ext}, 73 | ext_modules=cythonize(extensions), 74 | ) 75 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/mmdetection/mmdet/ops/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from ..
import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/mmdetection/mmdet/ops/roi_align/modules/__init__.py -------------------------------------------------------------------------------- 
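A minimal usage sketch for the functional roi_align exported above (assuming the roi_align_cuda extension has been compiled and a CUDA device is available; the shapes are illustrative):

import torch
from mmdet.ops.roi_align import roi_align

feats = torch.randn(2, 16, 32, 32, device='cuda:0')  # NCHW feature map
# each roi is one row (batch_index, x1, y1, x2, y2) in input-image coordinates
rois = torch.tensor([[0., 4., 4., 100., 100.]], device='cuda:0')
# arguments are positional because Function.apply accepts no keywords:
# out_size=7, spatial_scale=1/8, sample_num=2
pooled = roi_align(feats, rois, 7, 1.0 / 8, 2)
print(pooled.shape)  # torch.Size([1, 16, 7, 7])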
/mmdetection/mmdet/ops/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 |
return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/mmdetection/mmdet/ops/roi_pool/functions/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | 
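# (the append below puts mmdet/ops on sys.path so the locally built roi_pool
#  package can be imported when this gradcheck script is run directly)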
sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/mmdetection/mmdet/ops/roi_pool/modules/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 |
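    // rois are expected as rows of (batch_idx, x1, y1, x2, y2)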
printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdetection/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | 6 | 7 | MAJOR = 0 8 | MINOR = 6 9 | PATCH = 'rc0' 10 | SUFFIX = '' 11 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) 12 | 13 | version_file = 'mmdet/version.py' 14 | 15 | 16 | def get_git_hash(): 17 | 18 | def _minimal_ext_cmd(cmd): 19 | # construct minimal environment 20 | env = {} 21 | for k in ['SYSTEMROOT', 'PATH', 'HOME']: 22 | v = os.environ.get(k) 23 | if v is not None: 24 | env[k] = v 25 | # LANGUAGE is used on win32 26 | env['LANGUAGE'] = 'C' 27 | env['LANG'] = 'C' 28 | env['LC_ALL'] = 'C' 29 | out = subprocess.Popen( 30 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 31 | return out 32 | 33 | try: 34 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 35 | sha = out.strip().decode('ascii') 36 | except OSError: 37 | sha = 'unknown' 38 | 39 | return sha 40 | 41 | 42 | def get_hash(): 43 | if os.path.exists('.git'): 44 | sha = get_git_hash()[:7] 45 | elif os.path.exists(version_file): 46 | try: 47 | from mmdet.version import __version__ 48 | sha = __version__.split('+')[-1] 49 | except ImportError: 50 | raise ImportError('Unable to get git version') 51 | else: 52 | sha = 'unknown' 53 | 54 | return sha 55 | 56 | 57 | def write_version_py(): 58 | content = """# GENERATED VERSION FILE 59 | # TIME: {} 60 | 61 | __version__ = '{}' 62 | short_version = '{}' 63 | """ 64 | sha = get_hash() 65 | VERSION = SHORT_VERSION + '+' + sha 66 | 67 | with open(version_file, 'w') as f: 68 | f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) 69 | 70 | 71 | def get_version(): 72 | with open(version_file, 'r') as f: 73 | exec(compile(f.read(), version_file, 'exec')) 74 | return locals()['__version__'] 75 | 76 | 77 | if __name__ == '__main__': 78 | write_version_py() 79 | setup( 80 | name='mmdet', 81 | version=get_version(), 82 | description='Open MMLab Detection 
Toolbox', 83 | keywords='computer vision, object detection', 84 | url='https://github.com/open-mmlab/mmdetection', 85 | packages=find_packages(exclude=('configs', 'tools', 'demo')), 86 | package_data={'mmdet.ops': ['*/*.so']}, 87 | classifiers=[ 88 | 'Development Status :: 4 - Beta', 89 | 'License :: OSI Approved :: Apache Software License', 90 | 'Operating System :: OS Independent', 91 | 'Programming Language :: Python :: 2', 92 | 'Programming Language :: Python :: 2.7', 93 | 'Programming Language :: Python :: 3', 94 | 'Programming Language :: Python :: 3.4', 95 | 'Programming Language :: Python :: 3.5', 96 | 'Programming Language :: Python :: 3.6', 97 | ], 98 | license='GPLv3', 99 | setup_requires=['pytest-runner'], 100 | tests_require=['pytest'], 101 | install_requires=[ 102 | 'mmcv', 'numpy', 'matplotlib', 'six', 'terminaltables', 103 | 'pycocotools' 104 | ], 105 | zip_safe=False) 106 | -------------------------------------------------------------------------------- /reid/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/reid/__init__.py -------------------------------------------------------------------------------- /reid/annotation_converter.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | from detection.utils import test_submission 4 | 5 | COLUMNS = ['ImageId', 'SubjectId', 'FACE_X', 'FACE_Y', 'W', 'H', 'Confidence'] 6 | TEST_COLUMNS = ['ImageId', 'SubjectId', 'W', 'H'] 7 | TRAIN_COLUMNS = ['FILE', 'SUBJECT_ID', 'FACE_WIDTH', 'FACE_HEIGHT'] 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('--annotation', type=str) 13 | parser.add_argument('--output', type=str) 14 | parser.add_argument('--to_test', action='store_true') 15 | return parser.parse_args() 16 | 17 | 18 | def main(): 19 | args = parse_args() 20 | annotation = pd.read_csv(args.annotation) 21 | if args.to_test: 22 | convert_dict = dict(zip(TRAIN_COLUMNS, TEST_COLUMNS)) 23 | else: 24 | convert_dict = dict(zip(TEST_COLUMNS, TRAIN_COLUMNS)) 25 | annotation['FACE_ID'] = range(len(annotation)) 26 | annotation['SUBJECT_ID'] = range(len(annotation)) 27 | annotation = annotation.rename(columns=convert_dict) 28 | if args.to_test: 29 | annotation = annotation[COLUMNS] 30 | test_submission(annotation) 31 | annotation.to_csv(args.output, index=False) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /reid/fit_nmslib.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from glob import glob 3 | 4 | from reid.knn import NMSLibNeighbours 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--embedding_path', type=str) 10 | parser.add_argument('--knn_path', type=str) 11 | parser.add_argument('--n_jobs', type=int, default=16) 12 | return parser.parse_args() 13 | 14 | 15 | def main(): 16 | args = parse_args() 17 | 18 | paths = sorted(glob(args.embedding_path)) 19 | knn = NMSLibNeighbours(n_neighbours=1, space='cosinesimil', n_jobs=args.n_jobs) 20 | knn.fit(paths=paths) 21 | knn.dump(args.knn_path) 22 | 23 | 24 | if __name__ == '__main__': 25 | main() 26 | -------------------------------------------------------------------------------- /reid/insightface/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirassov/topcoder-facial-marathon/37f6828a589717d0004dd84d51eb7bc6a1b310fd/reid/insightface/__init__.py -------------------------------------------------------------------------------- /reid/insightface/model.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import mxnet as mx 4 | from sklearn.preprocessing import normalize 5 | 6 | from reid.insightface.mtcnn import MtcnnDetector 7 | from reid.insightface.utils import preprocess 8 | 9 | 10 | def get_embedder(ctx, image_size, model_prefix: str, layer): 11 | sym, arg_params, aux_params = mx.model.load_checkpoint(model_prefix, 0) 12 | all_layers = sym.get_internals() 13 | sym = all_layers[layer + '_output'] 14 | model = mx.mod.Module(symbol=sym, context=ctx, label_names=None) 15 | model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))]) 16 | model.set_params(arg_params, aux_params) 17 | return model 18 | 19 | 20 | class ArcFaceModel: 21 | def __init__(self, embedder_path, mtcnn_path, image_size=(112, 112)): 22 | self.image_size = image_size 23 | self.ctx = mx.cpu() 24 | self.embedder = get_embedder(self.ctx, image_size, embedder_path, 'fc1') 25 | self.detector = MtcnnDetector( 26 | model_folder=mtcnn_path, 27 | ctx=self.ctx, 28 | accurate_landmark=True, 29 | threshold=[0.6, 0.7, 0.8] 30 | ) 31 | 32 | def predict(self, image): 33 | embedding = None 34 | preprocessed_img, bbox, landmark = self.detect(image) 35 | if preprocessed_img is not None: 36 | embedding = self.embed(preprocessed_img) 37 | return embedding 38 | 39 | def align(self, image, bbox, landmark): 40 | landmark = landmark.reshape((2, 5)).T 41 | preprocessed_img = preprocess(image, bbox, landmark, image_size=self.image_size) 42 | preprocessed_img = cv2.cvtColor(preprocessed_img, cv2.COLOR_BGR2RGB) 43 | preprocessed_img = np.transpose(preprocessed_img, (2, 0, 1)) 44 | return preprocessed_img, bbox, landmark 45 | 46 | def detect(self, image): 47 | bboxes, landmarks = self.detector.detect_face(image) 48 | if bboxes is None: 49 | return None, None, None 50 | 51 | bboxes, scores = bboxes[:, :4], bboxes[:, 4] 52 | return self.align(image, bboxes[0], landmarks[0]) 53 | 54 | def embed(self, image): 55 | input_blob = np.expand_dims(image, axis=0) 56 | data = mx.nd.array(input_blob) 57 | db = mx.io.DataBatch(data=(data,)) 58 | self.embedder.forward(db, is_train=False) 59 | embedding = self.embedder.get_outputs()[0].asnumpy() 60 | embedding = normalize(embedding).flatten() 61 | return embedding 62 | -------------------------------------------------------------------------------- /reid/knn.py: -------------------------------------------------------------------------------- 1 | import nmslib 2 | from collections import defaultdict 3 | import numpy as np 4 | from tqdm import tqdm 5 | 6 | 7 | class NMSLibNeighbours: 8 | def __init__(self, n_neighbours, space, n_jobs): 9 | self.n_neighbours = n_neighbours 10 | self.index = nmslib.init(method='hnsw', space=space, data_type=nmslib.DataType.DENSE_VECTOR) 11 | self.space = space 12 | self.n_jobs = n_jobs 13 | 14 | def fit(self, paths): 15 | for path in tqdm(paths): 16 | self.fit_sample(path) 17 | self.index.createIndex({'post': 2, 'indexThreadQty': self.n_jobs}, print_progress=True) 18 | 19 | def fit_sample(self, path): 20 | data = np.load(path) 21 | labels = data['labels'] 22 | embeddings = data['embeddings'] 23 | if len(embeddings): 24 | 
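            # (nmslib stores each embedding under the integer id passed here,
            #  so knnQueryBatch later returns these SUBJECT_ID labels directly)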
self.index.addDataPointBatch(data=embeddings, ids=labels) 25 | 26 | def predict(self, paths): 27 | predictions = defaultdict(list) 28 | for path in tqdm(paths): 29 | data = np.load(path) 30 | for i, (neighbours, distances) in enumerate(self.predict_sample(data)): 31 | predictions['neighbours'].append(neighbours) 32 | predictions['distances'].append(distances) 33 | predictions['labels'].append(data['labels'][i]) 34 | predictions['ids'].append(data['ids'][i]) 35 | return predictions 36 | 37 | def predict_sample(self, data: np.array): 38 | if len(data['embeddings']): 39 | return self.index.knnQueryBatch(data['embeddings'], k=self.n_neighbours, num_threads=self.n_jobs) 40 | else: 41 | return [] 42 | 43 | def dump(self, index_path): 44 | self.index.saveIndex(index_path) 45 | 46 | def load(self, index_path): 47 | self.index.loadIndex(index_path) 48 | -------------------------------------------------------------------------------- /reid/predict_embeddings.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from functools import partial 4 | 5 | import cv2 6 | import numpy as np 7 | import pandas as pd 8 | from multiprocessing import Pool 9 | from tqdm import tqdm 10 | 11 | from reid.insightface.model import ArcFaceModel 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--mtcnn_path', type=str) 17 | parser.add_argument('--embedder_path', type=str) 18 | parser.add_argument('--root', type=str) 19 | parser.add_argument('--annotation_path', type=str) 20 | parser.add_argument('--output_path', type=str) 21 | parser.add_argument('--n_jobs', type=int, default=40) 22 | return parser.parse_args() 23 | 24 | 25 | # dirty hack for parallel 26 | args = parse_args() 27 | model = ArcFaceModel(mtcnn_path=args.mtcnn_path, embedder_path=args.embedder_path, image_size=(112, 112)) 28 | 29 | 30 | def prepare_bboxes(group): 31 | x_min = group['FACE_X'].values 32 | y_min = group['FACE_Y'].values 33 | 34 | x_max = x_min + group['FACE_WIDTH'].values 35 | y_max = y_min + group['FACE_HEIGHT'].values 36 | 37 | bboxes = np.stack([x_min, y_min, x_max, y_max], axis=-1) 38 | return np.array(bboxes).astype(int) 39 | 40 | 41 | def predict(group, root, output_path): 42 | image_name, group = group 43 | image = cv2.imread(os.path.join(root, image_name)) 44 | embeddings = [] 45 | labels = [] 46 | ids = [] 47 | for label, id_, bbox in zip(group['SUBJECT_ID'].values, group['FACE_ID'].values, prepare_bboxes(group)): 48 | x_min, y_min, x_max, y_max = bbox 49 | embedding = model.predict(image[y_min:y_max, x_min:x_max].copy()) 50 | if embedding is not None: 51 | embeddings.append(embedding) 52 | labels.append(label) 53 | ids.append(id_) 54 | np.savez( 55 | file=os.path.join(output_path, image_name), 56 | embeddings=np.array(embeddings), 57 | labels=labels, 58 | ids=ids 59 | ) 60 | 61 | 62 | def main(): 63 | annotation = pd.read_csv(args.annotation_path) 64 | 65 | print(len(annotation), len(set(annotation['FILE']))) 66 | groups = list(annotation.groupby('FILE')) 67 | 68 | partial_predict = partial(predict, root=args.root, output_path=args.output_path) 69 | with Pool(args.n_jobs) as p: 70 | list(tqdm(iterable=p.imap_unordered(partial_predict, groups), total=len(groups))) 71 | 72 | 73 | if __name__ == '__main__': 74 | main() 75 | -------------------------------------------------------------------------------- /reid/predict_nmslib.py: -------------------------------------------------------------------------------- 1 | import 
argparse 2 | from glob import glob 3 | 4 | import numpy as np 5 | 6 | from reid.knn import NMSLibNeighbours 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--embedding_path', type=str) 12 | parser.add_argument('--knn_path', type=str) 13 | parser.add_argument('--ids_path', type=str) 14 | parser.add_argument('--n_jobs', type=int, default=16) 15 | parser.add_argument('--output_path', type=str) 16 | return parser.parse_args() 17 | 18 | 19 | def main(): 20 | args = parse_args() 21 | 22 | paths = sorted(glob(args.embedding_path)) 23 | knn = NMSLibNeighbours(n_neighbours=10, space='cosinesimil', n_jobs=args.n_jobs) 24 | knn.load(args.knn_path) 25 | predictions = knn.predict(paths) 26 | np.savez(args.output_path, **predictions) 27 | 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /reid/prepare_solution.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import pandas as pd 4 | from tqdm import tqdm 5 | import numpy as np 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--detector_solution', type=str) 11 | parser.add_argument('--nmslib_predictions', type=str) 12 | parser.add_argument('--output', type=str) 13 | parser.add_argument('--threshold', type=float, default=0.47) 14 | return parser.parse_args() 15 | 16 | 17 | def prepare(detector_solution, nmslib_predictions, threshold): 18 | detector_solution = detector_solution.set_index('FACE_ID') 19 | detector_solution['SUBJECT_ID'] = -1 20 | for label, distance, id_ in tqdm( 21 | zip(nmslib_predictions['neighbours'], nmslib_predictions['distances'], nmslib_predictions['ids']), 22 | total=len(nmslib_predictions['ids']) 23 | ): 24 | if distance[0] < threshold: 25 | detector_solution.loc[id_, 'SUBJECT_ID'] = label[0] 26 | detector_solution['FACE_ID'] = detector_solution.index 27 | return detector_solution 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | detector_solution = pd.read_csv(args.detector_solution) 33 | nmslib_predictions = np.load(args.nmslib_predictions) 34 | submission = prepare(detector_solution, nmslib_predictions, args.threshold) 35 | submission.to_csv(args.output, index=False) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | IMG_PREFIX=${1} 4 | SOLUTION=${2} 5 | 6 | bash test_detector.sh ${IMG_PREFIX} ${SOLUTION} 7 | bash test_knn.sh ${IMG_PREFIX} ${SOLUTION} -------------------------------------------------------------------------------- /test_detector.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | IMG_PREFIX=${1} 5 | SOLUTION=${2} 6 | 7 | MMDETECTION_ANN_FILE=/wdata/mmdetection/test.pickle 8 | OPEN_SET_FACE_ANN_FILE=/wdata/test.csv 9 | MODEL=cascade_rcnn_dconv_c3-c5_r50_fpn 10 | CONFIG_FILE=${TOPCODER_ROOT}/configs/${MODEL}.py 11 | PREDICTION=/wdata/${MODEL}/test.pkl 12 | CHECKPOINT=/code/weights/my_best_checkpoint.pth 13 | 14 | PYTHONPATH=${TOPCODER_ROOT} \ 15 | python ${TOPCODER_ROOT}/detection/prepare_test.py \ 16 | --output=${OPEN_SET_FACE_ANN_FILE} \ 17 | --root=${IMG_PREFIX} 18 | 19 | mkdir -p /wdata/mmdetection 20 | PYTHONPATH=${TOPCODER_ROOT} \ 21 | python ${TOPCODER_ROOT}/detection/convert.py \
22 | --annotation=${OPEN_SET_FACE_ANN_FILE} \ 23 | --root=${IMG_PREFIX} \ 24 | --output=${MMDETECTION_ANN_FILE} 25 | 26 | PYTHONPATH=${TOPCODER_ROOT} MPLBACKEND=AGG CUDA_VISIBLE_DEVICES=0 \ 27 | python ${TOPCODER_ROOT}/detection/test.py \ 28 | ${CONFIG_FILE} \ 29 | ${CHECKPOINT} \ 30 | --gpus 1 \ 31 | --proc_per_gpu=1 \ 32 | --out=${PREDICTION} \ 33 | --ann_file=${MMDETECTION_ANN_FILE} \ 34 | --img_prefix=${IMG_PREFIX} 35 | 36 | PYTHONPATH=${TOPCODER_ROOT} MPLBACKEND=AGG \ 37 | python ${TOPCODER_ROOT}/detection/prepare_solution.py \ 38 | --annotation=${MMDETECTION_ANN_FILE} \ 39 | --predictions=${PREDICTION} \ 40 | --output=${SOLUTION} 41 | -------------------------------------------------------------------------------- /test_knn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | IMG_PREFIX=${1} 5 | SOLUTION=${2} 6 | 7 | SOLUTION_TRAIN=/wdata/solution_train.csv 8 | EMBEDDINGS_PATH=/wdata/test_embeddings 9 | KNN_PATH=/code/weights/knn.bin 10 | KNN_PREDICTIONS=/wdata/test_nmslib_predictions.npz 11 | 12 | PYTHONPATH=${TOPCODER_ROOT} python ${TOPCODER_ROOT}/reid/annotation_converter.py \ 13 | --annotation=${SOLUTION} \ 14 | --output=${SOLUTION_TRAIN} 15 | 16 | rm -rf ${EMBEDDINGS_PATH} 17 | mkdir ${EMBEDDINGS_PATH} 18 | PYTHONPATH=${TOPCODER_ROOT} python ${TOPCODER_ROOT}/reid/predict_embeddings.py \ 19 | --embedder_path=/code/weights/model-r100-ii/model \ 20 | --mtcnn_path=/code/weights/mtcnn-model \ 21 | --root=${IMG_PREFIX} \ 22 | --annotation_path=${SOLUTION_TRAIN} \ 23 | --output_path=${EMBEDDINGS_PATH} \ 24 | --n_jobs=60 25 | 26 | PYTHONPATH=${TOPCODER_ROOT} python ${TOPCODER_ROOT}/reid/predict_nmslib.py \ 27 | --embedding_path="${EMBEDDINGS_PATH}/*.npz" \ 28 | --knn_path=${KNN_PATH} \ 29 | --output_path=${KNN_PREDICTIONS} \ 30 | --n_jobs=60 31 | 32 | PYTHONPATH=${TOPCODER_ROOT} python ${TOPCODER_ROOT}/reid/prepare_solution.py \ 33 | --output=${SOLUTION} \ 34 | --detector_solution=${SOLUTION_TRAIN} \ 35 | --nmslib_predictions=${KNN_PREDICTIONS} \ 36 | --threshold=0.47 37 | 38 | PYTHONPATH=${TOPCODER_ROOT} python ${TOPCODER_ROOT}/reid/annotation_converter.py \ 39 | --annotation=${SOLUTION} \ 40 | --output=${SOLUTION} \ 41 | --to_test 42 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | IMG_PREFIX=${1} 4 | 5 | bash train_detector.sh ${IMG_PREFIX} 6 | bash train_knn.sh ${IMG_PREFIX} 7 | -------------------------------------------------------------------------------- /train_detector.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | IMG_PREFIX=${1} 4 | 5 | MMDETECTION_ANN_FILE=/wdata/mmdetection/training_fix.pickle 6 | OPEN_SET_FACE_ANN_FILE=${TOPCODER_ROOT}/data/training_fix.csv 7 | RESIZE_IMG_PREFIX=/wdata/training_resize 8 | 9 | MODEL=cascade_rcnn_dconv_c3-c5_r50_fpn 10 | CONFIG_FILE=${TOPCODER_ROOT}/configs/${MODEL}.py 11 | 12 | echo "prepare pretrained weights" 13 | PYTHONPATH=${TOPCODER_ROOT} MPLBACKEND=AGG CUDA_VISIBLE_DEVICES=0 \ 14 | python ${TOPCODER_ROOT}/detection/prune.py \ 15 | --weights=/code/weights/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-dfa53166.pth \ 16 | --output=/wdata/cascade_rcnn_dconv_c3-c5_r50_fpn_1x_20190125-dfa53166_part.pth 17 | 18 | echo "prepare data for training" 19 | mkdir -p ${RESIZE_IMG_PREFIX} 20 | mkdir -p /wdata/mmdetection 21 | PYTHONPATH=${TOPCODER_ROOT} python ${TOPCODER_ROOT}/detection/convert.py \
22 | --annotation=${OPEN_SET_FACE_ANN_FILE} \ 23 | --root=${IMG_PREFIX} \ 24 | --output_root=${RESIZE_IMG_PREFIX} \ 25 | --output=${MMDETECTION_ANN_FILE} 26 | 27 | echo "train" 28 | PYTHONPATH=${TOPCODER_ROOT} MPLBACKEND=AGG CUDA_VISIBLE_DEVICES=0 \ 29 | python -m torch.distributed.launch --nproc_per_node=1 ${TOPCODER_ROOT}/detection/train.py \ 30 | ${CONFIG_FILE} \ 31 | --launcher pytorch \ 32 | --img_prefix=${RESIZE_IMG_PREFIX} \ 33 | --ann_file=${MMDETECTION_ANN_FILE} 34 | 35 | echo "copy best checkpoint to /code/weights" 36 | cp /wdata/cascade_rcnn_dconv_c3-c5_r50_fpn/latest.pth /code/weights/my_best_checkpoint.pth 37 | -------------------------------------------------------------------------------- /train_knn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | IMG_PREFIX=${1} 4 | 5 | EMBEDDINGS_PATH=/wdata/training_embeddings 6 | KNN_PATH=/code/weights/knn.bin 7 | 8 | rm -rf ${EMBEDDINGS_PATH} 9 | mkdir ${EMBEDDINGS_PATH} 10 | PYTHONPATH=${TOPCODER_ROOT} python ${TOPCODER_ROOT}/reid/predict_embeddings.py \ 11 | --embedder_path=/code/weights/model-r100-ii/model \ 12 | --mtcnn_path=/code/weights/mtcnn-model \ 13 | --root=${IMG_PREFIX} \ 14 | --annotation_path=${TOPCODER_ROOT}/data/training_fix_reid.csv \ 15 | --output_path=${EMBEDDINGS_PATH} \ 16 | --n_jobs=80 17 | 18 | PYTHONPATH=${TOPCODER_ROOT} python ${TOPCODER_ROOT}/reid/fit_nmslib.py \ 19 | --embedding_path="${EMBEDDINGS_PATH}/*.npz" \ 20 | --knn_path=${KNN_PATH} \ 21 | --n_jobs=80 22 | --------------------------------------------------------------------------------
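A condensed sketch of the re-identification flow that train_knn.sh and test_knn.sh drive (paths and parameters mirror the scripts and the reid/ modules above):

from glob import glob
import numpy as np
from reid.knn import NMSLibNeighbours

# fit: index the training-face embeddings by SUBJECT_ID (as in train_knn.sh)
knn = NMSLibNeighbours(n_neighbours=1, space='cosinesimil', n_jobs=16)
knn.fit(sorted(glob('/wdata/training_embeddings/*.npz')))
knn.dump('/code/weights/knn.bin')

# query: nearest training subjects for every detected test face (test_knn.sh)
knn = NMSLibNeighbours(n_neighbours=10, space='cosinesimil', n_jobs=16)
knn.load('/code/weights/knn.bin')
predictions = knn.predict(sorted(glob('/wdata/test_embeddings/*.npz')))
np.savez('/wdata/test_nmslib_predictions.npz', **predictions)
# reid/prepare_solution.py then accepts a match only when the nearest
# neighbour's cosine distance is below the 0.47 threshold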