├── rcnn ├── __init__.py ├── PY_OP │ ├── __init__.py │ └── rpn_fpn_ohem3.py ├── core │ ├── __init__.py │ ├── callback.py │ ├── metric.py │ ├── module.py │ └── module_bak.py ├── cython │ ├── __init__.py │ ├── .gitignore │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── anchors.pyx │ ├── bbox.pyx │ ├── cpu_nms.pyx │ ├── nms_kernel.cu │ └── setup.py ├── io │ └── __init__.py ├── tools │ ├── __init__.py │ ├── reeval.py │ ├── test_rpn.py │ ├── test_rcnn.py │ ├── train_rcnn.py │ └── train_rpn.py ├── utils │ ├── __init__.py │ ├── combine_model.py │ ├── save_model.py │ ├── load_data.py │ └── load_model.py ├── processing │ ├── __init__.py │ ├── assign_levels.py │ ├── nms.py │ ├── generate_anchor.py │ ├── bbox_transform.py │ └── bbox_regression.py ├── pycocotools │ ├── __init__.py │ ├── UPSTREAM_REV │ ├── setup.py │ ├── maskApi.h │ ├── mask.py │ ├── maskApi.c │ └── _mask.pyx ├── dataset │ ├── __init__.py │ ├── ds_utils.py │ └── retinaface.py ├── symbol │ └── __init__.py ├── logger.py └── sample_config.py ├── testimg.jpeg ├── detector_test.jpg ├── dataset ├── group-pic.JPG ├── jaredleto.jpeg ├── Angelina-Jolie.jpg ├── bradleycooper.jpeg ├── channingtatum.jpeg ├── juliaroberts.jpeg ├── kevinspacey.jpeg ├── lupitanyongo.jpg ├── merylstreep.jpeg ├── jenniferlawrence.jpeg ├── 220px-Ellen_DeGeneres_2011.jpg ├── 220px-Brad_Pitt_2019_by_Glenn_Francis.jpg └── Lupita-Nyongo-Her-Brother-Peter-Nyongo.jpg ├── .gitmodules ├── docs ├── faces │ ├── 079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg │ ├── 0e78e2f1-f597-4d65-ac74-f490084f79de.jpg │ ├── 4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg │ ├── 50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg │ ├── 5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg │ ├── 574d60a5-0c06-48a4-bf73-914002c82c2a.jpg │ ├── 5e652366-c03f-47a5-929a-9311f65db9ce.jpg │ ├── 6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg │ ├── 63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg │ ├── 71a92092-b36f-41da-9e7f-85e5c075d65e.jpg │ ├── 73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg │ ├── 7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg │ ├── 830652cd-29a9-4237-a736-e161e891f4ce.jpg │ ├── 84e22415-6352-4442-95f1-d20319786b6b.jpg │ ├── 94cb0154-085e-4afc-9c6c-48073455f1ab.jpg │ ├── 9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg │ ├── 9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg │ ├── a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg │ ├── b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg │ ├── ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg │ ├── bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg │ ├── bcf97241-96e4-4726-8287-fdab88217584.jpg │ ├── c966eec9-6458-4a4e-8967-6bab69dd9965.jpg │ ├── d445a6d4-e28a-4903-8f37-037a357eae60.jpg │ ├── dbbd360f-4fea-4911-9233-063eda812445.jpg │ ├── e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg │ └── fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg └── results.md ├── scripts ├── build-conda-docker ├── get-data ├── conda-repl └── run-conda-docker ├── .gitignore ├── project.clj ├── dockerfiles └── CondaDockerfile ├── src └── facial_rec │ ├── face_feature.clj │ ├── detect.clj │ └── demo.clj ├── README.md └── LICENSE /rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/PY_OP/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/rcnn/cython/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/processing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/cython/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /rcnn/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /testimg.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/testimg.jpeg -------------------------------------------------------------------------------- /detector_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/detector_test.jpg -------------------------------------------------------------------------------- /rcnn/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import IMDB 2 | from .retinaface import retinaface 3 | -------------------------------------------------------------------------------- /dataset/group-pic.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/group-pic.JPG -------------------------------------------------------------------------------- /dataset/jaredleto.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/jaredleto.jpeg -------------------------------------------------------------------------------- /dataset/Angelina-Jolie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/Angelina-Jolie.jpg -------------------------------------------------------------------------------- /dataset/bradleycooper.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/bradleycooper.jpeg -------------------------------------------------------------------------------- /dataset/channingtatum.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/channingtatum.jpeg -------------------------------------------------------------------------------- 
/dataset/juliaroberts.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/juliaroberts.jpeg -------------------------------------------------------------------------------- /dataset/kevinspacey.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/kevinspacey.jpeg -------------------------------------------------------------------------------- /dataset/lupitanyongo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/lupitanyongo.jpg -------------------------------------------------------------------------------- /dataset/merylstreep.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/merylstreep.jpeg -------------------------------------------------------------------------------- /rcnn/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/336d2a27c91e3c0663d2dcf0b13574674d30f88e 2 | -------------------------------------------------------------------------------- /dataset/jenniferlawrence.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/jenniferlawrence.jpeg -------------------------------------------------------------------------------- /rcnn/symbol/__init__.py: -------------------------------------------------------------------------------- 1 | from .symbol_ssh import * 2 | from .symbol_mnet import * 3 | from .symbol_resnet import * 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "insightface"] 2 | path = insightface 3 | url = https://github.com/deepinsight/insightface 4 | -------------------------------------------------------------------------------- /dataset/220px-Ellen_DeGeneres_2011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/220px-Ellen_DeGeneres_2011.jpg -------------------------------------------------------------------------------- /rcnn/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | # set up logger 4 | logging.basicConfig() 5 | logger = logging.getLogger() 6 | logger.setLevel(logging.INFO) 7 | -------------------------------------------------------------------------------- /dataset/220px-Brad_Pitt_2019_by_Glenn_Francis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/220px-Brad_Pitt_2019_by_Glenn_Francis.jpg -------------------------------------------------------------------------------- /dataset/Lupita-Nyongo-Her-Brother-Peter-Nyongo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/Lupita-Nyongo-Her-Brother-Peter-Nyongo.jpg -------------------------------------------------------------------------------- 
/docs/faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg -------------------------------------------------------------------------------- /docs/faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg -------------------------------------------------------------------------------- /docs/faces/4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg -------------------------------------------------------------------------------- /docs/faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg -------------------------------------------------------------------------------- /docs/faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg -------------------------------------------------------------------------------- /docs/faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg -------------------------------------------------------------------------------- /docs/faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg -------------------------------------------------------------------------------- /docs/faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg -------------------------------------------------------------------------------- /docs/faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg -------------------------------------------------------------------------------- /docs/faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg -------------------------------------------------------------------------------- /docs/faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg 
-------------------------------------------------------------------------------- /docs/faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg -------------------------------------------------------------------------------- /docs/faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg -------------------------------------------------------------------------------- /docs/faces/84e22415-6352-4442-95f1-d20319786b6b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/84e22415-6352-4442-95f1-d20319786b6b.jpg -------------------------------------------------------------------------------- /docs/faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg -------------------------------------------------------------------------------- /docs/faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg -------------------------------------------------------------------------------- /docs/faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg -------------------------------------------------------------------------------- /docs/faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg -------------------------------------------------------------------------------- /docs/faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg -------------------------------------------------------------------------------- /docs/faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg -------------------------------------------------------------------------------- /docs/faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg -------------------------------------------------------------------------------- /docs/faces/bcf97241-96e4-4726-8287-fdab88217584.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/bcf97241-96e4-4726-8287-fdab88217584.jpg -------------------------------------------------------------------------------- /docs/faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg -------------------------------------------------------------------------------- /docs/faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg -------------------------------------------------------------------------------- /docs/faces/dbbd360f-4fea-4911-9233-063eda812445.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/dbbd360f-4fea-4911-9233-063eda812445.jpg -------------------------------------------------------------------------------- /docs/faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg -------------------------------------------------------------------------------- /docs/faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg -------------------------------------------------------------------------------- /rcnn/cython/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /scripts/build-conda-docker: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | pushd dockerfiles 6 | docker build -t facerec-conda -f CondaDockerfile --build-arg USERID=$(id -u) --build-arg GROUPID=$(id -g) --build-arg USERNAME=$USER .
7 | popd 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | .hgignore 12 | .hg/ 13 | models 14 | dataset 15 | __pycache__ 16 | *.jpg 17 | faces 18 | results.md -------------------------------------------------------------------------------- /scripts/get-data: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | wget https://s3.us-east-2.amazonaws.com/tech.public.data/facerec-models.zip 5 | wget https://s3.us-east-2.amazonaws.com/tech.public.data/facerec-test-ds.zip 6 | unzip facerec-models.zip && rm facerec-models.zip 7 | unzip facerec-test-ds.zip && rm facerec-test-ds.zip 8 | -------------------------------------------------------------------------------- /scripts/conda-repl: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source activate pyclj 4 | 5 | ## This is absolutely necessary. 6 | ## https://github.com/conda/conda/issues/9500#issuecomment-565753807 7 | export LD_LIBRARY_PATH="$(python3-config --prefix)/lib" 8 | 9 | lein update-in :dependencies conj \[nrepl\ \"0.6.0\"\]\ 10 | -- update-in :plugins conj \[cider/cider-nrepl\ \"0.25.0\"\]\ 11 | -- repl :headless :host localhost 12 | -------------------------------------------------------------------------------- /rcnn/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unique_boxes(boxes, scale=1.0): 5 | """ return indices of unique boxes """ 6 | v = np.array([1, 1e3, 1e6, 1e9]) 7 | hashes = np.round(boxes * scale).dot(v).astype(np.int64) # fixed-width dtype; the bare np.int alias is deprecated 8 | _, index = np.unique(hashes, return_index=True) 9 | return np.sort(index) 10 | 11 | 12 | def filter_small_boxes(boxes, min_size): 13 | w = boxes[:, 2] - boxes[:, 0] 14 | h = boxes[:, 3] - boxes[:, 1] 15 | keep = np.where((w >= min_size) & (h >= min_size))[0] 16 | return keep 17 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject cnuernber/facial-rec "0.1.0-SNAPSHOT" 2 | :description "Demonstration of facial rec tech."
3 | :url "https://github.com/cnuernber/facial-rec" 4 | :license {:name "EPL-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :dependencies [[org.clojure/clojure "1.10.1"] 7 | [techascent/tech.io "4.03" 8 | :exclusions [org.apache.commons/commons-compress]] 9 | [clj-python/libpython-clj "2.00-beta-3"] 10 | [ch.qos.logback/logback-classic "1.1.3"]]) 11 | -------------------------------------------------------------------------------- /rcnn/core/callback.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def do_checkpoint(prefix, means, stds): 5 | def _callback(iter_no, sym, arg, aux): 6 | if 'bbox_pred_weight' in arg: 7 | arg['bbox_pred_weight_test'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T 8 | arg['bbox_pred_bias_test'] = arg['bbox_pred_bias'] * mx.nd.array(stds) + mx.nd.array(means) 9 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 10 | if 'bbox_pred_weight' in arg: 11 | arg.pop('bbox_pred_weight_test') 12 | arg.pop('bbox_pred_bias_test') 13 | return _callback 14 | -------------------------------------------------------------------------------- /rcnn/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['maskApi.c', '_mask.pyx'], 13 | include_dirs=[np.get_include()], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | ext_modules=cythonize(ext_modules) 20 | ) 21 | -------------------------------------------------------------------------------- /scripts/run-conda-docker: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## This is incomplete, you still have to mess around with pythonpath and such. The 4 | ## goal at this stage is to get this docker container to launch a repl that allows 5 | ## initialize! to work out of the box. 
6 | scripts/build-conda-docker 7 | 8 | docker run --rm -it -u $(id -u):$(id -g) \ 9 | -e LEIN_REPL_HOST="0.0.0.0" \ 10 | -v /$HOME/.m2:/home/$USER/.m2 \ 11 | -v /$HOME/.lein:/home/$USER/.lein \ 12 | -v $(pwd)/:/libpython-clj \ 13 | --net=host -w /libpython-clj \ 14 | facerec-conda scripts/conda-repl 15 | # lein update-in :dependencies conj \[nrepl\ \"0.6.0\"\]\ 16 | # -- update-in :plugins conj \[cider/cider-nrepl\ \"0.22.4\"\]\ 17 | # -- repl :headless :host localhost 18 | -------------------------------------------------------------------------------- /rcnn/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | from .load_model import load_checkpoint 2 | from .save_model import save_checkpoint 3 | 4 | 5 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 6 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 7 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 8 | arg_names = list(args1.keys()) + list(args2.keys()) # dict views cannot be concatenated with + under Python 3 9 | aux_names = list(auxs1.keys()) + list(auxs2.keys()) 10 | args = dict() 11 | for arg in arg_names: 12 | if arg in args1: 13 | args[arg] = args1[arg] 14 | else: 15 | args[arg] = args2[arg] 16 | auxs = dict() 17 | for aux in aux_names: 18 | if aux in auxs1: 19 | auxs[aux] = auxs1[aux] 20 | else: 21 | auxs[aux] = auxs2[aux] 22 | save_checkpoint(prefix_out, epoch_out, args, auxs) 23 | -------------------------------------------------------------------------------- /rcnn/utils/save_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 5 | """Checkpoint the model data into file. 6 | :param prefix: Prefix of model name. 7 | :param epoch: The epoch number of the model. 8 | :param arg_params: dict of str to NDArray 9 | Model parameter, dict of name to NDArray of net's weights. 10 | :param aux_params: dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's auxiliary states. 12 | :return: None 13 | prefix-epoch.params will be saved for parameters.
14 | """ 15 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 16 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 17 | param_name = '%s-%04d.params' % (prefix, epoch) 18 | mx.nd.save(param_name, save_dict) 19 | -------------------------------------------------------------------------------- /rcnn/cython/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /rcnn/cython/anchors.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE = np.float32 6 | ctypedef np.float32_t DTYPE_t 7 | 8 | def anchors_cython(int height, int width, int stride, np.ndarray[DTYPE_t, ndim=2] base_anchors): 9 | """ 10 | Parameters 11 | ---------- 12 | height: height of plane 13 | width: width of plane 14 | stride: stride ot the original image 15 | anchors_base: (A, 4) a base set of anchors 16 | Returns 17 | ------- 18 | all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane 19 | """ 20 | cdef unsigned int A = base_anchors.shape[0] 21 | cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE) 22 | cdef unsigned int iw, ih 23 | cdef unsigned int k 24 | cdef unsigned int sh 25 | cdef unsigned int sw 26 | for iw in range(width): 27 | sw = iw * stride 28 | for ih in range(height): 29 | sh = ih * stride 30 | for k in range(A): 31 | all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw 32 | all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh 33 | all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw 34 | all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh 35 | return all_anchors -------------------------------------------------------------------------------- /rcnn/processing/assign_levels.py: -------------------------------------------------------------------------------- 1 | from rcnn.config import config 2 | import numpy as np 3 | 4 | 5 | def compute_assign_targets(rois, threshold): 6 | rois_area = np.sqrt((rois[:, 2] - rois[:, 0] + 1) * (rois[:, 3] - rois[:, 1] + 1)) 7 | num_rois = np.shape(rois)[0] 8 | assign_levels = np.zeros(num_rois, dtype=np.uint8) 9 | for i, stride in enumerate(config.RCNN_FEAT_STRIDE): 10 | thd = threshold[i] 11 | idx = 
np.logical_and(thd[1] <= rois_area, rois_area < thd[0]) 12 | assign_levels[idx] = stride 13 | 14 | assert 0 not in assign_levels, "All rois should be assigned to a specific level." 15 | return assign_levels 16 | 17 | 18 | def add_assign_targets(roidb): 19 | """ 20 | given roidb, add ['assign_levels'] 21 | :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb 22 | """ 23 | print('add assign targets') 24 | assert len(roidb) > 0 25 | assert 'boxes' in roidb[0] 26 | 27 | area_threshold = [[np.inf, 448], 28 | [448, 224], 29 | [224, 112], 30 | [112, 0]] 31 | 32 | assert len(config.RCNN_FEAT_STRIDE) == len(area_threshold) 33 | 34 | num_images = len(roidb) 35 | for im_i in range(num_images): 36 | rois = roidb[im_i]['boxes'] 37 | roidb[im_i]['assign_levels'] = compute_assign_targets(rois, area_threshold) 38 | -------------------------------------------------------------------------------- /dockerfiles/CondaDockerfile: -------------------------------------------------------------------------------- 1 | # We will use Ubuntu for our image 2 | FROM ubuntu:latest 3 | 4 | # Updating Ubuntu packages 5 | 6 | 7 | RUN apt-get -qq update && apt-get -qq -y install curl wget bzip2 openjdk-8-jdk-headless \ 8 | gcc g++ \ 9 | && curl -sSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh \ 10 | && bash /tmp/miniconda.sh -bfp /usr/local \ 11 | && rm -rf /tmp/miniconda.sh \ 12 | && conda install -y python=3 \ 13 | && conda update -y conda \ 14 | && curl -O https://download.clojure.org/install/linux-install-1.10.1.492.sh \ 15 | && chmod +x linux-install-1.10.1.492.sh \ 16 | && ./linux-install-1.10.1.492.sh && rm linux-install-1.10.1.492.sh \ 17 | && wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein \ 18 | && chmod a+x lein \ 19 | && mv lein /usr/bin \ 20 | && apt-get -qq -y autoremove \ 21 | && apt-get autoclean \ 22 | && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log \ 23 | && conda clean --all --yes 24 | 25 | # miniconda was installed to /usr/local above, so make sure its bin dir leads PATH 26 | ENV PATH /usr/local/bin:$PATH 27 | 28 | 29 | ARG USERID 30 | ARG GROUPID 31 | ARG USERNAME 32 | 33 | RUN groupadd -g $GROUPID $USERNAME 34 | RUN useradd -u $USERID -g $GROUPID $USERNAME 35 | RUN mkdir /home/$USERNAME && chown $USERNAME:$USERNAME /home/$USERNAME 36 | USER $USERNAME 37 | RUN conda create -y -n pyclj python=3.6\ 38 | && conda install -y -n pyclj numpy mxnet opencv cython scikit-image\ 39 | && echo "source activate pyclj" > /home/$USERNAME/.bashrc -------------------------------------------------------------------------------- /rcnn/tools/reeval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | try: 3 | import cPickle as pickle 4 | except ImportError: 5 | import pickle 6 | import os 7 | import mxnet as mx 8 | 9 | from ..logger import logger 10 | from ..config import config, default, generate_config 11 | from ..dataset import * 12 | 13 | 14 | def reeval(args): 15 | # load imdb 16 | imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path) 17 | 18 | # load detection results 19 | cache_file = os.path.join(imdb.cache_path, imdb.name, 'detections.pkl') 20 | with open(cache_file, 'rb') as f: # binary mode; required for pickle under Python 3 21 | detections = pickle.load(f) 22 | 23 | # eval 24 | imdb.evaluate_detections(detections) 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser(description='imdb test') 29 | # general 30 | parser.add_argument('--network', help='network name', default=default.network, type=str) 31 | parser.add_argument('--dataset', help='dataset name',
default=default.dataset, type=str) 32 | args, rest = parser.parse_known_args() 33 | generate_config(args.network, args.dataset) 34 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 35 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 36 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 37 | # other 38 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | def main(): 44 | args = parse_args() 45 | logger.info('Called with argument: %s' % args) 46 | reeval(args) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /rcnn/processing/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..cython.cpu_nms import cpu_nms 3 | try: 4 | from ..cython.gpu_nms import gpu_nms 5 | except ImportError: 6 | gpu_nms = None 7 | 8 | 9 | def py_nms_wrapper(thresh): 10 | def _nms(dets): 11 | return nms(dets, thresh) 12 | return _nms 13 | 14 | 15 | def cpu_nms_wrapper(thresh): 16 | def _nms(dets): 17 | return cpu_nms(dets, thresh) 18 | return _nms 19 | 20 | 21 | def gpu_nms_wrapper(thresh, device_id): 22 | def _nms(dets): 23 | return gpu_nms(dets, thresh, device_id) 24 | if gpu_nms is not None: 25 | return _nms 26 | else: 27 | return cpu_nms_wrapper(thresh) 28 | 29 | 30 | def nms(dets, thresh): 31 | """ 32 | greedily select boxes with high confidence and overlap with current maximum <= thresh 33 | rule out overlap >= thresh 34 | :param dets: [[x1, y1, x2, y2 score]] 35 | :param thresh: retain overlap < thresh 36 | :return: indexes to keep 37 | """ 38 | x1 = dets[:, 0] 39 | y1 = dets[:, 1] 40 | x2 = dets[:, 2] 41 | y2 = dets[:, 3] 42 | scores = dets[:, 4] 43 | 44 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 45 | order = scores.argsort()[::-1] 46 | 47 | keep = [] 48 | while order.size > 0: 49 | i = order[0] 50 | keep.append(i) 51 | xx1 = np.maximum(x1[i], x1[order[1:]]) 52 | yy1 = np.maximum(y1[i], y1[order[1:]]) 53 | xx2 = np.minimum(x2[i], x2[order[1:]]) 54 | yy2 = np.minimum(y2[i], y2[order[1:]]) 55 | 56 | w = np.maximum(0.0, xx2 - xx1 + 1) 57 | h = np.maximum(0.0, yy2 - yy1 + 1) 58 | inter = w * h 59 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 60 | 61 | inds = np.where(ovr <= thresh)[0] 62 | order = order[inds + 1] 63 | 64 | return keep 65 | -------------------------------------------------------------------------------- /src/facial_rec/face_feature.clj: -------------------------------------------------------------------------------- 1 | (ns facial-rec.face-feature 2 | (:require [libpython-clj2.require :refer [require-python]] 3 | [libpython-clj2.python :refer [py. py.. py.-] :as py] 4 | ;;Important change from v1 - You have to require 'np-array' for numpy 5 | ;;support 6 | [libpython-clj2.python.np-array] 7 | [tech.v3.datatype :as dtype])) 8 | 9 | 10 | 11 | (require-python 'mxnet 12 | '(mxnet ndarray module io model)) 13 | (require-python 'cv2) 14 | (require-python '[numpy :as np]) 15 | 16 | 17 | (defn load-model 18 | [& {:keys [model-path checkpoint] 19 | :or {model-path "models/recognition/model" 20 | checkpoint 0}}] 21 | (let [[sym arg-params aux-params] (mxnet.model/load_checkpoint model-path checkpoint) 22 | all-layers (py. 
sym get_internals) 23 | target-layer (py/get-item all-layers "fc1_output") 24 | model (mxnet.module/Module :symbol target-layer 25 | :context (mxnet/cpu) 26 | :label_names nil)] 27 | (py. model bind :data_shapes [["data" [1 3 112 112]]]) 28 | (py. model set_params arg-params aux-params) 29 | model)) 30 | 31 | (defonce model (load-model)) 32 | 33 | 34 | 35 | (defn face->feature 36 | [img-path] 37 | (py/with-gil-stack-rc-context 38 | (if-let [new-img (cv2/imread img-path)] 39 | (let [new-img (cv2/cvtColor new-img cv2/COLOR_BGR2RGB) 40 | new-img (np/transpose new-img [2 0 1]) 41 | input-blob (np/expand_dims new-img :axis 0) 42 | data (mxnet.ndarray/array input-blob) 43 | batch (mxnet.io/DataBatch :data [data])] 44 | (py. model forward batch :is_train false) 45 | (-> (py. model get_outputs) 46 | first 47 | (py. asnumpy) 48 | (#(dtype/make-container :java-array :float32 %)))) 49 | (throw (Exception. (format "Failed to load img: %s" img-path)))))) 50 | -------------------------------------------------------------------------------- /rcnn/cython/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps_cython( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /rcnn/utils/load_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..logger import logger 3 | from ..config import config 4 | from ..dataset import * 5 | 6 | 7 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, 8 | flip=False): 9 | """ load ground truth roidb """ 10 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path) 11 | roidb = imdb.gt_roidb() 12 | print('roidb size', len(roidb)) 13 | if flip: 14 | roidb = imdb.append_flipped_images(roidb) 15 | print('flipped roidb size', len(roidb)) 16 | return roidb 17 | 18 | 19 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, 
20 | proposal='rpn', append_gt=True, flip=False): 21 | """ load proposal roidb (append_gt when training) """ 22 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path) 23 | gt_roidb = imdb.gt_roidb() 24 | roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb, append_gt) 25 | if flip: 26 | roidb = imdb.append_flipped_images(roidb) 27 | return roidb 28 | 29 | 30 | def merge_roidb(roidbs): 31 | """ roidb are list, concat them together """ 32 | roidb = roidbs[0] 33 | for r in roidbs[1:]: 34 | roidb.extend(r) 35 | return roidb 36 | 37 | 38 | def filter_roidb(roidb): 39 | """ remove roidb entries without usable rois """ 40 | 41 | def is_valid(entry): 42 | """ valid images have at least 1 fg or bg roi """ 43 | overlaps = entry['max_overlaps'] 44 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 45 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 46 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 47 | #valid = len(fg_inds) > 0 48 | return valid 49 | 50 | num = len(roidb) 51 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 52 | num_after = len(filtered_roidb) 53 | logger.info('load data: filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after)) 54 | 55 | return filtered_roidb 56 | -------------------------------------------------------------------------------- /rcnn/utils/load_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def load_checkpoint(prefix, epoch): 5 | """ 6 | Load model checkpoint from file. 7 | :param prefix: Prefix of model name. 8 | :param epoch: Epoch number of model we would like to load. 9 | :return: (arg_params, aux_params) 10 | arg_params : dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's weights. 12 | aux_params : dict of str to NDArray 13 | Model parameter, dict of name to NDArray of net's auxiliary states. 14 | """ 15 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 16 | arg_params = {} 17 | aux_params = {} 18 | for k, v in save_dict.items(): 19 | tp, name = k.split(':', 1) 20 | if tp == 'arg': 21 | arg_params[name] = v 22 | if tp == 'aux': 23 | aux_params[name] = v 24 | return arg_params, aux_params 25 | 26 | 27 | def convert_context(params, ctx): 28 | """ 29 | :param params: dict of str to NDArray 30 | :param ctx: the context to convert to 31 | :return: dict of str of NDArray with context ctx 32 | """ 33 | new_params = dict() 34 | for k, v in params.items(): 35 | new_params[k] = v.as_in_context(ctx) 36 | return new_params 37 | 38 | 39 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 40 | """ 41 | wrapper for load checkpoint 42 | :param prefix: Prefix of model name. 43 | :param epoch: Epoch number of model we would like to load. 44 | :param convert: reference model should be converted to GPU NDArray first 45 | :param ctx: if convert then ctx must be designated. 
46 | :param process: model should drop any test 47 | :return: (arg_params, aux_params) 48 | """ 49 | arg_params, aux_params = load_checkpoint(prefix, epoch) 50 | if convert: 51 | if ctx is None: 52 | ctx = mx.cpu() 53 | arg_params = convert_context(arg_params, ctx) 54 | aux_params = convert_context(aux_params, ctx) 55 | if process: 56 | tests = [k for k in arg_params.keys() if '_test' in k] 57 | for test in tests: 58 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 59 | return arg_params, aux_params 60 | -------------------------------------------------------------------------------- /rcnn/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. 
*/ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /rcnn/cython/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /src/facial_rec/detect.clj: -------------------------------------------------------------------------------- 1 | (ns facial-rec.detect 2 | (:require [libpython-clj2.require :refer [require-python]] 3 | [libpython-clj2.python :refer [py. py.- py..] :as py] 4 | [clojure.tools.logging :as log])) 5 | 6 | (require-python '[distutils.core :refer [setup]]) 7 | (require-python '[numpy :as np]) 8 | (require-python 'pyximport) 9 | (pyximport/install :setup_args {:include_dirs (py/->py-list 10 | [(np/get_include)])}) 11 | (require-python '[retinaface :as rface]) 12 | (require-python 'cv2) 13 | (require-python '[builtins :refer [slice]]) 14 | (require-python '[skimage.transform :as trans]) 15 | 16 | 17 | (defonce model (rface/RetinaFace "models/detection/R50" 0 -1)) 18 | 19 | 20 | (defn detect-faces 21 | [img-path] 22 | (py/with-gil-stack-rc-context 23 | (if-let [cv-img (cv2/imread img-path)] 24 | (when-let [detection (py. 
model detect cv-img 0.8)] 25 | (let [[faces landmarks] detection] 26 | (->> (mapv (fn [face landmark] 27 | (let [face-bbox (->> (take 4 face) 28 | (map (comp long #(Math/round (double %))))) 29 | confidence (last face)] 30 | {:confidence confidence 31 | :bbox {:top-left (vec (take 2 face-bbox)) 32 | :bottom-right (vec (drop 2 face-bbox))} 33 | :landmarks (mapv #(mapv int %) landmark)})) 34 | faces landmarks)))) 35 | (throw (Exception. (format "Unable to open image %s" img-path)))))) 36 | 37 | 38 | (defn render-faces! 39 | "Draw the face detection result on an image. Presumably the same image the data came from." 40 | [img face-detection-result & {:keys [bbox-color landmark-color] 41 | :or {bbox-color [0 255 0] 42 | landmark-color [255 0 0]}}] 43 | (doseq [{:keys [bbox landmarks]} face-detection-result] 44 | (cv2/rectangle img (:top-left bbox) (:bottom-right bbox) bbox-color 2) 45 | (doseq [landmark landmarks] 46 | (cv2/circle img landmark 1 landmark-color 2))) 47 | img) 48 | 49 | 50 | (def ideal-face-landmarks 51 | (np/array [[30.2946, 51.6963] 52 | [65.5318, 51.5014] 53 | [48.0252, 71.7366] 54 | [33.5493, 92.3655] 55 | [62.7299, 92.2041]] 56 | :dtype np/float32)) 57 | 58 | 59 | (defn affine-warp-mat 60 | [landmarks] 61 | (try 62 | (let [landmark-ary (np/array landmarks :dtype np/float32) 63 | sim-trans (trans/SimilarityTransform) 64 | success? (py. sim-trans estimate landmark-ary ideal-face-landmarks)] 65 | (when success? 66 | (-> (py.- sim-trans params) 67 | (py/get-item [(slice 0 2) (slice nil)])))) 68 | (catch Throwable e 69 | (log/warnf e (format "Similarity transform failed for landmarks: %s" 70 | landmarks)) 71 | nil))) 72 | 73 | 74 | (defn crop-faces 75 | "Crop out faces. For each face detection result, return a new image. Returned images 76 | are scaled to a specific size (the size needed by the facial feature engine)." 77 | [img face-detection-result 78 | & {:keys [face-size align?] 79 | :or {face-size [112 112] 80 | align? true}}] 81 | ;;When align? is true, use the landmarks to estimate a similarity transform onto 82 | ;;ideal-face-landmarks and warp the face out with cv2.warpAffine. 83 | (->> face-detection-result 84 | (mapv (fn [{:keys [bbox landmarks]}] 85 | (let [{:keys [top-left bottom-right]} bbox 86 | [min-x min-y] top-left 87 | [max-x max-y] bottom-right 88 | affine-mat (when align? 89 | (affine-warp-mat landmarks))] 90 | (if affine-mat 91 | (cv2/warpAffine img affine-mat face-size :borderValue 0.0) 92 | ;;Fallthrough in the case the estimate mechanism fails or 93 | ;;the user doesn't want alignment.
94 | (-> 95 | (py/get-item img [(slice min-y max-y) (slice min-x max-x)]) 96 | (cv2/resize [112 112])))))))) 97 | -------------------------------------------------------------------------------- /src/facial_rec/demo.clj: -------------------------------------------------------------------------------- 1 | (ns facial-rec.demo 2 | (:require [facial-rec.detect :as detect] 3 | [facial-rec.face-feature :as face-feature] 4 | [libpython-clj2.require :refer [require-python]] 5 | [libpython-clj2.python :as py] 6 | [tech.v3.io :as io] 7 | [tech.v3.datatype.functional :as dfn] 8 | ;;Include serialization support for tech.v3.datatype datatypes 9 | [tech.v3.datatype.nippy] 10 | [clojure.tools.logging :as log]) 11 | (:import [java.io File] 12 | [java.util UUID])) 13 | 14 | 15 | 16 | (io/make-parents "faces/face.jpg") 17 | (require-python 'cv2) 18 | 19 | 20 | (defn filename->faces 21 | [fname] 22 | (py/with-gil-stack-rc-context 23 | (let [detection (detect/detect-faces fname) 24 | cropped-faces (detect/crop-faces (cv2/imread fname) detection)] 25 | (mapv (fn [detection-result face-img] 26 | (let [face-id (UUID/randomUUID) 27 | dest-fname (format "faces/%s.png" face-id) 28 | dest-feature-fname (format "file://faces/%s.nippy" face-id) 29 | _ (cv2/imwrite dest-fname face-img) 30 | feature (face-feature/face->feature dest-fname) 31 | metadata (merge detection-result 32 | {:id face-id 33 | :src-file fname 34 | :feature feature})] 35 | (io/put-nippy! dest-feature-fname metadata) 36 | metadata)) 37 | detection cropped-faces)))) 38 | 39 | 40 | (defn delete-previously-found-faces! 41 | [] 42 | (->> (file-seq (io/file "faces")) 43 | (remove #(.isDirectory ^File %)) 44 | (map (fn [f] 45 | (.delete f))) 46 | (dorun))) 47 | 48 | 49 | (defn find-annotate-faces! 50 | [] 51 | (log/info "finding faces") 52 | (delete-previously-found-faces!) 53 | (py/with-gil-stack-rc-context 54 | (->> (file-seq (io/file "dataset")) 55 | (remove #(.isDirectory ^File %)) 56 | (mapcat (fn [^File src-img] 57 | (filename->faces (.toString src-img)))) 58 | vec 59 | (#(do (log/infof "Found %d faces" (count %)) 60 | %))))) 61 | 62 | 63 | (defn annotations 64 | [] 65 | (->> (file-seq (io/file "faces")) 66 | (map #(.toString ^File %)) 67 | (filter #(.endsWith ^String % "nippy")) 68 | (map (comp (juxt :id identity) io/get-nippy)) 69 | (into {}))) 70 | 71 | 72 | (defn annotations-by-file 73 | [] 74 | (group-by :src-file (vals (annotations)))) 75 | 76 | 77 | (defn nearest 78 | [ann-id] 79 | (let [{:keys [feature] :as target-annotation} (get (annotations) ann-id)] 80 | (->> (vals (annotations)) 81 | (map #(assoc % :distance-squared (dfn/distance-squared feature (:feature %)))) 82 | (sort-by :distance-squared) 83 | (map #(dissoc % :feature))))) 84 | 85 | 86 | (defn- display-face-img 87 | [{:keys [id] :as entry}] 88 | (format "![face-img](faces/%s.png) " id)) 89 | 90 | 91 | (defn- display-distance-and-face-img 92 | [{:keys [id distance-squared] :as entry}] 93 | (format "%02d %s" 94 | (long (Math/sqrt (double distance-squared))) 95 | (display-face-img entry))) 96 | 97 | 98 | (defn output-face-results! 
99 | [& [all-faces]] 100 | (let [all-faces (or all-faces (find-annotate-faces!))] 101 | (spit "results.md" 102 | (with-out-str 103 | (println "## Results") 104 | (println "| face-img | 5 nearest |") 105 | (println "|-----|------|") 106 | (->> all-faces 107 | (map (fn [{:keys [id] :as entry}] 108 | (println "|" (display-face-img entry) 109 | "|" (->> (nearest id) 110 | (take 5) 111 | (map display-distance-and-face-img) 112 | (reduce str)) 113 | "|"))) 114 | (dorun)))))) 115 | 116 | 117 | (comment 118 | ;;Stress testing the system 119 | (dotimes [iter 100] 120 | (println "running") 121 | (find-annotate-faces!)) 122 | ) 123 | -------------------------------------------------------------------------------- /rcnn/processing/generate_anchor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate base anchors on index 0 3 | """ 4 | from __future__ import print_function 5 | import sys 6 | from builtins import range 7 | import numpy as np 8 | from ..cython.anchors import anchors_cython 9 | #from ..config import config 10 | 11 | 12 | def anchors_plane(feat_h, feat_w, stride, base_anchor): 13 | return anchors_cython(feat_h, feat_w, stride, base_anchor) 14 | 15 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 16 | scales=2 ** np.arange(3, 6), stride=16, dense_anchor=False): 17 | """ 18 | Generate anchor (reference) windows by enumerating aspect ratios X 19 | scales wrt a reference (0, 0, 15, 15) window. 20 | """ 21 | 22 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 23 | ratio_anchors = _ratio_enum(base_anchor, ratios) 24 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 25 | for i in range(ratio_anchors.shape[0])]) 26 | if dense_anchor: 27 | assert stride%2==0 28 | anchors2 = anchors.copy() 29 | anchors2[:,:] += int(stride/2) 30 | anchors = np.vstack( (anchors, anchors2) ) 31 | #print('GA',base_anchor.shape, ratio_anchors.shape, anchors.shape) 32 | return anchors 33 | 34 | #def generate_anchors_fpn(base_size=[64,32,16,8,4], ratios=[0.5, 1, 2], scales=8): 35 | # """ 36 | # Generate anchor (reference) windows by enumerating aspect ratios X 37 | # scales wrt a reference (0, 0, 15, 15) window. 38 | # """ 39 | # anchors = [] 40 | # _ratios = ratios.reshape( (len(base_size), -1) ) 41 | # _scales = scales.reshape( (len(base_size), -1) ) 42 | # for i,bs in enumerate(base_size): 43 | # __ratios = _ratios[i] 44 | # __scales = _scales[i] 45 | # #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 46 | # r = generate_anchors(bs, __ratios, __scales) 47 | # #print('anchors_fpn', r.shape, file=sys.stderr) 48 | # anchors.append(r) 49 | # return anchors 50 | 51 | def generate_anchors_fpn(dense_anchor=False, cfg = None): 52 | #assert(False) 53 | """ 54 | Generate anchor (reference) windows by enumerating aspect ratios X 55 | scales wrt a reference (0, 0, 15, 15) window. 
56 | """ 57 | if cfg is None: 58 | from ..config import config 59 | cfg = config.RPN_ANCHOR_CFG 60 | RPN_FEAT_STRIDE = [] 61 | for k in cfg: 62 | RPN_FEAT_STRIDE.append( int(k) ) 63 | RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) 64 | anchors = [] 65 | for k in RPN_FEAT_STRIDE: 66 | v = cfg[str(k)] 67 | bs = v['BASE_SIZE'] 68 | __ratios = np.array(v['RATIOS']) 69 | __scales = np.array(v['SCALES']) 70 | stride = int(k) 71 | #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 72 | r = generate_anchors(bs, __ratios, __scales, stride, dense_anchor) 73 | #print('anchors_fpn', r.shape, file=sys.stderr) 74 | anchors.append(r) 75 | 76 | return anchors 77 | 78 | def _whctrs(anchor): 79 | """ 80 | Return width, height, x center, and y center for an anchor (window). 81 | """ 82 | 83 | w = anchor[2] - anchor[0] + 1 84 | h = anchor[3] - anchor[1] + 1 85 | x_ctr = anchor[0] + 0.5 * (w - 1) 86 | y_ctr = anchor[1] + 0.5 * (h - 1) 87 | return w, h, x_ctr, y_ctr 88 | 89 | 90 | def _mkanchors(ws, hs, x_ctr, y_ctr): 91 | """ 92 | Given a vector of widths (ws) and heights (hs) around a center 93 | (x_ctr, y_ctr), output a set of anchors (windows). 94 | """ 95 | 96 | ws = ws[:, np.newaxis] 97 | hs = hs[:, np.newaxis] 98 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 99 | y_ctr - 0.5 * (hs - 1), 100 | x_ctr + 0.5 * (ws - 1), 101 | y_ctr + 0.5 * (hs - 1))) 102 | return anchors 103 | 104 | 105 | def _ratio_enum(anchor, ratios): 106 | """ 107 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 108 | """ 109 | 110 | w, h, x_ctr, y_ctr = _whctrs(anchor) 111 | size = w * h 112 | size_ratios = size / ratios 113 | ws = np.round(np.sqrt(size_ratios)) 114 | hs = np.round(ws * ratios) 115 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 116 | return anchors 117 | 118 | 119 | def _scale_enum(anchor, scales): 120 | """ 121 | Enumerate a set of anchors for each scale wrt an anchor. 
122 | """ 123 | 124 | w, h, x_ctr, y_ctr = _whctrs(anchor) 125 | ws = w * scales 126 | hs = h * scales 127 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 128 | return anchors 129 | -------------------------------------------------------------------------------- /rcnn/tools/test_rpn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..dataset import * 9 | from ..core.loader import TestLoader 10 | from ..core.tester import Predictor, generate_proposals, test_proposals 11 | from ..utils.load_model import load_param 12 | 13 | 14 | def test_rpn(network, dataset, image_set, root_path, dataset_path, 15 | ctx, prefix, epoch, 16 | vis, shuffle, thresh, test_output=False): 17 | # rpn generate proposal config 18 | config.TEST.HAS_RPN = True 19 | 20 | # print config 21 | logger.info(pprint.pformat(config)) 22 | 23 | # load symbol 24 | sym = eval('get_' + network + '_rpn_test')() 25 | 26 | # load dataset and prepare imdb for training 27 | imdb = eval(dataset)(image_set, root_path, dataset_path) 28 | roidb = imdb.gt_roidb() 29 | test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=True, withlabel=True) 30 | 31 | # load model 32 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx) 33 | 34 | # infer shape 35 | data_shape_dict = dict(test_data.provide_data) 36 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 37 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 38 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 39 | 40 | # check parameters 41 | for k in sym.list_arguments(): 42 | if k in data_shape_dict or 'label' in k: 43 | continue 44 | assert k in arg_params, k + ' not initialized' 45 | assert arg_params[k].shape == arg_shape_dict[k], \ 46 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 47 | for k in sym.list_auxiliary_states(): 48 | assert k in aux_params, k + ' not initialized' 49 | assert aux_params[k].shape == aux_shape_dict[k], \ 50 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 51 | 52 | # decide maximum shape 53 | data_names = [k[0] for k in test_data.provide_data] 54 | label_names = None if test_data.provide_label is None else [k[0] for k in test_data.provide_label] 55 | max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 56 | 57 | # create predictor 58 | predictor = Predictor(sym, data_names, label_names, 59 | context=ctx, max_data_shapes=max_data_shape, 60 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 61 | arg_params=arg_params, aux_params=aux_params) 62 | 63 | # start testing 64 | if not test_output: 65 | imdb_boxes = generate_proposals(predictor, test_data, imdb, vis=vis, thresh=thresh) 66 | imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes) 67 | else: 68 | test_proposals(predictor, test_data, imdb, roidb, vis=vis) 69 | 70 | 71 | def parse_args(): 72 | parser = argparse.ArgumentParser(description='Test a Region Proposal Network') 73 | # general 74 | parser.add_argument('--network', help='network name', default=default.network, type=str) 75 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 76 | args, rest = 
parser.parse_known_args() 77 | generate_config(args.network, args.dataset) 78 | parser.add_argument('--image_set', help='image_set name', default=default.test_image_set, type=str) 79 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 80 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 81 | # testing 82 | parser.add_argument('--prefix', help='model to test with', default=default.rpn_prefix, type=str) 83 | parser.add_argument('--epoch', help='model to test with', default=default.rpn_epoch, type=int) 84 | # rpn 85 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 86 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 87 | parser.add_argument('--thresh', help='rpn proposal threshold', default=0, type=float) 88 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 89 | args = parser.parse_args() 90 | return args 91 | 92 | 93 | def main(): 94 | args = parse_args() 95 | logger.info('Called with argument: %s' % args) 96 | ctx = mx.gpu(args.gpu) 97 | test_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 98 | ctx, args.prefix, args.epoch, 99 | args.vis, args.shuffle, args.thresh) 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /rcnn/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from rcnn.pycocotools import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 
35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] 104 | -------------------------------------------------------------------------------- /rcnn/tools/test_rcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..dataset import * 9 | from ..core.loader import TestLoader 10 | from ..core.tester import Predictor, pred_eval 11 | from ..utils.load_model import load_param 12 | 13 | 14 | def test_rcnn(network, dataset, image_set, root_path, dataset_path, 15 | ctx, prefix, epoch, 16 | vis, shuffle, has_rpn, proposal, thresh): 17 | # set config 18 | if has_rpn: 19 | config.TEST.HAS_RPN = True 20 | 21 | # print config 22 | logger.info(pprint.pformat(config)) 23 | 24 | # load symbol and testing data 25 | if has_rpn: 26 | sym = eval('get_' + network + '_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS) 27 | imdb = eval(dataset)(image_set, root_path, dataset_path) 28 | roidb = imdb.gt_roidb() 29 | else: 30 | sym = eval('get_' + network + '_rcnn_test')(num_classes=config.NUM_CLASSES) 31 | imdb = eval(dataset)(image_set, root_path, dataset_path) 32 | gt_roidb = imdb.gt_roidb() 33 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb) 34 | 35 | # get test data iter 36 | test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=has_rpn) 37 | 38 | # load model 39 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True) 40 | 41 | # infer shape 42 | data_shape_dict = dict(test_data.provide_data) 43 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 44 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 45 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 46 | 47 | # check parameters 48 | for k in sym.list_arguments(): 49 | if k in data_shape_dict or 'label' in k: 50 | continue 51 | assert k in arg_params, k + ' not initialized' 52 | assert arg_params[k].shape == arg_shape_dict[k], \ 53 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 54 | for k in sym.list_auxiliary_states(): 55 | assert k in aux_params, k + ' not initialized' 56 | assert aux_params[k].shape == aux_shape_dict[k], \ 57 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 58 | 59 | # decide maximum shape 60 | data_names = [k[0] for k in test_data.provide_data] 61 | label_names = None 62 | max_data_shape = [('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 63 | if not has_rpn: 64 | max_data_shape.append(('rois', (1, config.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) 65 | 66 | # create predictor 67 | predictor = Predictor(sym, data_names, label_names, 68 | context=ctx, max_data_shapes=max_data_shape, 69 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 70 | arg_params=arg_params, aux_params=aux_params) 71 | 72 | # start detection 73 | pred_eval(predictor, test_data, imdb, vis=vis, thresh=thresh) 74 | 75 | 76 | def parse_args(): 77 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 78 | # general 79 | parser.add_argument('--network', help='network name', default=default.network, type=str) 80 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 81 | args, rest = parser.parse_known_args() 82 | generate_config(args.network, args.dataset) 83 | parser.add_argument('--image_set', help='image_set name', default=default.test_image_set, type=str) 84 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 85 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 86 | # testing 87 | parser.add_argument('--prefix', help='model to test with', default=default.rcnn_prefix, type=str) 88 | parser.add_argument('--epoch', help='model to test with', default=default.rcnn_epoch, type=int) 89 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 90 | # rcnn 91 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 92 | parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float) 93 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 94 | parser.add_argument('--has_rpn', help='generate proposals on the fly', action='store_true') 95 | parser.add_argument('--proposal', help='can be ss for selective search or rpn', default='rpn', type=str) 96 | args = parser.parse_args() 97 | return args 98 | 99 | 100 | def main(): 101 | args = parse_args() 102 | logger.info('Called with argument: %s' % args) 103 | ctx = 
mx.gpu(args.gpu) 104 | test_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 105 | ctx, args.prefix, args.epoch, 106 | args.vis, args.shuffle, args.has_rpn, args.proposal, args.thresh) 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /rcnn/cython/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 |
CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /rcnn/cython/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted from 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH.
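For example, with CUDAHOME=/usr/local/cuda (an illustrative path) the result is {'home': '/usr/local/cuda', 'nvcc': '/usr/local/cuda/bin/nvcc', 'include': '/usr/local/cuda/include', 'lib64': '/usr/local/cuda/lib64'}.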
35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | if nvcc is None: 46 | raise EnvironmentError('The nvcc binary could not be ' 47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | home = os.path.dirname(os.path.dirname(nvcc)) 49 | 50 | cudaconfig = {'home':home, 'nvcc':nvcc, 51 | 'include': pjoin(home, 'include'), 52 | 'lib64': pjoin(home, 'lib64')} 53 | for k, v in cudaconfig.items(): 54 | if not os.path.exists(v): 55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | 57 | return cudaconfig 58 | 59 | 60 | # Test if cuda could be foun 61 | try: 62 | CUDA = locate_cuda() 63 | except EnvironmentError: 64 | CUDA = None 65 | 66 | 67 | # Obtain the numpy include directory. This logic works across numpy versions. 68 | try: 69 | numpy_include = np.get_include() 70 | except AttributeError: 71 | numpy_include = np.get_numpy_include() 72 | 73 | 74 | def customize_compiler_for_nvcc(self): 75 | """inject deep into distutils to customize how the dispatch 76 | to gcc/nvcc works. 77 | 78 | If you subclass UnixCCompiler, it's not trivial to get your subclass 79 | injected in, and still have the right customizations (i.e. 80 | distutils.sysconfig.customize_compiler) run on it. So instead of going 81 | the OO route, I have this. Note, it's kindof like a wierd functional 82 | subclassing going on.""" 83 | 84 | # tell the compiler it can processes .cu 85 | self.src_extensions.append('.cu') 86 | 87 | # save references to the default compiler_so and _comple methods 88 | default_compiler_so = self.compiler_so 89 | super = self._compile 90 | 91 | # now redefine the _compile method. This gets executed for each 92 | # object but distutils doesn't have the ability to change compilers 93 | # based on source extension: we add it. 
94 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 95 | if os.path.splitext(src)[1] == '.cu': 96 | # use the cuda for .cu files 97 | self.set_executable('compiler_so', CUDA['nvcc']) 98 | # use only a subset of the extra_postargs, which are 1-1 translated 99 | # from the extra_compile_args in the Extension class 100 | postargs = extra_postargs['nvcc'] 101 | else: 102 | postargs = extra_postargs['gcc'] 103 | 104 | super(obj, src, ext, cc_args, postargs, pp_opts) 105 | # reset the default compiler_so, which we might have changed for cuda 106 | self.compiler_so = default_compiler_so 107 | 108 | # inject our redefined _compile method into the class 109 | self._compile = _compile 110 | 111 | 112 | # run the customize_compiler 113 | class custom_build_ext(build_ext): 114 | def build_extensions(self): 115 | customize_compiler_for_nvcc(self.compiler) 116 | build_ext.build_extensions(self) 117 | 118 | 119 | ext_modules = [ 120 | Extension( 121 | "bbox", 122 | ["bbox.pyx"], 123 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | include_dirs=[numpy_include] 125 | ), 126 | Extension( 127 | "anchors", 128 | ["anchors.pyx"], 129 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 130 | include_dirs=[numpy_include] 131 | ), 132 | Extension( 133 | "cpu_nms", 134 | ["cpu_nms.pyx"], 135 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 136 | include_dirs = [numpy_include] 137 | ), 138 | ] 139 | 140 | if CUDA is not None: 141 | ext_modules.append( 142 | Extension('gpu_nms', 143 | ['nms_kernel.cu', 'gpu_nms.pyx'], 144 | library_dirs=[CUDA['lib64']], 145 | libraries=['cudart'], 146 | language='c++', 147 | runtime_library_dirs=[CUDA['lib64']], 148 | # this syntax is specific to this build system 149 | # we're only going to use certain compiler args with nvcc and not with 150 | # gcc the implementation of this trick is in customize_compiler() below 151 | extra_compile_args={'gcc': ["-Wno-unused-function"], 152 | 'nvcc': ['-arch=sm_35', 153 | '--ptxas-options=-v', 154 | '-c', 155 | '--compiler-options', 156 | "'-fPIC'"]}, 157 | include_dirs = [numpy_include, CUDA['include']] 158 | ) 159 | ) 160 | else: 161 | print('Skipping GPU_NMS') 162 | 163 | 164 | setup( 165 | name='frcnn_cython', 166 | ext_modules=ext_modules, 167 | # inject our custom trigger 168 | cmdclass={'build_ext': custom_build_ext}, 169 | ) 170 | -------------------------------------------------------------------------------- /rcnn/core/metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import mxnet as mx 4 | import numpy as np 5 | 6 | from rcnn.config import config 7 | 8 | 9 | def get_rpn_names(): 10 | pred = ['rpn_cls_prob', 'rpn_bbox_loss', 'rpn_label', 'rpn_bbox_weight'] 11 | label = ['rpn_label', 'rpn_bbox_target', 'rpn_bbox_weight'] 12 | return pred, label 13 | 14 | 15 | 16 | class RPNAccMetric(mx.metric.EvalMetric): 17 | def __init__(self, pred_idx=-1, label_idx=-1,name='RPNAcc'): 18 | super(RPNAccMetric, self).__init__(name) 19 | self.pred, self.label = get_rpn_names() 20 | #self.name = 'RPNAcc' 21 | self.name = [name, name+'_BG', name+'_FG'] 22 | self.pred_idx = pred_idx 23 | self.label_idx = label_idx 24 | self.STAT = [0, 0, 0] 25 | 26 | def reset(self): 27 | """Clear the internal statistics to initial state.""" 28 | if isinstance(self.name, str): 29 | self.num_inst = 0 30 | self.sum_metric = 0.0 31 | else: 32 | #print('reset to ',len(self.name), self.name, 
file=sys.stderr) 33 | self.num_inst = [0] * len(self.name) 34 | self.sum_metric = [0.0] * len(self.name) 35 | 36 | 37 | def get(self): 38 | if isinstance(self.name, str): 39 | if self.num_inst == 0: 40 | return (self.name, float('nan')) 41 | else: 42 | return (self.name, self.sum_metric / self.num_inst) 43 | else: 44 | names = ['%s'%(self.name[i]) for i in range(len(self.name))] 45 | values = [x / y if y != 0 else float('nan') \ 46 | for x, y in zip(self.sum_metric, self.num_inst)] 47 | return (names, values) 48 | 49 | def update(self, labels, preds): 50 | if self.pred_idx>=0 and self.label_idx>=0: 51 | pred = preds[self.pred_idx] 52 | label = preds[self.label_idx] 53 | else: 54 | pred = preds[self.pred.index('rpn_cls_prob')] 55 | label = labels[self.label.index('rpn_label')] 56 | #label = preds[self.pred.index('rpn_label')] 57 | 58 | num_images = pred.shape[0] 59 | #print(pred.shape, label.shape, file=sys.stderr) 60 | # pred (b, c, p) or (b, c, h, w) 61 | pred_label = mx.ndarray.argmax_channel(pred).asnumpy().astype('int32') 62 | #pred_label = pred_label.reshape((pred_label.shape[0], -1)) 63 | pred_label = pred_label.reshape(-1,) 64 | # label (b, p) 65 | label = label.asnumpy().astype('int32').reshape(-1,) 66 | #print(pred_label.shape, label.shape) 67 | 68 | # filter with keep_inds 69 | keep_inds = np.where(label != -1)[0] 70 | #print('in_metric acc', pred_label.shape, label.shape, len(keep_inds), file=sys.stderr) 71 | #print(keep_inds, file=sys.stderr) 72 | _pred_label = pred_label[keep_inds] 73 | _label = label[keep_inds] 74 | #print('in_metric2', pred_label.shape, label.shape, len(keep_inds), file=sys.stderr) 75 | if isinstance(self.name, str): 76 | self.sum_metric += np.sum(_pred_label.flat == _label.flat) 77 | self.num_inst += len(_pred_label.flat) 78 | else: 79 | self.sum_metric[0] += np.sum(_pred_label.flat == _label.flat) 80 | self.num_inst[0] += len(_pred_label.flat) 81 | 82 | keep_inds = np.where(label == 0)[0] 83 | _pred_label = pred_label[keep_inds] 84 | _label = label[keep_inds] 85 | self.sum_metric[1] += np.sum(_pred_label.flat == _label.flat) 86 | self.num_inst[1] += len(_pred_label.flat) 87 | 88 | keep_inds = np.where(label == 1)[0] 89 | _pred_label = pred_label[keep_inds] 90 | _label = label[keep_inds] 91 | a = np.sum(_pred_label.flat == _label.flat) 92 | b = len(_pred_label.flat) 93 | self.sum_metric[2] += a 94 | self.num_inst[2] += b 95 | 96 | #self.STAT[0]+=a 97 | #self.STAT[1]+=b 98 | #self.STAT[2]+=num_images 99 | #if self.STAT[2]%400==0: 100 | # print('FG_ACC', self.pred_idx, self.STAT[2], self.STAT[0], self.STAT[1], float(self.STAT[0])/self.STAT[1], file=sys.stderr) 101 | # self.STAT = [0,0,0] 102 | 103 | 104 | class RPNLogLossMetric(mx.metric.EvalMetric): 105 | def __init__(self, pred_idx=-1, label_idx=-1): 106 | super(RPNLogLossMetric, self).__init__('RPNLogLoss') 107 | self.pred, self.label = get_rpn_names() 108 | self.pred_idx = pred_idx 109 | self.label_idx = label_idx 110 | 111 | def update(self, labels, preds): 112 | if self.pred_idx>=0 and self.label_idx>=0: 113 | pred = preds[self.pred_idx] 114 | label = preds[self.label_idx] 115 | else: 116 | pred = preds[self.pred.index('rpn_cls_prob')] 117 | label = labels[self.label.index('rpn_label')] 118 | #label = preds[self.pred.index('rpn_label')] 119 | 120 | # label (b, p) 121 | label = label.asnumpy().astype('int32').reshape((-1)) 122 | # pred (b, c, p) or (b, c, h, w) --> (b, p, c) --> (b*p, c) 123 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 124 | pred = 
pred.reshape((label.shape[0], -1)) 125 | 126 | # filter with keep_inds 127 | keep_inds = np.where(label != -1)[0] 128 | label = label[keep_inds] 129 | cls = pred[keep_inds, label] 130 | #print('in_metric log', label.shape, cls.shape, file=sys.stderr) 131 | 132 | cls += 1e-14 133 | cls_loss = -1 * np.log(cls) 134 | cls_loss = np.sum(cls_loss) 135 | self.sum_metric += cls_loss 136 | self.num_inst += label.shape[0] 137 | 138 | 139 | class RPNL1LossMetric(mx.metric.EvalMetric): 140 | def __init__(self, loss_idx=-1, weight_idx=-1, name='RPNL1Loss'): 141 | super(RPNL1LossMetric, self).__init__(name) 142 | self.pred, self.label = get_rpn_names() 143 | self.loss_idx = loss_idx 144 | self.weight_idx = weight_idx 145 | self.name = name 146 | 147 | def update(self, labels, preds): 148 | if self.loss_idx>=0 and self.weight_idx>=0: 149 | bbox_loss = preds[self.loss_idx].asnumpy() 150 | bbox_weight = preds[self.weight_idx].asnumpy() 151 | else: 152 | bbox_loss = preds[self.pred.index('rpn_bbox_loss')].asnumpy() 153 | bbox_weight = labels[self.label.index('rpn_bbox_weight')].asnumpy() 154 | #bbox_weight = preds[self.pred.index('rpn_bbox_weight')].asnumpy() 155 | 156 | #print('in_metric', self.name, bbox_weight.shape, bbox_loss.shape) 157 | 158 | # calculate num_inst (average on those fg anchors) 159 | if config.LR_MODE==0: 160 | num_inst = np.sum(bbox_weight > 0) / (bbox_weight.shape[1]/config.NUM_ANCHORS) 161 | else: 162 | num_inst = 1 163 | #print('in_metric log', bbox_loss.shape, num_inst, file=sys.stderr) 164 | 165 | self.sum_metric += np.sum(bbox_loss) 166 | self.num_inst += num_inst 167 | 168 | 169 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # facial-rec 2 | 3 | ![detection phase](detector_test.jpg) 4 | 5 | Demonstration of some pretty good facial rec tech using a 6 | [famous selfie](dataset/group-pic.JPG) with a 7 | [bunch of stars](dataset). 8 | 9 | 10 | TL/DR - jump to [results](docs/results.md) 11 | 12 | 13 | ## How It Works 14 | 15 | At a high level, facial recognition consists of two steps: *detection* and *embedding*. 16 | 17 | * *detection* takes a large image and produces a list of faces in the image. In our 18 | case this piece uses a fully-convolutional approach, which means it takes an input 19 | image and outputs an 'image' where each pixel consists of a bounding box, 20 | a confidence measure, and a set of landmarks. A second step does 21 | non-maximal suppression of the data, which is a fancy way of saying it looks at 22 | overlapping detections and keeps the highest-confidence one. From here we get a list of faces 23 | and landmarks. We crop the faces out of the source image and align them using the 24 | landmarks to a set of 'standard' landmarks used during training. 25 | 26 | 27 | * *embedding* - The next step is to produce an embedding, which is an N-dimensional 28 | vector. In our case that vector has 512 elements. These vectors have a special 29 | property: faces that look more similar will be closer in Euclidean space. So, 30 | for example, if I have two images of Tom Hanks and I create two embeddings, I would 31 | expect the distance between them to be less than the distance between, say, an 32 | image of Tom Hanks and an image of Denzel Washington (see the sketch below).
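To make that concrete, here is a minimal sketch of the comparison using the same `tech.v3.datatype.functional` namespace the demo code uses; the four-element vectors are made-up stand-ins for the real 512-element embeddings:

```clojure
(require '[tech.v3.datatype.functional :as dfn])

;; Made-up, truncated 'embeddings' purely for illustration.
(def hanks-a [0.10 0.80 0.30 0.50])
(def hanks-b [0.12 0.78 0.28 0.52])
(def denzel  [0.90 0.10 0.60 0.20])

;; Same person => small distance; different people => larger distance.
(dfn/distance-squared hanks-a hanks-b) ;; => ~0.0016
(dfn/distance-squared hanks-a denzel)  ;; => ~1.31
```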
33 | 34 | 35 | 36 | One of the most interesting aspects to me is that I can use a deep learning 37 | system to construct a [metric space](https://en.wikipedia.org/wiki/Metric_space) 38 | where distance corresponds to something very abstract, like how similar two 39 | people appear or how similar two outfits are. In this sense the 40 | target of our learning algorithm is a good embedding in our new space. 41 | 42 | 43 | That is it. That is how facial rec works at the base level. If you are more 44 | curious as to how this works from scratch please review the 45 | [RetinaFace detector](https://arxiv.org/abs/1905.00641) and 46 | the [ArcFace loss function](https://arxiv.org/abs/1801.07698). 47 | 48 | 49 | You may notice that the ArcFace paper is specifically about a loss function, so 50 | let me tell you that the loss function is the device that you 51 | use to tell a deep learning system exactly what to target during its training. The 52 | loss function, however, isn't specific to the network it is used to train, so one 53 | could use this loss function to train other types of similarity measures, potentially 54 | with networks that aren't specific to image recognition. 55 | 56 | 57 | If anyone asks you how facial rec works you can say: 58 | 59 | >> Chris Nuernberger told me facial recognition uses, among other things, 60 | a deep learning system to embed an image of a face into a metric 61 | space where distance is related to how likely it is that two images represent 62 | the same physical person. 63 | 64 | :-). Enjoy! 65 | 66 | 67 | ## Usage 68 | 69 | 70 | This system is built to show a realistic example of a cutting-edge system. As such 71 | it rests on four components: 72 | 1. docker 73 | 2. Conda 74 | 3. Python 75 | 4. Clojure 76 | 77 | 78 | The most advanced piece of the demo is actually the facial detection component. 79 | Luckily, it was nicely wrapped. To get it working we needed cython working, and 80 | there is some [good information](src/facial_rec/detect.clj) there if you want to 81 | use a system that is based partially on cython. 82 | 83 | 84 | ### Install Docker 85 | 86 | 87 | Installing docker is system-specific, but on all systems you want to install it such 88 | that you can run it 89 | [without sudo](https://docs.docker.com/install/linux/linux-postinstall/). 90 | 91 | 92 | ### Get the data 93 | 94 | This script mainly downloads the models used for detection and feature embedding. 95 | 96 | ```console 97 | scripts/get-data 98 | ``` 99 | 100 | 101 | ### Start up a REPL 102 | 103 | 104 | ```console 105 | scripts/run-conda-docker 106 | ``` 107 | 108 | The port is printed out in a line like: 109 | 110 | ```console 111 | nREPL server started on port 44507 on host localhost - nrepl://localhost:44507 112 | ``` 113 | 114 | Now, in emacs, vim, or wherever, connect to the exposed port on localhost. 115 | 116 | 117 | ### Find/Annotate Faces 118 | 119 | 120 | ```clojure 121 | (require '[facial-rec.demo :as demo]) 122 | ;;long pause as things compile 123 | ``` 124 | 125 | At this point, we have to say that the system is dynamically compiling cython and 126 | upgrading the networks to the newest version of mxnet. This is a noisy process 127 | for a few reasons: we are loading a newer numpy, compiling files, and loading networks.
128 | You will see warnings in the REPL, and the stdout of your docker container will display 129 | some errors regarding compiling the gpu non-maximal-suppression (nms) algorithm: 130 | ```console 131 | In file included from /home/chrisn/.conda/envs/pyclj/lib/python3.6/site-packages/numpy/core/include/numpy/ndarraytypes.h:1832:0, 132 | from /home/chrisn/.conda/envs/pyclj/lib/python3.6/site-packages/numpy/core/include/numpy/ndarrayobject.h:12, 133 | from /home/chrisn/.conda/envs/pyclj/lib/python3.6/site-packages/numpy/core/include/numpy/arrayobject.h:4, 134 | from /home/chrisn/.pyxbld/temp.linux-x86_64-3.6/pyrex/rcnn/cython/gpu_nms.c:598: 135 | /home/chrisn/.conda/envs/pyclj/lib/python3.6/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:17:2: warning: #warning "Using deprecated NumPy API, disable it with " "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [-Wcpp] 136 | #warning "Using deprecated NumPy API, disable it with " \ 137 | ^~~~~~~ 138 | /home/chrisn/.pyxbld/temp.linux-x86_64-3.6/pyrex/rcnn/cython/gpu_nms.c:600:10: fatal error: gpu_nms.hpp: No such file or directory 139 | #include "gpu_nms.hpp" 140 | ^~~~~~~~~~~~~ 141 | ``` 142 | 143 | Interestingly enough, the system still works fine. The nms errors are around building 144 | the gpu version of the nms algorithm, and we aren't using the gpu for this demo. 145 | [Nothing to see here](https://media.giphy.com/media/joV1k1sNOT5xC/giphy.gif) 146 | 147 | 148 | ```clojure 149 | (def faces (demo/find-annotate-faces!)) 150 | ;;...pause... 151 | #'faces 152 | ``` 153 | 154 | Now there are cutout faces in the faces subdir. You can do nearest searches in the 155 | demo namespace and see how well this network does. 156 | 157 | 158 | ```clojure 159 | (demo/output-face-results! faces) 160 | ``` 161 | 162 | This takes each face, finds the 5 nearest, and outputs the results to `results.md`. 163 | We use [`grip`](https://github.com/joeyespo/grip) to view the markdown files locally. 164 | 165 | 166 | Going further, any non-directory files in the dataset directory will be scanned and 167 | added to the dataset, so feel free to try it with your friends and family and see 168 | how good the results are. 169 | 170 | 171 | ## License 172 | 173 | Copyright © 2019 Chris Nuernberger 174 | 175 | This program and the accompanying materials are made available under the 176 | terms of the Eclipse Public License 2.0 which is available at 177 | http://www.eclipse.org/legal/epl-2.0.
178 | -------------------------------------------------------------------------------- /rcnn/dataset/retinaface.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | try: 3 | import cPickle as pickle 4 | except ImportError: 5 | import pickle 6 | import cv2 7 | import os 8 | import numpy as np 9 | import json 10 | #from PIL import Image 11 | 12 | from ..logger import logger 13 | from .imdb import IMDB 14 | from .ds_utils import unique_boxes, filter_small_boxes 15 | from ..config import config 16 | 17 | class retinaface(IMDB): 18 | def __init__(self, image_set, root_path, data_path): 19 | super(retinaface, self).__init__('retinaface', image_set, root_path, data_path) 20 | #assert image_set=='train' 21 | 22 | split = image_set 23 | self._split = image_set 24 | self._image_set = image_set 25 | 26 | 27 | self.root_path = root_path 28 | self.data_path = data_path 29 | 30 | 31 | self._dataset_path = self.data_path 32 | self._imgs_path = os.path.join(self._dataset_path, image_set, 'images') 33 | self._fp_bbox_map = {} 34 | label_file = os.path.join(self._dataset_path, image_set, 'label.txt') 35 | name = None 36 | for line in open(label_file, 'r'): 37 | line = line.strip() 38 | if line.startswith('#'): 39 | name = line[1:].strip() 40 | self._fp_bbox_map[name] = [] 41 | continue 42 | assert name is not None 43 | assert name in self._fp_bbox_map 44 | self._fp_bbox_map[name].append(line) 45 | print('origin image size', len(self._fp_bbox_map)) 46 | 47 | #self.num_images = len(self._image_paths) 48 | #self._image_index = range(len(self._image_paths)) 49 | self.classes = ['bg', 'face'] 50 | self.num_classes = len(self.classes) 51 | 52 | 53 | def gt_roidb(self): 54 | cache_file = os.path.join(self.cache_path, '{}_{}_gt_roidb.pkl'.format(self.name, self._split)) 55 | if os.path.exists(cache_file): 56 | with open(cache_file, 'rb') as fid: 57 | roidb = pickle.load(fid) 58 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 59 | self.num_images = len(roidb) 60 | return roidb 61 | 62 | roidb = [] 63 | max_num_boxes = 0 64 | nonattr_box_num = 0 65 | landmark_num = 0 66 | 67 | pp = 0 68 | for fp in self._fp_bbox_map: 69 | pp += 1 70 | if pp%1000==0: 71 | print('loading', pp) 72 | if self._split=='test': 73 | image_path = os.path.join(self._imgs_path, fp) 74 | roi = {'image': image_path} 75 | roidb.append(roi) 76 | continue 77 | boxes = np.zeros([len(self._fp_bbox_map[fp]), 4], np.float) 78 | landmarks = np.zeros([len(self._fp_bbox_map[fp]), 5, 3], np.float) 79 | blur = np.zeros((len(self._fp_bbox_map[fp]),), np.float) 80 | boxes_mask = [] 81 | 82 | gt_classes = np.ones([len(self._fp_bbox_map[fp])], np.int32) 83 | overlaps = np.zeros([len(self._fp_bbox_map[fp]), 2], np.float) 84 | 85 | imsize = cv2.imread(os.path.join(self._imgs_path, fp)).shape[0:2][::-1] 86 | ix = 0 87 | 88 | for aline in self._fp_bbox_map[fp]: 89 | #imsize = Image.open(os.path.join(self._imgs_path, fp)).size 90 | values = [float(x) for x in aline.strip().split()] 91 | bbox = [values[0], values[1], values[0]+values[2], values[1]+values[3]] 92 | 93 | x1 = bbox[0] 94 | y1 = bbox[1] 95 | x2 = min(imsize[0], bbox[2]) 96 | y2 = min(imsize[1], bbox[3]) 97 | if x1>=x2 or y1>=y2: 98 | continue 99 | 100 | if config.BBOX_MASK_THRESH>0: 101 | if (x2 - x1) < config.BBOX_MASK_THRESH or y2 - y1 < config.BBOX_MASK_THRESH: 102 | boxes_mask.append(np.array([x1, y1, x2, y2], np.float)) 103 | continue 104 | if (x2 - x1) < config.TRAIN.MIN_BOX_SIZE or y2 - y1 < 
config.TRAIN.MIN_BOX_SIZE: 105 | continue 106 | 107 | boxes[ix, :] = np.array([x1, y1, x2, y2], np.float) 108 | if self._split=='train': 109 | landmark = np.array( values[4:19], dtype=np.float32 ).reshape((5,3)) 110 | for li in range(5): 111 | #print(landmark) 112 | if landmark[li][0]==-1. and landmark[li][1]==-1.: #missing landmark 113 | assert landmark[li][2]==-1 114 | else: 115 | assert landmark[li][2]>=0 116 | if li==0: 117 | landmark_num+=1 118 | if landmark[li][2]==0.0:#visible 119 | landmark[li][2] = 1.0 120 | else: 121 | landmark[li][2] = 0.0 122 | 123 | landmarks[ix] = landmark 124 | 125 | blur[ix] = values[19] 126 | #print(aline, blur[ix]) 127 | if blur[ix]<0.0: 128 | blur[ix] = 0.3 129 | nonattr_box_num+=1 130 | 131 | cls = int(1) 132 | gt_classes[ix] = cls 133 | overlaps[ix, cls] = 1.0 134 | ix += 1 135 | max_num_boxes = max(max_num_boxes, ix) 136 | #overlaps = scipy.sparse.csr_matrix(overlaps) 137 | if self._split=='train' and ix==0: 138 | continue 139 | boxes = boxes[:ix,:] 140 | landmarks = landmarks[:ix,:,:] 141 | blur = blur[:ix] 142 | gt_classes = gt_classes[:ix] 143 | overlaps = overlaps[:ix,:] 144 | image_path = os.path.join(self._imgs_path, fp) 145 | with open(image_path, 'rb') as fin: 146 | stream = fin.read() 147 | stream = np.fromstring(stream, dtype=np.uint8) 148 | 149 | roi = { 150 | 'image': image_path, 151 | 'stream': stream, 152 | 'height': imsize[1], 153 | 'width': imsize[0], 154 | 'boxes': boxes, 155 | 'landmarks': landmarks, 156 | 'blur': blur, 157 | 'gt_classes': gt_classes, 158 | 'gt_overlaps': overlaps, 159 | 'max_classes': overlaps.argmax(axis=1), 160 | 'max_overlaps': overlaps.max(axis=1), 161 | 'flipped': False, 162 | } 163 | if len(boxes_mask)>0: 164 | boxes_mask = np.array(boxes_mask) 165 | roi['boxes_mask'] = boxes_mask 166 | roidb.append(roi) 167 | for roi in roidb: 168 | roi['max_num_boxes'] = max_num_boxes 169 | self.num_images = len(roidb) 170 | print('roidb size', len(roidb)) 171 | print('non attr box num', nonattr_box_num) 172 | print('landmark num', landmark_num) 173 | with open(cache_file, 'wb') as fid: 174 | pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) 175 | print('wrote gt roidb to {}'.format(cache_file)) 176 | 177 | return roidb 178 | 179 | def write_detections(self, all_boxes, output_dir='./output/'): 180 | pass 181 | 182 | 183 | def evaluate_detections(self, all_boxes, output_dir='./output/',method_name='insightdetection'): 184 | pass 185 | 186 | 187 | -------------------------------------------------------------------------------- /rcnn/PY_OP/rpn_fpn_ohem3.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | import sys 4 | import mxnet as mx 5 | import numpy as np 6 | from distutils.util import strtobool 7 | from ..config import config, generate_config 8 | 9 | 10 | STAT = {0:0} 11 | STEP = 28800 12 | 13 | class RPNFPNOHEM3Operator(mx.operator.CustomOp): 14 | def __init__(self, stride=0, network='', dataset='', prefix=''): 15 | super(RPNFPNOHEM3Operator, self).__init__() 16 | self.stride = int(stride) 17 | self.prefix = prefix 18 | generate_config(network, dataset) 19 | self.mode = config.TRAIN.OHEM_MODE #0 for random 10:245, 1 for 10:246, 2 for 10:30, mode 1 for default 20 | global STAT 21 | for k in config.RPN_FEAT_STRIDE: 22 | STAT[k] = [0,0,0] 23 | 24 | def forward(self, is_train, req, in_data, out_data, aux): 25 | global STAT 26 | 27 | cls_score = in_data[0].asnumpy() #BS, 2, ANCHORS 28 | labels_raw = in_data[1].asnumpy() # BS, ANCHORS 29 | 30 | A 
= config.NUM_ANCHORS 31 | anchor_weight = np.zeros( (labels_raw.shape[0], labels_raw.shape[1],1), dtype=np.float32 ) 32 | valid_count = np.zeros( (labels_raw.shape[0],1), dtype=np.float32 ) 33 | #print('anchor_weight', anchor_weight.shape) 34 | 35 | #assert labels.shape[0]==1 36 | #assert cls_score.shape[0]==1 37 | #assert bbox_weight.shape[0]==1 38 | #print('shape', cls_score.shape, labels.shape, file=sys.stderr) 39 | #print('bbox_weight 0', bbox_weight.shape, file=sys.stderr) 40 | #bbox_weight = np.zeros( (labels_raw.shape[0], labels_raw.shape[1], 4), dtype=np.float32) 41 | _stat = [0,0,0] 42 | for ibatch in range(labels_raw.shape[0]): 43 | _anchor_weight = np.zeros( (labels_raw.shape[1],1), dtype=np.float32) 44 | labels = labels_raw[ibatch] 45 | fg_score = cls_score[ibatch,1,:] - cls_score[ibatch,0,:] 46 | 47 | 48 | 49 | fg_inds = np.where(labels>0)[0] 50 | num_fg = int(config.TRAIN.RPN_FG_FRACTION * config.TRAIN.RPN_BATCH_SIZE) 51 | origin_num_fg = len(fg_inds) 52 | #print(len(fg_inds), num_fg, file=sys.stderr) 53 | if len(fg_inds) > num_fg: 54 | if self.mode==0: 55 | disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) 56 | labels[disable_inds] = -1 57 | else: 58 | pos_ohem_scores = fg_score[fg_inds] 59 | order_pos_ohem_scores = pos_ohem_scores.ravel().argsort() 60 | sampled_inds = fg_inds[order_pos_ohem_scores[:num_fg]] 61 | labels[fg_inds] = -1 62 | labels[sampled_inds] = 1 63 | 64 | n_fg = np.sum(labels>0) 65 | fg_inds = np.where(labels>0)[0] 66 | num_bg = config.TRAIN.RPN_BATCH_SIZE - n_fg 67 | if self.mode==2: 68 | num_bg = max(48, n_fg*int(1.0/config.TRAIN.RPN_FG_FRACTION-1)) 69 | 70 | bg_inds = np.where(labels == 0)[0] 71 | origin_num_bg = len(bg_inds) 72 | if num_bg==0: 73 | labels[bg_inds] = -1 74 | elif len(bg_inds) > num_bg: 75 | # sort ohem scores 76 | 77 | if self.mode==0: 78 | disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 79 | labels[disable_inds] = -1 80 | else: 81 | neg_ohem_scores = fg_score[bg_inds] 82 | order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1] 83 | sampled_inds = bg_inds[order_neg_ohem_scores[:num_bg]] 84 | #print('sampled_inds_bg', sampled_inds, file=sys.stderr) 85 | labels[bg_inds] = -1 86 | labels[sampled_inds] = 0 87 | 88 | if n_fg>0: 89 | order0_labels = labels.reshape( (1, A, -1) ).transpose( (0, 2, 1) ).reshape( (-1,) ) 90 | bbox_fg_inds = np.where(order0_labels>0)[0] 91 | #print('bbox_fg_inds, order0 ', bbox_fg_inds, file=sys.stderr) 92 | _anchor_weight[bbox_fg_inds,:] = 1.0 93 | anchor_weight[ibatch] = _anchor_weight 94 | valid_count[ibatch][0] = n_fg 95 | 96 | #if self.prefix=='face': 97 | # #print('fg-bg', self.stride, n_fg, num_bg) 98 | # STAT[0]+=1 99 | # STAT[self.stride][0] += config.TRAIN.RPN_BATCH_SIZE 100 | # STAT[self.stride][1] += n_fg 101 | # STAT[self.stride][2] += np.sum(fg_score[fg_inds]>=0) 102 | # #_stat[0] += config.TRAIN.RPN_BATCH_SIZE 103 | # #_stat[1] += n_fg 104 | # #_stat[2] += np.sum(fg_score[fg_inds]>=0) 105 | # #print('stride num_fg', self.stride, n_fg, file=sys.stderr) 106 | # #ACC[self.stride] += np.sum(fg_score[fg_inds]>=0) 107 | # #x = float(labels_raw.shape[0]*len(config.RPN_FEAT_STRIDE)) 108 | # x = 1.0 109 | # if STAT[0]%STEP==0: 110 | # _str = ['STAT'] 111 | # STAT[0] = 0 112 | # for k in config.RPN_FEAT_STRIDE: 113 | # acc = float(STAT[k][2])/STAT[k][1] 114 | # acc0 = float(STAT[k][1])/STAT[k][0] 115 | # #_str.append("%d: all-fg(%d, %d, %.4f), fg-fgcorrect(%d, %d, %.4f)"%(k,STAT[k][0], STAT[k][1], acc0, STAT[k][1], STAT[k][2], 
acc)) 116 | # _str.append("%d: (%d, %d, %.4f)"%(k, STAT[k][1], STAT[k][2], acc)) 117 | # STAT[k] = [0,0,0] 118 | # _str = ' | '.join(_str) 119 | # print(_str, file=sys.stderr) 120 | #if self.stride==4 and num_fg>0: 121 | # print('_stat_', self.stride, num_fg, num_bg, file=sys.stderr) 122 | 123 | #labels_ohem = mx.nd.array(labels_raw) 124 | #anchor_weight = mx.nd.array(anchor_weight) 125 | #print('valid_count', self.stride, np.sum(valid_count)) 126 | #print('_stat', _stat, valid_count) 127 | 128 | for ind, val in enumerate([labels_raw, anchor_weight, valid_count]): 129 | val = mx.nd.array(val) 130 | self.assign(out_data[ind], req[ind], val) 131 | 132 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 133 | for i in range(len(in_grad)): 134 | self.assign(in_grad[i], req[i], 0) 135 | 136 | 137 | @mx.operator.register('rpn_fpn_ohem3') 138 | class RPNFPNOHEM3Prop(mx.operator.CustomOpProp): 139 | def __init__(self, stride=0, network='', dataset='', prefix=''): 140 | super(RPNFPNOHEM3Prop, self).__init__(need_top_grad=False) 141 | self.stride = stride 142 | self.network=network 143 | self.dataset=dataset 144 | self.prefix = prefix 145 | 146 | def list_arguments(self): 147 | return ['cls_score', 'labels'] 148 | 149 | def list_outputs(self): 150 | return ['labels_ohem', 'anchor_weight', 'valid_count'] 151 | 152 | def infer_shape(self, in_shape): 153 | labels_shape = in_shape[1] 154 | #print('in_rpn_ohem', in_shape[0], in_shape[1], in_shape[2], file=sys.stderr) 155 | anchor_weight_shape = [labels_shape[0], labels_shape[1], 1] 156 | #print('in_rpn_ohem', labels_shape, anchor_weight_shape) 157 | 158 | return in_shape, \ 159 | [labels_shape, anchor_weight_shape, [labels_shape[0], 1]] 160 | 161 | def create_operator(self, ctx, shapes, dtypes): 162 | return RPNFPNOHEM3Operator(self.stride, self.network, self.dataset, self.prefix) 163 | 164 | def declare_backward_dependency(self, out_grad, in_data, out_data): 165 | return [] 166 | 167 | 168 | -------------------------------------------------------------------------------- /rcnn/processing/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rcnn.cython.bbox import bbox_overlaps_cython 3 | #from rcnn.config import config 4 | 5 | 6 | def bbox_overlaps(boxes, query_boxes): 7 | return bbox_overlaps_cython(boxes, query_boxes) 8 | 9 | 10 | def bbox_overlaps_py(boxes, query_boxes): 11 | """ 12 | determine overlaps between boxes and query_boxes 13 | :param boxes: n * 4 bounding boxes 14 | :param query_boxes: k * 4 bounding boxes 15 | :return: overlaps: n * k overlaps 16 | """ 17 | n_ = boxes.shape[0] 18 | k_ = query_boxes.shape[0] 19 | overlaps = np.zeros((n_, k_), dtype=np.float) 20 | for k in range(k_): 21 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 22 | for n in range(n_): 23 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 24 | if iw > 0: 25 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 26 | if ih > 0: 27 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 28 | all_area = float(box_area + query_box_area - iw * ih) 29 | overlaps[n, k] = iw * ih / all_area 30 | return overlaps 31 | 32 | 33 | def clip_boxes(boxes, im_shape): 34 | """ 35 | Clip boxes to image boundaries. 
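The coordinates are clamped in place, so x1/x2 land in [0, im_shape[1]-1] and y1/y2 in [0, im_shape[0]-1].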
36 | :param boxes: [N, 4* num_classes] 37 | :param im_shape: tuple of 2 38 | :return: [N, 4* num_classes] 39 | """ 40 | # x1 >= 0 41 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 42 | # y1 >= 0 43 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 44 | # x2 < im_shape[1] 45 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 46 | # y2 < im_shape[0] 47 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 48 | return boxes 49 | 50 | 51 | def nonlinear_transform(ex_rois, gt_rois): 52 | """ 53 | compute bounding box regression targets from ex_rois to gt_rois 54 | :param ex_rois: [N, 4] 55 | :param gt_rois: [N, 4] 56 | :return: [N, 4] 57 | """ 58 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 59 | 60 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 61 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 62 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 63 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 64 | 65 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 66 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 67 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 68 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 69 | 70 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 71 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 72 | targets_dw = np.log(gt_widths / ex_widths) 73 | targets_dh = np.log(gt_heights / ex_heights) 74 | 75 | if gt_rois.shape[1]<=4: 76 | targets = np.vstack( 77 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 78 | return targets 79 | else: 80 | targets = [targets_dx, targets_dy, targets_dw, targets_dh] 81 | #if config.USE_BLUR: 82 | # for i in range(4, gt_rois.shape[1]): 83 | # t = gt_rois[:,i] 84 | # targets.append(t) 85 | targets = np.vstack(targets).transpose() 86 | return targets 87 | 88 | def landmark_transform(ex_rois, gt_rois): 89 | 90 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 91 | 92 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 93 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 94 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 95 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 96 | 97 | 98 | targets = [] 99 | for i in range(gt_rois.shape[1]): 100 | for j in range(gt_rois.shape[2]): 101 | #if not config.USE_OCCLUSION and j==2: 102 | # continue 103 | if j==2: 104 | continue 105 | if j==0: #w 106 | target = (gt_rois[:,i,j] - ex_ctr_x) / (ex_widths + 1e-14) 107 | elif j==1: #h 108 | target = (gt_rois[:,i,j] - ex_ctr_y) / (ex_heights + 1e-14) 109 | else: #visibile 110 | target = gt_rois[:,i,j] 111 | targets.append(target) 112 | 113 | 114 | targets = np.vstack(targets).transpose() 115 | return targets 116 | 117 | 118 | def nonlinear_pred(boxes, box_deltas): 119 | """ 120 | Transform the set of class-agnostic boxes into class-specific boxes 121 | by applying the predicted offsets (box_deltas) 122 | :param boxes: !important [N 4] 123 | :param box_deltas: [N, 4 * num_classes] 124 | :return: [N 4 * num_classes] 125 | """ 126 | if boxes.shape[0] == 0: 127 | return np.zeros((0, box_deltas.shape[1])) 128 | 129 | boxes = boxes.astype(np.float, copy=False) 130 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 131 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 132 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 133 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 134 | 135 | dx = box_deltas[:, 0::4] 136 | dy = box_deltas[:, 1::4] 137 | dw = box_deltas[:, 2::4] 
139 | 
140 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
141 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
142 |     pred_w = np.exp(dw) * widths[:, np.newaxis]
143 |     pred_h = np.exp(dh) * heights[:, np.newaxis]
144 | 
145 |     pred_boxes = np.zeros(box_deltas.shape)
146 |     # x1
147 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)
148 |     # y1
149 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)
150 |     # x2
151 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)
152 |     # y2
153 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)
154 | 
155 |     return pred_boxes
156 | 
157 | def landmark_pred(boxes, landmark_deltas):
158 |     if boxes.shape[0] == 0:
159 |         return np.zeros((0, landmark_deltas.shape[1]))
160 |     boxes = boxes.astype(np.float64, copy=False)
161 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
162 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
163 |     ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
164 |     ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
165 |     preds = []
166 |     for i in range(landmark_deltas.shape[1]):
167 |         if i % 2 == 0:  # even columns are x-coordinates
168 |             pred = (landmark_deltas[:, i] * widths + ctr_x)
169 |         else:  # odd columns are y-coordinates
170 |             pred = (landmark_deltas[:, i] * heights + ctr_y)
171 |         preds.append(pred)
172 |     preds = np.vstack(preds).transpose()
173 |     return preds
174 | 
175 | def iou_transform(ex_rois, gt_rois):
176 |     """ return bbox targets, IoU loss uses gt_rois as gt """
177 |     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'
178 |     return gt_rois
179 | 
180 | 
181 | def iou_pred(boxes, box_deltas):
182 |     """
183 |     Transform the set of class-agnostic boxes into class-specific boxes
184 |     by applying the predicted offsets (box_deltas)
185 |     :param boxes: !important [N, 4]
186 |     :param box_deltas: [N, 4 * num_classes]
187 |     :return: [N, 4 * num_classes]
188 |     """
189 |     if boxes.shape[0] == 0:
190 |         return np.zeros((0, box_deltas.shape[1]))
191 | 
192 |     boxes = boxes.astype(np.float64, copy=False)
193 |     x1 = boxes[:, 0]
194 |     y1 = boxes[:, 1]
195 |     x2 = boxes[:, 2]
196 |     y2 = boxes[:, 3]
197 | 
198 |     dx1 = box_deltas[:, 0::4]
199 |     dy1 = box_deltas[:, 1::4]
200 |     dx2 = box_deltas[:, 2::4]
201 |     dy2 = box_deltas[:, 3::4]
202 | 
203 |     pred_boxes = np.zeros(box_deltas.shape)
204 |     # x1
205 |     pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis]
206 |     # y1
207 |     pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis]
208 |     # x2
209 |     pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis]
210 |     # y2
211 |     pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis]
212 | 
213 |     return pred_boxes
214 | 
215 | 
216 | # define bbox_transform and bbox_pred
217 | bbox_transform = nonlinear_transform
218 | bbox_pred = nonlinear_pred
219 | 
--------------------------------------------------------------------------------
/rcnn/pycocotools/maskApi.c:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #include "maskApi.h"
8 | #include <math.h>
9 | #include <stdlib.h>
10 | 
11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
12 | uint umax( uint a, uint b ) { return (a>b) ?
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 173 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 174 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 175 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 176 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 177 | } else for( d=0; d<=dy; d++ ) { 178 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 179 | } 180 | } 181 | /* get points along y-boundary and downsample */ 182 | free(x); free(y); k=m; m=0; double xd, yd; 183 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 184 | for( j=1; jw-1 ) continue; 187 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 189 | x[m]=(int) xd; y[m]=(int) yd; m++; 190 | } 191 | /* compute rle encoding given y-boundary points */ 192 | k=m; a=malloc(sizeof(uint)*(k+1)); 193 | for( j=0; j0) b[m++]=a[j++]; else { 199 | j++; if(jm, p=0; long x; int more; 206 | char *s=malloc(sizeof(char)*m*6); 207 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 209 | while( more ) { 210 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 211 | if(more) c |= 0x20; c+=48; s[p++]=c; 212 | } 213 | } 214 | s[p]=0; return s; 215 | } 216 | 217 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 218 | siz m=0, p=0, k; long x; int more; uint *cnts; 219 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 220 | while( s[p] ) { 221 | x=0; k=0; more=1; 222 | while( more ) { 223 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 224 | more = c & 0x20; p++; k++; 225 | if(!more && (c & 0x10)) x |= -1 << 5*k; 226 | } 227 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 228 | } 229 | rleInit(R,h,w,m,cnts); free(cnts); 230 | } 231 | -------------------------------------------------------------------------------- /rcnn/core/module.py: -------------------------------------------------------------------------------- 1 | """A `MutableModule` implement the `BaseModule` API, and allows input shape 2 | varying with training iterations. If shapes vary, executors will rebind, 3 | using shared arrays from the initial module binded with maximum shape. 4 | """ 5 | 6 | import logging 7 | 8 | from mxnet import context as ctx 9 | from mxnet.initializer import Uniform 10 | from mxnet.module.base_module import BaseModule 11 | from mxnet.module.module import Module 12 | 13 | class MutableModule(BaseModule): 14 | """A mutable module is a module that supports variable input data. 
15 | 16 | Parameters 17 | ---------- 18 | symbol : Symbol 19 | data_names : list of str 20 | label_names : list of str 21 | logger : Logger 22 | context : Context or list of Context 23 | work_load_list : list of number 24 | max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary 25 | max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary 26 | fixed_param_prefix : list of str, indicating fixed parameters 27 | """ 28 | def __init__(self, symbol, data_names, label_names, 29 | logger=logging, context=ctx.cpu(), work_load_list=None, 30 | max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None): 31 | super(MutableModule, self).__init__(logger=logger) 32 | self._symbol = symbol 33 | self._data_names = data_names 34 | self._label_names = label_names 35 | self._context = context 36 | self._work_load_list = work_load_list 37 | 38 | self._curr_module = None 39 | self._max_data_shapes = max_data_shapes 40 | self._max_label_shapes = max_label_shapes 41 | self._fixed_param_prefix = fixed_param_prefix 42 | 43 | fixed_param_names = list() 44 | if fixed_param_prefix is not None: 45 | for name in self._symbol.list_arguments(): 46 | for prefix in self._fixed_param_prefix: 47 | if prefix in name: 48 | fixed_param_names.append(name) 49 | self._fixed_param_names = fixed_param_names 50 | 51 | def _reset_bind(self): 52 | self.binded = False 53 | self._curr_module = None 54 | 55 | @property 56 | def data_names(self): 57 | return self._data_names 58 | 59 | @property 60 | def output_names(self): 61 | return self._symbol.list_outputs() 62 | 63 | @property 64 | def data_shapes(self): 65 | assert self.binded 66 | return self._curr_module.data_shapes 67 | 68 | @property 69 | def label_shapes(self): 70 | assert self.binded 71 | return self._curr_module.label_shapes 72 | 73 | @property 74 | def output_shapes(self): 75 | assert self.binded 76 | return self._curr_module.output_shapes 77 | 78 | def get_params(self): 79 | assert self.binded and self.params_initialized 80 | return self._curr_module.get_params() 81 | 82 | def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, 83 | allow_missing=False, force_init=False, allow_extra=False): 84 | if self.params_initialized and not force_init: 85 | return 86 | assert self.binded, 'call bind before initializing the parameters' 87 | self._curr_module.init_params(initializer=initializer, arg_params=arg_params, 88 | aux_params=aux_params, allow_missing=allow_missing, 89 | force_init=force_init, allow_extra=allow_extra) 90 | self.params_initialized = True 91 | 92 | def bind(self, data_shapes, label_shapes=None, for_training=True, 93 | inputs_need_grad=False, force_rebind=False, shared_module=None): 94 | # in case we already initialized params, keep it 95 | if self.params_initialized: 96 | arg_params, aux_params = self.get_params() 97 | 98 | # force rebinding is typically used when one want to switch from 99 | # training to prediction phase. 
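        # Only the executor is rebuilt on a rebind: the get_params() snapshot
        # taken above is restored via set_params() at the end of this method,
        # so learned weights survive the shape change.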
100 | if force_rebind: 101 | self._reset_bind() 102 | 103 | if self.binded: 104 | self.logger.warning('Already binded, ignoring bind()') 105 | return 106 | 107 | assert shared_module is None, 'shared_module for MutableModule is not supported' 108 | 109 | self.for_training = for_training 110 | self.inputs_need_grad = inputs_need_grad 111 | self.binded = True 112 | 113 | max_shapes_dict = dict() 114 | if self._max_data_shapes is not None: 115 | max_shapes_dict.update(dict(self._max_data_shapes)) 116 | if self._max_label_shapes is not None: 117 | max_shapes_dict.update(dict(self._max_label_shapes)) 118 | 119 | max_data_shapes = list() 120 | for name, shape in data_shapes: 121 | if name in max_shapes_dict: 122 | max_data_shapes.append((name, max_shapes_dict[name])) 123 | else: 124 | max_data_shapes.append((name, shape)) 125 | 126 | max_label_shapes = list() 127 | if label_shapes is not None: 128 | for name, shape in label_shapes: 129 | if name in max_shapes_dict: 130 | max_label_shapes.append((name, max_shapes_dict[name])) 131 | else: 132 | max_label_shapes.append((name, shape)) 133 | 134 | if len(max_label_shapes) == 0: 135 | max_label_shapes = None 136 | 137 | module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger, 138 | context=self._context, work_load_list=self._work_load_list, 139 | fixed_param_names=self._fixed_param_names) 140 | module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad, 141 | force_rebind=False, shared_module=None) 142 | self._curr_module = module 143 | 144 | # copy back saved params, if already initialized 145 | if self.params_initialized: 146 | self.set_params(arg_params, aux_params) 147 | 148 | def init_optimizer(self, kvstore='local', optimizer='sgd', 149 | optimizer_params=(('learning_rate', 0.01),), force_init=False): 150 | assert self.binded and self.params_initialized 151 | if self.optimizer_initialized and not force_init: 152 | self.logger.warning('optimizer already initialized, ignoring.') 153 | return 154 | 155 | self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params, 156 | force_init=force_init) 157 | self.optimizer_initialized = True 158 | 159 | def forward(self, data_batch, is_train=None): 160 | assert self.binded and self.params_initialized 161 | 162 | # get current_shapes 163 | if self._curr_module.label_shapes is not None: 164 | current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes) 165 | else: 166 | current_shapes = dict(self._curr_module.data_shapes) 167 | 168 | # get input_shapes 169 | if data_batch.provide_label is not None: 170 | input_shapes = dict(data_batch.provide_data + data_batch.provide_label) 171 | else: 172 | input_shapes = dict(data_batch.provide_data) 173 | 174 | # decide if shape changed 175 | shape_changed = False 176 | for k, v in current_shapes.items(): 177 | if v != input_shapes[k]: 178 | shape_changed = True 179 | 180 | if shape_changed: 181 | module = Module(self._symbol, self._data_names, self._label_names, 182 | logger=self.logger, context=self._context, 183 | work_load_list=self._work_load_list, 184 | fixed_param_names=self._fixed_param_names) 185 | module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training, 186 | self._curr_module.inputs_need_grad, force_rebind=False, 187 | shared_module=self._curr_module) 188 | self._curr_module = module 189 | 190 | self._curr_module.forward(data_batch, is_train=is_train) 191 | 192 | def backward(self, out_grads=None): 193 | assert self.binded and 
self.params_initialized 194 | self._curr_module.backward(out_grads=out_grads) 195 | 196 | def update(self): 197 | assert self.binded and self.params_initialized and self.optimizer_initialized 198 | self._curr_module.update() 199 | 200 | def get_outputs(self, merge_multi_context=True): 201 | assert self.binded and self.params_initialized 202 | return self._curr_module.get_outputs(merge_multi_context=merge_multi_context) 203 | 204 | def get_input_grads(self, merge_multi_context=True): 205 | assert self.binded and self.params_initialized and self.inputs_need_grad 206 | return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context) 207 | 208 | def update_metric(self, eval_metric, labels): 209 | assert self.binded and self.params_initialized 210 | self._curr_module.update_metric(eval_metric, labels) 211 | 212 | def install_monitor(self, mon): 213 | """ Install monitor on all executors """ 214 | assert self.binded 215 | self._curr_module.install_monitor(mon) 216 | -------------------------------------------------------------------------------- /rcnn/core/module_bak.py: -------------------------------------------------------------------------------- 1 | """A `MutableModule` implement the `BaseModule` API, and allows input shape 2 | varying with training iterations. If shapes vary, executors will rebind, 3 | using shared arrays from the initial module binded with maximum shape. 4 | """ 5 | 6 | import logging 7 | 8 | from mxnet import context as ctx 9 | from mxnet.initializer import Uniform 10 | from mxnet.module.base_module import BaseModule 11 | from mxnet.module.module import Module 12 | 13 | class MutableModule(BaseModule): 14 | """A mutable module is a module that supports variable input data. 15 | 16 | Parameters 17 | ---------- 18 | symbol : Symbol 19 | data_names : list of str 20 | label_names : list of str 21 | logger : Logger 22 | context : Context or list of Context 23 | work_load_list : list of number 24 | max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary 25 | max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary 26 | fixed_param_prefix : list of str, indicating fixed parameters 27 | """ 28 | def __init__(self, symbol, data_names, label_names, 29 | logger=logging, context=ctx.cpu(), work_load_list=None, 30 | max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None): 31 | super(MutableModule, self).__init__(logger=logger) 32 | self._symbol = symbol 33 | self._data_names = data_names 34 | self._label_names = label_names 35 | self._context = context 36 | self._work_load_list = work_load_list 37 | 38 | self._curr_module = None 39 | self._max_data_shapes = max_data_shapes 40 | self._max_label_shapes = max_label_shapes 41 | self._fixed_param_prefix = fixed_param_prefix 42 | 43 | fixed_param_names = list() 44 | if fixed_param_prefix is not None: 45 | for name in self._symbol.list_arguments(): 46 | for prefix in self._fixed_param_prefix: 47 | if prefix in name: 48 | fixed_param_names.append(name) 49 | self._fixed_param_names = fixed_param_names 50 | 51 | def _reset_bind(self): 52 | self.binded = False 53 | self._curr_module = None 54 | 55 | @property 56 | def data_names(self): 57 | return self._data_names 58 | 59 | @property 60 | def output_names(self): 61 | return self._symbol.list_outputs() 62 | 63 | @property 64 | def data_shapes(self): 65 | assert self.binded 66 | return self._curr_module.data_shapes 67 | 68 | @property 69 | def label_shapes(self): 70 | assert self.binded 71 | return 
self._curr_module.label_shapes 72 | 73 | @property 74 | def output_shapes(self): 75 | assert self.binded 76 | return self._curr_module.output_shapes 77 | 78 | def get_params(self): 79 | assert self.binded and self.params_initialized 80 | return self._curr_module.get_params() 81 | 82 | def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, 83 | allow_missing=False, force_init=False, allow_extra=False): 84 | if self.params_initialized and not force_init: 85 | return 86 | assert self.binded, 'call bind before initializing the parameters' 87 | self._curr_module.init_params(initializer=initializer, arg_params=arg_params, 88 | aux_params=aux_params, allow_missing=allow_missing, 89 | force_init=force_init, allow_extra=allow_extra) 90 | self.params_initialized = True 91 | 92 | def bind(self, data_shapes, label_shapes=None, for_training=True, 93 | inputs_need_grad=False, force_rebind=False, shared_module=None, grad_req='write'): 94 | # in case we already initialized params, keep it 95 | if self.params_initialized: 96 | arg_params, aux_params = self.get_params() 97 | 98 | # force rebinding is typically used when one want to switch from 99 | # training to prediction phase. 100 | if force_rebind: 101 | self._reset_bind() 102 | 103 | if self.binded: 104 | self.logger.warning('Already binded, ignoring bind()') 105 | return 106 | 107 | assert shared_module is None, 'shared_module for MutableModule is not supported' 108 | 109 | self.for_training = for_training 110 | self.inputs_need_grad = inputs_need_grad 111 | self.binded = True 112 | 113 | max_shapes_dict = dict() 114 | if self._max_data_shapes is not None: 115 | max_shapes_dict.update(dict(self._max_data_shapes)) 116 | if self._max_label_shapes is not None: 117 | max_shapes_dict.update(dict(self._max_label_shapes)) 118 | 119 | max_data_shapes = list() 120 | for name, shape in data_shapes: 121 | if name in max_shapes_dict: 122 | max_data_shapes.append((name, max_shapes_dict[name])) 123 | else: 124 | max_data_shapes.append((name, shape)) 125 | 126 | max_label_shapes = list() 127 | if label_shapes is not None: 128 | for name, shape in label_shapes: 129 | if name in max_shapes_dict: 130 | max_label_shapes.append((name, max_shapes_dict[name])) 131 | else: 132 | max_label_shapes.append((name, shape)) 133 | 134 | if len(max_label_shapes) == 0: 135 | max_label_shapes = None 136 | 137 | module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger, 138 | context=self._context, work_load_list=self._work_load_list, 139 | fixed_param_names=self._fixed_param_names) 140 | module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad, 141 | force_rebind=False, shared_module=None) 142 | self._curr_module = module 143 | 144 | # copy back saved params, if already initialized 145 | if self.params_initialized: 146 | self.set_params(arg_params, aux_params) 147 | 148 | def init_optimizer(self, kvstore='local', optimizer='sgd', 149 | optimizer_params=(('learning_rate', 0.01),), force_init=False): 150 | assert self.binded and self.params_initialized 151 | if self.optimizer_initialized and not force_init: 152 | self.logger.warning('optimizer already initialized, ignoring.') 153 | return 154 | 155 | self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params, 156 | force_init=force_init) 157 | self.optimizer_initialized = True 158 | 159 | def forward(self, data_batch, is_train=None): 160 | assert self.binded and self.params_initialized 161 | 162 | # get current_shapes 163 | if 
self._curr_module.label_shapes is not None: 164 | current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes) 165 | else: 166 | current_shapes = dict(self._curr_module.data_shapes) 167 | 168 | # get input_shapes 169 | if data_batch.provide_label is not None: 170 | input_shapes = dict(data_batch.provide_data + data_batch.provide_label) 171 | else: 172 | input_shapes = dict(data_batch.provide_data) 173 | 174 | # decide if shape changed 175 | shape_changed = False 176 | for k, v in current_shapes.items(): 177 | if v != input_shapes[k]: 178 | shape_changed = True 179 | 180 | if shape_changed: 181 | module = Module(self._symbol, self._data_names, self._label_names, 182 | logger=self.logger, context=self._context, 183 | work_load_list=self._work_load_list, 184 | fixed_param_names=self._fixed_param_names) 185 | module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training, 186 | self._curr_module.inputs_need_grad, force_rebind=False, 187 | shared_module=self._curr_module) 188 | self._curr_module = module 189 | 190 | self._curr_module.forward(data_batch, is_train=is_train) 191 | 192 | def backward(self, out_grads=None): 193 | assert self.binded and self.params_initialized 194 | self._curr_module.backward(out_grads=out_grads) 195 | 196 | def update(self): 197 | assert self.binded and self.params_initialized and self.optimizer_initialized 198 | self._curr_module.update() 199 | 200 | def get_outputs(self, merge_multi_context=True): 201 | assert self.binded and self.params_initialized 202 | return self._curr_module.get_outputs(merge_multi_context=merge_multi_context) 203 | 204 | def get_input_grads(self, merge_multi_context=True): 205 | assert self.binded and self.params_initialized and self.inputs_need_grad 206 | return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context) 207 | 208 | def update_metric(self, eval_metric, labels): 209 | assert self.binded and self.params_initialized 210 | self._curr_module.update_metric(eval_metric, labels) 211 | 212 | def install_monitor(self, mon): 213 | """ Install monitor on all executors """ 214 | assert self.binded 215 | self._curr_module.install_monitor(mon) 216 | -------------------------------------------------------------------------------- /rcnn/tools/train_rcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..core import callback, metric 9 | from ..core.loader import ROIIter 10 | from ..core.module import MutableModule 11 | from ..processing.bbox_regression import add_bbox_regression_targets 12 | from ..utils.load_data import load_proposal_roidb, merge_roidb, filter_roidb 13 | from ..utils.load_model import load_param 14 | 15 | 16 | def train_rcnn(network, dataset, image_set, root_path, dataset_path, 17 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 18 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 19 | train_shared, lr, lr_step, proposal): 20 | # set up config 21 | config.TRAIN.BATCH_IMAGES = 2 22 | config.TRAIN.BATCH_ROIS = 128 23 | if proposal == 'ss': 24 | config.TRAIN.BG_THRESH_LO = 0.1 # reproduce Fast R-CNN 25 | 26 | # load symbol 27 | sym = eval('get_' + network + '_rcnn')(num_classes=config.NUM_CLASSES) 28 | 29 | # setup multi-gpu 30 | batch_size = len(ctx) 31 | input_batch_size = config.TRAIN.BATCH_IMAGES * 
batch_size 32 | 33 | # print config 34 | logger.info(pprint.pformat(config)) 35 | 36 | # load dataset and prepare imdb for training 37 | image_sets = [iset for iset in image_set.split('+')] 38 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 39 | proposal=proposal, append_gt=True, flip=not no_flip) 40 | for image_set in image_sets] 41 | roidb = merge_roidb(roidbs) 42 | roidb = filter_roidb(roidb) 43 | means, stds = add_bbox_regression_targets(roidb) 44 | 45 | # load training data 46 | train_data = ROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 47 | ctx=ctx, work_load_list=work_load_list, aspect_grouping=config.TRAIN.ASPECT_GROUPING) 48 | 49 | # infer max shape 50 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 51 | logger.info('providing maximum shape %s' % max_data_shape) 52 | 53 | # infer shape 54 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 55 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 56 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 57 | out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) 58 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 59 | logger.info('output shape %s' % pprint.pformat(out_shape_dict)) 60 | 61 | # load and initialize params 62 | if resume: 63 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 64 | else: 65 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 66 | arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight']) 67 | arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias']) 68 | arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight']) 69 | arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias']) 70 | 71 | # check parameter shapes 72 | for k in sym.list_arguments(): 73 | if k in data_shape_dict: 74 | continue 75 | assert k in arg_params, k + ' not initialized' 76 | assert arg_params[k].shape == arg_shape_dict[k], \ 77 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 78 | for k in sym.list_auxiliary_states(): 79 | assert k in aux_params, k + ' not initialized' 80 | assert aux_params[k].shape == aux_shape_dict[k], \ 81 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 82 | 83 | # prepare training 84 | # create solver 85 | data_names = [k[0] for k in train_data.provide_data] 86 | label_names = [k[0] for k in train_data.provide_label] 87 | if train_shared: 88 | fixed_param_prefix = config.FIXED_PARAMS_SHARED 89 | else: 90 | fixed_param_prefix = config.FIXED_PARAMS 91 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 92 | logger=logger, context=ctx, work_load_list=work_load_list, 93 | max_data_shapes=max_data_shape, fixed_param_prefix=fixed_param_prefix) 94 | 95 | # decide training params 96 | # metric 97 | eval_metric = metric.RCNNAccMetric() 98 | cls_metric = metric.RCNNLogLossMetric() 99 | bbox_metric = metric.RCNNL1LossMetric() 100 | eval_metrics = mx.metric.CompositeEvalMetric() 101 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 102 | eval_metrics.add(child_metric) 103 | # callback 104 | batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent, auto_reset=False) 
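    # The schedule below converts epoch milestones into iteration counts via
    # len(roidb) / batch_size. Worked example (hypothetical numbers): with
    # 80000 roidb entries, 8 GPUs (batch_size = 8) and lr_step = '7,10', the
    # 0.1x drops land at iterations 70000 and 100000.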
105 | epoch_end_callback = callback.do_checkpoint(prefix, means, stds) 106 | # decide learning rate 107 | base_lr = lr 108 | lr_factor = 0.1 109 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 110 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 111 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 112 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 113 | logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters)) 114 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) 115 | # optimizer 116 | optimizer_params = {'momentum': 0.9, 117 | 'wd': 0.0005, 118 | 'learning_rate': lr, 119 | 'lr_scheduler': lr_scheduler, 120 | 'rescale_grad': (1.0 / batch_size), 121 | 'clip_gradient': 5} 122 | 123 | # train 124 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 125 | batch_end_callback=batch_end_callback, kvstore=kvstore, 126 | optimizer='sgd', optimizer_params=optimizer_params, 127 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 128 | 129 | 130 | def parse_args(): 131 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN Network') 132 | # general 133 | parser.add_argument('--network', help='network name', default=default.network, type=str) 134 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 135 | args, rest = parser.parse_known_args() 136 | generate_config(args.network, args.dataset) 137 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 138 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 139 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 140 | # training 141 | parser.add_argument('--frequent', help='frequency of logging', default=default.frequent, type=int) 142 | parser.add_argument('--kvstore', help='the kv-store type', default=default.kvstore, type=str) 143 | parser.add_argument('--work_load_list', help='work load for different devices', default=None, type=list) 144 | parser.add_argument('--no_flip', help='disable flip images', action='store_true') 145 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 146 | parser.add_argument('--resume', help='continue training', action='store_true') 147 | # rcnn 148 | parser.add_argument('--gpus', help='GPU device to train with', default='0', type=str) 149 | parser.add_argument('--pretrained', help='pretrained model prefix', default=default.pretrained, type=str) 150 | parser.add_argument('--pretrained_epoch', help='pretrained model epoch', default=default.pretrained_epoch, type=int) 151 | parser.add_argument('--prefix', help='new model prefix', default=default.rcnn_prefix, type=str) 152 | parser.add_argument('--begin_epoch', help='begin epoch of training', default=0, type=int) 153 | parser.add_argument('--end_epoch', help='end epoch of training', default=default.rcnn_epoch, type=int) 154 | parser.add_argument('--lr', help='base learning rate', default=default.rcnn_lr, type=float) 155 | parser.add_argument('--lr_step', help='learning rate steps (in epoch)', default=default.rcnn_lr_step, type=str) 156 | parser.add_argument('--train_shared', help='second round train shared params', action='store_true') 157 | parser.add_argument('--proposal', help='can be ss for selective search or rpn', default='rpn', 
type=str) 158 | args = parser.parse_args() 159 | return args 160 | 161 | 162 | def main(): 163 | args = parse_args() 164 | logger.info('Called with argument: %s' % args) 165 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] 166 | train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 167 | args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, 168 | ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, 169 | train_shared=args.train_shared, lr=args.lr, lr_step=args.lr_step, proposal=args.proposal) 170 | 171 | if __name__ == '__main__': 172 | main() 173 | -------------------------------------------------------------------------------- /docs/results.md: -------------------------------------------------------------------------------- 1 | ## Results 2 | | face-img | 5 nearest | 3 | |-----|------| 4 | | ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) | 00 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 16 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 16 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 17 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 17 ![face-img](faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg) | 5 | | ![face-img](faces/dbbd360f-4fea-4911-9233-063eda812445.jpg) | 00 ![face-img](faces/dbbd360f-4fea-4911-9233-063eda812445.jpg) 16 ![face-img](faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg) 19 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 19 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 20 ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) | 6 | | ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) | 00 ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) 16 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) 18 ![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) 18 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) | 7 | | ![face-img](faces/bcf97241-96e4-4726-8287-fdab88217584.jpg) | 00 ![face-img](faces/bcf97241-96e4-4726-8287-fdab88217584.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 20 ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) 21 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) 21 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) | 8 | | ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) | 00 ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) 15 ![face-img](faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 19 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 20 ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) | 9 | | ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) | 00 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 16 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 17 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 17 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) | 10 | | ![face-img](faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg) | 00 ![face-img](faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg) 15 ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 19 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 20 
![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 11 | | ![face-img](faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg) | 00 ![face-img](faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg) 18 ![face-img](faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg) 22 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 22 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 23 ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) | 12 | | ![face-img](faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg) | 00 ![face-img](faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg) 16 ![face-img](faces/dbbd360f-4fea-4911-9233-063eda812445.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 19 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 20 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) | 13 | | ![face-img](faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg) | 00 ![face-img](faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg) 17 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 20 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 20 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) 21 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 14 | | ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) | 00 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 16 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) | 15 | | ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) | 00 ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) 12 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 16 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) 16 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 16 | | ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) | 00 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 17 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 18 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) | 17 | | ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) | 00 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) 16 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 17 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 18 | | ![face-img](faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg) | 00 ![face-img](faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg) 15 ![face-img](faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg) 20 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) 21 ![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) 21 ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) | 19 | | ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) | 00 ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 18 ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) 18 ![face-img](faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg) | 20 | | 
![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) | 00 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) 12 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) 16 ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) 17 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) | 21 | | ![face-img](faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg) | 00 ![face-img](faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg) 18 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) | 22 | | ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 00 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 16 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 16 ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) 17 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 17 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) | 23 | | ![face-img](faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg) | 00 ![face-img](faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg) 18 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 18 ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) | 24 | | ![face-img](faces/4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg) | 00 ![face-img](faces/4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg) 20 ![face-img](faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg) 21 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 21 ![face-img](faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg) 22 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) | 25 | | ![face-img](faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg) | 00 ![face-img](faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg) 15 ![face-img](faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg) 19 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) 19 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 19 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) | 26 | | ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) | 00 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 12 ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) 16 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) 17 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) | 27 | | ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) | 00 ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) 16 ![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) | 28 | | ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) | 00 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 16 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 16 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 17 ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) 17 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) | 29 | | ![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) | 00 
![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) 16 ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) 18 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 18 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) | 30 | | ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) | 00 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) 12 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) 16 ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) 16 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) | 31 | -------------------------------------------------------------------------------- /rcnn/tools/train_rpn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import pprint 4 | import mxnet as mx 5 | 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..core import callback, metric 9 | from ..core.loader import AnchorLoaderFPN 10 | from ..core.module import MutableModule 11 | from ..utils.load_data import load_gt_roidb, merge_roidb, filter_roidb 12 | from ..utils.load_model import load_param 13 | 14 | 15 | def train_rpn(network, dataset, image_set, root_path, dataset_path, 16 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 17 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 18 | train_shared, lr, lr_step): 19 | # set up logger 20 | logging.basicConfig() 21 | logger = logging.getLogger() 22 | logger.setLevel(logging.INFO) 23 | 24 | # setup config 25 | assert config.TRAIN.BATCH_IMAGES==1 26 | 27 | # load symbol 28 | sym = eval('get_' + network + '_rpn')() 29 | feat_sym = [] 30 | for stride in config.RPN_FEAT_STRIDE: 31 | feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' % stride]) 32 | 33 | 34 | # setup multi-gpu 35 | batch_size = len(ctx) 36 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 37 | 38 | # print config 39 | pprint.pprint(config) 40 | 41 | # load dataset and prepare imdb for training 42 | image_sets = [iset for iset in image_set.split('+')] 43 | roidbs = [load_gt_roidb(dataset, image_set, root_path, dataset_path, 44 | flip=not no_flip) 45 | for image_set in image_sets] 46 | roidb = merge_roidb(roidbs) 47 | roidb = filter_roidb(roidb) 48 | 49 | # load training data 50 | #train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 51 | # ctx=ctx, work_load_list=work_load_list, 52 | # feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES, 53 | # anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, 54 | # allowed_border=9999) 55 | train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 56 | ctx=ctx, work_load_list=work_load_list) 57 | 58 | # infer max shape 59 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 60 | max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) 61 | print 'providing maximum shape', max_data_shape, max_label_shape 62 | 63 | # infer shape 64 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 65 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 66 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 67 | out_shape_dict = 
dict(zip(sym.list_outputs(), out_shape))
68 |     aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
69 |     print 'output shape'
70 |     pprint.pprint(out_shape_dict)
71 | 
72 |     # load and initialize params
73 |     if resume:
74 |         arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
75 |     else:
76 |         arg_params, aux_params = load_param(pretrained, epoch, convert=True)
77 |         init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
78 |         init_internal = mx.init.Normal(sigma=0.01)
79 |         for k in sym.list_arguments():
80 |             if k in data_shape_dict:
81 |                 continue
82 |             if k not in arg_params:
83 |                 print 'init', k
84 |                 arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
85 |                 if not k.endswith('bias'):
86 |                     init_internal(k, arg_params[k])
87 | 
88 |         for k in sym.list_auxiliary_states():
89 |             if k not in aux_params:
90 |                 print 'init', k
91 |                 aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k])
92 |                 init(k, aux_params[k])
93 | 
94 |     # check parameter shapes
95 |     for k in sym.list_arguments():
96 |         if k in data_shape_dict:
97 |             continue
98 |         assert k in arg_params, k + ' not initialized'
99 |         assert arg_params[k].shape == arg_shape_dict[k], \
100 |             'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
101 |     for k in sym.list_auxiliary_states():
102 |         assert k in aux_params, k + ' not initialized'
103 |         assert aux_params[k].shape == aux_shape_dict[k], \
104 |             'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)
105 | 
106 |     # create solver
107 |     data_names = [k[0] for k in train_data.provide_data]
108 |     label_names = [k[0] for k in train_data.provide_label]
109 |     if train_shared:
110 |         fixed_param_prefix = config.FIXED_PARAMS_SHARED
111 |     else:
112 |         fixed_param_prefix = config.FIXED_PARAMS
113 |     mod = MutableModule(sym, data_names=data_names, label_names=label_names,
114 |                         logger=logger, context=ctx, work_load_list=work_load_list,
115 |                         max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
116 |                         fixed_param_prefix=fixed_param_prefix)
117 | 
118 |     # decide training params
119 |     # metric
120 |     eval_metric = metric.RPNAccMetric()
121 |     cls_metric = metric.RPNLogLossMetric()
122 |     bbox_metric = metric.RPNL1LossMetric()
123 |     eval_metrics = mx.metric.CompositeEvalMetric()
124 |     for child_metric in [eval_metric, cls_metric, bbox_metric]:
125 |         eval_metrics.add(child_metric)
126 |     # callback
127 |     batch_end_callback = []
128 |     batch_end_callback.append(mx.callback.Speedometer(train_data.batch_size, frequent=frequent))
129 |     epoch_end_callback = mx.callback.do_checkpoint(prefix)
130 |     # decide learning rate
131 |     base_lr = lr
132 |     lr_factor = 0.1
133 |     lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
134 |     lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
135 |     lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
136 |     lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
137 |     print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters
138 |     lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
139 |     # optimizer
140 |     optimizer_params = {'momentum': 0.9,
141 |                         'wd': 0.0001,
142 |                         'learning_rate': lr,
143 |                         'lr_scheduler': lr_scheduler,
144 |                         'rescale_grad': (1.0 / batch_size),
145 |                         'clip_gradient': 5}
146 | 
147 |     # train
148 |     mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
149 |             batch_end_callback=batch_end_callback,
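            # rescale_grad = 1 / len(ctx) above normalizes the summed gradient
            # by the device count, matching the BATCH_IMAGES == 1 per-device
            # assertion at the top of this function.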
kvstore=kvstore, 150 | optimizer='sgd', optimizer_params=optimizer_params, 151 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 152 | 153 | 154 | def parse_args(): 155 | parser = argparse.ArgumentParser(description='Train a Region Proposal Network') 156 | # general 157 | parser.add_argument('--network', help='network name', default=default.network, type=str) 158 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 159 | args, rest = parser.parse_known_args() 160 | generate_config(args.network, args.dataset) 161 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 162 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 163 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 164 | # training 165 | parser.add_argument('--frequent', help='frequency of logging', default=default.frequent, type=int) 166 | parser.add_argument('--kvstore', help='the kv-store type', default=default.kvstore, type=str) 167 | parser.add_argument('--work_load_list', help='work load for different devices', default=None, type=list) 168 | parser.add_argument('--no_flip', help='disable flip images', action='store_true') 169 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 170 | parser.add_argument('--resume', help='continue training', action='store_true') 171 | # rpn 172 | parser.add_argument('--gpus', help='GPU device to train with', default='0', type=str) 173 | parser.add_argument('--pretrained', help='pretrained model prefix', default=default.pretrained, type=str) 174 | parser.add_argument('--pretrained_epoch', help='pretrained model epoch', default=default.pretrained_epoch, type=int) 175 | parser.add_argument('--prefix', help='new model prefix', default=default.rpn_prefix, type=str) 176 | parser.add_argument('--begin_epoch', help='begin epoch of training', default=0, type=int) 177 | parser.add_argument('--end_epoch', help='end epoch of training', default=default.rpn_epoch, type=int) 178 | parser.add_argument('--lr', help='base learning rate', default=default.rpn_lr, type=float) 179 | parser.add_argument('--lr_step', help='learning rate steps (in epoch)', default=default.rpn_lr_step, type=str) 180 | parser.add_argument('--train_shared', help='second round train shared params', action='store_true') 181 | args = parser.parse_args() 182 | return args 183 | 184 | 185 | def main(): 186 | args = parse_args() 187 | print 'Called with argument:', args 188 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] 189 | train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 190 | args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, 191 | ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, 192 | train_shared=args.train_shared, lr=args.lr, lr_step=args.lr_step) 193 | 194 | if __name__ == '__main__': 195 | main() 196 | -------------------------------------------------------------------------------- /rcnn/sample_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from easydict import EasyDict as edict 3 | 4 | config = edict() 5 | 6 | # network related params 7 | config.PIXEL_MEANS = np.array([103.939, 116.779, 123.68]) 8 | config.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 9 | config.PIXEL_SCALE = 1.0 
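# The three settings above imply a per-channel normalization of roughly
#   tensor = (pixel / PIXEL_SCALE - PIXEL_MEANS) / PIXEL_STDS
# (an assumption from these defaults: means near 103/116/123 with SCALE 1.0
# and unit STDS point to raw 0-255 BGR input).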
10 | config.IMAGE_STRIDE = 0
11 | 
12 | # dataset related params
13 | config.NUM_CLASSES = 2
14 | config.PRE_SCALES = [(1200, 1600)] # first is scale (the shorter side); second is max size
15 | config.SCALES = [(640, 640)] # first is scale (the shorter side); second is max size
16 | #config.SCALES = [(800, 800)] # first is scale (the shorter side); second is max size
17 | config.ORIGIN_SCALE = False
18 | 
19 | _ratio = (1.,)
20 | 
21 | RAC_SSH = {
22 |     '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
23 |     '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
24 |     '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
25 | }
26 | 
27 | _ratio = (1.,1.5)
28 | RAC_SSH2 = {
29 |     '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
30 |     '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
31 |     '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
32 | }
33 | 
34 | _ratio = (1.,1.5)
35 | RAC_SSH3 = {
36 |     '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
37 |     '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
38 |     '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
39 |     '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
40 | }
41 | 
42 | RAC_RETINA = {}
43 | _ratios = (1.0,)
44 | _ass = 2.0**(1.0/3)
45 | _basescale = 1.0
46 | for _stride in [4, 8, 16, 32, 64]:
47 |     key = str(_stride)
48 |     value = {'BASE_SIZE': 16, 'RATIOS': _ratios, 'ALLOWED_BORDER': 9999}
49 |     scales = []
50 |     for _ in range(3):
51 |         scales.append(_basescale)
52 |         _basescale *= _ass
53 |     value['SCALES'] = tuple(scales)
54 |     RAC_RETINA[key] = value
55 | 
56 | 
57 | config.RPN_ANCHOR_CFG = RAC_SSH #default
58 | 
59 | config.NET_MODE = 2
60 | config.HEAD_MODULE = 'SSH'
61 | #config.HEAD_MODULE = 'RF'
62 | config.LR_MODE = 0
63 | config.LANDMARK_LR_MULT = 2.0
64 | config.HEAD_FILTER_NUM = 256
65 | config.CONTEXT_FILTER_RATIO = 1
66 | config.max_feat_channel = 9999
67 | 
68 | config.USE_CROP = True
69 | config.USE_FPN = True
70 | config.USE_DCN = 0
71 | config.FACE_LANDMARK = True
72 | config.USE_OCCLUSION = False
73 | config.USE_BLUR = False
74 | config.MORE_SMALL_BOX = True
75 | 
76 | config.LAYER_FIX = False
77 | 
78 | config.CASCADE = 0
79 | config.CASCADE_MODE = 1
80 | #config.CASCADE_CLS_STRIDES = [16,8,4]
81 | #config.CASCADE_BBOX_STRIDES = [64,32]
82 | config.CASCADE_CLS_STRIDES = [64,32,16,8,4]
83 | config.CASCADE_BBOX_STRIDES = [64,32,16,8,4]
84 | #config.CASCADE_BBOX_STRIDES = [64,32,16,8]
85 | 
86 | config.HEAD_BOX = False
87 | config.DENSE_ANCHOR = False
88 | config.USE_MAXOUT = 0
89 | config.SHARE_WEIGHT_BBOX = False
90 | config.SHARE_WEIGHT_LANDMARK = False
91 | 
92 | config.RANDOM_FEAT_STRIDE = False
93 | config.NUM_CPU = 4
94 | config.MIXUP = 0.0
95 | config.USE_3D = False
96 | 
97 | #config.BBOX_MASK_THRESH = 0
98 | config.COLOR_MODE = 2
99 | config.COLOR_JITTERING = 0.125
100 | #config.COLOR_JITTERING = 0
101 | #config.COLOR_JITTERING = 0.2
102 | 
103 | 
104 | config.TRAIN = edict()
105 | 
106 | config.TRAIN.IMAGE_ALIGN = 0
107 | config.TRAIN.MIN_BOX_SIZE = 0
108 | config.BBOX_MASK_THRESH = config.TRAIN.MIN_BOX_SIZE
109 | # R-CNN and RPN
110 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e
111 | config.TRAIN.BATCH_IMAGES = 8
112 | # e2e changes behavior of anchor loader and metric
113 | config.TRAIN.END2END = True
114 | # group images with similar aspect ratio
115 | config.TRAIN.ASPECT_GROUPING = False
116 | 
117 | # RPN anchor loader
118 | # rpn anchors batch size
119 | config.TRAIN.RPN_ENABLE_OHEM = 2
120 | config.TRAIN.RPN_BATCH_SIZE = 256
121 | # rpn anchors sampling params
122 | config.TRAIN.RPN_FG_FRACTION = 0.25
123 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.5
124 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
125 | if config.CASCADE>0:
126 |     config.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
127 | config.TRAIN.CASCADE_OVERLAP = [0.4, 0.5]
128 | config.TRAIN.RPN_CLOBBER_POSITIVES = False
129 | config.TRAIN.RPN_FORCE_POSITIVE = False
130 | # rpn bounding box regression params
131 | config.TRAIN.BBOX_STDS = (1.0, 1.0, 1.0, 1.0)
132 | config.TRAIN.LANDMARK_STD = 1.0
133 | 
134 | 
135 | config.TEST = edict()
136 | 
137 | # R-CNN testing
138 | # use rpn to generate proposal
139 | config.TEST.HAS_RPN = False
140 | # size of images for each device
141 | config.TEST.BATCH_IMAGES = 1
142 | 
143 | # RPN proposal
144 | config.TEST.CXX_PROPOSAL = True
145 | config.TEST.RPN_NMS_THRESH = 0.3
146 | config.TEST.RPN_PRE_NMS_TOP_N = 1000
147 | config.TEST.RPN_POST_NMS_TOP_N = 3000
148 | #config.TEST.RPN_MIN_SIZE = config.RPN_FEAT_STRIDE
149 | #config.TEST.RPN_MIN_SIZE = [0,0,0]
150 | 
151 | # RCNN nms
152 | config.TEST.NMS = 0.3
153 | 
154 | config.TEST.SCORE_THRESH = 0.05
155 | config.TEST.IOU_THRESH = 0.5
156 | 
157 | 
158 | # network settings
159 | network = edict()
160 | 
161 | network.ssh = edict()
162 | 
163 | network.mnet = edict()
164 | #network.mnet.pretrained = 'model/mnasnet'
165 | #network.mnet.pretrained = 'model/mobilenetv2_0_5'
166 | #network.mnet.pretrained = 'model/mobilenet_0_5'
167 | #network.mnet.MULTIPLIER = 0.5
168 | #network.mnet.pretrained = 'model/mobilenet_0_25'
169 | #network.mnet.pretrained_epoch = 0
170 | #network.mnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485])
171 | #network.mnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229])
172 | #network.mnet.PIXEL_SCALE = 255.0
173 | network.mnet.FIXED_PARAMS = ['^stage1', '^.*upsampling']
174 | network.mnet.BATCH_IMAGES = 16
175 | network.mnet.HEAD_FILTER_NUM = 64
176 | network.mnet.CONTEXT_FILTER_RATIO = 1
177 | 
178 | network.mnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0])
179 | network.mnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0])
180 | network.mnet.PIXEL_SCALE = 1.0
181 | #network.mnet.pretrained = 'model/mobilenetfd_0_25' #78
182 | #network.mnet.pretrained = 'model/mobilenetfd2' #75
183 | network.mnet.pretrained = 'model/mobilenet025fd0' #78
184 | #network.mnet.pretrained = 'model/mobilenet025fd1' #75
185 | #network.mnet.pretrained = 'model/mobilenet025fd2' #
186 | network.mnet.pretrained_epoch = 0
187 | network.mnet.max_feat_channel = 8888
188 | network.mnet.COLOR_MODE = 1
189 | network.mnet.USE_CROP = True
190 | network.mnet.RPN_ANCHOR_CFG = RAC_SSH
191 | network.mnet.LAYER_FIX = True
192 | network.mnet.LANDMARK_LR_MULT = 2.5
193 | 
194 | 
195 | network.resnet = edict()
196 | #network.resnet.pretrained = 'model/ResNet50_v1d'
197 | #network.resnet.pretrained = 'model/resnet-50'
198 | network.resnet.pretrained = 'model/resnet-152'
199 | #network.resnet.pretrained = 'model/senet154'
200 | #network.resnet.pretrained = 'model/densenet161'
201 | network.resnet.pretrained_epoch = 0
202 | #network.mnet.PIXEL_MEANS = np.array([103.939, 116.779, 123.68])
203 | #network.mnet.PIXEL_STDS = np.array([57.375, 57.12, 58.393])
204 | #network.resnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485])
205 | #network.resnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229])
206 | #network.resnet.PIXEL_SCALE = 255.0
207 | network.resnet.lr_step = '1,2,3,4,5,55,68,80'
208 | network.resnet.lr = 0.001
209 | network.resnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0])
210 | network.resnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0])
211 | network.resnet.PIXEL_SCALE = 1.0
212 | network.resnet.FIXED_PARAMS = ['^stage1', '^.*upsampling']
213 | network.resnet.BATCH_IMAGES = 8
214 | network.resnet.HEAD_FILTER_NUM = 256
215 | network.resnet.CONTEXT_FILTER_RATIO = 1
216 | network.resnet.USE_DCN = 2
217 | network.resnet.RPN_BATCH_SIZE = 256
218 | network.resnet.RPN_ANCHOR_CFG = RAC_RETINA
219 | 
220 | network.resnet.USE_DCN = 0
221 | network.resnet.pretrained = 'model/resnet-50'
222 | network.resnet.RPN_ANCHOR_CFG = RAC_SSH
223 | 
224 | 
225 | # dataset settings
226 | dataset = edict()
227 | 
228 | dataset.widerface = edict()
229 | dataset.widerface.dataset = 'widerface'
230 | dataset.widerface.image_set = 'train'
231 | dataset.widerface.test_image_set = 'val'
232 | dataset.widerface.root_path = 'data'
233 | dataset.widerface.dataset_path = 'data/widerface'
234 | dataset.widerface.NUM_CLASSES = 2
235 | 
236 | dataset.retinaface = edict()
237 | dataset.retinaface.dataset = 'retinaface'
238 | dataset.retinaface.image_set = 'train'
239 | dataset.retinaface.test_image_set = 'val'
240 | dataset.retinaface.root_path = 'data'
241 | dataset.retinaface.dataset_path = 'data/retinaface'
242 | dataset.retinaface.NUM_CLASSES = 2
243 | 
244 | # default settings
245 | default = edict()
246 | 
247 | config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3', '^.*upsampling']
248 | #config.FIXED_PARAMS = ['^.*upsampling']
249 | #config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3']
250 | #config.FIXED_PARAMS = ['^conv0', '^stage1', 'gamma', 'beta'] #for resnet
251 | 
252 | # default network
253 | default.network = 'resnet'
254 | default.pretrained = 'model/resnet-152'
255 | #default.network = 'resnetssh'
256 | default.pretrained_epoch = 0
257 | # default dataset
258 | default.dataset = 'retinaface'
259 | default.image_set = 'train'
260 | default.test_image_set = 'val'
261 | default.root_path = 'data'
262 | default.dataset_path = 'data/retinaface'
263 | # default training
264 | default.frequent = 20
265 | default.kvstore = 'device'
266 | # default e2e
267 | default.prefix = 'model/retinaface'
268 | default.end_epoch = 10000
269 | default.lr_step = '55,68,80'
270 | default.lr = 0.01
271 | 
272 | def generate_config(_network, _dataset):
273 |     for k, v in network[_network].items():
274 |         if k in config:
275 |             config[k] = v
276 |         elif k in default:
277 |             default[k] = v
278 |         if k in config.TRAIN:
279 |             config.TRAIN[k] = v
280 |     for k, v in dataset[_dataset].items():
281 |         if k in config:
282 |             config[k] = v
283 |         elif k in default:
284 |             default[k] = v
285 |         if k in config.TRAIN:
286 |             config.TRAIN[k] = v
287 |     config.network = _network
288 |     config.dataset = _dataset
289 |     config.RPN_FEAT_STRIDE = []
290 |     num_anchors = []
291 |     for k in config.RPN_ANCHOR_CFG:
292 |         config.RPN_FEAT_STRIDE.append( int(k) )
293 |         _num_anchors = len(config.RPN_ANCHOR_CFG[k]['SCALES'])*len(config.RPN_ANCHOR_CFG[k]['RATIOS'])
294 |         if config.DENSE_ANCHOR:
295 |             _num_anchors *= 2
296 |         config.RPN_ANCHOR_CFG[k]['NUM_ANCHORS'] = _num_anchors
297 |         num_anchors.append(_num_anchors)
298 |     config.RPN_FEAT_STRIDE = sorted(config.RPN_FEAT_STRIDE, reverse=True)
299 |     for j in range(1,len(num_anchors)):
300 |         assert num_anchors[0]==num_anchors[j]
301 |     config.NUM_ANCHORS = num_anchors[0]
302 | 
303 | 
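# Example: generate_config() wires a network/dataset pair into `config` and
# `default`. A minimal sketch, assuming this sample config is importable as
# rcnn.config (the module name the rest of rcnn imports, e.g. in
# rcnn/processing/bbox_regression.py); the printed values follow directly from
# the RAC_SSH table and the resnet overrides above:
#
#   from rcnn.config import config, default, generate_config
#
#   generate_config('resnet', 'retinaface')
#   config.RPN_FEAT_STRIDE    # [32, 16, 8] -- RAC_SSH strides, sorted descending
#   config.NUM_ANCHORS        # 2 (2 SCALES x 1 RATIO per stride, DENSE_ANCHOR off)
#   default.pretrained        # 'model/resnet-50' (the last resnet override wins)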
--------------------------------------------------------------------------------
/rcnn/processing/bbox_regression.py:
--------------------------------------------------------------------------------
1 | """
2 | This file has functions about generating bounding box regression targets
3 | """
4 | 
5 | from ..pycocotools.mask import encode
6 | import numpy as np
7 | 
8 | from ..logger import logger
9 | from .bbox_transform import bbox_overlaps, bbox_transform
10 | from rcnn.config import config
11 | import math
12 | import cv2
13 | import PIL.Image as Image
14 | import threading
15 | import queue as Queue  # Python 3 name; this module was called Queue on Python 2
16 | 
17 | 
18 | def compute_bbox_regression_targets(rois, overlaps, labels):
19 |     """
20 |     given rois, overlaps, gt labels, compute bounding box regression targets
21 |     :param rois: roidb[i]['boxes'] k * 4
22 |     :param overlaps: roidb[i]['max_overlaps'] k * 1
23 |     :param labels: roidb[i]['max_classes'] k * 1
24 |     :return: targets[i][class, dx, dy, dw, dh] k * 5
25 |     """
26 |     # Ensure ROIs are floats
27 |     rois = rois.astype(np.float64, copy=False)
28 | 
29 |     # Sanity check
30 |     if len(rois) != len(overlaps):
31 |         logger.warning('bbox regression: len(rois) != len(overlaps)')
32 | 
33 |     # Indices of ground-truth ROIs
34 |     gt_inds = np.where(overlaps == 1)[0]
35 |     if len(gt_inds) == 0:
36 |         logger.warning('bbox regression: len(gt_inds) == 0')
37 | 
38 |     # Indices of examples for which we try to make predictions
39 |     ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]
40 | 
41 |     # Get IoU overlap between each ex ROI and gt ROI
42 |     ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])
43 | 
44 |     # Find which gt ROI each ex ROI has max overlap with:
45 |     # this will be the ex ROI's gt target
46 |     gt_assignment = ex_gt_overlaps.argmax(axis=1)
47 |     gt_rois = rois[gt_inds[gt_assignment], :]
48 |     ex_rois = rois[ex_inds, :]
49 | 
50 |     targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
51 |     targets[ex_inds, 0] = labels[ex_inds]
52 |     targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
53 |     return targets
54 | 
55 | 
56 | def add_bbox_regression_targets(roidb):
57 |     """
58 |     given roidb, add ['bbox_targets'] and normalize bounding box regression targets
59 |     :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb
60 |     :return: means and standard deviations of the targets
61 |     """
62 |     logger.info('bbox regression: add bounding box regression targets')
63 |     assert len(roidb) > 0
64 |     assert 'max_classes' in roidb[0]
65 | 
66 |     num_images = len(roidb)
67 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
68 |     for im_i in range(num_images):
69 |         rois = roidb[im_i]['boxes']
70 |         max_overlaps = roidb[im_i]['max_overlaps']
71 |         max_classes = roidb[im_i]['max_classes']
72 |         roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes)
73 | 
74 |     if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
75 |         # use fixed / precomputed means and stds instead of empirical values
76 |         means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1))
77 |         stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1))
78 |     else:
79 |         # compute mean, std values
80 |         class_counts = np.zeros((num_classes, 1)) + 1e-14
81 |         sums = np.zeros((num_classes, 4))
82 |         squared_sums = np.zeros((num_classes, 4))
83 |         for im_i in range(num_images):
84 |             targets = roidb[im_i]['bbox_targets']
85 |             for cls in range(1, num_classes):
86 |                 cls_indexes = np.where(targets[:, 0] == cls)[0]
87 |                 if cls_indexes.size > 0:
88 |                     class_counts[cls] += cls_indexes.size
89 |                     sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)
90 |                     squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)
91 | 
92 |         means = sums / class_counts
93 |         # var(x) = E(x^2) - E(x)^2
94 |         stds = np.sqrt(squared_sums / class_counts - means ** 2)
95 | 
96 |     # normalized targets
97 |     for im_i in range(num_images):
98 |         targets = roidb[im_i]['bbox_targets']
99 |         for cls in range(1, num_classes):
100 |             cls_indexes = np.where(targets[:, 0] == cls)[0]
101 |             roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :]
102 |             roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :]
103 | 
104 |     return means.ravel(), stds.ravel()
105 | 
106 | 
107 | def expand_bbox_regression_targets(bbox_targets_data, num_classes):
108 |     """
109 |     expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets
110 |     :param bbox_targets_data: [k * 5]
111 |     :param num_classes: number of classes
112 |     :return: bbox targets processed [k * 4 * num_classes]
113 |              bbox_weights: only foreground boxes get non-zero regression weights
114 |     """
115 |     classes = bbox_targets_data[:, 0]
116 |     bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32)
117 |     bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
118 |     indexes = np.where(classes > 0)[0]
119 |     for index in indexes:
120 |         cls = classes[index]
121 |         start = int(4 * cls)
122 |         end = start + 4
123 |         bbox_targets[index, start:end] = bbox_targets_data[index, 1:]
124 |         bbox_weights[index, start:end] = config.TRAIN.BBOX_WEIGHTS
125 |     return bbox_targets, bbox_weights
126 | 
127 | 
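# Example: with num_classes = 2, a single foreground row [1, dx, dy, dw, dh]
# expands into the class-1 slice, columns 4:8. A minimal sketch; note it needs
# config.TRAIN.BBOX_WEIGHTS, e.g. (1.0, 1.0, 1.0, 1.0), which the sample config
# in this repo does not set:
#
#   data = np.array([[1, 0.1, -0.2, 0.05, 0.0]], dtype=np.float32)
#   bbox_targets, bbox_weights = expand_bbox_regression_targets(data, 2)
#   bbox_targets.shape       # (1, 8)
#   bbox_targets[0, 4:8]     # array([ 0.1 , -0.2 ,  0.05,  0.  ], dtype=float32)
#   bbox_weights[0, 4:8]     # the configured BBOX_WEIGHTS; zeros everywhere else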
128 | def compute_mask_and_label(ex_rois, ex_labels, seg, flipped):
129 |     # assert os.path.exists(seg_gt), 'Path does not exist: {}'.format(seg_gt)
130 |     # im = Image.open(seg_gt)
131 |     # pixel = list(im.getdata())
132 |     # pixel = np.array(pixel).reshape([im.size[1], im.size[0]])
133 |     im = Image.open(seg)
134 |     pixel = list(im.getdata())
135 |     ins_seg = np.array(pixel).reshape([im.size[1], im.size[0]])
136 |     if flipped:
137 |         ins_seg = ins_seg[:, ::-1]
138 |     rois = ex_rois
139 |     n_rois = ex_rois.shape[0]
140 |     label = ex_labels
141 |     class_id = config.CLASS_ID
142 |     mask_target = np.zeros((n_rois, 28, 28), dtype=np.int8)
143 |     mask_label = np.zeros((n_rois), dtype=np.int8)
144 |     for n in range(n_rois):
145 |         target = ins_seg[int(rois[n, 1]): int(rois[n, 3]), int(rois[n, 0]): int(rois[n, 2])]
146 |         ids = np.unique(target)
147 |         ins_id = 0
148 |         max_count = 0
149 |         for id in ids:
150 |             if math.floor(id / 1000) == class_id[int(label[int(n)])]:
151 |                 px = np.where(ins_seg == int(id))
152 |                 x_min = np.min(px[1])
153 |                 y_min = np.min(px[0])
154 |                 x_max = np.max(px[1])
155 |                 y_max = np.max(px[0])
156 |                 x1 = max(rois[n, 0], x_min)
157 |                 y1 = max(rois[n, 1], y_min)
158 |                 x2 = min(rois[n, 2], x_max)
159 |                 y2 = min(rois[n, 3], y_max)
160 |                 iou = (x2 - x1) * (y2 - y1)
161 |                 iou = iou / ((rois[n, 2] - rois[n, 0]) * (rois[n, 3] - rois[n, 1])
162 |                              + (x_max - x_min) * (y_max - y_min) - iou)
163 |                 if iou > max_count:
164 |                     ins_id = id
165 |                     max_count = iou
166 | 
167 |         if max_count == 0:
168 |             continue
169 |         # print(max_count)
170 |         mask = np.zeros(target.shape)
171 |         idx = np.where(target == ins_id)
172 |         mask[idx] = 1
173 |         mask = cv2.resize(mask, (28, 28), interpolation=cv2.INTER_NEAREST)
174 | 
175 |         mask_target[n] = mask
176 |         mask_label[n] = label[int(n)]
177 |     return mask_target, mask_label
178 | 
179 | 
180 | def compute_bbox_mask_targets_and_label(rois, overlaps, labels, seg, flipped):
181 |     """
182 |     given rois, overlaps, gt labels, seg, compute bounding box mask targets
183 |     :param rois: roidb[i]['boxes'] k * 4
184 |     :param overlaps: roidb[i]['max_overlaps'] k * 1
185 |     :param labels: roidb[i]['max_classes'] k * 1
186 |     :return: mask targets, mask labels, and the indices of the sampled rois
187 |     """
188 |     # Ensure ROIs are floats
189 |     rois = rois.astype(np.float64, copy=False)
190 | 
191 |     # Sanity check
192 |     if len(rois) != len(overlaps):
193 |         print('bbox regression: this should not happen')
194 | 
195 |     # Indices of ground-truth ROIs
196 |     gt_inds = np.where(overlaps == 1)[0]
197 |     if len(gt_inds) == 0:
198 |         print('something wrong : zero ground truth rois')
199 |     # Indices of examples for which we try to make predictions
200 |     ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]
201 | 
202 |     # Get IoU overlap between each ex ROI and gt ROI
203 |     ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])
204 | 
205 | 
206 |     # Find which gt ROI each ex ROI has max overlap with:
207 |     # this will be the ex ROI's gt target
208 |     gt_assignment = ex_gt_overlaps.argmax(axis=1)
209 |     gt_rois = rois[gt_inds[gt_assignment], :]
210 |     ex_rois = rois[ex_inds, :]
211 | 
212 |     mask_targets, mask_label = compute_mask_and_label(ex_rois, labels[ex_inds], seg, flipped)
213 |     return mask_targets, mask_label, ex_inds
214 | 
215 | def add_mask_targets(roidb):
216 |     """
217 |     given roidb, add ['mask_targets'], ['mask_labels'] and ['mask_inds']
218 |     :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb
219 |     :return: None; mask targets are written into roidb in place
220 |     """
221 |     print('add bounding box mask targets')
222 |     assert len(roidb) > 0
223 |     assert 'max_classes' in roidb[0]
224 | 
225 |     num_images = len(roidb)
226 | 
227 |     # Multi threads processing
228 |     im_queue = Queue.Queue(maxsize=0)
229 |     for im_i in range(num_images):
230 |         im_queue.put(im_i)
231 | 
232 |     def process():
233 |         while not im_queue.empty():
234 |             im_i = im_queue.get()
235 |             print("-----process img {}".format(im_i))
236 |             rois = roidb[im_i]['boxes']
237 |             max_overlaps = roidb[im_i]['max_overlaps']
238 |             max_classes = roidb[im_i]['max_classes']
239 |             ins_seg = roidb[im_i]['ins_seg']
240 |             flipped = roidb[im_i]['flipped']
241 |             roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \
242 |                 compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg, flipped)
243 |     threads = [threading.Thread(target=process, args=()) for i in range(10)]
244 |     for t in threads: t.start()
245 |     for t in threads: t.join()
246 |     # Single thread
247 |     # for im_i in range(num_images):
248 |     #     print("-----processing img {}".format(im_i))
249 |     #     rois = roidb[im_i]['boxes']
250 |     #     max_overlaps = roidb[im_i]['max_overlaps']
251 |     #     max_classes = roidb[im_i]['max_classes']
252 |     #     ins_seg = roidb[im_i]['ins_seg']
253 |     #     # roidb[im_i]['mask_targets'] = compute_bbox_mask_targets(rois, max_overlaps, max_classes, ins_seg)
254 |     #     roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \
255 |     #         compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg)
256 | 
--------------------------------------------------------------------------------
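add_bbox_regression_targets above is the module's entry point: it fills each
roidb[i]['bbox_targets'] in place and hands back the per-class whitening
statistics. A minimal sketch of a call site; `roidb` here stands for the list
of per-image dicts produced by imdb.prepare_roidb, and the config.TRAIN keys
the function reads (BBOX_REGRESSION_THRESH, BBOX_NORMALIZATION_PRECOMPUTED,
BBOX_MEANS) are not set by the sample config in this repo:

    from rcnn.processing.bbox_regression import add_bbox_regression_targets

    # roidb: list of dicts with 'boxes', 'gt_overlaps', 'max_overlaps',
    # 'max_classes', as produced by imdb.prepare_roidb
    means, stds = add_bbox_regression_targets(roidb)
    # each roidb[i]['bbox_targets'] row is now (class, dx, dy, dw, dh),
    # whitened per class; means/stds are typically kept so predictions can
    # be un-whitened at test time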
/rcnn/pycocotools/_mask.pyx:
--------------------------------------------------------------------------------
1 | # distutils: language = c
2 | # distutils: sources = maskApi.c
3 | 
4 | #**************************************************************************
5 | # Microsoft COCO Toolbox. version 2.0
6 | # Data, paper, and tutorials available at: http://mscoco.org/
7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
8 | # Licensed under the Simplified BSD License [see coco/license.txt]
9 | #**************************************************************************
10 | 
11 | __author__ = 'tsungyi'
12 | 
13 | import sys
14 | PYTHON_VERSION = sys.version_info[0]
15 | 
16 | # import both Python-level and C-level symbols of Numpy
17 | # the API uses Numpy to interface C and Python
18 | import numpy as np
19 | cimport numpy as np
20 | from libc.stdlib cimport malloc, free
21 | 
22 | # initialize Numpy. must do.
23 | np.import_array()
24 | 
25 | # import numpy C function
26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible for memory management
27 | cdef extern from "numpy/arrayobject.h":
28 |     void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
29 | 
30 | # Declare the prototype of the C functions in MaskApi.h
31 | cdef extern from "maskApi.h":
32 |     ctypedef unsigned int uint
33 |     ctypedef unsigned long siz
34 |     ctypedef unsigned char byte
35 |     ctypedef double* BB
36 |     ctypedef struct RLE:
37 |         siz h,
38 |         siz w,
39 |         siz m,
40 |         uint* cnts,
41 |     void rlesInit( RLE **R, siz n )
42 |     void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n )
43 |     void rleDecode( const RLE *R, byte *mask, siz n )
44 |     void rleMerge( const RLE *R, RLE *M, siz n, int intersect )
45 |     void rleArea( const RLE *R, siz n, uint *a )
46 |     void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o )
47 |     void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o )
48 |     void rleToBbox( const RLE *R, BB bb, siz n )
49 |     void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n )
50 |     void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w )
51 |     char* rleToString( const RLE *R )
52 |     void rleFrString( RLE *R, char *s, siz h, siz w )
53 | 
54 | # python class to wrap RLE array in C
55 | # the class handles the memory allocation and deallocation
56 | cdef class RLEs:
57 |     cdef RLE *_R
58 |     cdef siz _n
59 | 
60 |     def __cinit__(self, siz n =0):
61 |         rlesInit(&self._R, n)
62 |         self._n = n
63 | 
64 |     # free the RLE array here
65 |     def __dealloc__(self):
66 |         if self._R is not NULL:
67 |             for i in range(self._n):
68 |                 free(self._R[i].cnts)
69 |             free(self._R)
70 |     def __getattr__(self, key):
71 |         if key == 'n':
72 |             return self._n
73 |         raise AttributeError(key)
74 | 
75 | # python class to wrap Mask array in C
76 | # the class handles the memory allocation and deallocation
77 | cdef class Masks:
78 |     cdef byte *_mask
79 |     cdef siz _h
80 |     cdef siz _w
81 |     cdef siz _n
82 | 
83 |     def __cinit__(self, h, w, n):
84 |         self._mask = <byte*> malloc(h*w*n* sizeof(byte))
85 |         self._h = h
86 |         self._w = w
87 |         self._n = n
88 |     # def __dealloc__(self):
89 |         # the memory management of _mask has been passed to np.ndarray
90 |         # it doesn't need to be freed here
91 | 
92 |     # called when passing into np.array() and return an np.ndarray in column-major order
93 |     def __array__(self):
94 |         cdef np.npy_intp shape[1]
95 |         shape[0] = self._h*self._w*self._n
96 |         # Create a 1D array, and reshape it to fortran/Matlab column-major array
97 |         ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F')
98 |         # The _mask allocated by Masks is now handled by ndarray
99 |         PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA)
100 |         return ndarray
101 | 
102 | # internal conversion from Python RLEs object to compressed RLE format
103 | def _toString(RLEs Rs):
104 |     cdef siz n = Rs.n
105 |     cdef bytes py_string
106 |     cdef char* c_string
107 |     objs = []
108 |     for i in range(n):
109 |         c_string = rleToString( &Rs._R[i] )
110 |         py_string = c_string
111 |         objs.append({
112 |             'size': [Rs._R[i].h, Rs._R[i].w],
113 |             'counts': py_string
114 |         })
115 |         free(c_string)
116 |     return objs
117 | 
118 | # internal conversion from compressed RLE format to Python RLEs object
119 | def _frString(rleObjs):
120 |     cdef siz n = len(rleObjs)
121 |     Rs = RLEs(n)
122 |     cdef bytes py_string
123 |     cdef char* c_string
124 |     for i, obj in enumerate(rleObjs):
125 |         if PYTHON_VERSION == 2:
126 |             py_string = str(obj['counts']).encode('utf8')
127 |         elif PYTHON_VERSION == 3:
128 |             py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts']
129 |         else:
130 |             raise Exception('Python version must be 2 or 3')
131 |         c_string = py_string
132 |         rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] )
133 |     return Rs
134 | 
135 | # encode mask to RLEs objects
136 | # list of RLE string can be generated by RLEs member function
137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask):
138 |     h, w, n = mask.shape[0], mask.shape[1], mask.shape[2]
139 |     cdef RLEs Rs = RLEs(n)
140 |     rleEncode(Rs._R,<byte*>mask.data,h,w,n)
141 |     objs = _toString(Rs)
142 |     return objs
143 | 
144 | # decode mask from compressed list of RLE string or RLEs object
145 | def decode(rleObjs):
146 |     cdef RLEs Rs = _frString(rleObjs)
147 |     h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n
148 |     masks = Masks(h, w, n)
149 |     rleDecode(Rs._R, masks._mask, n);
150 |     return np.array(masks)
151 | 
152 | def merge(rleObjs, intersect=0):
153 |     cdef RLEs Rs = _frString(rleObjs)
154 |     cdef RLEs R = RLEs(1)
155 |     rleMerge(Rs._R, R._R, Rs._n, intersect)
156 |     obj = _toString(R)[0]
157 |     return obj
158 | 
159 | def area(rleObjs):
160 |     cdef RLEs Rs = _frString(rleObjs)
161 |     cdef uint* _a = <uint*> malloc(Rs._n* sizeof(uint))
162 |     rleArea(Rs._R, Rs._n, _a)
163 |     cdef np.npy_intp shape[1]
164 |     shape[0] = Rs._n
165 |     a = np.array((Rs._n, ), dtype=np.uint8)
166 |     a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a)
167 |     PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA)
168 |     return a
169 | 
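# Example: encode() takes a Fortran-ordered uint8 array of shape (h, w, n) and
# decode() inverts it. A minimal round-trip sketch, assuming the usual Python
# wrapper (rcnn/pycocotools/mask.py, the module bbox_regression.py imports
# encode from) re-exports these functions:
#
#   import numpy as np
#   from rcnn.pycocotools import mask as mask_util
#
#   m = np.zeros((4, 4, 1), dtype=np.uint8, order='F')
#   m[1:3, 1:3, 0] = 1                      # a 2x2 foreground square
#   rles = mask_util.encode(m)              # [{'size': [4, 4], 'counts': ...}]
#   mask_util.area(rles)                    # -> array([4], dtype=uint32)
#   assert (mask_util.decode(rles) == m).all()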
170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox).
171 | def iou( dt, gt, pyiscrowd ):
172 |     def _preproc(objs):
173 |         if len(objs) == 0:
174 |             return objs
175 |         if type(objs) == np.ndarray:
176 |             if len(objs.shape) == 1:
177 |                 objs = objs.reshape((1, objs.shape[0]))  # promote a single box to shape (1, 4)
178 |             # check if it's Nx4 bbox
179 |             if not len(objs.shape) == 2 or not objs.shape[1] == 4:
180 |                 raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension')
181 |             objs = objs.astype(np.double)
182 |         elif type(objs) == list:
183 |             # check if list is in box format and convert it to np.ndarray
184 |             isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs]))
185 |             isrle = np.all(np.array([type(obj) == dict for obj in objs]))
186 |             if isbox:
187 |                 objs = np.array(objs, dtype=np.double)
188 |                 if len(objs.shape) == 1:
189 |                     objs = objs.reshape((1,objs.shape[0]))
190 |             elif isrle:
191 |                 objs = _frString(objs)
192 |             else:
193 |                 raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])')
194 |         else:
195 |             raise Exception('unrecognized type. The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.')
196 |         return objs
197 |     def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
198 |         rleIou( dt._R, gt._R, m, n, <byte*> iscrowd.data, <double*> _iou.data )
199 |     def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
200 |         bbIou( <BB> dt.data, <BB> gt.data, m, n, <byte*> iscrowd.data, <double*> _iou.data )
201 |     def _len(obj):
202 |         cdef siz N = 0
203 |         if type(obj) == RLEs:
204 |             N = obj.n
205 |         elif len(obj)==0:
206 |             pass
207 |         elif type(obj) == np.ndarray:
208 |             N = obj.shape[0]
209 |         return N
210 |     # convert iscrowd to numpy array
211 |     cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8)
212 |     # simple type checking
213 |     cdef siz m, n
214 |     dt = _preproc(dt)
215 |     gt = _preproc(gt)
216 |     m = _len(dt)
217 |     n = _len(gt)
218 |     if m == 0 or n == 0:
219 |         return []
220 |     if not type(dt) == type(gt):
221 |         raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray')
222 | 
223 |     # define local variables
224 |     cdef double* _iou = <double*> 0
225 |     cdef np.npy_intp shape[1]
226 |     # check type and assign iou function
227 |     if type(dt) == RLEs:
228 |         _iouFun = _rleIou
229 |     elif type(dt) == np.ndarray:
230 |         _iouFun = _bbIou
231 |     else:
232 |         raise Exception('input data type not allowed.')
233 |     _iou = <double*> malloc(m*n* sizeof(double))
234 |     iou = np.zeros((m*n, ), dtype=np.double)
235 |     shape[0] = m*n
236 |     iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou)
237 |     PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA)
238 |     _iouFun(dt, gt, iscrowd, m, n, iou)
239 |     return iou.reshape((m,n), order='F')
240 | 
241 | def toBbox( rleObjs ):
242 |     cdef RLEs Rs = _frString(rleObjs)
243 |     cdef siz n = Rs.n
244 |     cdef BB _bb = <BB> malloc(4*n* sizeof(double))
245 |     rleToBbox( Rs._R, _bb, n )
246 |     cdef np.npy_intp shape[1]
247 |     shape[0] = 4*n
248 |     bb = np.array((1,4*n), dtype=np.double)
249 |     bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4))
250 |     PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA)
251 |     return bb
252 | 
253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ):
254 |     cdef siz n = bb.shape[0]
255 |     Rs = RLEs(n)
256 |     rleFrBbox( Rs._R, <const BB> bb.data, h, w, n )
257 |     objs = _toString(Rs)
258 |     return objs
259 | 
260 | def frPoly( poly, siz h, siz w ):
261 |     cdef np.ndarray[np.double_t, ndim=1] np_poly
262 |     n = len(poly)
263 |     Rs = RLEs(n)
264 |     for i, p in enumerate(poly):
265 |         np_poly = np.array(p, dtype=np.double, order='F')
266 |         rleFrPoly( &Rs._R[i], <const double*> np_poly.data, int(len(p)/2), h, w )
267 |     objs = _toString(Rs)
268 |     return objs
269 | 
270 | def frUncompressedRLE(ucRles, siz h, siz w):
271 |     cdef np.ndarray[np.uint32_t, ndim=1] cnts
272 |     cdef RLE R
273 |     cdef uint *data
274 |     n = len(ucRles)
275 |     objs = []
276 |     for i in range(n):
277 |         Rs = RLEs(1)
278 |         cnts = np.array(ucRles[i]['counts'], dtype=np.uint32)
279 |         # time for malloc can be saved here but it's fine
280 |         data = <uint*> malloc(len(cnts)* sizeof(uint))
281 |         for j in range(len(cnts)):
282 |             data[j] = cnts[j]
283 |         R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data)
284 |         Rs._R[0] = R
285 |         objs.append(_toString(Rs)[0])
286 |     return objs
287 | 
288 | def frPyObjects(pyobj, h, w):
289 |     # encode rle from a list of python objects
290 |     if type(pyobj) == np.ndarray:
291 |         objs = frBbox(pyobj, h, w)
292 |     elif type(pyobj) == list and len(pyobj[0]) == 4:
293 |         objs = frBbox(pyobj, h, w)
294 |     elif type(pyobj) == list and len(pyobj[0]) > 4:
295 |         objs = frPoly(pyobj, h, w)
296 |     elif type(pyobj) == list and type(pyobj[0]) == dict \
297 |         and 'counts' in pyobj[0] and 'size' in pyobj[0]:
298 |         objs = frUncompressedRLE(pyobj, h, w)
299 |     # encode rle from single python object
300 |     elif type(pyobj) == list and len(pyobj) == 4:
301 |         objs = frBbox([pyobj], h, w)[0]
302 |     elif type(pyobj) == list and len(pyobj) > 4:
303 |         objs = frPoly([pyobj], h, w)[0]
304 |     elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj:
305 |         objs = frUncompressedRLE([pyobj], h, w)[0]
306 |     else:
307 |         raise Exception('input type is not supported.')
308 |     return objs
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Eclipse Public License - v 2.0
2 | 
3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE
4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION
5 | OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
6 | 
7 | 1. DEFINITIONS
8 | 
9 | "Contribution" means:
10 | 
11 | a) in the case of the initial Contributor, the initial content
12 | Distributed under this Agreement, and
13 | 
14 | b) in the case of each subsequent Contributor:
15 | i) changes to the Program, and
16 | ii) additions to the Program;
17 | where such changes and/or additions to the Program originate from
18 | and are Distributed by that particular Contributor. A Contribution
19 | "originates" from a Contributor if it was added to the Program by
20 | such Contributor itself or anyone acting on such Contributor's behalf.
21 | Contributions do not include changes or additions to the Program that
22 | are not Modified Works.
23 | 
24 | "Contributor" means any person or entity that Distributes the Program.
25 | 
26 | "Licensed Patents" mean patent claims licensable by a Contributor which
27 | are necessarily infringed by the use or sale of its Contribution alone
28 | or when combined with the Program.
29 | 
30 | "Program" means the Contributions Distributed in accordance with this
31 | Agreement.
32 | 
33 | "Recipient" means anyone who receives the Program under this Agreement
34 | or any Secondary License (as applicable), including Contributors.
35 | 
36 | "Derivative Works" shall mean any work, whether in Source Code or other
37 | form, that is based on (or derived from) the Program and for which the
38 | editorial revisions, annotations, elaborations, or other modifications
39 | represent, as a whole, an original work of authorship.
40 | 
41 | "Modified Works" shall mean any work in Source Code or other form that
42 | results from an addition to, deletion from, or modification of the
43 | contents of the Program, including, for purposes of clarity any new file
44 | in Source Code form that contains any contents of the Program. Modified
45 | Works shall not include works that contain only declarations,
46 | interfaces, types, classes, structures, or files of the Program solely
47 | in each case in order to link to, bind by name, or subclass the Program
48 | or Modified Works thereof.
49 | 
50 | "Distribute" means the acts of a) distributing or b) making available
51 | in any manner that enables the transfer of a copy.
52 | 53 | "Source Code" means the form of a Program preferred for making 54 | modifications, including but not limited to software source code, 55 | documentation source, and configuration files. 56 | 57 | "Secondary License" means either the GNU General Public License, 58 | Version 2.0, or any later versions of that license, including any 59 | exceptions or additional permissions as identified by the initial 60 | Contributor. 61 | 62 | 2. GRANT OF RIGHTS 63 | 64 | a) Subject to the terms of this Agreement, each Contributor hereby 65 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 66 | license to reproduce, prepare Derivative Works of, publicly display, 67 | publicly perform, Distribute and sublicense the Contribution of such 68 | Contributor, if any, and such Derivative Works. 69 | 70 | b) Subject to the terms of this Agreement, each Contributor hereby 71 | grants Recipient a non-exclusive, worldwide, royalty-free patent 72 | license under Licensed Patents to make, use, sell, offer to sell, 73 | import and otherwise transfer the Contribution of such Contributor, 74 | if any, in Source Code or other form. This patent license shall 75 | apply to the combination of the Contribution and the Program if, at 76 | the time the Contribution is added by the Contributor, such addition 77 | of the Contribution causes such combination to be covered by the 78 | Licensed Patents. The patent license shall not apply to any other 79 | combinations which include the Contribution. No hardware per se is 80 | licensed hereunder. 81 | 82 | c) Recipient understands that although each Contributor grants the 83 | licenses to its Contributions set forth herein, no assurances are 84 | provided by any Contributor that the Program does not infringe the 85 | patent or other intellectual property rights of any other entity. 86 | Each Contributor disclaims any liability to Recipient for claims 87 | brought by any other entity based on infringement of intellectual 88 | property rights or otherwise. As a condition to exercising the 89 | rights and licenses granted hereunder, each Recipient hereby 90 | assumes sole responsibility to secure any other intellectual 91 | property rights needed, if any. For example, if a third party 92 | patent license is required to allow Recipient to Distribute the 93 | Program, it is Recipient's responsibility to acquire that license 94 | before distributing the Program. 95 | 96 | d) Each Contributor represents that to its knowledge it has 97 | sufficient copyright rights in its Contribution, if any, to grant 98 | the copyright license set forth in this Agreement. 99 | 100 | e) Notwithstanding the terms of any Secondary License, no 101 | Contributor makes additional grants to any Recipient (other than 102 | those set forth in this Agreement) as a result of such Recipient's 103 | receipt of the Program under the terms of a Secondary License 104 | (if permitted under the terms of Section 3). 105 | 106 | 3. 
REQUIREMENTS 107 | 108 | 3.1 If a Contributor Distributes the Program in any form, then: 109 | 110 | a) the Program must also be made available as Source Code, in 111 | accordance with section 3.2, and the Contributor must accompany 112 | the Program with a statement that the Source Code for the Program 113 | is available under this Agreement, and informs Recipients how to 114 | obtain it in a reasonable manner on or through a medium customarily 115 | used for software exchange; and 116 | 117 | b) the Contributor may Distribute the Program under a license 118 | different than this Agreement, provided that such license: 119 | i) effectively disclaims on behalf of all other Contributors all 120 | warranties and conditions, express and implied, including 121 | warranties or conditions of title and non-infringement, and 122 | implied warranties or conditions of merchantability and fitness 123 | for a particular purpose; 124 | 125 | ii) effectively excludes on behalf of all other Contributors all 126 | liability for damages, including direct, indirect, special, 127 | incidental and consequential damages, such as lost profits; 128 | 129 | iii) does not attempt to limit or alter the recipients' rights 130 | in the Source Code under section 3.2; and 131 | 132 | iv) requires any subsequent distribution of the Program by any 133 | party to be under a license that satisfies the requirements 134 | of this section 3. 135 | 136 | 3.2 When the Program is Distributed as Source Code: 137 | 138 | a) it must be made available under this Agreement, or if the 139 | Program (i) is combined with other material in a separate file or 140 | files made available under a Secondary License, and (ii) the initial 141 | Contributor attached to the Source Code the notice described in 142 | Exhibit A of this Agreement, then the Program may be made available 143 | under the terms of such Secondary Licenses, and 144 | 145 | b) a copy of this Agreement must be included with each copy of 146 | the Program. 147 | 148 | 3.3 Contributors may not remove or alter any copyright, patent, 149 | trademark, attribution notices, disclaimers of warranty, or limitations 150 | of liability ("notices") contained within the Program from any copy of 151 | the Program which they Distribute, provided that Contributors may add 152 | their own appropriate notices. 153 | 154 | 4. COMMERCIAL DISTRIBUTION 155 | 156 | Commercial distributors of software may accept certain responsibilities 157 | with respect to end users, business partners and the like. While this 158 | license is intended to facilitate the commercial use of the Program, 159 | the Contributor who includes the Program in a commercial product 160 | offering should do so in a manner which does not create potential 161 | liability for other Contributors. Therefore, if a Contributor includes 162 | the Program in a commercial product offering, such Contributor 163 | ("Commercial Contributor") hereby agrees to defend and indemnify every 164 | other Contributor ("Indemnified Contributor") against any losses, 165 | damages and costs (collectively "Losses") arising from claims, lawsuits 166 | and other legal actions brought by a third party against the Indemnified 167 | Contributor to the extent caused by the acts or omissions of such 168 | Commercial Contributor in connection with its distribution of the Program 169 | in a commercial product offering. 
The obligations in this section do not 170 | apply to any claims or Losses relating to any actual or alleged 171 | intellectual property infringement. In order to qualify, an Indemnified 172 | Contributor must: a) promptly notify the Commercial Contributor in 173 | writing of such claim, and b) allow the Commercial Contributor to control, 174 | and cooperate with the Commercial Contributor in, the defense and any 175 | related settlement negotiations. The Indemnified Contributor may 176 | participate in any such claim at its own expense. 177 | 178 | For example, a Contributor might include the Program in a commercial 179 | product offering, Product X. That Contributor is then a Commercial 180 | Contributor. If that Commercial Contributor then makes performance 181 | claims, or offers warranties related to Product X, those performance 182 | claims and warranties are such Commercial Contributor's responsibility 183 | alone. Under this section, the Commercial Contributor would have to 184 | defend claims against the other Contributors related to those performance 185 | claims and warranties, and if a court requires any other Contributor to 186 | pay any damages as a result, the Commercial Contributor must pay 187 | those damages. 188 | 189 | 5. NO WARRANTY 190 | 191 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 192 | PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" 193 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 194 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF 195 | TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR 196 | PURPOSE. Each Recipient is solely responsible for determining the 197 | appropriateness of using and distributing the Program and assumes all 198 | risks associated with its exercise of rights under this Agreement, 199 | including but not limited to the risks and costs of program errors, 200 | compliance with applicable laws, damage to or loss of data, programs 201 | or equipment, and unavailability or interruption of operations. 202 | 203 | 6. DISCLAIMER OF LIABILITY 204 | 205 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 206 | PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS 207 | SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 208 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST 209 | PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 210 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 211 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 212 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE 213 | POSSIBILITY OF SUCH DAMAGES. 214 | 215 | 7. GENERAL 216 | 217 | If any provision of this Agreement is invalid or unenforceable under 218 | applicable law, it shall not affect the validity or enforceability of 219 | the remainder of the terms of this Agreement, and without further 220 | action by the parties hereto, such provision shall be reformed to the 221 | minimum extent necessary to make such provision valid and enforceable. 
222 | 223 | If Recipient institutes patent litigation against any entity 224 | (including a cross-claim or counterclaim in a lawsuit) alleging that the 225 | Program itself (excluding combinations of the Program with other software 226 | or hardware) infringes such Recipient's patent(s), then such Recipient's 227 | rights granted under Section 2(b) shall terminate as of the date such 228 | litigation is filed. 229 | 230 | All Recipient's rights under this Agreement shall terminate if it 231 | fails to comply with any of the material terms or conditions of this 232 | Agreement and does not cure such failure in a reasonable period of 233 | time after becoming aware of such noncompliance. If all Recipient's 234 | rights under this Agreement terminate, Recipient agrees to cease use 235 | and distribution of the Program as soon as reasonably practicable. 236 | However, Recipient's obligations under this Agreement and any licenses 237 | granted by Recipient relating to the Program shall continue and survive. 238 | 239 | Everyone is permitted to copy and distribute copies of this Agreement, 240 | but in order to avoid inconsistency the Agreement is copyrighted and 241 | may only be modified in the following manner. The Agreement Steward 242 | reserves the right to publish new versions (including revisions) of 243 | this Agreement from time to time. No one other than the Agreement 244 | Steward has the right to modify this Agreement. The Eclipse Foundation 245 | is the initial Agreement Steward. The Eclipse Foundation may assign the 246 | responsibility to serve as the Agreement Steward to a suitable separate 247 | entity. Each new version of the Agreement will be given a distinguishing 248 | version number. The Program (including Contributions) may always be 249 | Distributed subject to the version of the Agreement under which it was 250 | received. In addition, after a new version of the Agreement is published, 251 | Contributor may elect to Distribute the Program (including its 252 | Contributions) under the new version. 253 | 254 | Except as expressly stated in Sections 2(a) and 2(b) above, Recipient 255 | receives no rights or licenses to the intellectual property of any 256 | Contributor under this Agreement, whether expressly, by implication, 257 | estoppel or otherwise. All rights in the Program not expressly granted 258 | under this Agreement are reserved. Nothing in this Agreement is intended 259 | to be enforceable by any entity that is not a Contributor or Recipient. 260 | No third-party beneficiary rights are created under this Agreement. 261 | 262 | Exhibit A - Form of Secondary Licenses Notice 263 | 264 | "This Source Code may also be made available under the following 265 | Secondary Licenses when the conditions for such availability set forth 266 | in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), 267 | version(s), and exceptions or additional permissions here}." 268 | 269 | Simply including a copy of this Agreement, including this Exhibit A 270 | is not sufficient to license the Source Code under Secondary Licenses. 271 | 272 | If it is not possible or desirable to put the notice in a particular 273 | file, then You may include the notice in a location (such as a LICENSE 274 | file in a relevant directory) where a recipient would be likely to 275 | look for such a notice. 276 | 277 | You may add additional accurate notices of copyright ownership. 278 | --------------------------------------------------------------------------------