├── rcnn ├── __init__.py ├── PY_OP │ ├── __init__.py │ └── rpn_fpn_ohem3.py ├── core │ ├── __init__.py │ ├── callback.py │ ├── metric.py │ ├── module.py │ └── module_bak.py ├── cython │ ├── __init__.py │ ├── .gitignore │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── anchors.pyx │ ├── bbox.pyx │ ├── cpu_nms.pyx │ ├── nms_kernel.cu │ └── setup.py ├── io │ └── __init__.py ├── tools │ ├── __init__.py │ ├── reeval.py │ ├── test_rpn.py │ ├── test_rcnn.py │ ├── train_rcnn.py │ └── train_rpn.py ├── utils │ ├── __init__.py │ ├── combine_model.py │ ├── save_model.py │ ├── load_data.py │ └── load_model.py ├── processing │ ├── __init__.py │ ├── assign_levels.py │ ├── nms.py │ ├── generate_anchor.py │ ├── bbox_transform.py │ └── bbox_regression.py ├── pycocotools │ ├── __init__.py │ ├── UPSTREAM_REV │ ├── setup.py │ ├── maskApi.h │ ├── mask.py │ ├── maskApi.c │ └── _mask.pyx ├── dataset │ ├── __init__.py │ ├── ds_utils.py │ └── retinaface.py ├── symbol │ └── __init__.py ├── logger.py └── sample_config.py ├── testimg.jpeg ├── detector_test.jpg ├── dataset ├── group-pic.JPG ├── jaredleto.jpeg ├── Angelina-Jolie.jpg ├── bradleycooper.jpeg ├── channingtatum.jpeg ├── juliaroberts.jpeg ├── kevinspacey.jpeg ├── lupitanyongo.jpg ├── merylstreep.jpeg ├── jenniferlawrence.jpeg ├── 220px-Ellen_DeGeneres_2011.jpg ├── 220px-Brad_Pitt_2019_by_Glenn_Francis.jpg └── Lupita-Nyongo-Her-Brother-Peter-Nyongo.jpg ├── .gitmodules ├── docs ├── faces │ ├── 079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg │ ├── 0e78e2f1-f597-4d65-ac74-f490084f79de.jpg │ ├── 4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg │ ├── 50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg │ ├── 5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg │ ├── 574d60a5-0c06-48a4-bf73-914002c82c2a.jpg │ ├── 5e652366-c03f-47a5-929a-9311f65db9ce.jpg │ ├── 6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg │ ├── 63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg │ ├── 71a92092-b36f-41da-9e7f-85e5c075d65e.jpg │ ├── 73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg │ ├── 7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg │ ├── 830652cd-29a9-4237-a736-e161e891f4ce.jpg │ ├── 84e22415-6352-4442-95f1-d20319786b6b.jpg │ ├── 94cb0154-085e-4afc-9c6c-48073455f1ab.jpg │ ├── 9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg │ ├── 9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg │ ├── a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg │ ├── b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg │ ├── ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg │ ├── bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg │ ├── bcf97241-96e4-4726-8287-fdab88217584.jpg │ ├── c966eec9-6458-4a4e-8967-6bab69dd9965.jpg │ ├── d445a6d4-e28a-4903-8f37-037a357eae60.jpg │ ├── dbbd360f-4fea-4911-9233-063eda812445.jpg │ ├── e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg │ └── fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg └── results.md ├── scripts ├── build-conda-docker ├── get-data ├── conda-repl └── run-conda-docker ├── .gitignore ├── project.clj ├── dockerfiles └── CondaDockerfile ├── src └── facial_rec │ ├── face_feature.clj │ ├── detect.clj │ └── demo.clj ├── README.md └── LICENSE /rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/PY_OP/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/rcnn/cython/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/processing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rcnn/cython/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /rcnn/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /testimg.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/testimg.jpeg -------------------------------------------------------------------------------- /detector_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/detector_test.jpg -------------------------------------------------------------------------------- /rcnn/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import IMDB 2 | from .retinaface import retinaface 3 | -------------------------------------------------------------------------------- /dataset/group-pic.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/group-pic.JPG -------------------------------------------------------------------------------- /dataset/jaredleto.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/jaredleto.jpeg -------------------------------------------------------------------------------- /dataset/Angelina-Jolie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/Angelina-Jolie.jpg -------------------------------------------------------------------------------- /dataset/bradleycooper.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/bradleycooper.jpeg -------------------------------------------------------------------------------- /dataset/channingtatum.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/channingtatum.jpeg -------------------------------------------------------------------------------- 
/dataset/juliaroberts.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/juliaroberts.jpeg -------------------------------------------------------------------------------- /dataset/kevinspacey.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/kevinspacey.jpeg -------------------------------------------------------------------------------- /dataset/lupitanyongo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/lupitanyongo.jpg -------------------------------------------------------------------------------- /dataset/merylstreep.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/merylstreep.jpeg -------------------------------------------------------------------------------- /rcnn/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/336d2a27c91e3c0663d2dcf0b13574674d30f88e 2 | -------------------------------------------------------------------------------- /dataset/jenniferlawrence.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/jenniferlawrence.jpeg -------------------------------------------------------------------------------- /rcnn/symbol/__init__.py: -------------------------------------------------------------------------------- 1 | from .symbol_ssh import * 2 | from .symbol_mnet import * 3 | from .symbol_resnet import * 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "insightface"] 2 | path = insightface 3 | url = https://github.com/deepinsight/insightface 4 | -------------------------------------------------------------------------------- /dataset/220px-Ellen_DeGeneres_2011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/220px-Ellen_DeGeneres_2011.jpg -------------------------------------------------------------------------------- /rcnn/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | # set up logger 4 | logging.basicConfig() 5 | logger = logging.getLogger() 6 | logger.setLevel(logging.INFO) 7 | -------------------------------------------------------------------------------- /dataset/220px-Brad_Pitt_2019_by_Glenn_Francis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/220px-Brad_Pitt_2019_by_Glenn_Francis.jpg -------------------------------------------------------------------------------- /dataset/Lupita-Nyongo-Her-Brother-Peter-Nyongo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/dataset/Lupita-Nyongo-Her-Brother-Peter-Nyongo.jpg -------------------------------------------------------------------------------- 
/docs/faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg -------------------------------------------------------------------------------- /docs/faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg -------------------------------------------------------------------------------- /docs/faces/4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg -------------------------------------------------------------------------------- /docs/faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg -------------------------------------------------------------------------------- /docs/faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg -------------------------------------------------------------------------------- /docs/faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg -------------------------------------------------------------------------------- /docs/faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg -------------------------------------------------------------------------------- /docs/faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg -------------------------------------------------------------------------------- /docs/faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg -------------------------------------------------------------------------------- /docs/faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg -------------------------------------------------------------------------------- /docs/faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg 
-------------------------------------------------------------------------------- /docs/faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg -------------------------------------------------------------------------------- /docs/faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg -------------------------------------------------------------------------------- /docs/faces/84e22415-6352-4442-95f1-d20319786b6b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/84e22415-6352-4442-95f1-d20319786b6b.jpg -------------------------------------------------------------------------------- /docs/faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg -------------------------------------------------------------------------------- /docs/faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg -------------------------------------------------------------------------------- /docs/faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg -------------------------------------------------------------------------------- /docs/faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg -------------------------------------------------------------------------------- /docs/faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg -------------------------------------------------------------------------------- /docs/faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg -------------------------------------------------------------------------------- /docs/faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg -------------------------------------------------------------------------------- /docs/faces/bcf97241-96e4-4726-8287-fdab88217584.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/bcf97241-96e4-4726-8287-fdab88217584.jpg -------------------------------------------------------------------------------- /docs/faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg -------------------------------------------------------------------------------- /docs/faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg -------------------------------------------------------------------------------- /docs/faces/dbbd360f-4fea-4911-9233-063eda812445.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/dbbd360f-4fea-4911-9233-063eda812445.jpg -------------------------------------------------------------------------------- /docs/faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg -------------------------------------------------------------------------------- /docs/faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnuernber/facial-rec/HEAD/docs/faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg -------------------------------------------------------------------------------- /rcnn/cython/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /scripts/build-conda-docker: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | pushd dockerfiles 6 | docker build -t facerec-conda -f CondaDockerfile --build-arg USERID=$(id -u) --build-arg GROUPID=$(id -g) --build-arg USERNAME=$USER .
7 | popd 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | .hgignore 12 | .hg/ 13 | models 14 | dataset 15 | __pycache__ 16 | *.jpg 17 | faces 18 | results.md -------------------------------------------------------------------------------- /scripts/get-data: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | wget https://s3.us-east-2.amazonaws.com/tech.public.data/facerec-models.zip 5 | wget https://s3.us-east-2.amazonaws.com/tech.public.data/facerec-test-ds.zip 6 | unzip facerec-models.zip && rm facerec-models.zip 7 | unzip facerec-test-ds.zip && rm facerec-test-ds.zip 8 | -------------------------------------------------------------------------------- /scripts/conda-repl: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source activate pyclj 4 | 5 | ## This is absolutely necessary. 6 | ## https://github.com/conda/conda/issues/9500#issuecomment-565753807 7 | export LD_LIBRARY_PATH="$(python3-config --prefix)/lib" 8 | 9 | lein update-in :dependencies conj \[nrepl\ \"0.6.0\"\]\ 10 | -- update-in :plugins conj \[cider/cider-nrepl\ \"0.25.0\"\]\ 11 | -- repl :headless :host localhost 12 | -------------------------------------------------------------------------------- /rcnn/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unique_boxes(boxes, scale=1.0): 5 | """ return indices of unique boxes """ 6 | v = np.array([1, 1e3, 1e6, 1e9]) 7 | hashes = np.round(boxes * scale).dot(v).astype(np.int64) # fixed-width dtype; the bare np.int alias is deprecated 8 | _, index = np.unique(hashes, return_index=True) 9 | return np.sort(index) 10 | 11 | 12 | def filter_small_boxes(boxes, min_size): 13 | w = boxes[:, 2] - boxes[:, 0] 14 | h = boxes[:, 3] - boxes[:, 1] 15 | keep = np.where((w >= min_size) & (h >= min_size))[0] 16 | return keep 17 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject cnuernber/facial-rec "0.1.0-SNAPSHOT" 2 | :description "Demonstration of facial rec tech."
3 | :url "https://github.com/cnuernber/facial-rec" 4 | :license {:name "EPL-2.0" 5 | :url "https://www.eclipse.org/legal/epl-2.0/"} 6 | :dependencies [[org.clojure/clojure "1.10.1"] 7 | [techascent/tech.io "4.03" 8 | :exclusions [org.apache.commons/commons-compress]] 9 | [clj-python/libpython-clj "2.00-beta-3"] 10 | [ch.qos.logback/logback-classic "1.1.3"]]) 11 | -------------------------------------------------------------------------------- /rcnn/core/callback.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def do_checkpoint(prefix, means, stds): 5 | def _callback(iter_no, sym, arg, aux): 6 | if 'bbox_pred_weight' in arg: 7 | arg['bbox_pred_weight_test'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T 8 | arg['bbox_pred_bias_test'] = arg['bbox_pred_bias'] * mx.nd.array(stds) + mx.nd.array(means) 9 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 10 | if 'bbox_pred_weight' in arg: 11 | arg.pop('bbox_pred_weight_test') 12 | arg.pop('bbox_pred_bias_test') 13 | return _callback 14 | -------------------------------------------------------------------------------- /rcnn/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['maskApi.c', '_mask.pyx'], 13 | include_dirs=[np.get_include()], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | ext_modules=cythonize(ext_modules) 20 | ) 21 | -------------------------------------------------------------------------------- /scripts/run-conda-docker: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## This is incomplete, you still have to mess around with pythonpath and such. The 4 | ## goal at this stage is to get this docker container to launch a repl that allows 5 | ## initialize! to work out of the box. 
6 | scripts/build-conda-docker 7 | 8 | docker run --rm -it -u $(id -u):$(id -g) \ 9 | -e LEIN_REPL_HOST="0.0.0.0" \ 10 | -v /$HOME/.m2:/home/$USER/.m2 \ 11 | -v /$HOME/.lein:/home/$USER/.lein \ 12 | -v $(pwd)/:/libpython-clj \ 13 | --net=host -w /libpython-clj \ 14 | facerec-conda scripts/conda-repl 15 | # lein update-in :dependencies conj \[nrepl\ \"0.6.0\"\]\ 16 | # -- update-in :plugins conj \[cider/cider-nrepl\ \"0.22.4\"\]\ 17 | # -- repl :headless :host localhost 18 | -------------------------------------------------------------------------------- /rcnn/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | from .load_model import load_checkpoint 2 | from .save_model import save_checkpoint 3 | 4 | 5 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 6 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 7 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 8 | arg_names = list(args1.keys()) + list(args2.keys()) # dict views cannot be concatenated with + under Python 3 9 | aux_names = list(auxs1.keys()) + list(auxs2.keys()) 10 | args = dict() 11 | for arg in arg_names: 12 | if arg in args1: 13 | args[arg] = args1[arg] 14 | else: 15 | args[arg] = args2[arg] 16 | auxs = dict() 17 | for aux in aux_names: 18 | if aux in auxs1: 19 | auxs[aux] = auxs1[aux] 20 | else: 21 | auxs[aux] = auxs2[aux] 22 | save_checkpoint(prefix_out, epoch_out, args, auxs) 23 | -------------------------------------------------------------------------------- /rcnn/utils/save_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 5 | """Checkpoint the model data into file. 6 | :param prefix: Prefix of model name. 7 | :param epoch: The epoch number of the model. 8 | :param arg_params: dict of str to NDArray 9 | Model parameter, dict of name to NDArray of net's weights. 10 | :param aux_params: dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's auxiliary states. 12 | :return: None 13 | prefix-epoch.params will be saved for parameters.
14 | """ 15 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 16 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 17 | param_name = '%s-%04d.params' % (prefix, epoch) 18 | mx.nd.save(param_name, save_dict) 19 | -------------------------------------------------------------------------------- /rcnn/cython/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /rcnn/cython/anchors.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE = np.float32 6 | ctypedef np.float32_t DTYPE_t 7 | 8 | def anchors_cython(int height, int width, int stride, np.ndarray[DTYPE_t, ndim=2] base_anchors): 9 | """ 10 | Parameters 11 | ---------- 12 | height: height of plane 13 | width: width of plane 14 | stride: stride ot the original image 15 | anchors_base: (A, 4) a base set of anchors 16 | Returns 17 | ------- 18 | all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane 19 | """ 20 | cdef unsigned int A = base_anchors.shape[0] 21 | cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE) 22 | cdef unsigned int iw, ih 23 | cdef unsigned int k 24 | cdef unsigned int sh 25 | cdef unsigned int sw 26 | for iw in range(width): 27 | sw = iw * stride 28 | for ih in range(height): 29 | sh = ih * stride 30 | for k in range(A): 31 | all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw 32 | all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh 33 | all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw 34 | all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh 35 | return all_anchors -------------------------------------------------------------------------------- /rcnn/processing/assign_levels.py: -------------------------------------------------------------------------------- 1 | from rcnn.config import config 2 | import numpy as np 3 | 4 | 5 | def compute_assign_targets(rois, threshold): 6 | rois_area = np.sqrt((rois[:, 2] - rois[:, 0] + 1) * (rois[:, 3] - rois[:, 1] + 1)) 7 | num_rois = np.shape(rois)[0] 8 | assign_levels = np.zeros(num_rois, dtype=np.uint8) 9 | for i, stride in enumerate(config.RCNN_FEAT_STRIDE): 10 | thd = threshold[i] 11 | idx = 
np.logical_and(thd[1] <= rois_area, rois_area < thd[0]) 12 | assign_levels[idx] = stride 13 | 14 | assert 0 not in assign_levels, "All rois should be assigned to a specific level." 15 | return assign_levels 16 | 17 | 18 | def add_assign_targets(roidb): 19 | """ 20 | given roidb, add ['assign_levels'] 21 | :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb 22 | """ 23 | print('add assign targets') 24 | assert len(roidb) > 0 25 | assert 'boxes' in roidb[0] 26 | 27 | area_threshold = [[np.inf, 448], 28 | [448, 224], 29 | [224, 112], 30 | [112, 0]] 31 | 32 | assert len(config.RCNN_FEAT_STRIDE) == len(area_threshold) 33 | 34 | num_images = len(roidb) 35 | for im_i in range(num_images): 36 | rois = roidb[im_i]['boxes'] 37 | roidb[im_i]['assign_levels'] = compute_assign_targets(rois, area_threshold) 38 | -------------------------------------------------------------------------------- /dockerfiles/CondaDockerfile: -------------------------------------------------------------------------------- 1 | # We will use Ubuntu for our image 2 | FROM ubuntu:latest 3 | 4 | # Updating Ubuntu packages 5 | 6 | 7 | RUN apt-get -qq update && apt-get -qq -y install curl wget bzip2 openjdk-8-jdk-headless \ 8 | gcc g++ \ 9 | && curl -sSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh \ 10 | && bash /tmp/miniconda.sh -bfp /usr/local \ 11 | && rm -rf /tmp/miniconda.sh \ 12 | && conda install -y python=3 \ 13 | && conda update -y conda \ 14 | && curl -O https://download.clojure.org/install/linux-install-1.10.1.492.sh \ 15 | && chmod +x linux-install-1.10.1.492.sh \ 16 | && ./linux-install-1.10.1.492.sh && rm linux-install-1.10.1.492.sh \ 17 | && wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein \ 18 | && chmod a+x lein \ 19 | && mv lein /usr/bin \ 20 | && apt-get -qq -y autoremove \ 21 | && apt-get autoclean \ 22 | && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log \ 23 | && conda clean --all --yes 24 | 25 | # miniconda was installed to /usr/local above, so make sure its bin dir leads PATH 26 | ENV PATH /usr/local/bin:$PATH 27 | 28 | 29 | ARG USERID 30 | ARG GROUPID 31 | ARG USERNAME 32 | 33 | RUN groupadd -g $GROUPID $USERNAME 34 | RUN useradd -u $USERID -g $GROUPID $USERNAME 35 | RUN mkdir /home/$USERNAME && chown $USERNAME:$USERNAME /home/$USERNAME 36 | USER $USERNAME 37 | RUN conda create -y -n pyclj python=3.6\ 38 | && conda install -y -n pyclj numpy mxnet opencv cython scikit-image\ 39 | && echo "source activate pyclj" > /home/$USERNAME/.bashrc -------------------------------------------------------------------------------- /rcnn/tools/reeval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | try: 3 | import cPickle as pickle 4 | except ImportError: 5 | import pickle 6 | import os 7 | import mxnet as mx 8 | 9 | from ..logger import logger 10 | from ..config import config, default, generate_config 11 | from ..dataset import * 12 | 13 | 14 | def reeval(args): 15 | # load imdb 16 | imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path) 17 | 18 | # load detection results 19 | cache_file = os.path.join(imdb.cache_path, imdb.name, 'detections.pkl') 20 | with open(cache_file, 'rb') as f: # binary mode; required for pickle under Python 3 21 | detections = pickle.load(f) 22 | 23 | # eval 24 | imdb.evaluate_detections(detections) 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser(description='imdb test') 29 | # general 30 | parser.add_argument('--network', help='network name', default=default.network, type=str) 31 | parser.add_argument('--dataset', help='dataset name',
default=default.dataset, type=str) 32 | args, rest = parser.parse_known_args() 33 | generate_config(args.network, args.dataset) 34 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 35 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 36 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 37 | # other 38 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | def main(): 44 | args = parse_args() 45 | logger.info('Called with argument: %s' % args) 46 | reeval(args) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 | -------------------------------------------------------------------------------- /rcnn/processing/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..cython.cpu_nms import cpu_nms 3 | try: 4 | from ..cython.gpu_nms import gpu_nms 5 | except ImportError: 6 | gpu_nms = None 7 | 8 | 9 | def py_nms_wrapper(thresh): 10 | def _nms(dets): 11 | return nms(dets, thresh) 12 | return _nms 13 | 14 | 15 | def cpu_nms_wrapper(thresh): 16 | def _nms(dets): 17 | return cpu_nms(dets, thresh) 18 | return _nms 19 | 20 | 21 | def gpu_nms_wrapper(thresh, device_id): 22 | def _nms(dets): 23 | return gpu_nms(dets, thresh, device_id) 24 | if gpu_nms is not None: 25 | return _nms 26 | else: 27 | return cpu_nms_wrapper(thresh) 28 | 29 | 30 | def nms(dets, thresh): 31 | """ 32 | greedily select boxes with high confidence and overlap with current maximum <= thresh 33 | rule out overlap >= thresh 34 | :param dets: [[x1, y1, x2, y2 score]] 35 | :param thresh: retain overlap < thresh 36 | :return: indexes to keep 37 | """ 38 | x1 = dets[:, 0] 39 | y1 = dets[:, 1] 40 | x2 = dets[:, 2] 41 | y2 = dets[:, 3] 42 | scores = dets[:, 4] 43 | 44 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 45 | order = scores.argsort()[::-1] 46 | 47 | keep = [] 48 | while order.size > 0: 49 | i = order[0] 50 | keep.append(i) 51 | xx1 = np.maximum(x1[i], x1[order[1:]]) 52 | yy1 = np.maximum(y1[i], y1[order[1:]]) 53 | xx2 = np.minimum(x2[i], x2[order[1:]]) 54 | yy2 = np.minimum(y2[i], y2[order[1:]]) 55 | 56 | w = np.maximum(0.0, xx2 - xx1 + 1) 57 | h = np.maximum(0.0, yy2 - yy1 + 1) 58 | inter = w * h 59 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 60 | 61 | inds = np.where(ovr <= thresh)[0] 62 | order = order[inds + 1] 63 | 64 | return keep 65 | -------------------------------------------------------------------------------- /src/facial_rec/face_feature.clj: -------------------------------------------------------------------------------- 1 | (ns facial-rec.face-feature 2 | (:require [libpython-clj2.require :refer [require-python]] 3 | [libpython-clj2.python :refer [py. py.. py.-] :as py] 4 | ;;Important change from v1 - You have to require 'np-array' for numpy 5 | ;;support 6 | [libpython-clj2.python.np-array] 7 | [tech.v3.datatype :as dtype])) 8 | 9 | 10 | 11 | (require-python 'mxnet 12 | '(mxnet ndarray module io model)) 13 | (require-python 'cv2) 14 | (require-python '[numpy :as np]) 15 | 16 | 17 | (defn load-model 18 | [& {:keys [model-path checkpoint] 19 | :or {model-path "models/recognition/model" 20 | checkpoint 0}}] 21 | (let [[sym arg-params aux-params] (mxnet.model/load_checkpoint model-path checkpoint) 22 | all-layers (py. 
sym get_internals) 23 | target-layer (py/get-item all-layers "fc1_output") 24 | model (mxnet.module/Module :symbol target-layer 25 | :context (mxnet/cpu) 26 | :label_names nil)] 27 | (py. model bind :data_shapes [["data" [1 3 112 112]]]) 28 | (py. model set_params arg-params aux-params) 29 | model)) 30 | 31 | (defonce model (load-model)) 32 | 33 | 34 | 35 | (defn face->feature 36 | [img-path] 37 | (py/with-gil-stack-rc-context 38 | (if-let [new-img (cv2/imread img-path)] 39 | (let [new-img (cv2/cvtColor new-img cv2/COLOR_BGR2RGB) 40 | new-img (np/transpose new-img [2 0 1]) 41 | input-blob (np/expand_dims new-img :axis 0) 42 | data (mxnet.ndarray/array input-blob) 43 | batch (mxnet.io/DataBatch :data [data])] 44 | (py. model forward batch :is_train false) 45 | (-> (py. model get_outputs) 46 | first 47 | (py. asnumpy) 48 | (#(dtype/make-container :java-array :float32 %)))) 49 | (throw (Exception. (format "Failed to load img: %s" img-path)))))) 50 | -------------------------------------------------------------------------------- /rcnn/cython/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps_cython( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /rcnn/utils/load_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..logger import logger 3 | from ..config import config 4 | from ..dataset import * 5 | 6 | 7 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, 8 | flip=False): 9 | """ load ground truth roidb """ 10 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path) 11 | roidb = imdb.gt_roidb() 12 | print('roidb size', len(roidb)) 13 | if flip: 14 | roidb = imdb.append_flipped_images(roidb) 15 | print('flipped roidb size', len(roidb)) 16 | return roidb 17 | 18 | 19 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, 
20 | proposal='rpn', append_gt=True, flip=False): 21 | """ load proposal roidb (append_gt when training) """ 22 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path) 23 | gt_roidb = imdb.gt_roidb() 24 | roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb, append_gt) 25 | if flip: 26 | roidb = imdb.append_flipped_images(roidb) 27 | return roidb 28 | 29 | 30 | def merge_roidb(roidbs): 31 | """ roidb are list, concat them together """ 32 | roidb = roidbs[0] 33 | for r in roidbs[1:]: 34 | roidb.extend(r) 35 | return roidb 36 | 37 | 38 | def filter_roidb(roidb): 39 | """ remove roidb entries without usable rois """ 40 | 41 | def is_valid(entry): 42 | """ valid images have at least 1 fg or bg roi """ 43 | overlaps = entry['max_overlaps'] 44 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 45 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 46 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 47 | #valid = len(fg_inds) > 0 48 | return valid 49 | 50 | num = len(roidb) 51 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 52 | num_after = len(filtered_roidb) 53 | logger.info('load data: filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after)) 54 | 55 | return filtered_roidb 56 | -------------------------------------------------------------------------------- /rcnn/utils/load_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def load_checkpoint(prefix, epoch): 5 | """ 6 | Load model checkpoint from file. 7 | :param prefix: Prefix of model name. 8 | :param epoch: Epoch number of model we would like to load. 9 | :return: (arg_params, aux_params) 10 | arg_params : dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's weights. 12 | aux_params : dict of str to NDArray 13 | Model parameter, dict of name to NDArray of net's auxiliary states. 14 | """ 15 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 16 | arg_params = {} 17 | aux_params = {} 18 | for k, v in save_dict.items(): 19 | tp, name = k.split(':', 1) 20 | if tp == 'arg': 21 | arg_params[name] = v 22 | if tp == 'aux': 23 | aux_params[name] = v 24 | return arg_params, aux_params 25 | 26 | 27 | def convert_context(params, ctx): 28 | """ 29 | :param params: dict of str to NDArray 30 | :param ctx: the context to convert to 31 | :return: dict of str of NDArray with context ctx 32 | """ 33 | new_params = dict() 34 | for k, v in params.items(): 35 | new_params[k] = v.as_in_context(ctx) 36 | return new_params 37 | 38 | 39 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 40 | """ 41 | wrapper for load checkpoint 42 | :param prefix: Prefix of model name. 43 | :param epoch: Epoch number of model we would like to load. 44 | :param convert: reference model should be converted to GPU NDArray first 45 | :param ctx: if convert then ctx must be designated. 
46 | :param process: model should drop any test 47 | :return: (arg_params, aux_params) 48 | """ 49 | arg_params, aux_params = load_checkpoint(prefix, epoch) 50 | if convert: 51 | if ctx is None: 52 | ctx = mx.cpu() 53 | arg_params = convert_context(arg_params, ctx) 54 | aux_params = convert_context(aux_params, ctx) 55 | if process: 56 | tests = [k for k in arg_params.keys() if '_test' in k] 57 | for test in tests: 58 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 59 | return arg_params, aux_params 60 | -------------------------------------------------------------------------------- /rcnn/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. 
*/ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /rcnn/cython/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /src/facial_rec/detect.clj: -------------------------------------------------------------------------------- 1 | (ns facial-rec.detect 2 | (:require [libpython-clj2.require :refer [require-python]] 3 | [libpython-clj2.python :refer [py. py.- py..] :as py] 4 | [clojure.tools.logging :as log])) 5 | 6 | (require-python '[distutils.core :refer [setup]]) 7 | (require-python '[numpy :as np]) 8 | (require-python 'pyximport) 9 | (pyximport/install :setup_args {:include_dirs (py/->py-list 10 | [(np/get_include)])}) 11 | (require-python '[retinaface :as rface]) 12 | (require-python 'cv2) 13 | (require-python '[builtins :refer [slice]]) 14 | (require-python '[skimage.transform :as trans]) 15 | 16 | 17 | (defonce model (rface/RetinaFace "models/detection/R50" 0 -1)) 18 | 19 | 20 | (defn detect-faces 21 | [img-path] 22 | (py/with-gil-stack-rc-context 23 | (if-let [cv-img (cv2/imread img-path)] 24 | (when-let [detection (py. 
model detect cv-img 0.8)] 25 | (let [[faces landmarks] detection] 26 | (->> (mapv (fn [face landmark] 27 | (let [face-bbox (->> (take 4 face) 28 | (map (comp long #(Math/round (double %))))) 29 | confidence (last face)] 30 | {:confidence confidence 31 | :bbox {:top-left (vec (take 2 face-bbox)) 32 | :bottom-right (vec (drop 2 face-bbox))} 33 | :landmarks (mapv #(mapv int %) landmark)})) 34 | faces landmarks)))) 35 | (throw (Exception. (format "Unable to open image %s" img-path)))))) 36 | 37 | 38 | (defn render-faces! 39 | "Draw the face detection result on an image. Presumably the same image the data came from." 40 | [img face-detection-result & {:keys [bbox-color landmark-color] 41 | :or {bbox-color [0 255 0] 42 | landmark-color [255 0 0]}}] 43 | (doseq [{:keys [bbox landmarks]} face-detection-result] 44 | (cv2/rectangle img (:top-left bbox) (:bottom-right bbox) bbox-color 2) 45 | (doseq [landmark landmarks] 46 | (cv2/circle img landmark 1 landmark-color 2))) 47 | img) 48 | 49 | 50 | (def ideal-face-landmarks 51 | (np/array [[30.2946, 51.6963] 52 | [65.5318, 51.5014] 53 | [48.0252, 71.7366] 54 | [33.5493, 92.3655] 55 | [62.7299, 92.2041]] 56 | :dtype np/float32)) 57 | 58 | 59 | (defn affine-warp-mat 60 | [landmarks] 61 | (try 62 | (let [landmark-ary (np/array landmarks :dtype np/float32) 63 | sim-trans (trans/SimilarityTransform) 64 | success? (py. sim-trans estimate landmark-ary ideal-face-landmarks)] 65 | (when success? 66 | (-> (py.- sim-trans params) 67 | (py/get-item [(slice 0 2) (slice nil)])))) 68 | (catch Throwable e 69 | (log/warnf e (format "Similarity transform failed for landmarks: %s" 70 | landmarks)) 71 | nil))) 72 | 73 | 74 | (defn crop-faces 75 | "Crop out faces. For each face detection result, return a new image. Returned images 76 | are scaled to a specific size (the size needed by the facial feature engine)." 77 | [img face-detection-result 78 | & {:keys [face-size align?] 79 | :or {face-size [112 112] 80 | align? true}}] 81 | ;;When align? is true, use the landmarks to estimate a similarity transform onto 82 | ;;ideal-face-landmarks and warp the face out with cv2.warpAffine. 83 | (->> face-detection-result 84 | (mapv (fn [{:keys [bbox landmarks]}] 85 | (let [{:keys [top-left bottom-right]} bbox 86 | [min-x min-y] top-left 87 | [max-x max-y] bottom-right 88 | affine-mat (when align? 89 | (affine-warp-mat landmarks))] 90 | (if affine-mat 91 | (cv2/warpAffine img affine-mat face-size :borderValue 0.0) 92 | ;;Fallthrough in the case the estimate mechanism fails or 93 | ;;the user doesn't want alignment.
94 | (-> 95 | (py/get-item img [(slice min-y max-y) (slice min-x max-x)]) 96 | (cv2/resize [112 112])))))))) 97 | -------------------------------------------------------------------------------- /src/facial_rec/demo.clj: -------------------------------------------------------------------------------- 1 | (ns facial-rec.demo 2 | (:require [facial-rec.detect :as detect] 3 | [facial-rec.face-feature :as face-feature] 4 | [libpython-clj2.require :refer [require-python]] 5 | [libpython-clj2.python :as py] 6 | [tech.v3.io :as io] 7 | [tech.v3.datatype.functional :as dfn] 8 | ;;Include serialization support for tech.v3.datatype datatypes 9 | [tech.v3.datatype.nippy] 10 | [clojure.tools.logging :as log]) 11 | (:import [java.io File] 12 | [java.util UUID])) 13 | 14 | 15 | 16 | (io/make-parents "faces/face.jpg") 17 | (require-python 'cv2) 18 | 19 | 20 | (defn filename->faces 21 | [fname] 22 | (py/with-gil-stack-rc-context 23 | (let [detection (detect/detect-faces fname) 24 | cropped-faces (detect/crop-faces (cv2/imread fname) detection)] 25 | (mapv (fn [detection-result face-img] 26 | (let [face-id (UUID/randomUUID) 27 | dest-fname (format "faces/%s.png" face-id) 28 | dest-feature-fname (format "file://faces/%s.nippy" face-id) 29 | _ (cv2/imwrite dest-fname face-img) 30 | feature (face-feature/face->feature dest-fname) 31 | metadata (merge detection-result 32 | {:id face-id 33 | :src-file fname 34 | :feature feature})] 35 | (io/put-nippy! dest-feature-fname metadata) 36 | metadata)) 37 | detection cropped-faces)))) 38 | 39 | 40 | (defn delete-previously-found-faces! 41 | [] 42 | (->> (file-seq (io/file "faces")) 43 | (remove #(.isDirectory ^File %)) 44 | (map (fn [f] 45 | (.delete f))) 46 | (dorun))) 47 | 48 | 49 | (defn find-annotate-faces! 50 | [] 51 | (log/info "finding faces") 52 | (delete-previously-found-faces!) 53 | (py/with-gil-stack-rc-context 54 | (->> (file-seq (io/file "dataset")) 55 | (remove #(.isDirectory ^File %)) 56 | (mapcat (fn [^File src-img] 57 | (filename->faces (.toString src-img)))) 58 | vec 59 | (#(do (log/infof "Found %d faces" (count %)) 60 | %))))) 61 | 62 | 63 | (defn annotations 64 | [] 65 | (->> (file-seq (io/file "faces")) 66 | (map #(.toString ^File %)) 67 | (filter #(.endsWith ^String % "nippy")) 68 | (map (comp (juxt :id identity) io/get-nippy)) 69 | (into {}))) 70 | 71 | 72 | (defn annotations-by-file 73 | [] 74 | (group-by :src-file (vals (annotations)))) 75 | 76 | 77 | (defn nearest 78 | [ann-id] 79 | (let [{:keys [feature] :as target-annotation} (get (annotations) ann-id)] 80 | (->> (vals (annotations)) 81 | (map #(assoc % :distance-squared (dfn/distance-squared feature (:feature %)))) 82 | (sort-by :distance-squared) 83 | (map #(dissoc % :feature))))) 84 | 85 | 86 | (defn- display-face-img 87 | [{:keys [id] :as entry}] 88 | (format "![face-img](faces/%s.png) " id)) 89 | 90 | 91 | (defn- display-distance-and-face-img 92 | [{:keys [id distance-squared] :as entry}] 93 | (format "%02d %s" 94 | (long (Math/sqrt (double distance-squared))) 95 | (display-face-img entry))) 96 | 97 | 98 | (defn output-face-results! 
99 | [& [all-faces]] 100 | (let [all-faces (or all-faces (find-annotate-faces!))] 101 | (spit "results.md" 102 | (with-out-str 103 | (println "## Results") 104 | (println "| face-img | 5 nearest |") 105 | (println "|-----|------|") 106 | (->> all-faces 107 | (map (fn [{:keys [id] :as entry}] 108 | (println "|" (display-face-img entry) 109 | "|" (->> (nearest id) 110 | (take 5) 111 | (map display-distance-and-face-img) 112 | (reduce str)) 113 | "|"))) 114 | (dorun)))))) 115 | 116 | 117 | (comment 118 | ;;Stress testing the system 119 | (dotimes [iter 100] 120 | (println "running") 121 | (find-annotate-faces!)) 122 | ) 123 | -------------------------------------------------------------------------------- /rcnn/processing/generate_anchor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate base anchors on index 0 3 | """ 4 | from __future__ import print_function 5 | import sys 6 | from builtins import range 7 | import numpy as np 8 | from ..cython.anchors import anchors_cython 9 | #from ..config import config 10 | 11 | 12 | def anchors_plane(feat_h, feat_w, stride, base_anchor): 13 | return anchors_cython(feat_h, feat_w, stride, base_anchor) 14 | 15 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 16 | scales=2 ** np.arange(3, 6), stride=16, dense_anchor=False): 17 | """ 18 | Generate anchor (reference) windows by enumerating aspect ratios X 19 | scales wrt a reference (0, 0, 15, 15) window. 20 | """ 21 | 22 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 23 | ratio_anchors = _ratio_enum(base_anchor, ratios) 24 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 25 | for i in range(ratio_anchors.shape[0])]) 26 | if dense_anchor: 27 | assert stride%2==0 28 | anchors2 = anchors.copy() 29 | anchors2[:,:] += int(stride/2) 30 | anchors = np.vstack( (anchors, anchors2) ) 31 | #print('GA',base_anchor.shape, ratio_anchors.shape, anchors.shape) 32 | return anchors 33 | 34 | #def generate_anchors_fpn(base_size=[64,32,16,8,4], ratios=[0.5, 1, 2], scales=8): 35 | # """ 36 | # Generate anchor (reference) windows by enumerating aspect ratios X 37 | # scales wrt a reference (0, 0, 15, 15) window. 38 | # """ 39 | # anchors = [] 40 | # _ratios = ratios.reshape( (len(base_size), -1) ) 41 | # _scales = scales.reshape( (len(base_size), -1) ) 42 | # for i,bs in enumerate(base_size): 43 | # __ratios = _ratios[i] 44 | # __scales = _scales[i] 45 | # #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 46 | # r = generate_anchors(bs, __ratios, __scales) 47 | # #print('anchors_fpn', r.shape, file=sys.stderr) 48 | # anchors.append(r) 49 | # return anchors 50 | 51 | def generate_anchors_fpn(dense_anchor=False, cfg = None): 52 | #assert(False) 53 | """ 54 | Generate anchor (reference) windows by enumerating aspect ratios X 55 | scales wrt a reference (0, 0, 15, 15) window. 
56 | """ 57 | if cfg is None: 58 | from ..config import config 59 | cfg = config.RPN_ANCHOR_CFG 60 | RPN_FEAT_STRIDE = [] 61 | for k in cfg: 62 | RPN_FEAT_STRIDE.append( int(k) ) 63 | RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) 64 | anchors = [] 65 | for k in RPN_FEAT_STRIDE: 66 | v = cfg[str(k)] 67 | bs = v['BASE_SIZE'] 68 | __ratios = np.array(v['RATIOS']) 69 | __scales = np.array(v['SCALES']) 70 | stride = int(k) 71 | #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 72 | r = generate_anchors(bs, __ratios, __scales, stride, dense_anchor) 73 | #print('anchors_fpn', r.shape, file=sys.stderr) 74 | anchors.append(r) 75 | 76 | return anchors 77 | 78 | def _whctrs(anchor): 79 | """ 80 | Return width, height, x center, and y center for an anchor (window). 81 | """ 82 | 83 | w = anchor[2] - anchor[0] + 1 84 | h = anchor[3] - anchor[1] + 1 85 | x_ctr = anchor[0] + 0.5 * (w - 1) 86 | y_ctr = anchor[1] + 0.5 * (h - 1) 87 | return w, h, x_ctr, y_ctr 88 | 89 | 90 | def _mkanchors(ws, hs, x_ctr, y_ctr): 91 | """ 92 | Given a vector of widths (ws) and heights (hs) around a center 93 | (x_ctr, y_ctr), output a set of anchors (windows). 94 | """ 95 | 96 | ws = ws[:, np.newaxis] 97 | hs = hs[:, np.newaxis] 98 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 99 | y_ctr - 0.5 * (hs - 1), 100 | x_ctr + 0.5 * (ws - 1), 101 | y_ctr + 0.5 * (hs - 1))) 102 | return anchors 103 | 104 | 105 | def _ratio_enum(anchor, ratios): 106 | """ 107 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 108 | """ 109 | 110 | w, h, x_ctr, y_ctr = _whctrs(anchor) 111 | size = w * h 112 | size_ratios = size / ratios 113 | ws = np.round(np.sqrt(size_ratios)) 114 | hs = np.round(ws * ratios) 115 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 116 | return anchors 117 | 118 | 119 | def _scale_enum(anchor, scales): 120 | """ 121 | Enumerate a set of anchors for each scale wrt an anchor. 
122 | """ 123 | 124 | w, h, x_ctr, y_ctr = _whctrs(anchor) 125 | ws = w * scales 126 | hs = h * scales 127 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 128 | return anchors 129 | -------------------------------------------------------------------------------- /rcnn/tools/test_rpn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..dataset import * 9 | from ..core.loader import TestLoader 10 | from ..core.tester import Predictor, generate_proposals, test_proposals 11 | from ..utils.load_model import load_param 12 | 13 | 14 | def test_rpn(network, dataset, image_set, root_path, dataset_path, 15 | ctx, prefix, epoch, 16 | vis, shuffle, thresh, test_output=False): 17 | # rpn generate proposal config 18 | config.TEST.HAS_RPN = True 19 | 20 | # print config 21 | logger.info(pprint.pformat(config)) 22 | 23 | # load symbol 24 | sym = eval('get_' + network + '_rpn_test')() 25 | 26 | # load dataset and prepare imdb for training 27 | imdb = eval(dataset)(image_set, root_path, dataset_path) 28 | roidb = imdb.gt_roidb() 29 | test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=True, withlabel=True) 30 | 31 | # load model 32 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx) 33 | 34 | # infer shape 35 | data_shape_dict = dict(test_data.provide_data) 36 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 37 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 38 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 39 | 40 | # check parameters 41 | for k in sym.list_arguments(): 42 | if k in data_shape_dict or 'label' in k: 43 | continue 44 | assert k in arg_params, k + ' not initialized' 45 | assert arg_params[k].shape == arg_shape_dict[k], \ 46 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 47 | for k in sym.list_auxiliary_states(): 48 | assert k in aux_params, k + ' not initialized' 49 | assert aux_params[k].shape == aux_shape_dict[k], \ 50 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 51 | 52 | # decide maximum shape 53 | data_names = [k[0] for k in test_data.provide_data] 54 | label_names = None if test_data.provide_label is None else [k[0] for k in test_data.provide_label] 55 | max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 56 | 57 | # create predictor 58 | predictor = Predictor(sym, data_names, label_names, 59 | context=ctx, max_data_shapes=max_data_shape, 60 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 61 | arg_params=arg_params, aux_params=aux_params) 62 | 63 | # start testing 64 | if not test_output: 65 | imdb_boxes = generate_proposals(predictor, test_data, imdb, vis=vis, thresh=thresh) 66 | imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes) 67 | else: 68 | test_proposals(predictor, test_data, imdb, roidb, vis=vis) 69 | 70 | 71 | def parse_args(): 72 | parser = argparse.ArgumentParser(description='Test a Region Proposal Network') 73 | # general 74 | parser.add_argument('--network', help='network name', default=default.network, type=str) 75 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 76 | args, rest = 
parser.parse_known_args() 77 | generate_config(args.network, args.dataset) 78 | parser.add_argument('--image_set', help='image_set name', default=default.test_image_set, type=str) 79 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 80 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 81 | # testing 82 | parser.add_argument('--prefix', help='model to test with', default=default.rpn_prefix, type=str) 83 | parser.add_argument('--epoch', help='model to test with', default=default.rpn_epoch, type=int) 84 | # rpn 85 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 86 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 87 | parser.add_argument('--thresh', help='rpn proposal threshold', default=0, type=float) 88 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 89 | args = parser.parse_args() 90 | return args 91 | 92 | 93 | def main(): 94 | args = parse_args() 95 | logger.info('Called with argument: %s' % args) 96 | ctx = mx.gpu(args.gpu) 97 | test_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 98 | ctx, args.prefix, args.epoch, 99 | args.vis, args.shuffle, args.thresh) 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /rcnn/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from rcnn.pycocotools import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 
35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] 104 | -------------------------------------------------------------------------------- /rcnn/tools/test_rcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..dataset import * 9 | from ..core.loader import TestLoader 10 | from ..core.tester import Predictor, pred_eval 11 | from ..utils.load_model import load_param 12 | 13 | 14 | def test_rcnn(network, dataset, image_set, root_path, dataset_path, 15 | ctx, prefix, epoch, 16 | vis, shuffle, has_rpn, proposal, thresh): 17 | # set config 18 | if has_rpn: 19 | config.TEST.HAS_RPN = True 20 | 21 | # print config 22 | logger.info(pprint.pformat(config)) 23 | 24 | # load symbol and testing data 25 | if has_rpn: 26 | sym = eval('get_' + network + '_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS) 27 | imdb = eval(dataset)(image_set, root_path, dataset_path) 28 | roidb = imdb.gt_roidb() 29 | else: 30 | sym = eval('get_' + network + '_rcnn_test')(num_classes=config.NUM_CLASSES) 31 | imdb = eval(dataset)(image_set, root_path, dataset_path) 32 | gt_roidb = imdb.gt_roidb() 33 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb) 34 | 35 | # get test data iter 36 | test_data = TestLoader(roidb, batch_size=1, shuffle=shuffle, has_rpn=has_rpn) 37 | 38 | # load model 39 | arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True) 40 | 41 | # infer shape 42 | data_shape_dict = dict(test_data.provide_data) 43 | arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict) 44 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 45 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 46 | 47 | # check parameters 48 | for k in sym.list_arguments(): 49 | if k in data_shape_dict or 'label' in k: 50 | continue 51 | assert k in arg_params, k + ' not initialized' 52 | assert arg_params[k].shape == arg_shape_dict[k], \ 53 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 54 | for k in sym.list_auxiliary_states(): 55 | assert k in aux_params, k + ' not initialized' 56 | assert aux_params[k].shape == aux_shape_dict[k], \ 57 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 58 | 59 | # decide maximum shape 60 | data_names = [k[0] for k in test_data.provide_data] 61 | label_names = None 62 | max_data_shape = [('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 63 | if not has_rpn: 64 | max_data_shape.append(('rois', (1, config.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) 65 | 66 | # create predictor 67 | predictor = Predictor(sym, data_names, label_names, 68 | context=ctx, max_data_shapes=max_data_shape, 69 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 70 | arg_params=arg_params, aux_params=aux_params) 71 | 72 | # start detection 73 | pred_eval(predictor, test_data, imdb, vis=vis, thresh=thresh) 74 | 75 | 76 | def parse_args(): 77 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 78 | # general 79 | parser.add_argument('--network', help='network name', default=default.network, type=str) 80 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 81 | args, rest = parser.parse_known_args() 82 | generate_config(args.network, args.dataset) 83 | parser.add_argument('--image_set', help='image_set name', default=default.test_image_set, type=str) 84 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 85 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 86 | # testing 87 | parser.add_argument('--prefix', help='model to test with', default=default.rcnn_prefix, type=str) 88 | parser.add_argument('--epoch', help='model to test with', default=default.rcnn_epoch, type=int) 89 | parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int) 90 | # rcnn 91 | parser.add_argument('--vis', help='turn on visualization', action='store_true') 92 | parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float) 93 | parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true') 94 | parser.add_argument('--has_rpn', help='generate proposals on the fly', action='store_true') 95 | parser.add_argument('--proposal', help='can be ss for selective search or rpn', default='rpn', type=str) 96 | args = parser.parse_args() 97 | return args 98 | 99 | 100 | def main(): 101 | args = parse_args() 102 | logger.info('Called with argument: %s' % args) 103 | ctx = 
mx.gpu(args.gpu) 104 | test_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 105 | ctx, args.prefix, args.epoch, 106 | args.vis, args.shuffle, args.has_rpn, args.proposal, args.thresh) 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /rcnn/cython/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 |
CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /rcnn/cython/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | from setuptools import setup 11 | from distutils.extension import Extension 12 | from Cython.Distutils import build_ext 13 | import numpy as np 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # Adapted from 19 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | def locate_cuda(): 28 | """Locate the CUDA environment on the system 29 | 30 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | and values giving the absolute path to each directory. 32 | 33 | Starts by looking for the CUDAHOME env variable. If not found, everything 34 | is based on finding 'nvcc' in the PATH.
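For example, with CUDAHOME=/usr/local/cuda (an illustrative path) the result is {'home': '/usr/local/cuda', 'nvcc': '/usr/local/cuda/bin/nvcc', 'include': '/usr/local/cuda/include', 'lib64': '/usr/local/cuda/lib64'}.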
35 | """ 36 | 37 | # first check if the CUDAHOME env variable is in use 38 | if 'CUDAHOME' in os.environ: 39 | home = os.environ['CUDAHOME'] 40 | nvcc = pjoin(home, 'bin', 'nvcc') 41 | else: 42 | # otherwise, search the PATH for NVCC 43 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | if nvcc is None: 46 | raise EnvironmentError('The nvcc binary could not be ' 47 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | home = os.path.dirname(os.path.dirname(nvcc)) 49 | 50 | cudaconfig = {'home':home, 'nvcc':nvcc, 51 | 'include': pjoin(home, 'include'), 52 | 'lib64': pjoin(home, 'lib64')} 53 | for k, v in cudaconfig.items(): 54 | if not os.path.exists(v): 55 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | 57 | return cudaconfig 58 | 59 | 60 | # Test if cuda could be foun 61 | try: 62 | CUDA = locate_cuda() 63 | except EnvironmentError: 64 | CUDA = None 65 | 66 | 67 | # Obtain the numpy include directory. This logic works across numpy versions. 68 | try: 69 | numpy_include = np.get_include() 70 | except AttributeError: 71 | numpy_include = np.get_numpy_include() 72 | 73 | 74 | def customize_compiler_for_nvcc(self): 75 | """inject deep into distutils to customize how the dispatch 76 | to gcc/nvcc works. 77 | 78 | If you subclass UnixCCompiler, it's not trivial to get your subclass 79 | injected in, and still have the right customizations (i.e. 80 | distutils.sysconfig.customize_compiler) run on it. So instead of going 81 | the OO route, I have this. Note, it's kindof like a wierd functional 82 | subclassing going on.""" 83 | 84 | # tell the compiler it can processes .cu 85 | self.src_extensions.append('.cu') 86 | 87 | # save references to the default compiler_so and _comple methods 88 | default_compiler_so = self.compiler_so 89 | super = self._compile 90 | 91 | # now redefine the _compile method. This gets executed for each 92 | # object but distutils doesn't have the ability to change compilers 93 | # based on source extension: we add it. 
94 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 95 | if os.path.splitext(src)[1] == '.cu': 96 | # use the cuda for .cu files 97 | self.set_executable('compiler_so', CUDA['nvcc']) 98 | # use only a subset of the extra_postargs, which are 1-1 translated 99 | # from the extra_compile_args in the Extension class 100 | postargs = extra_postargs['nvcc'] 101 | else: 102 | postargs = extra_postargs['gcc'] 103 | 104 | super(obj, src, ext, cc_args, postargs, pp_opts) 105 | # reset the default compiler_so, which we might have changed for cuda 106 | self.compiler_so = default_compiler_so 107 | 108 | # inject our redefined _compile method into the class 109 | self._compile = _compile 110 | 111 | 112 | # run the customize_compiler 113 | class custom_build_ext(build_ext): 114 | def build_extensions(self): 115 | customize_compiler_for_nvcc(self.compiler) 116 | build_ext.build_extensions(self) 117 | 118 | 119 | ext_modules = [ 120 | Extension( 121 | "bbox", 122 | ["bbox.pyx"], 123 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | include_dirs=[numpy_include] 125 | ), 126 | Extension( 127 | "anchors", 128 | ["anchors.pyx"], 129 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 130 | include_dirs=[numpy_include] 131 | ), 132 | Extension( 133 | "cpu_nms", 134 | ["cpu_nms.pyx"], 135 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 136 | include_dirs = [numpy_include] 137 | ), 138 | ] 139 | 140 | if CUDA is not None: 141 | ext_modules.append( 142 | Extension('gpu_nms', 143 | ['nms_kernel.cu', 'gpu_nms.pyx'], 144 | library_dirs=[CUDA['lib64']], 145 | libraries=['cudart'], 146 | language='c++', 147 | runtime_library_dirs=[CUDA['lib64']], 148 | # this syntax is specific to this build system 149 | # we're only going to use certain compiler args with nvcc and not with 150 | # gcc the implementation of this trick is in customize_compiler() below 151 | extra_compile_args={'gcc': ["-Wno-unused-function"], 152 | 'nvcc': ['-arch=sm_35', 153 | '--ptxas-options=-v', 154 | '-c', 155 | '--compiler-options', 156 | "'-fPIC'"]}, 157 | include_dirs = [numpy_include, CUDA['include']] 158 | ) 159 | ) 160 | else: 161 | print('Skipping GPU_NMS') 162 | 163 | 164 | setup( 165 | name='frcnn_cython', 166 | ext_modules=ext_modules, 167 | # inject our custom trigger 168 | cmdclass={'build_ext': custom_build_ext}, 169 | ) 170 | -------------------------------------------------------------------------------- /rcnn/core/metric.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import mxnet as mx 4 | import numpy as np 5 | 6 | from rcnn.config import config 7 | 8 | 9 | def get_rpn_names(): 10 | pred = ['rpn_cls_prob', 'rpn_bbox_loss', 'rpn_label', 'rpn_bbox_weight'] 11 | label = ['rpn_label', 'rpn_bbox_target', 'rpn_bbox_weight'] 12 | return pred, label 13 | 14 | 15 | 16 | class RPNAccMetric(mx.metric.EvalMetric): 17 | def __init__(self, pred_idx=-1, label_idx=-1,name='RPNAcc'): 18 | super(RPNAccMetric, self).__init__(name) 19 | self.pred, self.label = get_rpn_names() 20 | #self.name = 'RPNAcc' 21 | self.name = [name, name+'_BG', name+'_FG'] 22 | self.pred_idx = pred_idx 23 | self.label_idx = label_idx 24 | self.STAT = [0, 0, 0] 25 | 26 | def reset(self): 27 | """Clear the internal statistics to initial state.""" 28 | if isinstance(self.name, str): 29 | self.num_inst = 0 30 | self.sum_metric = 0.0 31 | else: 32 | #print('reset to ',len(self.name), self.name, 
file=sys.stderr) 33 | self.num_inst = [0] * len(self.name) 34 | self.sum_metric = [0.0] * len(self.name) 35 | 36 | 37 | def get(self): 38 | if isinstance(self.name, str): 39 | if self.num_inst == 0: 40 | return (self.name, float('nan')) 41 | else: 42 | return (self.name, self.sum_metric / self.num_inst) 43 | else: 44 | names = ['%s'%(self.name[i]) for i in range(len(self.name))] 45 | values = [x / y if y != 0 else float('nan') \ 46 | for x, y in zip(self.sum_metric, self.num_inst)] 47 | return (names, values) 48 | 49 | def update(self, labels, preds): 50 | if self.pred_idx>=0 and self.label_idx>=0: 51 | pred = preds[self.pred_idx] 52 | label = preds[self.label_idx] 53 | else: 54 | pred = preds[self.pred.index('rpn_cls_prob')] 55 | label = labels[self.label.index('rpn_label')] 56 | #label = preds[self.pred.index('rpn_label')] 57 | 58 | num_images = pred.shape[0] 59 | #print(pred.shape, label.shape, file=sys.stderr) 60 | # pred (b, c, p) or (b, c, h, w) 61 | pred_label = mx.ndarray.argmax_channel(pred).asnumpy().astype('int32') 62 | #pred_label = pred_label.reshape((pred_label.shape[0], -1)) 63 | pred_label = pred_label.reshape(-1,) 64 | # label (b, p) 65 | label = label.asnumpy().astype('int32').reshape(-1,) 66 | #print(pred_label.shape, label.shape) 67 | 68 | # filter with keep_inds 69 | keep_inds = np.where(label != -1)[0] 70 | #print('in_metric acc', pred_label.shape, label.shape, len(keep_inds), file=sys.stderr) 71 | #print(keep_inds, file=sys.stderr) 72 | _pred_label = pred_label[keep_inds] 73 | _label = label[keep_inds] 74 | #print('in_metric2', pred_label.shape, label.shape, len(keep_inds), file=sys.stderr) 75 | if isinstance(self.name, str): 76 | self.sum_metric += np.sum(_pred_label.flat == _label.flat) 77 | self.num_inst += len(_pred_label.flat) 78 | else: 79 | self.sum_metric[0] += np.sum(_pred_label.flat == _label.flat) 80 | self.num_inst[0] += len(_pred_label.flat) 81 | 82 | keep_inds = np.where(label == 0)[0] 83 | _pred_label = pred_label[keep_inds] 84 | _label = label[keep_inds] 85 | self.sum_metric[1] += np.sum(_pred_label.flat == _label.flat) 86 | self.num_inst[1] += len(_pred_label.flat) 87 | 88 | keep_inds = np.where(label == 1)[0] 89 | _pred_label = pred_label[keep_inds] 90 | _label = label[keep_inds] 91 | a = np.sum(_pred_label.flat == _label.flat) 92 | b = len(_pred_label.flat) 93 | self.sum_metric[2] += a 94 | self.num_inst[2] += b 95 | 96 | #self.STAT[0]+=a 97 | #self.STAT[1]+=b 98 | #self.STAT[2]+=num_images 99 | #if self.STAT[2]%400==0: 100 | # print('FG_ACC', self.pred_idx, self.STAT[2], self.STAT[0], self.STAT[1], float(self.STAT[0])/self.STAT[1], file=sys.stderr) 101 | # self.STAT = [0,0,0] 102 | 103 | 104 | class RPNLogLossMetric(mx.metric.EvalMetric): 105 | def __init__(self, pred_idx=-1, label_idx=-1): 106 | super(RPNLogLossMetric, self).__init__('RPNLogLoss') 107 | self.pred, self.label = get_rpn_names() 108 | self.pred_idx = pred_idx 109 | self.label_idx = label_idx 110 | 111 | def update(self, labels, preds): 112 | if self.pred_idx>=0 and self.label_idx>=0: 113 | pred = preds[self.pred_idx] 114 | label = preds[self.label_idx] 115 | else: 116 | pred = preds[self.pred.index('rpn_cls_prob')] 117 | label = labels[self.label.index('rpn_label')] 118 | #label = preds[self.pred.index('rpn_label')] 119 | 120 | # label (b, p) 121 | label = label.asnumpy().astype('int32').reshape((-1)) 122 | # pred (b, c, p) or (b, c, h, w) --> (b, p, c) --> (b*p, c) 123 | pred = pred.asnumpy().reshape((pred.shape[0], pred.shape[1], -1)).transpose((0, 2, 1)) 124 | pred = 
pred.reshape((label.shape[0], -1)) 125 | 126 | # filter with keep_inds 127 | keep_inds = np.where(label != -1)[0] 128 | label = label[keep_inds] 129 | cls = pred[keep_inds, label] 130 | #print('in_metric log', label.shape, cls.shape, file=sys.stderr) 131 | 132 | cls += 1e-14 133 | cls_loss = -1 * np.log(cls) 134 | cls_loss = np.sum(cls_loss) 135 | self.sum_metric += cls_loss 136 | self.num_inst += label.shape[0] 137 | 138 | 139 | class RPNL1LossMetric(mx.metric.EvalMetric): 140 | def __init__(self, loss_idx=-1, weight_idx=-1, name='RPNL1Loss'): 141 | super(RPNL1LossMetric, self).__init__(name) 142 | self.pred, self.label = get_rpn_names() 143 | self.loss_idx = loss_idx 144 | self.weight_idx = weight_idx 145 | self.name = name 146 | 147 | def update(self, labels, preds): 148 | if self.loss_idx>=0 and self.weight_idx>=0: 149 | bbox_loss = preds[self.loss_idx].asnumpy() 150 | bbox_weight = preds[self.weight_idx].asnumpy() 151 | else: 152 | bbox_loss = preds[self.pred.index('rpn_bbox_loss')].asnumpy() 153 | bbox_weight = labels[self.label.index('rpn_bbox_weight')].asnumpy() 154 | #bbox_weight = preds[self.pred.index('rpn_bbox_weight')].asnumpy() 155 | 156 | #print('in_metric', self.name, bbox_weight.shape, bbox_loss.shape) 157 | 158 | # calculate num_inst (average on those fg anchors) 159 | if config.LR_MODE==0: 160 | num_inst = np.sum(bbox_weight > 0) / (bbox_weight.shape[1]/config.NUM_ANCHORS) 161 | else: 162 | num_inst = 1 163 | #print('in_metric log', bbox_loss.shape, num_inst, file=sys.stderr) 164 | 165 | self.sum_metric += np.sum(bbox_loss) 166 | self.num_inst += num_inst 167 | 168 | 169 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # facial-rec 2 | 3 | ![detection phase](detector_test.jpg) 4 | 5 | Demonstration of some pretty good facial rec tech using a 6 | [famous selfie](dataset/group-pic.JPG) with a 7 | [bunch of stars](dataset). 8 | 9 | 10 | TL/DR - jump to [results](docs/results.md) 11 | 12 | 13 | ## How It Works 14 | 15 | At a high level, facial recognition consists of two steps: *detection* and *embedding*. 16 | 17 | * *detection* takes a large image and produces a list of faces in the image. In our 18 | case this piece uses a fully-convolutional approach, which means it takes an input 19 | image and outputs an 'image' where each pixel consists of a bounding box, 20 | a confidence measure, and a set of landmarks. A second step does 21 | non-maximal suppression of the data, which is a fancy way of saying it looks at 22 | overlapping detections and keeps the highest-confidence one. From here we get a list of faces 23 | and landmarks. We crop the faces out of the source image and align them using the 24 | landmarks to a set of 'standard' landmarks used during training. 25 | 26 | 27 | * *embedding* - The next step is to produce an embedding, which is an N-dimensional 28 | vector. In our case that vector has 512 elements. These vectors have a special 29 | property: faces that look more similar will be closer in Euclidean space. So, 30 | for example, if I have two images of Tom Hanks and I create two embeddings, I would 31 | expect the distance between them to be less than the distance between, say, an 32 | image of Tom Hanks and an image of Denzel Washington (see the sketch below).
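To make that concrete, here is a minimal sketch of the comparison using the same `tech.v3.datatype.functional` namespace the demo code uses; the four-element vectors are made-up stand-ins for the real 512-element embeddings:

```clojure
(require '[tech.v3.datatype.functional :as dfn])

;; Made-up, truncated 'embeddings' purely for illustration.
(def hanks-a [0.10 0.80 0.30 0.50])
(def hanks-b [0.12 0.78 0.28 0.52])
(def denzel  [0.90 0.10 0.60 0.20])

;; Same person => small distance; different people => larger distance.
(dfn/distance-squared hanks-a hanks-b) ;; => ~0.0016
(dfn/distance-squared hanks-a denzel)  ;; => ~1.31
```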
33 | 34 | 35 | 36 | One of the most interesting aspects to me is that I can use a deep learning 37 | system to construct a [metric space](https://en.wikipedia.org/wiki/Metric_space) 38 | where distance corresponds to something very abstract, like how similar two 39 | people appear or how similar two outfits are. In this sense the 40 | target of our learning algorithm is a good embedding in our new space. 41 | 42 | 43 | That is it. That is how facial rec works at the base level. If you are more 44 | curious as to how this works from scratch please review the 45 | [RetinaFace detector](https://arxiv.org/abs/1905.00641) and 46 | the [ArcFace loss function](https://arxiv.org/abs/1801.07698). 47 | 48 | 49 | You may notice that the ArcFace paper is specifically about a loss function, so 50 | let me tell you that the loss function is the device that you 51 | use to tell a deep learning system exactly what to target during its training. The 52 | loss function, however, isn't specific to the network it is used to train, so one 53 | could use this loss function to train other types of similarity measures, potentially 54 | with networks that aren't specific to image recognition. 55 | 56 | 57 | If anyone asks you how facial rec works you can say: 58 | 59 | >> Chris Nuernberger told me facial recognition uses, among other things, 60 | a deep learning system to embed an image of a face into a metric 61 | space where distance is related to how likely it is that two images represent 62 | the same physical person. 63 | 64 | :-). Enjoy! 65 | 66 | 67 | ## Usage 68 | 69 | 70 | This system is built to show a realistic example of a cutting-edge system. As such 71 | it rests on four components: 72 | 1. docker 73 | 2. Conda 74 | 3. Python 75 | 4. Clojure 76 | 77 | 78 | The most advanced piece of the demo is actually the facial detection component. 79 | Luckily, it was nicely wrapped. To get it working we needed cython working, and 80 | there is some [good information](src/facial_rec/detect.clj) there if you want to 81 | use a system that is based partially on cython. 82 | 83 | 84 | ### Install Docker 85 | 86 | 87 | Installing docker is system-specific, but on all systems you want to install it such 88 | that you can run it 89 | [without sudo](https://docs.docker.com/install/linux/linux-postinstall/). 90 | 91 | 92 | ### Get the data 93 | 94 | This script mainly downloads the models used for detection and feature embedding. 95 | 96 | ```console 97 | scripts/get-data 98 | ``` 99 | 100 | 101 | ### Start up a REPL 102 | 103 | 104 | ```console 105 | scripts/run-conda-docker 106 | ``` 107 | 108 | The port is printed out in a line like: 109 | 110 | ```console 111 | nREPL server started on port 44507 on host localhost - nrepl://localhost:44507 112 | ``` 113 | 114 | Now, in emacs, vim, or wherever, connect to the exposed port on localhost. 115 | 116 | 117 | ### Find/Annotate Faces 118 | 119 | 120 | ```clojure 121 | (require '[facial-rec.demo :as demo]) 122 | ;;long pause as things compile 123 | ``` 124 | 125 | At this point, we have to say that the system is dynamically compiling cython and 126 | upgrading the networks to the newest version of mxnet. This is a noisy process 127 | for a few reasons: we are loading a newer numpy, compiling files, and loading networks.
128 | You will see warnings in the REPL, and the stdout of your docker container will display 129 | some errors regarding compiling the gpu non-maximal-suppression (nms) algorithm: 130 | ```console 131 | In file included from /home/chrisn/.conda/envs/pyclj/lib/python3.6/site-packages/numpy/core/include/numpy/ndarraytypes.h:1832:0, 132 | from /home/chrisn/.conda/envs/pyclj/lib/python3.6/site-packages/numpy/core/include/numpy/ndarrayobject.h:12, 133 | from /home/chrisn/.conda/envs/pyclj/lib/python3.6/site-packages/numpy/core/include/numpy/arrayobject.h:4, 134 | from /home/chrisn/.pyxbld/temp.linux-x86_64-3.6/pyrex/rcnn/cython/gpu_nms.c:598: 135 | /home/chrisn/.conda/envs/pyclj/lib/python3.6/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:17:2: warning: #warning "Using deprecated NumPy API, disable it with " "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [-Wcpp] 136 | #warning "Using deprecated NumPy API, disable it with " \ 137 | ^~~~~~~ 138 | /home/chrisn/.pyxbld/temp.linux-x86_64-3.6/pyrex/rcnn/cython/gpu_nms.c:600:10: fatal error: gpu_nms.hpp: No such file or directory 139 | #include "gpu_nms.hpp" 140 | ^~~~~~~~~~~~~ 141 | ``` 142 | 143 | Interestingly enough, the system still works fine. The nms errors are around building 144 | the gpu version of the nms algorithm, and we aren't using the gpu for this demo. 145 | [Nothing to see here](https://media.giphy.com/media/joV1k1sNOT5xC/giphy.gif) 146 | 147 | 148 | ```clojure 149 | (def faces (demo/find-annotate-faces!)) 150 | ;;...pause... 151 | #'faces 152 | ``` 153 | 154 | Now there are cutout faces in the faces subdir. You can do nearest searches in the 155 | demo namespace and see how well this network does. 156 | 157 | 158 | ```clojure 159 | (demo/output-face-results! faces) 160 | ``` 161 | 162 | This takes each face, finds the 5 nearest, and outputs the results to `results.md`. 163 | We use [`grip`](https://github.com/joeyespo/grip) to view the markdown files locally. 164 | 165 | 166 | Going further, any non-directory files in the dataset directory will be scanned and 167 | added to the dataset, so feel free to try it with your friends and family and see 168 | how good the results are. 169 | 170 | 171 | ## License 172 | 173 | Copyright © 2019 Chris Nuernberger 174 | 175 | This program and the accompanying materials are made available under the 176 | terms of the Eclipse Public License 2.0 which is available at 177 | http://www.eclipse.org/legal/epl-2.0.
178 | -------------------------------------------------------------------------------- /rcnn/dataset/retinaface.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | try: 3 | import cPickle as pickle 4 | except ImportError: 5 | import pickle 6 | import cv2 7 | import os 8 | import numpy as np 9 | import json 10 | #from PIL import Image 11 | 12 | from ..logger import logger 13 | from .imdb import IMDB 14 | from .ds_utils import unique_boxes, filter_small_boxes 15 | from ..config import config 16 | 17 | class retinaface(IMDB): 18 | def __init__(self, image_set, root_path, data_path): 19 | super(retinaface, self).__init__('retinaface', image_set, root_path, data_path) 20 | #assert image_set=='train' 21 | 22 | split = image_set 23 | self._split = image_set 24 | self._image_set = image_set 25 | 26 | 27 | self.root_path = root_path 28 | self.data_path = data_path 29 | 30 | 31 | self._dataset_path = self.data_path 32 | self._imgs_path = os.path.join(self._dataset_path, image_set, 'images') 33 | self._fp_bbox_map = {} 34 | label_file = os.path.join(self._dataset_path, image_set, 'label.txt') 35 | name = None 36 | for line in open(label_file, 'r'): 37 | line = line.strip() 38 | if line.startswith('#'): 39 | name = line[1:].strip() 40 | self._fp_bbox_map[name] = [] 41 | continue 42 | assert name is not None 43 | assert name in self._fp_bbox_map 44 | self._fp_bbox_map[name].append(line) 45 | print('origin image size', len(self._fp_bbox_map)) 46 | 47 | #self.num_images = len(self._image_paths) 48 | #self._image_index = range(len(self._image_paths)) 49 | self.classes = ['bg', 'face'] 50 | self.num_classes = len(self.classes) 51 | 52 | 53 | def gt_roidb(self): 54 | cache_file = os.path.join(self.cache_path, '{}_{}_gt_roidb.pkl'.format(self.name, self._split)) 55 | if os.path.exists(cache_file): 56 | with open(cache_file, 'rb') as fid: 57 | roidb = pickle.load(fid) 58 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 59 | self.num_images = len(roidb) 60 | return roidb 61 | 62 | roidb = [] 63 | max_num_boxes = 0 64 | nonattr_box_num = 0 65 | landmark_num = 0 66 | 67 | pp = 0 68 | for fp in self._fp_bbox_map: 69 | pp += 1 70 | if pp%1000==0: 71 | print('loading', pp) 72 | if self._split=='test': 73 | image_path = os.path.join(self._imgs_path, fp) 74 | roi = {'image': image_path} 75 | roidb.append(roi) 76 | continue 77 | boxes = np.zeros([len(self._fp_bbox_map[fp]), 4], np.float) 78 | landmarks = np.zeros([len(self._fp_bbox_map[fp]), 5, 3], np.float) 79 | blur = np.zeros((len(self._fp_bbox_map[fp]),), np.float) 80 | boxes_mask = [] 81 | 82 | gt_classes = np.ones([len(self._fp_bbox_map[fp])], np.int32) 83 | overlaps = np.zeros([len(self._fp_bbox_map[fp]), 2], np.float) 84 | 85 | imsize = cv2.imread(os.path.join(self._imgs_path, fp)).shape[0:2][::-1] 86 | ix = 0 87 | 88 | for aline in self._fp_bbox_map[fp]: 89 | #imsize = Image.open(os.path.join(self._imgs_path, fp)).size 90 | values = [float(x) for x in aline.strip().split()] 91 | bbox = [values[0], values[1], values[0]+values[2], values[1]+values[3]] 92 | 93 | x1 = bbox[0] 94 | y1 = bbox[1] 95 | x2 = min(imsize[0], bbox[2]) 96 | y2 = min(imsize[1], bbox[3]) 97 | if x1>=x2 or y1>=y2: 98 | continue 99 | 100 | if config.BBOX_MASK_THRESH>0: 101 | if (x2 - x1) < config.BBOX_MASK_THRESH or y2 - y1 < config.BBOX_MASK_THRESH: 102 | boxes_mask.append(np.array([x1, y1, x2, y2], np.float)) 103 | continue 104 | if (x2 - x1) < config.TRAIN.MIN_BOX_SIZE or y2 - y1 < 
config.TRAIN.MIN_BOX_SIZE: 105 | continue 106 | 107 | boxes[ix, :] = np.array([x1, y1, x2, y2], np.float) 108 | if self._split=='train': 109 | landmark = np.array( values[4:19], dtype=np.float32 ).reshape((5,3)) 110 | for li in range(5): 111 | #print(landmark) 112 | if landmark[li][0]==-1. and landmark[li][1]==-1.: #missing landmark 113 | assert landmark[li][2]==-1 114 | else: 115 | assert landmark[li][2]>=0 116 | if li==0: 117 | landmark_num+=1 118 | if landmark[li][2]==0.0:#visible 119 | landmark[li][2] = 1.0 120 | else: 121 | landmark[li][2] = 0.0 122 | 123 | landmarks[ix] = landmark 124 | 125 | blur[ix] = values[19] 126 | #print(aline, blur[ix]) 127 | if blur[ix]<0.0: 128 | blur[ix] = 0.3 129 | nonattr_box_num+=1 130 | 131 | cls = int(1) 132 | gt_classes[ix] = cls 133 | overlaps[ix, cls] = 1.0 134 | ix += 1 135 | max_num_boxes = max(max_num_boxes, ix) 136 | #overlaps = scipy.sparse.csr_matrix(overlaps) 137 | if self._split=='train' and ix==0: 138 | continue 139 | boxes = boxes[:ix,:] 140 | landmarks = landmarks[:ix,:,:] 141 | blur = blur[:ix] 142 | gt_classes = gt_classes[:ix] 143 | overlaps = overlaps[:ix,:] 144 | image_path = os.path.join(self._imgs_path, fp) 145 | with open(image_path, 'rb') as fin: 146 | stream = fin.read() 147 | stream = np.fromstring(stream, dtype=np.uint8) 148 | 149 | roi = { 150 | 'image': image_path, 151 | 'stream': stream, 152 | 'height': imsize[1], 153 | 'width': imsize[0], 154 | 'boxes': boxes, 155 | 'landmarks': landmarks, 156 | 'blur': blur, 157 | 'gt_classes': gt_classes, 158 | 'gt_overlaps': overlaps, 159 | 'max_classes': overlaps.argmax(axis=1), 160 | 'max_overlaps': overlaps.max(axis=1), 161 | 'flipped': False, 162 | } 163 | if len(boxes_mask)>0: 164 | boxes_mask = np.array(boxes_mask) 165 | roi['boxes_mask'] = boxes_mask 166 | roidb.append(roi) 167 | for roi in roidb: 168 | roi['max_num_boxes'] = max_num_boxes 169 | self.num_images = len(roidb) 170 | print('roidb size', len(roidb)) 171 | print('non attr box num', nonattr_box_num) 172 | print('landmark num', landmark_num) 173 | with open(cache_file, 'wb') as fid: 174 | pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) 175 | print('wrote gt roidb to {}'.format(cache_file)) 176 | 177 | return roidb 178 | 179 | def write_detections(self, all_boxes, output_dir='./output/'): 180 | pass 181 | 182 | 183 | def evaluate_detections(self, all_boxes, output_dir='./output/',method_name='insightdetection'): 184 | pass 185 | 186 | 187 | -------------------------------------------------------------------------------- /rcnn/PY_OP/rpn_fpn_ohem3.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | import sys 4 | import mxnet as mx 5 | import numpy as np 6 | from distutils.util import strtobool 7 | from ..config import config, generate_config 8 | 9 | 10 | STAT = {0:0} 11 | STEP = 28800 12 | 13 | class RPNFPNOHEM3Operator(mx.operator.CustomOp): 14 | def __init__(self, stride=0, network='', dataset='', prefix=''): 15 | super(RPNFPNOHEM3Operator, self).__init__() 16 | self.stride = int(stride) 17 | self.prefix = prefix 18 | generate_config(network, dataset) 19 | self.mode = config.TRAIN.OHEM_MODE #0 for random 10:245, 1 for 10:246, 2 for 10:30, mode 1 for default 20 | global STAT 21 | for k in config.RPN_FEAT_STRIDE: 22 | STAT[k] = [0,0,0] 23 | 24 | def forward(self, is_train, req, in_data, out_data, aux): 25 | global STAT 26 | 27 | cls_score = in_data[0].asnumpy() #BS, 2, ANCHORS 28 | labels_raw = in_data[1].asnumpy() # BS, ANCHORS 29 | 30 | A 
= config.NUM_ANCHORS 31 | anchor_weight = np.zeros( (labels_raw.shape[0], labels_raw.shape[1],1), dtype=np.float32 ) 32 | valid_count = np.zeros( (labels_raw.shape[0],1), dtype=np.float32 ) 33 | #print('anchor_weight', anchor_weight.shape) 34 | 35 | #assert labels.shape[0]==1 36 | #assert cls_score.shape[0]==1 37 | #assert bbox_weight.shape[0]==1 38 | #print('shape', cls_score.shape, labels.shape, file=sys.stderr) 39 | #print('bbox_weight 0', bbox_weight.shape, file=sys.stderr) 40 | #bbox_weight = np.zeros( (labels_raw.shape[0], labels_raw.shape[1], 4), dtype=np.float32) 41 | _stat = [0,0,0] 42 | for ibatch in range(labels_raw.shape[0]): 43 | _anchor_weight = np.zeros( (labels_raw.shape[1],1), dtype=np.float32) 44 | labels = labels_raw[ibatch] 45 | fg_score = cls_score[ibatch,1,:] - cls_score[ibatch,0,:] 46 | 47 | 48 | 49 | fg_inds = np.where(labels>0)[0] 50 | num_fg = int(config.TRAIN.RPN_FG_FRACTION * config.TRAIN.RPN_BATCH_SIZE) 51 | origin_num_fg = len(fg_inds) 52 | #print(len(fg_inds), num_fg, file=sys.stderr) 53 | if len(fg_inds) > num_fg: 54 | if self.mode==0: 55 | disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) 56 | labels[disable_inds] = -1 57 | else: 58 | pos_ohem_scores = fg_score[fg_inds] 59 | order_pos_ohem_scores = pos_ohem_scores.ravel().argsort() 60 | sampled_inds = fg_inds[order_pos_ohem_scores[:num_fg]] 61 | labels[fg_inds] = -1 62 | labels[sampled_inds] = 1 63 | 64 | n_fg = np.sum(labels>0) 65 | fg_inds = np.where(labels>0)[0] 66 | num_bg = config.TRAIN.RPN_BATCH_SIZE - n_fg 67 | if self.mode==2: 68 | num_bg = max(48, n_fg*int(1.0/config.TRAIN.RPN_FG_FRACTION-1)) 69 | 70 | bg_inds = np.where(labels == 0)[0] 71 | origin_num_bg = len(bg_inds) 72 | if num_bg==0: 73 | labels[bg_inds] = -1 74 | elif len(bg_inds) > num_bg: 75 | # sort ohem scores 76 | 77 | if self.mode==0: 78 | disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 79 | labels[disable_inds] = -1 80 | else: 81 | neg_ohem_scores = fg_score[bg_inds] 82 | order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1] 83 | sampled_inds = bg_inds[order_neg_ohem_scores[:num_bg]] 84 | #print('sampled_inds_bg', sampled_inds, file=sys.stderr) 85 | labels[bg_inds] = -1 86 | labels[sampled_inds] = 0 87 | 88 | if n_fg>0: 89 | order0_labels = labels.reshape( (1, A, -1) ).transpose( (0, 2, 1) ).reshape( (-1,) ) 90 | bbox_fg_inds = np.where(order0_labels>0)[0] 91 | #print('bbox_fg_inds, order0 ', bbox_fg_inds, file=sys.stderr) 92 | _anchor_weight[bbox_fg_inds,:] = 1.0 93 | anchor_weight[ibatch] = _anchor_weight 94 | valid_count[ibatch][0] = n_fg 95 | 96 | #if self.prefix=='face': 97 | # #print('fg-bg', self.stride, n_fg, num_bg) 98 | # STAT[0]+=1 99 | # STAT[self.stride][0] += config.TRAIN.RPN_BATCH_SIZE 100 | # STAT[self.stride][1] += n_fg 101 | # STAT[self.stride][2] += np.sum(fg_score[fg_inds]>=0) 102 | # #_stat[0] += config.TRAIN.RPN_BATCH_SIZE 103 | # #_stat[1] += n_fg 104 | # #_stat[2] += np.sum(fg_score[fg_inds]>=0) 105 | # #print('stride num_fg', self.stride, n_fg, file=sys.stderr) 106 | # #ACC[self.stride] += np.sum(fg_score[fg_inds]>=0) 107 | # #x = float(labels_raw.shape[0]*len(config.RPN_FEAT_STRIDE)) 108 | # x = 1.0 109 | # if STAT[0]%STEP==0: 110 | # _str = ['STAT'] 111 | # STAT[0] = 0 112 | # for k in config.RPN_FEAT_STRIDE: 113 | # acc = float(STAT[k][2])/STAT[k][1] 114 | # acc0 = float(STAT[k][1])/STAT[k][0] 115 | # #_str.append("%d: all-fg(%d, %d, %.4f), fg-fgcorrect(%d, %d, %.4f)"%(k,STAT[k][0], STAT[k][1], acc0, STAT[k][1], STAT[k][2], 
acc)) 116 | # _str.append("%d: (%d, %d, %.4f)"%(k, STAT[k][1], STAT[k][2], acc)) 117 | # STAT[k] = [0,0,0] 118 | # _str = ' | '.join(_str) 119 | # print(_str, file=sys.stderr) 120 | #if self.stride==4 and num_fg>0: 121 | # print('_stat_', self.stride, num_fg, num_bg, file=sys.stderr) 122 | 123 | #labels_ohem = mx.nd.array(labels_raw) 124 | #anchor_weight = mx.nd.array(anchor_weight) 125 | #print('valid_count', self.stride, np.sum(valid_count)) 126 | #print('_stat', _stat, valid_count) 127 | 128 | for ind, val in enumerate([labels_raw, anchor_weight, valid_count]): 129 | val = mx.nd.array(val) 130 | self.assign(out_data[ind], req[ind], val) 131 | 132 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 133 | for i in range(len(in_grad)): 134 | self.assign(in_grad[i], req[i], 0) 135 | 136 | 137 | @mx.operator.register('rpn_fpn_ohem3') 138 | class RPNFPNOHEM3Prop(mx.operator.CustomOpProp): 139 | def __init__(self, stride=0, network='', dataset='', prefix=''): 140 | super(RPNFPNOHEM3Prop, self).__init__(need_top_grad=False) 141 | self.stride = stride 142 | self.network=network 143 | self.dataset=dataset 144 | self.prefix = prefix 145 | 146 | def list_arguments(self): 147 | return ['cls_score', 'labels'] 148 | 149 | def list_outputs(self): 150 | return ['labels_ohem', 'anchor_weight', 'valid_count'] 151 | 152 | def infer_shape(self, in_shape): 153 | labels_shape = in_shape[1] 154 | #print('in_rpn_ohem', in_shape[0], in_shape[1], in_shape[2], file=sys.stderr) 155 | anchor_weight_shape = [labels_shape[0], labels_shape[1], 1] 156 | #print('in_rpn_ohem', labels_shape, anchor_weight_shape) 157 | 158 | return in_shape, \ 159 | [labels_shape, anchor_weight_shape, [labels_shape[0], 1]] 160 | 161 | def create_operator(self, ctx, shapes, dtypes): 162 | return RPNFPNOHEM3Operator(self.stride, self.network, self.dataset, self.prefix) 163 | 164 | def declare_backward_dependency(self, out_grad, in_data, out_data): 165 | return [] 166 | 167 | 168 | -------------------------------------------------------------------------------- /rcnn/processing/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rcnn.cython.bbox import bbox_overlaps_cython 3 | #from rcnn.config import config 4 | 5 | 6 | def bbox_overlaps(boxes, query_boxes): 7 | return bbox_overlaps_cython(boxes, query_boxes) 8 | 9 | 10 | def bbox_overlaps_py(boxes, query_boxes): 11 | """ 12 | determine overlaps between boxes and query_boxes 13 | :param boxes: n * 4 bounding boxes 14 | :param query_boxes: k * 4 bounding boxes 15 | :return: overlaps: n * k overlaps 16 | """ 17 | n_ = boxes.shape[0] 18 | k_ = query_boxes.shape[0] 19 | overlaps = np.zeros((n_, k_), dtype=np.float) 20 | for k in range(k_): 21 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 22 | for n in range(n_): 23 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 24 | if iw > 0: 25 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 26 | if ih > 0: 27 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 28 | all_area = float(box_area + query_box_area - iw * ih) 29 | overlaps[n, k] = iw * ih / all_area 30 | return overlaps 31 | 32 | 33 | def clip_boxes(boxes, im_shape): 34 | """ 35 | Clip boxes to image boundaries. 
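The coordinates are clamped in place, so x1/x2 land in [0, im_shape[1]-1] and y1/y2 in [0, im_shape[0]-1].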
36 | :param boxes: [N, 4* num_classes] 37 | :param im_shape: tuple of 2 38 | :return: [N, 4* num_classes] 39 | """ 40 | # x1 >= 0 41 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 42 | # y1 >= 0 43 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 44 | # x2 < im_shape[1] 45 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 46 | # y2 < im_shape[0] 47 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 48 | return boxes 49 | 50 | 51 | def nonlinear_transform(ex_rois, gt_rois): 52 | """ 53 | compute bounding box regression targets from ex_rois to gt_rois 54 | :param ex_rois: [N, 4] 55 | :param gt_rois: [N, 4] 56 | :return: [N, 4] 57 | """ 58 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 59 | 60 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 61 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 62 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 63 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 64 | 65 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 66 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 67 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 68 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 69 | 70 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 71 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 72 | targets_dw = np.log(gt_widths / ex_widths) 73 | targets_dh = np.log(gt_heights / ex_heights) 74 | 75 | if gt_rois.shape[1]<=4: 76 | targets = np.vstack( 77 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 78 | return targets 79 | else: 80 | targets = [targets_dx, targets_dy, targets_dw, targets_dh] 81 | #if config.USE_BLUR: 82 | # for i in range(4, gt_rois.shape[1]): 83 | # t = gt_rois[:,i] 84 | # targets.append(t) 85 | targets = np.vstack(targets).transpose() 86 | return targets 87 | 88 | def landmark_transform(ex_rois, gt_rois): 89 | 90 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 91 | 92 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 93 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 94 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 95 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 96 | 97 | 98 | targets = [] 99 | for i in range(gt_rois.shape[1]): 100 | for j in range(gt_rois.shape[2]): 101 | #if not config.USE_OCCLUSION and j==2: 102 | # continue 103 | if j==2: 104 | continue 105 | if j==0: #w 106 | target = (gt_rois[:,i,j] - ex_ctr_x) / (ex_widths + 1e-14) 107 | elif j==1: #h 108 | target = (gt_rois[:,i,j] - ex_ctr_y) / (ex_heights + 1e-14) 109 | else: #visibile 110 | target = gt_rois[:,i,j] 111 | targets.append(target) 112 | 113 | 114 | targets = np.vstack(targets).transpose() 115 | return targets 116 | 117 | 118 | def nonlinear_pred(boxes, box_deltas): 119 | """ 120 | Transform the set of class-agnostic boxes into class-specific boxes 121 | by applying the predicted offsets (box_deltas) 122 | :param boxes: !important [N 4] 123 | :param box_deltas: [N, 4 * num_classes] 124 | :return: [N 4 * num_classes] 125 | """ 126 | if boxes.shape[0] == 0: 127 | return np.zeros((0, box_deltas.shape[1])) 128 | 129 | boxes = boxes.astype(np.float, copy=False) 130 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 131 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 132 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 133 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 134 | 135 | dx = box_deltas[:, 0::4] 136 | dy = box_deltas[:, 1::4] 137 | dw = box_deltas[:, 2::4] 
139 | 
140 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
141 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
142 |     pred_w = np.exp(dw) * widths[:, np.newaxis]
143 |     pred_h = np.exp(dh) * heights[:, np.newaxis]
144 | 
145 |     pred_boxes = np.zeros(box_deltas.shape)
146 |     # x1
147 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)
148 |     # y1
149 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)
150 |     # x2
151 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)
152 |     # y2
153 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)
154 | 
155 |     return pred_boxes
156 | 
157 | def landmark_pred(boxes, landmark_deltas):
158 |     if boxes.shape[0] == 0:
159 |         return np.zeros((0, landmark_deltas.shape[1]))
160 |     boxes = boxes.astype(np.float64, copy=False)
161 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
162 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
163 |     ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
164 |     ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
165 |     preds = []
166 |     for i in range(landmark_deltas.shape[1]):
167 |         if i % 2 == 0:  # even columns are x-coordinates
168 |             pred = (landmark_deltas[:, i] * widths + ctr_x)
169 |         else:  # odd columns are y-coordinates
170 |             pred = (landmark_deltas[:, i] * heights + ctr_y)
171 |         preds.append(pred)
172 |     preds = np.vstack(preds).transpose()
173 |     return preds
174 | 
175 | def iou_transform(ex_rois, gt_rois):
176 |     """ return bbox targets, IoU loss uses gt_rois as gt """
177 |     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'
178 |     return gt_rois
179 | 
180 | 
181 | def iou_pred(boxes, box_deltas):
182 |     """
183 |     Transform the set of class-agnostic boxes into class-specific boxes
184 |     by applying the predicted offsets (box_deltas)
185 |     :param boxes: !important [N, 4]
186 |     :param box_deltas: [N, 4 * num_classes]
187 |     :return: [N, 4 * num_classes]
188 |     """
189 |     if boxes.shape[0] == 0:
190 |         return np.zeros((0, box_deltas.shape[1]))
191 | 
192 |     boxes = boxes.astype(np.float64, copy=False)
193 |     x1 = boxes[:, 0]
194 |     y1 = boxes[:, 1]
195 |     x2 = boxes[:, 2]
196 |     y2 = boxes[:, 3]
197 | 
198 |     dx1 = box_deltas[:, 0::4]
199 |     dy1 = box_deltas[:, 1::4]
200 |     dx2 = box_deltas[:, 2::4]
201 |     dy2 = box_deltas[:, 3::4]
202 | 
203 |     pred_boxes = np.zeros(box_deltas.shape)
204 |     # x1
205 |     pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis]
206 |     # y1
207 |     pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis]
208 |     # x2
209 |     pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis]
210 |     # y2
211 |     pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis]
212 | 
213 |     return pred_boxes
214 | 
215 | 
216 | # define bbox_transform and bbox_pred
217 | bbox_transform = nonlinear_transform
218 | bbox_pred = nonlinear_pred
219 | 
--------------------------------------------------------------------------------
/rcnn/pycocotools/maskApi.c:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #include "maskApi.h"
8 | #include <math.h>
9 | #include <stdlib.h>
10 | 
11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
12 | uint umax( uint a, uint b ) { return (a>b) ?
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | siz i; *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && athr) keep[j]=0; 105 | } 106 | } 107 | } 108 | 109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { 110 | double h, w, i, u, ga, da; siz g, d; int crowd; 111 | for( g=0; gthr) keep[j]=0; 129 | } 130 | } 131 | } 132 | 133 | void rleToBbox( const RLE *R, BB bb, siz n ) { 134 | siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); 173 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 174 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 175 | if(dx>=dy) for( d=0; d<=dx; d++ ) { 176 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 177 | } else for( d=0; d<=dy; d++ ) { 178 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 179 | } 180 | } 181 | /* get points along y-boundary and downsample */ 182 | free(x); free(y); k=m; m=0; double xd, yd; 183 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 184 | for( j=1; jw-1 ) continue; 187 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 189 | x[m]=(int) xd; y[m]=(int) yd; m++; 190 | } 191 | /* compute rle encoding given y-boundary points */ 192 | k=m; a=malloc(sizeof(uint)*(k+1)); 193 | for( j=0; j0) b[m++]=a[j++]; else { 199 | j++; if(jm, p=0; long x; int more; 206 | char *s=malloc(sizeof(char)*m*6); 207 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 209 | while( more ) { 210 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 211 | if(more) c |= 0x20; c+=48; s[p++]=c; 212 | } 213 | } 214 | s[p]=0; return s; 215 | } 216 | 217 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 218 | siz m=0, p=0, k; long x; int more; uint *cnts; 219 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 220 | while( s[p] ) { 221 | x=0; k=0; more=1; 222 | while( more ) { 223 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 224 | more = c & 0x20; p++; k++; 225 | if(!more && (c & 0x10)) x |= -1 << 5*k; 226 | } 227 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 228 | } 229 | rleInit(R,h,w,m,cnts); free(cnts); 230 | } 231 | -------------------------------------------------------------------------------- /rcnn/core/module.py: -------------------------------------------------------------------------------- 1 | """A `MutableModule` implement the `BaseModule` API, and allows input shape 2 | varying with training iterations. If shapes vary, executors will rebind, 3 | using shared arrays from the initial module binded with maximum shape. 4 | """ 5 | 6 | import logging 7 | 8 | from mxnet import context as ctx 9 | from mxnet.initializer import Uniform 10 | from mxnet.module.base_module import BaseModule 11 | from mxnet.module.module import Module 12 | 13 | class MutableModule(BaseModule): 14 | """A mutable module is a module that supports variable input data. 
15 | 16 | Parameters 17 | ---------- 18 | symbol : Symbol 19 | data_names : list of str 20 | label_names : list of str 21 | logger : Logger 22 | context : Context or list of Context 23 | work_load_list : list of number 24 | max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary 25 | max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary 26 | fixed_param_prefix : list of str, indicating fixed parameters 27 | """ 28 | def __init__(self, symbol, data_names, label_names, 29 | logger=logging, context=ctx.cpu(), work_load_list=None, 30 | max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None): 31 | super(MutableModule, self).__init__(logger=logger) 32 | self._symbol = symbol 33 | self._data_names = data_names 34 | self._label_names = label_names 35 | self._context = context 36 | self._work_load_list = work_load_list 37 | 38 | self._curr_module = None 39 | self._max_data_shapes = max_data_shapes 40 | self._max_label_shapes = max_label_shapes 41 | self._fixed_param_prefix = fixed_param_prefix 42 | 43 | fixed_param_names = list() 44 | if fixed_param_prefix is not None: 45 | for name in self._symbol.list_arguments(): 46 | for prefix in self._fixed_param_prefix: 47 | if prefix in name: 48 | fixed_param_names.append(name) 49 | self._fixed_param_names = fixed_param_names 50 | 51 | def _reset_bind(self): 52 | self.binded = False 53 | self._curr_module = None 54 | 55 | @property 56 | def data_names(self): 57 | return self._data_names 58 | 59 | @property 60 | def output_names(self): 61 | return self._symbol.list_outputs() 62 | 63 | @property 64 | def data_shapes(self): 65 | assert self.binded 66 | return self._curr_module.data_shapes 67 | 68 | @property 69 | def label_shapes(self): 70 | assert self.binded 71 | return self._curr_module.label_shapes 72 | 73 | @property 74 | def output_shapes(self): 75 | assert self.binded 76 | return self._curr_module.output_shapes 77 | 78 | def get_params(self): 79 | assert self.binded and self.params_initialized 80 | return self._curr_module.get_params() 81 | 82 | def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, 83 | allow_missing=False, force_init=False, allow_extra=False): 84 | if self.params_initialized and not force_init: 85 | return 86 | assert self.binded, 'call bind before initializing the parameters' 87 | self._curr_module.init_params(initializer=initializer, arg_params=arg_params, 88 | aux_params=aux_params, allow_missing=allow_missing, 89 | force_init=force_init, allow_extra=allow_extra) 90 | self.params_initialized = True 91 | 92 | def bind(self, data_shapes, label_shapes=None, for_training=True, 93 | inputs_need_grad=False, force_rebind=False, shared_module=None): 94 | # in case we already initialized params, keep it 95 | if self.params_initialized: 96 | arg_params, aux_params = self.get_params() 97 | 98 | # force rebinding is typically used when one want to switch from 99 | # training to prediction phase. 
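        # Only the executor is rebuilt on a rebind: the get_params() snapshot
        # taken above is restored via set_params() at the end of this method,
        # so learned weights survive the shape change.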
100 | if force_rebind: 101 | self._reset_bind() 102 | 103 | if self.binded: 104 | self.logger.warning('Already binded, ignoring bind()') 105 | return 106 | 107 | assert shared_module is None, 'shared_module for MutableModule is not supported' 108 | 109 | self.for_training = for_training 110 | self.inputs_need_grad = inputs_need_grad 111 | self.binded = True 112 | 113 | max_shapes_dict = dict() 114 | if self._max_data_shapes is not None: 115 | max_shapes_dict.update(dict(self._max_data_shapes)) 116 | if self._max_label_shapes is not None: 117 | max_shapes_dict.update(dict(self._max_label_shapes)) 118 | 119 | max_data_shapes = list() 120 | for name, shape in data_shapes: 121 | if name in max_shapes_dict: 122 | max_data_shapes.append((name, max_shapes_dict[name])) 123 | else: 124 | max_data_shapes.append((name, shape)) 125 | 126 | max_label_shapes = list() 127 | if label_shapes is not None: 128 | for name, shape in label_shapes: 129 | if name in max_shapes_dict: 130 | max_label_shapes.append((name, max_shapes_dict[name])) 131 | else: 132 | max_label_shapes.append((name, shape)) 133 | 134 | if len(max_label_shapes) == 0: 135 | max_label_shapes = None 136 | 137 | module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger, 138 | context=self._context, work_load_list=self._work_load_list, 139 | fixed_param_names=self._fixed_param_names) 140 | module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad, 141 | force_rebind=False, shared_module=None) 142 | self._curr_module = module 143 | 144 | # copy back saved params, if already initialized 145 | if self.params_initialized: 146 | self.set_params(arg_params, aux_params) 147 | 148 | def init_optimizer(self, kvstore='local', optimizer='sgd', 149 | optimizer_params=(('learning_rate', 0.01),), force_init=False): 150 | assert self.binded and self.params_initialized 151 | if self.optimizer_initialized and not force_init: 152 | self.logger.warning('optimizer already initialized, ignoring.') 153 | return 154 | 155 | self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params, 156 | force_init=force_init) 157 | self.optimizer_initialized = True 158 | 159 | def forward(self, data_batch, is_train=None): 160 | assert self.binded and self.params_initialized 161 | 162 | # get current_shapes 163 | if self._curr_module.label_shapes is not None: 164 | current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes) 165 | else: 166 | current_shapes = dict(self._curr_module.data_shapes) 167 | 168 | # get input_shapes 169 | if data_batch.provide_label is not None: 170 | input_shapes = dict(data_batch.provide_data + data_batch.provide_label) 171 | else: 172 | input_shapes = dict(data_batch.provide_data) 173 | 174 | # decide if shape changed 175 | shape_changed = False 176 | for k, v in current_shapes.items(): 177 | if v != input_shapes[k]: 178 | shape_changed = True 179 | 180 | if shape_changed: 181 | module = Module(self._symbol, self._data_names, self._label_names, 182 | logger=self.logger, context=self._context, 183 | work_load_list=self._work_load_list, 184 | fixed_param_names=self._fixed_param_names) 185 | module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training, 186 | self._curr_module.inputs_need_grad, force_rebind=False, 187 | shared_module=self._curr_module) 188 | self._curr_module = module 189 | 190 | self._curr_module.forward(data_batch, is_train=is_train) 191 | 192 | def backward(self, out_grads=None): 193 | assert self.binded and 
self.params_initialized 194 | self._curr_module.backward(out_grads=out_grads) 195 | 196 | def update(self): 197 | assert self.binded and self.params_initialized and self.optimizer_initialized 198 | self._curr_module.update() 199 | 200 | def get_outputs(self, merge_multi_context=True): 201 | assert self.binded and self.params_initialized 202 | return self._curr_module.get_outputs(merge_multi_context=merge_multi_context) 203 | 204 | def get_input_grads(self, merge_multi_context=True): 205 | assert self.binded and self.params_initialized and self.inputs_need_grad 206 | return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context) 207 | 208 | def update_metric(self, eval_metric, labels): 209 | assert self.binded and self.params_initialized 210 | self._curr_module.update_metric(eval_metric, labels) 211 | 212 | def install_monitor(self, mon): 213 | """ Install monitor on all executors """ 214 | assert self.binded 215 | self._curr_module.install_monitor(mon) 216 | -------------------------------------------------------------------------------- /rcnn/core/module_bak.py: -------------------------------------------------------------------------------- 1 | """A `MutableModule` implement the `BaseModule` API, and allows input shape 2 | varying with training iterations. If shapes vary, executors will rebind, 3 | using shared arrays from the initial module binded with maximum shape. 4 | """ 5 | 6 | import logging 7 | 8 | from mxnet import context as ctx 9 | from mxnet.initializer import Uniform 10 | from mxnet.module.base_module import BaseModule 11 | from mxnet.module.module import Module 12 | 13 | class MutableModule(BaseModule): 14 | """A mutable module is a module that supports variable input data. 15 | 16 | Parameters 17 | ---------- 18 | symbol : Symbol 19 | data_names : list of str 20 | label_names : list of str 21 | logger : Logger 22 | context : Context or list of Context 23 | work_load_list : list of number 24 | max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary 25 | max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary 26 | fixed_param_prefix : list of str, indicating fixed parameters 27 | """ 28 | def __init__(self, symbol, data_names, label_names, 29 | logger=logging, context=ctx.cpu(), work_load_list=None, 30 | max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None): 31 | super(MutableModule, self).__init__(logger=logger) 32 | self._symbol = symbol 33 | self._data_names = data_names 34 | self._label_names = label_names 35 | self._context = context 36 | self._work_load_list = work_load_list 37 | 38 | self._curr_module = None 39 | self._max_data_shapes = max_data_shapes 40 | self._max_label_shapes = max_label_shapes 41 | self._fixed_param_prefix = fixed_param_prefix 42 | 43 | fixed_param_names = list() 44 | if fixed_param_prefix is not None: 45 | for name in self._symbol.list_arguments(): 46 | for prefix in self._fixed_param_prefix: 47 | if prefix in name: 48 | fixed_param_names.append(name) 49 | self._fixed_param_names = fixed_param_names 50 | 51 | def _reset_bind(self): 52 | self.binded = False 53 | self._curr_module = None 54 | 55 | @property 56 | def data_names(self): 57 | return self._data_names 58 | 59 | @property 60 | def output_names(self): 61 | return self._symbol.list_outputs() 62 | 63 | @property 64 | def data_shapes(self): 65 | assert self.binded 66 | return self._curr_module.data_shapes 67 | 68 | @property 69 | def label_shapes(self): 70 | assert self.binded 71 | return 
self._curr_module.label_shapes 72 | 73 | @property 74 | def output_shapes(self): 75 | assert self.binded 76 | return self._curr_module.output_shapes 77 | 78 | def get_params(self): 79 | assert self.binded and self.params_initialized 80 | return self._curr_module.get_params() 81 | 82 | def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None, 83 | allow_missing=False, force_init=False, allow_extra=False): 84 | if self.params_initialized and not force_init: 85 | return 86 | assert self.binded, 'call bind before initializing the parameters' 87 | self._curr_module.init_params(initializer=initializer, arg_params=arg_params, 88 | aux_params=aux_params, allow_missing=allow_missing, 89 | force_init=force_init, allow_extra=allow_extra) 90 | self.params_initialized = True 91 | 92 | def bind(self, data_shapes, label_shapes=None, for_training=True, 93 | inputs_need_grad=False, force_rebind=False, shared_module=None, grad_req='write'): 94 | # in case we already initialized params, keep it 95 | if self.params_initialized: 96 | arg_params, aux_params = self.get_params() 97 | 98 | # force rebinding is typically used when one want to switch from 99 | # training to prediction phase. 100 | if force_rebind: 101 | self._reset_bind() 102 | 103 | if self.binded: 104 | self.logger.warning('Already binded, ignoring bind()') 105 | return 106 | 107 | assert shared_module is None, 'shared_module for MutableModule is not supported' 108 | 109 | self.for_training = for_training 110 | self.inputs_need_grad = inputs_need_grad 111 | self.binded = True 112 | 113 | max_shapes_dict = dict() 114 | if self._max_data_shapes is not None: 115 | max_shapes_dict.update(dict(self._max_data_shapes)) 116 | if self._max_label_shapes is not None: 117 | max_shapes_dict.update(dict(self._max_label_shapes)) 118 | 119 | max_data_shapes = list() 120 | for name, shape in data_shapes: 121 | if name in max_shapes_dict: 122 | max_data_shapes.append((name, max_shapes_dict[name])) 123 | else: 124 | max_data_shapes.append((name, shape)) 125 | 126 | max_label_shapes = list() 127 | if label_shapes is not None: 128 | for name, shape in label_shapes: 129 | if name in max_shapes_dict: 130 | max_label_shapes.append((name, max_shapes_dict[name])) 131 | else: 132 | max_label_shapes.append((name, shape)) 133 | 134 | if len(max_label_shapes) == 0: 135 | max_label_shapes = None 136 | 137 | module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger, 138 | context=self._context, work_load_list=self._work_load_list, 139 | fixed_param_names=self._fixed_param_names) 140 | module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad, 141 | force_rebind=False, shared_module=None) 142 | self._curr_module = module 143 | 144 | # copy back saved params, if already initialized 145 | if self.params_initialized: 146 | self.set_params(arg_params, aux_params) 147 | 148 | def init_optimizer(self, kvstore='local', optimizer='sgd', 149 | optimizer_params=(('learning_rate', 0.01),), force_init=False): 150 | assert self.binded and self.params_initialized 151 | if self.optimizer_initialized and not force_init: 152 | self.logger.warning('optimizer already initialized, ignoring.') 153 | return 154 | 155 | self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params, 156 | force_init=force_init) 157 | self.optimizer_initialized = True 158 | 159 | def forward(self, data_batch, is_train=None): 160 | assert self.binded and self.params_initialized 161 | 162 | # get current_shapes 163 | if 
self._curr_module.label_shapes is not None: 164 | current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes) 165 | else: 166 | current_shapes = dict(self._curr_module.data_shapes) 167 | 168 | # get input_shapes 169 | if data_batch.provide_label is not None: 170 | input_shapes = dict(data_batch.provide_data + data_batch.provide_label) 171 | else: 172 | input_shapes = dict(data_batch.provide_data) 173 | 174 | # decide if shape changed 175 | shape_changed = False 176 | for k, v in current_shapes.items(): 177 | if v != input_shapes[k]: 178 | shape_changed = True 179 | 180 | if shape_changed: 181 | module = Module(self._symbol, self._data_names, self._label_names, 182 | logger=self.logger, context=self._context, 183 | work_load_list=self._work_load_list, 184 | fixed_param_names=self._fixed_param_names) 185 | module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training, 186 | self._curr_module.inputs_need_grad, force_rebind=False, 187 | shared_module=self._curr_module) 188 | self._curr_module = module 189 | 190 | self._curr_module.forward(data_batch, is_train=is_train) 191 | 192 | def backward(self, out_grads=None): 193 | assert self.binded and self.params_initialized 194 | self._curr_module.backward(out_grads=out_grads) 195 | 196 | def update(self): 197 | assert self.binded and self.params_initialized and self.optimizer_initialized 198 | self._curr_module.update() 199 | 200 | def get_outputs(self, merge_multi_context=True): 201 | assert self.binded and self.params_initialized 202 | return self._curr_module.get_outputs(merge_multi_context=merge_multi_context) 203 | 204 | def get_input_grads(self, merge_multi_context=True): 205 | assert self.binded and self.params_initialized and self.inputs_need_grad 206 | return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context) 207 | 208 | def update_metric(self, eval_metric, labels): 209 | assert self.binded and self.params_initialized 210 | self._curr_module.update_metric(eval_metric, labels) 211 | 212 | def install_monitor(self, mon): 213 | """ Install monitor on all executors """ 214 | assert self.binded 215 | self._curr_module.install_monitor(mon) 216 | -------------------------------------------------------------------------------- /rcnn/tools/train_rcnn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pprint 3 | import mxnet as mx 4 | 5 | from ..logger import logger 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..core import callback, metric 9 | from ..core.loader import ROIIter 10 | from ..core.module import MutableModule 11 | from ..processing.bbox_regression import add_bbox_regression_targets 12 | from ..utils.load_data import load_proposal_roidb, merge_roidb, filter_roidb 13 | from ..utils.load_model import load_param 14 | 15 | 16 | def train_rcnn(network, dataset, image_set, root_path, dataset_path, 17 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 18 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 19 | train_shared, lr, lr_step, proposal): 20 | # set up config 21 | config.TRAIN.BATCH_IMAGES = 2 22 | config.TRAIN.BATCH_ROIS = 128 23 | if proposal == 'ss': 24 | config.TRAIN.BG_THRESH_LO = 0.1 # reproduce Fast R-CNN 25 | 26 | # load symbol 27 | sym = eval('get_' + network + '_rcnn')(num_classes=config.NUM_CLASSES) 28 | 29 | # setup multi-gpu 30 | batch_size = len(ctx) 31 | input_batch_size = config.TRAIN.BATCH_IMAGES * 
batch_size 32 | 33 | # print config 34 | logger.info(pprint.pformat(config)) 35 | 36 | # load dataset and prepare imdb for training 37 | image_sets = [iset for iset in image_set.split('+')] 38 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 39 | proposal=proposal, append_gt=True, flip=not no_flip) 40 | for image_set in image_sets] 41 | roidb = merge_roidb(roidbs) 42 | roidb = filter_roidb(roidb) 43 | means, stds = add_bbox_regression_targets(roidb) 44 | 45 | # load training data 46 | train_data = ROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 47 | ctx=ctx, work_load_list=work_load_list, aspect_grouping=config.TRAIN.ASPECT_GROUPING) 48 | 49 | # infer max shape 50 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 51 | logger.info('providing maximum shape %s' % max_data_shape) 52 | 53 | # infer shape 54 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 55 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 56 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 57 | out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) 58 | aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) 59 | logger.info('output shape %s' % pprint.pformat(out_shape_dict)) 60 | 61 | # load and initialize params 62 | if resume: 63 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 64 | else: 65 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 66 | arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight']) 67 | arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias']) 68 | arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight']) 69 | arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias']) 70 | 71 | # check parameter shapes 72 | for k in sym.list_arguments(): 73 | if k in data_shape_dict: 74 | continue 75 | assert k in arg_params, k + ' not initialized' 76 | assert arg_params[k].shape == arg_shape_dict[k], \ 77 | 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) 78 | for k in sym.list_auxiliary_states(): 79 | assert k in aux_params, k + ' not initialized' 80 | assert aux_params[k].shape == aux_shape_dict[k], \ 81 | 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) 82 | 83 | # prepare training 84 | # create solver 85 | data_names = [k[0] for k in train_data.provide_data] 86 | label_names = [k[0] for k in train_data.provide_label] 87 | if train_shared: 88 | fixed_param_prefix = config.FIXED_PARAMS_SHARED 89 | else: 90 | fixed_param_prefix = config.FIXED_PARAMS 91 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 92 | logger=logger, context=ctx, work_load_list=work_load_list, 93 | max_data_shapes=max_data_shape, fixed_param_prefix=fixed_param_prefix) 94 | 95 | # decide training params 96 | # metric 97 | eval_metric = metric.RCNNAccMetric() 98 | cls_metric = metric.RCNNLogLossMetric() 99 | bbox_metric = metric.RCNNL1LossMetric() 100 | eval_metrics = mx.metric.CompositeEvalMetric() 101 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 102 | eval_metrics.add(child_metric) 103 | # callback 104 | batch_end_callback = mx.callback.Speedometer(train_data.batch_size, frequent=frequent, auto_reset=False) 
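    # The schedule below converts epoch milestones into iteration counts via
    # len(roidb) / batch_size. Worked example (hypothetical numbers): with
    # 80000 roidb entries, 8 GPUs (batch_size = 8) and lr_step = '7,10', the
    # 0.1x drops land at iterations 70000 and 100000.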
105 | epoch_end_callback = callback.do_checkpoint(prefix, means, stds) 106 | # decide learning rate 107 | base_lr = lr 108 | lr_factor = 0.1 109 | lr_epoch = [int(epoch) for epoch in lr_step.split(',')] 110 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 111 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 112 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 113 | logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters)) 114 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) 115 | # optimizer 116 | optimizer_params = {'momentum': 0.9, 117 | 'wd': 0.0005, 118 | 'learning_rate': lr, 119 | 'lr_scheduler': lr_scheduler, 120 | 'rescale_grad': (1.0 / batch_size), 121 | 'clip_gradient': 5} 122 | 123 | # train 124 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 125 | batch_end_callback=batch_end_callback, kvstore=kvstore, 126 | optimizer='sgd', optimizer_params=optimizer_params, 127 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 128 | 129 | 130 | def parse_args(): 131 | parser = argparse.ArgumentParser(description='Train a Fast R-CNN Network') 132 | # general 133 | parser.add_argument('--network', help='network name', default=default.network, type=str) 134 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 135 | args, rest = parser.parse_known_args() 136 | generate_config(args.network, args.dataset) 137 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 138 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 139 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 140 | # training 141 | parser.add_argument('--frequent', help='frequency of logging', default=default.frequent, type=int) 142 | parser.add_argument('--kvstore', help='the kv-store type', default=default.kvstore, type=str) 143 | parser.add_argument('--work_load_list', help='work load for different devices', default=None, type=list) 144 | parser.add_argument('--no_flip', help='disable flip images', action='store_true') 145 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 146 | parser.add_argument('--resume', help='continue training', action='store_true') 147 | # rcnn 148 | parser.add_argument('--gpus', help='GPU device to train with', default='0', type=str) 149 | parser.add_argument('--pretrained', help='pretrained model prefix', default=default.pretrained, type=str) 150 | parser.add_argument('--pretrained_epoch', help='pretrained model epoch', default=default.pretrained_epoch, type=int) 151 | parser.add_argument('--prefix', help='new model prefix', default=default.rcnn_prefix, type=str) 152 | parser.add_argument('--begin_epoch', help='begin epoch of training', default=0, type=int) 153 | parser.add_argument('--end_epoch', help='end epoch of training', default=default.rcnn_epoch, type=int) 154 | parser.add_argument('--lr', help='base learning rate', default=default.rcnn_lr, type=float) 155 | parser.add_argument('--lr_step', help='learning rate steps (in epoch)', default=default.rcnn_lr_step, type=str) 156 | parser.add_argument('--train_shared', help='second round train shared params', action='store_true') 157 | parser.add_argument('--proposal', help='can be ss for selective search or rpn', default='rpn', 
type=str) 158 | args = parser.parse_args() 159 | return args 160 | 161 | 162 | def main(): 163 | args = parse_args() 164 | logger.info('Called with argument: %s' % args) 165 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] 166 | train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 167 | args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, 168 | ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, 169 | train_shared=args.train_shared, lr=args.lr, lr_step=args.lr_step, proposal=args.proposal) 170 | 171 | if __name__ == '__main__': 172 | main() 173 | -------------------------------------------------------------------------------- /docs/results.md: -------------------------------------------------------------------------------- 1 | ## Results 2 | | face-img | 5 nearest | 3 | |-----|------| 4 | | ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) | 00 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 16 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 16 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 17 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 17 ![face-img](faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg) | 5 | | ![face-img](faces/dbbd360f-4fea-4911-9233-063eda812445.jpg) | 00 ![face-img](faces/dbbd360f-4fea-4911-9233-063eda812445.jpg) 16 ![face-img](faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg) 19 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 19 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 20 ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) | 6 | | ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) | 00 ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) 16 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) 18 ![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) 18 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) | 7 | | ![face-img](faces/bcf97241-96e4-4726-8287-fdab88217584.jpg) | 00 ![face-img](faces/bcf97241-96e4-4726-8287-fdab88217584.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 20 ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) 21 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) 21 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) | 8 | | ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) | 00 ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) 15 ![face-img](faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 19 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 20 ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) | 9 | | ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) | 00 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 16 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 17 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 17 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) | 10 | | ![face-img](faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg) | 00 ![face-img](faces/574d60a5-0c06-48a4-bf73-914002c82c2a.jpg) 15 ![face-img](faces/c966eec9-6458-4a4e-8967-6bab69dd9965.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 19 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 20 
![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 11 | | ![face-img](faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg) | 00 ![face-img](faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg) 18 ![face-img](faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg) 22 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 22 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 23 ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) | 12 | | ![face-img](faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg) | 00 ![face-img](faces/d445a6d4-e28a-4903-8f37-037a357eae60.jpg) 16 ![face-img](faces/dbbd360f-4fea-4911-9233-063eda812445.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 19 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 20 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) | 13 | | ![face-img](faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg) | 00 ![face-img](faces/9aa417b2-8fea-4aa6-9734-9e26ae9388ff.jpg) 17 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 20 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 20 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) 21 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 14 | | ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) | 00 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 16 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) | 15 | | ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) | 00 ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) 12 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 16 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) 16 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 16 | | ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) | 00 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 17 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 18 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) | 17 | | ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) | 00 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) 16 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 17 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 18 | | ![face-img](faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg) | 00 ![face-img](faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg) 15 ![face-img](faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg) 20 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) 21 ![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) 21 ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) | 19 | | ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) | 00 ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) 17 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 18 ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) 18 ![face-img](faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg) | 20 | | 
![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) | 00 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) 12 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) 16 ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) 17 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) | 21 | | ![face-img](faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg) | 00 ![face-img](faces/e7054fd4-9b31-41dd-90e7-8764e853f7f8.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/ba4dd9e4-8140-4f33-958b-9373b75fca8f.jpg) 18 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) | 22 | | ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) | 00 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 16 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 16 ![face-img](faces/6267aa0b-94f9-48db-90c7-5d84cdf01f52.jpg) 17 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) 17 ![face-img](faces/9e5b59cc-5d2a-4bb0-b1b7-141353f479dd.jpg) | 23 | | ![face-img](faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg) | 00 ![face-img](faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg) 18 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 18 ![face-img](faces/71a92092-b36f-41da-9e7f-85e5c075d65e.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/50c5fbf6-a1f1-438e-b9ce-7cc9772a3fbf.jpg) | 24 | | ![face-img](faces/4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg) | 00 ![face-img](faces/4dd291a3-92b3-4d2b-876c-f43ce05ad66d.jpg) 20 ![face-img](faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg) 21 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 21 ![face-img](faces/7da618a1-9866-45b9-a1a0-fd70eda5adf1.jpg) 22 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) | 25 | | ![face-img](faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg) | 00 ![face-img](faces/0e78e2f1-f597-4d65-ac74-f490084f79de.jpg) 15 ![face-img](faces/a0aeb78b-7f60-4023-92b1-741314e7fe88.jpg) 19 ![face-img](faces/fd0fbcf5-31c8-4aeb-a2f9-f7e672150d2b.jpg) 19 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 19 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) | 26 | | ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) | 00 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 12 ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) 16 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) 17 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) 18 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) | 27 | | ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) | 00 ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) 16 ![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 18 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) | 28 | | ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) | 00 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) 16 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) 16 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 17 ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) 17 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) | 29 | | ![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) | 00 
![face-img](faces/079e990c-604a-432c-9a4e-aa3eb8c6f560.jpg) 16 ![face-img](faces/5e652366-c03f-47a5-929a-9311f65db9ce.jpg) 18 ![face-img](faces/94cb0154-085e-4afc-9c6c-48073455f1ab.jpg) 18 ![face-img](faces/73a4399a-a7ba-4e33-ac48-6f845bd975fb.jpg) 18 ![face-img](faces/5152fad8-dda3-4a62-935c-2ef03faf8d20.jpg) | 30 | | ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) | 00 ![face-img](faces/84e22415-6352-4442-95f1-d20319786b6b.jpg) 12 ![face-img](faces/830652cd-29a9-4237-a736-e161e891f4ce.jpg) 16 ![face-img](faces/bb7c3b1d-1a4a-4cec-a456-894068f8b8fe.jpg) 16 ![face-img](faces/63ae41e8-1fc0-4069-ac17-ec331b3844c9.jpg) 17 ![face-img](faces/b6d5e843-2eb1-4174-9f99-4fce48d0d984.jpg) | 31 | -------------------------------------------------------------------------------- /rcnn/tools/train_rpn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import pprint 4 | import mxnet as mx 5 | 6 | from ..config import config, default, generate_config 7 | from ..symbol import * 8 | from ..core import callback, metric 9 | from ..core.loader import AnchorLoaderFPN 10 | from ..core.module import MutableModule 11 | from ..utils.load_data import load_gt_roidb, merge_roidb, filter_roidb 12 | from ..utils.load_model import load_param 13 | 14 | 15 | def train_rpn(network, dataset, image_set, root_path, dataset_path, 16 | frequent, kvstore, work_load_list, no_flip, no_shuffle, resume, 17 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 18 | train_shared, lr, lr_step): 19 | # set up logger 20 | logging.basicConfig() 21 | logger = logging.getLogger() 22 | logger.setLevel(logging.INFO) 23 | 24 | # setup config 25 | assert config.TRAIN.BATCH_IMAGES==1 26 | 27 | # load symbol 28 | sym = eval('get_' + network + '_rpn')() 29 | feat_sym = [] 30 | for stride in config.RPN_FEAT_STRIDE: 31 | feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' % stride]) 32 | 33 | 34 | # setup multi-gpu 35 | batch_size = len(ctx) 36 | input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size 37 | 38 | # print config 39 | pprint.pprint(config) 40 | 41 | # load dataset and prepare imdb for training 42 | image_sets = [iset for iset in image_set.split('+')] 43 | roidbs = [load_gt_roidb(dataset, image_set, root_path, dataset_path, 44 | flip=not no_flip) 45 | for image_set in image_sets] 46 | roidb = merge_roidb(roidbs) 47 | roidb = filter_roidb(roidb) 48 | 49 | # load training data 50 | #train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 51 | # ctx=ctx, work_load_list=work_load_list, 52 | # feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES, 53 | # anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, 54 | # allowed_border=9999) 55 | train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not no_shuffle, 56 | ctx=ctx, work_load_list=work_load_list) 57 | 58 | # infer max shape 59 | max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] 60 | max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) 61 | print 'providing maximum shape', max_data_shape, max_label_shape 62 | 63 | # infer shape 64 | data_shape_dict = dict(train_data.provide_data + train_data.provide_label) 65 | arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) 66 | arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) 67 | out_shape_dict = 
dict(zip(sym.list_outputs(), out_shape))
68 |     aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
69 |     print 'output shape'
70 |     pprint.pprint(out_shape_dict)
71 | 
72 |     # load and initialize params
73 |     if resume:
74 |         arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
75 |     else:
76 |         arg_params, aux_params = load_param(pretrained, epoch, convert=True)
77 |         init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
78 |         init_internal = mx.init.Normal(sigma=0.01)
79 |         for k in sym.list_arguments():
80 |             if k in data_shape_dict:
81 |                 continue
82 |             if k not in arg_params:
83 |                 print 'init', k
84 |                 arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
85 |                 if not k.endswith('bias'):
86 |                     init_internal(k, arg_params[k])
87 | 
88 |         for k in sym.list_auxiliary_states():
89 |             if k not in aux_params:
90 |                 print 'init', k
91 |                 aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k])
92 |                 init(k, aux_params[k])
93 | 
94 |     # check parameter shapes
95 |     for k in sym.list_arguments():
96 |         if k in data_shape_dict:
97 |             continue
98 |         assert k in arg_params, k + ' not initialized'
99 |         assert arg_params[k].shape == arg_shape_dict[k], \
100 |             'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
101 |     for k in sym.list_auxiliary_states():
102 |         assert k in aux_params, k + ' not initialized'
103 |         assert aux_params[k].shape == aux_shape_dict[k], \
104 |             'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)
105 | 
106 |     # create solver
107 |     data_names = [k[0] for k in train_data.provide_data]
108 |     label_names = [k[0] for k in train_data.provide_label]
109 |     if train_shared:
110 |         fixed_param_prefix = config.FIXED_PARAMS_SHARED
111 |     else:
112 |         fixed_param_prefix = config.FIXED_PARAMS
113 |     mod = MutableModule(sym, data_names=data_names, label_names=label_names,
114 |                         logger=logger, context=ctx, work_load_list=work_load_list,
115 |                         max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
116 |                         fixed_param_prefix=fixed_param_prefix)
117 | 
118 |     # decide training params
119 |     # metric
120 |     eval_metric = metric.RPNAccMetric()
121 |     cls_metric = metric.RPNLogLossMetric()
122 |     bbox_metric = metric.RPNL1LossMetric()
123 |     eval_metrics = mx.metric.CompositeEvalMetric()
124 |     for child_metric in [eval_metric, cls_metric, bbox_metric]:
125 |         eval_metrics.add(child_metric)
126 |     # callback
127 |     batch_end_callback = []
128 |     batch_end_callback.append(mx.callback.Speedometer(train_data.batch_size, frequent=frequent))
129 |     epoch_end_callback = mx.callback.do_checkpoint(prefix)
130 |     # decide learning rate
131 |     base_lr = lr
132 |     lr_factor = 0.1
133 |     lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
134 |     lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
135 |     lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
136 |     lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
137 |     print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters
138 |     lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
139 |     # optimizer
140 |     optimizer_params = {'momentum': 0.9,
141 |                         'wd': 0.0001,
142 |                         'learning_rate': lr,
143 |                         'lr_scheduler': lr_scheduler,
144 |                         'rescale_grad': (1.0 / batch_size),
145 |                         'clip_gradient': 5}
146 | 
147 |     # train
148 |     mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
149 |             batch_end_callback=batch_end_callback,
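            # rescale_grad = 1 / len(ctx) above normalizes the summed gradient
            # by the device count, matching the BATCH_IMAGES == 1 per-device
            # assertion at the top of this function.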
kvstore=kvstore, 150 | optimizer='sgd', optimizer_params=optimizer_params, 151 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 152 | 153 | 154 | def parse_args(): 155 | parser = argparse.ArgumentParser(description='Train a Region Proposal Network') 156 | # general 157 | parser.add_argument('--network', help='network name', default=default.network, type=str) 158 | parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str) 159 | args, rest = parser.parse_known_args() 160 | generate_config(args.network, args.dataset) 161 | parser.add_argument('--image_set', help='image_set name', default=default.image_set, type=str) 162 | parser.add_argument('--root_path', help='output data folder', default=default.root_path, type=str) 163 | parser.add_argument('--dataset_path', help='dataset path', default=default.dataset_path, type=str) 164 | # training 165 | parser.add_argument('--frequent', help='frequency of logging', default=default.frequent, type=int) 166 | parser.add_argument('--kvstore', help='the kv-store type', default=default.kvstore, type=str) 167 | parser.add_argument('--work_load_list', help='work load for different devices', default=None, type=list) 168 | parser.add_argument('--no_flip', help='disable flip images', action='store_true') 169 | parser.add_argument('--no_shuffle', help='disable random shuffle', action='store_true') 170 | parser.add_argument('--resume', help='continue training', action='store_true') 171 | # rpn 172 | parser.add_argument('--gpus', help='GPU device to train with', default='0', type=str) 173 | parser.add_argument('--pretrained', help='pretrained model prefix', default=default.pretrained, type=str) 174 | parser.add_argument('--pretrained_epoch', help='pretrained model epoch', default=default.pretrained_epoch, type=int) 175 | parser.add_argument('--prefix', help='new model prefix', default=default.rpn_prefix, type=str) 176 | parser.add_argument('--begin_epoch', help='begin epoch of training', default=0, type=int) 177 | parser.add_argument('--end_epoch', help='end epoch of training', default=default.rpn_epoch, type=int) 178 | parser.add_argument('--lr', help='base learning rate', default=default.rpn_lr, type=float) 179 | parser.add_argument('--lr_step', help='learning rate steps (in epoch)', default=default.rpn_lr_step, type=str) 180 | parser.add_argument('--train_shared', help='second round train shared params', action='store_true') 181 | args = parser.parse_args() 182 | return args 183 | 184 | 185 | def main(): 186 | args = parse_args() 187 | print 'Called with argument:', args 188 | ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] 189 | train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path, 190 | args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume, 191 | ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch, 192 | train_shared=args.train_shared, lr=args.lr, lr_step=args.lr_step) 193 | 194 | if __name__ == '__main__': 195 | main() 196 | -------------------------------------------------------------------------------- /rcnn/sample_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from easydict import EasyDict as edict 3 | 4 | config = edict() 5 | 6 | # network related params 7 | config.PIXEL_MEANS = np.array([103.939, 116.779, 123.68]) 8 | config.PIXEL_STDS = np.array([1.0, 1.0, 1.0]) 9 | config.PIXEL_SCALE = 1.0 
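# The three settings above imply a per-channel normalization of roughly
#   tensor = (pixel / PIXEL_SCALE - PIXEL_MEANS) / PIXEL_STDS
# (an assumption from these defaults: means near 103/116/123 with SCALE 1.0
# and unit STDS point to raw 0-255 BGR input).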
10 | config.IMAGE_STRIDE = 0
11 | 
12 | # dataset related params
13 | config.NUM_CLASSES = 2
14 | config.PRE_SCALES = [(1200, 1600)] # first is scale (the shorter side); second is max size
15 | config.SCALES = [(640, 640)] # first is scale (the shorter side); second is max size
16 | #config.SCALES = [(800, 800)] # first is scale (the shorter side); second is max size
17 | config.ORIGIN_SCALE = False
18 | 
19 | _ratio = (1.,)
20 | 
21 | RAC_SSH = {
22 |     '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
23 |     '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
24 |     '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
25 | }
26 | 
27 | _ratio = (1.,1.5)
28 | RAC_SSH2 = {
29 |     '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
30 |     '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
31 |     '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
32 | }
33 | 
34 | _ratio = (1.,1.5)
35 | RAC_SSH3 = {
36 |     '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
37 |     '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
38 |     '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
39 |     '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
40 | }
41 | 
42 | RAC_RETINA = {}
43 | _ratios = (1.0,)
44 | _ass = 2.0**(1.0/3)
45 | _basescale = 1.0
46 | for _stride in [4, 8, 16, 32, 64]:
47 |     key = str(_stride)
48 |     value = {'BASE_SIZE': 16, 'RATIOS': _ratios, 'ALLOWED_BORDER': 9999}
49 |     scales = []
50 |     for _ in range(3):
51 |         scales.append(_basescale)
52 |         _basescale *= _ass
53 |     value['SCALES'] = tuple(scales)
54 |     RAC_RETINA[key] = value
55 | 
56 | 
57 | config.RPN_ANCHOR_CFG = RAC_SSH #default
58 | 
59 | config.NET_MODE = 2
60 | config.HEAD_MODULE = 'SSH'
61 | #config.HEAD_MODULE = 'RF'
62 | config.LR_MODE = 0
63 | config.LANDMARK_LR_MULT = 2.0
64 | config.HEAD_FILTER_NUM = 256
65 | config.CONTEXT_FILTER_RATIO = 1
66 | config.max_feat_channel = 9999
67 | 
68 | config.USE_CROP = True
69 | config.USE_FPN = True
70 | config.USE_DCN = 0
71 | config.FACE_LANDMARK = True
72 | config.USE_OCCLUSION = False
73 | config.USE_BLUR = False
74 | config.MORE_SMALL_BOX = True
75 | 
76 | config.LAYER_FIX = False
77 | 
78 | config.CASCADE = 0
79 | config.CASCADE_MODE = 1
80 | #config.CASCADE_CLS_STRIDES = [16,8,4]
81 | #config.CASCADE_BBOX_STRIDES = [64,32]
82 | config.CASCADE_CLS_STRIDES = [64,32,16,8,4]
83 | config.CASCADE_BBOX_STRIDES = [64,32,16,8,4]
84 | #config.CASCADE_BBOX_STRIDES = [64,32,16,8]
85 | 
86 | config.HEAD_BOX = False
87 | config.DENSE_ANCHOR = False
88 | config.USE_MAXOUT = 0
89 | config.SHARE_WEIGHT_BBOX = False
90 | config.SHARE_WEIGHT_LANDMARK = False
91 | 
92 | config.RANDOM_FEAT_STRIDE = False
93 | config.NUM_CPU = 4
94 | config.MIXUP = 0.0
95 | config.USE_3D = False
96 | 
97 | #config.BBOX_MASK_THRESH = 0
98 | config.COLOR_MODE = 2
99 | config.COLOR_JITTERING = 0.125
100 | #config.COLOR_JITTERING = 0
101 | #config.COLOR_JITTERING = 0.2
102 | 
103 | 
104 | config.TRAIN = edict()
105 | 
106 | config.TRAIN.IMAGE_ALIGN = 0
107 | config.TRAIN.MIN_BOX_SIZE = 0
108 | config.BBOX_MASK_THRESH = config.TRAIN.MIN_BOX_SIZE
109 | # R-CNN and RPN
110 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e
111 | config.TRAIN.BATCH_IMAGES = 8
112 | # e2e changes behavior of anchor loader and metric
113 | config.TRAIN.END2END = True
114 | # group images with similar aspect ratio
115 | config.TRAIN.ASPECT_GROUPING = False
116 | 
117 | # RPN anchor loader
118 | # rpn anchors batch size
119 | config.TRAIN.RPN_ENABLE_OHEM = 2
120 | config.TRAIN.RPN_BATCH_SIZE = 256
121 | # rpn anchors sampling params
122 | config.TRAIN.RPN_FG_FRACTION = 0.25
123 | config.TRAIN.RPN_POSITIVE_OVERLAP = 0.5
124 | config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
125 | if config.CASCADE>0:
126 |     config.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
127 | config.TRAIN.CASCADE_OVERLAP = [0.4, 0.5]
128 | config.TRAIN.RPN_CLOBBER_POSITIVES = False
129 | config.TRAIN.RPN_FORCE_POSITIVE = False
130 | # rpn bounding box regression params
131 | config.TRAIN.BBOX_STDS = (1.0, 1.0, 1.0, 1.0)
132 | config.TRAIN.LANDMARK_STD = 1.0
133 | 
134 | 
135 | config.TEST = edict()
136 | 
137 | # R-CNN testing
138 | # use rpn to generate proposal
139 | config.TEST.HAS_RPN = False
140 | # size of images for each device
141 | config.TEST.BATCH_IMAGES = 1
142 | 
143 | # RPN proposal
144 | config.TEST.CXX_PROPOSAL = True
145 | config.TEST.RPN_NMS_THRESH = 0.3
146 | config.TEST.RPN_PRE_NMS_TOP_N = 1000
147 | config.TEST.RPN_POST_NMS_TOP_N = 3000
148 | #config.TEST.RPN_MIN_SIZE = config.RPN_FEAT_STRIDE
149 | #config.TEST.RPN_MIN_SIZE = [0,0,0]
150 | 
151 | # RCNN nms
152 | config.TEST.NMS = 0.3
153 | 
154 | config.TEST.SCORE_THRESH = 0.05
155 | config.TEST.IOU_THRESH = 0.5
156 | 
157 | 
158 | # network settings
159 | network = edict()
160 | 
161 | network.ssh = edict()
162 | 
163 | network.mnet = edict()
164 | #network.mnet.pretrained = 'model/mnasnet'
165 | #network.mnet.pretrained = 'model/mobilenetv2_0_5'
166 | #network.mnet.pretrained = 'model/mobilenet_0_5'
167 | #network.mnet.MULTIPLIER = 0.5
168 | #network.mnet.pretrained = 'model/mobilenet_0_25'
169 | #network.mnet.pretrained_epoch = 0
170 | #network.mnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485])
171 | #network.mnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229])
172 | #network.mnet.PIXEL_SCALE = 255.0
173 | network.mnet.FIXED_PARAMS = ['^stage1', '^.*upsampling']
174 | network.mnet.BATCH_IMAGES = 16
175 | network.mnet.HEAD_FILTER_NUM = 64
176 | network.mnet.CONTEXT_FILTER_RATIO = 1
177 | 
178 | network.mnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0])
179 | network.mnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0])
180 | network.mnet.PIXEL_SCALE = 1.0
181 | #network.mnet.pretrained = 'model/mobilenetfd_0_25' #78
182 | #network.mnet.pretrained = 'model/mobilenetfd2' #75
183 | network.mnet.pretrained = 'model/mobilenet025fd0' #78
184 | #network.mnet.pretrained = 'model/mobilenet025fd1' #75
185 | #network.mnet.pretrained = 'model/mobilenet025fd2' #
186 | network.mnet.pretrained_epoch = 0
187 | network.mnet.max_feat_channel = 8888
188 | network.mnet.COLOR_MODE = 1
189 | network.mnet.USE_CROP = True
190 | network.mnet.RPN_ANCHOR_CFG = RAC_SSH
191 | network.mnet.LAYER_FIX = True
192 | network.mnet.LANDMARK_LR_MULT = 2.5
193 | 
194 | 
195 | network.resnet = edict()
196 | #network.resnet.pretrained = 'model/ResNet50_v1d'
197 | #network.resnet.pretrained = 'model/resnet-50'
198 | network.resnet.pretrained = 'model/resnet-152'
199 | #network.resnet.pretrained = 'model/senet154'
200 | #network.resnet.pretrained = 'model/densenet161'
201 | network.resnet.pretrained_epoch = 0
202 | #network.mnet.PIXEL_MEANS = np.array([103.939, 116.779, 123.68])
203 | #network.mnet.PIXEL_STDS = np.array([57.375, 57.12, 58.393])
204 | #network.resnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485])
205 | #network.resnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229])
206 | #network.resnet.PIXEL_SCALE = 255.0
207 | network.resnet.lr_step = '1,2,3,4,5,55,68,80'
208 | network.resnet.lr = 0.001
209 | network.resnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0])
210 | network.resnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0])
211 | network.resnet.PIXEL_SCALE = 1.0
212 | network.resnet.FIXED_PARAMS = ['^stage1', '^.*upsampling']
213 | network.resnet.BATCH_IMAGES = 8
214 | network.resnet.HEAD_FILTER_NUM = 256
215 | network.resnet.CONTEXT_FILTER_RATIO = 1
216 | network.resnet.USE_DCN = 2
217 | network.resnet.RPN_BATCH_SIZE = 256
218 | network.resnet.RPN_ANCHOR_CFG = RAC_RETINA
219 | 
220 | network.resnet.USE_DCN = 0
221 | network.resnet.pretrained = 'model/resnet-50'
222 | network.resnet.RPN_ANCHOR_CFG = RAC_SSH
223 | 
224 | 
225 | # dataset settings
226 | dataset = edict()
227 | 
228 | dataset.widerface = edict()
229 | dataset.widerface.dataset = 'widerface'
230 | dataset.widerface.image_set = 'train'
231 | dataset.widerface.test_image_set = 'val'
232 | dataset.widerface.root_path = 'data'
233 | dataset.widerface.dataset_path = 'data/widerface'
234 | dataset.widerface.NUM_CLASSES = 2
235 | 
236 | dataset.retinaface = edict()
237 | dataset.retinaface.dataset = 'retinaface'
238 | dataset.retinaface.image_set = 'train'
239 | dataset.retinaface.test_image_set = 'val'
240 | dataset.retinaface.root_path = 'data'
241 | dataset.retinaface.dataset_path = 'data/retinaface'
242 | dataset.retinaface.NUM_CLASSES = 2
243 | 
244 | # default settings
245 | default = edict()
246 | 
247 | config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3', '^.*upsampling']
248 | #config.FIXED_PARAMS = ['^.*upsampling']
249 | #config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3']
250 | #config.FIXED_PARAMS = ['^conv0', '^stage1', 'gamma', 'beta'] #for resnet
251 | 
252 | # default network
253 | default.network = 'resnet'
254 | default.pretrained = 'model/resnet-152'
255 | #default.network = 'resnetssh'
256 | default.pretrained_epoch = 0
257 | # default dataset
258 | default.dataset = 'retinaface'
259 | default.image_set = 'train'
260 | default.test_image_set = 'val'
261 | default.root_path = 'data'
262 | default.dataset_path = 'data/retinaface'
263 | # default training
264 | default.frequent = 20
265 | default.kvstore = 'device'
266 | # default e2e
267 | default.prefix = 'model/retinaface'
268 | default.end_epoch = 10000
269 | default.lr_step = '55,68,80'
270 | default.lr = 0.01
271 | 
272 | def generate_config(_network, _dataset):
273 |     for k, v in network[_network].items():
274 |         if k in config:
275 |             config[k] = v
276 |         elif k in default:
277 |             default[k] = v
278 |         if k in config.TRAIN:
279 |             config.TRAIN[k] = v
280 |     for k, v in dataset[_dataset].items():
281 |         if k in config:
282 |             config[k] = v
283 |         elif k in default:
284 |             default[k] = v
285 |         if k in config.TRAIN:
286 |             config.TRAIN[k] = v
287 |     config.network = _network
288 |     config.dataset = _dataset
289 |     config.RPN_FEAT_STRIDE = []
290 |     num_anchors = []
291 |     for k in config.RPN_ANCHOR_CFG:
292 |         config.RPN_FEAT_STRIDE.append( int(k) )
293 |         _num_anchors = len(config.RPN_ANCHOR_CFG[k]['SCALES'])*len(config.RPN_ANCHOR_CFG[k]['RATIOS'])
294 |         if config.DENSE_ANCHOR:
295 |             _num_anchors *= 2
296 |         config.RPN_ANCHOR_CFG[k]['NUM_ANCHORS'] = _num_anchors
297 |         num_anchors.append(_num_anchors)
298 |     config.RPN_FEAT_STRIDE = sorted(config.RPN_FEAT_STRIDE, reverse=True)
299 |     for j in range(1,len(num_anchors)):
300 |         assert num_anchors[0]==num_anchors[j]
301 |     config.NUM_ANCHORS = num_anchors[0]
302 | 
303 | 
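# Example: generate_config() wires a network/dataset pair into `config` and
# `default`. A minimal sketch, assuming this sample config is importable as
# rcnn.config (the module name the rest of rcnn imports, e.g. in
# rcnn/processing/bbox_regression.py); the printed values follow directly from
# the RAC_SSH table and the resnet overrides above:
#
#   from rcnn.config import config, default, generate_config
#
#   generate_config('resnet', 'retinaface')
#   config.RPN_FEAT_STRIDE    # [32, 16, 8] -- RAC_SSH strides, sorted descending
#   config.NUM_ANCHORS        # 2 (2 SCALES x 1 RATIO per stride, DENSE_ANCHOR off)
#   default.pretrained        # 'model/resnet-50' (the last resnet override wins)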
--------------------------------------------------------------------------------
/rcnn/processing/bbox_regression.py:
--------------------------------------------------------------------------------
1 | """
2 | This file has functions about generating bounding box regression targets
3 | """
4 | 
5 | from ..pycocotools.mask import encode
6 | import numpy as np
7 | 
8 | from ..logger import logger
9 | from .bbox_transform import bbox_overlaps, bbox_transform
10 | from rcnn.config import config
11 | import math
12 | import cv2
13 | import PIL.Image as Image
14 | import threading
15 | import queue as Queue  # Python 3 name; this module was called Queue on Python 2
16 | 
17 | 
18 | def compute_bbox_regression_targets(rois, overlaps, labels):
19 |     """
20 |     given rois, overlaps, gt labels, compute bounding box regression targets
21 |     :param rois: roidb[i]['boxes'] k * 4
22 |     :param overlaps: roidb[i]['max_overlaps'] k * 1
23 |     :param labels: roidb[i]['max_classes'] k * 1
24 |     :return: targets[i][class, dx, dy, dw, dh] k * 5
25 |     """
26 |     # Ensure ROIs are floats
27 |     rois = rois.astype(np.float64, copy=False)
28 | 
29 |     # Sanity check
30 |     if len(rois) != len(overlaps):
31 |         logger.warning('bbox regression: len(rois) != len(overlaps)')
32 | 
33 |     # Indices of ground-truth ROIs
34 |     gt_inds = np.where(overlaps == 1)[0]
35 |     if len(gt_inds) == 0:
36 |         logger.warning('bbox regression: len(gt_inds) == 0')
37 | 
38 |     # Indices of examples for which we try to make predictions
39 |     ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]
40 | 
41 |     # Get IoU overlap between each ex ROI and gt ROI
42 |     ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])
43 | 
44 |     # Find which gt ROI each ex ROI has max overlap with:
45 |     # this will be the ex ROI's gt target
46 |     gt_assignment = ex_gt_overlaps.argmax(axis=1)
47 |     gt_rois = rois[gt_inds[gt_assignment], :]
48 |     ex_rois = rois[ex_inds, :]
49 | 
50 |     targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
51 |     targets[ex_inds, 0] = labels[ex_inds]
52 |     targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
53 |     return targets
54 | 
55 | 
56 | def add_bbox_regression_targets(roidb):
57 |     """
58 |     given roidb, add ['bbox_targets'] and normalize bounding box regression targets
59 |     :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb
60 |     :return: means and standard deviations of the targets
61 |     """
62 |     logger.info('bbox regression: add bounding box regression targets')
63 |     assert len(roidb) > 0
64 |     assert 'max_classes' in roidb[0]
65 | 
66 |     num_images = len(roidb)
67 |     num_classes = roidb[0]['gt_overlaps'].shape[1]
68 |     for im_i in range(num_images):
69 |         rois = roidb[im_i]['boxes']
70 |         max_overlaps = roidb[im_i]['max_overlaps']
71 |         max_classes = roidb[im_i]['max_classes']
72 |         roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes)
73 | 
74 |     if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
75 |         # use fixed / precomputed means and stds instead of empirical values
76 |         means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1))
77 |         stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1))
78 |     else:
79 |         # compute mean, std values
80 |         class_counts = np.zeros((num_classes, 1)) + 1e-14
81 |         sums = np.zeros((num_classes, 4))
82 |         squared_sums = np.zeros((num_classes, 4))
83 |         for im_i in range(num_images):
84 |             targets = roidb[im_i]['bbox_targets']
85 |             for cls in range(1, num_classes):
86 |                 cls_indexes = np.where(targets[:, 0] == cls)[0]
87 |                 if cls_indexes.size > 0:
88 |                     class_counts[cls] += cls_indexes.size
89 |                     sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)
90 |                     squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)
91 | 
92 |         means = sums / class_counts
93 |         # var(x) = E(x^2) - E(x)^2
94 |         stds = np.sqrt(squared_sums / class_counts - means ** 2)
95 | 
96 |     # normalized targets
97 |     for im_i in range(num_images):
98 |         targets = roidb[im_i]['bbox_targets']
99 |         for cls in range(1, num_classes):
100 |             cls_indexes = np.where(targets[:, 0] == cls)[0]
101 |             roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :]
102 |             roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :]
103 | 
104 |     return means.ravel(), stds.ravel()
105 | 
106 | 
107 | def expand_bbox_regression_targets(bbox_targets_data, num_classes):
108 |     """
109 |     expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets
110 |     :param bbox_targets_data: [k * 5]
111 |     :param num_classes: number of classes
112 |     :return: bbox targets processed [k * 4 * num_classes]
113 |              bbox_weights: only foreground boxes get non-zero regression weights
114 |     """
115 |     classes = bbox_targets_data[:, 0]
116 |     bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32)
117 |     bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
118 |     indexes = np.where(classes > 0)[0]
119 |     for index in indexes:
120 |         cls = classes[index]
121 |         start = int(4 * cls)
122 |         end = start + 4
123 |         bbox_targets[index, start:end] = bbox_targets_data[index, 1:]
124 |         bbox_weights[index, start:end] = config.TRAIN.BBOX_WEIGHTS
125 |     return bbox_targets, bbox_weights
126 | 
127 | 
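# Example: with num_classes = 2, a single foreground row [1, dx, dy, dw, dh]
# expands into the class-1 slice, columns 4:8. A minimal sketch; note it needs
# config.TRAIN.BBOX_WEIGHTS, e.g. (1.0, 1.0, 1.0, 1.0), which the sample config
# in this repo does not set:
#
#   data = np.array([[1, 0.1, -0.2, 0.05, 0.0]], dtype=np.float32)
#   bbox_targets, bbox_weights = expand_bbox_regression_targets(data, 2)
#   bbox_targets.shape       # (1, 8)
#   bbox_targets[0, 4:8]     # array([ 0.1 , -0.2 ,  0.05,  0.  ], dtype=float32)
#   bbox_weights[0, 4:8]     # the configured BBOX_WEIGHTS; zeros everywhere else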
128 | def compute_mask_and_label(ex_rois, ex_labels, seg, flipped):
129 |     # assert os.path.exists(seg_gt), 'Path does not exist: {}'.format(seg_gt)
130 |     # im = Image.open(seg_gt)
131 |     # pixel = list(im.getdata())
132 |     # pixel = np.array(pixel).reshape([im.size[1], im.size[0]])
133 |     im = Image.open(seg)
134 |     pixel = list(im.getdata())
135 |     ins_seg = np.array(pixel).reshape([im.size[1], im.size[0]])
136 |     if flipped:
137 |         ins_seg = ins_seg[:, ::-1]
138 |     rois = ex_rois
139 |     n_rois = ex_rois.shape[0]
140 |     label = ex_labels
141 |     class_id = config.CLASS_ID
142 |     mask_target = np.zeros((n_rois, 28, 28), dtype=np.int8)
143 |     mask_label = np.zeros((n_rois), dtype=np.int8)
144 |     for n in range(n_rois):
145 |         target = ins_seg[int(rois[n, 1]): int(rois[n, 3]), int(rois[n, 0]): int(rois[n, 2])]
146 |         ids = np.unique(target)
147 |         ins_id = 0
148 |         max_count = 0
149 |         for id in ids:
150 |             if math.floor(id / 1000) == class_id[int(label[int(n)])]:
151 |                 px = np.where(ins_seg == int(id))
152 |                 x_min = np.min(px[1])
153 |                 y_min = np.min(px[0])
154 |                 x_max = np.max(px[1])
155 |                 y_max = np.max(px[0])
156 |                 x1 = max(rois[n, 0], x_min)
157 |                 y1 = max(rois[n, 1], y_min)
158 |                 x2 = min(rois[n, 2], x_max)
159 |                 y2 = min(rois[n, 3], y_max)
160 |                 iou = (x2 - x1) * (y2 - y1)
161 |                 iou = iou / ((rois[n, 2] - rois[n, 0]) * (rois[n, 3] - rois[n, 1])
162 |                              + (x_max - x_min) * (y_max - y_min) - iou)
163 |                 if iou > max_count:
164 |                     ins_id = id
165 |                     max_count = iou
166 | 
167 |         if max_count == 0:
168 |             continue
169 |         # print(max_count)
170 |         mask = np.zeros(target.shape)
171 |         idx = np.where(target == ins_id)
172 |         mask[idx] = 1
173 |         mask = cv2.resize(mask, (28, 28), interpolation=cv2.INTER_NEAREST)
174 | 
175 |         mask_target[n] = mask
176 |         mask_label[n] = label[int(n)]
177 |     return mask_target, mask_label
178 | 
179 | 
180 | def compute_bbox_mask_targets_and_label(rois, overlaps, labels, seg, flipped):
181 |     """
182 |     given rois, overlaps, gt labels, seg, compute bounding box mask targets
183 |     :param rois: roidb[i]['boxes'] k * 4
184 |     :param overlaps: roidb[i]['max_overlaps'] k * 1
185 |     :param labels: roidb[i]['max_classes'] k * 1
186 |     :return: mask targets, mask labels, and the indices of the sampled rois
187 |     """
188 |     # Ensure ROIs are floats
189 |     rois = rois.astype(np.float64, copy=False)
190 | 
191 |     # Sanity check
192 |     if len(rois) != len(overlaps):
193 |         print('bbox regression: this should not happen')
194 | 
195 |     # Indices of ground-truth ROIs
196 |     gt_inds = np.where(overlaps == 1)[0]
197 |     if len(gt_inds) == 0:
198 |         print('something wrong : zero ground truth rois')
199 |     # Indices of examples for which we try to make predictions
200 |     ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]
201 | 
202 |     # Get IoU overlap between each ex ROI and gt ROI
203 |     ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])
204 | 
205 | 
206 |     # Find which gt ROI each ex ROI has max overlap with:
207 |     # this will be the ex ROI's gt target
208 |     gt_assignment = ex_gt_overlaps.argmax(axis=1)
209 |     gt_rois = rois[gt_inds[gt_assignment], :]
210 |     ex_rois = rois[ex_inds, :]
211 | 
212 |     mask_targets, mask_label = compute_mask_and_label(ex_rois, labels[ex_inds], seg, flipped)
213 |     return mask_targets, mask_label, ex_inds
214 | 
215 | def add_mask_targets(roidb):
216 |     """
217 |     given roidb, add ['mask_targets'], ['mask_labels'] and ['mask_inds']
218 |     :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb
219 |     :return: None; mask targets are written into roidb in place
220 |     """
221 |     print('add bounding box mask targets')
222 |     assert len(roidb) > 0
223 |     assert 'max_classes' in roidb[0]
224 | 
225 |     num_images = len(roidb)
226 | 
227 |     # Multi threads processing
228 |     im_queue = Queue.Queue(maxsize=0)
229 |     for im_i in range(num_images):
230 |         im_queue.put(im_i)
231 | 
232 |     def process():
233 |         while not im_queue.empty():
234 |             im_i = im_queue.get()
235 |             print("-----process img {}".format(im_i))
236 |             rois = roidb[im_i]['boxes']
237 |             max_overlaps = roidb[im_i]['max_overlaps']
238 |             max_classes = roidb[im_i]['max_classes']
239 |             ins_seg = roidb[im_i]['ins_seg']
240 |             flipped = roidb[im_i]['flipped']
241 |             roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \
242 |                 compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg, flipped)
243 |     threads = [threading.Thread(target=process, args=()) for i in range(10)]
244 |     for t in threads: t.start()
245 |     for t in threads: t.join()
246 |     # Single thread
247 |     # for im_i in range(num_images):
248 |     #     print("-----processing img {}".format(im_i))
249 |     #     rois = roidb[im_i]['boxes']
250 |     #     max_overlaps = roidb[im_i]['max_overlaps']
251 |     #     max_classes = roidb[im_i]['max_classes']
252 |     #     ins_seg = roidb[im_i]['ins_seg']
253 |     #     # roidb[im_i]['mask_targets'] = compute_bbox_mask_targets(rois, max_overlaps, max_classes, ins_seg)
254 |     #     roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \
255 |     #         compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg)
256 | 
--------------------------------------------------------------------------------
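add_bbox_regression_targets above is the module's entry point: it fills each
roidb[i]['bbox_targets'] in place and hands back the per-class whitening
statistics. A minimal sketch of a call site; `roidb` here stands for the list
of per-image dicts produced by imdb.prepare_roidb, and the config.TRAIN keys
the function reads (BBOX_REGRESSION_THRESH, BBOX_NORMALIZATION_PRECOMPUTED,
BBOX_MEANS) are not set by the sample config in this repo:

    from rcnn.processing.bbox_regression import add_bbox_regression_targets

    # roidb: list of dicts with 'boxes', 'gt_overlaps', 'max_overlaps',
    # 'max_classes', as produced by imdb.prepare_roidb
    means, stds = add_bbox_regression_targets(roidb)
    # each roidb[i]['bbox_targets'] row is now (class, dx, dy, dw, dh),
    # whitened per class; means/stds are typically kept so predictions can
    # be un-whitened at test time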
/rcnn/pycocotools/_mask.pyx:
--------------------------------------------------------------------------------
1 | # distutils: language = c
2 | # distutils: sources = maskApi.c
3 | 
4 | #**************************************************************************
5 | # Microsoft COCO Toolbox. version 2.0
6 | # Data, paper, and tutorials available at: http://mscoco.org/
7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
8 | # Licensed under the Simplified BSD License [see coco/license.txt]
9 | #**************************************************************************
10 | 
11 | __author__ = 'tsungyi'
12 | 
13 | import sys
14 | PYTHON_VERSION = sys.version_info[0]
15 | 
16 | # import both Python-level and C-level symbols of Numpy
17 | # the API uses Numpy to interface C and Python
18 | import numpy as np
19 | cimport numpy as np
20 | from libc.stdlib cimport malloc, free
21 | 
22 | # initialize Numpy. must do.
23 | np.import_array()
24 | 
25 | # import numpy C function
26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible for memory management
27 | cdef extern from "numpy/arrayobject.h":
28 |     void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
29 | 
30 | # Declare the prototype of the C functions in MaskApi.h
31 | cdef extern from "maskApi.h":
32 |     ctypedef unsigned int uint
33 |     ctypedef unsigned long siz
34 |     ctypedef unsigned char byte
35 |     ctypedef double* BB
36 |     ctypedef struct RLE:
37 |         siz h,
38 |         siz w,
39 |         siz m,
40 |         uint* cnts,
41 |     void rlesInit( RLE **R, siz n )
42 |     void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n )
43 |     void rleDecode( const RLE *R, byte *mask, siz n )
44 |     void rleMerge( const RLE *R, RLE *M, siz n, int intersect )
45 |     void rleArea( const RLE *R, siz n, uint *a )
46 |     void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o )
47 |     void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o )
48 |     void rleToBbox( const RLE *R, BB bb, siz n )
49 |     void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n )
50 |     void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w )
51 |     char* rleToString( const RLE *R )
52 |     void rleFrString( RLE *R, char *s, siz h, siz w )
53 | 
54 | # python class to wrap RLE array in C
55 | # the class handles the memory allocation and deallocation
56 | cdef class RLEs:
57 |     cdef RLE *_R
58 |     cdef siz _n
59 | 
60 |     def __cinit__(self, siz n =0):
61 |         rlesInit(&self._R, n)
62 |         self._n = n
63 | 
64 |     # free the RLE array here
65 |     def __dealloc__(self):
66 |         if self._R is not NULL:
67 |             for i in range(self._n):
68 |                 free(self._R[i].cnts)
69 |             free(self._R)
70 |     def __getattr__(self, key):
71 |         if key == 'n':
72 |             return self._n
73 |         raise AttributeError(key)
74 | 
75 | # python class to wrap Mask array in C
76 | # the class handles the memory allocation and deallocation
77 | cdef class Masks:
78 |     cdef byte *_mask
79 |     cdef siz _h
80 |     cdef siz _w
81 |     cdef siz _n
82 | 
83 |     def __cinit__(self, h, w, n):
84 |         self._mask = <byte*> malloc(h*w*n* sizeof(byte))
85 |         self._h = h
86 |         self._w = w
87 |         self._n = n
88 |     # def __dealloc__(self):
89 |         # the memory management of _mask has been passed to np.ndarray
90 |         # it doesn't need to be freed here
91 | 
92 |     # called when passing into np.array() and return an np.ndarray in column-major order
93 |     def __array__(self):
94 |         cdef np.npy_intp shape[1]
95 |         shape[0] = self._h*self._w*self._n
96 |         # Create a 1D array, and reshape it to fortran/Matlab column-major array
97 |         ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F')
98 |         # The _mask allocated by Masks is now handled by ndarray
99 |         PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA)
100 |         return ndarray
101 | 
102 | # internal conversion from Python RLEs object to compressed RLE format
103 | def _toString(RLEs Rs):
104 |     cdef siz n = Rs.n
105 |     cdef bytes py_string
106 |     cdef char* c_string
107 |     objs = []
108 |     for i in range(n):
109 |         c_string = rleToString( &Rs._R[i] )
110 |         py_string = c_string
111 |         objs.append({
112 |             'size': [Rs._R[i].h, Rs._R[i].w],
113 |             'counts': py_string
114 |         })
115 |         free(c_string)
116 |     return objs
117 | 
118 | # internal conversion from compressed RLE format to Python RLEs object
119 | def _frString(rleObjs):
120 |     cdef siz n = len(rleObjs)
121 |     Rs = RLEs(n)
122 |     cdef bytes py_string
123 |     cdef char* c_string
124 |     for i, obj in enumerate(rleObjs):
125 |         if PYTHON_VERSION == 2:
126 |             py_string = str(obj['counts']).encode('utf8')
127 |         elif PYTHON_VERSION == 3:
128 |             py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts']
129 |         else:
130 |             raise Exception('Python version must be 2 or 3')
131 |         c_string = py_string
132 |         rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] )
133 |     return Rs
134 | 
135 | # encode mask to RLEs objects
136 | # list of RLE string can be generated by RLEs member function
137 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask):
138 |     h, w, n = mask.shape[0], mask.shape[1], mask.shape[2]
139 |     cdef RLEs Rs = RLEs(n)
140 |     rleEncode(Rs._R,<byte*>mask.data,h,w,n)
141 |     objs = _toString(Rs)
142 |     return objs
143 | 
144 | # decode mask from compressed list of RLE string or RLEs object
145 | def decode(rleObjs):
146 |     cdef RLEs Rs = _frString(rleObjs)
147 |     h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n
148 |     masks = Masks(h, w, n)
149 |     rleDecode(Rs._R, masks._mask, n);
150 |     return np.array(masks)
151 | 
152 | def merge(rleObjs, intersect=0):
153 |     cdef RLEs Rs = _frString(rleObjs)
154 |     cdef RLEs R = RLEs(1)
155 |     rleMerge(Rs._R, R._R, Rs._n, intersect)
156 |     obj = _toString(R)[0]
157 |     return obj
158 | 
159 | def area(rleObjs):
160 |     cdef RLEs Rs = _frString(rleObjs)
161 |     cdef uint* _a = <uint*> malloc(Rs._n* sizeof(uint))
162 |     rleArea(Rs._R, Rs._n, _a)
163 |     cdef np.npy_intp shape[1]
164 |     shape[0] = Rs._n
165 |     a = np.array((Rs._n, ), dtype=np.uint8)
166 |     a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a)
167 |     PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA)
168 |     return a
169 | 
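# Example: encode() takes a Fortran-ordered uint8 array of shape (h, w, n) and
# decode() inverts it. A minimal round-trip sketch, assuming the usual Python
# wrapper (rcnn/pycocotools/mask.py, the module bbox_regression.py imports
# encode from) re-exports these functions:
#
#   import numpy as np
#   from rcnn.pycocotools import mask as mask_util
#
#   m = np.zeros((4, 4, 1), dtype=np.uint8, order='F')
#   m[1:3, 1:3, 0] = 1                      # a 2x2 foreground square
#   rles = mask_util.encode(m)              # [{'size': [4, 4], 'counts': ...}]
#   mask_util.area(rles)                    # -> array([4], dtype=uint32)
#   assert (mask_util.decode(rles) == m).all()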
170 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox).
171 | def iou( dt, gt, pyiscrowd ):
172 |     def _preproc(objs):
173 |         if len(objs) == 0:
174 |             return objs
175 |         if type(objs) == np.ndarray:
176 |             if len(objs.shape) == 1:
177 |                 objs = objs.reshape((1, objs.shape[0]))  # promote a single box to shape (1, 4)
178 |             # check if it's Nx4 bbox
179 |             if not len(objs.shape) == 2 or not objs.shape[1] == 4:
180 |                 raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension')
181 |             objs = objs.astype(np.double)
182 |         elif type(objs) == list:
183 |             # check if list is in box format and convert it to np.ndarray
184 |             isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs]))
185 |             isrle = np.all(np.array([type(obj) == dict for obj in objs]))
186 |             if isbox:
187 |                 objs = np.array(objs, dtype=np.double)
188 |                 if len(objs.shape) == 1:
189 |                     objs = objs.reshape((1,objs.shape[0]))
190 |             elif isrle:
191 |                 objs = _frString(objs)
192 |             else:
193 |                 raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])')
194 |         else:
195 |             raise Exception('unrecognized type. The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.')
196 |         return objs
197 |     def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
198 |         rleIou( dt._R, gt._R, m, n, <byte*> iscrowd.data, <double*> _iou.data )
199 |     def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
200 |         bbIou( <BB> dt.data, <BB> gt.data, m, n, <byte*> iscrowd.data, <double*> _iou.data )
201 |     def _len(obj):
202 |         cdef siz N = 0
203 |         if type(obj) == RLEs:
204 |             N = obj.n
205 |         elif len(obj)==0:
206 |             pass
207 |         elif type(obj) == np.ndarray:
208 |             N = obj.shape[0]
209 |         return N
210 |     # convert iscrowd to numpy array
211 |     cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8)
212 |     # simple type checking
213 |     cdef siz m, n
214 |     dt = _preproc(dt)
215 |     gt = _preproc(gt)
216 |     m = _len(dt)
217 |     n = _len(gt)
218 |     if m == 0 or n == 0:
219 |         return []
220 |     if not type(dt) == type(gt):
221 |         raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray')
222 | 
223 |     # define local variables
224 |     cdef double* _iou = <double*> 0
225 |     cdef np.npy_intp shape[1]
226 |     # check type and assign iou function
227 |     if type(dt) == RLEs:
228 |         _iouFun = _rleIou
229 |     elif type(dt) == np.ndarray:
230 |         _iouFun = _bbIou
231 |     else:
232 |         raise Exception('input data type not allowed.')
233 |     _iou = <double*> malloc(m*n* sizeof(double))
234 |     iou = np.zeros((m*n, ), dtype=np.double)
235 |     shape[0] = m*n
236 |     iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou)
237 |     PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA)
238 |     _iouFun(dt, gt, iscrowd, m, n, iou)
239 |     return iou.reshape((m,n), order='F')
240 | 
241 | def toBbox( rleObjs ):
242 |     cdef RLEs Rs = _frString(rleObjs)
243 |     cdef siz n = Rs.n
244 |     cdef BB _bb = <BB> malloc(4*n* sizeof(double))
245 |     rleToBbox( Rs._R, _bb, n )
246 |     cdef np.npy_intp shape[1]
247 |     shape[0] = 4*n
248 |     bb = np.array((1,4*n), dtype=np.double)
249 |     bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4))
250 |     PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA)
251 |     return bb
252 | 
253 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ):
254 |     cdef siz n = bb.shape[0]
255 |     Rs = RLEs(n)
256 |     rleFrBbox( Rs._R, <const BB> bb.data, h, w, n )
257 |     objs = _toString(Rs)
258 |     return objs
259 | 
260 | def frPoly( poly, siz h, siz w ):
261 |     cdef np.ndarray[np.double_t, ndim=1] np_poly
262 |     n = len(poly)
263 |     Rs = RLEs(n)
264 |     for i, p in enumerate(poly):
265 |         np_poly = np.array(p, dtype=np.double, order='F')
266 |         rleFrPoly( &Rs._R[i], <const double*> np_poly.data, int(len(p)/2), h, w )
267 |     objs = _toString(Rs)
268 |     return objs
269 | 
270 | def frUncompressedRLE(ucRles, siz h, siz w):
271 |     cdef np.ndarray[np.uint32_t, ndim=1] cnts
272 |     cdef RLE R
273 |     cdef uint *data
274 |     n = len(ucRles)
275 |     objs = []
276 |     for i in range(n):
277 |         Rs = RLEs(1)
278 |         cnts = np.array(ucRles[i]['counts'], dtype=np.uint32)
279 |         # time for malloc can be saved here but it's fine
280 |         data = <uint*> malloc(len(cnts)* sizeof(uint))
281 |         for j in range(len(cnts)):
282 |             data[j] = cnts[j]
283 |         R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data)
284 |         Rs._R[0] = R
285 |         objs.append(_toString(Rs)[0])
286 |     return objs
287 | 
288 | def frPyObjects(pyobj, h, w):
289 |     # encode rle from a list of python objects
290 |     if type(pyobj) == np.ndarray:
291 |         objs = frBbox(pyobj, h, w)
292 |     elif type(pyobj) == list and len(pyobj[0]) == 4:
293 |         objs = frBbox(pyobj, h, w)
294 |     elif type(pyobj) == list and len(pyobj[0]) > 4:
295 |         objs = frPoly(pyobj, h, w)
296 |     elif type(pyobj) == list and type(pyobj[0]) == dict \
297 |         and 'counts' in pyobj[0] and 'size' in pyobj[0]:
298 |         objs = frUncompressedRLE(pyobj, h, w)
299 |     # encode rle from single python object
300 |     elif type(pyobj) == list and len(pyobj) == 4:
301 |         objs = frBbox([pyobj], h, w)[0]
302 |     elif type(pyobj) == list and len(pyobj) > 4:
303 |         objs = frPoly([pyobj], h, w)[0]
304 |     elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj:
305 |         objs = frUncompressedRLE([pyobj], h, w)[0]
306 |     else:
307 |         raise Exception('input type is not supported.')
308 |     return objs
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Eclipse Public License - v 2.0
2 | 
3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE
4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION
5 | OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
6 | 
7 | 1. DEFINITIONS
8 | 
9 | "Contribution" means:
10 | 
11 | a) in the case of the initial Contributor, the initial content
12 | Distributed under this Agreement, and
13 | 
14 | b) in the case of each subsequent Contributor:
15 | i) changes to the Program, and
16 | ii) additions to the Program;
17 | where such changes and/or additions to the Program originate from
18 | and are Distributed by that particular Contributor. A Contribution
19 | "originates" from a Contributor if it was added to the Program by
20 | such Contributor itself or anyone acting on such Contributor's behalf.
21 | Contributions do not include changes or additions to the Program that
22 | are not Modified Works.
23 | 
24 | "Contributor" means any person or entity that Distributes the Program.
25 | 
26 | "Licensed Patents" mean patent claims licensable by a Contributor which
27 | are necessarily infringed by the use or sale of its Contribution alone
28 | or when combined with the Program.
29 | 
30 | "Program" means the Contributions Distributed in accordance with this
31 | Agreement.
32 | 
33 | "Recipient" means anyone who receives the Program under this Agreement
34 | or any Secondary License (as applicable), including Contributors.
35 | 
36 | "Derivative Works" shall mean any work, whether in Source Code or other
37 | form, that is based on (or derived from) the Program and for which the
38 | editorial revisions, annotations, elaborations, or other modifications
39 | represent, as a whole, an original work of authorship.
40 | 
41 | "Modified Works" shall mean any work in Source Code or other form that
42 | results from an addition to, deletion from, or modification of the
43 | contents of the Program, including, for purposes of clarity any new file
44 | in Source Code form that contains any contents of the Program. Modified
45 | Works shall not include works that contain only declarations,
46 | interfaces, types, classes, structures, or files of the Program solely
47 | in each case in order to link to, bind by name, or subclass the Program
48 | or Modified Works thereof.
49 | 
50 | "Distribute" means the acts of a) distributing or b) making available
51 | in any manner that enables the transfer of a copy.
52 | 53 | "Source Code" means the form of a Program preferred for making 54 | modifications, including but not limited to software source code, 55 | documentation source, and configuration files. 56 | 57 | "Secondary License" means either the GNU General Public License, 58 | Version 2.0, or any later versions of that license, including any 59 | exceptions or additional permissions as identified by the initial 60 | Contributor. 61 | 62 | 2. GRANT OF RIGHTS 63 | 64 | a) Subject to the terms of this Agreement, each Contributor hereby 65 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 66 | license to reproduce, prepare Derivative Works of, publicly display, 67 | publicly perform, Distribute and sublicense the Contribution of such 68 | Contributor, if any, and such Derivative Works. 69 | 70 | b) Subject to the terms of this Agreement, each Contributor hereby 71 | grants Recipient a non-exclusive, worldwide, royalty-free patent 72 | license under Licensed Patents to make, use, sell, offer to sell, 73 | import and otherwise transfer the Contribution of such Contributor, 74 | if any, in Source Code or other form. This patent license shall 75 | apply to the combination of the Contribution and the Program if, at 76 | the time the Contribution is added by the Contributor, such addition 77 | of the Contribution causes such combination to be covered by the 78 | Licensed Patents. The patent license shall not apply to any other 79 | combinations which include the Contribution. No hardware per se is 80 | licensed hereunder. 81 | 82 | c) Recipient understands that although each Contributor grants the 83 | licenses to its Contributions set forth herein, no assurances are 84 | provided by any Contributor that the Program does not infringe the 85 | patent or other intellectual property rights of any other entity. 86 | Each Contributor disclaims any liability to Recipient for claims 87 | brought by any other entity based on infringement of intellectual 88 | property rights or otherwise. As a condition to exercising the 89 | rights and licenses granted hereunder, each Recipient hereby 90 | assumes sole responsibility to secure any other intellectual 91 | property rights needed, if any. For example, if a third party 92 | patent license is required to allow Recipient to Distribute the 93 | Program, it is Recipient's responsibility to acquire that license 94 | before distributing the Program. 95 | 96 | d) Each Contributor represents that to its knowledge it has 97 | sufficient copyright rights in its Contribution, if any, to grant 98 | the copyright license set forth in this Agreement. 99 | 100 | e) Notwithstanding the terms of any Secondary License, no 101 | Contributor makes additional grants to any Recipient (other than 102 | those set forth in this Agreement) as a result of such Recipient's 103 | receipt of the Program under the terms of a Secondary License 104 | (if permitted under the terms of Section 3). 105 | 106 | 3. 
REQUIREMENTS 107 | 108 | 3.1 If a Contributor Distributes the Program in any form, then: 109 | 110 | a) the Program must also be made available as Source Code, in 111 | accordance with section 3.2, and the Contributor must accompany 112 | the Program with a statement that the Source Code for the Program 113 | is available under this Agreement, and informs Recipients how to 114 | obtain it in a reasonable manner on or through a medium customarily 115 | used for software exchange; and 116 | 117 | b) the Contributor may Distribute the Program under a license 118 | different than this Agreement, provided that such license: 119 | i) effectively disclaims on behalf of all other Contributors all 120 | warranties and conditions, express and implied, including 121 | warranties or conditions of title and non-infringement, and 122 | implied warranties or conditions of merchantability and fitness 123 | for a particular purpose; 124 | 125 | ii) effectively excludes on behalf of all other Contributors all 126 | liability for damages, including direct, indirect, special, 127 | incidental and consequential damages, such as lost profits; 128 | 129 | iii) does not attempt to limit or alter the recipients' rights 130 | in the Source Code under section 3.2; and 131 | 132 | iv) requires any subsequent distribution of the Program by any 133 | party to be under a license that satisfies the requirements 134 | of this section 3. 135 | 136 | 3.2 When the Program is Distributed as Source Code: 137 | 138 | a) it must be made available under this Agreement, or if the 139 | Program (i) is combined with other material in a separate file or 140 | files made available under a Secondary License, and (ii) the initial 141 | Contributor attached to the Source Code the notice described in 142 | Exhibit A of this Agreement, then the Program may be made available 143 | under the terms of such Secondary Licenses, and 144 | 145 | b) a copy of this Agreement must be included with each copy of 146 | the Program. 147 | 148 | 3.3 Contributors may not remove or alter any copyright, patent, 149 | trademark, attribution notices, disclaimers of warranty, or limitations 150 | of liability ("notices") contained within the Program from any copy of 151 | the Program which they Distribute, provided that Contributors may add 152 | their own appropriate notices. 153 | 154 | 4. COMMERCIAL DISTRIBUTION 155 | 156 | Commercial distributors of software may accept certain responsibilities 157 | with respect to end users, business partners and the like. While this 158 | license is intended to facilitate the commercial use of the Program, 159 | the Contributor who includes the Program in a commercial product 160 | offering should do so in a manner which does not create potential 161 | liability for other Contributors. Therefore, if a Contributor includes 162 | the Program in a commercial product offering, such Contributor 163 | ("Commercial Contributor") hereby agrees to defend and indemnify every 164 | other Contributor ("Indemnified Contributor") against any losses, 165 | damages and costs (collectively "Losses") arising from claims, lawsuits 166 | and other legal actions brought by a third party against the Indemnified 167 | Contributor to the extent caused by the acts or omissions of such 168 | Commercial Contributor in connection with its distribution of the Program 169 | in a commercial product offering. 
The obligations in this section do not 170 | apply to any claims or Losses relating to any actual or alleged 171 | intellectual property infringement. In order to qualify, an Indemnified 172 | Contributor must: a) promptly notify the Commercial Contributor in 173 | writing of such claim, and b) allow the Commercial Contributor to control, 174 | and cooperate with the Commercial Contributor in, the defense and any 175 | related settlement negotiations. The Indemnified Contributor may 176 | participate in any such claim at its own expense. 177 | 178 | For example, a Contributor might include the Program in a commercial 179 | product offering, Product X. That Contributor is then a Commercial 180 | Contributor. If that Commercial Contributor then makes performance 181 | claims, or offers warranties related to Product X, those performance 182 | claims and warranties are such Commercial Contributor's responsibility 183 | alone. Under this section, the Commercial Contributor would have to 184 | defend claims against the other Contributors related to those performance 185 | claims and warranties, and if a court requires any other Contributor to 186 | pay any damages as a result, the Commercial Contributor must pay 187 | those damages. 188 | 189 | 5. NO WARRANTY 190 | 191 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 192 | PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" 193 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 194 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF 195 | TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR 196 | PURPOSE. Each Recipient is solely responsible for determining the 197 | appropriateness of using and distributing the Program and assumes all 198 | risks associated with its exercise of rights under this Agreement, 199 | including but not limited to the risks and costs of program errors, 200 | compliance with applicable laws, damage to or loss of data, programs 201 | or equipment, and unavailability or interruption of operations. 202 | 203 | 6. DISCLAIMER OF LIABILITY 204 | 205 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 206 | PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS 207 | SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 208 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST 209 | PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 210 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 211 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 212 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE 213 | POSSIBILITY OF SUCH DAMAGES. 214 | 215 | 7. GENERAL 216 | 217 | If any provision of this Agreement is invalid or unenforceable under 218 | applicable law, it shall not affect the validity or enforceability of 219 | the remainder of the terms of this Agreement, and without further 220 | action by the parties hereto, such provision shall be reformed to the 221 | minimum extent necessary to make such provision valid and enforceable. 
222 | 223 | If Recipient institutes patent litigation against any entity 224 | (including a cross-claim or counterclaim in a lawsuit) alleging that the 225 | Program itself (excluding combinations of the Program with other software 226 | or hardware) infringes such Recipient's patent(s), then such Recipient's 227 | rights granted under Section 2(b) shall terminate as of the date such 228 | litigation is filed. 229 | 230 | All Recipient's rights under this Agreement shall terminate if it 231 | fails to comply with any of the material terms or conditions of this 232 | Agreement and does not cure such failure in a reasonable period of 233 | time after becoming aware of such noncompliance. If all Recipient's 234 | rights under this Agreement terminate, Recipient agrees to cease use 235 | and distribution of the Program as soon as reasonably practicable. 236 | However, Recipient's obligations under this Agreement and any licenses 237 | granted by Recipient relating to the Program shall continue and survive. 238 | 239 | Everyone is permitted to copy and distribute copies of this Agreement, 240 | but in order to avoid inconsistency the Agreement is copyrighted and 241 | may only be modified in the following manner. The Agreement Steward 242 | reserves the right to publish new versions (including revisions) of 243 | this Agreement from time to time. No one other than the Agreement 244 | Steward has the right to modify this Agreement. The Eclipse Foundation 245 | is the initial Agreement Steward. The Eclipse Foundation may assign the 246 | responsibility to serve as the Agreement Steward to a suitable separate 247 | entity. Each new version of the Agreement will be given a distinguishing 248 | version number. The Program (including Contributions) may always be 249 | Distributed subject to the version of the Agreement under which it was 250 | received. In addition, after a new version of the Agreement is published, 251 | Contributor may elect to Distribute the Program (including its 252 | Contributions) under the new version. 253 | 254 | Except as expressly stated in Sections 2(a) and 2(b) above, Recipient 255 | receives no rights or licenses to the intellectual property of any 256 | Contributor under this Agreement, whether expressly, by implication, 257 | estoppel or otherwise. All rights in the Program not expressly granted 258 | under this Agreement are reserved. Nothing in this Agreement is intended 259 | to be enforceable by any entity that is not a Contributor or Recipient. 260 | No third-party beneficiary rights are created under this Agreement. 261 | 262 | Exhibit A - Form of Secondary Licenses Notice 263 | 264 | "This Source Code may also be made available under the following 265 | Secondary Licenses when the conditions for such availability set forth 266 | in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), 267 | version(s), and exceptions or additional permissions here}." 268 | 269 | Simply including a copy of this Agreement, including this Exhibit A 270 | is not sufficient to license the Source Code under Secondary Licenses. 271 | 272 | If it is not possible or desirable to put the notice in a particular 273 | file, then You may include the notice in a location (such as a LICENSE 274 | file in a relevant directory) where a recipient would be likely to 275 | look for such a notice. 276 | 277 | You may add additional accurate notices of copyright ownership. 278 | --------------------------------------------------------------------------------