├── tests ├── __init__.py ├── utils │ ├── __init__.py │ └── test_transform.py ├── backend │ ├── __init__.py │ └── test_common.py ├── layers │ └── __init__.py ├── models │ ├── __init__.py │ ├── test_densenet.py │ └── test_mobilenet.py ├── preprocessing │ ├── __init__.py │ └── test_image.py ├── requirements.txt ├── test_losses.py └── bin │ └── test_train.py ├── snapshots └── .donotdelete ├── keras_retinanet ├── __init__.py ├── bin │ ├── __init__.py │ └── convert_model.py ├── utils │ ├── __init__.py │ ├── model.py │ ├── compute_overlap.pyx │ ├── gpu.py │ ├── tf_version.py │ ├── colors.py │ ├── coco_eval.py │ ├── config.py │ ├── visualization.py │ └── crops_sampling.py ├── preprocessing │ ├── __init__.py │ ├── coco.py │ └── kitti.py ├── models │ ├── mobilenetv3 │ │ ├── __init__.py │ │ ├── mobilenet_v3_small.py │ │ ├── mobilenet_v3_large.py │ │ └── mobilenet_v3_base.py │ ├── vgg.py │ ├── densenet.py │ ├── mobilenet.py │ ├── mobilenet_v3.py │ ├── __init__.py │ └── resnet.py ├── backend │ ├── __init__.py │ └── backend.py ├── callbacks │ ├── __init__.py │ ├── common.py │ ├── coco.py │ └── eval.py ├── layers │ └── __init__.py ├── initializers.py └── losses.py ├── labels.csv ├── examples └── 235.jpg ├── docs ├── imgs │ ├── la-logo.jpg │ ├── screenshot.png │ ├── examples │ │ ├── 01.png │ │ ├── 02.png │ │ ├── 03.png │ │ ├── 04.png │ │ ├── 05.png │ │ ├── 06.png │ │ ├── 07.png │ │ ├── 08.png │ │ ├── 09.png │ │ └── 10.png │ ├── lacmus-logo.png │ ├── partners │ │ ├── dtl-logo-200px.png │ │ ├── ods-logo-200px.png │ │ ├── gitbook-logo-200px.png │ │ ├── teplica-logo-128px.png │ │ ├── jetbrains_logo_200px.png │ │ ├── lizaalert-logo-128px.png │ │ └── novaya-gazeta-logo-128px.png │ └── skhemes │ │ ├── RescuerLaAppSkheme-v1.png │ │ ├── RescuerLaBackendSkheme-v1.png │ │ ├── RescuerLaAppSkheme-v1.drawio │ │ └── RescuerLaBackendSkheme-v1.drawio └── train-usage.md ├── data_utils ├── bboxCropper │ ├── screenshot.PNG │ ├── config.cfg │ ├── README.md │ └── bboxCropper.py ├── ImgGenerator │ ├── imgs │ │ ├── in_soup.PNG │ │ ├── in_soup2.PNG │ │ ├── on_snow.PNG │ │ └── in_forest.PNG │ ├── config.cfg │ ├── annotation_template.xml │ └── README.md ├── LaddAugmentor │ ├── LaddAugmentor.csproj │ └── Program.cs ├── LaddGenerator │ ├── LaddGenerator.csproj │ ├── ArgsParser.cs │ ├── Annotation.cs │ └── Program.cs ├── LaddValidator │ ├── LaddValidator.csproj │ ├── ArgsParser.cs │ └── Program.cs ├── LaddGenerator.sln ├── README.md ├── yolo2voc.py └── voc2coco.py ├── .gitmodules ├── openvino.dockerfile ├── setup.cfg ├── config.ini ├── config_p2_p5_low.ini ├── setup_conda_env.sh ├── .github └── workflows │ └── test.yaml ├── cpu.dockerfile ├── gpu.dockerfile ├── Makefile ├── README.md ├── keras2tf_2.py ├── setup.py ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md └── cli_inference.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snapshots/.donotdelete: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras_retinanet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/labels.csv: -------------------------------------------------------------------------------- 1 | Pedestrian,0 2 | -------------------------------------------------------------------------------- /tests/backend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras_retinanet/bin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras_retinanet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras_retinanet/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras_retinanet/models/mobilenetv3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras_retinanet/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from .backend import * # noqa: F401,F403 2 | -------------------------------------------------------------------------------- /keras_retinanet/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .common import * # noqa: F401,F403 2 | -------------------------------------------------------------------------------- /examples/235.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/examples/235.jpg -------------------------------------------------------------------------------- /docs/imgs/la-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/la-logo.jpg -------------------------------------------------------------------------------- /docs/imgs/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/screenshot.png -------------------------------------------------------------------------------- /docs/imgs/examples/01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/01.png -------------------------------------------------------------------------------- /docs/imgs/examples/02.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/02.png -------------------------------------------------------------------------------- /docs/imgs/examples/03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/03.png -------------------------------------------------------------------------------- /docs/imgs/examples/04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/04.png -------------------------------------------------------------------------------- /docs/imgs/examples/05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/05.png -------------------------------------------------------------------------------- /docs/imgs/examples/06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/06.png -------------------------------------------------------------------------------- /docs/imgs/examples/07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/07.png -------------------------------------------------------------------------------- /docs/imgs/examples/08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/08.png -------------------------------------------------------------------------------- /docs/imgs/examples/09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/09.png -------------------------------------------------------------------------------- /docs/imgs/examples/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/examples/10.png -------------------------------------------------------------------------------- /docs/imgs/lacmus-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/lacmus-logo.png -------------------------------------------------------------------------------- /data_utils/bboxCropper/screenshot.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/data_utils/bboxCropper/screenshot.PNG -------------------------------------------------------------------------------- /docs/imgs/partners/dtl-logo-200px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/partners/dtl-logo-200px.png -------------------------------------------------------------------------------- /docs/imgs/partners/ods-logo-200px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/partners/ods-logo-200px.png 
-------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tests/test-data"] 2 | path = tests/test-data 3 | url = https://github.com/lacmus-foundation/lacmus-test-data.git 4 | -------------------------------------------------------------------------------- /data_utils/ImgGenerator/imgs/in_soup.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/data_utils/ImgGenerator/imgs/in_soup.PNG -------------------------------------------------------------------------------- /data_utils/ImgGenerator/imgs/in_soup2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/data_utils/ImgGenerator/imgs/in_soup2.PNG -------------------------------------------------------------------------------- /data_utils/ImgGenerator/imgs/on_snow.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/data_utils/ImgGenerator/imgs/on_snow.PNG -------------------------------------------------------------------------------- /docs/imgs/partners/gitbook-logo-200px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/partners/gitbook-logo-200px.png -------------------------------------------------------------------------------- /docs/imgs/partners/teplica-logo-128px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/partners/teplica-logo-128px.png -------------------------------------------------------------------------------- /data_utils/ImgGenerator/imgs/in_forest.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/data_utils/ImgGenerator/imgs/in_forest.PNG -------------------------------------------------------------------------------- /docs/imgs/partners/jetbrains_logo_200px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/partners/jetbrains_logo_200px.png -------------------------------------------------------------------------------- /docs/imgs/partners/lizaalert-logo-128px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/partners/lizaalert-logo-128px.png -------------------------------------------------------------------------------- /docs/imgs/skhemes/RescuerLaAppSkheme-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/skhemes/RescuerLaAppSkheme-v1.png -------------------------------------------------------------------------------- /docs/imgs/partners/novaya-gazeta-logo-128px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/partners/novaya-gazeta-logo-128px.png -------------------------------------------------------------------------------- 
/docs/imgs/skhemes/RescuerLaBackendSkheme-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lacmus-foundation/lacmus/HEAD/docs/imgs/skhemes/RescuerLaBackendSkheme-v1.png -------------------------------------------------------------------------------- /keras_retinanet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from ._misc import RegressBoxes, UpsampleLike, Anchors, ClipBoxes # noqa: F401 2 | from .filter_detections import FilterDetections # noqa: F401 3 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | check-manifest 2 | image-classifiers 3 | efficientnet 4 | # pytest 5 | pytest-xdist 6 | pytest-cov 7 | pytest-flake8 8 | # flake8 9 | coverage 10 | codecov 11 | -------------------------------------------------------------------------------- /data_utils/ImgGenerator/config.cfg: -------------------------------------------------------------------------------- 1 | DATASET_PATH = E:\test_dataset 2 | BACKGROUNDS_FOLDER_NAME = Backgrounds 3 | AUGMENTED_FOLDER_NAME = Augmented 4 | PADDING_WIDTH = 25 5 | INPAINT_PIXELS_WIDTH = 50 -------------------------------------------------------------------------------- /data_utils/bboxCropper/config.cfg: -------------------------------------------------------------------------------- 1 | CROP_SIZE = 512 2 | DATASET_PATH = C:\TMP\test 3 | CROPS_FOLDER_NAME = Crops 4 | FRAMES_FOLDER_NAME = Frames 5 | MASKS_FOLDER_NAME = Masks 6 | INVERT_MASKS = False 7 | -------------------------------------------------------------------------------- /openvino.dockerfile: -------------------------------------------------------------------------------- 1 | FROM openvino/ubuntu18_runtime:latest 2 | 3 | RUN mkdir /home/openvino/lacmus 4 | WORKDIR /home/openvino/lacmus 5 | COPY cli_inference_openvino.py . 
6 | 7 | CMD bash -c "source ${INTEL_OPENVINO_DIR}/bin/setupvars.sh" -------------------------------------------------------------------------------- /data_utils/LaddAugmentor/LaddAugmentor.csproj: -------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 | <PropertyGroup> 4 | <OutputType>Exe</OutputType> 5 | <TargetFramework>netcoreapp2.2</TargetFramework> 6 | </PropertyGroup> 7 | 8 | </Project> 9 | -------------------------------------------------------------------------------- /data_utils/LaddGenerator/LaddGenerator.csproj: -------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 | <PropertyGroup> 4 | <OutputType>Exe</OutputType> 5 | <TargetFramework>netcoreapp2.2</TargetFramework> 6 | </PropertyGroup> 7 | 8 | </Project> 9 | -------------------------------------------------------------------------------- /data_utils/LaddValidator/LaddValidator.csproj: -------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 | <PropertyGroup> 4 | <OutputType>Exe</OutputType> 5 | <TargetFramework>netcoreapp2.2</TargetFramework> 6 | </PropertyGroup> 7 | 8 | </Project> 9 | -------------------------------------------------------------------------------- /data_utils/LaddAugmentor/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace LaddAugmentor 4 | { 5 | class Program 6 | { 7 | static void Main(string[] args) 8 | { 9 | Console.WriteLine("Hello World!"); 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # ignore: 2 | # E201 whitespace after '[' 3 | # E202 whitespace before ']' 4 | # E203 whitespace before ':' 5 | # E221 multiple spaces before operator 6 | # E241 multiple spaces after ',' 7 | # E251 unexpected spaces around keyword / parameter equals 8 | # E501 line too long (85 > 79 characters) 9 | # W504 line break after binary operator 10 | [tool:pytest] 11 | flake8-max-line-length = 100 12 | flake8-ignore = E201 E202 E203 E221 E241 E251 E402 E501 W504 13 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | [anchor_parameters] 2 | sizes = 16 32 64 128 256 3 | strides = 8 16 32 64 128 4 | ratios = 0.5 1 2 3 5 | scales = 1 1.2 1.6 6 | 7 | [random_transform_parameters] 8 | min_rotation = -0.1 9 | max_rotation = 0.1 10 | min_translation = -0.1 -0.1 11 | max_translation = 0.1 0.1 12 | min_shear = -0.1 13 | max_shear = 0.1 14 | min_scaling = 0.9 0.9 15 | max_scaling = 1.1 1.1 16 | flip_x_chance = 0.5 17 | flip_y_chance = 0.1 18 | 19 | [visual_effect_parameters] 20 | contrast_range = 0.9 1.1 21 | brightness_range = -.1 .1 22 | hue_range = -0.05 0.05 23 | saturation_range = 0.95 1.05 -------------------------------------------------------------------------------- /config_p2_p5_low.ini: -------------------------------------------------------------------------------- 1 | [anchor_parameters] 2 | sizes = 16 32 64 128 3 | strides = 4 8 16 32 4 | ratios = 0.5 1 2 3 5 | scales = 1 1.2 1.6 6 | 7 | [pyramid_levels] 8 | levels = 2 3 4 5 9 | 10 | [random_transform_parameters] 11 | min_rotation = -0.1 12 | max_rotation = 0.1 13 | min_translation = -0.1 -0.1 14 | max_translation = 0.1 0.1 15 | min_shear = -0.1 16 | max_shear = 0.1 17 | min_scaling = 0.9 0.9 18 | max_scaling = 1.1 1.1 19 | flip_x_chance = 0.5 20 | flip_y_chance = 0.1 21 | 22 | [visual_effect_parameters] 23 | contrast_range = 0.9 1.1 24 | brightness_range = -.1 .1 25 | hue_range = -0.05 0.05 26 | saturation_range = 0.95 1.05 27 | --------------------------------------------------------------------------------
/data_utils/ImgGenerator/annotation_template.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" ?> 2 | <annotation> 3 | <folder>Unknown</folder> 4 | <filename>__</filename> 5 | <source> 6 | <database>Unknown</database> 7 | </source> 8 | <size> 9 | <width>__</width> 10 | <height>__</height> 11 | <depth>3</depth> 12 | </size> 13 | <segmented>0</segmented> 14 | <object> 15 | <name>Pedestrian</name> 16 | <pose>Unspecified</pose> 17 | <truncated>0</truncated> 18 | <difficult>0</difficult> 19 | <bndbox> 20 | <xmin>__</xmin> 21 | <ymin>__</ymin> 22 | <xmax>__</xmax> 23 | <ymax>__</ymax> 24 | </bndbox> 25 | </object> 26 | </annotation> -------------------------------------------------------------------------------- /setup_conda_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script creates and configures a conda environment for the lacmus project. 4 | # The name of the environment will be 'lacmusenv' by default or the one you passed as the first argument. 5 | # Usage: 6 | # ./setup_conda_env.sh [environment_name] 7 | 8 | # Do not forget to grant the script execute permission by: 9 | # chmod +x ./setup_conda_env.sh 10 | 11 | 12 | env_name=$1 13 | 14 | if [ -z "$env_name" ] 15 | then 16 | env_name="lacmusenv" 17 | fi 18 | 19 | conda create -n $env_name python=3.7 anaconda 20 | source activate $env_name 21 | conda install tensorflow-gpu==1.14 22 | pip install numpy --user 23 | pip install . --user 24 | python setup.py build_ext --inplace 25 | 26 | echo 27 | echo "Done creating $env_name environment" 28 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Unit testing on ubuntu 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v1 10 | - name: Set up python 11 | uses: actions/setup-python@v1 12 | with: 13 | python-version: 3.7 14 | - name: Install lacmus 15 | run: | 16 | python -m pip install --upgrade pip 17 | pip install tensorflow==2.3.0 18 | pip install keras==2.4.3 19 | pip install opencv-python 20 | pip install --upgrade setuptools 21 | pip install .
22 | python setup.py build_ext --inplace 23 | pip install pytest 24 | pip install pycocotools 25 | git clone https://github.com/lacmus-foundation/lacmus-test-data.git tests/test-data 26 | - name: Run tests 27 | run: pytest tests -------------------------------------------------------------------------------- /tests/preprocessing/test_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from PIL import Image 4 | from keras_retinanet.utils import image 5 | import numpy as np 6 | 7 | _STUB_IMG_FNAME = 'stub-image.jpg' 8 | 9 | 10 | @pytest.fixture(autouse=True) 11 | def run_around_tests(tmp_path): 12 | """Create a temp image for test""" 13 | rand_img = np.random.randint(0, 255, (3, 3, 3), dtype='uint8') 14 | Image.fromarray(rand_img).save(os.path.join(tmp_path, _STUB_IMG_FNAME)) 15 | yield 16 | 17 | 18 | def test_read_image_bgr(tmp_path): 19 | stub_image_path = os.path.join(tmp_path, _STUB_IMG_FNAME) 20 | 21 | original_img = np.asarray(Image.open( 22 | stub_image_path).convert('RGB'))[:, :, ::-1] 23 | loaded_image = image.read_image_bgr(stub_image_path) 24 | 25 | # Assert images are equal 26 | np.testing.assert_array_equal(original_img, loaded_image) 27 | -------------------------------------------------------------------------------- /cpu.dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.4.2 2 | 3 | # install debian packages 4 | ENV DEBIAN_FRONTEND noninteractive 5 | RUN apt-get update -qq \ 6 | && apt-get install --no-install-recommends -y \ 7 | # install essentials 8 | build-essential \ 9 | wget \ 10 | git \ 11 | cython \ 12 | ffmpeg \ 13 | libsm6 \ 14 | libxext6 \ 15 | # requirements for numpy 16 | libopenblas-base \ 17 | python3-numpy \ 18 | python3-scipy \ 19 | # requirements for keras 20 | python3-h5py \ 21 | python3-yaml \ 22 | python3-pydot \ 23 | && apt-get clean \ 24 | && rm -rf /var/lib/apt/lists/* 25 | 26 | RUN mkdir /opt/lacmus 27 | WORKDIR /opt/lacmus 28 | COPY . . 29 | 30 | RUN pip3 install --upgrade setuptools \ 31 | && pip3 install opencv-python \ 32 | && pip3 install git+https://github.com/lacmus-foundation/keras-resnet.git \ 33 | && pip3 install . 
\ 34 | && python3 setup.py build_ext --inplace 35 | 36 | ENTRYPOINT ["bash"] -------------------------------------------------------------------------------- /tests/test_losses.py: -------------------------------------------------------------------------------- 1 | import keras_retinanet.losses 2 | from tensorflow import keras 3 | 4 | import numpy as np 5 | 6 | import pytest 7 | 8 | 9 | def test_smooth_l1(): 10 | regression = np.array([ 11 | [ 12 | [0, 0, 0, 0], 13 | [0, 0, 0, 0], 14 | [0, 0, 0, 0], 15 | [0, 0, 0, 0], 16 | ] 17 | ], dtype=keras.backend.floatx()) 18 | regression = keras.backend.variable(regression) 19 | 20 | regression_target = np.array([ 21 | [ 22 | [0, 0, 0, 1, 1], 23 | [0, 0, 1, 0, 1], 24 | [0, 0, 0.05, 0, 1], 25 | [0, 0, 1, 0, 0], 26 | ] 27 | ], dtype=keras.backend.floatx()) 28 | regression_target = keras.backend.variable(regression_target) 29 | 30 | loss = keras_retinanet.losses.smooth_l1()(regression_target, regression) 31 | loss = keras.backend.eval(loss) 32 | 33 | assert loss == pytest.approx((((1 - 0.5 / 9) * 2 + (0.5 * 9 * 0.05 ** 2)) / 3)) 34 | -------------------------------------------------------------------------------- /gpu.dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.4.2-gpu 2 | 3 | # install debian packages 4 | ENV DEBIAN_FRONTEND noninteractive 5 | RUN apt-get update -qq \ 6 | && apt-get install --no-install-recommends -y \ 7 | # install essentials 8 | build-essential \ 9 | wget \ 10 | git \ 11 | g++ \ 12 | cython \ 13 | ffmpeg \ 14 | libsm6 \ 15 | libxext6 \ 16 | # requirements for numpy 17 | libopenblas-base \ 18 | python3-numpy \ 19 | python3-scipy \ 20 | # requirements for keras 21 | python3-h5py \ 22 | python3-yaml \ 23 | python3-pydot \ 24 | && apt-get clean \ 25 | && rm -rf /var/lib/apt/lists/* 26 | 27 | RUN mkdir /opt/lacmus 28 | WORKDIR /opt/lacmus 29 | COPY . . 30 | 31 | RUN pip3 install --upgrade setuptools \ 32 | && pip3 install opencv-python \ 33 | && pip3 install git+https://github.com/lacmus-foundation/keras-resnet.git \ 34 | && pip3 install . \ 35 | && python3 setup.py build_ext --inplace 36 | 37 | ENTRYPOINT ["bash"] -------------------------------------------------------------------------------- /data_utils/bboxCropper/README.md: -------------------------------------------------------------------------------- 1 | ### Cropping tool for dataset images 2 | Before running **bboxCropper.py**, update **config.cfg** with your data. 3 | 4 | 5 | #### Inputs 6 | Dataset, located at **DATASET_PATH**. 7 | 8 | 9 | #### Outputs 10 | - creates the folders **CROPS_FOLDER_NAME**, **FRAMES_FOLDER_NAME** and **MASKS_FOLDER_NAME** in the dataset folder. 11 | - crops a **CROP_SIZE** x **CROP_SIZE** square around the bbox center on the image, with a random shift, and saves it to the **CROPS_FOLDER_NAME** folder. 12 | - cuts the bbox content out of this crop and saves it to the **FRAMES_FOLDER_NAME** folder. 13 | - creates a binary mask of the same size as the crop, with True pixels under the bbox and False elsewhere, and saves it to the **MASKS_FOLDER_NAME** folder. 14 | 15 | #### About masks 16 | - by default, mask pixels are True where image inpainting is required and False elsewhere. To invert the masks, set INVERT_MASKS in the config file. A minimal sketch of this crop-and-mask logic is shown below.
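For illustration only, the following Python sketch mirrors the crop-and-mask logic described above. It is not the actual `bboxCropper.py` implementation: the function name, the `max_shift` value and the border handling are assumptions made for this example; only the `CROP_SIZE` setting and the "True under the bbox" mask convention come from the tool's description.

```python
import numpy as np

CROP_SIZE = 512  # plays the role of CROP_SIZE from config.cfg


def crop_around_bbox(image, bbox, crop_size=CROP_SIZE, max_shift=50, rng=None):
    """Illustrative sketch: crop a square around a bbox and build its inpainting mask.

    image : HxWxC uint8 array (assumed larger than crop_size in both dimensions)
    bbox  : (xmin, ymin, xmax, ymax) in absolute pixel coordinates
    Returns (crop, mask) where mask is True under the bbox and False elsewhere.
    """
    rng = rng or np.random.default_rng()
    height, width = image.shape[:2]
    xmin, ymin, xmax, ymax = bbox

    # bbox center plus a random shift, as described in the Outputs section
    cx = (xmin + xmax) // 2 + rng.integers(-max_shift, max_shift + 1)
    cy = (ymin + ymax) // 2 + rng.integers(-max_shift, max_shift + 1)

    # clamp the square so it stays fully inside the image
    x0 = int(np.clip(cx - crop_size // 2, 0, width - crop_size))
    y0 = int(np.clip(cy - crop_size // 2, 0, height - crop_size))
    crop = image[y0:y0 + crop_size, x0:x0 + crop_size].copy()

    # binary mask of the crop: True under the bbox, False everywhere else
    mask = np.zeros((crop_size, crop_size), dtype=bool)
    mask[max(ymin - y0, 0):max(ymax - y0, 0), max(xmin - x0, 0):max(xmax - x0, 0)] = True
    # an INVERT_MASKS-like option would simply be: mask = ~mask
    return crop, mask
```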
17 | 18 | ![Example](https://github.com/lacmus-foundation/lacmus/blob/master/data_utils/bboxCropper/screenshot.PNG) 19 | -------------------------------------------------------------------------------- /keras_retinanet/utils/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | 18 | def freeze(model): 19 | """ Set all layers in a model to non-trainable. 20 | 21 | The weights for these layers will not be updated during training. 22 | 23 | This function modifies the given model in-place, 24 | but it also returns the modified model to allow easy chaining with other functions. 25 | """ 26 | for layer in model.layers: 27 | layer.trainable = False 28 | return model 29 | -------------------------------------------------------------------------------- /keras_retinanet/initializers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from tensorflow import keras 18 | 19 | import math 20 | 21 | 22 | class PriorProbability(keras.initializers.Initializer): 23 | """ Apply a prior probability to the weights. 
24 | """ 25 | 26 | def __init__(self, probability=0.01): 27 | self.probability = probability 28 | 29 | def get_config(self): 30 | return { 31 | 'probability': self.probability 32 | } 33 | 34 | def __call__(self, shape, dtype=None): 35 | # set bias to -log((1 - p)/p) for foreground 36 | result = keras.backend.ones(shape, dtype=dtype) * -math.log((1 - self.probability) / self.probability) 37 | 38 | return result 39 | -------------------------------------------------------------------------------- /docs/imgs/skhemes/RescuerLaAppSkheme-v1.drawio: -------------------------------------------------------------------------------- 1 | 7VnLcpswFP0aL5sxCIy9jB9p2jpNOl40WSoggxoZeYSIcb++FyMMWI4hHb/iduORjh6Wzr3nXkm00GCWfBZ4Htxxj7CW2faSFhq2TNOxe/CbAssMsEwF+IJ6GWQUwIT+JgpsKzSmHokqHSXnTNJ5FXR5GBJXVjAsBF9Uu005q/7rHPtEAyYuZjr6k3oyyNCu6RT4LaF+kP+z0VH7m+G8s9pJFGCPL0oQGrXQQHAus9IsGRCWcpfzko27eaN1vTBBQtlkwA8jGQT36Mft129hbxS/Pny3x5+sbJZXzGK1YbVYucwZEDwOPZJO0m6h/iKgkkzm2E1bF2BywAI5Y1AzoKimI0KS5M11Guvdg9cQPiNSLKFLPqCnCFMeg1R1UdBvOYr+oEx9R3XEyuT+euqCFSgoYt5BktHVWDI0mogHfqOqXMiA+zzEbFSg/SqRRZ8x53NF3y8i5VKJAMeSV8klCZWPpfJTOtWVrWrDRM28qizzSggMPJYrpVFptRi2quXjsv2lm9ptRuCAx8IlO+izlWyx8Inc0a+z3S0EYVjS1+o69m5ju14IIOB5WnSXjIIhRb0anjOLj5/XAHZf/JUf3McSZiH7k806YuaBVpdNLpCyaroHE01PY1Sn9L9odoqhgWgM65Sq6dSrpkYiOJpnaXtKk9TM+5ACMu06KXSPKQXnLFmyndqAcVSW8vh1zmeRdQg94WHE0GiaEAG71MiCbcsqI5EU/IUMOOOQvIYhT/NPf0oZ24Awo36YZjqgaZXnUhIpHIqvVcOMet4qPG8zQdVIB7CCoRvB2eKp5qFMYCLNBNYpU1uRzp5KLXWp7cq0K9mtJrdFkIrkdXqjSh2D4Siibg7fULZeU+jlnZQzAaLa95wiDbNpjmxvd6/j5Mh8mSVvGWKJJ7Bksz3sX65sN0+kRp6Wa46k6GChU9ftmGAR0tAH9I57MZBzseZAyD4vc5h6wkenDKMf7IZgWA3Dn3PS6Nfghem8L9ZW57wu1jmBJUbtD3j6+PvDhwqv5ZPHvqXV9MnKPOnt29AfrR4EzIzB/70soxEWXfgpA+R5tZnYminUOphC9Sta5wMq1HGsanJrt60ana5qD0RQIDL1jffcHPap38avZ6e9GejPZ1/CKYH9wu4u/0Bqd8/sfmC+62uYcZQXKMtGFZJMRycph45Dkv6uP+bgQwD9S85rbbyeom2fKffzKAXV4ivxqq30qR2N/gA= -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Version for docker images from git 3 | RLA_VERSION := $(shell git describe --abbrev=0 --tags) 4 | 5 | # Build both docker images 6 | .PHONY: build-all 7 | build-all: build build-gpu 8 | 9 | 10 | # Build docker image. Application using GPU (nvidia docker needed) 11 | .PHONY: build-gpu 12 | build-gpu: 13 | docker build --file Dockerfile.gpu -t lizaalert/lacmus:$(RLA_VERSION)-gpu . 14 | 15 | # Build docker image. Application using CPU 16 | .PHONY: build 17 | build: 18 | docker build -t lizaalert/lacmus:$(RLA_VERSION) . 19 | 20 | # Build and run docker image. Application using CPU 21 | .PHONY: run 22 | run: build 23 | docker run --rm \ 24 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 25 | -e DISPLAY=unix$(DISPLAY) \ 26 | --workdir=$(pwd) \ 27 | --volume="/home/$(USER):/home/$(USER)" \ 28 | --volume="/etc/group:/etc/group:ro" \ 29 | --volume="/etc/passwd:/etc/passwd:ro" \ 30 | --volume="/etc/shadow:/etc/shadow:ro" \ 31 | --volume="/etc/sudoers.d:/etc/sudoers.d:ro" \ 32 | lizaalert/lacmus:$(RLA_VERSION) 33 | 34 | # Build and run docker image. 
Application using GPU 35 | run-gpu: build-gpu 36 | docker run --rm \ 37 | --runtime=nvidia \ 38 | -v /tmp/.X11-unix:/tmp/.X11-unix \ 39 | -e DISPLAY=unix$(DISPLAY) \ 40 | --workdir=$(pwd) \ 41 | --volume="/home/$(USER):/home/$(USER)" \ 42 | --volume="/etc/group:/etc/group:ro" \ 43 | --volume="/etc/passwd:/etc/passwd:ro" \ 44 | --volume="/etc/shadow:/etc/shadow:ro" \ 45 | --volume="/etc/sudoers.d:/etc/sudoers.d:ro" \ 46 | lizaalert/lacmus:$(RLA_VERSION)-gpu 47 | 48 | -------------------------------------------------------------------------------- /keras_retinanet/callbacks/common.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | 3 | 4 | class RedirectModel(keras.callbacks.Callback): 5 | """Callback which wraps another callback, but executed on a different model. 6 | 7 | ```python 8 | model = keras.models.load_model('model.h5') 9 | model_checkpoint = ModelCheckpoint(filepath='snapshot.h5') 10 | parallel_model = multi_gpu_model(model, gpus=2) 11 | parallel_model.fit(X_train, Y_train, callbacks=[RedirectModel(model_checkpoint, model)]) 12 | ``` 13 | 14 | Args 15 | callback : callback to wrap. 16 | model : model to use when executing callbacks. 17 | """ 18 | 19 | def __init__(self, 20 | callback, 21 | model): 22 | super(RedirectModel, self).__init__() 23 | 24 | self.callback = callback 25 | self.redirect_model = model 26 | 27 | def on_epoch_begin(self, epoch, logs=None): 28 | self.callback.on_epoch_begin(epoch, logs=logs) 29 | 30 | def on_epoch_end(self, epoch, logs=None): 31 | self.callback.on_epoch_end(epoch, logs=logs) 32 | 33 | def on_batch_begin(self, batch, logs=None): 34 | self.callback.on_batch_begin(batch, logs=logs) 35 | 36 | def on_batch_end(self, batch, logs=None): 37 | self.callback.on_batch_end(batch, logs=logs) 38 | 39 | def on_train_begin(self, logs=None): 40 | # overwrite the model with our custom model 41 | self.callback.set_model(self.redirect_model) 42 | 43 | self.callback.on_train_begin(logs=logs) 44 | 45 | def on_train_end(self, logs=None): 46 | self.callback.on_train_end(logs=logs) 47 | -------------------------------------------------------------------------------- /data_utils/LaddGenerator.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LaddGenerator", ".\LaddGenerator\LaddGenerator.csproj", "{D452A77C-D639-4376-8372-BA5B8AC67CF4}" 4 | EndProject 5 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LaddValidator", ".\LaddValidator\LaddValidator.csproj", "{C9A6B61C-8F39-4A32-BDA0-6C248153448E}" 6 | EndProject 7 | Global 8 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 9 | Debug|Any CPU = Debug|Any CPU 10 | Release|Any CPU = Release|Any CPU 11 | EndGlobalSection 12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 13 | {D452A77C-D639-4376-8372-BA5B8AC67CF4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 14 | {D452A77C-D639-4376-8372-BA5B8AC67CF4}.Debug|Any CPU.Build.0 = Debug|Any CPU 15 | {D452A77C-D639-4376-8372-BA5B8AC67CF4}.Release|Any CPU.ActiveCfg = Release|Any CPU 16 | {D452A77C-D639-4376-8372-BA5B8AC67CF4}.Release|Any CPU.Build.0 = Release|Any CPU 17 | {2C83E9CE-CD86-4B8F-AC3A-60CB7ACB9F07}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 18 | {2C83E9CE-CD86-4B8F-AC3A-60CB7ACB9F07}.Debug|Any CPU.Build.0 = Debug|Any CPU 19 | {2C83E9CE-CD86-4B8F-AC3A-60CB7ACB9F07}.Release|Any CPU.ActiveCfg = Release|Any CPU 20 | 
{2C83E9CE-CD86-4B8F-AC3A-60CB7ACB9F07}.Release|Any CPU.Build.0 = Release|Any CPU 21 | {C9A6B61C-8F39-4A32-BDA0-6C248153448E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 22 | {C9A6B61C-8F39-4A32-BDA0-6C248153448E}.Debug|Any CPU.Build.0 = Debug|Any CPU 23 | {C9A6B61C-8F39-4A32-BDA0-6C248153448E}.Release|Any CPU.ActiveCfg = Release|Any CPU 24 | {C9A6B61C-8F39-4A32-BDA0-6C248153448E}.Release|Any CPU.Build.0 = Release|Any CPU 25 | EndGlobalSection 26 | EndGlobal 27 | -------------------------------------------------------------------------------- /tests/models/test_densenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 vidosits (https://github.com/vidosits/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import warnings 18 | import pytest 19 | import numpy as np 20 | from tensorflow import keras 21 | from keras_retinanet import losses 22 | from keras_retinanet.models.densenet import DenseNetBackbone 23 | 24 | parameters = ['densenet121'] 25 | 26 | 27 | @pytest.mark.parametrize("backbone", parameters) 28 | def test_backbone(backbone): 29 | # ignore warnings in this test 30 | warnings.simplefilter('ignore') 31 | 32 | num_classes = 10 33 | 34 | inputs = np.zeros((1, 200, 400, 3), dtype=np.float32) 35 | targets = [np.zeros((1, 14814, 5), dtype=np.float32), np.zeros((1, 14814, num_classes + 1))] 36 | 37 | inp = keras.layers.Input(inputs[0].shape) 38 | 39 | densenet_backbone = DenseNetBackbone(backbone) 40 | model = densenet_backbone.retinanet(num_classes=num_classes, inputs=inp) 41 | model.summary() 42 | 43 | # compile model 44 | model.compile( 45 | loss={ 46 | 'regression': losses.smooth_l1(), 47 | 'classification': losses.focal() 48 | }, 49 | optimizer=keras.optimizers.Adam(lr=1e-5, clipnorm=0.001)) 50 | 51 | model.fit(inputs, targets, batch_size=1) 52 | -------------------------------------------------------------------------------- /data_utils/LaddGenerator/ArgsParser.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | 4 | namespace LaddGenerator 5 | { 6 | public class ArgsParser 7 | { 8 | private readonly Dictionary _argsKeys; 9 | 10 | public ArgsParser(Dictionary argsKeys) 11 | { 12 | _argsKeys = argsKeys; 13 | } 14 | 15 | public Dictionary Parse(string[] args) 16 | { 17 | if (args.Length == 0) 18 | { 19 | List argsList = new List(); 20 | Console.Write("usage\n"); 21 | foreach (var (key, value) in _argsKeys) 22 | { 23 | Console.WriteLine($"\t{key}\t{value}"); 24 | } 25 | for (int i = 0; i < _argsKeys.Count; i++) 26 | { 27 | argsList.AddRange(Console.ReadLine()?.Split(' ')); 28 | } 29 | 30 | args = argsList.ToArray(); 31 | } 32 | else if (args.Length / 2 != _argsKeys.Count) 33 | { 34 | Console.Write("usage\n"); 35 | foreach (var (key, value) in _argsKeys) 36 | { 37 | Console.WriteLine($"\t{key}\t{value}"); 38 | } 39 | 40 | return null; 41 | } 42 | 43 | Dictionary result = new Dictionary(); 44 | foreach (var 
(key, value) in _argsKeys) 45 | { 46 | for (int i = 0; i < args.Length; i += 2) 47 | { 48 | if(args[i].Contains(key)) 49 | result.Add(key.Replace("--", ""), args[i+1]); 50 | } 51 | } 52 | 53 | return result; 54 | } 55 | } 56 | } -------------------------------------------------------------------------------- /data_utils/LaddValidator/ArgsParser.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | 4 | namespace LaddValidator 5 | { 6 | public class ArgsParser 7 | { 8 | private readonly Dictionary _argsKeys; 9 | 10 | public ArgsParser(Dictionary argsKeys) 11 | { 12 | _argsKeys = argsKeys; 13 | } 14 | 15 | public Dictionary Parse(string[] args) 16 | { 17 | if (args.Length == 0) 18 | { 19 | List argsList = new List(); 20 | Console.Write("usage\n"); 21 | foreach (var (key, value) in _argsKeys) 22 | { 23 | Console.WriteLine($"\t{key}\t{value}"); 24 | } 25 | for (int i = 0; i < _argsKeys.Count; i++) 26 | { 27 | argsList.AddRange(Console.ReadLine()?.Split(' ')); 28 | } 29 | 30 | args = argsList.ToArray(); 31 | } 32 | else if (args.Length / 2 != _argsKeys.Count) 33 | { 34 | Console.Write("usage\n"); 35 | foreach (var (key, value) in _argsKeys) 36 | { 37 | Console.WriteLine($"\t{key}\t{value}"); 38 | } 39 | 40 | return null; 41 | } 42 | 43 | Dictionary result = new Dictionary(); 44 | foreach (var (key, value) in _argsKeys) 45 | { 46 | for (int i = 0; i < args.Length; i += 2) 47 | { 48 | if(args[i].Contains(key)) 49 | result.Add(key.Replace("--", ""), args[i+1]); 50 | } 51 | } 52 | 53 | return result; 54 | } 55 | } 56 | } -------------------------------------------------------------------------------- /keras_retinanet/utils/compute_overlap.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 13 | def compute_overlap( 14 | np.ndarray[double, ndim=2] boxes, 15 | np.ndarray[double, ndim=2] query_boxes 16 | ): 17 | """ 18 | Args 19 | a: (N, 4) ndarray of float 20 | b: (K, 4) ndarray of float 21 | 22 | Returns 23 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 24 | """ 25 | cdef unsigned int N = boxes.shape[0] 26 | cdef unsigned int K = query_boxes.shape[0] 27 | cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64) 28 | cdef double iw, ih, box_area 29 | cdef double ua 30 | cdef unsigned int k, n 31 | for k in range(K): 32 | box_area = ( 33 | (query_boxes[k, 2] - query_boxes[k, 0]) * 34 | (query_boxes[k, 3] - query_boxes[k, 1]) 35 | ) 36 | for n in range(N): 37 | iw = ( 38 | min(boxes[n, 2], query_boxes[k, 2]) - 39 | max(boxes[n, 0], query_boxes[k, 0]) 40 | ) 41 | if iw > 0: 42 | ih = ( 43 | min(boxes[n, 3], query_boxes[k, 3]) - 44 | max(boxes[n, 1], query_boxes[k, 1]) 45 | ) 46 | if ih > 0: 47 | ua = np.float64( 48 | (boxes[n, 2] - boxes[n, 0]) * 49 | (boxes[n, 3] - boxes[n, 1]) + 50 | box_area - iw * ih 51 | ) 52 | overlaps[n, k] = iw * ih / ua 53 | return overlaps 54 | -------------------------------------------------------------------------------- /keras_retinanet/utils/gpu.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 
https://github.com/lacmus-foundation/lacmus 3 | Copyright (C) 2019-2020 lacmus-foundation 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <https://www.gnu.org/licenses/>. 17 | ''' 18 | 19 | import tensorflow as tf 20 | 21 | 22 | def setup_gpu(gpu_id): 23 | try: 24 | visible_gpu_indices = [int(id) for id in gpu_id.split(',')] 25 | available_gpus = tf.config.list_physical_devices('GPU') 26 | visible_gpus = [gpu for idx, gpu in enumerate(available_gpus) if idx in visible_gpu_indices] 27 | 28 | if visible_gpus: 29 | try: 30 | # Currently, memory growth needs to be the same across GPUs. 31 | for gpu in available_gpus: 32 | tf.config.experimental.set_memory_growth(gpu, True) 33 | 34 | # Use only the selected gpu. 35 | tf.config.set_visible_devices(visible_gpus, 'GPU') 36 | except RuntimeError as e: 37 | # Visible devices must be set before GPUs have been initialized. 38 | print(e) 39 | 40 | logical_gpus = tf.config.list_logical_devices('GPU') 41 | print(len(available_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") 42 | else: 43 | tf.config.set_visible_devices([], 'GPU') 44 | except ValueError: 45 | tf.config.set_visible_devices([], 'GPU') 46 | -------------------------------------------------------------------------------- /data_utils/ImgGenerator/README.md: -------------------------------------------------------------------------------- 1 | ### Tool for generating new dataset images 2 | 3 | #### How it works 4 | - cuts all target images out of the existing dataset; 5 | - transforms the crops (at the moment only random rotation is implemented); 6 | - pastes the crops onto new backgrounds; 7 | - generates Pascal VOC-style annotations. 8 | 9 | #### Inputs 10 | 11 | Before running, update **config.cfg** with your data. 12 | 13 | Folders: 14 | - **DATASET_PATH** - location of the existing dataset; 15 | - **BACKGROUNDS_FOLDER_NAME** - name of the folder with new backgrounds; this folder must be located inside the existing dataset folder; 16 | - **AUGMENTED_FOLDER_NAME** - name of the folder for outputs; it must also be located inside the existing dataset folder. 17 | 18 | Cropping details: 19 | 20 | Targets are cropped with some padding plus extra pixels for a smooth transition. 21 | 22 | Example: if the 23 | - target image is 50x50 pixels, 24 | - padding is 10% of the image W and H, 25 | - transition area is 25 pixels at each side of the image, 26 | 27 | then the crop will have a size of: 28 | **H = W = (50 + 50 x 0.1 + 50 x 0.1 + 25 + 25) = 110 px** (see the sketch at the end of this README) 29 | 30 | Related variables: 31 | - **PADDING_WIDTH** - **percentage** of the target image H and W to pad, 32 | - **INPAINT_PIXELS_WIDTH** - width of the smooth transition area. 33 | 34 | 35 | 36 | #### Outputs 37 | 38 | All outputs are located in **AUGMENTED_FOLDER_NAME**: 39 | 40 | - **JPEGImages** - folder with the resulting images, 41 | - **Annotations** - folder with XML annotation files, 42 | - **Targets** - crops of targets.
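The crop-size arithmetic above can be written down as a tiny helper. This is only an illustration of the formula from this README; the function name is invented for the example and is not part of the generator itself.

```python
def crop_side_px(target_side_px, padding_width_pct, inpaint_pixels_width):
    """Side length of the square crop taken around a target image.

    target_side_px       : H or W of the target image, in pixels
    padding_width_pct    : PADDING_WIDTH from config.cfg (percent of H/W to pad)
    inpaint_pixels_width : INPAINT_PIXELS_WIDTH from config.cfg (transition band per side)
    """
    padding = target_side_px * padding_width_pct / 100.0
    return target_side_px + 2 * padding + 2 * inpaint_pixels_width


# The worked example from this README: 50 px target, 10 % padding, 25 px transition.
assert crop_side_px(50, 10, 25) == 110.0
```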
43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /tests/models/test_mobilenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import warnings 18 | import pytest 19 | import numpy as np 20 | from tensorflow import keras 21 | from keras_retinanet import losses 22 | from keras_retinanet.models.mobilenet import MobileNetBackbone 23 | 24 | 25 | alphas = ['1.0'] 26 | parameters = [] 27 | 28 | for backbone in MobileNetBackbone.allowed_backbones: 29 | for alpha in alphas: 30 | parameters.append((backbone, alpha)) 31 | 32 | 33 | @pytest.mark.parametrize("backbone, alpha", parameters) 34 | def test_backbone(backbone, alpha): 35 | # ignore warnings in this test 36 | warnings.simplefilter('ignore') 37 | 38 | num_classes = 10 39 | 40 | inputs = np.zeros((1, 1024, 363, 3), dtype=np.float32) 41 | targets = [np.zeros((1, 68760, 5), dtype=np.float32), np.zeros((1, 68760, num_classes + 1))] 42 | 43 | inp = keras.layers.Input(inputs[0].shape) 44 | 45 | mobilenet_backbone = MobileNetBackbone(backbone='{}_{}'.format(backbone, format(alpha))) 46 | training_model = mobilenet_backbone.retinanet(num_classes=num_classes, inputs=inp) 47 | training_model.summary() 48 | 49 | # compile model 50 | training_model.compile( 51 | loss={ 52 | 'regression': losses.smooth_l1(), 53 | 'classification': losses.focal() 54 | }, 55 | optimizer=keras.optimizers.Adam(lr=1e-5, clipnorm=0.001)) 56 | 57 | training_model.fit(inputs, targets, batch_size=1) 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lacmus 2 | 3 | ![logo](docs/imgs/lacmus-logo.png) 4 | 5 | A program for searching aerial photos for people lost in the forest, using a RetinaNet neural network. 6 | 7 | The project is being developed by the non-profit organization Liza Alert. 8 | 9 | ## Demonstration 10 | 11 | ![01](docs/imgs/examples/02.png) 12 | 13 | *Picture 1* 14 | 15 | ![02](docs/imgs/examples/08.png) 16 | 17 | *Picture 2* 18 | 19 | [![video](http://img.youtube.com/vi/9pVtPM4bzww/0.jpg)](http://www.youtube.com/watch?v=9pVtPM4bzww) 20 | 21 | *Video 1* 22 | 23 | See [more examples](docs/work-demo.md). 24 | 25 | ## Training data 26 | 27 | You can download the `Lacmus Drone Dataset (LADD)` from the mail.ru cloud 28 | 29 | - https://cloud.mail.ru/public/2k53/2bJVwYSa7 30 | 31 | You can also download the Lacmus version of the `Stanford Drone Dataset (SDD)` from the mail.ru cloud 32 | 33 | - https://cloud.mail.ru/public/4GKW/3FW26Sq77 34 | 35 | 36 | ## Usage 37 | 38 | Read more about the training steps and training data in the [train documentation](docs/train-usage.md) to learn how to train the model.
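As a quick illustration, training can be started programmatically the same way the test suite does it (see `tests/bin/test_train.py`); the entry point is also exposed as the `retinanet-train` console script in `setup.py`. The annotation path below is a placeholder, only `labels.csv` (the `Pedestrian,0` class mapping) ships with this repository, and the flag values are a minimal smoke-test configuration rather than recommended training settings.

```python
# Minimal sketch of a CSV training run, mirroring tests/bin/test_train.py.
# 'path/to/annotations.csv' is a placeholder for your own annotation file.
import keras_retinanet.bin.train

keras_retinanet.bin.train.main([
    '--backbone=resnet50',
    '--epochs=1',
    '--steps=1',
    '--no-weights',
    '--no-snapshots',
    'csv',
    'path/to/annotations.csv',
    'labels.csv',
])
```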
39 | 40 | ## Pretrained models 41 | 42 | The models are available [here](https://github.com/lizaalert/lacmus/releases/tag/0.1.1). 43 | 44 | ## Partners 45 | 46 | [![ODS][logoODS]](https://ods.ai) [![DTL][logoDTL]](http://immersiya.com/about) [![JB][logoJB]](https://www.jetbrains.com/) [![GitBook][logoGitBook]](https://www.gitbook.com/) 47 | [![Liza alert][logoLA]](https://lizaalert.org/) [![Novaya Gazeta][logoNovayaGazeta]](https://novayagazeta.ru/) [![Teplica][logoTeplica]](https://te-st.ru/) 48 | 49 | [logoDTL]: docs/imgs/partners/dtl-logo-200px.png "DTL" 50 | 51 | [logoODS]: docs/imgs/partners/ods-logo-200px.png "ODS" 52 | 53 | [logoLA]: docs/imgs/partners/lizaalert-logo-128px.png "Liza alert" 54 | 55 | [logoNovayaGazeta]: docs/imgs/partners/novaya-gazeta-logo-128px.png "Novaya Gazeta" 56 | 57 | [logoTeplica]: docs/imgs/partners/teplica-logo-128px.png "Teplica" 58 | 59 | [logoJB]: docs/imgs/partners/jetbrains_logo_200px.png "JetBrains" 60 | 61 | [logoGitBook]: docs/imgs/partners/gitbook-logo-200px.png "GitBook" -------------------------------------------------------------------------------- /keras2tf_2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import argparse 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow import keras 8 | from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 9 | 10 | from keras_retinanet import models 11 | 12 | 13 | def parse_args(args): 14 | parser = argparse.ArgumentParser(description='convert keras_retinanet model to tensorflow frozen graph') 15 | parser.add_argument( 16 | '--input', 17 | help='path to h5 keras inference model', 18 | type=str, 19 | required=True 20 | ) 21 | parser.add_argument( 22 | '--backbone', 23 | help='backbone name', 24 | type=str, 25 | required=False, 26 | default='resnet50' 27 | ) 28 | return parser.parse_args(args) 29 | 30 | def main(args=None): 31 | args = parse_args(args) 32 | weights_name = args.input 33 | backbone = args.backbone 34 | 35 | dirname = os.path.dirname(weights_name) 36 | basename = os.path.basename(weights_name) 37 | fn, ext = os.path.splitext(basename) 38 | 39 | model = models.load_model(weights_name, backbone_name=backbone) 40 | 41 | # Convert Keras model to ConcreteFunction 42 | full_model = tf.function(lambda input_1: model(input_1)) 43 | full_model = full_model.get_concrete_function( 44 | tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype)) 45 | 46 | # Get frozen ConcreteFunction 47 | frozen_func = convert_variables_to_constants_v2(full_model) 48 | frozen_func.graph.as_graph_def() 49 | 50 | layers = [op.name for op in frozen_func.graph.get_operations()] 51 | 52 | print("Frozen model inputs: ") 53 | print(frozen_func.inputs) 54 | print("Frozen model outputs: ") 55 | print(frozen_func.outputs) 56 | 57 | # Save frozen graph to disk 58 | tf.io.write_graph(graph_or_graph_def=frozen_func.graph, 59 | logdir=dirname, 60 | name=f"{fn}.pb", 61 | as_text=False) 62 | print(f'weights saved: {dirname}') 63 | 64 | if __name__ == '__main__': 65 | main() -------------------------------------------------------------------------------- /keras_retinanet/utils/tf_version.py: -------------------------------------------------------------------------------- 1 | ''' 2 | https://github.com/lacmus-foundation/lacmus 3 | Copyright (C) 2019-2020 lacmus-foundation 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public
License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | ''' 18 | 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | import sys 23 | 24 | MINIMUM_TF_VERSION = 2, 3, 0 25 | BLACKLISTED_TF_VERSIONS = [] 26 | 27 | 28 | def tf_version(): 29 | """ Get the Tensorflow version. 30 | Returns 31 | tuple of (major, minor, patch). 32 | """ 33 | return tuple(map(int, tf.version.VERSION.split('-')[0].split('.'))) 34 | 35 | 36 | def tf_version_ok(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS): 37 | """ Check if the current Tensorflow version is higher than the minimum version. 38 | """ 39 | return tf_version() >= minimum_tf_version and tf_version() not in blacklisted 40 | 41 | 42 | def assert_tf_version(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS): 43 | """ Assert that the Tensorflow version is up to date. 44 | """ 45 | detected = tf.version.VERSION 46 | required = '.'.join(map(str, minimum_tf_version)) 47 | assert(tf_version_ok(minimum_tf_version, blacklisted)), 'You are using tensorflow version {}. The minimum required version is {} (blacklisted: {}).'.format(detected, required, blacklisted) 48 | 49 | 50 | def check_tf_version(): 51 | """ Check that the Tensorflow version is up to date. If it isn't, print an error message and exit the script. 52 | """ 53 | try: 54 | assert_tf_version() 55 | except AssertionError as e: 56 | print(e, file=sys.stderr) 57 | sys.exit(1) 58 | -------------------------------------------------------------------------------- /tests/bin/test_train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | import keras_retinanet.backend 18 | import keras_retinanet.bin.train 19 | from tensorflow import keras 20 | 21 | import warnings 22 | 23 | import pytest 24 | 25 | 26 | @pytest.fixture(autouse=True) 27 | def clear_session(): 28 | # run before test (do nothing) 29 | yield 30 | # run after test, clear keras session 31 | keras.backend.clear_session() 32 | 33 | 34 | def test_coco(): 35 | # ignore warnings in this test 36 | warnings.simplefilter('ignore') 37 | 38 | # run training / evaluation 39 | keras_retinanet.bin.train.main([ 40 | '--epochs=1', 41 | '--steps=1', 42 | '--no-weights', 43 | '--no-snapshots', 44 | 'coco', 45 | 'tests/test-data/coco', 46 | ]) 47 | 48 | 49 | def test_pascal(): 50 | # ignore warnings in this test 51 | warnings.simplefilter('ignore') 52 | 53 | # run training / evaluation 54 | keras_retinanet.bin.train.main([ 55 | '--epochs=1', 56 | '--steps=1', 57 | '--no-weights', 58 | '--no-snapshots', 59 | 'pascal', 60 | 'tests/test-data/pascal', 61 | ]) 62 | 63 | 64 | def test_csv(): 65 | # ignore warnings in this test 66 | warnings.simplefilter('ignore') 67 | 68 | # run training / evaluation 69 | keras_retinanet.bin.train.main([ 70 | '--epochs=1', 71 | '--steps=1', 72 | '--no-weights', 73 | '--no-snapshots', 74 | 'csv', 75 | 'tests/test-data/csv/annotations.csv', 76 | 'tests/test-data/csv/classes.csv', 77 | ]) 78 | 79 | 80 | def test_vgg(): 81 | # ignore warnings in this test 82 | warnings.simplefilter('ignore') 83 | 84 | # run training / evaluation 85 | keras_retinanet.bin.train.main([ 86 | '--backbone=vgg16', 87 | '--epochs=1', 88 | '--steps=1', 89 | '--no-weights', 90 | '--no-snapshots', 91 | '--freeze-backbone', 92 | 'coco', 93 | 'tests/test-data/coco', 94 | ]) 95 | -------------------------------------------------------------------------------- /data_utils/README.md: -------------------------------------------------------------------------------- 1 | # Lacmus Drone Dataset (LADD) 2 | 3 | LADD is a dataset of drone-captured images for pedestrian detection. LADD annotations are in the VOC format. 4 | 5 | You can [download the LADD](https://cloud.mail.ru/public/2k53/2bJVwYSa7) from Mail.Cloud directly. 6 | 7 | #### Overview of dataset 8 | 9 | * You can see an example of a labeled image below. 10 | 11 | We have just one kind of label: 12 | 13 | * 0 - Pedestrian 14 | 15 | ![example](../docs/imgs/examples/01.png) 16 | 17 | 18 | * The structure of the `LADD_VERSION_SIZON` 19 | 20 | ``` 21 | ├── LADD 22 | │ ├── Annotations 23 | │ │ └── X.xml (419 items) 24 | │ ├── examples 25 | │ │ └── X.jpg (10 items) 26 | │ ├── ImageSets 27 | │ │ └── Main 28 | # *.txt which split the dataset 29 | │ │ └── test.txt 30 | │ │ └── train.txt 31 | │ │ └── trainval.txt 32 | │ │ └── val.txt 33 | │ └── JPEGImages 34 | │ └── X.jpg (419 items) 35 | 36 | ``` 37 | 38 | * The `JPEGImages`: 39 | 40 | * **Image Type** : *jpeg(JPEG)* 41 | * **Width** x **Height** : *4000 x 3000* 42 | 43 | * The `Annotations` : VOC-format `.xml` files for object detection, automatically generated by the labeling tools. Below is an example of an `.xml` file. 44 | 45 | ```xml 46 | <annotation> 47 | <folder>VocGalsTfl</folder> 48 | <filename>0</filename> 49 | <source> 50 | <database>Unknown</database> 51 | </source> 52 | <size> 53 | <width>4000</width> 54 | <height>3000</height> 55 | <depth>3</depth> 56 | </size> 57 | <segmented>0</segmented> 58 | <object> 59 | <name>Pedestrian</name> 60 | <pose>Unspecified</pose> 61 | <truncated>0</truncated> 62 | <difficult>0</difficult> 63 | <bndbox> 64 | <xmin>1881</xmin> 65 | <ymin>1409</ymin> 66 | <xmax>1905</xmax> 67 | <ymax>1469</ymax> 68 | </bndbox> 69 | </object> 70 | 71 | ... 72 | 73 | </annotation> 74 | ``` 75 | 76 | The dataset is divided into 3 seasons: **summer**, **spring** and **winter**. All files are stored in archives `LADD_VERSION_SIZON_NUMBER` and are numbered. You can assemble your own dataset independently.
To do this, simply merge the corresponding folders and contents of text files into one. 77 | 78 | You can also use our official tools. 79 | 80 | ## License 81 | 82 | LADD is licensed under GNU General Public License v3.0. You can read the license text [here](https://github.com/lizaalert/lacmus/blob/master/LICENSE). 83 | 84 | This license applies not only to the dataset, but also to ALL SOFTWARE products that use it to one degree or another. 85 | 86 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from setuptools.extension import Extension 3 | from distutils.command.build_ext import build_ext as DistUtilsBuildExt 4 | 5 | 6 | class BuildExtension(setuptools.Command): 7 | description = DistUtilsBuildExt.description 8 | user_options = DistUtilsBuildExt.user_options 9 | boolean_options = DistUtilsBuildExt.boolean_options 10 | help_options = DistUtilsBuildExt.help_options 11 | 12 | def __init__(self, *args, **kwargs): 13 | from setuptools.command.build_ext import build_ext as SetupToolsBuildExt 14 | 15 | # Bypass __setatrr__ to avoid infinite recursion. 16 | self.__dict__['_command'] = SetupToolsBuildExt(*args, **kwargs) 17 | 18 | def __getattr__(self, name): 19 | return getattr(self._command, name) 20 | 21 | def __setattr__(self, name, value): 22 | setattr(self._command, name, value) 23 | 24 | def initialize_options(self, *args, **kwargs): 25 | return self._command.initialize_options(*args, **kwargs) 26 | 27 | def finalize_options(self, *args, **kwargs): 28 | ret = self._command.finalize_options(*args, **kwargs) 29 | import numpy 30 | self.include_dirs.append(numpy.get_include()) 31 | return ret 32 | 33 | def run(self, *args, **kwargs): 34 | return self._command.run(*args, **kwargs) 35 | 36 | 37 | extensions = [ 38 | Extension( 39 | 'keras_retinanet.utils.compute_overlap', 40 | ['keras_retinanet/utils/compute_overlap.pyx'] 41 | ), 42 | ] 43 | 44 | 45 | setuptools.setup( 46 | name = 'keras-retinanet', 47 | version = '2.5.0', 48 | description = 'Keras implementation of RetinaNet object detection.', 49 | url = 'https://github.com/fizyr/keras-retinanet', 50 | author = 'Hans Gaiser', 51 | author_email = 'h.gaiser@fizyr.com', 52 | maintainer = 'Georgy Perevozchikov', 53 | maintainer_email = 'gosha20777@live.ru', 54 | cmdclass = {'build_ext': BuildExtension}, 55 | packages = setuptools.find_packages(), 56 | install_requires = ['keras-resnet==0.2.1', 'efficientnet', 'image-classifiers', 'six', 'numpy', 'cython', 'Pillow', 'opencv-python', 'progressbar2'], 57 | entry_points = { 58 | 'console_scripts': [ 59 | 'retinanet-train=keras_retinanet.bin.train:main', 60 | 'retinanet-evaluate=keras_retinanet.bin.evaluate:main', 61 | 'retinanet-debug=keras_retinanet.bin.debug:main', 62 | 'retinanet-convert-model=keras_retinanet.bin.convert_model:main', 63 | ], 64 | }, 65 | ext_modules = extensions, 66 | setup_requires = ["cython>=0.28", "numpy>=1.14.0"] 67 | ) 68 | -------------------------------------------------------------------------------- /data_utils/LaddValidator/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | 6 | namespace LaddValidator 7 | { 8 | class Program 9 | { 10 | private static readonly Dictionary _argsKeys = new Dictionary() 11 | { 12 | {"--src", "source path"}, 13 | {"--val_file", "destination path"}, 
14 | }; 15 | 16 | static void Main(string[] args) 17 | { 18 | var parser = new ArgsParser(_argsKeys); 19 | var parsedArgs = parser.Parse(args); 20 | if (parsedArgs == null) 21 | { 22 | return; 23 | } 24 | var splitPatch = parsedArgs["src"] + "ImageSets/Main/"; 25 | var valFilePatch = parsedArgs["val_file"]; 26 | if (!Directory.Exists(splitPatch)) 27 | { 28 | Console.Write("unable to open: " + splitPatch); 29 | return; 30 | } 31 | if (!File.Exists(valFilePatch)) 32 | { 33 | Console.Write("unable to open: " + valFilePatch); 34 | return; 35 | } 36 | 37 | var valLines = File.ReadLines(valFilePatch).ToList(); 38 | var trainLines = File.ReadLines(splitPatch + "train.txt").ToList(); 39 | var testLines = File.ReadLines(splitPatch + "test.txt").ToList(); 40 | trainLines.AddRange(testLines); 41 | 42 | for (int i = 0; i < trainLines.Count; i++) 43 | { 44 | for (int j = 0; j < valLines.Count; j++) 45 | { 46 | if (valLines[j] == trainLines[i]) 47 | { 48 | trainLines.RemoveAt(i); 49 | Console.WriteLine($"{valLines[j]} moved to val set"); 50 | if(i>0) 51 | i--; 52 | } 53 | } 54 | } 55 | Shuffle(valLines); 56 | File.WriteAllLines(splitPatch+"train.txt", trainLines); 57 | File.WriteAllLines(splitPatch+"trainval.txt", trainLines); 58 | File.WriteAllLines(splitPatch+"test.txt", valLines); 59 | File.WriteAllLines(splitPatch+"val.txt", valLines); 60 | } 61 | 62 | private static void Shuffle(IList list) 63 | { 64 | Random rng = new Random(); 65 | int n = list.Count; 66 | while (n > 1) { 67 | n--; 68 | int k = rng.Next(n + 1); 69 | T value = list[k]; 70 | list[k] = list[n]; 71 | list[n] = value; 72 | } 73 | } 74 | } 75 | } -------------------------------------------------------------------------------- /keras_retinanet/models/mobilenetv3/mobilenet_v3_small.py: -------------------------------------------------------------------------------- 1 | """MobileNet v3 small models for Keras. 2 | # Reference 3 | [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs) 4 | """ 5 | 6 | 7 | from tensorflow.keras.models import Model 8 | from tensorflow.keras.layers import Input, Conv2D, GlobalAveragePooling2D, Reshape 9 | from tensorflow.keras.utils import plot_model 10 | 11 | from .mobilenet_v3_base import MobileNetBase 12 | 13 | 14 | class MobileNetV3_Small(MobileNetBase): 15 | def __init__(self, shape, n_class, alpha=1.0, include_top=True): 16 | """Init. 17 | 18 | # Arguments 19 | input_shape: An integer or tuple/list of 3 integers, shape 20 | of input tensor. 21 | n_class: Integer, number of classes. 22 | alpha: Integer, width multiplier. 23 | include_top: if inculde classification layer. 24 | 25 | # Returns 26 | MobileNetv3 model. 27 | """ 28 | super(MobileNetV3_Small, self).__init__(shape, n_class, alpha) 29 | self.include_top = include_top 30 | 31 | def build(self, plot=False): 32 | """build MobileNetV3 Small. 33 | 34 | # Arguments 35 | plot: Boolean, weather to plot model. 36 | 37 | # Returns 38 | model: Model, model. 
39 | """ 40 | 41 | inputs = Input(shape=self.shape) 42 | 43 | x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS') 44 | 45 | x = self._bottleneck(x, 16, (3, 3), e=16, s=2, squeeze=True, nl='RE') 46 | x = self._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE') 47 | x = self._bottleneck(x, 24, (3, 3), e=88, s=1, squeeze=False, nl='RE') 48 | x = self._bottleneck(x, 40, (5, 5), e=96, s=2, squeeze=True, nl='HS') 49 | x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS') 50 | x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS') 51 | x = self._bottleneck(x, 48, (5, 5), e=120, s=1, squeeze=True, nl='HS') 52 | x = self._bottleneck(x, 48, (5, 5), e=144, s=1, squeeze=True, nl='HS') 53 | x = self._bottleneck(x, 96, (5, 5), e=288, s=2, squeeze=True, nl='HS') 54 | x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS') 55 | x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS') 56 | 57 | x = self._conv_block(x, 576, (1, 1), strides=(1, 1), nl='HS') 58 | x = GlobalAveragePooling2D()(x) 59 | x = Reshape((1, 1, 576))(x) 60 | 61 | x = Conv2D(1280, (1, 1), padding='same')(x) 62 | x = self._return_activation(x, 'HS') 63 | 64 | if self.include_top: 65 | x = Conv2D(self.n_class, (1, 1), padding='same', activation='softmax')(x) 66 | x = Reshape((self.n_class,))(x) 67 | 68 | model = Model(inputs, x) 69 | 70 | if plot: 71 | plot_model(model, to_file='MobileNetv3_small.png', show_shapes=True) 72 | 73 | return model 74 | -------------------------------------------------------------------------------- /keras_retinanet/callbacks/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from tensorflow import keras 18 | from ..utils.coco_eval import evaluate_coco 19 | 20 | 21 | class CocoEval(keras.callbacks.Callback): 22 | """ Performs COCO evaluation on each epoch. 23 | """ 24 | def __init__(self, generator, tensorboard=None, threshold=0.05): 25 | """ CocoEval callback intializer. 26 | 27 | Args 28 | generator : The generator used for creating validation data. 29 | tensorboard : If given, the results will be written to tensorboard. 30 | threshold : The score threshold to use. 
31 | """ 32 | self.generator = generator 33 | self.threshold = threshold 34 | self.tensorboard = tensorboard 35 | 36 | super(CocoEval, self).__init__() 37 | 38 | def on_epoch_end(self, epoch, logs=None): 39 | logs = logs or {} 40 | 41 | coco_tag = ['AP @[ IoU=0.50:0.95 | area= all | maxDets=100 ]', 42 | 'AP @[ IoU=0.50 | area= all | maxDets=100 ]', 43 | 'AP @[ IoU=0.75 | area= all | maxDets=100 ]', 44 | 'AP @[ IoU=0.50:0.95 | area= small | maxDets=100 ]', 45 | 'AP @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]', 46 | 'AP @[ IoU=0.50:0.95 | area= large | maxDets=100 ]', 47 | 'AR @[ IoU=0.50:0.95 | area= all | maxDets= 1 ]', 48 | 'AR @[ IoU=0.50:0.95 | area= all | maxDets= 10 ]', 49 | 'AR @[ IoU=0.50:0.95 | area= all | maxDets=100 ]', 50 | 'AR @[ IoU=0.50:0.95 | area= small | maxDets=100 ]', 51 | 'AR @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]', 52 | 'AR @[ IoU=0.50:0.95 | area= large | maxDets=100 ]'] 53 | coco_eval_stats = evaluate_coco(self.generator, self.model, self.threshold) 54 | 55 | if coco_eval_stats is not None: 56 | for index, result in enumerate(coco_eval_stats): 57 | logs[coco_tag[index]] = result 58 | 59 | if self.tensorboard: 60 | import tensorflow as tf 61 | writer = tf.summary.create_file_writer(self.tensorboard.log_dir) 62 | with writer.as_default(): 63 | for index, result in enumerate(coco_eval_stats): 64 | tf.summary.scalar('{}. {}'.format(index + 1, coco_tag[index]), result, step=epoch) 65 | writer.flush() 66 | -------------------------------------------------------------------------------- /keras_retinanet/utils/colors.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def label_color(label): 5 | """ Return a color from a set of predefined colors. Contains 80 colors in total. 6 | 7 | Args 8 | label: The label to get the color for. 9 | 10 | Returns 11 | A list of three values representing a RGB color. 12 | 13 | If no color is defined for a certain label, the color green is returned and a warning is printed. 
14 | """ 15 | if label < len(colors): 16 | return colors[label] 17 | else: 18 | warnings.warn('Label {} has no color, returning default.'.format(label)) 19 | return (0, 255, 0) 20 | 21 | 22 | """ 23 | Generated using: 24 | 25 | ``` 26 | colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)] 27 | shuffle(colors) 28 | pprint(colors) 29 | ``` 30 | """ 31 | colors = [ 32 | [31 , 0 , 255] , 33 | [0 , 159 , 255] , 34 | [255 , 95 , 0] , 35 | [255 , 19 , 0] , 36 | [255 , 0 , 0] , 37 | [255 , 38 , 0] , 38 | [0 , 255 , 25] , 39 | [255 , 0 , 133] , 40 | [255 , 172 , 0] , 41 | [108 , 0 , 255] , 42 | [0 , 82 , 255] , 43 | [0 , 255 , 6] , 44 | [255 , 0 , 152] , 45 | [223 , 0 , 255] , 46 | [12 , 0 , 255] , 47 | [0 , 255 , 178] , 48 | [108 , 255 , 0] , 49 | [184 , 0 , 255] , 50 | [255 , 0 , 76] , 51 | [146 , 255 , 0] , 52 | [51 , 0 , 255] , 53 | [0 , 197 , 255] , 54 | [255 , 248 , 0] , 55 | [255 , 0 , 19] , 56 | [255 , 0 , 38] , 57 | [89 , 255 , 0] , 58 | [127 , 255 , 0] , 59 | [255 , 153 , 0] , 60 | [0 , 255 , 255] , 61 | [0 , 255 , 216] , 62 | [0 , 255 , 121] , 63 | [255 , 0 , 248] , 64 | [70 , 0 , 255] , 65 | [0 , 255 , 159] , 66 | [0 , 216 , 255] , 67 | [0 , 6 , 255] , 68 | [0 , 63 , 255] , 69 | [31 , 255 , 0] , 70 | [255 , 57 , 0] , 71 | [255 , 0 , 210] , 72 | [0 , 255 , 102] , 73 | [242 , 255 , 0] , 74 | [255 , 191 , 0] , 75 | [0 , 255 , 63] , 76 | [255 , 0 , 95] , 77 | [146 , 0 , 255] , 78 | [184 , 255 , 0] , 79 | [255 , 114 , 0] , 80 | [0 , 255 , 235] , 81 | [255 , 229 , 0] , 82 | [0 , 178 , 255] , 83 | [255 , 0 , 114] , 84 | [255 , 0 , 57] , 85 | [0 , 140 , 255] , 86 | [0 , 121 , 255] , 87 | [12 , 255 , 0] , 88 | [255 , 210 , 0] , 89 | [0 , 255 , 44] , 90 | [165 , 255 , 0] , 91 | [0 , 25 , 255] , 92 | [0 , 255 , 140] , 93 | [0 , 101 , 255] , 94 | [0 , 255 , 82] , 95 | [223 , 255 , 0] , 96 | [242 , 0 , 255] , 97 | [89 , 0 , 255] , 98 | [165 , 0 , 255] , 99 | [70 , 255 , 0] , 100 | [255 , 0 , 172] , 101 | [255 , 76 , 0] , 102 | [203 , 255 , 0] , 103 | [204 , 0 , 255] , 104 | [255 , 0 , 229] , 105 | [255 , 133 , 0] , 106 | [127 , 0 , 255] , 107 | [0 , 235 , 255] , 108 | [0 , 255 , 197] , 109 | [255 , 0 , 191] , 110 | [0 , 44 , 255] , 111 | [50 , 255 , 0] 112 | ] 113 | -------------------------------------------------------------------------------- /keras_retinanet/models/mobilenetv3/mobilenet_v3_large.py: -------------------------------------------------------------------------------- 1 | """MobileNet v3 Large models for Keras. 2 | # Reference 3 | [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs) 4 | """ 5 | 6 | 7 | from tensorflow.keras.models import Model 8 | from tensorflow.keras.layers import Input, Conv2D, GlobalAveragePooling2D, Reshape 9 | from tensorflow.keras.utils import plot_model 10 | 11 | from .mobilenet_v3_base import MobileNetBase 12 | 13 | 14 | class MobileNetV3_Large(MobileNetBase): 15 | def __init__(self, shape, n_class, alpha=1.0, include_top=True): 16 | """Init. 17 | 18 | # Arguments 19 | input_shape: An integer or tuple/list of 3 integers, shape 20 | of input tensor. 21 | n_class: Integer, number of classes. 22 | alpha: Integer, width multiplier. 23 | include_top: if inculde classification layer. 24 | 25 | # Returns 26 | MobileNetv3 model. 27 | """ 28 | super(MobileNetV3_Large, self).__init__(shape, n_class, alpha) 29 | self.include_top = include_top 30 | 31 | def build(self, plot=False): 32 | """build MobileNetV3 Large. 33 | 34 | # Arguments 35 | plot: Boolean, weather to plot model. 
36 | 37 | # Returns 38 | model: Model, model. 39 | """ 40 | inputs = Input(shape=self.shape) 41 | 42 | x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS') 43 | 44 | x = self._bottleneck(x, 16, (3, 3), e=16, s=1, squeeze=False, nl='RE') 45 | x = self._bottleneck(x, 24, (3, 3), e=64, s=2, squeeze=False, nl='RE') 46 | x = self._bottleneck(x, 24, (3, 3), e=72, s=1, squeeze=False, nl='RE') 47 | x = self._bottleneck(x, 40, (5, 5), e=72, s=2, squeeze=True, nl='RE') 48 | x = self._bottleneck(x, 40, (5, 5), e=120, s=1, squeeze=True, nl='RE') 49 | x = self._bottleneck(x, 40, (5, 5), e=120, s=1, squeeze=True, nl='RE') 50 | x = self._bottleneck(x, 80, (3, 3), e=240, s=2, squeeze=False, nl='HS') 51 | x = self._bottleneck(x, 80, (3, 3), e=200, s=1, squeeze=False, nl='HS') 52 | x = self._bottleneck(x, 80, (3, 3), e=184, s=1, squeeze=False, nl='HS') 53 | x = self._bottleneck(x, 80, (3, 3), e=184, s=1, squeeze=False, nl='HS') 54 | x = self._bottleneck(x, 112, (3, 3), e=480, s=1, squeeze=True, nl='HS') 55 | x = self._bottleneck(x, 112, (3, 3), e=672, s=1, squeeze=True, nl='HS') 56 | x = self._bottleneck(x, 160, (5, 5), e=672, s=2, squeeze=True, nl='HS') 57 | x = self._bottleneck(x, 160, (5, 5), e=960, s=1, squeeze=True, nl='HS') 58 | x = self._bottleneck(x, 160, (5, 5), e=960, s=1, squeeze=True, nl='HS') 59 | 60 | x = self._conv_block(x, 960, (1, 1), strides=(1, 1), nl='HS') 61 | x = GlobalAveragePooling2D()(x) 62 | x = Reshape((1, 1, 960))(x) 63 | 64 | x = Conv2D(1280, (1, 1), padding='same')(x) 65 | x = self._return_activation(x, 'HS') 66 | 67 | if self.include_top: 68 | x = Conv2D(self.n_class, (1, 1), padding='same', activation='softmax')(x) 69 | x = Reshape((self.n_class,))(x) 70 | 71 | model = Model(inputs, x) 72 | 73 | if plot: 74 | plot_model(model, to_file='MobileNetv3_large.png', show_shapes=True) 75 | 76 | return model 77 | -------------------------------------------------------------------------------- /docs/imgs/skhemes/RescuerLaBackendSkheme-v1.drawio: -------------------------------------------------------------------------------- 1 | 
7V1tc5u4Fv41ntm9M/GAxOtH24m7u9Pem67bbffTDsayzQYjL8hNvL/+CpCwhGSHpGCTpP3gwkES0nl5ztGRRAZwsnl4lwbb9Qe8QPEAGIuHAbweAGBC4NP/csq+pLi2WRJWabRghQ6EWfQvYkSDUXfRAmVSQYJxTKKtTAxxkqCQSLQgTfG9XGyJY/mt22CFFMIsDGKV+iVakHVJ9YB7oP+CotWav9l02IA3AS/MRpKtgwW+F0jwZgAnKcakvNo8TFCcM4/zpaw3PfK06liKEtKkwrtb7xd8+xueWf8lk78+Y9fzsyvWyrcg3rEBs86SPedAinfJAuWNGAM4vl9HBM22QZg/vacyp7Q12cT0zqSXyyiOJzjGaVEXLgLkLUNKz0iK75DwxAk9NF/SJ+oweJ9QStCDQGLDeofwBpF0T4vwpwZjMdMxi93eHwQGeJG1ICzoMGLAlGRVNX3gI71grHwCW4HC1j8idK+wlo6PyPyT+ZTgBNWYykhBHK0SehtSriFKH+fciqjajtiDTbRY5K/RCkwWaRsScLyhLcnA9IeO+A8qIvE0EgFdCcQ2VEWfGgN/OvCtga8qPVpQFGC3OCVrvMJJEN8cqDUeHsq8x3jLhPk3ImTPIC3YESyLGj1E5Ktw/WfeFOVieXf9wFoubvb8JqHM+FoUNAyTE8qaputwwqFycSfVvkVpRHmaK01BzEiQklGOlLk6xUGWRSEnT6OY9/aokmR4l4boBOuZ5Gl7K0ROQRFrMGf9SZ1LURyQ6JsM0K1rDHwKMpqPI2MbVgZknHNUnDMdjVWZdldmZWmY5MQFptGhOKv86nNGdQ0Yn3/lj+Ypf8Qp9N2H8py45YQrWjvGqygZ2JNvQRotKc6RCCdC7e3JyukuKdQmxKskelLNDKGiZraLSXay/0I7rYF8RpUpSlaUYB/uPhXwcgWOOWNM9WkZF9a8pj4AJZ3gPXRreO/aqi4CW6OL3UF8/w0WwItbrK3BtZfsCK9yTwhlT2j5vXSEzst0hE4DjB8tNlHy3SC/Kz3FJkiC1SZnb0PA1TQVsA5lVPMoaDbGbp33QFuq1D/QX0R/T4Y1SwP+Fjwn+Ls9BH/Tl7hkAQ2Xzgz+Gja9ZPCvbspang16CfzeywR+rwnwb7ffDft4uYyjJA+0Fyi7IzhvMSja/YG5YsQto4kmyXVmzOUpWkFBvqB5i9LoW5KrlmaEnioCS4Pn3c14vJeH2DRa92TUBl6VyOoVanM0fhS2gdcr2OYhxXm1oqE45HkbkDXB9J6VwTwq4kclB2HbkmNVb3GUT184cri+jBymVYOEUhlZrYP8KT+DvVBsmxfIjr/HN/XvOahT2eJBuaoxfocbaHsNqQWohpbMCI2ztGwNVNt1wbTnLIHCpdEtDZuMnyaYKjWOY5T+/Hp9Z10gpj/UJQzP6T3NHmb4oft4ht/VTRm701tdir9nXNLmH7RsgnZXbOph9rnOJsu8vDYdTybO+XTuD2mJJ9/YweBRmPvNO5j79Q0x61DQUMe7w0tdjq2UyCL6Jk7oB5QHPsh/x/aAysArKGOvuLb4Nf0dFb9GQXeEMlPWyHha/N4Uv+Pi0aS4dgU6EBqpXlqWr6593iAoLmhJU3idqdQt32IPBZUTxnhi2COD96bqpSX0z2XXQOzujcCL8vXX0vAAZx+7GMk8Ldmkba1k2Y3wasaOowNr3ZpitCSXtyVPtiWvaSbW7MyYjifXKmz7FGR32Q8EFNy8qwsazwuCviI3EfGonY11wFXZtGjfHqMwMFCRYyxglMsrlq+gvzbHA1eoKINNBXqPQHENfg0dulbd1vahdaCu0E4c10Tg27XQ4QrARTaKiHigD7m0boQSU0UAQPJPkoRsPjBKgfKAqwYN6fqNoCpwtPZ5XmDlln8KWItdyj+QVZwZXBpXgZpEuhIslZqRyQ2uFsuoWFoBxY0AHSeg5lqwciX40mCdJeCEFhIncnxnK0FmE5itRjQRYLzyL0AaIBBR3R7UY11D4YDHMOyNIJMFL49Lav5P4f65EwR2jU2XzzYBXU6uht7XAQlmiLxh+FbkdnH0Pr4PWJkjV3NvUwrOjgbBEwFDVdzXBqOPArRYbCxnHMQAWozlTKEboAoln5QdqOJFsX9jZUg1V1abN1SNOM8e/3+qjmv3VbyBfIBdW4/TLOSf2z30MH9chxlggabz7w49RIMM8oxkZPeW43tVcpfPnAA1fcwj/LEY4YseQHQRJ2bkz8iZNHQRnuCytJN7NYHMM9uaHtaGU7wOqO5FzEB4wtN81D8JQy3LQV6n8gyuwiljMCrchT8VphNlPzyh06ZQ0ZNZIPICDEsWDn9WEky16ZbWoY0FkUNpCnSYOJ1WAuPANWC8S4MlSoJBdcz21XurXuRZbF9h98X2noHnH5q03Go3Wq82n4Gme4bb38L0fSivW9c4Gsc02AxUs8QWzMmrbUUBhqUak24fJ3SdrrimrirMCE7zTxOA6XUetHzcIfrk1QYsHt/BwbcH6Y4Two7ClS+rUToNyd/xfDr7EOJP/m3y9UpV2zPC22E77Z/CEz26ac3/mSe1jaagc9mT2WqK/xZnZJWi2cf3itiydbDNL8N9HFHppI9DzrwU4/t5RQjCu1Uh3P/tSHFUQcamyojasARbtgToaZ29qbEFrytfD9Xk/K+b/DMpHKPqLC9AiH8uBdQQislj87DKvxgzzE9OhGvqN4ebXUyiqwUOd8XhP72cOvAHtnyazdLtDuA+QuR3Z18PgWqu+Hc0n0fkw0dK/UfrCjhbtykOUZY10PKzKnXN5QJNwkW3+bM7Fj9p8+dF4hTTgDLXdJ+18TVM624TM1TzvOyLT681MFGE0IONyzzdf6HYZPCMYz/m0HDrn6ux+nlYky8VPhoEWf069gN1GWRhJcLglkWp/+zyj46NmY1V9+LGM1rhQ34OX0qdnloaeFJ2ugsnY9JJW5OPkujcTFfbwqE6sZuk+y21rfwzduEdSlrmY5CGzILN1o6xK2zVzs7O6r4tdQowHA5fhE4eyd6dlX2O8/I8iOw9XPsR59G5n7CaHjLsmZ+w1MncCbt5QuTbrt3UvpwEPe3ao+4bgt0deLfUedmLgBzLuDzkWG2ftuuGd7Xsi8UPvVyOb+qU6wtO71Ca9ZF99X2loAeKp8bF2z31XfnZMoWFr2feasI6gKqCOOvHKix148YKx0GyesVCMGp5L1vN4JxVBo6lMLv/i7Zy4Hf5uK/xyiyb+/Ul7utqZbZVD1K3mIaoVR1gaN9kYH9M5rTFdK/5fkPN5xLqi+arSRiW4Sq+wPFmvI9uf39X7ke7pq7OPH9HiyjfqHmt5hPPv26rsFYjgMbrtsDX7CppadWW3h7+bET5GZ/DH9+AN/8H -------------------------------------------------------------------------------- /keras_retinanet/utils/coco_eval.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from pycocotools.cocoeval import COCOeval 18 | 19 | from tensorflow import keras 20 | import numpy as np 21 | import json 22 | 23 | import progressbar 24 | assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead." 25 | 26 | 27 | def evaluate_coco(generator, model, threshold=0.05): 28 | """ Use the pycocotools to evaluate a COCO model on a dataset. 29 | 30 | Args 31 | generator : The generator for generating the evaluation data. 32 | model : The model to evaluate. 33 | threshold : The score threshold to use. 34 | """ 35 | # start collecting results 36 | results = [] 37 | image_ids = [] 38 | for index in progressbar.progressbar(range(generator.size()), prefix='COCO evaluation: '): 39 | image = generator.load_image(index) 40 | image = generator.preprocess_image(image) 41 | image, scale = generator.resize_image(image) 42 | 43 | if keras.backend.image_data_format() == 'channels_first': 44 | image = image.transpose((2, 0, 1)) 45 | 46 | # run network 47 | boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0)) 48 | 49 | # correct boxes for image scale 50 | boxes /= scale 51 | 52 | # change to (x, y, w, h) (MS COCO standard) 53 | boxes[:, :, 2] -= boxes[:, :, 0] 54 | boxes[:, :, 3] -= boxes[:, :, 1] 55 | 56 | # compute predicted labels and scores 57 | for box, score, label in zip(boxes[0], scores[0], labels[0]): 58 | # scores are sorted, so we can break 59 | if score < threshold: 60 | break 61 | 62 | # append detection for each positively labeled class 63 | image_result = { 64 | 'image_id' : generator.image_ids[index], 65 | 'category_id' : generator.label_to_coco_label(label), 66 | 'score' : float(score), 67 | 'bbox' : box.tolist(), 68 | } 69 | 70 | # append detection to results 71 | results.append(image_result) 72 | 73 | # append image to list of processed images 74 | image_ids.append(generator.image_ids[index]) 75 | 76 | if not len(results): 77 | return 78 | 79 | # write output 80 | json.dump(results, open('{}_bbox_results.json'.format(generator.set_name), 'w'), indent=4) 81 | json.dump(image_ids, open('{}_processed_image_ids.json'.format(generator.set_name), 'w'), indent=4) 82 | 83 | # load results in COCO evaluation tool 84 | coco_true = generator.coco 85 | coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(generator.set_name)) 86 | 87 | # run COCO evaluation 88 | coco_eval = COCOeval(coco_true, coco_pred, 'bbox') 89 | coco_eval.params.imgIds = image_ids 90 | coco_eval.evaluate() 91 | coco_eval.accumulate() 92 | coco_eval.summarize() 93 | return coco_eval.stats 94 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## 
Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at gosha20777@live.ru. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /data_utils/yolo2voc.py: -------------------------------------------------------------------------------- 1 | # Script to convert yolo annotations to voc format 2 | import os 3 | import xml.etree.cElementTree as ET 4 | from PIL import Image 5 | import argparse 6 | 7 | CLASS_MAPPING = { 8 | '0': 'Pedestrian' 9 | # Add remaining classes here. 10 | } 11 | 12 | 13 | def create_root(file_prefix, width, height): 14 | root = ET.Element("annotation") 15 | ET.SubElement(root, "filename").text = "{}.jpg".format(file_prefix) 16 | ET.SubElement(root, "folder").text = "images" 17 | size = ET.SubElement(root, "size") 18 | ET.SubElement(size, "width").text = str(width) 19 | ET.SubElement(size, "height").text = str(height) 20 | ET.SubElement(size, "depth").text = "3" 21 | return root 22 | 23 | 24 | def create_object_annotation(root, voc_labels): 25 | for voc_label in voc_labels: 26 | obj = ET.SubElement(root, "object") 27 | ET.SubElement(obj, "name").text = voc_label[0] 28 | ET.SubElement(obj, "pose").text = "Unspecified" 29 | ET.SubElement(obj, "truncated").text = str(0) 30 | ET.SubElement(obj, "difficult").text = str(0) 31 | bbox = ET.SubElement(obj, "bndbox") 32 | ET.SubElement(bbox, "xmin").text = str(voc_label[1]) 33 | ET.SubElement(bbox, "ymin").text = str(voc_label[2]) 34 | ET.SubElement(bbox, "xmax").text = str(voc_label[3]) 35 | ET.SubElement(bbox, "ymax").text = str(voc_label[4]) 36 | return root 37 | 38 | 39 | def create_file(file_prefix, width, height, voc_labels, dest_dir): 40 | root = create_root(file_prefix, width, height) 41 | root = create_object_annotation(root, voc_labels) 42 | tree = ET.ElementTree(root) 43 | tree.write("{}/{}.xml".format(dest_dir, file_prefix)) 44 | 45 | 46 | def read_file(filename, src_dir, dest_dir): 47 | file_prefix = filename.split(".txt")[0] 48 | if os.path.isfile("{}/{}.JPG".format(src_dir, file_prefix)): 49 | os.rename("{}/{}.JPG".format(src_dir, file_prefix), "{}/{}.jpg".format(src_dir, file_prefix)) 50 | print("renamed to {}.jpg".format(file_prefix)) 51 | 52 | image_file_name = "{}.jpg".format(file_prefix) 53 | img = Image.open("{}/{}".format(src_dir, image_file_name)) 54 | w, h = img.size 55 | with open("{}/{}".format(src_dir, filename), 'r') as file: 56 | lines = file.readlines() 57 | voc_labels = [] 58 | for line in lines: 59 | voc = [] 60 | line = line.strip() 61 | data = line.split() 62 | voc.append(CLASS_MAPPING.get(data[0])) 63 | bbox_width = float(data[3]) * w 64 | bbox_height = float(data[4]) * h 65 | center_x = float(data[1]) * w 66 | center_y = float(data[2]) * h 67 | voc.append(int(center_x - (bbox_width / 2))) 68 | voc.append(int(center_y - (bbox_height / 2))) 69 | voc.append(int(center_x + (bbox_width / 2))) 70 | voc.append(int(center_y + (bbox_height / 2))) 71 | voc_labels.append(voc) 72 | create_file(file_prefix, w, h, voc_labels, dest_dir) 73 | print("Processing complete for file: {}".format(filename)) 74 | 75 | def parse_args(args): 76 | """ Parse the arguments. 
77 | """ 78 | parser = argparse.ArgumentParser(description='Script which converts yolo to pascal voc') 79 | parser.add_argument('--src', help='source annotation dir.') 80 | parser.add_argument('--dest', help='destination annotation dir.') 81 | return parser.parse_args(args) 82 | 83 | def main(args=None): 84 | args = parse_args(args) 85 | 86 | if not os.path.exists(args.dest): 87 | os.makedirs(args.dest) 88 | for filename in os.listdir(args.src): 89 | if filename.endswith('txt'): 90 | read_file(filename,args.src , args.dest) 91 | else: 92 | print("Skipping file: {}".format(filename)) 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /keras_retinanet/utils/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import configparser 18 | import numpy as np 19 | from tensorflow import keras 20 | from ..utils.anchors import AnchorParameters 21 | 22 | 23 | def read_config_file(config_path): 24 | config = configparser.ConfigParser() 25 | 26 | with open(config_path, 'r') as file: 27 | config.read_file(file) 28 | 29 | assert 'anchor_parameters' in config, \ 30 | "Malformed config file. Verify that it contains the anchor_parameters section." 31 | 32 | config_keys = set(config['anchor_parameters']) 33 | default_keys = set(AnchorParameters.default.__dict__.keys()) 34 | 35 | assert config_keys <= default_keys, \ 36 | "Malformed config file. 
These keys are not valid: {}".format(config_keys - default_keys) 37 | 38 | if 'pyramid_levels' in config: 39 | assert('levels' in config['pyramid_levels']), "pyramid levels specified by levels key" 40 | 41 | return config 42 | 43 | 44 | def parse_anchor_parameters(config): 45 | ratios = np.array(list(map(float, config['anchor_parameters']['ratios'].split(' '))), keras.backend.floatx()) 46 | scales = np.array(list(map(float, config['anchor_parameters']['scales'].split(' '))), keras.backend.floatx()) 47 | sizes = list(map(int, config['anchor_parameters']['sizes'].split(' '))) 48 | strides = list(map(int, config['anchor_parameters']['strides'].split(' '))) 49 | assert (len(sizes) == len(strides)), "sizes and strides should have an equal number of values" 50 | 51 | return AnchorParameters(sizes, strides, ratios, scales) 52 | 53 | 54 | def parse_pyramid_levels(config): 55 | levels = list(map(int, config['pyramid_levels']['levels'].split(' '))) 56 | 57 | return levels 58 | 59 | def parse_random_transform_parameters(config): 60 | kwargs = dict() 61 | kwargs['min_rotation'] = float(config['random_transform_parameters']['min_rotation']) 62 | kwargs['max_rotation'] = float(config['random_transform_parameters']['max_rotation']) 63 | kwargs['min_translation'] = tuple(map(float, config['random_transform_parameters']['min_translation'].split())) 64 | kwargs['max_translation'] = tuple(map(float, config['random_transform_parameters']['max_translation'].split())) 65 | kwargs['min_shear'] = float(config['random_transform_parameters']['min_shear']) 66 | kwargs['max_shear'] = float(config['random_transform_parameters']['max_shear']) 67 | kwargs['min_scaling'] = tuple(map(float, config['random_transform_parameters']['min_scaling'].split())) 68 | kwargs['max_scaling'] = tuple(map(float, config['random_transform_parameters']['max_scaling'].split())) 69 | kwargs['flip_x_chance'] = float(config['random_transform_parameters']['flip_x_chance']) 70 | kwargs['flip_y_chance'] = float(config['random_transform_parameters']['flip_y_chance']) 71 | 72 | return kwargs 73 | 74 | 75 | def parse_visual_effect_parameters(config): 76 | kwargs = dict() 77 | kwargs['contrast_range'] = tuple(map(float, config['visual_effect_parameters']['contrast_range'].split())) 78 | kwargs['brightness_range'] = tuple(map(float, config['visual_effect_parameters']['brightness_range'].split())) 79 | kwargs['hue_range'] = tuple(map(float, config['visual_effect_parameters']['hue_range'].split())) 80 | kwargs['saturation_range'] = tuple(map(float, config['visual_effect_parameters']['saturation_range'].split())) 81 | 82 | return kwargs 83 | -------------------------------------------------------------------------------- /data_utils/LaddGenerator/Annotation.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Xml.Serialization; 6 | 7 | namespace LaddGenerator 8 | { 9 | [Serializable] 10 | //[XmlRoot("annotation")] 11 | [XmlRoot("annotation")] 12 | public class Annotation 13 | { 14 | [XmlElement("folder")] 15 | public string Folder { get; set; } = "VocGalsTfl"; 16 | [XmlElement("filename")] 17 | public string Filename { get; set; } 18 | [XmlElement("source")] 19 | public Sourse Source { get; set; } = new Sourse(); 20 | [XmlElement("size")] 21 | public Size Size { get; set; } 22 | [XmlElement("segmented")] 23 | public int Segmented { get; set; } = 0; 24 | [XmlElement("object")] 25 | public List Objects { get; 
set; } = new List(); 26 | 27 | public static Annotation ParseFromXml(string annotationFileName) 28 | { 29 | var formatter = new XmlSerializer(type:typeof(Annotation)); 30 | try 31 | { 32 | using (var fs = new FileStream(annotationFileName, FileMode.Open)) 33 | { 34 | return (Annotation)formatter.Deserialize(fs); 35 | } 36 | } 37 | catch (Exception e) 38 | { 39 | throw new Exception("unable to create annotation! " + e.Message); 40 | } 41 | } 42 | 43 | public void SaveToXml(string annotationFileName) 44 | { 45 | try 46 | { 47 | var formatter = new XmlSerializer(type:typeof(Annotation)); 48 | using (var fs = new FileStream(annotationFileName, FileMode.Create)) 49 | { 50 | formatter.Serialize(fs, this); 51 | } 52 | } 53 | catch (Exception e) 54 | { 55 | throw new Exception("unable to save annotation! " + e.Message); 56 | } 57 | } 58 | } 59 | 60 | public class Object 61 | { 62 | [XmlElement("name")] 63 | public string Name { get; set; } 64 | [XmlElement("pose")] 65 | public string Pose { get; set; } = "Unspecified"; 66 | [XmlElement("truncated")] 67 | public int Truncated { get; set; } = 0; 68 | [XmlElement("difficult")] 69 | public int Difficult { get; set; } = 0; 70 | [XmlElement("bndbox")] 71 | public Box Box { get; set; } 72 | } 73 | 74 | public class Box 75 | { 76 | [XmlElement("ymin")] 77 | public string Ymin 78 | { 79 | get; set; 80 | } 81 | 82 | [XmlElement("xmin")] 83 | public string Xmin 84 | { 85 | get; set; 86 | } 87 | 88 | [XmlElement("ymax")] 89 | public string Ymax 90 | { 91 | get; set; 92 | } 93 | 94 | [XmlElement("xmax")] 95 | public string Xmax 96 | { 97 | get; set; 98 | } 99 | 100 | public void Normalize() 101 | { 102 | Xmin = ParseInt(Xmin); 103 | Xmax = ParseInt(Xmax); 104 | Ymin = ParseInt(Ymin); 105 | Ymax = ParseInt(Ymax); 106 | } 107 | 108 | private string ParseInt(string str) 109 | { 110 | str = str.Split('.').First(); 111 | if (int.TryParse(str, out var r)) 112 | { 113 | if(r < 0) 114 | r = 0; 115 | return $"{r}"; 116 | } 117 | throw new Exception(); 118 | } 119 | } 120 | 121 | public class Size 122 | { 123 | [XmlElement("height")] 124 | public uint Height { get; set; } 125 | [XmlElement("width")] 126 | public uint Width { get; set; } 127 | [XmlElement("depth")] 128 | public byte Depth { get; set; } = 3; 129 | } 130 | 131 | public class Sourse 132 | { 133 | [XmlElement("database")] 134 | public string DtatBase { get; set; } = "Unknown"; 135 | } 136 | } -------------------------------------------------------------------------------- /keras_retinanet/bin/convert_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Copyright 2017-2018 Fizyr (https://fizyr.com) 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | """ 18 | 19 | import argparse 20 | import os 21 | import sys 22 | 23 | # Allow relative imports when being executed as script. 
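# (When the script is run directly, __package__ is unset, so the repository root two
# directories up is prepended to sys.path and __package__ is set by hand; this is what
# lets the relative "from .. import models" style imports further down resolve without
# installing the package first.)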
24 | if __name__ == "__main__" and __package__ is None: 25 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) 26 | import keras_retinanet.bin # noqa: F401 27 | __package__ = "keras_retinanet.bin" 28 | 29 | # Change these to absolute imports if you copy this script outside the keras_retinanet package. 30 | from .. import models 31 | from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels 32 | from ..utils.gpu import setup_gpu 33 | from ..utils.tf_version import check_tf_version 34 | 35 | 36 | def parse_args(args): 37 | parser = argparse.ArgumentParser(description='Script for converting a training model to an inference model.') 38 | 39 | parser.add_argument('model_in', help='The model to convert.') 40 | parser.add_argument('model_out', help='Path to save the converted model to.') 41 | parser.add_argument('--backbone', help='The backbone of the model to convert.', default='resnet50') 42 | parser.add_argument('--no-nms', help='Disables non maximum suppression.', dest='nms', action='store_false') 43 | parser.add_argument('--no-class-specific-filter', help='Disables class specific filtering.', dest='class_specific_filter', action='store_false') 44 | parser.add_argument('--config', help='Path to a configuration parameters .ini file.') 45 | parser.add_argument('--nms-threshold', help='Value for non maximum suppression threshold.', type=float, default=0.5) 46 | parser.add_argument('--score-threshold', help='Threshold for prefiltering boxes.', type=float, default=0.05) 47 | parser.add_argument('--max-detections', help='Maximum number of detections to keep.', type=int, default=300) 48 | parser.add_argument('--parallel-iterations', help='Number of batch items to process in parallel.', type=int, default=32) 49 | 50 | return parser.parse_args(args) 51 | 52 | 53 | def main(args=None): 54 | # parse arguments 55 | if args is None: 56 | args = sys.argv[1:] 57 | args = parse_args(args) 58 | 59 | # make sure tensorflow is the minimum required version 60 | check_tf_version() 61 | 62 | # set modified tf session to avoid using the GPUs 63 | setup_gpu('cpu') 64 | 65 | # optionally load config parameters 66 | anchor_parameters = None 67 | pyramid_levels = None 68 | if args.config: 69 | args.config = read_config_file(args.config) 70 | if 'anchor_parameters' in args.config: 71 | anchor_parameters = parse_anchor_parameters(args.config) 72 | 73 | if 'pyramid_levels' in args.config: 74 | pyramid_levels = parse_pyramid_levels(args.config) 75 | 76 | # load the model 77 | model = models.load_model(args.model_in, backbone_name=args.backbone) 78 | 79 | # check if this is indeed a training model 80 | models.check_training_model(model) 81 | 82 | # convert the model 83 | model = models.convert_model( 84 | model, 85 | nms=args.nms, 86 | class_specific_filter=args.class_specific_filter, 87 | anchor_params=anchor_parameters, 88 | pyramid_levels=pyramid_levels, 89 | nms_threshold=args.nms_threshold, 90 | score_threshold=args.score_threshold, 91 | max_detections=args.max_detections, 92 | parallel_iterations=args.parallel_iterations 93 | ) 94 | 95 | # save model 96 | model.save(args.model_out) 97 | 98 | 99 | if __name__ == '__main__': 100 | main() 101 | -------------------------------------------------------------------------------- /keras_retinanet/models/vgg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 cgratie (https://github.com/cgratie/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the 
"License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | 18 | from tensorflow import keras 19 | 20 | from . import retinanet 21 | from . import Backbone 22 | from ..utils.image import preprocess_image 23 | 24 | 25 | class VGGBackbone(Backbone): 26 | """ Describes backbone information and provides utility functions. 27 | """ 28 | 29 | def retinanet(self, *args, **kwargs): 30 | """ Returns a retinanet model using the correct backbone. 31 | """ 32 | return vgg_retinanet(*args, backbone=self.backbone, **kwargs) 33 | 34 | def download_imagenet(self): 35 | """ Downloads ImageNet weights and returns path to weights file. 36 | Weights can be downloaded at https://github.com/fizyr/keras-models/releases . 37 | """ 38 | if self.backbone == 'vgg16': 39 | resource = keras.applications.vgg16.vgg16.WEIGHTS_PATH_NO_TOP 40 | checksum = '6d6bbae143d832006294945121d1f1fc' 41 | elif self.backbone == 'vgg19': 42 | resource = keras.applications.vgg19.vgg19.WEIGHTS_PATH_NO_TOP 43 | checksum = '253f8cb515780f3b799900260a226db6' 44 | else: 45 | raise ValueError("Backbone '{}' not recognized.".format(self.backbone)) 46 | 47 | return keras.utils.get_file( 48 | '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone), 49 | resource, 50 | cache_subdir='models', 51 | file_hash=checksum 52 | ) 53 | 54 | def validate(self): 55 | """ Checks whether the backbone string is correct. 56 | """ 57 | allowed_backbones = ['vgg16', 'vgg19'] 58 | 59 | if self.backbone not in allowed_backbones: 60 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(self.backbone, allowed_backbones)) 61 | 62 | def preprocess_image(self, inputs): 63 | """ Takes as input an image and prepares it for being passed through the network. 64 | """ 65 | return preprocess_image(inputs, mode='caffe') 66 | 67 | 68 | def vgg_retinanet(num_classes, backbone='vgg16', inputs=None, modifier=None, **kwargs): 69 | """ Constructs a retinanet model using a vgg backbone. 70 | 71 | Args 72 | num_classes: Number of classes to predict. 73 | backbone: Which backbone to use (one of ('vgg16', 'vgg19')). 74 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 75 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 76 | 77 | Returns 78 | RetinaNet model with a VGG backbone. 
79 | """ 80 | # choose default input 81 | if inputs is None: 82 | inputs = keras.layers.Input(shape=(None, None, 3)) 83 | 84 | # create the vgg backbone 85 | if backbone == 'vgg16': 86 | vgg = keras.applications.VGG16(input_tensor=inputs, include_top=False, weights=None) 87 | elif backbone == 'vgg19': 88 | vgg = keras.applications.VGG19(input_tensor=inputs, include_top=False, weights=None) 89 | else: 90 | raise ValueError("Backbone '{}' not recognized.".format(backbone)) 91 | 92 | if modifier: 93 | vgg = modifier(vgg) 94 | 95 | # create the full model 96 | layer_names = ["block3_pool", "block4_pool", "block5_pool"] 97 | layer_outputs = [vgg.get_layer(name).output for name in layer_names] 98 | 99 | # C2 not provided 100 | backbone_layers = { 101 | 'C3': layer_outputs[0], 102 | 'C4': layer_outputs[1], 103 | 'C5': layer_outputs[2] 104 | } 105 | 106 | return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs) 107 | -------------------------------------------------------------------------------- /tests/backend/test_common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | import numpy as np 18 | from tensorflow import keras 19 | import keras_retinanet.backend 20 | 21 | 22 | def test_bbox_transform_inv(): 23 | boxes = np.array([[ 24 | [100, 100, 200, 200], 25 | [100, 100, 300, 300], 26 | [100, 100, 200, 300], 27 | [100, 100, 300, 200], 28 | [80, 120, 200, 200], 29 | [80, 120, 300, 300], 30 | [80, 120, 200, 300], 31 | [80, 120, 300, 200], 32 | ]]) 33 | boxes = keras.backend.variable(boxes) 34 | 35 | deltas = np.array([[ 36 | [0 , 0 , 0 , 0 ], 37 | [0 , 0.1, 0 , 0 ], 38 | [-0.3, 0 , 0 , 0 ], 39 | [0.2 , 0.2, 0 , 0 ], 40 | [0 , 0 , 0.1 , 0 ], 41 | [0 , 0 , 0 , -0.3], 42 | [0 , 0 , 0.2 , 0.2 ], 43 | [0.1 , 0.2, -0.3, 0.4 ], 44 | ]]) 45 | deltas = keras.backend.variable(deltas) 46 | 47 | expected = np.array([[ 48 | [100 , 100 , 200 , 200 ], 49 | [100 , 104 , 300 , 300 ], 50 | [ 94 , 100 , 200 , 300 ], 51 | [108 , 104 , 300 , 200 ], 52 | [ 80 , 120 , 202.4 , 200 ], 53 | [ 80 , 120 , 300 , 289.2], 54 | [ 80 , 120 , 204.8 , 307.2], 55 | [ 84.4, 123.2, 286.8 , 206.4] 56 | ]]) 57 | 58 | result = keras_retinanet.backend.bbox_transform_inv(boxes, deltas) 59 | result = keras.backend.eval(result) 60 | 61 | np.testing.assert_array_almost_equal(result, expected, decimal=2) 62 | 63 | 64 | def test_shift(): 65 | shape = (2, 3) 66 | stride = 8 67 | 68 | anchors = np.array([ 69 | [-8, -8, 8, 8], 70 | [-16, -16, 16, 16], 71 | [-12, -12, 12, 12], 72 | [-12, -16, 12, 16], 73 | [-16, -12, 16, 12] 74 | ], dtype=keras.backend.floatx()) 75 | 76 | expected = [ 77 | # anchors for (0, 0) 78 | [4 - 8, 4 - 8, 4 + 8, 4 + 8], 79 | [4 - 16, 4 - 16, 4 + 16, 4 + 16], 80 | [4 - 12, 4 - 12, 4 + 12, 4 + 12], 81 | [4 - 12, 4 - 16, 4 + 12, 4 + 16], 82 | [4 - 16, 4 - 12, 4 + 16, 4 + 12], 83 | 84 | # anchors for (0, 1) 85 | [12 - 8, 4 - 8, 12 + 8, 4 + 8], 86 | [12 - 16, 4 - 16, 12 + 16, 4 + 16], 87 | [12 - 12, 4 - 12, 12 + 12, 4 + 12], 88 | [12 - 12, 4 - 16, 12 + 12, 4 + 16], 89 | [12 - 16, 4 - 12, 12 + 16, 4 + 12], 90 | 91 | # anchors for (0, 2) 92 | [20 - 8, 4 - 8, 20 + 8, 4 + 8], 93 | [20 - 16, 4 - 16, 20 + 16, 4 + 16], 94 | [20 - 12, 4 - 12, 20 + 12, 4 + 12], 95 | [20 - 12, 4 - 16, 20 + 12, 4 + 16], 96 | [20 - 16, 4 - 12, 20 + 16, 4 + 12], 97 | 98 | # anchors for (1, 0) 99 | [4 - 8, 12 - 8, 4 + 8, 12 + 8], 100 | [4 - 16, 12 - 16, 4 + 16, 12 + 16], 101 | [4 - 12, 12 - 12, 4 + 12, 12 + 12], 102 | [4 - 12, 12 - 16, 4 + 12, 12 + 16], 103 | [4 - 16, 12 - 12, 4 + 16, 12 + 12], 104 | 105 | # anchors for (1, 1) 106 | [12 - 8, 12 - 8, 12 + 8, 12 + 8], 107 | [12 - 16, 12 - 16, 12 + 16, 12 + 16], 108 | [12 - 12, 12 - 12, 12 + 12, 12 + 12], 109 | [12 - 12, 12 - 16, 12 + 12, 12 + 16], 110 | [12 - 16, 12 - 12, 12 + 16, 12 + 12], 111 | 112 | # anchors for (1, 2) 113 | [20 - 8, 12 - 8, 20 + 8, 12 + 8], 114 | [20 - 16, 12 - 16, 20 + 16, 12 + 16], 115 | [20 - 12, 12 - 12, 20 + 12, 12 + 12], 116 | [20 - 12, 12 - 16, 20 + 12, 12 + 16], 117 | [20 - 16, 12 - 12, 20 + 16, 12 + 12], 118 | ] 119 | 120 | result = keras_retinanet.backend.shift(shape, stride, anchors) 121 | result = keras.backend.eval(result) 122 | 123 | np.testing.assert_array_equal(result, expected) 124 | -------------------------------------------------------------------------------- /keras_retinanet/callbacks/eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from tensorflow import keras 18 | from ..utils.eval import evaluate 19 | 20 | 21 | class Evaluate(keras.callbacks.Callback): 22 | """ Evaluation callback for arbitrary datasets. 23 | """ 24 | 25 | def __init__( 26 | self, 27 | generator, 28 | iou_threshold=0.5, 29 | score_threshold=0.05, 30 | max_detections=100, 31 | save_path=None, 32 | tensorboard=None, 33 | weighted_average=False, 34 | verbose=1 35 | ): 36 | """ Evaluate a given dataset using a given model at the end of every epoch during training. 37 | 38 | # Arguments 39 | generator : The generator that represents the dataset to evaluate. 40 | iou_threshold : The threshold used to consider when a detection is positive or negative. 41 | score_threshold : The score confidence threshold to use for detections. 42 | max_detections : The maximum number of detections to use per image. 43 | save_path : The path to save images with visualized detections to. 44 | tensorboard : Instance of keras.callbacks.TensorBoard used to log the mAP value. 45 | weighted_average : Compute the mAP using the weighted average of precisions among classes. 46 | verbose : Set the verbosity level, by default this is set to 1. 47 | """ 48 | self.generator = generator 49 | self.iou_threshold = iou_threshold 50 | self.score_threshold = score_threshold 51 | self.max_detections = max_detections 52 | self.save_path = save_path 53 | self.tensorboard = tensorboard 54 | self.weighted_average = weighted_average 55 | self.verbose = verbose 56 | 57 | super(Evaluate, self).__init__() 58 | 59 | def on_epoch_end(self, epoch, logs=None): 60 | logs = logs or {} 61 | 62 | # run evaluation 63 | average_precisions, _ = evaluate( 64 | self.generator, 65 | self.model, 66 | iou_threshold=self.iou_threshold, 67 | score_threshold=self.score_threshold, 68 | max_detections=self.max_detections, 69 | save_path=self.save_path 70 | ) 71 | 72 | # compute per class average precision 73 | total_instances = [] 74 | precisions = [] 75 | for label, (average_precision, num_annotations) in average_precisions.items(): 76 | if self.verbose == 1: 77 | print('{:.0f} instances of class'.format(num_annotations), 78 | self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision)) 79 | total_instances.append(num_annotations) 80 | precisions.append(average_precision) 81 | if self.weighted_average: 82 | self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances) 83 | else: 84 | self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances) 85 | 86 | if self.tensorboard: 87 | import tensorflow as tf 88 | writer = tf.summary.create_file_writer(self.tensorboard.log_dir) 89 | with writer.as_default(): 90 | tf.summary.scalar("mAP", self.mean_ap, step=epoch) 91 | if self.verbose == 1: 92 | for label, (average_precision, num_annotations) in average_precisions.items(): 93 | tf.summary.scalar("AP_" + self.generator.label_to_name(label), average_precision, step=epoch) 94 | writer.flush() 95 | 96 | logs['mAP'] = self.mean_ap 97 | 98 | if self.verbose == 1: 99 | print('mAP: {:.4f}'.format(self.mean_ap)) 100 | 
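The `Evaluate` callback above reduces the per-class average precisions to a single mAP in one of two ways, controlled by `weighted_average`. Below is a minimal, self-contained sketch of just that aggregation step; the `average_precisions` dictionary (label -> (AP, number of annotations)) uses made-up numbers for illustration and would normally come from `keras_retinanet.utils.eval.evaluate`.

```python
# Sketch of the mAP aggregation done in Evaluate.on_epoch_end (sample values are invented).
average_precisions = {0: (0.72, 410), 1: (0.35, 9)}  # label -> (AP, num_annotations)

total_instances = [num for _, num in average_precisions.values()]
precisions = [ap for ap, _ in average_precisions.values()]

# weighted_average=False: plain mean over classes that have at least one annotation
mean_ap = sum(precisions) / sum(x > 0 for x in total_instances)

# weighted_average=True: each class AP weighted by its number of ground-truth boxes
weighted_mean_ap = sum(a * b for a, b in zip(total_instances, precisions)) / sum(total_instances)

print('mAP: {:.4f}, weighted mAP: {:.4f}'.format(mean_ap, weighted_mean_ap))
```

In training, an instance of this callback is typically passed to `model.fit` via `callbacks`, optionally together with a `keras.callbacks.TensorBoard` instance so the per-epoch mAP also shows up in TensorBoard.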
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue, 4 | email, or any other method with the owners of this repository before making a change. 5 | 6 | Please note we have a code of conduct, please follow it in all your interactions with the project. 7 | 8 | ## Pull Request Process 9 | 10 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a 11 | build. 12 | 2. Update the README.md with details of changes to the interface, this includes new environment 13 | variables, exposed ports, useful file locations and container parameters. 14 | 3. Increase the version numbers in any examples files and the README.md to the new version that this 15 | Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). 16 | 4. You may merge the Pull Request in once you have the sign-off of two other developers, or if you 17 | do not have permission to do that, you may request the second reviewer to merge it for you. 18 | 19 | ## Code of Conduct 20 | 21 | ### Our Pledge 22 | 23 | In the interest of fostering an open and welcoming environment, we as 24 | contributors and maintainers pledge to making participation in our project and 25 | our community a harassment-free experience for everyone, regardless of age, body 26 | size, disability, ethnicity, gender identity and expression, level of experience, 27 | nationality, personal appearance, race, religion, or sexual identity and 28 | orientation. 29 | 30 | ### Our Standards 31 | 32 | Examples of behavior that contributes to creating a positive environment 33 | include: 34 | 35 | * Using welcoming and inclusive language 36 | * Being respectful of differing viewpoints and experiences 37 | * Gracefully accepting constructive criticism 38 | * Focusing on what is best for the community 39 | * Showing empathy towards other community members 40 | 41 | Examples of unacceptable behavior by participants include: 42 | 43 | * The use of sexualized language or imagery and unwelcome sexual attention or 44 | advances 45 | * Trolling, insulting/derogatory comments, and personal or political attacks 46 | * Public or private harassment 47 | * Publishing others' private information, such as a physical or electronic 48 | address, without explicit permission 49 | * Other conduct which could reasonably be considered inappropriate in a 50 | professional setting 51 | 52 | ### Our Responsibilities 53 | 54 | Project maintainers are responsible for clarifying the standards of acceptable 55 | behavior and are expected to take appropriate and fair corrective action in 56 | response to any instances of unacceptable behavior. 57 | 58 | Project maintainers have the right and responsibility to remove, edit, or 59 | reject comments, commits, code, wiki edits, issues, and other contributions 60 | that are not aligned to this Code of Conduct, or to ban temporarily or 61 | permanently any contributor for other behaviors that they deem inappropriate, 62 | threatening, offensive, or harmful. 63 | 64 | ### Scope 65 | 66 | This Code of Conduct applies both within project spaces and in public spaces 67 | when an individual is representing the project or its community. 
Examples of 68 | representing a project or community include using an official project e-mail 69 | address, posting via an official social media account, or acting as an appointed 70 | representative at an online or offline event. Representation of a project may be 71 | further defined and clarified by project maintainers. 72 | 73 | ### Enforcement 74 | 75 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 76 | reported by contacting the project team at gosha20777@live.ru. All 77 | complaints will be reviewed and investigated and will result in a response that 78 | is deemed necessary and appropriate to the circumstances. The project team is 79 | obligated to maintain confidentiality with regard to the reporter of an incident. 80 | Further details of specific enforcement policies may be posted separately. 81 | 82 | Project maintainers who do not follow or enforce the Code of Conduct in good 83 | faith may face temporary or permanent repercussions as determined by other 84 | members of the project's leadership. 85 | 86 | ### Attribution 87 | 88 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 89 | available at [http://contributor-covenant.org/version/1/4][version] 90 | 91 | [homepage]: http://contributor-covenant.org 92 | [version]: http://contributor-covenant.org/version/1/4/ 93 | -------------------------------------------------------------------------------- /cli_inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import tensorflow as tf 3 | from keras_retinanet import models 4 | from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image, compute_resize_scale 5 | from keras_retinanet.utils.visualization import draw_box, draw_caption 6 | from keras_retinanet.utils.colors import label_color 7 | import cv2 8 | import argparse 9 | import numpy as np 10 | import time 11 | 12 | 13 | def parse_args(args): 14 | parser = argparse.ArgumentParser(description='convert model') 15 | parser.add_argument( 16 | '--img', 17 | help='path to image', 18 | type=str, 19 | required=True 20 | ) 21 | parser.add_argument( 22 | '--bin', 23 | help='path to h5 keras inference model', 24 | type=str, 25 | required=True 26 | ) 27 | parser.add_argument( 28 | '--backbone', 29 | help='backbone name', 30 | type=str, 31 | required=False, 32 | default='resnet50' 33 | ) 34 | parser.add_argument( 35 | '--count', 36 | help='iference count', 37 | type=int, 38 | required=False, 39 | default=1 40 | ) 41 | parser.add_argument( 42 | '--height', 43 | help='iference count', 44 | type=int, 45 | required=False, 46 | default=2100 47 | ) 48 | parser.add_argument( 49 | '--width', 50 | help='iference count', 51 | type=int, 52 | required=False, 53 | default=2100 54 | ) 55 | parser.add_argument( 56 | '--gpu', 57 | help='use gpu', 58 | action='store_true', 59 | required=False, 60 | ) 61 | return parser.parse_args(args) 62 | 63 | def create_model(backbone_name, num_classes=1): 64 | backbone_factory = models.backbone(backbone_name) 65 | model = backbone_factory.retinanet(num_classes) 66 | return models.convert_model(model) 67 | 68 | def setup_gpu(gpu_id: int): 69 | if gpu_id == -1: 70 | tf.config.experimental.set_visible_devices([], 'GPU') 71 | return 72 | 73 | gpus = tf.config.experimental.list_physical_devices('GPU') 74 | if gpus: 75 | try: 76 | tf.config.experimental.set_virtual_device_configuration( 77 | gpus[gpu_id], 78 | 
[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]) 79 | logical_gpus = tf.config.experimental.list_logical_devices('GPU') 80 | print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") 81 | except RuntimeError as e: 82 | print(e) 83 | 84 | def main(args=None): 85 | args=parse_args(args) 86 | 87 | model_bin = args.bin 88 | img_fn = args.img 89 | predict_count = args.count 90 | backbone = args.backbone 91 | min_side = min(args.height, args.width) 92 | max_side = max(args.height, args.width) 93 | 94 | print("loading model...") 95 | if args.gpu: 96 | setup_gpu(0) 97 | 98 | model = models.load_model(model_bin, backbone_name=backbone) 99 | 100 | print(f'model input shape: {model.inputs[0].shape}') 101 | 102 | start_time = time.time() 103 | 104 | image = cv2.imread(img_fn) 105 | image, scale = resize_image(image, min_side=min_side, max_side=max_side) 106 | image = preprocess_image(image) 107 | print("prepoocess image at {} s".format(time.time() - start_time)) 108 | 109 | labels_to_names = {0: 'Pedestrian'} 110 | 111 | print(f'make {predict_count} predictions:') 112 | for _ in range(0, predict_count): 113 | start_time = time.time() 114 | boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0)) 115 | print("\t{} s".format(time.time() - start_time)) 116 | 117 | 118 | print("*"*20) 119 | print('bboxes:', boxes.shape) 120 | print('scores:', scores.shape) 121 | print('labels:', labels.shape) 122 | 123 | boxes /= scale 124 | 125 | objects_count = 0 126 | 127 | print("*"*20) 128 | for box, score, label in zip(boxes[0], scores[0], labels[0]): 129 | # scores are sorted so we can break 130 | if score < 0.5: 131 | break 132 | b = np.array(box.astype(int)).astype(int) 133 | # x1 y1 x2 y2 134 | print(f'{labels_to_names[label]}:') 135 | print(f'\tscore: {score}') 136 | print(f'\tbox: {b[0]} {b[1]} {b[2]} {b[3]}') 137 | objects_count = objects_count + 1 138 | print(f'found objects: {objects_count}') 139 | 140 | 141 | if __name__ == '__main__': 142 | main() -------------------------------------------------------------------------------- /keras_retinanet/utils/visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import cv2 18 | import numpy as np 19 | 20 | from .colors import label_color 21 | 22 | 23 | def draw_box(image, box, color, thickness=2): 24 | """ Draws a box on an image with a given color. 25 | 26 | # Arguments 27 | image : The image to draw on. 28 | box : A list of 4 elements (x1, y1, x2, y2). 29 | color : The color of the box. 30 | thickness : The thickness of the lines to draw a box with. 31 | """ 32 | b = np.array(box).astype(int) 33 | cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness, cv2.LINE_AA) 34 | 35 | 36 | def draw_caption(image, box, caption): 37 | """ Draws a caption above the box in an image. 
38 | 39 | # Arguments 40 | image : The image to draw on. 41 | box : A list of 4 elements (x1, y1, x2, y2). 42 | caption : String containing the text to draw. 43 | """ 44 | b = np.array(box).astype(int) 45 | cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) 46 | cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) 47 | 48 | 49 | def draw_boxes(image, boxes, color, thickness=2): 50 | """ Draws boxes on an image with a given color. 51 | 52 | # Arguments 53 | image : The image to draw on. 54 | boxes : A [N, 4] matrix (x1, y1, x2, y2). 55 | color : The color of the boxes. 56 | thickness : The thickness of the lines to draw boxes with. 57 | """ 58 | for b in boxes: 59 | draw_box(image, b, color, thickness=thickness) 60 | 61 | 62 | def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5): 63 | """ Draws detections in an image. 64 | 65 | # Arguments 66 | image : The image to draw on. 67 | boxes : A [N, 4] matrix (x1, y1, x2, y2). 68 | scores : A list of N classification scores. 69 | labels : A list of N labels. 70 | color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. 71 | label_to_name : (optional) Functor for mapping a label to a name. 72 | score_threshold : Threshold used for determining what detections to draw. 73 | """ 74 | selection = np.where(scores > score_threshold)[0] 75 | 76 | for i in selection: 77 | c = color if color is not None else label_color(labels[i]) 78 | draw_box(image, boxes[i, :], color=c) 79 | 80 | # draw labels 81 | caption = (label_to_name(labels[i]) if label_to_name else labels[i]) + ': {0:.2f}'.format(scores[i]) 82 | draw_caption(image, boxes[i, :], caption) 83 | 84 | 85 | def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None): 86 | """ Draws annotations in an image. 87 | 88 | # Arguments 89 | image : The image to draw on. 90 | annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]). 91 | color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. 92 | label_to_name : (optional) Functor for mapping a label to a name. 93 | """ 94 | if isinstance(annotations, np.ndarray): 95 | annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]} 96 | 97 | assert('bboxes' in annotations) 98 | assert('labels' in annotations) 99 | assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0]) 100 | 101 | for i in range(annotations['bboxes'].shape[0]): 102 | label = annotations['labels'][i] 103 | c = color if color is not None else label_color(label) 104 | caption = '{}'.format(label_to_name(label) if label_to_name else label) 105 | draw_caption(image, annotations['bboxes'][i], caption) 106 | draw_box(image, annotations['bboxes'][i], color=c) 107 | -------------------------------------------------------------------------------- /keras_retinanet/models/densenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 vidosits (https://github.com/vidosits/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from tensorflow import keras 18 | 19 | from . import retinanet 20 | from . import Backbone 21 | from ..utils.image import preprocess_image 22 | 23 | 24 | allowed_backbones = { 25 | 'densenet121': ([6, 12, 24, 16], keras.applications.densenet.DenseNet121), 26 | 'densenet169': ([6, 12, 32, 32], keras.applications.densenet.DenseNet169), 27 | 'densenet201': ([6, 12, 48, 32], keras.applications.densenet.DenseNet201), 28 | } 29 | 30 | 31 | class DenseNetBackbone(Backbone): 32 | """ Describes backbone information and provides utility functions. 33 | """ 34 | 35 | def retinanet(self, *args, **kwargs): 36 | """ Returns a retinanet model using the correct backbone. 37 | """ 38 | return densenet_retinanet(*args, backbone=self.backbone, **kwargs) 39 | 40 | def download_imagenet(self): 41 | """ Download pre-trained weights for the specified backbone name. 42 | This name is in the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop 43 | where backbone is the densenet + number of layers (e.g. densenet121). 44 | For more info check the explanation from the keras densenet script itself: 45 | https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py 46 | """ 47 | origin = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/' 48 | file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5' 49 | 50 | # load weights 51 | if keras.backend.image_data_format() == 'channels_first': 52 | raise ValueError('Weights for "channels_first" format are not available.') 53 | 54 | weights_url = origin + file_name.format(self.backbone) 55 | return keras.utils.get_file(file_name.format(self.backbone), weights_url, cache_subdir='models') 56 | 57 | def validate(self): 58 | """ Checks whether the backbone string is correct. 59 | """ 60 | backbone = self.backbone.split('_')[0] 61 | 62 | if backbone not in allowed_backbones: 63 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones.keys())) 64 | 65 | def preprocess_image(self, inputs): 66 | """ Takes as input an image and prepares it for being passed through the network. 67 | """ 68 | return preprocess_image(inputs, mode='tf') 69 | 70 | 71 | def densenet_retinanet(num_classes, backbone='densenet121', inputs=None, modifier=None, **kwargs): 72 | """ Constructs a retinanet model using a densenet backbone. 73 | 74 | Args 75 | num_classes: Number of classes to predict. 76 | backbone: Which backbone to use (one of ('densenet121', 'densenet169', 'densenet201')). 77 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 78 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 79 | 80 | Returns 81 | RetinaNet model with a DenseNet backbone. 
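    Example (illustrative sketch; weights and training setup are handled elsewhere):

        from keras_retinanet import models

        backbone = models.backbone('densenet121')
        model = backbone.retinanet(num_classes=1)  # dispatches to densenet_retinanet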
82 | """ 83 | # choose default input 84 | if inputs is None: 85 | inputs = keras.layers.Input((None, None, 3)) 86 | 87 | blocks, creator = allowed_backbones[backbone] 88 | model = creator(input_tensor=inputs, include_top=False, pooling=None, weights=None) 89 | 90 | # get last conv layer from the end of each dense block 91 | layer_outputs = [model.get_layer(name='conv{}_block{}_concat'.format(idx + 2, block_num)).output for idx, block_num in enumerate(blocks)] 92 | 93 | # create the densenet backbone 94 | # layer_outputs contains 4 layers 95 | model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name) 96 | 97 | # invoke modifier if given 98 | if modifier: 99 | model = modifier(model) 100 | 101 | # create the full model 102 | backbone_layers = { 103 | 'C2': model.outputs[0], 104 | 'C3': model.outputs[1], 105 | 'C4': model.outputs[2], 106 | 'C5': model.outputs[3] 107 | } 108 | 109 | model = retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs) 110 | 111 | return model 112 | -------------------------------------------------------------------------------- /keras_retinanet/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from tensorflow import keras 18 | from ..utils.image import preprocess_image 19 | 20 | from . import retinanet 21 | from . import Backbone 22 | 23 | 24 | class MobileNetBackbone(Backbone): 25 | """ Describes backbone information and provides utility functions. 26 | """ 27 | 28 | allowed_backbones = ['mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224'] 29 | 30 | def retinanet(self, *args, **kwargs): 31 | """ Returns a retinanet model using the correct backbone. 32 | """ 33 | return mobilenet_retinanet(*args, backbone=self.backbone, **kwargs) 34 | 35 | def download_imagenet(self): 36 | """ Download pre-trained weights for the specified backbone name. 37 | This name is in the format mobilenet{rows}_{alpha} where rows is the 38 | imagenet shape dimension and 'alpha' controls the width of the network. 39 | For more info check the explanation from the keras mobilenet script itself. 
40 | """ 41 | 42 | alpha = float(self.backbone.split('_')[1]) 43 | rows = int(self.backbone.split('_')[0].replace('mobilenet', '')) 44 | 45 | # load weights 46 | if keras.backend.image_data_format() == 'channels_first': 47 | raise ValueError('Weights for "channels_last" format ' 48 | 'are not available.') 49 | if alpha == 1.0: 50 | alpha_text = '1_0' 51 | elif alpha == 0.75: 52 | alpha_text = '7_5' 53 | elif alpha == 0.50: 54 | alpha_text = '5_0' 55 | else: 56 | alpha_text = '2_5' 57 | 58 | model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows) 59 | weights_url = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.6/' + model_name 60 | weights_path = keras.utils.get_file(model_name, weights_url, cache_subdir='models') 61 | 62 | return weights_path 63 | 64 | def validate(self): 65 | """ Checks whether the backbone string is correct. 66 | """ 67 | backbone = self.backbone.split('_')[0] 68 | 69 | if backbone not in MobileNetBackbone.allowed_backbones: 70 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, MobileNetBackbone.allowed_backbones)) 71 | 72 | def preprocess_image(self, inputs): 73 | """ Takes as input an image and prepares it for being passed through the network. 74 | """ 75 | return preprocess_image(inputs, mode='tf') 76 | 77 | 78 | def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs): 79 | """ Constructs a retinanet model using a mobilenet backbone. 80 | 81 | Args 82 | num_classes: Number of classes to predict. 83 | backbone: Which backbone to use (one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224')). 84 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 85 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 86 | 87 | Returns 88 | RetinaNet model with a MobileNet backbone. 89 | """ 90 | alpha = float(backbone.split('_')[1]) 91 | 92 | # choose default input 93 | if inputs is None: 94 | inputs = keras.layers.Input((None, None, 3)) 95 | 96 | backbone = keras.applications.mobilenet.MobileNet(input_tensor=inputs, alpha=alpha, include_top=False, pooling=None, weights=None) 97 | 98 | # create the full model 99 | layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu'] 100 | layer_outputs = [backbone.get_layer(name).output for name in layer_names] 101 | backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone.name) 102 | 103 | # invoke modifier if given 104 | if modifier: 105 | backbone = modifier(backbone) 106 | 107 | # C2 not provided 108 | backbone_layers = { 109 | 'C3': backbone.outputs[0], 110 | 'C4': backbone.outputs[1], 111 | 'C5': backbone.outputs[2] 112 | } 113 | 114 | return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs) 115 | -------------------------------------------------------------------------------- /keras_retinanet/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import tensorflow 18 | from tensorflow import keras 19 | 20 | 21 | def focal(alpha=0.25, gamma=2.0, cutoff=0.5): 22 | """ Create a functor for computing the focal loss. 23 | 24 | Args 25 | alpha: Scale the focal weight with alpha. 26 | gamma: Take the power of the focal weight with gamma. 27 | cutoff: Positive prediction cutoff for soft targets 28 | 29 | Returns 30 | A functor that computes the focal loss using the alpha and gamma. 31 | """ 32 | def _focal(y_true, y_pred): 33 | """ Compute the focal loss given the target tensor and the predicted tensor. 34 | 35 | As defined in https://arxiv.org/abs/1708.02002 36 | 37 | Args 38 | y_true: Tensor of target data from the generator with shape (B, N, num_classes). 39 | y_pred: Tensor of predicted data from the network with shape (B, N, num_classes). 40 | 41 | Returns 42 | The focal loss of y_pred w.r.t. y_true. 43 | """ 44 | labels = y_true[:, :, :-1] 45 | anchor_state = y_true[:, :, -1] # -1 for ignore, 0 for background, 1 for object 46 | classification = y_pred 47 | 48 | # filter out "ignore" anchors 49 | indices = tensorflow.where(keras.backend.not_equal(anchor_state, -1)) 50 | labels = tensorflow.gather_nd(labels, indices) 51 | classification = tensorflow.gather_nd(classification, indices) 52 | 53 | # compute the focal loss 54 | alpha_factor = keras.backend.ones_like(labels) * alpha 55 | alpha_factor = tensorflow.where(keras.backend.greater(labels, cutoff), alpha_factor, 1 - alpha_factor) 56 | focal_weight = tensorflow.where(keras.backend.greater(labels, cutoff), 1 - classification, classification) 57 | focal_weight = alpha_factor * focal_weight ** gamma 58 | 59 | cls_loss = focal_weight * keras.backend.binary_crossentropy(labels, classification) 60 | 61 | # compute the normalizer: the number of positive anchors 62 | normalizer = tensorflow.where(keras.backend.equal(anchor_state, 1)) 63 | normalizer = keras.backend.cast(keras.backend.shape(normalizer)[0], keras.backend.floatx()) 64 | normalizer = keras.backend.maximum(keras.backend.cast_to_floatx(1.0), normalizer) 65 | 66 | return keras.backend.sum(cls_loss) / normalizer 67 | 68 | return _focal 69 | 70 | 71 | def smooth_l1(sigma=3.0): 72 | """ Create a smooth L1 loss functor. 73 | 74 | Args 75 | sigma: This argument defines the point where the loss changes from L2 to L1. 76 | 77 | Returns 78 | A functor for computing the smooth L1 loss given target data and predicted data. 79 | """ 80 | sigma_squared = sigma ** 2 81 | 82 | def _smooth_l1(y_true, y_pred): 83 | """ Compute the smooth L1 loss of y_pred w.r.t. y_true. 84 | 85 | Args 86 | y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive). 87 | y_pred: Tensor from the network of shape (B, N, 4). 88 | 89 | Returns 90 | The smooth L1 loss of y_pred w.r.t. y_true. 
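            For example, with the default sigma = 3 the loss switches branches at
            |x| = 1 / sigma^2 ≈ 0.11: an error of 0.05 is penalised quadratically
            (0.5 * 9 * 0.05^2 ≈ 0.011), while an error of 0.5 is penalised linearly
            (0.5 - 0.5 / 9 ≈ 0.44).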
91 | """ 92 | # separate target and state 93 | regression = y_pred 94 | regression_target = y_true[:, :, :-1] 95 | anchor_state = y_true[:, :, -1] 96 | 97 | # filter out "ignore" anchors 98 | indices = tensorflow.where(keras.backend.equal(anchor_state, 1)) 99 | regression = tensorflow.gather_nd(regression, indices) 100 | regression_target = tensorflow.gather_nd(regression_target, indices) 101 | 102 | # compute smooth L1 loss 103 | # f(x) = 0.5 * (sigma * x)^2 if |x| < 1 / sigma / sigma 104 | # |x| - 0.5 / sigma / sigma otherwise 105 | regression_diff = regression - regression_target 106 | regression_diff = keras.backend.abs(regression_diff) 107 | regression_loss = tensorflow.where( 108 | keras.backend.less(regression_diff, 1.0 / sigma_squared), 109 | 0.5 * sigma_squared * keras.backend.pow(regression_diff, 2), 110 | regression_diff - 0.5 / sigma_squared 111 | ) 112 | 113 | # compute the normalizer: the number of positive anchors 114 | normalizer = keras.backend.maximum(1, keras.backend.shape(indices)[0]) 115 | normalizer = keras.backend.cast(normalizer, dtype=keras.backend.floatx()) 116 | return keras.backend.sum(regression_loss) / normalizer 117 | 118 | return _smooth_l1 119 | -------------------------------------------------------------------------------- /keras_retinanet/models/mobilenet_v3.py: -------------------------------------------------------------------------------- 1 | ''' 2 | https://github.com/lacmus-foundation/lacmus 3 | Copyright (C) 2019-2020 lacmus-foundation 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see . 17 | ''' 18 | 19 | from . import Backbone 20 | from . import retinanet 21 | 22 | from ..utils.image import preprocess_image 23 | 24 | from tensorflow import keras 25 | from tensorflow.keras.utils import get_file 26 | 27 | from .mobilenetv3.mobilenet_v3_base import relu6, hard_swish 28 | from .mobilenetv3.mobilenet_v3_large import MobileNetV3_Large 29 | from .mobilenetv3.mobilenet_v3_small import MobileNetV3_Small 30 | 31 | 32 | class MobileNetV3Backbone(Backbone): 33 | """ Describes backbone information and provides utility functions. 34 | """ 35 | 36 | def __init__(self, backbone): 37 | super(MobileNetV3Backbone, self).__init__(backbone) 38 | self.custom_objects['hard_swish'] = hard_swish 39 | self.custom_objects['relu6'] = relu6 40 | 41 | allowed_backbones = ['mobilenet_v3_small', 'mobilenet_v3_large'] 42 | 43 | def retinanet(self, *args, **kwargs): 44 | """ Returns a retinanet model using the correct backbone. 45 | """ 46 | return mobilenetv3_retinanet(*args, backbone_name=self.backbone, **kwargs) 47 | 48 | def download_imagenet(self): 49 | """ Downloads ImageNet weights and returns path to weights file. 50 | """ 51 | err_msg = "ImageNet weights are not available for the backbone {0}. 
Please train with --no-weights option" 52 | err_msg = err_msg.format(self.backbone) 53 | raise NotImplementedError(err_msg) 54 | 55 | def validate(self): 56 | """ Checks whether the backbone string is correct. 57 | """ 58 | name_parts = self.backbone.split('_') 59 | 60 | if '_'.join(name_parts[:3]) not in MobileNetV3Backbone.allowed_backbones: 61 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, MobileNetV3Backbone.allowed_backbones)) 62 | 63 | def preprocess_image(self, inputs): 64 | """ Takes as input an image and prepares it for being passed through the network. 65 | """ 66 | return preprocess_image(inputs, mode='tf') 67 | 68 | 69 | def mobilenetv3_retinanet(num_classes, backbone_name='mobilenet_v3_small', inputs=None, modifier=None, **kwargs): 70 | """ Constructs a retinanet model using a mobilenet backbone. 71 | 72 | Args 73 | num_classes: Number of classes to predict. 74 | backbone: Which backbone to use (mobilenet_v3_small or mobilenet_v3_large). 75 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 76 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 77 | 78 | Returns 79 | RetinaNet model with a MobileNet backbone. 80 | """ 81 | name_parts = backbone_name.split('_') 82 | if len(name_parts) > 3: 83 | alpha = float(name_parts[3]) 84 | else: 85 | alpha = 1.0 86 | 87 | # choose default input 88 | if inputs is None: 89 | if keras.backend.image_data_format() == 'channels_first': 90 | shape=(3, None, None) 91 | else: 92 | shape=(None, None, 3) 93 | else: 94 | shape = inputs.shape 95 | 96 | if 'mobilenet_v3_small' in backbone_name: 97 | backbone = MobileNetV3_Small(shape=shape, n_class=1, alpha=alpha, include_top=False).build() 98 | layer_outputs = [ 99 | backbone.layers[30].output, # activation_7, bneck 3 before pw, 28x28x88 100 | backbone.layers[98].output, # multiply_5, bneck 8 before pwl, 14x14x144 101 | backbone.layers[145].output # activation_24, just before global pooling, 7x7x576 102 | ] 103 | elif 'mobilenet_v3_large' in backbone_name: 104 | backbone = MobileNetV3_Large(shape=shape, n_class=1, alpha=alpha, include_top=False).build() 105 | layer_outputs = [ 106 | backbone.layers[67].output, # multiply_3, bneck 6 before pwl, 28x28x120 107 | backbone.layers[129].output, # multiply_5, bneck 12 before pwl, 14x14x672 108 | backbone.layers[176].output # activation_32, just before global pooling, 7x7x960 109 | ] 110 | 111 | inputs = backbone.inputs 112 | # create the full model 113 | backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone_name) 114 | 115 | # invoke modifier if given 116 | if modifier: 117 | backbone = modifier(backbone) 118 | 119 | return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone.outputs, **kwargs) 120 | -------------------------------------------------------------------------------- /keras_retinanet/models/mobilenetv3/mobilenet_v3_base.py: -------------------------------------------------------------------------------- 1 | """MobileNet v3 models for Keras. 
2 | # Reference 3 | [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs) 4 | """ 5 | 6 | 7 | from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D 8 | from tensorflow.keras.layers import Activation, BatchNormalization, Add, Multiply, Reshape 9 | 10 | from tensorflow.keras import backend as K 11 | 12 | def relu6(x): 13 | """Relu 6 14 | """ 15 | return K.relu(x, max_value=6.0) 16 | 17 | def hard_swish(x): 18 | """Hard swish 19 | """ 20 | return x * K.relu(x + 3.0, max_value=6.0) / 6.0 21 | 22 | class MobileNetBase: 23 | def __init__(self, shape, n_class, alpha=1.0): 24 | """Init 25 | 26 | # Arguments 27 | input_shape: An integer or tuple/list of 3 integers, shape 28 | of input tensor. 29 | n_class: Integer, number of classes. 30 | alpha: Integer, width multiplier. 31 | """ 32 | self.shape = shape 33 | self.n_class = n_class 34 | self.alpha = alpha 35 | 36 | def _return_activation(self, x, nl): 37 | """Convolution Block 38 | This function defines a activation choice. 39 | 40 | # Arguments 41 | x: Tensor, input tensor of conv layer. 42 | nl: String, nonlinearity activation type. 43 | 44 | # Returns 45 | Output tensor. 46 | """ 47 | if nl == 'HS': 48 | x = Activation(hard_swish)(x) 49 | if nl == 'RE': 50 | x = Activation(relu6)(x) 51 | 52 | return x 53 | 54 | def _conv_block(self, inputs, filters, kernel, strides, nl): 55 | """Convolution Block 56 | This function defines a 2D convolution operation with BN and activation. 57 | 58 | # Arguments 59 | inputs: Tensor, input tensor of conv layer. 60 | filters: Integer, the dimensionality of the output space. 61 | kernel: An integer or tuple/list of 2 integers, specifying the 62 | width and height of the 2D convolution window. 63 | strides: An integer or tuple/list of 2 integers, 64 | specifying the strides of the convolution along the width and height. 65 | Can be a single integer to specify the same value for 66 | all spatial dimensions. 67 | nl: String, nonlinearity activation type. 68 | 69 | # Returns 70 | Output tensor. 71 | """ 72 | 73 | channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 74 | 75 | x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs) 76 | x = BatchNormalization(axis=channel_axis)(x) 77 | 78 | return self._return_activation(x, nl) 79 | 80 | def _squeeze(self, inputs): 81 | """Squeeze and Excitation. 82 | This function defines a squeeze structure. 83 | 84 | # Arguments 85 | inputs: Tensor, input tensor of conv layer. 86 | """ 87 | input_channels = int(inputs.shape[-1]) 88 | 89 | x = GlobalAveragePooling2D()(inputs) 90 | x = Dense(input_channels, activation='relu')(x) 91 | x = Dense(input_channels, activation='hard_sigmoid')(x) 92 | x = Reshape((1, 1, input_channels))(x) 93 | x = Multiply()([inputs, x]) 94 | 95 | return x 96 | 97 | def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl): 98 | """Bottleneck 99 | This function defines a basic bottleneck structure. 100 | 101 | # Arguments 102 | inputs: Tensor, input tensor of conv layer. 103 | filters: Integer, the dimensionality of the output space. 104 | kernel: An integer or tuple/list of 2 integers, specifying the 105 | width and height of the 2D convolution window. 106 | e: Integer, expansion factor. 107 | t is always applied to the input size. 108 | s: An integer or tuple/list of 2 integers,specifying the strides 109 | of the convolution along the width and height.Can be a single 110 | integer to specify the same value for all spatial dimensions. 
111 | squeeze: Boolean, Whether to use the squeeze. 112 | nl: String, nonlinearity activation type. 113 | 114 | # Returns 115 | Output tensor. 116 | """ 117 | 118 | channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 119 | input_shape = K.int_shape(inputs) 120 | 121 | tchannel = int(e) 122 | cchannel = int(self.alpha * filters) 123 | 124 | r = s == 1 and input_shape[3] == filters 125 | 126 | x = self._conv_block(inputs, tchannel, (1, 1), (1, 1), nl) 127 | 128 | x = DepthwiseConv2D(kernel, strides=(s, s), depth_multiplier=1, padding='same')(x) 129 | x = BatchNormalization(axis=channel_axis)(x) 130 | x = self._return_activation(x, nl) 131 | 132 | if squeeze: 133 | x = self._squeeze(x) 134 | 135 | x = Conv2D(cchannel, (1, 1), strides=(1, 1), padding='same')(x) 136 | x = BatchNormalization(axis=channel_axis)(x) 137 | 138 | if r: 139 | x = Add()([x, inputs]) 140 | 141 | return x 142 | 143 | def build(self): 144 | pass 145 | -------------------------------------------------------------------------------- /keras_retinanet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | 4 | 5 | class Backbone(object): 6 | """ This class stores additional information on backbones. 7 | """ 8 | def __init__(self, backbone): 9 | # a dictionary mapping custom layer names to the correct classes 10 | from .. import layers 11 | from .. import losses 12 | from .. import initializers 13 | self.custom_objects = { 14 | 'UpsampleLike' : layers.UpsampleLike, 15 | 'PriorProbability' : initializers.PriorProbability, 16 | 'RegressBoxes' : layers.RegressBoxes, 17 | 'FilterDetections' : layers.FilterDetections, 18 | 'Anchors' : layers.Anchors, 19 | 'ClipBoxes' : layers.ClipBoxes, 20 | '_smooth_l1' : losses.smooth_l1(), 21 | '_focal' : losses.focal(), 22 | } 23 | 24 | self.backbone = backbone 25 | self.validate() 26 | 27 | def retinanet(self, *args, **kwargs): 28 | """ Returns a retinanet model using the correct backbone. 29 | """ 30 | raise NotImplementedError('retinanet method not implemented.') 31 | 32 | def download_imagenet(self): 33 | """ Downloads ImageNet weights and returns path to weights file. 34 | """ 35 | raise NotImplementedError('download_imagenet method not implemented.') 36 | 37 | def validate(self): 38 | """ Checks whether the backbone string is correct. 39 | """ 40 | raise NotImplementedError('validate method not implemented.') 41 | 42 | def preprocess_image(self, inputs): 43 | """ Takes as input an image and prepares it for being passed through the network. 44 | Having this function in Backbone allows other backbones to define a specific preprocessing step. 45 | """ 46 | raise NotImplementedError('preprocess_image method not implemented.') 47 | 48 | 49 | def backbone(backbone_name): 50 | """ Returns a backbone object for the given backbone. 
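    Example (illustrative):

        b = backbone('resnet50')
        training_model = b.retinanet(num_classes=1)
        image = b.preprocess_image(raw_image)  # `raw_image` is a placeholder numpy array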
51 | """ 52 | if 'densenet' in backbone_name: 53 | from .densenet import DenseNetBackbone as b 54 | elif 'seresnext' in backbone_name or 'seresnet' in backbone_name or 'senet' in backbone_name: 55 | from .senet import SeBackbone as b 56 | elif 'resnet' in backbone_name: 57 | from .resnet import ResNetBackbone as b 58 | elif 'mobilenet' in backbone_name: 59 | from .mobilenet import MobileNetBackbone as b 60 | elif 'mobilenet_v3' in backbone_name: 61 | from .mobilenet_v3 import MobileNetV3Backbone as b 62 | elif 'vgg' in backbone_name: 63 | from .vgg import VGGBackbone as b 64 | elif 'EfficientNet' in backbone_name: 65 | from .effnet import EfficientNetBackbone as b 66 | else: 67 | raise NotImplementedError('Backbone class for \'{}\' not implemented.'.format(backbone)) 68 | 69 | return b(backbone_name) 70 | 71 | 72 | def load_model(filepath, backbone_name='resnet50'): 73 | """ Loads a retinanet model using the correct custom objects. 74 | 75 | Args 76 | filepath: one of the following: 77 | - string, path to the saved model, or 78 | - h5py.File object from which to load the model 79 | backbone_name : Backbone with which the model was trained. 80 | 81 | Returns 82 | A keras.models.Model object. 83 | 84 | Raises 85 | ImportError: if h5py is not available. 86 | ValueError: In case of an invalid savefile. 87 | """ 88 | from tensorflow import keras 89 | return keras.models.load_model(filepath, custom_objects=backbone(backbone_name).custom_objects) 90 | 91 | 92 | def convert_model(model, nms=True, class_specific_filter=True, anchor_params=None, **kwargs): 93 | """ Converts a training model to an inference model. 94 | 95 | Args 96 | model : A retinanet training model. 97 | nms : Boolean, whether to add NMS filtering to the converted model. 98 | class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only. 99 | anchor_params : Anchor parameters object. If omitted, default values are used. 100 | **kwargs : Inference and minimal retinanet model settings. 101 | 102 | Returns 103 | A keras.models.Model object. 104 | 105 | Raises 106 | ImportError: if h5py is not available. 107 | ValueError: In case of an invalid savefile. 108 | """ 109 | from .retinanet import retinanet_bbox 110 | return retinanet_bbox(model=model, nms=nms, class_specific_filter=class_specific_filter, anchor_params=anchor_params, **kwargs) 111 | 112 | 113 | def assert_training_model(model): 114 | """ Assert that the model is a training model. 115 | """ 116 | assert(all(output in model.output_names for output in ['regression', 'classification'])), \ 117 | "Input is not a training model (no 'regression' and 'classification' outputs were found, outputs are: {}).".format(model.output_names) 118 | 119 | 120 | def check_training_model(model): 121 | """ Check that model is a training model and exit otherwise. 122 | """ 123 | try: 124 | assert_training_model(model) 125 | except AssertionError as e: 126 | print(e, file=sys.stderr) 127 | sys.exit(1) 128 | -------------------------------------------------------------------------------- /keras_retinanet/backend/backend.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import tensorflow 18 | from tensorflow import keras 19 | 20 | 21 | def bbox_transform_inv(boxes, deltas, mean=None, std=None): 22 | """ Applies deltas (usually regression results) to boxes (usually anchors). 23 | 24 | Before applying the deltas to the boxes, the normalization that was previously applied (in the generator) has to be removed. 25 | The mean and std are the mean and std as applied in the generator. They are unnormalized in this function and then applied to the boxes. 26 | 27 | Args 28 | boxes : np.array of shape (B, N, 4), where B is the batch size, N the number of boxes and 4 values for (x1, y1, x2, y2). 29 | deltas: np.array of same shape as boxes. These deltas (d_x1, d_y1, d_x2, d_y2) are a factor of the width/height. 30 | mean : The mean value used when computing deltas (defaults to [0, 0, 0, 0]). 31 | std : The standard deviation used when computing deltas (defaults to [0.2, 0.2, 0.2, 0.2]). 32 | 33 | Returns 34 | A np.array of the same shape as boxes, but with deltas applied to each box. 35 | The mean and std are used during training to normalize the regression values (networks love normalization). 36 | """ 37 | if mean is None: 38 | mean = [0, 0, 0, 0] 39 | if std is None: 40 | std = [0.2, 0.2, 0.2, 0.2] 41 | 42 | width = boxes[:, :, 2] - boxes[:, :, 0] 43 | height = boxes[:, :, 3] - boxes[:, :, 1] 44 | 45 | x1 = boxes[:, :, 0] + (deltas[:, :, 0] * std[0] + mean[0]) * width 46 | y1 = boxes[:, :, 1] + (deltas[:, :, 1] * std[1] + mean[1]) * height 47 | x2 = boxes[:, :, 2] + (deltas[:, :, 2] * std[2] + mean[2]) * width 48 | y2 = boxes[:, :, 3] + (deltas[:, :, 3] * std[3] + mean[3]) * height 49 | 50 | pred_boxes = keras.backend.stack([x1, y1, x2, y2], axis=2) 51 | 52 | return pred_boxes 53 | 54 | 55 | def shift(shape, stride, anchors): 56 | """ Produce shifted anchors based on shape of the map and stride size. 57 | 58 | Args 59 | shape : Shape to shift the anchors over. 60 | stride : Stride to shift the anchors with over the shape. 61 | anchors: The anchors to apply at each location. 
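    Returns
        A tensor of shape (feat_h * feat_w * num_anchors, 4) holding the anchors
        replicated and shifted to every position of the feature map.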
62 | """ 63 | shift_x = (keras.backend.arange(0, shape[1], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride 64 | shift_y = (keras.backend.arange(0, shape[0], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride 65 | 66 | shift_x, shift_y = tensorflow.meshgrid(shift_x, shift_y) 67 | shift_x = keras.backend.reshape(shift_x, [-1]) 68 | shift_y = keras.backend.reshape(shift_y, [-1]) 69 | 70 | shifts = keras.backend.stack([ 71 | shift_x, 72 | shift_y, 73 | shift_x, 74 | shift_y 75 | ], axis=0) 76 | 77 | shifts = keras.backend.transpose(shifts) 78 | number_of_anchors = keras.backend.shape(anchors)[0] 79 | 80 | k = keras.backend.shape(shifts)[0] # number of base points = feat_h * feat_w 81 | 82 | shifted_anchors = keras.backend.reshape(anchors, [1, number_of_anchors, 4]) + keras.backend.cast(keras.backend.reshape(shifts, [k, 1, 4]), keras.backend.floatx()) 83 | shifted_anchors = keras.backend.reshape(shifted_anchors, [k * number_of_anchors, 4]) 84 | 85 | return shifted_anchors 86 | 87 | 88 | def map_fn(*args, **kwargs): 89 | """ See https://www.tensorflow.org/api_docs/python/tf/map_fn . 90 | """ 91 | 92 | if "shapes" in kwargs: 93 | shapes = kwargs.pop("shapes") 94 | dtype = kwargs.pop("dtype") 95 | sig = [tensorflow.TensorSpec(shapes[i], dtype=t) for i, t in 96 | enumerate(dtype)] 97 | 98 | # Try to use the new feature fn_output_signature in TF 2.3, use fallback if this is not available 99 | try: 100 | return tensorflow.map_fn(*args, **kwargs, fn_output_signature=sig) 101 | except TypeError: 102 | kwargs["dtype"] = dtype 103 | 104 | return tensorflow.map_fn(*args, **kwargs) 105 | 106 | 107 | def resize_images(images, size, method='bilinear', align_corners=False): 108 | """ See https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/image/resize_images . 109 | 110 | Args 111 | method: The method used for interpolation. One of ('bilinear', 'nearest', 'bicubic', 'area'). 112 | """ 113 | methods = { 114 | 'bilinear': tensorflow.image.ResizeMethod.BILINEAR, 115 | 'nearest' : tensorflow.image.ResizeMethod.NEAREST_NEIGHBOR, 116 | 'bicubic' : tensorflow.image.ResizeMethod.BICUBIC, 117 | 'area' : tensorflow.image.ResizeMethod.AREA, 118 | } 119 | return tensorflow.compat.v1.image.resize_images(images, size, methods[method], align_corners) 120 | -------------------------------------------------------------------------------- /keras_retinanet/models/resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from tensorflow import keras 18 | import keras_resnet 19 | import keras_resnet.models 20 | 21 | from . import retinanet 22 | from . import Backbone 23 | from ..utils.image import preprocess_image 24 | 25 | 26 | class ResNetBackbone(Backbone): 27 | """ Describes backbone information and provides utility functions. 
28 | """ 29 | 30 | def __init__(self, backbone): 31 | super(ResNetBackbone, self).__init__(backbone) 32 | self.custom_objects.update(keras_resnet.custom_objects) 33 | 34 | def retinanet(self, *args, **kwargs): 35 | """ Returns a retinanet model using the correct backbone. 36 | """ 37 | return resnet_retinanet(*args, backbone=self.backbone, **kwargs) 38 | 39 | def download_imagenet(self): 40 | """ Downloads ImageNet weights and returns path to weights file. 41 | """ 42 | resnet_filename = 'ResNet-{}-model.keras.h5' 43 | resnet_resource = 'https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}'.format(resnet_filename) 44 | depth = int(self.backbone.replace('resnet', '')) 45 | 46 | filename = resnet_filename.format(depth) 47 | resource = resnet_resource.format(depth) 48 | if depth == 50: 49 | checksum = '3e9f4e4f77bbe2c9bec13b53ee1c2319' 50 | elif depth == 101: 51 | checksum = '05dc86924389e5b401a9ea0348a3213c' 52 | elif depth == 152: 53 | checksum = '6ee11ef2b135592f8031058820bb9e71' 54 | 55 | return keras.utils.get_file( 56 | filename, 57 | resource, 58 | cache_subdir='models', 59 | md5_hash=checksum 60 | ) 61 | 62 | def validate(self): 63 | """ Checks whether the backbone string is correct. 64 | """ 65 | allowed_backbones = ['resnet50', 'resnet101', 'resnet152'] 66 | backbone = self.backbone.split('_')[0] 67 | 68 | if backbone not in allowed_backbones: 69 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones)) 70 | 71 | def preprocess_image(self, inputs): 72 | """ Takes as input an image and prepares it for being passed through the network. 73 | """ 74 | return preprocess_image(inputs, mode='caffe') 75 | 76 | 77 | def resnet_retinanet(num_classes, backbone='resnet50', inputs=None, modifier=None, **kwargs): 78 | """ Constructs a retinanet model using a resnet backbone. 79 | 80 | Args 81 | num_classes: Number of classes to predict. 82 | backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')). 83 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 84 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 85 | 86 | Returns 87 | RetinaNet model with a ResNet backbone. 
88 | """ 89 | # choose default input 90 | if inputs is None: 91 | if keras.backend.image_data_format() == 'channels_first': 92 | inputs = keras.layers.Input(shape=(3, None, None)) 93 | else: 94 | inputs = keras.layers.Input(shape=(None, None, 3)) 95 | 96 | # create the resnet backbone 97 | if backbone == 'resnet50': 98 | resnet = keras_resnet.models.ResNet50(inputs, include_top=False, freeze_bn=True) 99 | elif backbone == 'resnet101': 100 | resnet = keras_resnet.models.ResNet101(inputs, include_top=False, freeze_bn=True) 101 | elif backbone == 'resnet152': 102 | resnet = keras_resnet.models.ResNet152(inputs, include_top=False, freeze_bn=True) 103 | else: 104 | raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone)) 105 | 106 | # invoke modifier if given 107 | if modifier: 108 | resnet = modifier(resnet) 109 | 110 | # create the full model 111 | # resnet.outputs contains 4 layers 112 | backbone_layers = { 113 | 'C2': resnet.outputs[0], 114 | 'C3': resnet.outputs[1], 115 | 'C4': resnet.outputs[2], 116 | 'C5': resnet.outputs[3] 117 | } 118 | 119 | return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs) 120 | 121 | 122 | def resnet50_retinanet(num_classes, inputs=None, **kwargs): 123 | return resnet_retinanet(num_classes=num_classes, backbone='resnet50', inputs=inputs, **kwargs) 124 | 125 | 126 | def resnet101_retinanet(num_classes, inputs=None, **kwargs): 127 | return resnet_retinanet(num_classes=num_classes, backbone='resnet101', inputs=inputs, **kwargs) 128 | 129 | 130 | def resnet152_retinanet(num_classes, inputs=None, **kwargs): 131 | return resnet_retinanet(num_classes=num_classes, backbone='resnet152', inputs=inputs, **kwargs) 132 | -------------------------------------------------------------------------------- /docs/train-usage.md: -------------------------------------------------------------------------------- 1 | # Keras RetinaNet on Liza Alert Drone Data Set [![Build Status](https://travis-ci.org/fizyr/keras-retinanet.svg?branch=master)](https://travis-ci.org/fizyr/keras-retinanet) 2 | 3 | Keras implementation of RetinaNet object detection as described in [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002). 4 | 5 | This code is borrowed from Keras Implementation of this model at https://github.com/fizyr/keras-retinanet and updated to run on Liza Alert Drone Dataset (LADD) 6 | 7 | ## Installation 8 | 1. Install: 9 | - tensorflow >= 2.4.2 10 | - setuptools 11 | - opencv-python 12 | - our keras-resnet fork: `pip install git+https://github.com/lacmus-foundation/keras-resnet.git` 13 | 2. Clone this repository. 14 | 3. In the repository, execute `pip install .`. 15 | 4. To run the code directly from the cloned repository, you need to run `python setup.py build_ext --inplace` to compile Cython code first. 16 | 5. Optionally, install `pycocotools` if you want to train / test on the MS COCO dataset by running `pip install git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI`. 17 | 18 | 19 | ## Training on custom data set (added by Priyanka Dwivedi, Georgy Perevozchikov) 20 | 21 | *First step is to create pre-trained backbone model using [Standford Drone Dataset (SDD)](http://cvgl.stanford.edu/projects/uav_data)* 22 | 23 | 24 | For training on a custom dataset, a CSV file can be used as a way to pass the data. 25 | See [below](train-usage.md#annotations-format) for more details on the format of these CSV files. 
26 | 27 | 28 | To train using your CSV, run: 29 | 30 | ``` 31 | keras_retinanet/bin/train.py --weights snapshots/resnet50_coco_best_v2.1.0.h5 csv ../data/SDD-CSV/train_annotations.csv labels.csv --val-annotations ../data/SDD-CSV/val_annotations.csv 32 | ``` 33 | Here: 34 | * --weights: Path to the weights for initializing training 35 | * csv indicates retinanet is trained on a custom data set 36 | * train_annotations.csv is path to training annotations 37 | * labels.csv are the labels in the format class_name, class_id with 0 reserved for background class 38 | * --val_annotations: Path to validation annotations 39 | 40 | You can find the model resnet50_base_best.h5 trained this way in Assets [here](https://github.com/lizaalert/lacmus/releases/tag/0.1.0). 41 | 42 | *Second step is to train on LADD dataset and fine-tuning model* 43 | 44 | To train using your Pascal VOC, run: 45 | ``` 46 | keras_retinanet/bin/train.py --weights snapshots/resnet50_base_best.h5 --freeze-backbone --config config.ini pascal ../data/LADD 47 | ``` 48 | 49 | *Running directly from `RetinaNet` folder in the repository* 50 | 51 | Here: 52 | * pascal indicates retinanet is trained on a dataset of Pascal VOC format 53 | * --config config.ini: configuration file with anchor parameters 54 | 55 | If your dataset is small, you can also use the `--freeze-backbone argument` to freeze the backbone layers. 56 | 57 | You can find more arguments directly in parse_args function of [train.py](../keras_retinanet/bin/train.py) file. 58 | 59 | A model trained on a LADD dataset can be found [here](https://github.com/lizaalert/lacmus/releases/tag/0.1.1). 60 | 61 | ### Training Set 62 | I uploaded the images used for training and validation to the yandex disk link below. Please download the same: 63 | 64 | 65 | ### Annotations format 66 | The CSV file with annotations should contain one annotation per line. 67 | Images with multiple bounding boxes should use one row per bounding box. 68 | Note that indexing for pixel values starts at 0. 69 | The expected format of each line is: 70 | ``` 71 | path/to/image.jpg,x1,y1,x2,y2,class_name 72 | ``` 73 | 74 | **Labels format** 75 | 76 | The class name to ID mapping file should contain one mapping per line. 77 | Each line should use the following format: 78 | ``` 79 | class_name,id 80 | ``` 81 | 82 | ## Evaluating Results (added by Priyanka Dwivedi and Georgy Perevozchikov) 83 | 84 | To calculate mean average precision on the validation set, please run 85 | 86 | ``` 87 | keras_retinanet/bin/evaluate.py csv val_annotations.csv labels.csv snapshots/resnet50_csv_31_interface.h5 88 | ``` 89 | 90 | or for Pascal VOC 91 | 92 | ``` 93 | keras_retinanet/bin/evaluate.py pascal ../data/LADD snapshots/resnet50_pascal_06.h5 --convert-model 94 | ``` 95 | 96 | Here we pass the val_annotations, labels and path to the trained weights. 97 | 98 | **Note:** If you were using --config config.ini argument when training the model and pass option --convert-model when evaluating, you should provide the same configuaration file, otherwise you can encounter error "InvalidArgumentError: 99 | Incompatible shapes" like [in this issue](https://github.com/priya-dwivedi/aerial_pedestrian_detection/issues/3). 100 | 101 | 102 | ## Running Inference on Images and Videos (added by Priyanka Dwivedi and Georgy Perevozchikov) 103 | 104 | To run inference on the trained model, first step is to convert the trained model to a format that can be used by inference. 
The command for this is: 105 | 106 | ``` 107 | keras_retinanet/bin/convert_model.py snapshots/resnet50_pascal_08.h5 snapshots/resnet50_pascal_08_inference.h5 108 | ``` 109 | 110 | Here first path is the path to the trained model and the second would be the path to the converted inference model 111 | 112 | I created two notebooks from the original code that can be used to run inference on images and on videos. 113 | 114 | The notebooks are: 115 | * [RetinaNet/ResNet50RetinaNet-Image.ipynb](../ResNet50RetinaNet-Image.ipynb) : For inference on a batch of images 116 | * [RetinaNet/ResNet50RetinaNet-Video.ipynb](../ResNet50RetinaNet-Video.ipynb) : For inference on a video 117 | 118 | -------------------------------------------------------------------------------- /data_utils/LaddGenerator/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | 5 | namespace LaddGenerator 6 | { 7 | class Program 8 | { 9 | private static readonly Dictionary _argsKeys = new Dictionary() 10 | { 11 | {"--src", "source path"}, 12 | {"--dst", "destination path"}, 13 | {"--cnt", "start count with"}, 14 | }; 15 | 16 | static void Main(string[] args) 17 | { 18 | var parser = new ArgsParser(_argsKeys); 19 | var parsedArgs = parser.Parse(args); 20 | if (parsedArgs == null) 21 | { 22 | return; 23 | } 24 | 25 | var imgSrcPatch = parsedArgs["src"] + "JPEGImages/"; 26 | var annSrcPatch = parsedArgs["src"] + "Annotations/"; 27 | var imgDstPatch = parsedArgs["dst"] + "JPEGImages/"; 28 | var annDstPatch = parsedArgs["dst"] + "Annotations/"; 29 | var spltDstPatch = parsedArgs["dst"] + "ImageSets/Main/"; 30 | int beginCount = int.Parse(parsedArgs["cnt"]); 31 | 32 | if (!Directory.Exists(imgSrcPatch)) 33 | { 34 | Console.Write("unable to open: " + imgSrcPatch); 35 | return; 36 | } 37 | if (!Directory.Exists(annSrcPatch)) 38 | { 39 | Console.Write("unable to open: " + annSrcPatch); 40 | return; 41 | } 42 | if (!Directory.Exists(imgDstPatch)) 43 | { 44 | Directory.CreateDirectory(imgDstPatch); 45 | } 46 | if (!Directory.Exists(annDstPatch)) 47 | { 48 | Directory.CreateDirectory(annDstPatch); 49 | } 50 | if (!Directory.Exists(spltDstPatch)) 51 | { 52 | Directory.CreateDirectory(spltDstPatch); 53 | } 54 | 55 | var srcFiles = Directory.GetFiles(annSrcPatch); 56 | var dstImgFileNames = Directory.GetFiles(imgDstPatch); 57 | int count = 0; //420; 58 | 59 | if (dstImgFileNames == null || dstImgFileNames.Length == 0) 60 | count = beginCount; 61 | else 62 | { 63 | count = dstImgFileNames.Length; 64 | } 65 | Console.WriteLine(count); 66 | Console.ReadLine(); 67 | foreach (var sfile in srcFiles) 68 | { 69 | try 70 | { 71 | var srcAnnotation = Annotation.ParseFromXml(sfile); 72 | if (!srcAnnotation.Filename.ToLower().EndsWith(".jpg")) 73 | srcAnnotation.Filename += ".jpg"; 74 | 75 | var dstAnnotation = new Annotation(); 76 | dstAnnotation.Folder = "VocGalsTfl"; 77 | dstAnnotation.Source = new Sourse(); 78 | dstAnnotation.Filename = $"{count}"; 79 | dstAnnotation.Objects = srcAnnotation.Objects; 80 | dstAnnotation.Size = srcAnnotation.Size; 81 | if (dstAnnotation.Objects == null || dstAnnotation.Objects.Count <= 0) 82 | { 83 | dstAnnotation.Objects = new List(); 84 | //throw new Exception("no objects in the image!"); 85 | } 86 | foreach (var obj in dstAnnotation.Objects) 87 | { 88 | obj.Box.Normalize(); 89 | } 90 | dstAnnotation.SaveToXml(annDstPatch + $"{count}.xml"); 91 | File.Copy(imgSrcPatch + srcAnnotation.Filename, 92 | 
imgDstPatch + $"{count}.jpg"); 93 | Console.WriteLine($"saved for {srcAnnotation.Filename} => {count}.jpg"); 94 | count++; 95 | } 96 | catch (Exception e) 97 | { 98 | Console.WriteLine($"skip {sfile} : {e.Message}"); 99 | } 100 | } 101 | 102 | Console.Write($"Shuffling {count-1} files"); 103 | List files = new List(); 104 | for (int i = beginCount; i < count; i++) 105 | { 106 | files.Add(i); 107 | } 108 | Shuffle(files); 109 | int trainSplit = Convert.ToInt32((double)files.Count / 100 * 20); 110 | if (trainSplit == 0 && files.Count > 2) trainSplit = 1; 111 | List lines = new List(); 112 | for (int i = 0; i < files.Count-trainSplit; i++) 113 | { 114 | lines.Add($"{files[i]}"); 115 | } 116 | File.WriteAllLines(spltDstPatch+"train.txt", lines); 117 | File.WriteAllLines(spltDstPatch+"trainval.txt", lines); 118 | lines = new List(); 119 | for (int i = files.Count-trainSplit; i < files.Count; i++) 120 | { 121 | lines.Add($"{files[i]}"); 122 | } 123 | File.WriteAllLines(spltDstPatch+"test.txt", lines); 124 | File.WriteAllLines(spltDstPatch+"val.txt", lines); 125 | Console.WriteLine("Done!"); 126 | } 127 | 128 | private static void Shuffle(IList list) 129 | { 130 | Random rng = new Random(); 131 | int n = list.Count; 132 | while (n > 1) { 133 | n--; 134 | int k = rng.Next(n + 1); 135 | T value = list[k]; 136 | list[k] = list[n]; 137 | list[n] = value; 138 | } 139 | } 140 | } 141 | } -------------------------------------------------------------------------------- /data_utils/voc2coco.py: -------------------------------------------------------------------------------- 1 | # https://github.com/yukkyo/voc2coco 2 | 3 | import os 4 | import argparse 5 | import json 6 | import xml.etree.ElementTree as ET 7 | from typing import Dict, List 8 | from tqdm import tqdm 9 | import re 10 | 11 | 12 | def get_label2id(labels_path: str) -> Dict[str, int]: 13 | """id is 1 start""" 14 | with open(labels_path, 'r') as f: 15 | labels_str = f.read().split() 16 | labels_ids = list(range(1, len(labels_str)+1)) 17 | return dict(zip(labels_str, labels_ids)) 18 | 19 | 20 | def get_annpaths(ann_dir_path: str = None, 21 | ann_ids_path: str = None, 22 | ext: str = '', 23 | annpaths_list_path: str = None) -> List[str]: 24 | # If use annotation paths list 25 | if annpaths_list_path is not None: 26 | with open(annpaths_list_path, 'r') as f: 27 | ann_paths = f.read().split() 28 | return ann_paths 29 | 30 | # If use annotaion ids list 31 | ext_with_dot = '.' + ext if ext != '' else '' 32 | with open(ann_ids_path, 'r') as f: 33 | ann_ids = f.read().split() 34 | ann_paths = [os.path.join(ann_dir_path, aid+ext_with_dot) for aid in ann_ids] 35 | return ann_paths 36 | 37 | 38 | def get_image_info(annotation_root, extract_num_from_imgid=True): 39 | path = annotation_root.findtext('path') 40 | if path is None: 41 | filename = annotation_root.findtext('filename') 42 | else: 43 | filename = os.path.basename(path) 44 | img_name = os.path.basename(filename) 45 | img_id = os.path.splitext(img_name)[0] 46 | if extract_num_from_imgid and isinstance(img_id, str): 47 | img_id = int(re.findall(r'\d+', img_id)[0]) 48 | 49 | size = annotation_root.find('size') 50 | width = int(size.findtext('width')) 51 | height = int(size.findtext('height')) 52 | 53 | image_info = { 54 | 'file_name': filename, 55 | 'height': height, 56 | 'width': width, 57 | 'id': img_id 58 | } 59 | return image_info 60 | 61 | 62 | def get_coco_annotation_from_obj(obj, label2id): 63 | label = obj.findtext('name') 64 | assert label in label2id, f"Error: {label} is not in label2id !" 
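    # look up the numeric COCO category id for this VOC class name, then convert the
    # 1-based VOC corner coordinates into a 0-based [x, y, width, height] COCO box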
65 | category_id = label2id[label] 66 | bndbox = obj.find('bndbox') 67 | xmin = int(bndbox.findtext('xmin')) - 1 68 | ymin = int(bndbox.findtext('ymin')) - 1 69 | xmax = int(bndbox.findtext('xmax')) 70 | ymax = int(bndbox.findtext('ymax')) 71 | assert xmax > xmin and ymax > ymin, f"Box size error !: (xmin, ymin, xmax, ymax): {xmin, ymin, xmax, ymax}" 72 | o_width = xmax - xmin 73 | o_height = ymax - ymin 74 | ann = { 75 | 'area': o_width * o_height, 76 | 'iscrowd': 0, 77 | 'bbox': [xmin, ymin, o_width, o_height], 78 | 'category_id': category_id, 79 | 'ignore': 0, 80 | 'segmentation': [] # This script is not for segmentation 81 | } 82 | return ann 83 | 84 | 85 | def convert_xmls_to_cocojson(annotation_paths: List[str], 86 | label2id: Dict[str, int], 87 | output_jsonpath: str, 88 | extract_num_from_imgid: bool = True): 89 | output_json_dict = { 90 | "images": [], 91 | "type": "instances", 92 | "annotations": [], 93 | "categories": [] 94 | } 95 | bnd_id = 1 # START_BOUNDING_BOX_ID, TODO input as args ? 96 | print('Start converting !') 97 | for a_path in tqdm(annotation_paths): 98 | # Read annotation xml 99 | ann_tree = ET.parse(a_path) 100 | ann_root = ann_tree.getroot() 101 | 102 | img_info = get_image_info(annotation_root=ann_root, 103 | extract_num_from_imgid=extract_num_from_imgid) 104 | img_id = img_info['id'] 105 | output_json_dict['images'].append(img_info) 106 | 107 | for obj in ann_root.findall('object'): 108 | ann = get_coco_annotation_from_obj(obj=obj, label2id=label2id) 109 | ann.update({'image_id': img_id, 'id': bnd_id}) 110 | output_json_dict['annotations'].append(ann) 111 | bnd_id = bnd_id + 1 112 | 113 | for label, label_id in label2id.items(): 114 | category_info = {'supercategory': 'none', 'id': label_id, 'name': label} 115 | output_json_dict['categories'].append(category_info) 116 | 117 | with open(output_jsonpath, 'w') as f: 118 | output_json = json.dumps(output_json_dict) 119 | f.write(output_json) 120 | 121 | 122 | def main(): 123 | parser = argparse.ArgumentParser( 124 | description='This script support converting voc format xmls to coco format json') 125 | parser.add_argument('--ann_dir', type=str, default=None, 126 | help='path to annotation files directory. It is not need when use --ann_paths_list') 127 | parser.add_argument('--ann_ids', type=str, default=None, 128 | help='path to annotation files ids list. It is not need when use --ann_paths_list') 129 | parser.add_argument('--ann_paths_list', type=str, default=None, 130 | help='path of annotation paths list. 
It is not need when use --ann_dir and --ann_ids') 131 | parser.add_argument('--labels', type=str, default=None, 132 | help='path to label list.') 133 | parser.add_argument('--output', type=str, default='output.json', help='path to output json file') 134 | parser.add_argument('--ext', type=str, default='', help='additional extension of annotation file') 135 | args = parser.parse_args() 136 | label2id = get_label2id(labels_path=args.labels) 137 | ann_paths = get_annpaths( 138 | ann_dir_path=args.ann_dir, 139 | ann_ids_path=args.ann_ids, 140 | ext=args.ext, 141 | annpaths_list_path=args.ann_paths_list 142 | ) 143 | convert_xmls_to_cocojson( 144 | annotation_paths=ann_paths, 145 | label2id=label2id, 146 | output_jsonpath=args.output, 147 | extract_num_from_imgid=True 148 | ) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | -------------------------------------------------------------------------------- /keras_retinanet/preprocessing/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from ..preprocessing.generator import Generator 18 | from ..utils.image import read_image_bgr 19 | 20 | import os 21 | import numpy as np 22 | 23 | from pycocotools.coco import COCO 24 | 25 | 26 | class CocoGenerator(Generator): 27 | """ Generate data from the COCO dataset. 28 | 29 | See https://github.com/cocodataset/cocoapi/tree/master/PythonAPI for more information. 30 | """ 31 | 32 | def __init__(self, data_dir, set_name, **kwargs): 33 | """ Initialize a COCO data generator. 34 | 35 | Args 36 | data_dir: Path to where the COCO dataset is stored. 37 | set_name: Name of the set to parse. 38 | """ 39 | self.data_dir = data_dir 40 | self.set_name = set_name 41 | self.coco = COCO(os.path.join(data_dir, 'annotations', 'instances_' + set_name + '.json')) 42 | self.image_ids = self.coco.getImgIds() 43 | 44 | self.load_classes() 45 | 46 | super(CocoGenerator, self).__init__(**kwargs) 47 | 48 | def load_classes(self): 49 | """ Loads the class to label mapping (and inverse) for COCO. 50 | """ 51 | # load class names (name -> label) 52 | categories = self.coco.loadCats(self.coco.getCatIds()) 53 | categories.sort(key=lambda x: x['id']) 54 | 55 | self.classes = {} 56 | self.coco_labels = {} 57 | self.coco_labels_inverse = {} 58 | for c in categories: 59 | self.coco_labels[len(self.classes)] = c['id'] 60 | self.coco_labels_inverse[c['id']] = len(self.classes) 61 | self.classes[c['name']] = len(self.classes) 62 | 63 | # also load the reverse (label -> name) 64 | self.labels = {} 65 | for key, value in self.classes.items(): 66 | self.labels[value] = key 67 | 68 | def size(self): 69 | """ Size of the COCO dataset. 70 | """ 71 | return len(self.image_ids) 72 | 73 | def num_classes(self): 74 | """ Number of classes in the dataset. For COCO this is 80. 
75 | """ 76 | return len(self.classes) 77 | 78 | def has_label(self, label): 79 | """ Return True if label is a known label. 80 | """ 81 | return label in self.labels 82 | 83 | def has_name(self, name): 84 | """ Returns True if name is a known class. 85 | """ 86 | return name in self.classes 87 | 88 | def name_to_label(self, name): 89 | """ Map name to label. 90 | """ 91 | return self.classes[name] 92 | 93 | def label_to_name(self, label): 94 | """ Map label to name. 95 | """ 96 | return self.labels[label] 97 | 98 | def coco_label_to_label(self, coco_label): 99 | """ Map COCO label to the label as used in the network. 100 | COCO has some gaps in the order of labels. The highest label is 90, but there are 80 classes. 101 | """ 102 | return self.coco_labels_inverse[coco_label] 103 | 104 | def coco_label_to_name(self, coco_label): 105 | """ Map COCO label to name. 106 | """ 107 | return self.label_to_name(self.coco_label_to_label(coco_label)) 108 | 109 | def label_to_coco_label(self, label): 110 | """ Map label as used by the network to labels as used by COCO. 111 | """ 112 | return self.coco_labels[label] 113 | 114 | def image_path(self, image_index): 115 | """ Returns the image path for image_index. 116 | """ 117 | image_info = self.coco.loadImgs(self.image_ids[image_index])[0] 118 | path = os.path.join(self.data_dir, 'images', self.set_name, image_info['file_name']) 119 | return path 120 | 121 | def image_aspect_ratio(self, image_index): 122 | """ Compute the aspect ratio for an image with image_index. 123 | """ 124 | image = self.coco.loadImgs(self.image_ids[image_index])[0] 125 | return float(image['width']) / float(image['height']) 126 | 127 | def load_image(self, image_index): 128 | """ Load an image at the image_index. 129 | """ 130 | path = self.image_path(image_index) 131 | return read_image_bgr(path) 132 | 133 | def load_annotations(self, image_index): 134 | """ Load annotations for an image_index. 
135 | """ 136 | # get ground truth annotations 137 | annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False) 138 | annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))} 139 | 140 | # some images appear to miss annotations (like image with id 257034) 141 | if len(annotations_ids) == 0: 142 | return annotations 143 | 144 | # parse annotations 145 | coco_annotations = self.coco.loadAnns(annotations_ids) 146 | for idx, a in enumerate(coco_annotations): 147 | # some annotations have basically no width / height, skip them 148 | if a['bbox'][2] < 1 or a['bbox'][3] < 1: 149 | continue 150 | 151 | annotations['labels'] = np.concatenate([annotations['labels'], [self.coco_label_to_label(a['category_id'])]], axis=0) 152 | annotations['bboxes'] = np.concatenate([annotations['bboxes'], [[ 153 | a['bbox'][0], 154 | a['bbox'][1], 155 | a['bbox'][0] + a['bbox'][2], 156 | a['bbox'][1] + a['bbox'][3], 157 | ]]], axis=0) 158 | 159 | return annotations 160 | -------------------------------------------------------------------------------- /keras_retinanet/utils/crops_sampling.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class Crop: 5 | def __init__(self, x_min, y_min, x_max, y_max): 6 | self.x_min = int(x_min) 7 | self.y_min = int(y_min) 8 | self.x_max = int(x_max) 9 | self.y_max = int(y_max) 10 | self.width = self.x_max - self.x_min 11 | self.height = self.y_max - self.y_min 12 | 13 | 14 | class EmptyCrop(Crop): 15 | def __init__(self, *args): 16 | super(EmptyCrop, self).__init__(*args) 17 | 18 | def intersect(self, bbox: '[x_min, y_min, x_max, y_max]'): 19 | x_inside = (self.x_min <= bbox[0] <= self.x_max) or (self.x_min <= bbox[2] <= self.x_max) 20 | y_inside = (self.y_min <= bbox[1] <= self.y_max) or (self.y_min <= bbox[3] <= self.y_max) 21 | return x_inside and y_inside 22 | 23 | def crop_around(self, bbox: '[x_min, y_min, x_max, y_max]', min_width, min_height): 24 | margins = [] 25 | 26 | if self.height > min_height: 27 | if bbox[0] - self.x_min > min_width: 28 | # left margin, from top to bottom 29 | margins.append(EmptyCrop(self.x_min, self.y_min, bbox[0] - 1, self.y_max)) 30 | 31 | if self.x_max - bbox[2] > min_width: 32 | # right margin, from top to bottom 33 | margins.append(EmptyCrop(bbox[2] + 1, self.y_min, self.x_max, self.y_max)) 34 | 35 | if self.width > min_width: 36 | if bbox[1] - self.y_min >= min_height: 37 | # top margin, fro left to right 38 | margins.append(EmptyCrop(self.x_min, self.y_min, self.x_max, bbox[1] - 1)) 39 | 40 | if self.y_max - bbox[3] >= min_height: 41 | # bottom margin, from left to right 42 | margins.append(EmptyCrop(self.x_min, bbox[3] + 1, self.x_max, self.y_max)) 43 | 44 | return margins 45 | 46 | 47 | class NegativeSampling: 48 | def __init__(self, image_width, image_height, crop_width, crop_height, bboxes): 49 | self.crop_width = crop_width 50 | self.crop_height = crop_height 51 | 52 | self.empty_areas = [EmptyCrop(0, 0, image_width, image_height)] 53 | 54 | for bbox in bboxes: 55 | area_index = 0 56 | while area_index < len(self.empty_areas): 57 | area = self.empty_areas[area_index] 58 | if area.intersect(bbox): 59 | around_bbox = area.crop_around(bbox, crop_width, crop_height) 60 | self.empty_areas[area_index:area_index + 1] = around_bbox 61 | area_index += len(around_bbox) 62 | else: 63 | area_index += 1 64 | 65 | def __get_random_crop_inside(self, area: EmptyCrop): 66 | gap_x = area.width - self.crop_width 67 | gap_y = area.height - 
self.crop_height 68 | x_min = area.x_min + random.randint(0, gap_x) 69 | y_min = area.y_min + random.randint(0, gap_y) 70 | x_max = x_min + self.crop_width - 1 71 | y_max = y_min + self.crop_height - 1 72 | 73 | return EmptyCrop(x_min, y_min, x_max, y_max) 74 | 75 | @property 76 | def samples_available(self): 77 | return len(self.empty_areas) > 0 78 | 79 | def get_samples(self, count): 80 | candidate_areas = random.choices(self.empty_areas, k=count) 81 | # as random shift is used, samples will be different even when there is only one empty area 82 | return [self.__get_random_crop_inside(area) for area in candidate_areas] 83 | 84 | 85 | class PositiveSampling: 86 | def __init__(self, image_width, image_height, crop_width, crop_height, bboxes): 87 | self.image_width = image_width 88 | self.image_height = image_height 89 | self.crop_width = crop_width 90 | self.crop_height = crop_height 91 | self.bboxes = bboxes 92 | 93 | def __get_random_crop_around(self, bbox): 94 | x_from = max(0, bbox[2] - self.crop_width) 95 | x_to = min(bbox[0], self.image_width - self.crop_width) 96 | y_from = max(0, bbox[3] - self.crop_height) 97 | y_to = min(bbox[1], self.image_height - self.crop_height) 98 | 99 | x_min = random.randint(x_from, x_to) 100 | x_max = x_min + self.crop_width - 1 101 | y_min = random.randint(y_from, y_to) 102 | y_max = y_min + self.crop_height - 1 103 | return Crop(x_min, y_min, x_max, y_max) 104 | 105 | @property 106 | def samples_available(self): 107 | return len(self.bboxes) > 0 108 | 109 | def get_samples(self, count=None): 110 | crops = [] 111 | if not count: 112 | samples_bboxes = self.bboxes 113 | else: 114 | samples_bboxes = random.choices(self.bboxes, k=count) 115 | 116 | # as random shift is used, samples will be different even when count > len(bboxes) 117 | return [self.__get_random_crop_around(bbox) for bbox in samples_bboxes] 118 | 119 | 120 | if __name__ == '__main__': 121 | import numpy as np 122 | import sys 123 | 124 | crop = EmptyCrop(0, 0, 200, 200) 125 | 126 | bbox_inside = np.array([50, 50, 150, 150]) 127 | if crop.intersect(bbox_inside): 128 | print('[OK] Test bbox inside crop') 129 | else: 130 | print('[FAILED] Test bbox inside crop') 131 | sys.exit(-1) 132 | 133 | bbox_outside = np.array([300, 300, 300, 300]) 134 | if not crop.intersect(bbox_outside): 135 | print('[OK] Test bbox outside crop') 136 | else: 137 | print('[FAILED] Test bbox outside crop') 138 | sys.exit(-1) 139 | 140 | bbox_intersect = np.array([150, 150, 250, 250]) 141 | if crop.intersect(bbox_intersect): 142 | print('[OK] Test bbox intersecting crop') 143 | else: 144 | print('[FAILED] Test bbox intersecting crop') 145 | sys.exit(-1) 146 | 147 | crops_around = crop.crop_around(bbox_inside, 0, 0) 148 | if (len(crops_around) == 4) and (not any([c.intersect(bbox_inside) for c in crops_around])): 149 | print('[OK] Test cropping around inner bbox') 150 | else: 151 | print('[FAILED] Test cropping around inner bbox') 152 | sys.exit(-1) 153 | 154 | crops_around = crop.crop_around(bbox_intersect, 100, 100) 155 | if (len(crops_around) == 2) and (not any([c.intersect(bbox_intersect) for c in crops_around])): 156 | print('[OK] Test cropping around intersecting bbox') 157 | else: 158 | print('[FAILED] Test cropping around intersecting bbox') 159 | sys.exit(-1) 160 | -------------------------------------------------------------------------------- /tests/utils/test_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import 
assert_almost_equal 3 | from math import pi 4 | 5 | from keras_retinanet.utils.transform import ( 6 | colvec, 7 | transform_aabb, 8 | rotation, random_rotation, 9 | translation, random_translation, 10 | scaling, random_scaling, 11 | shear, random_shear, 12 | random_flip, 13 | random_transform, 14 | random_transform_generator, 15 | change_transform_origin, 16 | ) 17 | 18 | 19 | def test_colvec(): 20 | assert np.array_equal(colvec(0), np.array([[0]])) 21 | assert np.array_equal(colvec(1, 2, 3), np.array([[1], [2], [3]])) 22 | assert np.array_equal(colvec(-1, -2), np.array([[-1], [-2]])) 23 | 24 | 25 | def test_rotation(): 26 | assert_almost_equal(colvec( 1, 0, 1), rotation(0.0 * pi).dot(colvec(1, 0, 1))) 27 | assert_almost_equal(colvec( 0, 1, 1), rotation(0.5 * pi).dot(colvec(1, 0, 1))) 28 | assert_almost_equal(colvec(-1, 0, 1), rotation(1.0 * pi).dot(colvec(1, 0, 1))) 29 | assert_almost_equal(colvec( 0, -1, 1), rotation(1.5 * pi).dot(colvec(1, 0, 1))) 30 | assert_almost_equal(colvec( 1, 0, 1), rotation(2.0 * pi).dot(colvec(1, 0, 1))) 31 | 32 | assert_almost_equal(colvec( 0, 1, 1), rotation(0.0 * pi).dot(colvec(0, 1, 1))) 33 | assert_almost_equal(colvec(-1, 0, 1), rotation(0.5 * pi).dot(colvec(0, 1, 1))) 34 | assert_almost_equal(colvec( 0, -1, 1), rotation(1.0 * pi).dot(colvec(0, 1, 1))) 35 | assert_almost_equal(colvec( 1, 0, 1), rotation(1.5 * pi).dot(colvec(0, 1, 1))) 36 | assert_almost_equal(colvec( 0, 1, 1), rotation(2.0 * pi).dot(colvec(0, 1, 1))) 37 | 38 | 39 | def test_random_rotation(): 40 | prng = np.random.RandomState(0) 41 | for i in range(100): 42 | assert_almost_equal(1, np.linalg.det(random_rotation(-i, i, prng))) 43 | 44 | 45 | def test_translation(): 46 | assert_almost_equal(colvec( 1, 2, 1), translation(colvec( 0, 0)).dot(colvec(1, 2, 1))) 47 | assert_almost_equal(colvec( 4, 6, 1), translation(colvec( 3, 4)).dot(colvec(1, 2, 1))) 48 | assert_almost_equal(colvec(-2, -2, 1), translation(colvec(-3, -4)).dot(colvec(1, 2, 1))) 49 | 50 | 51 | def assert_is_translation(transform, min, max): 52 | assert transform.shape == (3, 3) 53 | assert np.array_equal(transform[:, 0:2], np.eye(3, 2)) 54 | assert transform[2, 2] == 1 55 | assert np.greater_equal(transform[0:2, 2], min).all() 56 | assert np.less( transform[0:2, 2], max).all() 57 | 58 | 59 | def test_random_translation(): 60 | prng = np.random.RandomState(0) 61 | min = (-10, -20) 62 | max = (20, 10) 63 | for i in range(100): 64 | assert_is_translation(random_translation(min, max, prng), min, max) 65 | 66 | 67 | def test_shear(): 68 | assert_almost_equal(colvec( 1, 2, 1), shear(0.0 * pi).dot(colvec(1, 2, 1))) 69 | assert_almost_equal(colvec(-1, 0, 1), shear(0.5 * pi).dot(colvec(1, 2, 1))) 70 | assert_almost_equal(colvec( 1, -2, 1), shear(1.0 * pi).dot(colvec(1, 2, 1))) 71 | assert_almost_equal(colvec( 3, 0, 1), shear(1.5 * pi).dot(colvec(1, 2, 1))) 72 | assert_almost_equal(colvec( 1, 2, 1), shear(2.0 * pi).dot(colvec(1, 2, 1))) 73 | 74 | 75 | def assert_is_shear(transform): 76 | assert transform.shape == (3, 3) 77 | assert np.array_equal(transform[:, 0], [1, 0, 0]) 78 | assert np.array_equal(transform[:, 2], [0, 0, 1]) 79 | assert transform[2, 1] == 0 80 | # sin^2 + cos^2 == 1 81 | assert_almost_equal(1, transform[0, 1] ** 2 + transform[1, 1] ** 2) 82 | 83 | 84 | def test_random_shear(): 85 | prng = np.random.RandomState(0) 86 | for i in range(100): 87 | assert_is_shear(random_shear(-pi, pi, prng)) 88 | 89 | 90 | def test_scaling(): 91 | assert_almost_equal(colvec(1.0, 2, 1), scaling(colvec(1.0, 1.0)).dot(colvec(1, 2, 1))) 92 | 
assert_almost_equal(colvec(0.0, 2, 1), scaling(colvec(0.0, 1.0)).dot(colvec(1, 2, 1))) 93 | assert_almost_equal(colvec(1.0, 0, 1), scaling(colvec(1.0, 0.0)).dot(colvec(1, 2, 1))) 94 | assert_almost_equal(colvec(0.5, 4, 1), scaling(colvec(0.5, 2.0)).dot(colvec(1, 2, 1))) 95 | 96 | 97 | def assert_is_scaling(transform, min, max): 98 | assert transform.shape == (3, 3) 99 | assert np.array_equal(transform[2, :], [0, 0, 1]) 100 | assert np.array_equal(transform[:, 2], [0, 0, 1]) 101 | assert transform[1, 0] == 0 102 | assert transform[0, 1] == 0 103 | assert np.greater_equal(np.diagonal(transform)[:2], min).all() 104 | assert np.less( np.diagonal(transform)[:2], max).all() 105 | 106 | 107 | def test_random_scaling(): 108 | prng = np.random.RandomState(0) 109 | min = (0.1, 0.2) 110 | max = (20, 10) 111 | for i in range(100): 112 | assert_is_scaling(random_scaling(min, max, prng), min, max) 113 | 114 | 115 | def assert_is_flip(transform): 116 | assert transform.shape == (3, 3) 117 | assert np.array_equal(transform[2, :], [0, 0, 1]) 118 | assert np.array_equal(transform[:, 2], [0, 0, 1]) 119 | assert transform[1, 0] == 0 120 | assert transform[0, 1] == 0 121 | assert abs(transform[0, 0]) == 1 122 | assert abs(transform[1, 1]) == 1 123 | 124 | 125 | def test_random_flip(): 126 | prng = np.random.RandomState(0) 127 | for i in range(100): 128 | assert_is_flip(random_flip(0.5, 0.5, prng)) 129 | 130 | 131 | def test_random_transform(): 132 | prng = np.random.RandomState(0) 133 | for i in range(100): 134 | transform = random_transform(prng=prng) 135 | assert np.array_equal(transform, np.identity(3)) 136 | 137 | for i, transform in zip(range(100), random_transform_generator(prng=np.random.RandomState())): 138 | assert np.array_equal(transform, np.identity(3)) 139 | 140 | 141 | def test_transform_aabb(): 142 | assert np.array_equal([1, 2, 3, 4], transform_aabb(np.identity(3), [1, 2, 3, 4])) 143 | assert_almost_equal([-3, -4, -1, -2], transform_aabb(rotation(pi), [1, 2, 3, 4])) 144 | assert_almost_equal([ 2, 4, 4, 6], transform_aabb(translation([1, 2]), [1, 2, 3, 4])) 145 | 146 | 147 | def test_change_transform_origin(): 148 | assert np.array_equal(change_transform_origin(translation([3, 4]), [1, 2]), translation([3, 4])) 149 | assert_almost_equal(colvec(1, 2, 1), change_transform_origin(rotation(pi), [1, 2]).dot(colvec(1, 2, 1))) 150 | assert_almost_equal(colvec(0, 0, 1), change_transform_origin(rotation(pi), [1, 2]).dot(colvec(2, 4, 1))) 151 | assert_almost_equal(colvec(0, 0, 1), change_transform_origin(scaling([0.5, 0.5]), [-2, -4]).dot(colvec(2, 4, 1))) 152 | -------------------------------------------------------------------------------- /keras_retinanet/preprocessing/kitti.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | import csv 18 | import os.path 19 | 20 | import numpy as np 21 | from PIL import Image 22 | 23 | from .generator import Generator 24 | from ..utils.image import read_image_bgr 25 | 26 | kitti_classes = { 27 | 'Car': 0, 28 | 'Van': 1, 29 | 'Truck': 2, 30 | 'Pedestrian': 3, 31 | 'Person_sitting': 4, 32 | 'Cyclist': 5, 33 | 'Tram': 6, 34 | 'Misc': 7, 35 | 'DontCare': 7 36 | } 37 | 38 | 39 | class KittiGenerator(Generator): 40 | """ Generate data for a KITTI dataset. 41 | 42 | See http://www.cvlibs.net/datasets/kitti/ for more information. 43 | """ 44 | 45 | def __init__( 46 | self, 47 | base_dir, 48 | subset='train', 49 | **kwargs 50 | ): 51 | """ Initialize a KITTI data generator. 52 | 53 | Args 54 | base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file). 55 | subset: The subset to generate data for (defaults to 'train'). 56 | """ 57 | self.base_dir = base_dir 58 | 59 | label_dir = os.path.join(self.base_dir, subset, 'labels') 60 | image_dir = os.path.join(self.base_dir, subset, 'images') 61 | 62 | """ 63 | 1 type Describes the type of object: 'Car', 'Van', 'Truck', 64 | 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 65 | 'Misc' or 'DontCare' 66 | 1 truncated Float from 0 (non-truncated) to 1 (truncated), where 67 | truncated refers to the object leaving image boundaries 68 | 1 occluded Integer (0,1,2,3) indicating occlusion state: 69 | 0 = fully visible, 1 = partly occluded 70 | 2 = largely occluded, 3 = unknown 71 | 1 alpha Observation angle of object, ranging [-pi..pi] 72 | 4 bbox 2D bounding box of object in the image (0-based index): 73 | contains left, top, right, bottom pixel coordinates 74 | 3 dimensions 3D object dimensions: height, width, length (in meters) 75 | 3 location 3D object location x,y,z in camera coordinates (in meters) 76 | 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi] 77 | """ 78 | 79 | self.labels = {} 80 | self.classes = kitti_classes 81 | for name, label in self.classes.items(): 82 | self.labels[label] = name 83 | 84 | self.image_data = dict() 85 | self.images = [] 86 | for i, fn in enumerate(os.listdir(label_dir)): 87 | label_fp = os.path.join(label_dir, fn) 88 | image_fp = os.path.join(image_dir, fn.replace('.txt', '.png')) 89 | 90 | self.images.append(image_fp) 91 | 92 | fieldnames = ['type', 'truncated', 'occluded', 'alpha', 'left', 'top', 'right', 'bottom', 'dh', 'dw', 'dl', 93 | 'lx', 'ly', 'lz', 'ry'] 94 | with open(label_fp, 'r') as csv_file: 95 | reader = csv.DictReader(csv_file, delimiter=' ', fieldnames=fieldnames) 96 | boxes = [] 97 | for line, row in enumerate(reader): 98 | label = row['type'] 99 | cls_id = kitti_classes[label] 100 | 101 | annotation = {'cls_id': cls_id, 'x1': row['left'], 'x2': row['right'], 'y2': row['bottom'], 'y1': row['top']} 102 | boxes.append(annotation) 103 | 104 | self.image_data[i] = boxes 105 | 106 | super(KittiGenerator, self).__init__(**kwargs) 107 | 108 | def size(self): 109 | """ Size of the dataset. 110 | """ 111 | return len(self.images) 112 | 113 | def num_classes(self): 114 | """ Number of classes in the dataset. 115 | """ 116 | return max(self.classes.values()) + 1 117 | 118 | def has_label(self, label): 119 | """ Return True if label is a known label. 120 | """ 121 | return label in self.labels 122 | 123 | def has_name(self, name): 124 | """ Returns True if name is a known class. 125 | """ 126 | return name in self.classes 127 | 128 | def name_to_label(self, name): 129 | """ Map name to label. 
130 | """ 131 | raise NotImplementedError() 132 | 133 | def label_to_name(self, label): 134 | """ Map label to name. 135 | """ 136 | return self.labels[label] 137 | 138 | def image_aspect_ratio(self, image_index): 139 | """ Compute the aspect ratio for an image with image_index. 140 | """ 141 | # PIL is fast for metadata 142 | image = Image.open(self.images[image_index]) 143 | return float(image.width) / float(image.height) 144 | 145 | def image_path(self, image_index): 146 | """ Get the path to an image. 147 | """ 148 | return self.images[image_index] 149 | 150 | def load_image(self, image_index): 151 | """ Load an image at the image_index. 152 | """ 153 | return read_image_bgr(self.image_path(image_index)) 154 | 155 | def load_annotations(self, image_index): 156 | """ Load annotations for an image_index. 157 | """ 158 | image_data = self.image_data[image_index] 159 | annotations = {'labels': np.empty((len(image_data),)), 'bboxes': np.empty((len(image_data), 4))} 160 | 161 | for idx, ann in enumerate(image_data): 162 | annotations['bboxes'][idx, 0] = float(ann['x1']) 163 | annotations['bboxes'][idx, 1] = float(ann['y1']) 164 | annotations['bboxes'][idx, 2] = float(ann['x2']) 165 | annotations['bboxes'][idx, 3] = float(ann['y2']) 166 | annotations['labels'][idx] = int(ann['cls_id']) 167 | 168 | return annotations 169 | -------------------------------------------------------------------------------- /data_utils/bboxCropper/bboxCropper.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from pathlib import Path 3 | import xml.etree.ElementTree as ET 4 | import numpy as np 5 | import os 6 | import random 7 | 8 | def main(): 9 | 10 | # open CFG file, define paths and crops shapes 11 | 12 | config = open('config.cfg') 13 | for line in config: 14 | if line.startswith('CROP_SIZE'): 15 | crop_size = int(line.split('=')[1].strip().replace('\n','')) 16 | if line.startswith('DATASET_PATH'): 17 | dataset_path = Path(line.split('=')[1].strip().replace('\n','')) 18 | if line.startswith('CROPS_FOLDER'): 19 | crops_folder = line.split('=')[1].strip().replace('\n','') 20 | if line.startswith('FRAMES_FOLDER'): 21 | frames_folder = line.split('=')[1].strip().replace('\n','') 22 | if line.startswith('MASKS_FOLDER'): 23 | masks_folder = line.split('=')[1].strip().replace('\n','') 24 | if line.startswith('INVERT_MASKS'): 25 | invert_masks = line.split('=')[1].strip().replace('\n','') 26 | if invert_masks.lower() in ['false']: 27 | invert_masks = False 28 | else: 29 | invert_masks = True 30 | 31 | images_folder = 'JPEGImages' 32 | annotations_folder = 'Annotations' 33 | config.close() 34 | 35 | print('Dataset location: ', dataset_path) 36 | 37 | # Create folders for outputs 38 | if frames_folder not in os.listdir(dataset_path): 39 | os.mkdir(Path(dataset_path, frames_folder)) 40 | print('Created folder: ', Path(dataset_path, frames_folder)) 41 | if crops_folder not in os.listdir(dataset_path): 42 | os.mkdir(Path(dataset_path, crops_folder)) 43 | print('Created folder: ', Path(dataset_path, crops_folder)) 44 | if masks_folder not in os.listdir(dataset_path): 45 | os.mkdir(Path(dataset_path, masks_folder)) 46 | print('Created folder: ', Path(dataset_path, masks_folder)) 47 | 48 | print('Processing started...') 49 | 50 | # parse each annotation file 51 | 52 | n_files = len(os.listdir(Path(dataset_path, annotations_folder))) 53 | passed_files=1 54 | 55 | for filename in os.listdir(Path(dataset_path, annotations_folder)): 56 | 57 | if not filename.endswith('.xml'): 
continue 58 | 59 | fullname = Path(dataset_path, annotations_folder, filename) 60 | tree = ET.parse(fullname) 61 | root = tree.getroot() 62 | bbox_num = 0 63 | 64 | img = cv2.imread(str(Path(dataset_path, images_folder, filename[:-3]+'jpg'))) 65 | 66 | for rec in root: 67 | 68 | # get source image size 69 | if rec.tag == 'size': 70 | height = int(rec.findtext('height')) 71 | width = int(rec.findtext('width')) 72 | 73 | # list all available bboxes 74 | if rec.tag == 'object': 75 | for box in rec: 76 | if box.tag=='bndbox': 77 | 78 | # get initial bbox corners 79 | ymin = int(box.findtext('ymin')) 80 | ymax = int(box.findtext('ymax')) 81 | xmin = int(box.findtext('xmin')) 82 | xmax = int(box.findtext('xmax')) 83 | 84 | # calculate necessary padding to get crop of crop_size 85 | padding_w = int((crop_size - (xmax - xmin))/2.) 86 | padding_h = int((crop_size - (ymax - ymin))/2.) 87 | 88 | # get random shift within 25% of crop_size from bbox center 89 | random_dx = int((random.random()-.5)*.5*crop_size) 90 | random_dy = int((random.random()-.5)*.5*crop_size) 91 | 92 | # calculate crop corners 93 | new_xmin = xmin - padding_w + random_dx 94 | new_xmax = xmax + padding_w + random_dx 95 | new_ymin = ymin - padding_h + random_dy 96 | new_ymax = ymax + padding_h + random_dy 97 | 98 | # do not proceed if crop is outside of image 99 | if (new_xmin<1 or new_xmax>width-1 or new_ymin<1 or new_ymax>height-1):continue 100 | 101 | dx = new_xmax - new_xmin 102 | dy = new_ymax - new_ymin 103 | 104 | # correct crop corners to get exact crop_size 105 | if dx