├── cral ├── models │ ├── .gitkeep │ ├── __init__.py │ ├── object_detection │ │ ├── SSD │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ └── matching_utils.py │ │ │ ├── __init__.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ └── keras_layer_L2Normalization.py │ │ │ ├── base.py │ │ │ ├── helpers.py │ │ │ └── tfrecord_parser.py │ │ ├── YoloV3 │ │ │ ├── __init__.py │ │ │ └── predict.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ ├── compute_overlap.pyx │ │ │ ├── predict_script.py │ │ │ ├── tfrecord_parser.py │ │ │ └── losses.py │ │ ├── FasterRCNN │ │ │ └── __init__.py │ │ └── __init__.py │ ├── semantic_segmentation │ │ ├── FpnNet │ │ │ ├── .gitkeep │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── tfrecord_parser.py │ │ ├── Unet │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── tfrecord_parser.py │ │ ├── PspNet │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── tfrecord_parser.py │ │ ├── SegNet │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── tfrecord_parser.py │ │ ├── LinkNet │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── tfrecord_parser.py │ │ ├── deeplabv3 │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ ├── deeplab.py │ │ │ └── tfrecord_parser.py │ │ ├── UnetPlusPlus │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── tfrecord_parser.py │ │ ├── __init__.py │ │ └── utils.py │ ├── instance_segmentation │ │ ├── __init__.py │ │ └── MaskRCNN │ │ │ └── __init__.py │ └── classification │ │ ├── classification_utils.py │ │ └── darknet.py ├── data_feeder │ ├── __init__.py │ ├── utils.py │ ├── semantic_seg_data_feeder.py │ ├── parallel_data_feeder.py │ └── classification_parallel_data_feeder.py ├── metrics │ ├── __init__.py │ └── object_detection │ │ ├── __init__.py │ │ └── mAP_eval.py ├── augmentations │ ├── __init__.py │ └── engine.py ├── __init__.py ├── version.py ├── pipeline │ └── __init__.py ├── data_versioning │ ├── cral_hash.py │ ├── cral_util.py │ ├── __init__.py │ └── classification_data_parse_v2.py └── common.py ├── .gitignore ├── setup.cfg ├── .pre-commit-config.yaml ├── .github └── workflows │ └── python-publish.yml ├── README.md ├── test ├── test_instance_segmentation.py ├── test_object_detection.py └── test_segmentation.py └── setup.py /cral/models/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cral/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cral/data_feeder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cral/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cral/augmentations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cral/models/object_detection/SSD/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/FpnNet/.gitkeep: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /cral/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import VERSION as __version__ 2 | -------------------------------------------------------------------------------- /cral/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Segmind Solutions Pvt Ltd. 2 | 3 | VERSION = '0.4.0' 4 | -------------------------------------------------------------------------------- /cral/metrics/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from cral.metrics.object_detection.mAP_eval import coco_mAP 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__/* 3 | .eggs/ 4 | cral.egg-info/ 5 | pydocs/ 6 | docs/ 7 | mkdocs/ 8 | *.so 9 | build/ 10 | dist/ 11 | wheelhouse/ 12 | .vscode/ 13 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/Unet/__init__.py: -------------------------------------------------------------------------------- 1 | from .tfrecord_parser import UNetGenerator 2 | from .unet import create_UNet 3 | from .utils import UNetConfig, UNetPredictor, log_UNet_config_params 4 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/FpnNet/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpnNet import create_FpnNet 2 | from .tfrecord_parser import FpnNetGenerator 3 | from .utils import FpnNetConfig, FpnNetPredictor, log_FpnNet_config_params 4 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/PspNet/__init__.py: -------------------------------------------------------------------------------- 1 | from .pspnet import create_PspNet 2 | from .tfrecord_parser import PspNetGenerator 3 | from .utils import PspNetConfig, PspNetPredictor, log_PspNet_config_params 4 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/SegNet/__init__.py: -------------------------------------------------------------------------------- 1 | from .segnet import create_SegNet 2 | from .tfrecord_parser import SegNetGenerator 3 | from .utils import SegNetConfig, SegNetPredictor, log_SegNet_config_params 4 | -------------------------------------------------------------------------------- /cral/models/object_detection/YoloV3/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import create_yolo_model 2 | from .losses import Yolo_Loss 3 | from .tfrecord_parser import YoloGenerator 4 | from .utils import YoloV3Config, log_yolo_config_params 5 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/LinkNet/__init__.py: -------------------------------------------------------------------------------- 1 | from .linknet import create_LinkNet 2 | from .tfrecord_parser import LinkNetGenerator 3 | from .utils import LinkNetConfig, log_LinkNet_config_params, LinkNetPredictor 4 | -------------------------------------------------------------------------------- /cral/models/instance_segmentation/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .MaskRCNN import (MaskRCNNConfig, MaskRCNNGenerator, MaskRCNNPredictor, 2 | create_MaskRCNN, log_MaskRCNN_config_params) 3 | from .utils import SparseMeanIoU, annotate_image 4 | -------------------------------------------------------------------------------- /cral/models/object_detection/SSD/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import create_ssd_model, decode_detections 2 | from .helpers import SSD300Config, log_ssd_config_params 3 | from .keras_ssd_loss import SSDLoss 4 | from .tfrecord_parser import SSD300Generator 5 | -------------------------------------------------------------------------------- /cral/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import ClassificationPipe 2 | from .object_detection_pipeline import ObjectDetectionPipe 3 | from .semantic_segmentation_pipeline import SemanticSegPipe 4 | from .instance_segmentation_pipeline import InstanceSegPipe 5 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/deeplabv3/__init__.py: -------------------------------------------------------------------------------- 1 | from .deeplab import create_DeepLabv3Plus 2 | from .tfrecord_parser import DeepLabv3Generator 3 | from .utils import (Deeplabv3Config, Deeplabv3Predictor, 4 | log_deeplabv3_config_params) 5 | -------------------------------------------------------------------------------- /cral/models/object_detection/retinanet/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import get_retinanet, get_retinanet_fromconfig 2 | from .losses import focal, smooth_l1 3 | from .tfrecord_parser import RetinanetGenerator 4 | from .utils import RetinanetConfig, log_retinanet_config_params 5 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/UnetPlusPlus/__init__.py: -------------------------------------------------------------------------------- 1 | from .tfrecord_parser import UnetPlusPlusGenerator 2 | from .unetplusplus import create_UnetPlusPlus 3 | from .utils import (UnetPlusPlusConfig, UnetPlusPlusPredictor, 4 | log_UnetPlusPlus_config_params) 5 | -------------------------------------------------------------------------------- /cral/models/object_detection/SSD/models/__init__.py: -------------------------------------------------------------------------------- 1 | # from cral.models.object_detection.SSD.models.keras_ssd300 import ssd_300 2 | # from cral.models.object_detection.SSD.models.keras_layer_DecodeDetections import decode_detections 3 | from .keras_layer_DecodeDetections import DecodeDetections 4 | from .keras_ssd300 import ssd_300 5 | -------------------------------------------------------------------------------- /cral/models/instance_segmentation/MaskRCNN/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import (MaskRCNNConfig, 2 | log_MaskRCNN_config_params, 3 | MaskRCNNPredictor) 4 | from .maskrcnn import create_MaskRCNN 5 | from .tfrecord_parser import MaskRCNNGenerator 6 | from .instance_seg_utils import * 7 | from .mrcnn_utils import * 8 | from .parsing_utils import * 9 | -------------------------------------------------------------------------------- 
/cral/models/object_detection/FasterRCNN/__init__.py: -------------------------------------------------------------------------------- 1 | from .fasterrcnn import create_FasterRCNN 2 | from .frcnn_utils import * 3 | from .obj_det_utils import * 4 | from .parsing_utils import * 5 | from .tfrecord_parser import FasterRCNNGenerator 6 | from .utils import (FasterRCNNConfig, 7 | log_FasterRCNN_config_params, 8 | FasterRCNNPredictor) 9 |
-------------------------------------------------------------------------------- /cral/data_versioning/cral_hash.py: -------------------------------------------------------------------------------- 1 | import xxhash 2 | 3 | BUFF_SIZE = 65536 4 | 5 | 6 | def hashFile(file_path): 7 | # Read raw bytes so the digest reflects the file contents exactly; 8 | # decoding as Latin-1 and re-encoding would alter non-ASCII bytes. 9 | with open(file_path, 'rb') as file: 10 | hs = xxhash.xxh64() 11 | while True: 12 | data = file.read(BUFF_SIZE) 13 | if not data: 14 | break 15 | hs.update(data) 16 | return hs.hexdigest() 17 | 18 | 19 | def hashStr(string): 20 | return xxhash.xxh64(string.encode()).hexdigest() 21 |
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | known_third_party = PIL,albumentations,boto3,botocore,cv2,jsonpickle,matplotlib,numpy,pandas,pycocotools,pydensecrf,scipy,skimage,tensorflow,tqdm,xxhash 6 | no_lines_before = STDLIB,LOCALFOLDER 7 | default_section = THIRDPARTY 8 | skip = cral/common.py, cral/pipeline/object_detection_pipeline.py 9 | 10 | [yapf] 11 | BASED_ON_STYLE = pep8 12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 14 |
-------------------------------------------------------------------------------- /cral/data_versioning/cral_util.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | root_path = '' 5 | 6 | _ALLOWED_IMAGE_FORMATS = ('.jpg', '.jpeg', '.png') 7 | 8 | 9 | def fileName(file_path, ext=False): 10 | head, tail = os.path.split(file_path) 11 | if ext is False: 12 | file_name = '.'.join(tail.split('.')[:-1]) 13 | else: 14 | file_name = tail 15 | return file_name 16 | 17 | 18 | def find_images(path): 19 | 20 | res = list() 21 | res = [ 22 | f for f in glob.glob(os.path.join(path, '*.*')) 23 | if f.endswith(_ALLOWED_IMAGE_FORMATS) 24 | ] 25 | return res 26 |
-------------------------------------------------------------------------------- /cral/data_feeder/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def _bytes_feature(value): 5 | """Returns a bytes_list from a string / byte.""" 6 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 7 | 8 | 9 | def _float_feature(value): 10 | """Returns a float_list from a float / double.""" 11 | return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) 12 | 13 | 14 | def _int64_feature(value): 15 | """Returns an int64_list from a bool / enum / int / uint.""" 16 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 17 | 18 | 19 | def _bytes_list_feature(value): 20 | """Returns a bytes_list from a list of string / byte.""" 21 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 22 | 23 | 24 | def _float_list_feature(value): 25 | """Returns a float_list from a list of float / double.""" 26 | return
tf.train.Feature(float_list=tf.train.FloatList(value=value)) 27 | 28 | 29 | def _int64_list_feature(value): 30 | """Returns an int64_list from a list of bool / enum / int / uint.""" 31 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 32 |
-------------------------------------------------------------------------------- /cral/models/semantic_segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .deeplabv3 import (Deeplabv3Config, DeepLabv3Generator, 2 | Deeplabv3Predictor, create_DeepLabv3Plus, 3 | log_deeplabv3_config_params) 4 | from .FpnNet import (FpnNetConfig, FpnNetGenerator, FpnNetPredictor, 5 | create_FpnNet, log_FpnNet_config_params) 6 | from .LinkNet import (LinkNetConfig, LinkNetGenerator, 7 | LinkNetPredictor, create_LinkNet, 8 | log_LinkNet_config_params) 9 | from .PspNet import (PspNetConfig, PspNetGenerator, PspNetPredictor, 10 | create_PspNet, log_PspNet_config_params) 11 | from .SegNet import (SegNetConfig, SegNetGenerator, SegNetPredictor, 12 | create_SegNet, log_SegNet_config_params) 13 | from .Unet import (UNetConfig, UNetGenerator, UNetPredictor, create_UNet, 14 | log_UNet_config_params) 15 | from .UnetPlusPlus import (UnetPlusPlusConfig, UnetPlusPlusGenerator, 16 | UnetPlusPlusPredictor, create_UnetPlusPlus, 17 | log_UnetPlusPlus_config_params) 18 | from .utils import SparseMeanIoU, annotate_image # , densecrf 19 |
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://gitlab.com/pycqa/flake8.git 3 | rev: 3.8.3 4 | hooks: 5 | - id: flake8 6 | args: ["--exclude", "*pb2.py,*__init__.py, test/*.py"] 7 | - repo: https://github.com/asottile/seed-isort-config 8 | rev: v2.2.0 9 | hooks: 10 | - id: seed-isort-config 11 | - repo: https://github.com/timothycrosley/isort 12 | rev: 4.3.21 13 | hooks: 14 | - id: isort 15 | args: [--filter-files] 16 | - repo: https://github.com/pre-commit/mirrors-yapf 17 | rev: v0.30.0 18 | hooks: 19 | - id: yapf 20 | - repo: https://github.com/pre-commit/pre-commit-hooks 21 | rev: v3.1.0 22 | hooks: 23 | - id: trailing-whitespace 24 | - id: check-yaml 25 | - id: end-of-file-fixer 26 | - id: requirements-txt-fixer 27 | - id: double-quote-string-fixer 28 | - id: check-merge-conflict 29 | - id: fix-encoding-pragma 30 | args: ["--remove"] 31 | - id: mixed-line-ending 32 | args: ["--fix=lf"] 33 | - repo: https://github.com/myint/docformatter 34 | rev: v1.3.1 35 | hooks: 36 | - id: docformatter 37 | args: ["--in-place", "--wrap-descriptions", "79"] 38 |
-------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | build: 12 | 13 | runs-on: ubuntu-18.04 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.6' 21 | - name: Install dependencies 22 | run: | 23 | sudo python3 -m pip install --upgrade pip 24 | sudo pip3 install -U setuptools
wheel twine auditwheel numpy 25 | sudo apt-get install -y patchelf 26 | 27 | - name: Build 28 | run: | 29 | sudo python3 setup.py build_ext --inplace 30 | sudo python3 setup.py sdist bdist_wheel 31 | 32 | - name: Repair and Publish 33 | env: 34 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 35 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 36 | run: | 37 | sudo auditwheel repair dist/*.whl 38 | twine upload wheelhouse/* 39 | -------------------------------------------------------------------------------- /cral/common.py: -------------------------------------------------------------------------------- 1 | from .models.classification import ( # noqa: F401 2 | VGG16, VGG19, Darknet53, DenseNet121, DenseNet169, DenseNet201, Detnet, 3 | EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, 4 | EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7, 5 | InceptionResNetV2, InceptionV3, MobileNet, MobileNetV2, NASNetLarge, 6 | NASNetMobile, ResNet50, ResNet50V2, ResNet101, ResNet101V2, ResNet152, 7 | ResNet152V2, Xception, densely_connected_head) 8 | 9 | classification_networks = dict( 10 | densenet121=DenseNet121, 11 | densenet169=DenseNet169, 12 | densenet201=DenseNet201, 13 | inceptionresnetv2=InceptionResNetV2, 14 | inceptionv3=InceptionV3, 15 | mobilenet=MobileNet, 16 | mobilenetv2=MobileNetV2, 17 | nasnetlarge=NASNetLarge, 18 | nasnetmobile=NASNetMobile, 19 | resnet50=ResNet50, 20 | resnet101=ResNet101, 21 | resnet152=ResNet152, 22 | resnet50v2=ResNet50V2, 23 | resnet101v2=ResNet101V2, 24 | resnet152v2=ResNet152V2, 25 | vgg16=VGG16, 26 | vgg19=VGG19, 27 | xception=Xception, 28 | efficientnetb0=EfficientNetB0, 29 | efficientnetb1=EfficientNetB1, 30 | efficientnetb2=EfficientNetB2, 31 | efficientnetb3=EfficientNetB3, 32 | efficientnetb4=EfficientNetB4, 33 | efficientnetb5=EfficientNetB5, 34 | efficientnetb6=EfficientNetB6, 35 | efficientnetb7=EfficientNetB7, 36 | darknet53=Darknet53, 37 | detnet=Detnet) 38 | -------------------------------------------------------------------------------- /cral/models/object_detection/retinanet/compute_overlap.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 13 | def compute_overlap( 14 | np.ndarray[double, ndim=2] boxes, 15 | np.ndarray[double, ndim=2] query_boxes 16 | ): 17 | """ 18 | Args 19 | a: (N, 4) ndarray of float 20 | b: (K, 4) ndarray of float 21 | 22 | Returns 23 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 24 | """ 25 | cdef unsigned int N = boxes.shape[0] 26 | cdef unsigned int K = query_boxes.shape[0] 27 | cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64) 28 | cdef double iw, ih, box_area 29 | cdef double ua 30 | cdef unsigned int k, n 31 | for k in range(K): 32 | box_area = ( 33 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 34 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 35 | ) 36 | for n in range(N): 37 | iw = ( 38 | min(boxes[n, 2], query_boxes[k, 2]) - 39 | max(boxes[n, 0], query_boxes[k, 0]) + 1 40 | ) 41 | if iw > 0: 42 | ih = ( 43 | min(boxes[n, 3], query_boxes[k, 3]) - 44 | max(boxes[n, 1], query_boxes[k, 1]) + 1 45 | ) 46 | if ih > 0: 47 | ua = np.float64( 48 | (boxes[n, 2] - 
boxes[n, 0] + 1) * 49 | (boxes[n, 3] - boxes[n, 1] + 1) + 50 | box_area - iw * ih 51 | ) 52 | overlaps[n, k] = iw * ih / ua 53 | return overlaps 54 |
-------------------------------------------------------------------------------- /cral/models/object_detection/SSD/base.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow import keras 3 | 4 | from .models.keras_layer_DecodeDetections import DecodeDetections 5 | from .models.keras_ssd300 import ssd_300 6 | 7 | # from cral.tracking import log_params_decorator 8 | 9 | 10 | # @log_params_decorator 11 | def create_ssd_model(config, 12 | num_classes, 13 | feature_extractor='vgg16', 14 | weights='imagenet'): 15 | assert feature_extractor == 'vgg16', 'only vgg16 supported for now' 16 | # K.clear_session() 17 | 18 | model, preprocess_input, predictor_sizes = ssd_300( 19 | weights=weights, 20 | image_size=config.input_shape, 21 | n_classes=num_classes, 22 | mode='training', 23 | l2_regularization=0.0005, 24 | scales=config.scales, 25 | aspect_ratios_per_layer=config.aspect_ratios, 26 | two_boxes_for_ar1=config.two_boxes_for_ar1, 27 | steps=config.strides, 28 | offsets=config.offsets, 29 | clip_boxes=config.clip_boxes, 30 | variances=config.variances, 31 | normalize_coords=config.normalize_coords, 32 | return_predictor_sizes=True) 33 | 34 | return model, preprocess_input, predictor_sizes 35 | 36 | 37 | def decode_detections(training_model, config, **kwargs): 38 | decoded_predictions = DecodeDetections( 39 | # confidence_thresh=0.5, 40 | # iou_threshold=0.45, 41 | # top_k=200, 42 | # nms_max_output_size=400, 43 | coords=config.coords, 44 | normalize_coords=config.normalize_coords, 45 | img_height=config.height, 46 | img_width=config.width, 47 | name='SSD300', 48 | **kwargs)( 49 | training_model.output) 50 | 51 | model = keras.models.Model( 52 | inputs=training_model.input, 53 | outputs=[ 54 | decoded_predictions[:, :, 2:], decoded_predictions[:, :, 1], 55 | tf.cast(decoded_predictions[:, :, 0], tf.int32) 56 | ]) 57 | 58 | return model 59 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Upload Python Package](https://github.com/segmind/cral/workflows/Upload%20Python%20Package/badge.svg) 2 | ![GitHub tag (latest SemVer)](https://img.shields.io/github/v/tag/segmind/cral) 3 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/cral) 4 | ![PyPI](https://img.shields.io/pypi/v/cral) 5 | # CNN Research Abstraction Library 6 | 7 | The CNN Research Abstraction Library, or CRAL for short, is a deep learning computer vision library for data scientists, researchers, and developers. With a primary focus on applied deep learning, the CRAL library encourages rapid development and comes with ready-to-use state-of-the-art networks and other pragmatic tools for a variety of applications in the computer vision space. 8 | 9 | Our aim is also to make it easier to reproduce and extend the results of various Deep Learning-powered Computer Vision (DLCV) algorithms developed in academia and industrial labs.
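To give a flavour of the API, here is a minimal quick-start sketch adapted from this repository's own test suite (`test/test_instance_segmentation.py`); the dataset paths are placeholders to be replaced with your own data:

```python
from cral.models.instance_segmentation import MaskRCNNConfig
from cral.pipeline import InstanceSegPipe

pipe = InstanceSegPipe()

# Register a COCO-format dataset; `split` holds out 20% for validation.
pipe.add_data(
    train_images_dir='/path/to/images',        # placeholder path
    train_anno_dir='/path/to/instances.json',  # placeholder path
    annotation_format='coco',
    split=0.2)
pipe.lock_data()

# Choose a backbone and model configuration, then train.
pipe.set_algo(
    feature_extractor='resnet101',
    config=MaskRCNNConfig(height=256, width=256),
    weights='imagenet')
pipe.train(
    num_epochs=2,
    snapshot_prefix='mrcnn',
    snapshot_path='/tmp',
    snapshot_every_n=1)
```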
10 | 11 | # List of Algorithms 12 | 13 | ## Object detection 14 | - RetinaNet 15 | - YoloV3 16 | - SSD 17 | - FasterRCNN 18 | 19 | ## Instance Segmentation 20 | - MaskRCNN 21 | 22 | ## Semantic Segmentation 23 | - UNet 24 | - UNet++ 25 | - Deeplabv3+ 26 | - FpnNet 27 | - PspNet 28 | - SegNet 29 | - LinkNet 30 | 31 | # Guiding Principles 32 | 33 | **Simple:** To make it easy for deep learning engineers & students alike to use neural networks to build computer vision applications of their choice, using a low-code approach. 34 | 35 | **Fast:** To accelerate going from experimentation to a working model. 36 | 37 | **Reproducible:** To offer implementations that can easily be trained and reproduced on your own data. 38 | 39 | # Components 40 | 41 | CRAL has a modular design that lets you use each of its components independently. Alternatively, you can use the pipeline to get started quickly with multiple networks out of the box. 42 | 43 | | Components | Description | 44 | |---|---| 45 | | [CNN models](/api/models) | Ready-to-use implementations of state-of-the-art (SOTA) algorithms. | 46 | | Pipeline tools | Load and validate your data before you start training. | 47 | | Optimization and debugging | Integration with experiment tracking, hyperparameter optimization and other toolsets that help you build models faster and more transparently. | 48 | 49 | # Detailed documentation: [Link](https://cral.segmind.com) 50 |
-------------------------------------------------------------------------------- /cral/models/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from cral.models.object_detection.object_detection_utils import ( 4 | Predictor, annotate_image, convert_to_coco, display_image) 5 | from cral.models.object_detection.FasterRCNN import ( 6 | FasterRCNNConfig, FasterRCNNGenerator, FasterRCNNPredictor, 7 | create_FasterRCNN, log_FasterRCNN_config_params) 8 | from cral.models.object_detection.retinanet import (RetinanetConfig, 9 | RetinanetGenerator, 10 | get_retinanet, 11 | get_retinanet_fromconfig) 12 | from cral.models.object_detection.SSD import (SSD300Config, SSDLoss, 13 | create_ssd_model, 14 | decode_detections, 15 | log_ssd_config_params) 16 | from cral.models.object_detection.YoloV3 import YoloV3Config 17 | 18 | retinanet_resnet50 = partial(get_retinanet, 'resnet50') 19 | retinanet_resnet101 = partial(get_retinanet, 'resnet101') 20 | retinanet_resnet152 = partial(get_retinanet, 'resnet152') 21 | retinanet_resnet50v2 = partial(get_retinanet, 'resnet50v2') 22 | retinanet_resnet101v2 = partial(get_retinanet, 'resnet101v2') 23 | retinanet_resnet152v2 = partial(get_retinanet, 'resnet152v2') 24 | retinanet_densenet121 = partial(get_retinanet, 'densenet121') 25 | retinanet_densenet169 = partial(get_retinanet, 'densenet169') 26 | retinanet_densenet201 = partial(get_retinanet, 'densenet201') 27 | retinanet_mobilenet = partial(get_retinanet, 'mobilenet') 28 | retinanet_mobilenetv2 = partial(get_retinanet, 'mobilenetv2') # guessed 29 | retinanet_vgg16 = partial(get_retinanet, 'vgg16') 30 | retinanet_vgg19 = partial(get_retinanet, 'vgg19') 31 | retinanet_efficientnetb0 = partial(get_retinanet, 'efficientnetb0') 32 | retinanet_efficientnetb1 = partial(get_retinanet, 'efficientnetb1') 33 | retinanet_efficientnetb2 = partial(get_retinanet, 'efficientnetb2') 34 | retinanet_efficientnetb3 = partial(get_retinanet, 'efficientnetb3') 35 | retinanet_efficientnetb4 = partial(get_retinanet, 'efficientnetb4') 36 |
retinanet_efficientnetb5 = partial(get_retinanet, 'efficientnetb5') 37 | retinanet_efficientnetb6 = partial(get_retinanet, 'efficientnetb6') 38 | retinanet_xception = partial(get_retinanet, 'xception') # guessed 39 | retinanet_detnet = partial(get_retinanet, 'detnet') 40 |
-------------------------------------------------------------------------------- /cral/models/object_detection/SSD/helpers.py: -------------------------------------------------------------------------------- 1 | # from cral.tracking import log_params 2 | 3 | 4 | class SSD300Config(object): 5 | """docstring for SSD300Config.""" 6 | 7 | def __init__( 8 | self, 9 | aspect_ratios=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], 10 | [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], 11 | [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], 12 | [1.0, 2.0, 0.5]], 13 | strides=[8, 16, 32, 64, 100, 300], 14 | scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], 15 | offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], 16 | max_boxes=300, 17 | variances=[0.1, 0.1, 0.2, 0.2], 18 | # score=0.3, 19 | alpha=1.0, 20 | neg_pos_ratio=3, 21 | pos_iou_threshold=0.5, 22 | neg_iou_limit=0.3): 23 | 24 | self.height = 300 25 | self.width = 300 26 | 27 | self.input_shape = (self.height, self.width, 3) 28 | 29 | self.pos_iou_threshold = pos_iou_threshold 30 | self.neg_iou_limit = neg_iou_limit 31 | self.max_boxes_per_image = max_boxes 32 | 33 | self.aspect_ratios = aspect_ratios 34 | self.strides = strides 35 | 36 | self.variances = variances 37 | 38 | assert len(aspect_ratios) == len(offsets) 39 | self.offsets = offsets 40 | self.scales = scales 41 | self.max_boxes = max_boxes 42 | # needs to be an argument 43 | # parameters for loss function 44 | self.neg_pos_ratio = neg_pos_ratio 45 | self.alpha = alpha 46 | 47 | self.two_boxes_for_ar1 = True 48 | self.clip_boxes = False 49 | self.normalize_coords = True 50 | self.input_anno_format = 'pascal_voc' 51 | 52 | self.coords = 'centroids' 53 | 54 | 55 | def log_ssd_config_params(config): 56 | config_data = {} 57 | config_data['ssd_aspect_ratios'] = config.aspect_ratios 58 | config_data['ssd_strides'] = config.strides 59 | config_data['ssd_scales'] = config.scales 60 | config_data['ssd_offsets'] = config.offsets 61 | config_data['ssd_max_boxes'] = config.max_boxes 62 | config_data['ssd_variances'] = config.variances 63 | config_data['ssd_pos_iou_threshold'] = config.pos_iou_threshold 64 | config_data['ssd_neg_iou_limit'] = config.neg_iou_limit 65 | config_data['ssd_alpha'] = config.alpha 66 | config_data['ssd_neg_pos_ratio'] = config.neg_pos_ratio 67 | # return log_params(config_data) 68 |
-------------------------------------------------------------------------------- /cral/models/object_detection/SSD/models/keras_layer_L2Normalization.py: -------------------------------------------------------------------------------- 1 | """A custom Keras layer to perform L2-normalization. 2 | 3 | Copyright (C) 2018 Pierluigi Ferrari 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License.
16 | """ 17 | 18 | from __future__ import division 19 | 20 | import numpy as np 21 | from tensorflow.keras import backend as K 22 | from tensorflow.keras.layers import InputSpec, Layer 23 | 24 | 25 | class L2Normalization(Layer): 26 | """Performs L2 normalization on the input tensor with a learnable scaling 27 | parameter as described in the paper "Parsenet: Looking Wider to See Better" 28 | (see references) and as used in the original SSD model. 29 | 30 | Arguments: 31 | gamma_init (int): The initial scaling parameter. Defaults to 20 32 | following the SSD paper. 33 | 34 | Input shape: 35 | 4D tensor of shape `(batch, channels, height, width)` if 36 | `dim_ordering = 'th'` or `(batch, height, width, channels)` if 37 | `dim_ordering = 'tf'`. 38 | 39 | Returns: 40 | The scaled tensor. Same shape as the input tensor. 41 | 42 | References: 43 | http://cs.unc.edu/~wliu/papers/parsenet.pdf 44 | """ 45 | 46 | def __init__(self, gamma_init=20, **kwargs): 47 | # if K.image_dim_ordering() == 'tf': 48 | # self.axis = 3 49 | # else: 50 | # self.axis = 1 51 | self.axis = 3 52 | self.gamma_init = gamma_init 53 | super(L2Normalization, self).__init__(**kwargs) 54 | 55 | def build(self, input_shape): 56 | self.input_spec = [InputSpec(shape=input_shape)] 57 | gamma = self.gamma_init * np.ones((input_shape[self.axis], )) 58 | self.gamma = K.variable(gamma, name='{}_gamma'.format(self.name)) 59 | # `trainable_weights` is a read-only property in tf.keras, so the 60 | # variable has to be registered on the underlying list instead. 61 | self._trainable_weights.append(self.gamma) 62 | super(L2Normalization, self).build(input_shape) 63 | 64 | def call(self, x, mask=None): 65 | output = K.l2_normalize(x, self.axis) 66 | return output * self.gamma 67 | 68 | def get_config(self): 69 | config = {'gamma_init': self.gamma_init} 70 | base_config = super(L2Normalization, self).get_config() 71 | return dict(list(base_config.items()) + list(config.items())) 72 |
-------------------------------------------------------------------------------- /cral/metrics/object_detection/mAP_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tempfile 4 | 5 | from cral.metrics.object_detection.utils import (check_gt_catIds, coco_res, 6 | voc_to_coco_gt) 7 | from pycocotools.coco import COCO 8 | from pycocotools.cocoeval import COCOeval 9 | 10 | _ALLOWED_ANNOTATION_FORMATS = ('coco', 'pascal_voc') 11 | 12 | dataset_json = os.path.join(tempfile.gettempdir(), 'dataset.json') 13 | with open(dataset_json, 'r') as f: 14 | dataset_dict = json.load(f) 15 | 16 | 17 | def coco_mAP(prediction_func=None, 18 | test_images_dir=None, 19 | test_anno_dir=None, 20 | annotation_format='pascal_voc'): 21 | 22 | assert os.path.isdir( 23 | test_images_dir), f'{test_images_dir} is not a directory' 24 | assert isinstance( 25 | annotation_format, str 26 | ), f'annotation_format has to be of type str but got {type(annotation_format)} instead' # noqa: E501 27 | annotation_format = annotation_format.lower() 28 | assert annotation_format in _ALLOWED_ANNOTATION_FORMATS, f'supported annotation formats are {_ALLOWED_ANNOTATION_FORMATS}' # noqa: E501 29 | if annotation_format == 'coco': 30 | assert os.path.isfile(test_anno_dir) and test_anno_dir.endswith( 31 | '.json'), f'{test_anno_dir} is not a json file' 32 | else: 33 | assert os.path.isdir( 34 | test_anno_dir), f'{test_anno_dir} is not a directory' 35 | 36 | # Generate json file for ground truth 37 | if annotation_format == 'pascal_voc': 38 | path_coco_gt = voc_to_coco_gt( 39 | image_dir=test_images_dir, 40 | annotation_dir=test_anno_dir, 41 | label_list=dataset_dict['classes']) 42 |
fix_gt_catIds = False 43 | 44 | elif annotation_format == 'coco': 45 | path_coco_gt = test_anno_dir 46 | fix_gt_catIds, num_categories = check_gt_catIds( 47 | path_coco_gt, dataset_dict['num_classes'] 48 | ) # return True when gt json has extra categories 49 | if fix_gt_catIds is True: 50 | print( 51 | f"expected {dataset_dict['num_classes']} categories but got {num_categories} instead in {os.path.split(path_coco_gt)[-1]}" # noqa: E501 52 | ) 53 | 54 | # Generate json file for the predictions 55 | path_coco_res = coco_res( 56 | path_coco_gt=path_coco_gt, 57 | prediction_func=prediction_func, 58 | image_dir=test_images_dir, 59 | score_threshold=0.5, 60 | annotation_format=annotation_format, 61 | autofix=fix_gt_catIds) 62 | 63 | # initiate COCO API 64 | coco_gt = COCO(path_coco_gt) 65 | coco_dt = coco_gt.loadRes(path_coco_res) 66 | iouType = 'bbox' 67 | 68 | coco_eval = COCOeval(cocoGt=coco_gt, cocoDt=coco_dt, iouType=iouType) 69 | 70 | coco_eval.evaluate() 71 | coco_eval.accumulate() 72 | coco_eval.summarize() 73 |
-------------------------------------------------------------------------------- /test/test_instance_segmentation.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import tempfile 4 | import unittest 5 | import zipfile 6 | 7 | import cv2 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | 12 | class Test_InstanceSegmentationPipeline(unittest.TestCase): 13 | 14 | @classmethod 15 | def setUpClass(cls): 16 | 17 | zip_url = 'http://images.cocodataset.org/zips/val2017.zip' 18 | path_to_zip_file = tf.keras.utils.get_file( 19 | 'val2017.zip', 20 | zip_url, 21 | cache_dir=tempfile.gettempdir(), 22 | cache_subdir='', 23 | extract=False) 24 | directory_to_extract_to = os.path.join(tempfile.gettempdir(), 25 | 'coco2017') 26 | with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref: 27 | zip_ref.extractall(directory_to_extract_to) 28 | 29 | cls.dataset = os.path.join(directory_to_extract_to, 'val2017') 30 | 31 | zip_anno_url = 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip' # noqa: E501 32 | path_to_zip_file = tf.keras.utils.get_file( 33 | 'annotations_trainval2017.zip', 34 | zip_anno_url, 35 | cache_dir=tempfile.gettempdir(), 36 | cache_subdir='', 37 | extract=False) 38 | directory_to_extract_to = os.path.join(tempfile.gettempdir(), 39 | 'coco2017_annotations') 40 | with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref: 41 | zip_ref.extractall(directory_to_extract_to) 42 | 43 | cls.annotations = os.path.join(directory_to_extract_to, 44 | 'annotations') 45 | 46 | def setUp(self): 47 | self.dataset = self.__class__.dataset 48 | self.annotations = self.__class__.annotations 49 | 50 | def test_MaskRCNN(self): 51 | from cral.pipeline import InstanceSegPipe 52 | from cral.models.instance_segmentation import MaskRCNNConfig 53 | 54 | pipe = InstanceSegPipe() 55 | 56 | pipe.add_data( 57 | train_images_dir=os.path.join(self.dataset), 58 | train_anno_dir=os.path.join(self.annotations, 59 | 'instances_val2017.json'), 60 | annotation_format='coco', 61 | split=0.2) 62 | 63 | meta_info = pipe.lock_data() 64 | 65 | pipe.set_algo( 66 | feature_extractor='resnet101', 67 | config=MaskRCNNConfig(height=256, 68 | width=256), 69 | weights='imagenet') 70 | 71 | pipe.train( 72 | num_epochs=2, 73 | snapshot_prefix='test_mrcnn', 74 | snapshot_path='/tmp', 75 | snapshot_every_n=1, 76 | steps_per_epoch=2) 77 | 78 | tf.keras.backend.clear_session() 79 | 80 | 81 | if __name__ == '__main__': 82 | unittest.main() 83 |
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | from distutils.command.build_ext import build_ext as DistUtilsBuildExt 4 | from importlib.machinery import SourceFileLoader 5 | from setuptools import Extension, find_packages, setup 6 | 7 | import numpy 8 | 9 | version = SourceFileLoader('cral.version', 10 | os.path.join('cral', 11 | 'version.py')).load_module().VERSION 12 | 13 | extensions = [ 14 | Extension('cral.models.object_detection.retinanet.compute_overlap', 15 | ['cral/models/object_detection/retinanet/compute_overlap.pyx']) 16 | ] 17 | 18 | 19 | class BuildExtension(setuptools.Command): 20 | description = DistUtilsBuildExt.description 21 | user_options = DistUtilsBuildExt.user_options 22 | boolean_options = DistUtilsBuildExt.boolean_options 23 | help_options = DistUtilsBuildExt.help_options 24 | 25 | def __init__(self, *args, **kwargs): 26 | from setuptools.command.build_ext import build_ext as \ 27 | SetupToolsBuildExt 28 | 29 | # Bypass __setattr__ to avoid infinite recursion. 30 | self.__dict__['_command'] = SetupToolsBuildExt(*args, **kwargs) 31 | 32 | def __getattr__(self, name): 33 | return getattr(self._command, name) 34 | 35 | def __setattr__(self, name, value): 36 | setattr(self._command, name, value) 37 | 38 | def initialize_options(self, *args, **kwargs): 39 | return self._command.initialize_options(*args, **kwargs) 40 | 41 | def finalize_options(self, *args, **kwargs): 42 | ret = self._command.finalize_options(*args, **kwargs) 43 | import numpy 44 | self.include_dirs.append(numpy.get_include()) 45 | return ret 46 | 47 | def run(self, *args, **kwargs): 48 | return self._command.run(*args, **kwargs) 49 | 50 | 51 | setup( 52 | name='cral', 53 | version=version, 54 | packages=find_packages(exclude=['tests', 'tests.*']), 55 | install_requires=[ 56 | 'tqdm', 57 | 'xxhash', 58 | 'pandas', 59 | # 'opencv-python==3.4.2.17', 60 | 'albumentations==0.4.5', 61 | 'jsonpickle', 62 | 'pycocotools', 63 | 'pydensecrf' 64 | ], 65 | ext_modules=extensions, 66 | include_dirs=[numpy.get_include()], 67 | cmdclass={'build_ext': BuildExtension}, 68 | author='T Pratik', 69 | author_email='pratik@segmind.com', 70 | keywords=[ 71 | 'CNN', 'Deep Learning', 'classification', 'object detection', 72 | 'segmentation', 'keras', 'tensorflow-keras' 73 | ], 74 | description='CRAL: Library for CNNs', 75 | long_description_content_type='text/markdown', 76 | long_description=open('README.md').read(), 77 | license='Apache License 2.0', 78 | classifiers=[ 79 | 'Intended Audience :: Developers', 80 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 81 | 'Programming Language :: Python :: 3.6', 82 | ], 83 | ) 84 | # sudo python3 setup.py build_ext --inplace 85 |
-------------------------------------------------------------------------------- /cral/models/object_detection/retinanet/predict_script.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from cral.models.object_detection import Predictor 4 | from tensorflow import keras 5 | from tensorflow.keras.applications.resnet import \ 6 | preprocess_input as resnet_preprocess_input # noqa: F401 7 | 8 | 9 | def load_model(checkpoint_path): 10 | 11 | pred_model = keras.models.load_model( 12 | filepath=checkpoint_path, compile=False) 13 | 14 | print('Weights are Loaded ...') 15 | 16 | return pred_model 17 | 18 | 19 |
def pad_resize(image, height, width, scale): 20 | """Summary. 21 | 22 | Args: 23 | image (TYPE): Description 24 | height (TYPE): Description 25 | width (TYPE): Description 26 | scale (TYPE): Description 27 | 28 | Returns: 29 | numpy nd.array: Description 30 | """ 31 | # pad image 32 | padded_image = np.zeros( 33 | shape=(int(height), int(width), 3), dtype=image.dtype) 34 | h, w, _ = image.shape 35 | padded_image[:h, :w, :] = image 36 | 37 | # resize image 38 | resized_image = cv2.resize( 39 | padded_image, None, fx=scale, fy=scale).astype(keras.backend.floatx()) 40 | return resized_image 41 | 42 | 43 | def predict(model, image_path, preprocess_fn, min_side=800, max_side=1333): 44 | 45 | im = np.array(keras.preprocessing.image.load_img(path=image_path)) 46 | 47 | smallest_side = min(im.shape[0], im.shape[1]) 48 | largest_side = max(im.shape[0], im.shape[1]) 49 | 50 | scale = min_side / smallest_side 51 | 52 | if largest_side * scale > max_side: 53 | scale = max_side / largest_side 54 | 55 | images_batch = [ 56 | cv2.resize(im, None, fx=scale, fy=scale).astype(keras.backend.floatx()) 57 | ] 58 | 59 | images_batch = preprocess_fn(np.array(images_batch)) 60 | 61 | bboxes, confidence, label = model.predict(images_batch) 62 | 63 | return bboxes[0].astype(int) / scale, confidence[0], label[0] 64 | 65 | 66 | class RetinanetPredictor(Predictor): 67 | """docstring for RetinanetPredictor.""" 68 | 69 | def __init__(self, min_side, max_side, *args, **kwargs): 70 | super(RetinanetPredictor, self).__init__(*args, **kwargs) 71 | self.min_side = min_side 72 | self.max_side = max_side 73 | 74 | def predict(self, image): 75 | im = self.load_image(image) 76 | 77 | smallest_side = min(im.shape[0], im.shape[1]) 78 | largest_side = max(im.shape[0], im.shape[1]) 79 | 80 | scale = self.min_side / smallest_side 81 | 82 | if largest_side * scale > self.max_side: 83 | scale = self.max_side / largest_side 84 | 85 | image = cv2.resize( 86 | im, None, fx=scale, fy=scale).astype(keras.backend.floatx()) 87 | 88 | images_batch = self.preprocessing_func(image) 89 | 90 | bboxes, confidence, label = self.model.predict(images_batch) 91 | 92 | return bboxes[0].astype(int) / scale, confidence[0], label[0] 93 | 94 | 95 | if __name__ == '__main__': 96 | load_model('./checkpoints/prediction') 97 | -------------------------------------------------------------------------------- /cral/data_versioning/__init__.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | 3 | from .classification_data_parse_v2 import \ 4 | make_csv as classification_dataset_hasher 5 | from .ObjectDetection_parse_data_v2 import \ 6 | make_csv as objectDetection_dataset_hasher 7 | from .segmentation_data_parse_v2 import make_csv as segmentation_dataset_hasher 8 | from .instanceSeg_data_parse_v2 import make_csv as instanceSeg_dataset_hasher 9 | 10 | 11 | def log_classification_dataset(*args, **kwargs): 12 | """Parses the classification data and logs to tracking server. 13 | 14 | Args: 15 | train_images_dir (str): path to images 16 | val_images_dir (str, optional): path to validation images 17 | split (float, optional): float to divide training dataset into training and validation 18 | """ 19 | 20 | dataset_hash, dataset_csv_path, dataset_json = classification_dataset_hasher( 21 | tempfile.gettempdir(), *args, **kwargs) 22 | 23 | 24 | def log_segmentation_dataset(*args, **kwargs): 25 | """Parses the segmentation data and logs to tracking server. 
26 | 27 | Args: 28 | annotation_format (str): one of 'coco' or 'pascal' 29 | train_images_dir (str): path to images 30 | train_anno_dir (str): path to annotation 31 | img_to_anno (function, optional): Function to convert image name to annotation name 32 | val_images_dir (str, optional): path to validation images 33 | val_anno_dir (str, optional): path to validation annotation 34 | split (float, optional): float to divide training dataset into training and val 35 | """ 36 | dataset_hash, dataset_csv_path, dataset_json = segmentation_dataset_hasher( 37 | tempfile.gettempdir(), *args, **kwargs) 38 | 39 | 40 | def log_object_detection_dataset(*args, **kwargs): 41 | """Parses the object detection data and logs to tracking server. 42 | 43 | Args: 44 | annotation_format (str): one of 'yolo', 'coco', 'pascal' 45 | train_images_dir (str): path to images 46 | train_anno_dir (str): path to annotation 47 | img_to_anno (function, optional): Function to convert image name to annotation name 48 | val_images_dir (str, optional): path to validation images 49 | val_anno_dir (str, optional): path to validation annotation 50 | split (float, optional): float to divide training dataset into training and val 51 | """ 52 | dataset_hash, dataset_csv_path, dataset_json = objectDetection_dataset_hasher( 53 | tempfile.gettempdir(), *args, **kwargs) 54 | 55 | 56 | def log_instance_segmentation_dataset(*args, **kwargs): 57 | """Parses the instance segmentation data and logs to tracking server. 58 | 59 | Args: 60 | annotation_format (str): one of 'coco' 61 | train_images_dir (str): path to images 62 | train_anno_dir (str): path to annotation 63 | img_to_anno (function, optional): Function to convert image name to annotation name 64 | val_images_dir (str, optional): path to validation images 65 | val_anno_dir (str, optional): path to validation annotation 66 | split (float, optional): float to divide training dataset into training and val 67 | """ 68 | dataset_hash, dataset_csv_path, dataset_json = instanceSeg_dataset_hasher( 69 | tempfile.gettempdir(), *args, **kwargs) 70 | # Tracking-server upload is disabled in this build, as elsewhere in the 71 | # package; `try_mlflow_log` and `log_artifact` are not imported here. 72 | # try_mlflow_log(log_artifact, key='dataset_versioned.csv', path=dataset_csv_path) 73 | # try_mlflow_log(log_artifact, key='dataset_meta.json', path=dataset_json) 74 |
-------------------------------------------------------------------------------- /cral/models/object_detection/retinanet/tfrecord_parser.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from cral.data_feeder.object_detection_parser import DetectionBase 4 | 5 | from .preprocessing import anchor_targets_bbox, anchors_for_shape 6 | from .utils import RetinanetConfig 7 | 8 | 9 | class RetinanetGenerator(DetectionBase): 10 | """docstring for RetinanetGenerator.""" 11 | 12 | def __init__(self, config, *args, **kwargs): 13 | super(RetinanetGenerator, self).__init__(*args, **kwargs) 14 | assert isinstance( 15 | config, 16 | RetinanetConfig), 'please provide a `RetinanetConfig()` object' 17 | self.config = config 18 | 19 | def process_bboxes_labels(self, image_array, bboxes, labels): 20 | 21 | # delete bboxes containing [-1,-1,-1,-1] 22 | bboxes = bboxes[~np.all(bboxes < 0, axis=-1)] 23 | 24 | # delete labels containing [-1] 25 | labels = labels[labels > -1] 26 | 27 | # augment here 28 | if self.aug is not None: 29 | image_array, bboxes, labels = self.aug.apply( 30 | image_array, bboxes, labels) 31 | image_array = image_array.astype(np.uint8) 32 | 33 | # generate raw anchors 34 | raw_anchors =
anchors_for_shape( 35 | image_shape=image_array.shape, 36 | sizes=self.config.sizes, 37 | ratios=self.config.ratios, 38 | scales=self.config.scales, 39 | strides=self.config.strides, 40 | pyramid_levels=self.config.pyramid_levels, 41 | shapes_callback=None) 42 | 43 | # generate anchorboxes and class labels 44 | gt_regression, gt_classification = anchor_targets_bbox( 45 | anchors=raw_anchors, 46 | image=image_array, 47 | bboxes=bboxes, 48 | gt_labels=labels, 49 | num_classes=self.num_classes, 50 | negative_overlap=0.4, 51 | positive_overlap=0.5) 52 | 53 | return image_array, gt_regression, gt_classification 54 | 55 | def yield_image_regression_classification(self, xmin_batch, ymin_batch, 56 | xmax_batch, ymax_batch, 57 | label_batch, image_batch): 58 | 59 | regression_batch = list() 60 | classification_batch = list() 61 | image_batch_aug = list() 62 | 63 | for index in range(self.batch_size): 64 | xmins, ymins, xmaxs, ymaxs, labels = xmin_batch[index], ymin_batch[ 65 | index], xmax_batch[index], ymax_batch[index], label_batch[ 66 | index] 67 | image_array = image_batch[index] 68 | bboxes = tf.convert_to_tensor([xmins, ymins, xmaxs, ymaxs], 69 | dtype=tf.keras.backend.floatx()) 70 | bboxes = tf.transpose(bboxes) 71 | augmented_image, gt_regression, gt_classification = tf.numpy_function( # noqa: E501 72 | self.process_bboxes_labels, [image_array, bboxes, labels], 73 | Tout=[ 74 | tf.uint8, 75 | tf.keras.backend.floatx(), 76 | tf.keras.backend.floatx() 77 | ]) 78 | 79 | regression_batch.append(gt_regression) 80 | classification_batch.append(gt_classification) 81 | image_batch_aug.append(augmented_image) 82 | 83 | return tf.convert_to_tensor(image_batch_aug), tf.convert_to_tensor( 84 | regression_batch), tf.convert_to_tensor(classification_batch) 85 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/FpnNet/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | # from cral.tracking import log_params 5 | from tensorflow.keras.layers import InputSpec, Layer 6 | 7 | 8 | class Upsample(Layer): 9 | """Image Upsample layer. 10 | 11 | Resize the batched image input to target height and width. The input should 12 | be a 4-D tensor in the format of NHWC. 13 | Arguments: 14 | height: Integer, the height of the output shape. 15 | width: Integer, the width of the output shape. 16 | interpolation: String, the interpolation method. Defaults to `bilinear`. 17 | Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, 18 | `lanczos5`, `gaussian`, `mitchellcubic` 19 | name: A string, the name of the layer. 
20 | """ 21 | 22 | def __init__( 23 | self, 24 | height, 25 | width, 26 | interpolation='bilinear', 27 | # name='Upsample', 28 | **kwargs): 29 | self.target_height = height 30 | self.target_width = width 31 | self.interpolation = interpolation 32 | # self._interpolation_method = get_interpolation(interpolation) 33 | self.input_spec = InputSpec(ndim=4) 34 | super(Upsample, self).__init__(**kwargs) 35 | 36 | def call(self, inputs): 37 | outputs = tf.image.resize( 38 | images=inputs, 39 | size=[self.target_height, self.target_width], 40 | method=self.interpolation) 41 | return outputs 42 | 43 | def compute_output_shape(self, input_shape): 44 | input_shape = tf.TensorShape(input_shape).as_list() 45 | return tf.TensorShape([ 46 | input_shape[0], self.target_height, self.target_width, 47 | input_shape[3] 48 | ]) 49 | 50 | def get_config(self): 51 | config = { 52 | 'height': self.target_height, 53 | 'width': self.target_width, 54 | 'interpolation': self.interpolation, 55 | } 56 | base_config = super(Upsample, self).get_config() 57 | return dict(list(base_config.items()) + list(config.items())) 58 | 59 | 60 | class FpnNetConfig(object): 61 | """docstring for FpnNetConfig.""" 62 | 63 | def __init__(self, height=576, width=576): 64 | 65 | # assert output_stride in [8,] #,16] <--- support to be added 66 | assert height % 32 == 0 and width % 32 == 0, 'Height and width both should be a multiple of 32' # noqa: E501 67 | self.height = height 68 | self.width = width 69 | self.input_shape = (self.height, self.width, 3) 70 | 71 | 72 | def log_FpnNet_config_params(config): 73 | 74 | assert isinstance(config, 75 | FpnNetConfig), 'config not supported {}'.format(config) 76 | # config_data = vars(config) 77 | # log_params(config_data) 78 | 79 | 80 | class FpnNetPredictor(object): 81 | """docstring for FpnNetPredictor.""" 82 | 83 | def __init__(self, height, width, model, preprocessing_func, dcrf): 84 | # super(RetinanetPredictor, self).__init__(*args, **kwargs) 85 | self.height = height 86 | self.width = width 87 | self.model = model 88 | self.preprocessing_func = preprocessing_func 89 | self.allow_dcrf = dcrf 90 | 91 | def load_image(self, image_path): 92 | img_array = np.array( 93 | tf.keras.preprocessing.image.load_img(path=image_path)) 94 | return img_array 95 | 96 | def predict(self, image): 97 | im = self.load_image(image) 98 | 99 | image = cv2.resize(im, (self.width, self.height)) 100 | image_array = np.expand_dims(image, axis=0) 101 | 102 | images_batch = self.preprocessing_func(image_array) 103 | images_batch = tf.cast(images_batch, tf.keras.backend.floatx()) 104 | y = self.model.predict(images_batch)[0] 105 | y = np.argmax(y, axis=-1) 106 | 107 | return y 108 |
-------------------------------------------------------------------------------- /cral/models/semantic_segmentation/LinkNet/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | from tensorflow.keras.layers import InputSpec, Layer 5 | 6 | # from cral.tracking import log_params 7 | 8 | 9 | class Upsample(Layer): 10 | """Image Upsample layer. 11 | 12 | Resize the batched image input to target height and width. The input should 13 | be a 4-D tensor in the format of NHWC. 14 | Arguments: 15 | height: Integer, the height of the output shape. 16 | width: Integer, the width of the output shape. 17 | interpolation: String, the interpolation method. Defaults to `bilinear`.
18 | Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, 19 | `lanczos5`, `gaussian`, `mitchellcubic` 20 | name: A string, the name of the layer. 21 | """ 22 | 23 | def __init__( 24 | self, 25 | height, 26 | width, 27 | interpolation='bilinear', 28 | # name='Upsample', 29 | **kwargs): 30 | self.target_height = height 31 | self.target_width = width 32 | self.interpolation = interpolation 33 | # self._interpolation_method = get_interpolation(interpolation) 34 | self.input_spec = InputSpec(ndim=4) 35 | super(Upsample, self).__init__(**kwargs) 36 | 37 | def call(self, inputs): 38 | outputs = tf.image.resize( 39 | images=inputs, 40 | size=[self.target_height, self.target_width], 41 | method=self.interpolation) 42 | return outputs 43 | 44 | def compute_output_shape(self, input_shape): 45 | input_shape = tf.TensorShape(input_shape).as_list() 46 | return tf.TensorShape([ 47 | input_shape[0], self.target_height, self.target_width, 48 | input_shape[3] 49 | ]) 50 | 51 | def get_config(self): 52 | config = { 53 | 'height': self.target_height, 54 | 'width': self.target_width, 55 | 'interpolation': self.interpolation, 56 | } 57 | base_config = super(Upsample, self).get_config() 58 | return dict(list(base_config.items()) + list(config.items())) 59 | 60 | 61 | class LinkNetConfig(object): 62 | """docstring for LinkNetConfig.""" 63 | 64 | def __init__(self, height=576, width=576): 65 | 66 | # assert output_stride in [8,] #,16] <--- support to be added 67 | assert height % 32 == 0 and width % 32 == 0, 'Height and width both should be a multiple of 32' # noqa: E501 68 | self.height = height 69 | self.width = width 70 | self.input_shape = (self.height, self.width, 3) 71 | 72 | 73 | def log_LinkNet_config_params(config): 74 | 75 | assert isinstance(config, 76 | LinkNetConfig), 'config not supported {}'.format(config) 77 | # config_data = vars(config) 78 | # log_params(config_data) 79 | 80 | 81 | class LinkNetPredictor(object): 82 | """docstring for LinkNetPredictor.""" 83 | 84 | def __init__(self, height, width, model, preprocessing_func, dcrf): 85 | # super(RetinanetPredictor, self).__init__(*args, **kwargs) 86 | self.height = height 87 | self.width = width 88 | self.model = model 89 | self.preprocessing_func = preprocessing_func 90 | self.allow_dcrf = dcrf 91 | 92 | def load_image(self, image_path): 93 | img_array = np.array( 94 | tf.keras.preprocessing.image.load_img(path=image_path)) 95 | return img_array 96 | 97 | def predict(self, image): 98 | im = self.load_image(image) 99 | 100 | image = cv2.resize(im, (self.width, self.height)) 101 | image_array = np.expand_dims(image, axis=0) 102 | 103 | images_batch = self.preprocessing_func(image_array) 104 | images_batch = tf.cast(images_batch, tf.keras.backend.floatx()) 105 | y = self.model.predict(images_batch)[0] 106 | y = np.argmax(y, axis=-1) 107 | 108 | return y 109 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/Unet/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | from cral.models.semantic_segmentation.utils import do_crf 5 | # from cral.tracking import log_params 6 | from tensorflow.keras.layers import InputSpec, Layer 7 | 8 | 9 | class Upsample(Layer): 10 | """Image Upsample layer. 11 | 12 | Resize the batched image input to target height and width. The input should 13 | be a 4-D tensor in the format of NHWC. 
14 | Arguments: 15 | height: Integer, the height of the output shape. 16 | width: Integer, the width of the output shape. 17 | interpolation: String, the interpolation method. Defaults to `bilinear`. 18 | Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, 19 | `lanczos5`, `gaussian`, `mitchellcubic` 20 | name: A string, the name of the layer. 21 | """ 22 | 23 | def __init__( 24 | self, 25 | height, 26 | width, 27 | interpolation='bilinear', 28 | # name='Upsample', 29 | **kwargs): 30 | self.target_height = height 31 | self.target_width = width 32 | self.interpolation = interpolation 33 | # self._interpolation_method = get_interpolation(interpolation) 34 | self.input_spec = InputSpec(ndim=4) 35 | super(Upsample, self).__init__(**kwargs) 36 | 37 | def call(self, inputs): 38 | outputs = tf.image.resize( 39 | images=inputs, 40 | size=[self.target_height, self.target_width], 41 | method=self.interpolation) 42 | return outputs 43 | 44 | def compute_output_shape(self, input_shape): 45 | input_shape = tf.TensorShape(input_shape).as_list() 46 | return tf.TensorShape([ 47 | input_shape[0], self.target_height, self.target_width, 48 | input_shape[3] 49 | ]) 50 | 51 | def get_config(self): 52 | config = { 53 | 'height': self.target_height, 54 | 'width': self.target_width, 55 | 'interpolation': self.interpolation, 56 | } 57 | base_config = super(Upsample, self).get_config() 58 | return dict(list(base_config.items()) + list(config.items())) 59 | 60 | 61 | class UNetConfig(object): 62 | """Configuration for UNet.""" 63 | 64 | def __init__(self, height=576, width=576): 65 | 66 | # assert output_stride in [8,] #,16] <--- support to be added 67 | assert height % 32 == 0 and width % 32 == 0, 'Height and width should both be multiples of 32' # noqa: E501 68 | self.height = height 69 | self.width = width 70 | self.input_shape = (self.height, self.width, 3) 71 | 72 | 73 | def log_UNet_config_params(config): 74 | 75 | assert isinstance(config, 76 | UNetConfig), 'config not supported {}'.format(config) 77 | # config_data = vars(config) 78 | # log_params(config_data) 79 | 80 | 81 | class UNetPredictor(object): 82 | """Predictor for UNet models.""" 83 | 84 | def __init__(self, height, width, model, preprocessing_func, dcrf): 85 | # super(RetinanetPredictor, self).__init__(*args, **kwargs) 86 | self.height = height 87 | self.width = width 88 | self.model = model 89 | self.preprocessing_func = preprocessing_func 90 | self.allow_dcrf = dcrf 91 | 92 | def load_image(self, image_path): 93 | img_array = np.array( 94 | tf.keras.preprocessing.image.load_img(path=image_path)) 95 | # print(img_array.shape,img_array.dtype) 96 | return img_array 97 | 98 | def predict(self, image): 99 | im = self.load_image(image) 100 | 101 | image = cv2.resize(im, (self.width, self.height)) 102 | image_array = np.expand_dims(image, axis=0) 103 | 104 | images_batch = self.preprocessing_func(image_array) 105 | images_batch = tf.cast(images_batch, tf.keras.backend.floatx()) 106 | # print(images_batch.shape, images_batch.dtype) 107 | 108 | y = self.model.predict(images_batch)[0] 109 | y = np.argmax(y, axis=-1) 110 | 111 | if self.allow_dcrf: 112 | # return densecrf(image.astype(np.uint8), y) 113 | return do_crf(image.astype(np.uint8), y) 114 | 115 | return y 116 | -------------------------------------------------------------------------------- /cral/models/object_detection/retinanet/losses.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow
import keras 3 | 4 | 5 | def focal(alpha=0.25, gamma=2.0): 6 | """Create a functor for computing the focal loss. 7 | 8 | Args 9 | alpha: Scale the focal weight with alpha. 10 | gamma: Take the power of the focal weight with gamma. 11 | 12 | Returns 13 | A functor that computes the focal loss using the alpha and gamma. 14 | """ 15 | 16 | def _focal(y_true, y_pred): 17 | """Compute the focal loss given the target tensor and the predicted 18 | tensor. 19 | 20 | As defined in https://arxiv.org/abs/1708.02002 21 | 22 | Args 23 | y_true: Tensor of target data from the generator with shape 24 | (B, N, num_classes). 25 | y_pred: Tensor of predicted data from the network with shape 26 | (B, N, num_classes). 27 | 28 | Returns 29 | The focal loss of y_pred w.r.t. y_true. 30 | """ 31 | # -1 for ignore, 0 for background, 1 for object 32 | labels = y_true[:, :, :-1] 33 | anchor_state = y_true[:, :, -1] 34 | classification = y_pred 35 | 36 | # filter out "ignore" anchors 37 | indices = tf.where(keras.backend.not_equal(anchor_state, -1)) 38 | labels = tf.gather_nd(labels, indices) 39 | classification = tf.gather_nd(classification, indices) 40 | 41 | # compute the focal loss 42 | alpha_factor = keras.backend.ones_like(labels) * alpha 43 | alpha_factor = tf.where( 44 | keras.backend.equal(labels, 1), alpha_factor, 1 - alpha_factor) 45 | focal_weight = tf.where( 46 | keras.backend.equal(labels, 1), 1 - classification, classification) 47 | focal_weight = alpha_factor * focal_weight**gamma 48 | 49 | cls_loss = focal_weight * keras.backend.binary_crossentropy( 50 | labels, classification) 51 | 52 | # compute the normalizer: the number of positive anchors 53 | normalizer = tf.where(keras.backend.equal(anchor_state, 1)) 54 | normalizer = keras.backend.cast( 55 | keras.backend.shape(normalizer)[0], keras.backend.floatx()) 56 | normalizer = keras.backend.maximum( 57 | keras.backend.cast_to_floatx(1.0), normalizer) 58 | 59 | return keras.backend.sum(cls_loss) / normalizer 60 | 61 | return _focal 62 | 63 | 64 | def smooth_l1(sigma=3.0): 65 | """Create a smooth L1 loss functor. 66 | 67 | Args 68 | sigma: This argument defines the point where the loss changes from 69 | L2 to L1. 70 | 71 | Returns 72 | A functor for computing the smooth L1 loss given target data and 73 | predicted data. 74 | """ 75 | sigma_squared = sigma**2 76 | 77 | def _smooth_l1(y_true, y_pred): 78 | """Compute the smooth L1 loss of y_pred w.r.t. y_true. 79 | 80 | Args 81 | y_true: Tensor from the generator of shape (B, N, 5). 82 | The last value for each box is the state of the anchor 83 | (ignore, negative, positive). 84 | y_pred: Tensor from the network of shape (B, N, 4). 85 | 86 | Returns 87 | The smooth L1 loss of y_pred w.r.t. y_true. 
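For intuition, a small worked example (numbers chosen purely for
illustration): with the default sigma=3, the loss switches from
quadratic to linear at |x| = 1/sigma^2 = 1/9. A regression error of
x = 0.05 lies in the quadratic region and contributes
0.5 * 9 * 0.05^2 ≈ 0.011, whereas x = 0.5 lies in the linear
region and contributes 0.5 - 0.5/9 ≈ 0.444.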
88 | """ 89 | # separate target and state 90 | regression = y_pred 91 | regression_target = y_true[:, :, :-1] 92 | anchor_state = y_true[:, :, -1] 93 | 94 | # filter out "ignore" anchors 95 | indices = tf.where(keras.backend.equal(anchor_state, 1)) 96 | regression = tf.gather_nd(regression, indices) 97 | regression_target = tf.gather_nd(regression_target, indices) 98 | 99 | # compute smooth L1 loss 100 | # f(x) = 0.5 * (sigma * x)^2 if |x| < 1 / sigma / sigma 101 | # |x| - 0.5 / sigma / sigma otherwise 102 | regression_diff = regression - regression_target 103 | regression_diff = keras.backend.abs(regression_diff) 104 | regression_loss = tf.where( 105 | keras.backend.less(regression_diff, 1.0 / sigma_squared), 106 | 0.5 * sigma_squared * keras.backend.pow(regression_diff, 2), 107 | regression_diff - 0.5 / sigma_squared) 108 | 109 | # compute the normalizer: the number of positive anchors 110 | normalizer = keras.backend.maximum(1, keras.backend.shape(indices)[0]) 111 | normalizer = keras.backend.cast( 112 | normalizer, dtype=keras.backend.floatx()) 113 | return keras.backend.sum(regression_loss) / normalizer 114 | 115 | return _smooth_l1 116 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/SegNet/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | from cral.models.semantic_segmentation.utils import do_crf 5 | # from cral.tracking import log_params 6 | from tensorflow.keras import backend as K 7 | from tensorflow.keras.layers import InputSpec, Layer 8 | 9 | 10 | class Expand_Dims(Layer): 11 | """docstring for Expand_Dims.""" 12 | 13 | def call(self, inputs): 14 | outputs = K.expand_dims(inputs, 1) 15 | return outputs 16 | 17 | def compute_output_shape(self, input_shape): 18 | input_shape = tf.TensorShape(input_shape).as_list() 19 | return tf.TensorShape([input_shape[0], 1] + input_shape[1:]) 20 | 21 | 22 | class Upsample(Layer): 23 | """Image Upsample layer. 24 | 25 | Resize the batched image input to target height and width. The input should 26 | be a 4-D tensor in the format of NHWC. 27 | Arguments: 28 | height: Integer, the height of the output shape. 29 | width: Integer, the width of the output shape. 30 | interpolation: String, the interpolation method. Defaults to `bilinear`. 31 | Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, 32 | `lanczos5`, `gaussian`, `mitchellcubic` 33 | name: A string, the name of the layer. 
34 | """ 35 | 36 | def __init__( 37 | self, 38 | height, 39 | width, 40 | interpolation='bilinear', 41 | # name='Upsample', 42 | **kwargs): 43 | self.target_height = height 44 | self.target_width = width 45 | self.interpolation = interpolation 46 | # self._interpolation_method = get_interpolation(interpolation) 47 | self.input_spec = InputSpec(ndim=4) 48 | super(Upsample, self).__init__(**kwargs) 49 | 50 | def call(self, inputs): 51 | outputs = tf.image.resize( 52 | images=inputs, 53 | size=[self.target_height, self.target_width], 54 | method=self.interpolation) 55 | return outputs 56 | 57 | def compute_output_shape(self, input_shape): 58 | input_shape = tf.TensorShape(input_shape).as_list() 59 | return tf.TensorShape([ 60 | input_shape[0], self.target_height, self.target_width, 61 | input_shape[3] 62 | ]) 63 | 64 | def get_config(self): 65 | config = { 66 | 'height': self.target_height, 67 | 'width': self.target_width, 68 | 'interpolation': self.interpolation, 69 | } 70 | base_config = super(Upsample, self).get_config() 71 | return dict(list(base_config.items()) + list(config.items())) 72 | 73 | 74 | class SegNetConfig(object): 75 | """docstring for Deeplabv3Config.""" 76 | 77 | def __init__(self, height=576, width=576, num_upsample_layers=3): 78 | self.height = height 79 | self.width = width 80 | self.num_upsample_layers = num_upsample_layers 81 | assert num_upsample_layers in [ 82 | 2, 3, 4, 5 83 | ], 'num_upsample_layers should be in [2, 3, 4, 5]' 84 | assert height % 32 == 0 and width % 32 == 0, 'height and width should be multiple of 32' # noqa: E501 85 | self.input_shape = (self.height, self.width, 3) 86 | 87 | 88 | def log_SegNet_config_params(config): 89 | 90 | assert isinstance(config, 91 | SegNetConfig), 'config not supported {}'.format(config) 92 | # config_data = vars(config) 93 | # log_params(config_data) 94 | 95 | 96 | class SegNetPredictor(object): 97 | """docstring for Deeplabv3Predictor.""" 98 | 99 | def __init__(self, height, width, model, preprocessing_func, dcrf): 100 | # super(RetinanetPredictor, self).__init__(*args, **kwargs) 101 | self.height = height 102 | self.width = width 103 | self.model = model 104 | self.preprocessing_func = preprocessing_func 105 | self.allow_dcrf = dcrf 106 | 107 | def load_image(self, image_path): 108 | img_array = np.array( 109 | tf.keras.preprocessing.image.load_img(path=image_path)) 110 | # print(img_array.shape,img_array.dtype) 111 | return img_array 112 | 113 | def predict(self, image): 114 | im = self.load_image(image) 115 | 116 | image = cv2.resize(im, (self.width, self.height)) 117 | image_array = np.expand_dims(image, axis=0) 118 | 119 | images_batch = self.preprocessing_func(image_array) 120 | images_batch = tf.cast(images_batch, tf.keras.backend.floatx()) 121 | # print(images_batch.shape, images_batch.dtype) 122 | 123 | y = self.model.predict(images_batch)[0] 124 | y = np.argmax(y, axis=-1) 125 | 126 | if self.allow_dcrf: 127 | # return densecrf(image.astype(np.uint8), y) 128 | return do_crf(image.astype(np.uint8), y) 129 | 130 | return y 131 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/deeplabv3/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | from cral.models.semantic_segmentation.utils import do_crf 5 | # from cral.tracking import log_params 6 | from tensorflow.keras import backend as K 7 | from tensorflow.keras.layers import InputSpec, Layer 
8 | 9 | 10 | class Expand_Dims(Layer): 11 | """docstring for Expand_Dims.""" 12 | 13 | def call(self, inputs): 14 | outputs = K.expand_dims(inputs, 1) 15 | return outputs 16 | 17 | def compute_output_shape(self, input_shape): 18 | input_shape = tf.TensorShape(input_shape).as_list() 19 | return tf.TensorShape([input_shape[0], 1] + input_shape[1:]) 20 | 21 | 22 | class Upsample(Layer): 23 | """Image Upsample layer. 24 | 25 | Resize the batched image input to target height and width. The input should 26 | be a 4-D tensor in the format of NHWC. 27 | Arguments: 28 | height: Integer, the height of the output shape. 29 | width: Integer, the width of the output shape. 30 | interpolation: String, the interpolation method. Defaults to `bilinear`. 31 | Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, 32 | `lanczos5`, `gaussian`, `mitchellcubic` 33 | name: A string, the name of the layer. 34 | """ 35 | 36 | def __init__( 37 | self, 38 | height, 39 | width, 40 | interpolation='bilinear', 41 | # name='Upsample', 42 | **kwargs): 43 | self.target_height = height 44 | self.target_width = width 45 | self.interpolation = interpolation 46 | # self._interpolation_method = get_interpolation(interpolation) 47 | self.input_spec = InputSpec(ndim=4) 48 | super(Upsample, self).__init__(**kwargs) 49 | 50 | def call(self, inputs): 51 | outputs = tf.image.resize( 52 | images=inputs, 53 | size=[self.target_height, self.target_width], 54 | method=self.interpolation) 55 | return outputs 56 | 57 | def compute_output_shape(self, input_shape): 58 | input_shape = tf.TensorShape(input_shape).as_list() 59 | return tf.TensorShape([ 60 | input_shape[0], self.target_height, self.target_width, 61 | input_shape[3] 62 | ]) 63 | 64 | def get_config(self): 65 | config = { 66 | 'height': self.target_height, 67 | 'width': self.target_width, 68 | 'interpolation': self.interpolation, 69 | } 70 | base_config = super(Upsample, self).get_config() 71 | return dict(list(base_config.items()) + list(config.items())) 72 | 73 | 74 | class Deeplabv3Config(object): 75 | """docstring for Deeplabv3Config.""" 76 | 77 | def __init__(self, height=576, width=576, output_stride=8): 78 | self.height = height 79 | self.width = width 80 | 81 | # assert output_stride in [8,] #,16] <--- support to be added 82 | assert output_stride in [ 83 | 8, 84 | ], 'Supported output stride -> 8' 85 | self.output_stride = output_stride 86 | 87 | self.atrous_rates = (12, 24, 36) 88 | 89 | if output_stride == 16: 90 | self.atrous_rates = [x // 2 for x in self.atrous_rates] 91 | 92 | self.input_shape = (self.height, self.width, 3) 93 | 94 | 95 | def log_deeplabv3_config_params(config): 96 | 97 | assert isinstance( 98 | config, Deeplabv3Config), 'config not supported {}'.format(config) 99 | # config_data = vars(config) 100 | # log_params(config_data) 101 | 102 | 103 | class Deeplabv3Predictor(object): 104 | """docstring for Deeplabv3Predictor.""" 105 | 106 | def __init__(self, height, width, model, preprocessing_func, dcrf): 107 | # super(RetinanetPredictor, self).__init__(*args, **kwargs) 108 | self.height = height 109 | self.width = width 110 | self.model = model 111 | self.preprocessing_func = preprocessing_func 112 | self.allow_dcrf = dcrf 113 | 114 | def load_image(self, image_path): 115 | img_array = np.array( 116 | tf.keras.preprocessing.image.load_img(path=image_path)) 117 | # print(img_array.shape,img_array.dtype) 118 | return img_array 119 | 120 | def predict(self, image): 121 | im = self.load_image(image) 122 | 123 | image = cv2.resize(im, (self.width, 
self.height)) 124 | image_array = np.expand_dims(image, axis=0) 125 | 126 | images_batch = self.preprocessing_func(image_array) 127 | images_batch = tf.cast(images_batch, tf.keras.backend.floatx()) 128 | # print(images_batch.shape, images_batch.dtype) 129 | 130 | y = self.model.predict(images_batch)[0] 131 | y = np.argmax(y, axis=-1) 132 | 133 | if self.allow_dcrf: 134 | # return densecrf(image.astype(np.uint8), y) 135 | return do_crf(image.astype(np.uint8), y) 136 | 137 | return y 138 | -------------------------------------------------------------------------------- /test/test_object_detection.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import tempfile 4 | import unittest 5 | import zipfile 6 | 7 | import cv2 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | 12 | class Test_DetectionPipeline(unittest.TestCase): 13 | 14 | @classmethod 15 | def setUpClass(cls): 16 | 17 | zip_url = 'https://segmind-data.s3.ap-south-1.amazonaws.com/edge/data/aerial-vehicles-dataset.zip' 18 | path_to_zip_file = tf.keras.utils.get_file( 19 | 'aerial-vehicles-dataset.zip', 20 | zip_url, 21 | cache_dir=tempfile.gettempdir(), 22 | cache_subdir='', 23 | extract=False) 24 | directory_to_extract_to = os.path.join(tempfile.gettempdir(), 25 | 'aerial-vehicles-dataset') 26 | with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref: 27 | zip_ref.extractall(directory_to_extract_to) 28 | 29 | cls.dataset = directory_to_extract_to 30 | 31 | def setUp(self): 32 | self.dataset = type(self).dataset 33 | 34 | def test_retinanet(self): 35 | from cral.pipeline import ObjectDetectionPipe 36 | from cral.models.object_detection import RetinanetConfig 37 | 38 | pipe = ObjectDetectionPipe() 39 | 40 | pipe.add_data( 41 | train_images_dir=os.path.join(self.dataset, 'images'), 42 | train_anno_dir=os.path.join(self.dataset, 'annotations', 43 | 'pascalvoc_xml'), 44 | annotation_format='pascal_voc', 45 | split=0.2) 46 | 47 | pipe.lock_data() 48 | 49 | pipe.set_algo(feature_extractor='resnet50', config=RetinanetConfig()) 50 | 51 | pipe.train( 52 | num_epochs=2, 53 | snapshot_prefix='test_retinanet', 54 | snapshot_path='/tmp', 55 | snapshot_every_n=10, 56 | batch_size=1, 57 | steps_per_epoch=2) 58 | 59 | tf.keras.backend.clear_session() 60 | 61 | def test_yolov3(self): 62 | from cral.pipeline import ObjectDetectionPipe 63 | from cral.models.object_detection import YoloV3Config 64 | 65 | pipe = ObjectDetectionPipe() 66 | 67 | pipe.add_data( 68 | train_images_dir=os.path.join(self.dataset, 'images'), 69 | train_anno_dir=os.path.join(self.dataset, 'annotations', 70 | 'pascalvoc_xml'), 71 | annotation_format='pascal_voc', 72 | split=0.2) 73 | 74 | pipe.lock_data() 75 | 76 | pipe.set_algo(feature_extractor='darknet53', config=YoloV3Config()) 77 | 78 | pipe.train( 79 | num_epochs=2, 80 | snapshot_prefix='test_yolov3', 81 | snapshot_path='/tmp', 82 | snapshot_every_n=10, 83 | batch_size=1, 84 | steps_per_epoch=2) 85 | 86 | tf.keras.backend.clear_session() 87 | 88 | def test_ssd(self): 89 | from cral.pipeline import ObjectDetectionPipe 90 | from cral.models.object_detection import SSD300Config 91 | 92 | pipe = ObjectDetectionPipe() 93 | 94 | pipe.add_data( 95 | train_images_dir=os.path.join(self.dataset, 'images'), 96 | train_anno_dir=os.path.join(self.dataset, 'annotations', 97 | 'pascalvoc_xml'), 98 | annotation_format='pascal_voc', 99 | split=0.2) 100 | 101 | pipe.lock_data() 102 | 103 | pipe.set_algo(feature_extractor='vgg16', config=SSD300Config()) 104 | 105 |
pipe.train( 106 | num_epochs=2, 107 | snapshot_prefix='test_ssd', 108 | snapshot_path='/tmp', 109 | snapshot_every_n=10, 110 | batch_size=1, 111 | steps_per_epoch=2) 112 | 113 | tf.keras.backend.clear_session() 114 | 115 | def test_fasterrcnn(self): 116 | from cral.pipeline import ObjectDetectionPipe 117 | from cral.models.object_detection import FasterRCNNConfig 118 | 119 | pipe = ObjectDetectionPipe() 120 | 121 | pipe.add_data( 122 | train_images_dir=os.path.join(self.dataset, 'images'), 123 | train_anno_dir=os.path.join(self.dataset, 'annotations', 124 | 'pascalvoc_xml'), 125 | annotation_format='pascal_voc', 126 | split=0.2) 127 | 128 | meta_info = pipe.lock_data() 129 | 130 | pipe.set_algo( 131 | feature_extractor='resnet101', 132 | config=FasterRCNNConfig(height=256, width=256), 133 | weights='imagenet') 134 | 135 | pipe.train( 136 | num_epochs=2, 137 | snapshot_prefix='test_fasterrcnn', 138 | snapshot_path='/tmp', 139 | snapshot_every_n=1, 140 | steps_per_epoch=2) 141 | 142 | tf.keras.backend.clear_session() 143 | 144 | 145 | if __name__ == '__main__': 146 | unittest.main() 147 | -------------------------------------------------------------------------------- /cral/models/classification/classification_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow import keras 4 | from tensorflow.keras.layers import (Activation, BatchNormalization, Dense, 5 | Dropout, GlobalAveragePooling2D) 6 | 7 | 8 | def BatchNorm_dense(input_tensor, activation_fn, num_units, dropout): 9 | x = Dense(num_units, activation=None)(input_tensor) 10 | x = BatchNormalization()(x) 11 | x = Activation(activation_fn)(x) 12 | 13 | if dropout is not None: 14 | x = Dropout(dropout)(x) 15 | return x 16 | 17 | 18 | def densely_connected_head(feature_extractor_model, fully_connected_layer, 19 | dropout_rate, hidden_layer_Activation): 20 | """Adds densely connected head to backbone. 21 | 22 | Args: 23 | feature_extractor_model (keras.Model): the backbone model 24 | fully_connected_layer (list, tuple): A list or tuple indicating 25 | number of neurons per hidden layer 26 | dropout_rate (list, tuple): A list or tuple indicating the dropout 27 | rate per hidden layer 28 | hidden_layer_Activation (list, tuple): A list or tuple indicating the 29 | activation function per hidden layer 30 | 31 | Returns: 32 | tf.tensor: tensor of the final hidden layer to which output layer 33 | should be attached. 34 | """ 35 | base_model_tensor = feature_extractor_model.output 36 | 37 | x = GlobalAveragePooling2D()(base_model_tensor) 38 | 39 | for index, unit in enumerate(fully_connected_layer): 40 | 41 | if isinstance(hidden_layer_Activation, (tuple, list)): 42 | activation_fn = hidden_layer_Activation[index] 43 | else: 44 | activation_fn = hidden_layer_Activation 45 | 46 | if dropout_rate is None: 47 | dropout = None 48 | elif isinstance(dropout_rate, (tuple, list)): 49 | dropout = dropout_rate[index] 50 | else: 51 | dropout = dropout_rate 52 | 53 | x = BatchNorm_dense( 54 | input_tensor=x, 55 | activation_fn=activation_fn, 56 | num_units=unit, 57 | dropout=dropout) 58 | 59 | return x 60 | 61 | 62 | class MLPConfig: 63 | """Config for Multilayered Perceptron at the top of the model. 
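For example (an illustrative configuration, not a library default),
MLPConfig(height=224, width=224, fully_connected_layer=[512, 128],
dropout_rate=0.3) describes a head with two BatchNorm+Dense blocks
of 512 and 128 units, each followed by a dropout of 0.3, ahead of
the softmax output layer.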
64 | 65 | Attributes: 66 | dropout_rate (list, tuple): A list or tuple indicating the dropout 67 | rate per hidden layer 68 | final_layer_activation (str, optional): str indicating the activation 69 | function of the final prediction layer 70 | fully_connected_layer (list, tuple): A list or tuple indicating number 71 | of neurons per hidden layer 72 | height (int): height of images 73 | hidden_layer_activation (list, tuple): A list or tuple indicating the 74 | activation function per hidden layer 75 | width (int): width of images 76 | """ 77 | 78 | def __init__(self, 79 | height, 80 | width, 81 | fully_connected_layer=[], 82 | dropout_rate=None, 83 | hidden_layer_activation='relu', 84 | final_layer_activation='softmax'): 85 | """Config for Multilayered Perceptron. 86 | 87 | Args: 88 | height (int): height of images 89 | width (int): width of images 90 | fully_connected_layer (list, tuple): A list or tuple indicating 91 | number of neurons per hidden layer 92 | dropout_rate (list, tuple): A list or tuple indicating the dropout 93 | rate per hidden layer 94 | hidden_layer_activation (list, tuple): A list or tuple indicating 95 | the activation function per hidden layer 96 | final_layer_activation (str, optional): str indicating the 97 | activation function of the final prediction layer 98 | """ 99 | self.height = height 100 | self.width = width 101 | self.fully_connected_layer = fully_connected_layer 102 | self.dropout_rate = dropout_rate 103 | self.hidden_layer_activation = hidden_layer_activation 104 | self.final_layer_activation = final_layer_activation 105 | 106 | 107 | class ClassificationPredictor(object): 108 | """docstring for ClassificationPredictor.""" 109 | 110 | def __init__(self, model, preprocessing_func, size): 111 | self.model = model 112 | self.preprocessing_func = preprocessing_func 113 | self.size = size 114 | 115 | def load_image(self, image_path): 116 | return np.array( 117 | keras.preprocessing.image.load_img(path=image_path), 118 | dtype=np.uint8) 119 | 120 | # @abstractmethod 121 | def predict(self, image_path): 122 | image_array = self.load_image(image_path) 123 | image_array = tf.image.resize(image_array, self.size) 124 | preprocessed_image = self.preprocessing_func(image_array) 125 | preprocessed_image = tf.expand_dims(preprocessed_image, axis=0) 126 | result = self.model.predict(preprocessed_image) 127 | return np.argmax(result, axis=-1), np.amax(result, axis=-1) 128 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | # import denseCRF 4 | import pydensecrf.densecrf as dcrf 5 | import tensorflow as tf 6 | from PIL import Image 7 | from PIL.ImageColor import getrgb 8 | from pydensecrf.utils import unary_from_labels 9 | 10 | # getrgb(color) 11 | 12 | STANDARD_COLORS = [ 13 | 'black', 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 14 | 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 15 | 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 16 | 'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 17 | 'DarkOrange', 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 18 | 'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 19 | 'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 20 | 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 
'Ivory', 21 | 'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 22 | 'LightBlue', 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 23 | 'LightGray', 'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon', 24 | 'LightSeaGreen', 'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey', 25 | 'LightSteelBlue', 'LightYellow', 'Lime', 'LimeGreen', 'Linen', 'Magenta', 26 | 'MediumAquaMarine', 'MediumOrchid', 'MediumPurple', 'MediumSeaGreen', 27 | 'MediumSlateBlue', 'MediumSpringGreen', 'MediumTurquoise', 28 | 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 'NavajoWhite', 29 | 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 'Orchid', 30 | 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 31 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 32 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 33 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 34 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 35 | 'GreenYellow', 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 36 | 'White', 'WhiteSmoke', 'Yellow', 'YellowGreen' 37 | ] 38 | 39 | 40 | def getbgr(color_name): 41 | (R, G, B) = getrgb(color_name) 42 | return (B, G, R) 43 | 44 | 45 | STANDARD_COLORS_BGR = list(map(getbgr, STANDARD_COLORS)) 46 | 47 | 48 | def annotate_image(image_path, mask_array, ignore_bg=True): 49 | alpha = 0.5 50 | assert isinstance(mask_array, np.ndarray) 51 | mask_array = mask_array.astype(np.uint8) 52 | 53 | H, W = mask_array.shape 54 | 55 | # image_array = cv2.imread(image_path) 56 | image_array = np.array(Image.open(image_path)).astype(np.uint8) 57 | image_array = cv2.resize(image_array, (W, H)) 58 | 59 | img_color = image_array.copy() 60 | for i in np.unique(mask_array): 61 | if ignore_bg and i == 0: 62 | continue 63 | # if i in id_to_color: 64 | color_index = i % len(STANDARD_COLORS_BGR) 65 | img_color[mask_array == i] = STANDARD_COLORS_BGR[color_index] 66 | 67 | cv2.addWeighted(image_array, alpha, img_color, 1 - alpha, 0, img_color) 68 | 69 | return Image.fromarray(img_color) 70 | 71 | 72 | # Fully connected CRF post processing function 73 | def do_crf(im, mask, zero_unsure=True): 74 | colors, labels = np.unique(mask, return_inverse=True) 75 | image_size = mask.shape[:2] 76 | n_labels = len(set(labels.flat)) 77 | d = dcrf.DenseCRF2D(image_size[1], image_size[0], 78 | n_labels) # width, height, nlabels 79 | try: 80 | U = unary_from_labels( 81 | labels, n_labels, gt_prob=.7, zero_unsure=zero_unsure) 82 | except ZeroDivisionError: 83 | print("couldn't perform crf") 84 | return mask 85 | d.setUnaryEnergy(U) 86 | # This adds the color-independent term, features are the locations only. 87 | d.addPairwiseGaussian(sxy=(3, 3), compat=3) 88 | # This adds the color-dependent term, i.e. features are (x,y,r,g,b). 89 | # im is an image-array, e.g. 
im.dtype == np.uint8 and 90 | # im.shape == (640,480,3) 91 | d.addPairwiseBilateral( 92 | sxy=80, srgb=13, rgbim=im.astype('uint8'), compat=10) 93 | Q = d.inference(5) # 5 - num of iterations 94 | MAP = np.argmax(Q, axis=0).reshape(image_size) 95 | unique_map = np.unique(MAP) 96 | for u in unique_map: # get original labels back 97 | np.putmask(MAP, MAP == u, colors[u]) 98 | return MAP 99 | 100 | 101 | class SparseMeanIoU(tf.keras.metrics.MeanIoU): 102 | 103 | def __init__(self, num_classes=None, name='mean_iou', dtype=None): 104 | super(SparseMeanIoU, self).__init__( 105 | num_classes=num_classes, name=name, dtype=dtype) 106 | self.num_classes = num_classes 107 | 108 | def update_state(self, y_true, y_pred, sample_weight=None): 109 | y_pred = tf.math.argmax(y_pred, axis=-1) 110 | return super().update_state(y_true, y_pred, sample_weight) 111 | 112 | def get_config(self): 113 | config = {'num_classes': self.num_classes} 114 | base_config = super(SparseMeanIoU, self).get_config() 115 | return dict(list(base_config.items()) + list(config.items())) 116 | 117 | @classmethod 118 | def from_config(cls, config): 119 | return cls(**config) 120 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/UnetPlusPlus/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | from cral.models.semantic_segmentation.utils import do_crf # noqa: F401 5 | # from segmind import log_params 6 | from tensorflow.keras import backend as K 7 | from tensorflow.keras.layers import Conv2D, Dropout, InputSpec, Layer 8 | 9 | 10 | class Expand_Dims(Layer): 11 | """docstring for Expand_Dims.""" 12 | 13 | def call(self, inputs): 14 | outputs = K.expand_dims(inputs, 1) 15 | return outputs 16 | 17 | def compute_output_shape(self, input_shape): 18 | input_shape = tf.TensorShape(input_shape).as_list() 19 | return tf.TensorShape([input_shape[0], 1] + input_shape[1:]) 20 | 21 | 22 | class Upsample(Layer): 23 | """Image Upsample layer. 24 | 25 | Resize the batched image input to target height and width. The input 26 | should be a 4-D tensor in the format of NHWC. 27 | Arguments: 28 | height: Integer, the height of the output shape. 29 | width: Integer, the width of the output shape. 30 | interpolation: String, the interpolation method. Defaults to `bilinear`. 31 | Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, 32 | `lanczos5`, `gaussian`, `mitchellcubic` 33 | name: A string, the name of the layer. 
34 | """ 35 | 36 | def __init__( 37 | self, 38 | height, 39 | width, 40 | interpolation='bilinear', 41 | # name='Upsample', 42 | **kwargs): 43 | self.target_height = height 44 | self.target_width = width 45 | self.interpolation = interpolation 46 | self.input_spec = InputSpec(ndim=4) 47 | super(Upsample, self).__init__(**kwargs) 48 | 49 | def call(self, inputs): 50 | outputs = tf.image.resize( 51 | images=inputs, 52 | size=[self.target_height, self.target_width], 53 | method=self.interpolation) 54 | return outputs 55 | 56 | def compute_output_shape(self, input_shape): 57 | input_shape = tf.TensorShape(input_shape).as_list() 58 | return tf.TensorShape([ 59 | input_shape[0], self.target_height, self.target_width, 60 | input_shape[3] 61 | ]) 62 | 63 | def get_config(self): 64 | config = { 65 | 'height': self.target_height, 66 | 'width': self.target_width, 67 | 'interpolation': self.interpolation, 68 | } 69 | base_config = super(Upsample, self).get_config() 70 | return dict(list(base_config.items()) + list(config.items())) 71 | 72 | 73 | def standard_unit(input_tensor, stage, nb_filter, kernel_size=3): 74 | act = 'relu' 75 | dropout_rate = 0.5 76 | x = Conv2D( 77 | nb_filter, (kernel_size, kernel_size), 78 | activation=act, 79 | name='conv' + stage + '_1', 80 | padding='same')( 81 | input_tensor) 82 | x = Dropout(dropout_rate, name='dp' + stage + '_1')(x) 83 | x = Conv2D( 84 | nb_filter, (kernel_size, kernel_size), 85 | activation=act, 86 | name='conv' + stage + '_2', 87 | padding='same')( 88 | x) 89 | x = Dropout(dropout_rate, name='dp' + stage + '_2')(x) 90 | return x 91 | 92 | 93 | class UnetPlusPlusConfig(object): 94 | """docstring for Deeplabv3Config.""" 95 | 96 | def __init__(self, 97 | height=320, 98 | width=320, 99 | num_upsample_layers=5, 100 | filters=[64, 128, 256, 512, 1024], 101 | deep_supervision=True): 102 | self.height = height 103 | self.width = width 104 | self.num_upsample_layers = num_upsample_layers 105 | self.filters = filters 106 | self.deep_supervision = deep_supervision 107 | assert len( 108 | filters 109 | ) >= num_upsample_layers, 'filters should be a list with length >= num_upsample_layers' # noqa: E501 110 | assert num_upsample_layers in [ 111 | 2, 3, 4, 5 112 | ], 'num_upsample_layers should be in [2, 3, 4, 5]' 113 | assert height % 32 == 0 and width % 32 == 0, 'height and width should be multiple of 32' # noqa: E501 114 | self.input_shape = (self.height, self.width, 3) 115 | 116 | 117 | def log_UnetPlusPlus_config_params(config): 118 | 119 | assert isinstance( 120 | config, UnetPlusPlusConfig), 'config not supported {}'.format(config) 121 | # config_data = vars(config) 122 | # log_params(config_data) 123 | 124 | 125 | class UnetPlusPlusPredictor(object): 126 | """docstring for Deeplabv3Predictor.""" 127 | 128 | def __init__(self, height, width, model, preprocessing_func, dcrf): 129 | # super(RetinanetPredictor, self).__init__(*args, **kwargs) 130 | self.height = height 131 | self.width = width 132 | self.model = model 133 | self.preprocessing_func = preprocessing_func 134 | self.allow_dcrf = dcrf 135 | 136 | def load_image(self, image_path): 137 | img_array = np.array( 138 | tf.keras.preprocessing.image.load_img(path=image_path)) 139 | return img_array 140 | 141 | def predict(self, image): 142 | im = self.load_image(image) 143 | 144 | image = cv2.resize(im, (self.width, self.height)) 145 | image_array = np.expand_dims(image, axis=0) 146 | 147 | images_batch = self.preprocessing_func(image_array) 148 | 149 | images_batch = tf.cast(images_batch, 
tf.keras.backend.floatx()) 150 | 151 | y = self.model.predict(images_batch)[0] 152 | y = np.argmax(y, axis=-1) 153 | 154 | return y 155 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/PspNet/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | from cral.models.semantic_segmentation.utils import do_crf 5 | # from cral.tracking import log_params 6 | from tensorflow.keras import backend as K 7 | from tensorflow.keras.layers import (Activation, AveragePooling2D, 8 | BatchNormalization, Conv2D, InputSpec, 9 | Layer) 10 | 11 | 12 | class Expand_Dims(Layer): 13 | """docstring for Expand_Dims.""" 14 | 15 | def call(self, inputs): 16 | outputs = K.expand_dims(inputs, 1) 17 | return outputs 18 | 19 | def compute_output_shape(self, input_shape): 20 | input_shape = tf.TensorShape(input_shape).as_list() 21 | return tf.TensorShape([input_shape[0], 1] + input_shape[1:]) 22 | 23 | 24 | class Upsample(Layer): 25 | """Image Upsample layer. 26 | 27 | Resize the batched image input to target height and width. The input should 28 | be a 4-D tensor in the format of NHWC. 29 | Arguments: 30 | height: Integer, the height of the output shape. 31 | width: Integer, the width of the output shape. 32 | interpolation: String, the interpolation method. Defaults to `bilinear`. 33 | Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, 34 | `lanczos5`, `gaussian`, `mitchellcubic` 35 | name: A string, the name of the layer. 36 | """ 37 | 38 | def __init__( 39 | self, 40 | height, 41 | width, 42 | interpolation='bilinear', 43 | # name='Upsample', 44 | **kwargs): 45 | self.target_height = height 46 | self.target_width = width 47 | self.interpolation = interpolation 48 | # self._interpolation_method = get_interpolation(interpolation) 49 | self.input_spec = InputSpec(ndim=4) 50 | super(Upsample, self).__init__(**kwargs) 51 | 52 | def call(self, inputs): 53 | outputs = tf.image.resize( 54 | images=inputs, 55 | size=[self.target_height, self.target_width], 56 | method=self.interpolation) 57 | return outputs 58 | 59 | def compute_output_shape(self, input_shape): 60 | input_shape = tf.TensorShape(input_shape).as_list() 61 | return tf.TensorShape([ 62 | input_shape[0], self.target_height, self.target_width, 63 | input_shape[3] 64 | ]) 65 | 66 | def get_config(self): 67 | config = { 68 | 'height': self.target_height, 69 | 'width': self.target_width, 70 | 'interpolation': self.interpolation, 71 | } 72 | base_config = super(Upsample, self).get_config() 73 | return dict(list(base_config.items()) + list(config.items())) 74 | 75 | 76 | class PspNetConfig(object): 77 | """docstring for Deeplabv3Config.""" 78 | 79 | def __init__(self, height=576, width=576, down_sample_factor=8): 80 | self.height = height 81 | self.width = width 82 | self.down_sample_factor = down_sample_factor 83 | min_size = down_sample_factor * 6 84 | is_wrong_shape = ( 85 | height % min_size != 0 or width % min_size != 0 86 | or height < min_size or width < min_size) 87 | assert down_sample_factor in [ 88 | 4, 8, 16 89 | ], 'Supported down_sample_factor -> 4, 8, 16' 90 | assert is_wrong_shape is False, 'height and width should be multiple of down_sample_factor*6' # noqa: E501 91 | self.input_shape = (self.height, self.width, 3) 92 | 93 | 94 | def log_PspNet_config_params(config): 95 | 96 | assert isinstance(config, 97 | PspNetConfig), 'config not supported {}'.format(config) 98 | # 
config_data = vars(config) 99 | # log_params(config_data) 100 | 101 | 102 | def pool_block(feats, pool_factor): 103 | h = K.int_shape(feats)[1] 104 | w = K.int_shape(feats)[2] 105 | pool_size = strides = [ 106 | int(np.round(float(h) / pool_factor)), 107 | int(np.round(float(w) / pool_factor)) 108 | ] 109 | 110 | x = AveragePooling2D(pool_size, strides=strides, padding='same')(feats) 111 | x = Conv2D(512, (1, 1), padding='same', use_bias=False)(x) 112 | x = BatchNormalization()(x) 113 | x = Activation('relu')(x) 114 | 115 | x = Upsample( 116 | K.int_shape(x)[1] * strides[0], 117 | K.int_shape(x)[2] * strides[1])( 118 | x) 119 | return x 120 | 121 | 122 | class PspNetPredictor(object): 123 | """docstring for Deeplabv3Predictor.""" 124 | 125 | def __init__(self, height, width, model, preprocessing_func, dcrf): 126 | # super(RetinanetPredictor, self).__init__(*args, **kwargs) 127 | self.height = height 128 | self.width = width 129 | self.model = model 130 | self.preprocessing_func = preprocessing_func 131 | self.allow_dcrf = dcrf 132 | 133 | def load_image(self, image_path): 134 | img_array = np.array( 135 | tf.keras.preprocessing.image.load_img(path=image_path)) 136 | # print(img_array.shape,img_array.dtype) 137 | return img_array 138 | 139 | def predict(self, image): 140 | im = self.load_image(image) 141 | 142 | image = cv2.resize(im, (self.width, self.height)) 143 | image_array = np.expand_dims(image, axis=0) 144 | 145 | images_batch = self.preprocessing_func(image_array) 146 | images_batch = tf.cast(images_batch, tf.keras.backend.floatx()) 147 | # print(images_batch.shape, images_batch.dtype) 148 | 149 | y = self.model.predict(images_batch)[0] 150 | y = np.argmax(y, axis=-1) 151 | 152 | if self.allow_dcrf: 153 | # return densecrf(image.astype(np.uint8), y) 154 | return do_crf(image.astype(np.uint8), y) 155 | 156 | return y 157 | -------------------------------------------------------------------------------- /cral/models/classification/darknet.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import reduce, wraps 3 | 4 | import tensorflow.keras.backend as K 5 | from tensorflow.keras.layers import (Add, AveragePooling2D, BatchNormalization, 6 | Conv2D, GlobalAveragePooling2D, 7 | GlobalMaxPooling2D, Input, LeakyReLU, 8 | ZeroPadding2D) 9 | from tensorflow.keras.models import Model 10 | from tensorflow.keras.regularizers import l2 11 | from tensorflow.keras.utils import get_file, get_source_inputs 12 | from tensorflow.python.keras.applications.imagenet_utils import \ 13 | obtain_input_shape 14 | 15 | 16 | def compose(*funcs): 17 | """Compose arbitrarily many functions, evaluated left to right. 18 | 19 | Reference: https://mathieularose.com/function-composition-in-python/ 20 | """ 21 | if funcs: 22 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) 23 | else: 24 | raise ValueError('Composition of empty sequence not supported.') 25 | 26 | 27 | def preprocess_input(image_array): 28 | image_array = image_array / 255. 
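# Note: this Darknet-style preprocessing only rescales pixels to
# [0, 1]; no mean subtraction or per-channel standardization is
# applied afterwards.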
29 | return image_array 30 | 31 | 32 | @wraps(Conv2D) 33 | def DarknetConv2D(*args, **kwargs): 34 | """Wrapper to set Darknet parameters for Convolution2D.""" 35 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} 36 | 37 | darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides') == ( 38 | 2, 2) else 'same' 39 | darknet_conv_kwargs.update(kwargs) 40 | return Conv2D(*args, **darknet_conv_kwargs) 41 | 42 | 43 | def DarknetConv2D_BN_Leaky(*args, **kwargs): 44 | """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" 45 | no_bias_kwargs = {'use_bias': False} 46 | no_bias_kwargs.update(kwargs) 47 | return compose( 48 | DarknetConv2D(*args, **no_bias_kwargs), BatchNormalization(), 49 | LeakyReLU(alpha=0.1)) 50 | 51 | 52 | def resblock_body(x, num_filters, num_blocks): 53 | """A series of resblocks starting with a downsampling Convolution2D.""" 54 | # Darknet uses left and top padding instead of 'same' mode 55 | x = ZeroPadding2D(((1, 0), (1, 0)))(x) 56 | x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(x) 57 | for i in range(num_blocks): 58 | y = compose( 59 | DarknetConv2D_BN_Leaky(num_filters // 2, (1, 1)), 60 | DarknetConv2D_BN_Leaky(num_filters, (3, 3)))( 61 | x) 62 | x = Add()([x, y]) 63 | return x 64 | 65 | 66 | def darknet_body(inputs): 67 | """Darknet body having 52 Convolution2D layers.""" 68 | x = DarknetConv2D_BN_Leaky(32, (3, 3))(inputs) 69 | x = resblock_body(x, 64, 1) 70 | x = resblock_body(x, 128, 2) 71 | x = resblock_body(x, 256, 8) 72 | x = resblock_body(x, 512, 8) 73 | x = resblock_body(x, 1024, 4) 74 | return x 75 | 76 | 77 | def Darknet53(input_shape=None, 78 | input_tensor=None, 79 | include_top=False, 80 | weights='imagenet', 81 | pooling=None, 82 | classes=1000, 83 | classifier_activation='softmax', 84 | **kwargs): 85 | """Generate darknet53 model for Imagenet classification.""" 86 | 87 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 88 | raise ValueError('The `weights` argument should be either ' 89 | '`None` (random initialization), `imagenet` ' 90 | '(pre-training on ImageNet), ' 91 | 'or the path to the weights file to be loaded.') 92 | 93 | if weights == 'imagenet' and include_top and classes != 1000: 94 | raise ValueError( 95 | 'If using `weights` as `"imagenet"` with `include_top`' 96 | ' as true, `classes` should be 1000') 97 | 98 | # Determine proper input shape 99 | input_shape = obtain_input_shape( 100 | input_shape, 101 | default_size=416, # multiple of 32 only 102 | min_size=28, 103 | data_format=K.image_data_format(), 104 | require_flatten=include_top, 105 | weights=weights) 106 | 107 | if input_tensor is None: 108 | img_input = Input(shape=input_shape) 109 | else: 110 | img_input = input_tensor 111 | 112 | x = darknet_body(img_input) 113 | 114 | if include_top: 115 | model_name = 'darknet53' 116 | x = AveragePooling2D(pool_size=(3, 3), strides=None, padding='same')(x) 117 | x = Conv2D( 118 | classes, (1, 1), padding='same', activation=classifier_activation)( 119 | x) 120 | else: 121 | model_name = 'darknet53_headless' 122 | if pooling == 'avg': 123 | x = GlobalAveragePooling2D(name='avg_pool')(x) 124 | elif pooling == 'max': 125 | x = GlobalMaxPooling2D(name='max_pool')(x) 126 | 127 | # Ensure that the model takes into account 128 | # any potential predecessors of `input_tensor`. 129 | if input_tensor is not None: 130 | inputs = get_source_inputs(input_tensor) 131 | else: 132 | inputs = img_input 133 | 134 | # Create model.
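# Usage sketch (illustrative; loading 'imagenet' weights downloads the
# files referenced below):
#
#   backbone = Darknet53(input_shape=(416, 416, 3), include_top=False,
#                        weights='imagenet', pooling='avg')
#
# which yields per-image feature vectors of shape (batch_size, 1024).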
135 | model = Model(inputs, x, name=model_name) 136 | 137 | # Load weights. 138 | if weights == 'imagenet': 139 | if include_top: 140 | url = 'https://segmind-data.s3.ap-south-1.amazonaws.com/edge/transfer-learning/classification/darknet53_weights.h5' # noqa: E501 141 | else: 142 | url = 'https://segmind-data.s3.ap-south-1.amazonaws.com/edge/transfer-learning/classification/darknet53_notop_weights.h5' # noqa: E501 143 | 144 | weights_file = get_file(os.path.basename(url), url) 145 | file_path = weights_file 146 | model.load_weights(file_path) 147 | 148 | elif weights is not None: 149 | model.load_weights(weights) 150 | 151 | return model 152 | -------------------------------------------------------------------------------- /cral/augmentations/engine.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | import numpy as np 4 | from albumentations import BboxParams, Compose 5 | 6 | 7 | class Base(ABC): 8 | 9 | def __init__(self, aug): 10 | self.aug = aug 11 | 12 | @abstractmethod 13 | def apply(self): 14 | pass 15 | 16 | 17 | class Classification(Base): 18 | """Class to apply augmentations to Classification datasets. 19 | 20 | Attributes: 21 | aug (list(augmentations)): List of Albumentations Augmentations 22 | augmentation (function): function to apply list of augmentations on 23 | images 24 | p (float): probability of applying the transformation 25 | """ 26 | 27 | def __init__(self, aug, p=0.5): 28 | """ 29 | 30 | Args: 31 | aug (list(augmentations)): List of Albumentations Augmentations 32 | p (float, optional): probability of applying the transformation 33 | """ 34 | self.aug = aug 35 | self.p = p 36 | self.augmentation = self.get_aug() 37 | 38 | def get_aug(self): 39 | """returns a function that applies the list of transformations. 40 | 41 | Returns: 42 | function: function to apply list of augmentations on images 43 | """ 44 | return Compose(transforms=self.aug, p=self.p) 45 | 46 | def apply(self, image): 47 | """Applies the augmentation on image. 48 | 49 | Args: 50 | image (nparray): input image 51 | 52 | Returns: 53 | image: Augmented image 54 | """ 55 | return self.augmentation(image=image)['image'] 56 | 57 | 58 | class ObjectDetection(Base): 59 | """Class to apply augmentations to ObjectDetection datasets. 60 | 61 | Attributes: 62 | annotation_format (str): can be one of 'coco', 'pascal_voc' or 'yolo' 63 | aug (list(augmentations)): List of Albumentations Augmentations 64 | p (float): probability of applying the transformation 65 | """ 66 | 67 | def __init__(self, aug, annotation_format, p=0.5): 68 | """ 69 | 70 | Args: 71 | aug (list(augmentations)): List of Albumentations Augmentations 72 | annotation_format (str): can be one of 'coco', 'pascal_voc' or 'yolo' 73 | p (float, optional): probability of applying the transformation 74 | """ 75 | self.aug = aug 76 | self.p = p 77 | 78 | assert annotation_format in ('coco', 'pascal_voc', 'yolo') 79 | self.annotation_format = annotation_format 80 | self.augmentation = self.get_aug('category_id') 81 | 82 | def get_aug(self, label_field, min_area=0.0, min_visibility=0.0): 83 | """returns a function that applies the list of transformations.
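Example (a minimal sketch; HorizontalFlip is one of the standard
albumentations transforms):

    from albumentations import HorizontalFlip
    augmenter = ObjectDetection(
        aug=[HorizontalFlip(p=1.0)],
        annotation_format='pascal_voc', p=1.0)
    image, bboxes, labels = augmenter.apply(image, bboxes, labels)

Here apply() flips the image and remaps the boxes to match.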
84 | 85 | Args: 86 | label_field (str): The field in the dictionary that contains the 87 | class labels 88 | min_area (float, optional): minimum area of bbox that is considered 89 | min_visibility (float, optional): minimum area of bbox to be 90 | visible 91 | 92 | Returns: 93 | function: function to apply list of augmentations on images and 94 | bboxes 95 | """ 96 | return Compose( 97 | self.aug, 98 | bbox_params=BboxParams( 99 | format=self.annotation_format, 100 | min_area=min_area, 101 | min_visibility=min_visibility, 102 | label_fields=[label_field]), 103 | p=self.p) 104 | 105 | def apply(self, image, bboxes, labels): 106 | """applies augmentations to ObjectDetection datasets. 107 | 108 | Args: 109 | image (list(images)): list of images 110 | bboxes (list(bboxes)): list of boxes 111 | labels (list(labels)): list of labels 112 | 113 | 114 | Returns: 115 | list(images), list(bboxes), list(labels): the augmented images, 116 | bboxes and labels 117 | """ 118 | # augmentation=self.get_aug(annotation_format,'category_id') 119 | annotation = {'image': image, 'bboxes': bboxes, 'category_id': labels} 120 | augmented_annotation = self.augmentation(**annotation) 121 | return augmented_annotation['image'], np.array( 122 | augmented_annotation['bboxes']), np.array( 123 | augmented_annotation['category_id']) 124 | 125 | 126 | class Segmentation(Base): 127 | """Class to apply augmentations to Segmentation datasets. 128 | 129 | Attributes: 130 | aug (list(augmentations)): List of Albumentations Augmentations 131 | augmentation (function): function to apply list of augmentations 132 | on images 133 | p (float): probability of applying the transformation 134 | """ 135 | 136 | def __init__(self, aug, p=0.5): 137 | """ 138 | 139 | Args: 140 | aug (list(augmentations)): List of Albumentations Augmentations 141 | p (float, optional): probability of applying the transformation 142 | """ 143 | self.aug = aug 144 | self.p = p 145 | self.augmentation = self.get_aug() 146 | 147 | def get_aug(self): 148 | """returns a function that applies the list of transformations. 149 | Returns: 150 | function: function to apply list of augmentations on images and 151 | masks 152 | """ 153 | return Compose(self.aug, p=self.p) 154 | 155 | def apply(self, image, mask): 156 | """applies augmentations to Segmentation datasets. 157 | 158 | Args: 159 | image (list(images)): list of images 160 | mask (list(masks)): list of masks 161 | 162 | Returns: 163 | list(image),list(mask): list of augmented images and masks 164 | """ 165 | augmented = self.augmentation(image=image, mask=mask) 166 | return augmented['image'], augmented['mask'] 167 | -------------------------------------------------------------------------------- /cral/models/object_detection/SSD/utils/matching_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities to match ground truth boxes to anchor boxes. 2 | 3 | Copyright (C) 2018 Pierluigi Ferrari 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | from __future__ import division 19 | 20 | import numpy as np 21 | 22 | 23 | def match_bipartite_greedy(weight_matrix): 24 | """Returns a bipartite matching according to the given weight matrix. 25 | 26 | The algorithm works as follows: 27 | 28 | Let the first axis of `weight_matrix` represent ground truth boxes 29 | and the second axis anchor boxes. 30 | The ground truth box that has the greatest similarity with any 31 | anchor box will be matched first, then out of the remaining ground 32 | truth boxes, the ground truth box that has the greatest similarity 33 | with any of the remaining anchor boxes will be matched second, and 34 | so on. That is, the ground truth boxes will be matched in descending 35 | order by maximum similarity with any of the respectively remaining 36 | anchor boxes. 37 | The runtime complexity is O(m^2 * n), where `m` is the number of 38 | ground truth boxes and `n` is the number of anchor boxes. 39 | 40 | Arguments: 41 | weight_matrix (array): A 2D Numpy array that represents the weight 42 | matrix for the matching process. If `(m,n)` is the shape of the 43 | weight matrix, it must be `m <= n`. The weights can be integers or 44 | floating point numbers. The matching process will maximize, i.e. 45 | larger weights are preferred over smaller weights. 46 | 47 | Returns: 48 | A 1D Numpy array of length `weight_matrix.shape[0]` that represents 49 | the matched index along the second axis of `weight_matrix` for each 50 | index along the first axis. 51 | """ 52 | 53 | weight_matrix = np.copy(weight_matrix)  # We'll modify this array. 54 | num_ground_truth_boxes = weight_matrix.shape[0] 55 | all_gt_indices = list(range( 56 | num_ground_truth_boxes))  # Only relevant for fancy-indexing below. 57 | 58 | # This 1D array will contain for each ground truth box the index of 59 | # the matched anchor box. 60 | matches = np.zeros(num_ground_truth_boxes, dtype=int) 61 | 62 | # In each iteration of the loop below, exactly one ground truth box 63 | # will be matched to one anchor box. 64 | for _ in range(num_ground_truth_boxes): 65 | 66 | # Find the maximal anchor-ground truth pair in two steps: first, 67 | # reduce over the anchor boxes and then reduce over the ground truth 68 | # boxes. 69 | anchor_indices = np.argmax( 70 | weight_matrix, axis=1)  # Reduce along the anchor box axis. 71 | overlaps = weight_matrix[all_gt_indices, anchor_indices] 72 | ground_truth_index = np.argmax( 73 | overlaps)  # Reduce along the ground truth box axis. 74 | anchor_index = anchor_indices[ground_truth_index] 75 | matches[ground_truth_index] = anchor_index  # Set the match. 76 | 77 | # Set the row of the matched ground truth box and the column of the 78 | # matched anchor box to all zeros. This ensures that those boxes will 79 | # not be matched again, because they will never be the best matches 80 | # for any other boxes. 81 | weight_matrix[ground_truth_index] = 0 82 | weight_matrix[:, anchor_index] = 0 83 | 84 | return matches 85 | 86 | 87 | def match_multi(weight_matrix, threshold): 88 | """Matches all elements along the second axis of `weight_matrix` to their 89 | best matches along the first axis subject to the constraint that the weight 90 | of a match must be greater than or equal to `threshold` in order to produce 91 | a match.
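A small worked example (numbers are illustrative): for
weight_matrix = [[0.9, 0.4], [0.2, 0.6]] and threshold=0.5, anchor 0
best matches ground truth 0 (weight 0.9) and anchor 1 best matches
ground truth 1 (weight 0.6); both weights clear the threshold, so the
function returns ([0, 1], [0, 1]).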
92 | 93 | If the weight matrix contains elements that should be ignored, the row or 94 | column representing the respective element should be set to a value below 95 | `threshold`. 96 | 97 | Arguments: 98 | weight_matrix (array): A 2D Numpy array that represents the weight 99 | matrix for the matching process. If `(m,n)` is the shape of the 100 | weight matrix, it must be `m <= n`. The weights can be integers or 101 | floating point numbers. The matching process will maximize, i.e. 102 | larger weights are preferred over smaller weights. 103 | threshold (float): A float that represents the threshold (i.e. lower 104 | bound) that must be met by a pair of elements to produce a match. 105 | 106 | Returns: 107 | Two 1D Numpy arrays of equal length that represent the matched indices. 108 | The first array contains the indices along the first axis of 109 | `weight_matrix`, the second array contains the indices along the second 110 | axis. 111 | """ 112 | 113 | num_anchor_boxes = weight_matrix.shape[1] 114 | all_anchor_indices = list( 115 | range(num_anchor_boxes))  # Only relevant for fancy-indexing below. 116 | 117 | # Find the best ground truth match for every anchor box. 118 | ground_truth_indices = np.argmax( 119 | weight_matrix, axis=0)  # Array of shape (weight_matrix.shape[1],) 120 | overlaps = weight_matrix[ 121 | ground_truth_indices, 122 | all_anchor_indices]  # Array of shape (weight_matrix.shape[1],) 123 | 124 | # Filter out the matches with a weight below the threshold. 125 | anchor_indices_thresh_met = np.nonzero(overlaps >= threshold)[0] 126 | gt_indices_thresh_met = ground_truth_indices[anchor_indices_thresh_met] 127 | 128 | return gt_indices_thresh_met, anchor_indices_thresh_met 129 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/deeplabv3/deeplab.py: -------------------------------------------------------------------------------- 1 | from cral.common import classification_networks 2 | from cral.models.semantic_segmentation.deeplabv3.deeplab_xception import \ 3 | DeepLabV3Plus_xception 4 | from cral.models.semantic_segmentation.deeplabv3.deeplabv3_mobilenet import \ 5 | DeepLabV3Plus_mobilenet 6 | from cral.models.semantic_segmentation.deeplabv3.utils import (Deeplabv3Config, 7 | Upsample) 8 | from tensorflow.keras import backend as K 9 | from tensorflow.keras.layers import (Activation, AveragePooling2D, 10 | BatchNormalization, Conv2D, concatenate) 11 | from tensorflow.keras.models import Model 12 | 13 | 14 | def ASPP(tensor, atrous_rates): 15 | """Atrous spatial pyramid pooling.""" 16 | dims = K.int_shape(tensor) 17 | 18 | y_pool = AveragePooling2D( 19 | pool_size=(dims[1], dims[2]), name='average_pooling')( 20 | tensor) 21 | y_pool = Conv2D( 22 | filters=256, 23 | kernel_size=1, 24 | padding='same', 25 | kernel_initializer='he_normal', 26 | name='pool_1x1conv2d', 27 | use_bias=False)( 28 | y_pool) 29 | y_pool = BatchNormalization(name='bn_1')(y_pool) 30 | y_pool = Activation('relu', name='relu_1')(y_pool) 31 | 32 | y_pool = Upsample( 33 | height=dims[1], 34 | width=dims[2], 35 | name=y_pool.name.split('/')[0] + '_upsample')( 36 | y_pool) 37 | 38 | y_1 = Conv2D( 39 | filters=256, 40 | kernel_size=1, 41 | dilation_rate=1, 42 | padding='same', 43 | kernel_initializer='he_normal', 44 | name='ASPP_conv2d_d1', 45 | use_bias=False)( 46 | tensor) 47 | y_1 = BatchNormalization(name='bn_2')(y_1) 48 | y_1 = Activation('relu', name='relu_2')(y_1) 49 | 50 | y_6 = Conv2D( 51 | filters=256, 52 | kernel_size=3,
dilation_rate=atrous_rates[0], 54 | padding='same', 55 | kernel_initializer='he_normal', 56 | name='ASPP_conv2d_d6', 57 | use_bias=False)( 58 | tensor) 59 | y_6 = BatchNormalization(name='bn_3')(y_6) 60 | y_6 = Activation('relu', name='relu_3')(y_6) 61 | 62 | y_12 = Conv2D( 63 | filters=256, 64 | kernel_size=3, 65 | dilation_rate=atrous_rates[1], 66 | padding='same', 67 | kernel_initializer='he_normal', 68 | name='ASPP_conv2d_d12', 69 | use_bias=False)( 70 | tensor) 71 | y_12 = BatchNormalization(name='bn_4')(y_12) 72 | y_12 = Activation('relu', name='relu_4')(y_12) 73 | 74 | y_18 = Conv2D( 75 | filters=256, 76 | kernel_size=3, 77 | dilation_rate=atrous_rates[2], 78 | padding='same', 79 | kernel_initializer='he_normal', 80 | name='ASPP_conv2d_d18', 81 | use_bias=False)( 82 | tensor) 83 | y_18 = BatchNormalization(name='bn_5')(y_18) 84 | y_18 = Activation('relu', name='relu_5')(y_18) 85 | 86 | y = concatenate([y_pool, y_1, y_6, y_12, y_18], name='ASPP_concat') 87 | 88 | y = Conv2D( 89 | filters=256, 90 | kernel_size=1, 91 | dilation_rate=1, 92 | padding='same', 93 | kernel_initializer='he_normal', 94 | name='ASPP_conv2d_final', 95 | use_bias=False)( 96 | y) 97 | y = BatchNormalization(name='bn_final')(y) 98 | y = Activation('relu', name='relu_final')(y) 99 | return y 100 | 101 | 102 | def DeepLabV3Plus_resnet50(config, num_classes, weights, base_trainable): 103 | 104 | base_model, preprocessing_function = classification_networks['resnet50']( 105 | input_shape=config.input_shape, weights=weights, include_top=False) 106 | 107 | img_height = config.height 108 | img_width = config.width 109 | 110 | base_model.trainable = base_trainable 111 | 112 | image_features = base_model.get_layer('conv4_block6_out').output 113 | 114 | x_a = ASPP(image_features, config.atrous_rates) 115 | x_a = Upsample(height=img_height // 4, width=img_width // 4)(x_a) 116 | 117 | # x_b = base_model.get_layer('activation_9').output 118 | x_b = base_model.get_layer('conv2_block3_out').output 119 | x_b = Conv2D( 120 | filters=48, 121 | kernel_size=1, 122 | padding='same', 123 | kernel_initializer='he_normal', 124 | name='low_level_projection', 125 | use_bias=False)( 126 | x_b) 127 | x_b = BatchNormalization(name='bn_low_level_projection')(x_b) 128 | x_b = Activation('relu', name='low_level_activation')(x_b) 129 | 130 | x = concatenate([x_a, x_b], name='decoder_concat') 131 | 132 | x = Conv2D( 133 | filters=256, 134 | kernel_size=3, 135 | padding='same', 136 | activation='relu', 137 | kernel_initializer='he_normal', 138 | name='decoder_conv2d_1', 139 | use_bias=False)( 140 | x) 141 | x = BatchNormalization(name='bn_decoder_1')(x) 142 | x = Activation('relu', name='activation_decoder_1')(x) 143 | 144 | x = Conv2D( 145 | filters=256, 146 | kernel_size=3, 147 | padding='same', 148 | activation='relu', 149 | kernel_initializer='he_normal', 150 | name='decoder_conv2d_2', 151 | use_bias=False)( 152 | x) 153 | x = BatchNormalization(name='bn_decoder_2')(x) 154 | x = Activation('relu', name='activation_decoder_2')(x) 155 | x = Upsample(height=img_height, width=img_width)(x) 156 | 157 | x = Conv2D(num_classes, (1, 1), name='output_layer')(x) 158 | 159 | model = Model(inputs=base_model.input, outputs=x, name='DeepLabV3_Plus') 160 | # print(f'*** Output_Shape => {model.output_shape} ***') 161 | return model, preprocessing_function 162 | 163 | 164 | def create_DeepLabv3Plus(feature_extractor, 165 | config, 166 | num_classes, 167 | weights, 168 | base_trainable=True): 169 | 170 | assert isinstance( 171 | config, Deeplabv3Config), 'please 
provide a `Deeplabv3Config()` object' 172 | 173 | if feature_extractor == 'resnet50': 174 | return DeepLabV3Plus_resnet50(config, num_classes, weights, 175 | base_trainable) 176 | elif feature_extractor == 'xception': 177 | return DeepLabV3Plus_xception(config, num_classes, weights, 178 | base_trainable) 179 | elif feature_extractor == 'mobilenetv2': 180 | return DeepLabV3Plus_mobilenet(config, num_classes, weights, 181 | base_trainable) 182 | else: 183 | assert False, 'only resnet50, xception and mobilenetv2 are supported' 184 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/LinkNet/tfrecord_parser.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from .utils import LinkNetConfig 5 | 6 | 7 | def _pad(image, height, width, channels=3): 8 | """Summary. 9 | Args: 10 | image (TYPE): Description 11 | height (TYPE): Description 12 | width (TYPE): Description 13 | scale (TYPE): Description 14 | Returns: 15 | numpy nd.array: Description 16 | """ 17 | 18 | image = image.astype(np.uint8) 19 | 20 | padded_image = np.zeros( 21 | shape=(height.astype(int), width.astype(int), channels), 22 | dtype=np.uint8) 23 | h, w, _ = image.shape 24 | padded_image[:h, :w, :] = image 25 | return padded_image 26 | 27 | 28 | @tf.function 29 | def decode_pad_img(image_string, pad_height, pad_width): 30 | """Summary. 31 | Args: 32 | image_string (TYPE): Description 33 | pad_height (TYPE): Description 34 | pad_width (TYPE): Description 35 | scale (TYPE): Description 36 | Returns: 37 | tf.tensor: Description 38 | """ 39 | image = tf.image.decode_jpeg(image_string) 40 | image = tf.numpy_function( 41 | _pad, [image, pad_height, pad_width], Tout=tf.uint8) 42 | image = tf.cast(image, tf.keras.backend.floatx()) 43 | return image 44 | 45 | 46 | @tf.function 47 | def decode_pad_msk(mask_string, pad_height, pad_width): 48 | """Summary. 
49 | Args: 50 | mask_string (TYPE): Description 51 | pad_height (TYPE): Description 52 | pad_width (TYPE): Description 53 | scale (TYPE): Description 54 | Returns: 55 | tf.tensor: Description 56 | """ 57 | mask = tf.image.decode_png(mask_string) 58 | mask = tf.numpy_function( 59 | _pad, [mask, pad_height, pad_width, 1], Tout=tf.uint8) 60 | return mask 61 | 62 | 63 | class LinkNetGenerator(object): 64 | """docstring for LinkNetGenerator.""" 65 | 66 | def __init__( 67 | self, 68 | config, 69 | train_tfrecords, 70 | test_tfrecords, 71 | # num_classes, 72 | # mask_format, 73 | processing_func=lambda x: x.astype(tf.keras.backend.floatx()), 74 | augmentation=None, 75 | batch_size=4): 76 | 77 | assert isinstance( 78 | config, LinkNetConfig), 'please provide a `LinkNetConfig()` object' 79 | self.config = config 80 | 81 | self.train_tfrecords = train_tfrecords 82 | self.test_tfrecords = test_tfrecords 83 | 84 | # self.min_side = int(min_side) 85 | # self.max_side = int(max_side) 86 | 87 | # self.num_classes = int(num_classes) 88 | self.batch_size = batch_size 89 | self.aug = augmentation 90 | # self.mask_format = mask_format 91 | 92 | self.normalize_func = processing_func 93 | 94 | # def parse_tfrecords(filenames, height, width, batch_size=32): 95 | 96 | def _parse_function(self, serialized): 97 | 98 | features = { 99 | 'image/height': tf.io.FixedLenFeature([], tf.int64), 100 | 'image/width': tf.io.FixedLenFeature([], tf.int64), 101 | 'image/depth': tf.io.FixedLenFeature([], tf.int64), 102 | 'image_raw': tf.io.FixedLenFeature([], tf.string), 103 | 'mask/height': tf.io.FixedLenFeature([], tf.int64), 104 | 'mask/width': tf.io.FixedLenFeature([], tf.int64), 105 | 'mask/depth': tf.io.FixedLenFeature([], tf.int64), 106 | 'mask_raw': tf.io.FixedLenFeature([], tf.string) 107 | } 108 | 109 | parsed_example = tf.io.parse_example( 110 | serialized=serialized, features=features) 111 | 112 | max_height = tf.cast( 113 | tf.keras.backend.max(parsed_example['image/height']), tf.int32) 114 | max_width = tf.cast( 115 | tf.keras.backend.max(parsed_example['image/width']), tf.int32) 116 | 117 | image_batch = tf.map_fn( 118 | lambda x: decode_pad_img(x, max_height, max_width), 119 | parsed_example['image_raw'], 120 | dtype=tf.keras.backend.floatx()) 121 | image_batch = tf.numpy_function( 122 | self.normalize_func, [image_batch], Tout=tf.keras.backend.floatx()) 123 | image_batch.set_shape([None, None, None, 3]) 124 | 125 | mask_batch = tf.map_fn( 126 | lambda x: decode_pad_msk(x, max_height, max_width), 127 | parsed_example['mask_raw'], 128 | dtype=tf.uint8) 129 | mask_batch.set_shape([None, None, None, 1]) 130 | 131 | # image = tf.cast(tf.image.decode_jpeg(image_string), tf.uint8) 132 | image_batch = tf.image.resize(image_batch, 133 | (self.config.height, self.config.width)) 134 | image_batch = tf.cast(image_batch, tf.keras.backend.floatx()) 135 | 136 | mask_batch = tf.image.resize( 137 | mask_batch, (self.config.height, self.config.width), 138 | method='nearest') 139 | 140 | return image_batch, mask_batch 141 | 142 | def get_train_function(self): 143 | 144 | filenames = tf.io.gfile.glob(self.train_tfrecords) 145 | dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle( 146 | buffer_size=16).repeat(-1) 147 | 148 | dataset = dataset.interleave( 149 | tf.data.TFRecordDataset, 150 | num_parallel_calls=tf.data.experimental.AUTOTUNE, 151 | cycle_length=4, 152 | block_length=16) 153 | 154 | dataset = dataset.batch( 155 | self.batch_size, drop_remainder=True) # Batch Size 156 | 157 | dataset = dataset.map( 158 | 
self._parse_function,
159 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
160 | 
161 |         # dataset = dataset.cache()
162 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
163 | 
164 |         return dataset
165 | 
166 |     def get_test_function(self):
167 | 
168 |         filenames = tf.io.gfile.glob(self.test_tfrecords)
169 |         dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle(
170 |             buffer_size=16).repeat(-1)
171 |         dataset = dataset.interleave(
172 |             tf.data.TFRecordDataset,
173 |             num_parallel_calls=tf.data.experimental.AUTOTUNE,
174 |             cycle_length=4,
175 |             block_length=16)
176 | 
177 |         dataset = dataset.batch(
178 |             self.batch_size, drop_remainder=True)  # Batch Size
179 | 
180 |         dataset = dataset.map(
181 |             self._parse_function,
182 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
183 | 
184 |         # dataset = dataset.cache()
185 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
186 | 
187 |         return dataset
188 | 
--------------------------------------------------------------------------------
/cral/models/semantic_segmentation/FpnNet/tfrecord_parser.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | from .utils import FpnNetConfig
5 | 
6 | 
7 | def _pad(image, height, width, channels=3):
8 |     """Summary.
9 | 
10 |     Args:
11 |         image (TYPE): Description
12 |         height (TYPE): Description
13 |         width (TYPE): Description
14 |         scale (TYPE): Description
15 | 
16 |     Returns:
17 |         numpy nd.array: Description
18 |     """
19 | 
20 |     image = image.astype(np.uint8)
21 | 
22 |     padded_image = np.zeros(
23 |         shape=(height.astype(int), width.astype(int), channels),
24 |         dtype=np.uint8)
25 |     h, w, _ = image.shape
26 |     padded_image[:h, :w, :] = image
27 |     return padded_image
28 | 
29 | 
30 | @tf.function
31 | def decode_pad_img(image_string, pad_height, pad_width):
32 |     """Summary.
33 | 
34 |     Args:
35 |         image_string (TYPE): Description
36 |         pad_height (TYPE): Description
37 |         pad_width (TYPE): Description
38 |         scale (TYPE): Description
39 | 
40 |     Returns:
41 |         tf.tensor: Description
42 |     """
43 |     image = tf.image.decode_jpeg(image_string)
44 |     image = tf.numpy_function(
45 |         _pad, [image, pad_height, pad_width], Tout=tf.uint8)
46 |     image = tf.cast(image, tf.keras.backend.floatx())
47 |     return image
48 | 
49 | 
50 | @tf.function
51 | def decode_pad_msk(mask_string, pad_height, pad_width):
52 |     """Summary.
53 | 54 | Args: 55 | mask_string (TYPE): Description 56 | pad_height (TYPE): Description 57 | pad_width (TYPE): Description 58 | scale (TYPE): Description 59 | 60 | Returns: 61 | tf.tensor: Description 62 | """ 63 | mask = tf.image.decode_png(mask_string) 64 | mask = tf.numpy_function( 65 | _pad, [mask, pad_height, pad_width, 1], Tout=tf.uint8) 66 | return mask 67 | 68 | 69 | class FpnNetGenerator(object): 70 | """docstring for DeepLabv3Generator.""" 71 | 72 | def __init__( 73 | self, 74 | config, 75 | train_tfrecords, 76 | test_tfrecords, 77 | # num_classes, 78 | # mask_format, 79 | processing_func=lambda x: x.astype(tf.keras.backend.floatx()), 80 | augmentation=None, 81 | batch_size=4): 82 | 83 | assert isinstance( 84 | config, FpnNetConfig), 'please provide a `FpnNetConfig()` object' 85 | self.config = config 86 | 87 | self.train_tfrecords = train_tfrecords 88 | self.test_tfrecords = test_tfrecords 89 | 90 | # self.min_side = int(min_side) 91 | # self.max_side = int(max_side) 92 | 93 | # self.num_classes = int(num_classes) 94 | self.batch_size = batch_size 95 | self.aug = augmentation 96 | # self.mask_format = mask_format 97 | 98 | self.normalize_func = processing_func 99 | 100 | # def parse_tfrecords(filenames, height, width, batch_size=32): 101 | 102 | def _parse_function(self, serialized): 103 | 104 | features = { 105 | 'image/height': tf.io.FixedLenFeature([], tf.int64), 106 | 'image/width': tf.io.FixedLenFeature([], tf.int64), 107 | 'image/depth': tf.io.FixedLenFeature([], tf.int64), 108 | 'image_raw': tf.io.FixedLenFeature([], tf.string), 109 | 'mask/height': tf.io.FixedLenFeature([], tf.int64), 110 | 'mask/width': tf.io.FixedLenFeature([], tf.int64), 111 | 'mask/depth': tf.io.FixedLenFeature([], tf.int64), 112 | 'mask_raw': tf.io.FixedLenFeature([], tf.string) 113 | } 114 | 115 | parsed_example = tf.io.parse_example( 116 | serialized=serialized, features=features) 117 | 118 | max_height = tf.cast( 119 | tf.keras.backend.max(parsed_example['image/height']), tf.int32) 120 | max_width = tf.cast( 121 | tf.keras.backend.max(parsed_example['image/width']), tf.int32) 122 | 123 | image_batch = tf.map_fn( 124 | lambda x: decode_pad_img(x, max_height, max_width), 125 | parsed_example['image_raw'], 126 | dtype=tf.keras.backend.floatx()) 127 | image_batch = tf.numpy_function( 128 | self.normalize_func, [image_batch], Tout=tf.keras.backend.floatx()) 129 | image_batch.set_shape([None, None, None, 3]) 130 | 131 | mask_batch = tf.map_fn( 132 | lambda x: decode_pad_msk(x, max_height, max_width), 133 | parsed_example['mask_raw'], 134 | dtype=tf.uint8) 135 | mask_batch.set_shape([None, None, None, 1]) 136 | 137 | # image = tf.cast(tf.image.decode_jpeg(image_string), tf.uint8) 138 | image_batch = tf.image.resize(image_batch, 139 | (self.config.height, self.config.width)) 140 | image_batch = tf.cast(image_batch, tf.keras.backend.floatx()) 141 | 142 | mask_batch = tf.image.resize( 143 | mask_batch, (self.config.height, self.config.width), 144 | method='nearest') 145 | 146 | return image_batch, mask_batch 147 | 148 | def get_train_function(self): 149 | 150 | filenames = tf.io.gfile.glob(self.train_tfrecords) 151 | dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle( 152 | buffer_size=16).repeat(-1) 153 | 154 | dataset = dataset.interleave( 155 | tf.data.TFRecordDataset, 156 | num_parallel_calls=tf.data.experimental.AUTOTUNE, 157 | cycle_length=4, 158 | block_length=16) 159 | 160 | dataset = dataset.batch( 161 | self.batch_size, drop_remainder=True) # Batch Size 162 | 163 | dataset = 
dataset.map(
164 |             self._parse_function,
165 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
166 | 
167 |         # dataset = dataset.cache()
168 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
169 | 
170 |         return dataset
171 | 
172 |     def get_test_function(self):
173 | 
174 |         filenames = tf.io.gfile.glob(self.test_tfrecords)
175 |         dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle(
176 |             buffer_size=16).repeat(-1)
177 |         dataset = dataset.interleave(
178 |             tf.data.TFRecordDataset,
179 |             num_parallel_calls=tf.data.experimental.AUTOTUNE,
180 |             cycle_length=4,
181 |             block_length=16)
182 | 
183 |         dataset = dataset.batch(
184 |             self.batch_size, drop_remainder=True)  # Batch Size
185 | 
186 |         dataset = dataset.map(
187 |             self._parse_function,
188 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
189 | 
190 |         # dataset = dataset.cache()
191 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
192 | 
193 |         return dataset
194 | 
--------------------------------------------------------------------------------
/cral/models/semantic_segmentation/Unet/tfrecord_parser.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | from .utils import UNetConfig
5 | 
6 | 
7 | def _pad(image, height, width, channels=3):
8 |     """Summary.
9 | 
10 |     Args:
11 |         image (TYPE): Description
12 |         height (TYPE): Description
13 |         width (TYPE): Description
14 |         scale (TYPE): Description
15 | 
16 |     Returns:
17 |         numpy nd.array: Description
18 |     """
19 | 
20 |     image = image.astype(np.uint8)
21 | 
22 |     padded_image = np.zeros(
23 |         shape=(height.astype(int), width.astype(int), channels),
24 |         dtype=np.uint8)
25 |     h, w, _ = image.shape
26 |     padded_image[:h, :w, :] = image
27 |     return padded_image
28 | 
29 | 
30 | @tf.function
31 | def decode_pad_img(image_string, pad_height, pad_width):
32 |     """Summary.
33 | 
34 |     Args:
35 |         image_string (TYPE): Description
36 |         pad_height (TYPE): Description
37 |         pad_width (TYPE): Description
38 |         scale (TYPE): Description
39 | 
40 |     Returns:
41 |         tf.tensor: Description
42 |     """
43 |     image = tf.image.decode_jpeg(image_string)
44 |     image = tf.numpy_function(
45 |         _pad, [image, pad_height, pad_width], Tout=tf.uint8)
46 |     image = tf.cast(image, tf.keras.backend.floatx())
47 |     return image
48 | 
49 | 
50 | @tf.function
51 | def decode_pad_msk(mask_string, pad_height, pad_width):
52 |     """Summary.
53 | 54 | Args: 55 | mask_string (TYPE): Description 56 | pad_height (TYPE): Description 57 | pad_width (TYPE): Description 58 | scale (TYPE): Description 59 | 60 | Returns: 61 | tf.tensor: Description 62 | """ 63 | mask = tf.image.decode_png(mask_string) 64 | mask = tf.numpy_function( 65 | _pad, [mask, pad_height, pad_width, 1], Tout=tf.uint8) 66 | return mask 67 | 68 | 69 | class UNetGenerator(object): 70 | """docstring for DeepLabv3Generator.""" 71 | 72 | def __init__( 73 | self, 74 | config, 75 | train_tfrecords, 76 | test_tfrecords, 77 | # num_classes, 78 | # mask_format, 79 | processing_func=lambda x: x.astype(tf.keras.backend.floatx()), 80 | augmentation=None, 81 | batch_size=4): 82 | 83 | assert isinstance(config, 84 | UNetConfig), 'please provide a `UNetConfig()` object' 85 | self.config = config 86 | 87 | self.train_tfrecords = train_tfrecords 88 | self.test_tfrecords = test_tfrecords 89 | 90 | # self.min_side = int(min_side) 91 | # self.max_side = int(max_side) 92 | 93 | # self.num_classes = int(num_classes) 94 | self.batch_size = batch_size 95 | self.aug = augmentation 96 | # self.mask_format = mask_format 97 | 98 | self.normalize_func = processing_func 99 | 100 | # def parse_tfrecords(filenames, height, width, batch_size=32): 101 | 102 | def _parse_function(self, serialized): 103 | 104 | features = { 105 | 'image/height': tf.io.FixedLenFeature([], tf.int64), 106 | 'image/width': tf.io.FixedLenFeature([], tf.int64), 107 | 'image/depth': tf.io.FixedLenFeature([], tf.int64), 108 | 'image_raw': tf.io.FixedLenFeature([], tf.string), 109 | 'mask/height': tf.io.FixedLenFeature([], tf.int64), 110 | 'mask/width': tf.io.FixedLenFeature([], tf.int64), 111 | 'mask/depth': tf.io.FixedLenFeature([], tf.int64), 112 | 'mask_raw': tf.io.FixedLenFeature([], tf.string) 113 | } 114 | 115 | parsed_example = tf.io.parse_example( 116 | serialized=serialized, features=features) 117 | 118 | max_height = tf.cast( 119 | tf.keras.backend.max(parsed_example['image/height']), tf.int32) 120 | max_width = tf.cast( 121 | tf.keras.backend.max(parsed_example['image/width']), tf.int32) 122 | 123 | image_batch = tf.map_fn( 124 | lambda x: decode_pad_img(x, max_height, max_width), 125 | parsed_example['image_raw'], 126 | dtype=tf.keras.backend.floatx()) 127 | image_batch = tf.numpy_function( 128 | self.normalize_func, [image_batch], Tout=tf.keras.backend.floatx()) 129 | image_batch.set_shape([None, None, None, 3]) 130 | 131 | mask_batch = tf.map_fn( 132 | lambda x: decode_pad_msk(x, max_height, max_width), 133 | parsed_example['mask_raw'], 134 | dtype=tf.uint8) 135 | mask_batch.set_shape([None, None, None, 1]) 136 | 137 | # image = tf.cast(tf.image.decode_jpeg(image_string), tf.uint8) 138 | image_batch = tf.image.resize(image_batch, 139 | (self.config.height, self.config.width)) 140 | image_batch = tf.cast(image_batch, tf.keras.backend.floatx()) 141 | 142 | mask_batch = tf.image.resize( 143 | mask_batch, (self.config.height, self.config.width), 144 | method='nearest') 145 | 146 | return image_batch, mask_batch 147 | 148 | def get_train_function(self): 149 | 150 | filenames = tf.io.gfile.glob(self.train_tfrecords) 151 | dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle( 152 | buffer_size=16).repeat(-1) 153 | 154 | dataset = dataset.interleave( 155 | tf.data.TFRecordDataset, 156 | num_parallel_calls=tf.data.experimental.AUTOTUNE, 157 | cycle_length=4, 158 | block_length=16) 159 | 160 | dataset = dataset.batch( 161 | self.batch_size, drop_remainder=True) # Batch Size 162 | 163 | dataset = dataset.map( 
164 |             self._parse_function,
165 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
166 | 
167 |         # dataset = dataset.cache()
168 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
169 | 
170 |         return dataset
171 | 
172 |     def get_test_function(self):
173 | 
174 |         filenames = tf.io.gfile.glob(self.test_tfrecords)
175 |         dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle(
176 |             buffer_size=16).repeat(-1)
177 |         dataset = dataset.interleave(
178 |             tf.data.TFRecordDataset,
179 |             num_parallel_calls=tf.data.experimental.AUTOTUNE,
180 |             cycle_length=4,
181 |             block_length=16)
182 | 
183 |         dataset = dataset.batch(
184 |             self.batch_size, drop_remainder=True)  # Batch Size
185 | 
186 |         dataset = dataset.map(
187 |             self._parse_function,
188 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
189 | 
190 |         # dataset = dataset.cache()
191 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
192 | 
193 |         return dataset
194 | 
--------------------------------------------------------------------------------
/cral/models/semantic_segmentation/SegNet/tfrecord_parser.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | from .utils import SegNetConfig
5 | 
6 | 
7 | def _pad(image, height, width, channels=3):
8 |     """Summary.
9 | 
10 |     Args:
11 |         image (TYPE): Description
12 |         height (TYPE): Description
13 |         width (TYPE): Description
14 |         scale (TYPE): Description
15 | 
16 |     Returns:
17 |         numpy nd.array: Description
18 |     """
19 | 
20 |     image = image.astype(np.uint8)
21 | 
22 |     padded_image = np.zeros(
23 |         shape=(height.astype(int), width.astype(int), channels),
24 |         dtype=np.uint8)
25 |     h, w, _ = image.shape
26 |     padded_image[:h, :w, :] = image
27 |     return padded_image
28 | 
29 | 
30 | @tf.function
31 | def decode_pad_img(image_string, pad_height, pad_width):
32 |     """Summary.
33 | 
34 |     Args:
35 |         image_string (TYPE): Description
36 |         pad_height (TYPE): Description
37 |         pad_width (TYPE): Description
38 |         scale (TYPE): Description
39 | 
40 |     Returns:
41 |         tf.tensor: Description
42 |     """
43 |     image = tf.image.decode_jpeg(image_string)
44 |     image = tf.numpy_function(
45 |         _pad, [image, pad_height, pad_width], Tout=tf.uint8)
46 |     image = tf.cast(image, tf.keras.backend.floatx())
47 |     return image
48 | 
49 | 
50 | @tf.function
51 | def decode_pad_msk(mask_string, pad_height, pad_width):
52 |     """Summary.
53 | 54 | Args: 55 | mask_string (TYPE): Description 56 | pad_height (TYPE): Description 57 | pad_width (TYPE): Description 58 | scale (TYPE): Description 59 | 60 | Returns: 61 | tf.tensor: Description 62 | """ 63 | mask = tf.image.decode_png(mask_string) 64 | mask = tf.numpy_function( 65 | _pad, [mask, pad_height, pad_width, 1], Tout=tf.uint8) 66 | return mask 67 | 68 | 69 | class SegNetGenerator(object): 70 | """docstring for DeepLabv3Generator.""" 71 | 72 | def __init__( 73 | self, 74 | config, 75 | train_tfrecords, 76 | test_tfrecords, 77 | # num_classes, 78 | # mask_format, 79 | processing_func=lambda x: x.astype(tf.keras.backend.floatx()), 80 | augmentation=None, 81 | batch_size=4): 82 | 83 | assert isinstance( 84 | config, SegNetConfig), 'please provide a `SegNetConfig()` object' 85 | self.config = config 86 | 87 | self.train_tfrecords = train_tfrecords 88 | self.test_tfrecords = test_tfrecords 89 | 90 | # self.min_side = int(min_side) 91 | # self.max_side = int(max_side) 92 | 93 | # self.num_classes = int(num_classes) 94 | self.batch_size = batch_size 95 | self.aug = augmentation 96 | # self.mask_format = mask_format 97 | 98 | self.normalize_func = processing_func 99 | 100 | # def parse_tfrecords(filenames, height, width, batch_size=32): 101 | 102 | def _parse_function(self, serialized): 103 | 104 | features = { 105 | 'image/height': tf.io.FixedLenFeature([], tf.int64), 106 | 'image/width': tf.io.FixedLenFeature([], tf.int64), 107 | 'image/depth': tf.io.FixedLenFeature([], tf.int64), 108 | 'image_raw': tf.io.FixedLenFeature([], tf.string), 109 | 'mask/height': tf.io.FixedLenFeature([], tf.int64), 110 | 'mask/width': tf.io.FixedLenFeature([], tf.int64), 111 | 'mask/depth': tf.io.FixedLenFeature([], tf.int64), 112 | 'mask_raw': tf.io.FixedLenFeature([], tf.string) 113 | } 114 | 115 | parsed_example = tf.io.parse_example( 116 | serialized=serialized, features=features) 117 | 118 | max_height = tf.cast( 119 | tf.keras.backend.max(parsed_example['image/height']), tf.int32) 120 | max_width = tf.cast( 121 | tf.keras.backend.max(parsed_example['image/width']), tf.int32) 122 | 123 | image_batch = tf.map_fn( 124 | lambda x: decode_pad_img(x, max_height, max_width), 125 | parsed_example['image_raw'], 126 | dtype=tf.keras.backend.floatx()) 127 | image_batch = tf.numpy_function( 128 | self.normalize_func, [image_batch], Tout=tf.keras.backend.floatx()) 129 | image_batch.set_shape([None, None, None, 3]) 130 | 131 | mask_batch = tf.map_fn( 132 | lambda x: decode_pad_msk(x, max_height, max_width), 133 | parsed_example['mask_raw'], 134 | dtype=tf.uint8) 135 | mask_batch.set_shape([None, None, None, 1]) 136 | 137 | # image = tf.cast(tf.image.decode_jpeg(image_string), tf.uint8) 138 | image_batch = tf.image.resize(image_batch, 139 | (self.config.height, self.config.width)) 140 | image_batch = tf.cast(image_batch, tf.keras.backend.floatx()) 141 | 142 | mask_batch = tf.image.resize( 143 | mask_batch, (self.config.height, self.config.width), 144 | method='nearest') 145 | 146 | return image_batch, mask_batch 147 | 148 | def get_train_function(self): 149 | 150 | filenames = tf.io.gfile.glob(self.train_tfrecords) 151 | dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle( 152 | buffer_size=16).repeat(-1) 153 | 154 | dataset = dataset.interleave( 155 | tf.data.TFRecordDataset, 156 | num_parallel_calls=tf.data.experimental.AUTOTUNE, 157 | cycle_length=4, 158 | block_length=16) 159 | 160 | dataset = dataset.batch( 161 | self.batch_size, drop_remainder=True) # Batch Size 162 | 163 | dataset = 
dataset.map(
164 |             self._parse_function,
165 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
166 | 
167 |         # dataset = dataset.cache()
168 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
169 | 
170 |         return dataset
171 | 
172 |     def get_test_function(self):
173 | 
174 |         filenames = tf.io.gfile.glob(self.test_tfrecords)
175 |         dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle(
176 |             buffer_size=16).repeat(-1)
177 | 
178 |         dataset = dataset.interleave(
179 |             tf.data.TFRecordDataset,
180 |             num_parallel_calls=tf.data.experimental.AUTOTUNE,
181 |             cycle_length=4,
182 |             block_length=16)
183 | 
184 |         dataset = dataset.batch(
185 |             self.batch_size, drop_remainder=True)  # Batch Size
186 | 
187 |         dataset = dataset.map(
188 |             self._parse_function,
189 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
190 | 
191 |         # dataset = dataset.cache()
192 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
193 | 
194 |         return dataset
195 | 
--------------------------------------------------------------------------------
/cral/models/semantic_segmentation/PspNet/tfrecord_parser.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from cral.models.semantic_segmentation.PspNet.utils import PspNetConfig
4 | 
5 | 
6 | def _pad(image, height, width, channels=3):
7 |     """Summary.
8 | 
9 |     Args:
10 |         image (TYPE): Description
11 |         height (TYPE): Description
12 |         width (TYPE): Description
13 |         scale (TYPE): Description
14 | 
15 |     Returns:
16 |         numpy nd.array: Description
17 |     """
18 | 
19 |     image = image.astype(np.uint8)
20 | 
21 |     padded_image = np.zeros(
22 |         shape=(height.astype(int), width.astype(int), channels),
23 |         dtype=np.uint8)
24 |     h, w, _ = image.shape
25 |     padded_image[:h, :w, :] = image
26 |     return padded_image
27 | 
28 | 
29 | @tf.function
30 | def decode_pad_img(image_string, pad_height, pad_width):
31 |     """Summary.
32 | 
33 |     Args:
34 |         image_string (TYPE): Description
35 |         pad_height (TYPE): Description
36 |         pad_width (TYPE): Description
37 |         scale (TYPE): Description
38 | 
39 |     Returns:
40 |         tf.tensor: Description
41 |     """
42 |     image = tf.image.decode_jpeg(image_string)
43 |     image = tf.numpy_function(
44 |         _pad, [image, pad_height, pad_width], Tout=tf.uint8)
45 |     image = tf.cast(image, tf.keras.backend.floatx())
46 |     return image
47 | 
48 | 
49 | @tf.function
50 | def decode_pad_msk(mask_string, pad_height, pad_width):
51 |     """Summary.
52 | 53 | Args: 54 | mask_string (TYPE): Description 55 | pad_height (TYPE): Description 56 | pad_width (TYPE): Description 57 | scale (TYPE): Description 58 | 59 | Returns: 60 | tf.tensor: Description 61 | """ 62 | mask = tf.image.decode_png(mask_string) 63 | mask = tf.numpy_function( 64 | _pad, [mask, pad_height, pad_width, 1], Tout=tf.uint8) 65 | return mask 66 | 67 | 68 | class PspNetGenerator(object): 69 | """docstring for DeepLabv3Generator.""" 70 | 71 | def __init__( 72 | self, 73 | config, 74 | train_tfrecords, 75 | test_tfrecords, 76 | # num_classes, 77 | # mask_format, 78 | processing_func=lambda x: x.astype(tf.keras.backend.floatx()), 79 | augmentation=None, 80 | batch_size=4): 81 | 82 | assert isinstance( 83 | config, PspNetConfig), 'please provide a `PspNetConfig()` object' 84 | self.config = config 85 | 86 | self.train_tfrecords = train_tfrecords 87 | self.test_tfrecords = test_tfrecords 88 | 89 | # self.min_side = int(min_side) 90 | # self.max_side = int(max_side) 91 | 92 | # self.num_classes = int(num_classes) 93 | self.batch_size = batch_size 94 | self.aug = augmentation 95 | # self.mask_format = mask_format 96 | 97 | self.normalize_func = processing_func 98 | 99 | # def parse_tfrecords(filenames, height, width, batch_size=32): 100 | 101 | def _parse_function(self, serialized): 102 | 103 | features = { 104 | 'image/height': tf.io.FixedLenFeature([], tf.int64), 105 | 'image/width': tf.io.FixedLenFeature([], tf.int64), 106 | 'image/depth': tf.io.FixedLenFeature([], tf.int64), 107 | 'image_raw': tf.io.FixedLenFeature([], tf.string), 108 | 'mask/height': tf.io.FixedLenFeature([], tf.int64), 109 | 'mask/width': tf.io.FixedLenFeature([], tf.int64), 110 | 'mask/depth': tf.io.FixedLenFeature([], tf.int64), 111 | 'mask_raw': tf.io.FixedLenFeature([], tf.string) 112 | } 113 | 114 | parsed_example = tf.io.parse_example( 115 | serialized=serialized, features=features) 116 | 117 | max_height = tf.cast( 118 | tf.keras.backend.max(parsed_example['image/height']), tf.int32) 119 | max_width = tf.cast( 120 | tf.keras.backend.max(parsed_example['image/width']), tf.int32) 121 | 122 | image_batch = tf.map_fn( 123 | lambda x: decode_pad_img(x, max_height, max_width), 124 | parsed_example['image_raw'], 125 | dtype=tf.keras.backend.floatx()) 126 | image_batch = tf.numpy_function( 127 | self.normalize_func, [image_batch], Tout=tf.keras.backend.floatx()) 128 | image_batch.set_shape([None, None, None, 3]) 129 | 130 | mask_batch = tf.map_fn( 131 | lambda x: decode_pad_msk(x, max_height, max_width), 132 | parsed_example['mask_raw'], 133 | dtype=tf.uint8) 134 | mask_batch.set_shape([None, None, None, 1]) 135 | 136 | # image = tf.cast(tf.image.decode_jpeg(image_string), tf.uint8) 137 | image_batch = tf.image.resize(image_batch, 138 | (self.config.height, self.config.width)) 139 | image_batch = tf.cast(image_batch, tf.keras.backend.floatx()) 140 | 141 | mask_batch = tf.image.resize( 142 | mask_batch, (self.config.height, self.config.width), 143 | method='nearest') 144 | 145 | return image_batch, mask_batch 146 | 147 | def get_train_function(self): 148 | 149 | filenames = tf.io.gfile.glob(self.train_tfrecords) 150 | dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle( 151 | buffer_size=16).repeat(-1) 152 | 153 | dataset = dataset.interleave( 154 | tf.data.TFRecordDataset, 155 | num_parallel_calls=tf.data.experimental.AUTOTUNE, 156 | cycle_length=4, 157 | block_length=16) 158 | 159 | dataset = dataset.batch( 160 | self.batch_size, drop_remainder=True) # Batch Size 161 | 162 | dataset = 
dataset.map(
163 |             self._parse_function,
164 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
165 | 
166 |         # dataset = dataset.cache()
167 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
168 | 
169 |         return dataset
170 | 
171 |     def get_test_function(self):
172 | 
173 |         filenames = tf.io.gfile.glob(self.test_tfrecords)
174 |         dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle(
175 |             buffer_size=16).repeat(-1)
176 |         dataset = dataset.interleave(
177 |             tf.data.TFRecordDataset,
178 |             num_parallel_calls=tf.data.experimental.AUTOTUNE,
179 |             cycle_length=4,
180 |             block_length=16)
181 | 
182 |         dataset = dataset.batch(
183 |             self.batch_size, drop_remainder=True)  # Batch Size
184 | 
185 |         dataset = dataset.map(
186 |             self._parse_function,
187 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
188 | 
189 |         # dataset = dataset.cache()
190 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
191 | 
192 |         return dataset
193 | 
--------------------------------------------------------------------------------
/cral/models/semantic_segmentation/UnetPlusPlus/tfrecord_parser.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | from .utils import UnetPlusPlusConfig
5 | 
6 | 
7 | def _pad(image, height, width, channels=3):
8 |     """Summary.
9 | 
10 |     Args:
11 |         image (TYPE): Description
12 |         height (TYPE): Description
13 |         width (TYPE): Description
14 |         scale (TYPE): Description
15 | 
16 |     Returns:
17 |         numpy nd.array: Description
18 |     """
19 | 
20 |     image = image.astype(np.uint8)
21 | 
22 |     padded_image = np.zeros(
23 |         shape=(height.astype(int), width.astype(int), channels),
24 |         dtype=np.uint8)
25 |     h, w, _ = image.shape
26 |     padded_image[:h, :w, :] = image
27 |     return padded_image
28 | 
29 | 
30 | @tf.function
31 | def decode_pad_img(image_string, pad_height, pad_width):
32 |     """Summary.
33 | 
34 |     Args:
35 |         image_string (TYPE): Description
36 |         pad_height (TYPE): Description
37 |         pad_width (TYPE): Description
38 |         scale (TYPE): Description
39 | 
40 |     Returns:
41 |         tf.tensor: Description
42 |     """
43 |     image = tf.image.decode_jpeg(image_string)
44 |     image = tf.numpy_function(
45 |         _pad, [image, pad_height, pad_width], Tout=tf.uint8)
46 |     image = tf.cast(image, tf.keras.backend.floatx())
47 |     return image
48 | 
49 | 
50 | @tf.function
51 | def decode_pad_msk(mask_string, pad_height, pad_width):
52 |     """Summary.
53 | 54 | Args: 55 | mask_string (TYPE): Description 56 | pad_height (TYPE): Description 57 | pad_width (TYPE): Description 58 | scale (TYPE): Description 59 | 60 | Returns: 61 | tf.tensor: Description 62 | """ 63 | mask = tf.image.decode_png(mask_string) 64 | mask = tf.numpy_function( 65 | _pad, [mask, pad_height, pad_width, 1], Tout=tf.uint8) 66 | return mask 67 | 68 | 69 | class UnetPlusPlusGenerator(object): 70 | """docstring for DeepLabv3Generator.""" 71 | 72 | def __init__( 73 | self, 74 | config, 75 | train_tfrecords, 76 | test_tfrecords, 77 | # num_classes, 78 | # mask_format, 79 | processing_func=lambda x: x.astype(tf.keras.backend.floatx()), 80 | augmentation=None, 81 | batch_size=4): 82 | 83 | assert isinstance(config, UnetPlusPlusConfig 84 | ), 'please provide a `UnetPlusPlusConfig()` object' 85 | self.config = config 86 | 87 | self.train_tfrecords = train_tfrecords 88 | self.test_tfrecords = test_tfrecords 89 | 90 | # self.min_side = int(min_side) 91 | # self.max_side = int(max_side) 92 | 93 | # self.num_classes = int(num_classes) 94 | self.batch_size = batch_size 95 | self.aug = augmentation 96 | # self.mask_format = mask_format 97 | 98 | self.normalize_func = processing_func 99 | 100 | # def parse_tfrecords(filenames, height, width, batch_size=32): 101 | 102 | def _parse_function(self, serialized): 103 | 104 | features = { 105 | 'image/height': tf.io.FixedLenFeature([], tf.int64), 106 | 'image/width': tf.io.FixedLenFeature([], tf.int64), 107 | 'image/depth': tf.io.FixedLenFeature([], tf.int64), 108 | 'image_raw': tf.io.FixedLenFeature([], tf.string), 109 | 'mask/height': tf.io.FixedLenFeature([], tf.int64), 110 | 'mask/width': tf.io.FixedLenFeature([], tf.int64), 111 | 'mask/depth': tf.io.FixedLenFeature([], tf.int64), 112 | 'mask_raw': tf.io.FixedLenFeature([], tf.string) 113 | } 114 | 115 | parsed_example = tf.io.parse_example( 116 | serialized=serialized, features=features) 117 | 118 | max_height = tf.cast( 119 | tf.keras.backend.max(parsed_example['image/height']), tf.int32) 120 | max_width = tf.cast( 121 | tf.keras.backend.max(parsed_example['image/width']), tf.int32) 122 | 123 | image_batch = tf.map_fn( 124 | lambda x: decode_pad_img(x, max_height, max_width), 125 | parsed_example['image_raw'], 126 | dtype=tf.keras.backend.floatx()) 127 | image_batch = tf.numpy_function( 128 | self.normalize_func, [image_batch], Tout=tf.keras.backend.floatx()) 129 | image_batch.set_shape([None, None, None, 3]) 130 | 131 | mask_batch = tf.map_fn( 132 | lambda x: decode_pad_msk(x, max_height, max_width), 133 | parsed_example['mask_raw'], 134 | dtype=tf.uint8) 135 | mask_batch.set_shape([None, None, None, 1]) 136 | 137 | # image = tf.cast(tf.image.decode_jpeg(image_string), tf.uint8) 138 | image_batch = tf.image.resize(image_batch, 139 | (self.config.height, self.config.width)) 140 | image_batch = tf.cast(image_batch, tf.keras.backend.floatx()) 141 | 142 | mask_batch = tf.image.resize( 143 | mask_batch, (self.config.height, self.config.width), 144 | method='nearest') 145 | 146 | return image_batch, mask_batch 147 | 148 | def get_train_function(self): 149 | 150 | filenames = tf.io.gfile.glob(self.train_tfrecords) 151 | dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle( 152 | buffer_size=16).repeat(-1) 153 | 154 | dataset = dataset.interleave( 155 | tf.data.TFRecordDataset, 156 | num_parallel_calls=tf.data.experimental.AUTOTUNE, 157 | cycle_length=4, 158 | block_length=16) 159 | 160 | dataset = dataset.batch( 161 | self.batch_size, drop_remainder=True) # Batch Size 162 | 163 | 
dataset = dataset.map(
164 |             self._parse_function,
165 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
166 | 
167 |         # dataset = dataset.cache()
168 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
169 | 
170 |         return dataset
171 | 
172 |     def get_test_function(self):
173 | 
174 |         filenames = tf.io.gfile.glob(self.test_tfrecords)
175 |         dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle(
176 |             buffer_size=16).repeat(-1)
177 |         dataset = dataset.interleave(
178 |             tf.data.TFRecordDataset,
179 |             num_parallel_calls=tf.data.experimental.AUTOTUNE,
180 |             cycle_length=4,
181 |             block_length=16)
182 | 
183 |         dataset = dataset.batch(
184 |             self.batch_size, drop_remainder=True)  # Batch Size
185 | 
186 |         dataset = dataset.map(
187 |             self._parse_function,
188 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
189 | 
190 |         # dataset = dataset.cache()
191 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
192 | 
193 |         return dataset
194 | 
--------------------------------------------------------------------------------
/cral/models/object_detection/YoloV3/predict.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from cral.models.object_detection.YoloV3.base import yolo_eval
4 | from PIL import Image, ImageDraw, ImageFont
5 | from tensorflow import keras
6 | 
7 | font = ImageFont.load_default()
8 | 
9 | STANDARD_COLORS = [
10 |     'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige',
11 |     'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
12 |     'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk',
13 |     'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki',
14 |     'DarkOrange', 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise',
15 |     'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick',
16 |     'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold',
17 |     'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory',
18 |     'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon',
19 |     'LightBlue', 'LightCoral', 'LightCyan', 'LightGoldenRodYellow',
20 |     'LightGray', 'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon',
21 |     'LightSeaGreen', 'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey',
22 |     'LightSteelBlue', 'LightYellow', 'Lime', 'LimeGreen', 'Linen', 'Magenta',
23 |     'MediumAquaMarine', 'MediumOrchid', 'MediumPurple', 'MediumSeaGreen',
24 |     'MediumSlateBlue', 'MediumSpringGreen', 'MediumTurquoise',
25 |     'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 'NavajoWhite',
26 |     'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 'Orchid',
27 |     'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
28 |     'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
29 |     'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
30 |     'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
31 |     'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue',
32 |     'GreenYellow', 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat',
33 |     'White', 'WhiteSmoke', 'Yellow', 'YellowGreen'
34 | ]
35 | 
36 | 
37 | def letterbox_image(image, size):
38 |     """Resize image with unchanged aspect ratio using padding."""
39 |     iw, ih = image.size
40 |     w, h = size
41 |     scale = min(w / iw, h / ih)
42 |     nw = int(iw * scale)
43 |     nh = int(ih * scale)
44 | 
45 |     image = image.resize((nw, nh), Image.BICUBIC)
46 |     new_image = Image.new('RGB',
size, (128, 128, 128)) 47 | new_image.paste(image, ((w - nw) // 2, (h - nh) // 2)) 48 | return new_image 49 | 50 | 51 | def preprocess_image(image_array, config): 52 | 53 | ih, iw, _c = image_array.shape 54 | h, w = config.input_shape 55 | 56 | # if not random: 57 | # resize image 58 | scale = min(w / iw, h / ih) 59 | nw = int(iw * scale) 60 | nh = int(ih * scale) 61 | dx = (w - nw) // 2 62 | dy = (h - nh) // 2 63 | 64 | image = cv2.resize(image_array, (nw, nh), interpolation=cv2.INTER_CUBIC) 65 | new_image = np.ones(shape=(h, w, 3), dtype=np.uint8) * 128 66 | new_image[dy:dy + nh, dx:dx + nw, :] = image 67 | return new_image, scale, dy, dx 68 | 69 | 70 | def annotate_image(image, 71 | bboxes, 72 | scores, 73 | labels, 74 | threshold=0.5, 75 | label_dict=None): 76 | image = Image.open(image) 77 | Imagedraw = ImageDraw.Draw(image) 78 | # thickness = (image.size[0] + image.size[1]) // 300 79 | 80 | for box, label, score in zip(bboxes, labels, scores): 81 | if score < threshold: 82 | continue 83 | 84 | top, left, bottom, right = box 85 | 86 | top = max(0, np.floor(top + 0.5).astype('int32')) 87 | left = max(0, np.floor(left + 0.5).astype('int32')) 88 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) 89 | right = min(image.size[0], np.floor(right + 0.5).astype('int32')) 90 | # print(label, (left, top), (right, bottom)) 91 | 92 | label_to_display = label 93 | if isinstance(label_dict, dict): 94 | label_to_display = label_dict[label] 95 | 96 | caption = '{}|{:.3f}'.format(label_to_display, score) 97 | 98 | colortofill = STANDARD_COLORS[label] 99 | Imagedraw.rectangle([left, top, right, bottom], 100 | fill=None, 101 | outline=colortofill, 102 | width=3) 103 | 104 | display_str_heights = font.getsize(caption)[1] 105 | # Each display_str has a top and bottom margin of 0.05x. 106 | total_display_str_height = (1 + 2 * 0.05) * display_str_heights 107 | 108 | if top > total_display_str_height: 109 | text_bottom = top 110 | else: 111 | text_bottom = bottom + total_display_str_height 112 | 113 | text_width, text_height = font.getsize(caption) 114 | margin = np.ceil(0.05 * text_height) 115 | Imagedraw.rectangle([(left, text_bottom - text_height - 2 * margin), 116 | (left + text_width, text_bottom)], 117 | fill=colortofill) 118 | 119 | Imagedraw.text((left + margin, text_bottom - text_height - margin), 120 | caption, 121 | fill='black', 122 | font=font) 123 | 124 | return image 125 | 126 | 127 | def freeze_model(model_path, 128 | config, 129 | num_classes, 130 | max_boxes=20, 131 | score_threshold=.6, 132 | iou_threshold=.5): 133 | # Load model, or construct model and load weights. 134 | yolo_model = keras.models.load_model(model_path, compile=False) 135 | 136 | boxes, scores, classes = yolo_eval( 137 | yolo_model.outputs, 138 | config.anchors, 139 | num_classes, 140 | config.input_shape, 141 | anchor_mask=config.anchor_mask, 142 | max_boxes=config.max_boxes, 143 | score_threshold=score_threshold, 144 | iou_threshold=iou_threshold) 145 | 146 | prediction_model = keras.models.Model(yolo_model.input, 147 | [boxes, scores, classes]) 148 | 149 | return prediction_model 150 | 151 | 152 | def detect_image(model, image, config): 153 | 154 | boxed_image, scale, dy, dx = preprocess_image(np.array(image), config) 155 | 156 | image_data = boxed_image.astype(keras.backend.floatx()) 157 | 158 | image_data /= 255. 159 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
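    # Worked example (editor's sketch with hypothetical numbers): for a
    # 640x480 source image and a 416x416 network input, preprocess_image
    # gives scale = min(416/640, 416/480) = 0.65, nw = 416, nh = 312,
    # dx = 0 and dy = (416 - 312) // 2 = 52. The predictions below are in
    # letterboxed coordinates, so subtracting dy/dx removes the padding and
    # dividing by scale maps boxes back onto the original image, e.g. a
    # predicted top of 152 becomes (152 - 52) / 0.65 = 153.8.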
160 | 161 | boxes, scores, classes = model.predict(image_data) 162 | 163 | boxes[:, 0] = boxes[:, 0] - dy 164 | boxes[:, 2] = boxes[:, 2] - dy 165 | 166 | boxes[:, 1] = boxes[:, 1] - dx 167 | boxes[:, 3] = boxes[:, 3] - dx 168 | 169 | return boxes / scale, scores, classes 170 | -------------------------------------------------------------------------------- /cral/models/semantic_segmentation/deeplabv3/tfrecord_parser.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from cral.models.semantic_segmentation.deeplabv3.utils import Deeplabv3Config 4 | 5 | 6 | def _pad(image, height, width, channels=3): 7 | """Summary. 8 | 9 | Args: 10 | image (TYPE): Description 11 | height (TYPE): Description 12 | width (TYPE): Description 13 | scale (TYPE): Description 14 | 15 | Returns: 16 | numpy nd.array: Description 17 | """ 18 | 19 | image = image.astype(np.uint8) 20 | 21 | padded_image = np.zeros( 22 | shape=(height.astype(int), width.astype(int), channels), 23 | dtype=np.uint8) 24 | h, w, _ = image.shape 25 | padded_image[:h, :w, :] = image 26 | return padded_image 27 | 28 | 29 | @tf.function 30 | def decode_pad_img(image_string, pad_height, pad_width): 31 | """Summary. 32 | 33 | Args: 34 | image_string (TYPE): Description 35 | pad_height (TYPE): Description 36 | pad_width (TYPE): Description 37 | scale (TYPE): Description 38 | 39 | Returns: 40 | tf.tensor: Description 41 | """ 42 | image = tf.image.decode_jpeg(image_string) 43 | image = tf.numpy_function( 44 | _pad, [image, pad_height, pad_width], Tout=tf.uint8) 45 | image = tf.cast(image, tf.keras.backend.floatx()) 46 | return image 47 | 48 | 49 | @tf.function 50 | def decode_pad_msk(mask_string, pad_height, pad_width): 51 | """Summary. 
52 | 53 | Args: 54 | mask_string (TYPE): Description 55 | pad_height (TYPE): Description 56 | pad_width (TYPE): Description 57 | scale (TYPE): Description 58 | 59 | Returns: 60 | tf.tensor: Description 61 | """ 62 | mask = tf.image.decode_png(mask_string) 63 | mask = tf.numpy_function( 64 | _pad, [mask, pad_height, pad_width, 1], Tout=tf.uint8) 65 | return mask 66 | 67 | 68 | class DeepLabv3Generator(object): 69 | """docstring for DeepLabv3Generator.""" 70 | 71 | def __init__( 72 | self, 73 | config, 74 | train_tfrecords, 75 | test_tfrecords, 76 | # num_classes, 77 | # mask_format, 78 | processing_func=lambda x: x.astype(tf.keras.backend.floatx()), 79 | augmentation=None, 80 | batch_size=4): 81 | 82 | assert isinstance( 83 | config, 84 | Deeplabv3Config), 'please provide a `Deeplabv3Config()` object' 85 | self.config = config 86 | 87 | self.train_tfrecords = train_tfrecords 88 | self.test_tfrecords = test_tfrecords 89 | 90 | # self.min_side = int(min_side) 91 | # self.max_side = int(max_side) 92 | 93 | # self.num_classes = int(num_classes) 94 | self.batch_size = batch_size 95 | self.aug = augmentation 96 | # self.mask_format = mask_format 97 | 98 | self.normalize_func = processing_func 99 | 100 | # def parse_tfrecords(filenames, height, width, batch_size=32): 101 | 102 | def _parse_function(self, serialized): 103 | 104 | features = { 105 | 'image/height': tf.io.FixedLenFeature([], tf.int64), 106 | 'image/width': tf.io.FixedLenFeature([], tf.int64), 107 | 'image/depth': tf.io.FixedLenFeature([], tf.int64), 108 | 'image_raw': tf.io.FixedLenFeature([], tf.string), 109 | 'mask/height': tf.io.FixedLenFeature([], tf.int64), 110 | 'mask/width': tf.io.FixedLenFeature([], tf.int64), 111 | 'mask/depth': tf.io.FixedLenFeature([], tf.int64), 112 | 'mask_raw': tf.io.FixedLenFeature([], tf.string) 113 | } 114 | 115 | parsed_example = tf.io.parse_example( 116 | serialized=serialized, features=features) 117 | 118 | max_height = tf.cast( 119 | tf.keras.backend.max(parsed_example['image/height']), tf.int32) 120 | max_width = tf.cast( 121 | tf.keras.backend.max(parsed_example['image/width']), tf.int32) 122 | 123 | image_batch = tf.map_fn( 124 | lambda x: decode_pad_img(x, max_height, max_width), 125 | parsed_example['image_raw'], 126 | dtype=tf.keras.backend.floatx()) 127 | image_batch = tf.numpy_function( 128 | self.normalize_func, [image_batch], Tout=tf.keras.backend.floatx()) 129 | image_batch.set_shape([None, None, None, 3]) 130 | 131 | mask_batch = tf.map_fn( 132 | lambda x: decode_pad_msk(x, max_height, max_width), 133 | parsed_example['mask_raw'], 134 | dtype=tf.uint8) 135 | mask_batch.set_shape([None, None, None, 1]) 136 | 137 | # image = tf.cast(tf.image.decode_jpeg(image_string), tf.uint8) 138 | image_batch = tf.image.resize(image_batch, 139 | (self.config.height, self.config.width)) 140 | image_batch = tf.cast(image_batch, tf.keras.backend.floatx()) 141 | 142 | mask_batch = tf.image.resize( 143 | mask_batch, (self.config.height, self.config.width), 144 | method='nearest') 145 | 146 | return image_batch, mask_batch 147 | 148 | def get_train_function(self): 149 | 150 | filenames = tf.io.gfile.glob(self.train_tfrecords) 151 | dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle( 152 | buffer_size=16).repeat(-1) 153 | 154 | dataset = dataset.interleave( 155 | tf.data.TFRecordDataset, 156 | num_parallel_calls=tf.data.experimental.AUTOTUNE, 157 | cycle_length=4, 158 | block_length=16) 159 | 160 | dataset = dataset.batch( 161 | self.batch_size, drop_remainder=True) # Batch Size 162 | 163 | 
dataset = dataset.map(
164 |             self._parse_function,
165 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
166 | 
167 |         # dataset = dataset.cache()
168 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
169 | 
170 |         return dataset
171 | 
172 |     def get_test_function(self):
173 | 
174 |         filenames = tf.io.gfile.glob(self.test_tfrecords)
175 |         dataset = tf.data.Dataset.from_tensor_slices(filenames).shuffle(
176 |             buffer_size=16).repeat(-1)
177 | 
178 |         dataset = dataset.interleave(
179 |             tf.data.TFRecordDataset,
180 |             num_parallel_calls=tf.data.experimental.AUTOTUNE,
181 |             cycle_length=4,
182 |             block_length=16)
183 | 
184 |         dataset = dataset.batch(
185 |             self.batch_size, drop_remainder=True)  # Batch Size
186 | 
187 |         dataset = dataset.map(
188 |             self._parse_function,
189 |             num_parallel_calls=tf.data.experimental.AUTOTUNE)
190 | 
191 |         # dataset = dataset.cache()
192 |         dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
193 | 
194 |         return dataset
195 | 
--------------------------------------------------------------------------------
/cral/data_feeder/semantic_seg_data_feeder.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 | 
4 | import numpy as np
5 | import pandas as pd
6 | import tensorflow as tf
7 | import tqdm
8 | from cral.data_feeder.utils import _bytes_feature, _int64_feature
9 | from PIL import Image
10 | 
11 | # number of shards per split
12 | _NUM_SHARDS = 4
13 | 
14 | # Queue length
15 | _PARALLEL_READS = 16
16 | debug = False
17 | 
18 | 
19 | def image_example(image_string, mask_string, image_shape, mask_shape):
20 |     # image_shape = tf.image.decode_jpeg(image_string).shape
21 |     # mask_shape = tf.image.decode_png(mask_string).shape
22 | 
23 |     feature = {
24 |         'image/height': _int64_feature(image_shape[0]),
25 |         'image/width': _int64_feature(image_shape[1]),
26 |         'image/depth': _int64_feature(image_shape[2]),
27 |         'image_raw': _bytes_feature(image_string),
28 |         'mask/height': _int64_feature(mask_shape[0]),
29 |         'mask/width': _int64_feature(mask_shape[1]),
30 |         'mask/depth': _int64_feature(1),
31 |         'mask_raw': _bytes_feature(mask_string)
32 |     }
33 | 
34 |     return tf.train.Example(features=tf.train.Features(feature=feature))
35 | 
36 | 
37 | def _create_tfrecords_from_dataset(image_paths,
38 |                                    image_dir,
39 |                                    mask_dir,
40 |                                    out_dir,
41 |                                    train_only=True):
42 |     '''Args:
43 |         image_paths : list of image file names relative to `image_dir`
44 |         image_dir : directory containing the input images
45 |         mask_dir : directory containing the corresponding PNG masks
46 |         out_dir : destination directory for the TFRecord shards
47 |         train_only : True for the training split, False for the test split
48 |     '''
49 |     assert os.path.exists(out_dir), 'directory does not exist :: {}'.format(
50 |         out_dir)
51 | 
52 |     # image_paths = glob.glob(os.path.join(image_dir,'*.jpg'))
53 |     num_images = len(image_paths)
54 |     # print(num_images)
55 |     unique_vals = []
56 | 
57 |     images_per_shard = num_images // _NUM_SHARDS
58 | 
59 |     shard_meta = {}
60 |     start_index = 0
61 | 
62 |     for idx in range(_NUM_SHARDS):
63 |         prefix = 'train' if train_only else 'test'
64 |         end_index = num_images if idx == _NUM_SHARDS - 1 else min(
65 |             start_index + images_per_shard, num_images)
66 |         shard_meta['%s-%05d-of-%05d.tfrecord' %
67 |                    (prefix, idx + 1, _NUM_SHARDS)] = (start_index, end_index)
68 |         start_index = end_index
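    # Worked example (editor's sketch, hypothetical numbers): with
    # num_images = 10 and _NUM_SHARDS = 4, images_per_shard = 2 and the
    # ranges above come out as (0, 2), (2, 4), (4, 6) and (6, 10); the
    # final shard absorbs the two leftover images, and advancing
    # start_index by exactly end_index leaves no image unwritten.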
69 |     for key, (START, END) in shard_meta.items():
70 | 
71 |         print('Writing :: {}'.format(key))
72 | 
73 |         with tf.io.TFRecordWriter(os.path.join(out_dir, key)) as writer:
74 | 
75 |             for image_name in tqdm.tqdm(image_paths[START:END]):
76 |                 image_file = os.path.join(image_dir, image_name)
77 | 
78 |                 with open(image_file, 'rb') as imf:
79 |                     image_string = imf.read()
80 |                 image_array = np.array(Image.open(image_file))
81 |                 image_shape = image_array.shape
82 |                 if len(image_shape) != 3:
83 |                     print('ignoring {}, shape : {}'.format(
84 |                         image_file, image_shape))
85 |                     continue
86 | 
87 |                 assert image_shape[
88 |                     2] == 3, f'expected image to have 3 channels but got\
89 |                     {image_shape[2]} instead'
90 | 
91 |                 mask_file = image_file.replace(image_dir, mask_dir).replace(
92 |                     '.jpg', '.png')
93 | 
94 |                 if os.path.isfile(mask_file) is False:  # image has no mask
95 |                     # create an all black mask
96 |                     black_mask = Image.new(
97 |                         mode='L',
98 |                         size=(image_shape[1], image_shape[0]),
99 |                         color=0)
100 |                     # save it as temp.png
101 |                     black_mask.save('temp.png')
102 |                     mask_file = 'temp.png'
103 | 
104 |                 with open(mask_file, 'rb') as mmf:
105 |                     mask_string = mmf.read()
106 |                 mask_array = np.array(Image.open(mask_file))
107 |                 unique_pixels = np.unique(mask_array).tolist()
108 |                 unique_vals = list(set(unique_vals).union(unique_pixels))
109 |                 mask_shape = mask_array.shape
110 | 
111 |                 assert len(mask_shape
112 |                            ) == 2, f'expected mask to have 1 channel but got \
113 |                     {mask_shape} instead'
114 | 
115 |                 assert mask_shape[0] == image_shape[
116 |                     0], 'mask and image height mismatch'
117 |                 assert mask_shape[1] == image_shape[
118 |                     1], 'mask and image width mismatch'
119 | 
120 |                 tf_example = image_example(
121 |                     image_string=image_string,
122 |                     mask_string=mask_string,
123 |                     image_shape=image_shape,
124 |                     mask_shape=mask_shape)
125 | 
126 |                 writer.write(tf_example.SerializeToString())
127 | 
128 |     print('Num labels in %s-set:: %d' % (prefix, len(unique_vals)))
129 | 
130 |     return num_images, unique_vals
131 | 
132 | 
133 | def create_tfrecords(meta_info,
134 |                      dataset_csv_path,
135 |                      out_path=tempfile.gettempdir()):
136 | 
137 |     dataset_df = pd.read_csv(dataset_csv_path)
138 |     # with open(meta_json_path,'r') as json_file:
139 |     #     meta_info=json.loads(json_file.read())
140 | 
141 |     train_dataset = dataset_df[dataset_df['train_only'] ==  # noqa: E712
142 |                                True]['image_name'].tolist()
143 |     train_img_dir = meta_info['train_images_dir']
144 |     train_anno_dir = meta_info['train_anno_dir']
145 | 
146 |     print('creating tfrecords for training set...')
147 | 
148 |     meta_info[
149 |         'num_training_images'], num_classes = _create_tfrecords_from_dataset(
150 |             image_paths=train_dataset,
151 |             image_dir=train_img_dir,
152 |             mask_dir=train_anno_dir,
153 |             out_dir=out_path,
154 |             train_only=True)
155 | 
156 |     test_dataset = dataset_df[dataset_df['train_only'] == False]  # noqa: E712
157 |     if len(test_dataset) > 0:
158 |         test_dataset = test_dataset['image_name'].tolist()
159 |         val_img_dir = meta_info['val_images_dir']
160 |         val_anno_dir = meta_info['val_anno_dir']
161 |         if val_img_dir is None:
162 |             val_img_dir = train_img_dir
163 |         print('creating tfrecords for test set...')
164 |         meta_info[
165 |             'num_test_images'], classes_in_testset = _create_tfrecords_from_dataset(  # noqa: E501
166 |                 image_paths=test_dataset,
167 |                 image_dir=val_img_dir,
168 |                 mask_dir=val_anno_dir,
169 |                 out_dir=out_path,
170 |                 train_only=False)
171 |         num_classes = list(set(num_classes).union(classes_in_testset))
172 |     else:
173 |         meta_info['num_test_images'] = 0
174 | 
175 |     meta_info['tfrecord_path'] = out_path
176 |     meta_info['num_classes'] = len(num_classes)
177 | 
178 |     return meta_info
179 | 
--------------------------------------------------------------------------------
/cral/data_versioning/classification_data_parse_v2.py:
--------------------------------------------------------------------------------
/cral/data_versioning/classification_data_parse_v2.py:
--------------------------------------------------------------------------------
import concurrent.futures
import json
import os
from itertools import repeat

import numpy as np
import pandas as pd
import tqdm

from .cral_hash import hashFile
from .cral_util import find_images

_RANDOM_SEED = 12


def find_classes(img_path):
    classes = []
    for folder in os.listdir(img_path):
        class_images_path = os.path.join(img_path, folder)
        if os.path.isdir(class_images_path):
            images = find_images(class_images_path)
            if len(images) != 0:
                classes.append(folder)
    return classes


def make_dict(train_images_dir, val_images_dir):
    classes = find_classes(train_images_dir)
    json_data = {}
    json_data['task_type'] = 'Classification'
    json_data['dataset_format'] = 'Classification Format'
    json_data['num_classes'] = len(classes)
    json_data['class_names'] = classes
    json_data['train_images_dir'] = train_images_dir
    json_data['val_images_dir'] = val_images_dir
    return json_data


def get_datapoint_info_helper(func_args):
    return get_datapoint_info(func_args[0], func_args[1], func_args[2])


def get_datapoint_info(image_location, folder, train_only):
    image_name = os.path.basename(image_location)
    image_hs = hashFile(image_location)
    return image_name, folder, image_hs, train_only


def get_dataset_info(file_dir, train_only=True, split=None):
    """Parses the data and collects per-image bookkeeping.

    Args:
        file_dir (str): path to folder with images
        train_only (bool, optional): True=Train, False=Validation
        split (float, optional): fraction of each class's training images
            to mark as validation

    Returns:
        pandas.DataFrame: one row per image with name, class, hash and
            train/validation flag
    """
    master_dataset_info = {
        'image_name': [],
        'annotation_name': [],
        'image_hash': [],
        'train_only': []
    }
    master_pd_dataset = pd.DataFrame.from_dict(master_dataset_info)
    np.random.seed(_RANDOM_SEED)
    num_classes = 0
    num_images = 0
    class_name = list()
    for folder in os.listdir(file_dir):
        class_images_path = os.path.join(file_dir, folder)
        if os.path.isdir(class_images_path):
            images = find_images(class_images_path)
            if len(images) > 0:
                num_classes += 1
                class_name.append(folder)
                num_images += len(images)

    if train_only:
        subset = 'training'
    else:
        subset = 'test'
    print(f'\nProcessing {subset} dataset')
    print(
        f'Found {num_images} images belonging to {num_classes} classes with labels {class_name}'  # noqa: E501
    )

    done = 0
    done_classes = 0
    for folder in os.listdir(file_dir):
        class_images_path = os.path.join(file_dir, folder)
        if os.path.isdir(class_images_path):
            images = find_images(class_images_path)
            if len(images) > 0:
                print(
                    f"\nProcessing Class {done_classes+1}/{num_classes} '{class_name[done_classes]}'"  # noqa: E501
                )
                done_classes += 1
                dataset_info = {
                    'image_name': [],
                    'annotation_name': [],
                    'image_hash': [],
                    'train_only': []
                }
                with concurrent.futures.ThreadPoolExecutor() as executer:
                    results = list(
                        tqdm.tqdm(
                            executer.map(get_datapoint_info, images,
                                         repeat(folder), repeat(train_only)),
                            total=len(images),
                            ncols=100,
                            initial=done))
                for result in results:
                    dataset_info['image_name'].append(result[0])
                    dataset_info['annotation_name'].append(result[1])
                    dataset_info['image_hash'].append(result[2])
                    dataset_info['train_only'].append(result[3])
                pd_dataset = pd.DataFrame.from_dict(dataset_info)
                if split is not None and split != 0:
                    # shuffle the class, then flag the first `split`
                    # fraction of rows as validation (train_only=False)
                    pd_dataset = pd_dataset.loc[np.random.permutation(
                        len(pd_dataset))].reset_index(drop=True)
                    split_len = round(len(pd_dataset) * split)
                    pd_dataset.loc[pd_dataset.index < split_len,
                                   ('train_only')] = False
                master_pd_dataset = pd.concat([master_pd_dataset, pd_dataset])
    return master_pd_dataset
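

# Editor's note: a quick worked example of the split bookkeeping above (not
# part of the original module). With 10 images in a class and split=0.2,
# split_len = round(10 * 0.2) = 2, so after the shuffle the first two rows
# of that class become validation and the other eight stay in training;
# applied per class, this yields a stratified split with a fixed seed.
# >>> import pandas as pd
# >>> df = pd.DataFrame({'train_only': [True] * 10})
# >>> df.loc[df.index < round(len(df) * 0.2), 'train_only'] = False
# >>> int(df['train_only'].sum())
# 8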


def make_csv(csv_dir, train_images_dir, val_images_dir=None, split=None):
    """Parses the data, writes `dataset.csv` and `dataset.json`, and
    returns the hash of the csv file along with both file paths.

    Args:
        csv_dir (str): path to save the CSV file created
        train_images_dir (str): path to images
        val_images_dir (str, optional): path to validation images
        split (float, optional): fraction of the training dataset to set
            aside as validation

    Returns:
        tuple: (hash of dataset.csv, path to dataset.csv, path to
            dataset.json)
    """
    assert os.path.isdir(csv_dir), f'{csv_dir} is not a directory'
    assert os.path.isdir(
        train_images_dir), f'{train_images_dir} is not a directory'

    if val_images_dir:
        assert os.path.isdir(
            val_images_dir), f'{val_images_dir} is not a directory'
        dataset_df = get_dataset_info(file_dir=train_images_dir)
        val_df = get_dataset_info(file_dir=val_images_dir, train_only=False)
        dataset_df = pd.concat([dataset_df, val_df])

    elif split is not None:
        assert isinstance(split, (float, int)), \
            f'expected split to be a float, but got {type(split)} instead'
        assert 0 <= split <= 1.0, f'expected a float between 0 and 1, but got {split} instead'  # noqa: E501
        dataset_df = get_dataset_info(
            file_dir=train_images_dir, train_only=True, split=split)
    else:
        dataset_df = get_dataset_info(
            file_dir=train_images_dir, train_only=True)

    json_data = make_dict(train_images_dir, val_images_dir)
    json_save_path = os.path.join(csv_dir, 'dataset.json')
    with open(json_save_path, 'w') as json_file:
        json.dump(json_data, json_file)

    dataset_save_path = os.path.join(csv_dir, 'dataset.csv')
    dataset_df.to_csv(dataset_save_path, index=False)

    dataset_csv_hash = hashFile(dataset_save_path)

    return dataset_csv_hash, dataset_save_path, json_save_path
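

# Editor's note: a hypothetical call, not part of the original file. The
# directory layout (one sub-folder per class under train_images_dir) is the
# 'Classification Format' this module expects.
def _example_make_csv():
    csv_hash, csv_path, json_path = make_csv(
        csv_dir='/tmp',
        train_images_dir='/data/pets/train',  # /data/pets/train/<class>/
        split=0.2)
    print(csv_hash, csv_path, json_path)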
--------------------------------------------------------------------------------
/cral/data_feeder/parallel_data_feeder.py:
--------------------------------------------------------------------------------
import concurrent.futures
import json
import os
import tempfile

import cv2
import pandas as pd
import tensorflow as tf
import tqdm
from PIL import Image

_NUM_SHARDS = 4
_PNG_CONVERT_PATH = os.path.join(tempfile.gettempdir(), 'temp.jpg')


def get_label_dict(label_list):
    label_list.sort()
    label_dict = dict()
    for index, label in enumerate(label_list):
        label_dict[label] = index

    return label_dict


def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _float_feature(value):
    """Returns a float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_list_feature(value):
    """Returns a bytes_list from a list of string / byte."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def _float_list_feature(value):
    """Returns a float_list from a list of float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def _int64_list_feature(value):
    """Returns an int64_list from a list of bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def statistics_example(data):
    tf_example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/height': _float_feature(data['height']),
                'image/width': _float_feature(data['width']),
            }))

    return tf_example


def image_example(image_string, label, image_shape):
    feature = {
        'image_raw': _bytes_feature(image_string),
        'image/height': _int64_feature(image_shape[0]),
        'image/width': _int64_feature(image_shape[1]),
        'image/depth': _int64_feature(image_shape[2]),
        'image_label': _int64_feature(label),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature))
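

# Editor's note: a minimal sketch (not part of the original module) showing
# how an `image_example` record round-trips; the feature spec mirrors the
# keys written above.
def _example_parse_image_example(serialized_example):
    features = {
        'image_raw': tf.io.FixedLenFeature([], tf.string),
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
        'image/depth': tf.io.FixedLenFeature([], tf.int64),
        'image_label': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(serialized_example, features)
    image = tf.image.decode_jpeg(parsed['image_raw'], channels=3)
    return image, parsed['image_label']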


def create_tfrecords(meta_json, dataset_csv, out_path):

    assert os.path.isdir(out_path)

    with open(meta_json, 'r') as f:
        meta_info = json.loads(f.read())

    meta_info['num_training_images'] = None
    meta_info['num_test_images'] = None
    meta_info['tfrecord_path'] = out_path

    class_list = meta_info['class_names']
    class_dict = get_label_dict(class_list)
    label_file_pointer = dict()

    def make_tf_records_parallel(dataset, tfrecord_paths, train=True):

        # NOTE: TFRecordWriter is not thread-safe; the shard writers (and
        # the per-class statistics writers) are shared by the worker
        # threads below, so writes can interleave.
        writers = [tf.io.TFRecordWriter(path) for path in tfrecord_paths]
        total_images = len(dataset)
        tasks = []
        with concurrent.futures.ThreadPoolExecutor() as executer:
            for index, row in tqdm.tqdm(
                    dataset.iterrows(), total=total_images, ncols=100):
                if train:
                    label_str = row['annotation_name']
                    if label_str not in label_file_pointer:
                        os.makedirs(
                            os.path.join(out_path, 'statistics'),
                            exist_ok=True)
                        label_file_pointer[label_str] = tf.io.TFRecordWriter(
                            os.path.join(out_path, 'statistics', label_str) +
                            '.tfrecord')

                tasks.append(
                    executer.submit(make_tf_record_row, row,
                                    writers[index % _NUM_SHARDS], train))

                if len(tasks) > _NUM_SHARDS:
                    concurrent.futures.wait(tasks)
                    tasks = []

        concurrent.futures.wait(tasks)
        tasks = []
        for writer in writers:
            writer.close()

        if train:
            for label_stats_pointer_file in label_file_pointer.values():
                label_stats_pointer_file.close()

    def make_tf_record_row(row, writer, train):

        label_str = row['annotation_name']
        image_file = os.path.join(meta_info['dataset_path'], label_str,
                                  row['image_name'])

        assert image_file.endswith(
            ('.jpg', '.jpeg', '.png')
        ), 'expected a `.jpg`, `.jpeg` or `.png` image but got {}'.format(
            image_file)
        if image_file.endswith('.png'):
            # NOTE: _PNG_CONVERT_PATH is a single shared temp file, so
            # concurrent workers converting PNGs can race on it.
            im = Image.open(image_file)
            im.save(_PNG_CONVERT_PATH)
            image_file = _PNG_CONVERT_PATH

        with open(image_file, 'rb') as imgfile:
            image_string = imgfile.read()

        image_array = cv2.imread(image_file)
        image_shape = image_array.shape

        tf_example = image_example(
            image_string=image_string,
            label=class_dict[label_str],
            image_shape=image_shape)

        if train:
            stats_feature = {
                'height': image_shape[0],
                'width': image_shape[1],
            }
            stats_example = statistics_example(data=stats_feature)
            label_file_pointer[label_str].write(
                stats_example.SerializeToString())

        writer.write(tf_example.SerializeToString())

    dataset_df = pd.read_csv(dataset_csv)

    print('\nprocessing training set ...')
    train_set = dataset_df[dataset_df['train_only'] == True]  # noqa: E712
    training_images_num = len(train_set)
    meta_info['num_training_images'] = training_images_num
    tfrecord_paths = []
    for shard_id in range(_NUM_SHARDS):
        tfrecord_paths.append(
            os.path.join(
                out_path, '%s-%05d-of-%05d.tfrecord' %
                ('train', shard_id + 1, _NUM_SHARDS)))

    make_tf_records_parallel(train_set, tfrecord_paths, train=True)

    print('\nprocessing testing set ...')

    test_set = dataset_df[dataset_df['train_only'] == False]  # noqa: E712
    test_images_num = len(test_set)
    meta_info['num_test_images'] = test_images_num
    tfrecord_paths = []
    for shard_id in range(_NUM_SHARDS):
        tfrecord_paths.append(
            os.path.join(
                out_path, '%s-%05d-of-%05d.tfrecord' %
                ('test', shard_id + 1, _NUM_SHARDS)))

    make_tf_records_parallel(test_set, tfrecord_paths, train=False)

    return meta_info
--------------------------------------------------------------------------------
/test/test_segmentation.py:
--------------------------------------------------------------------------------
import os
import tempfile
import unittest
import zipfile

import tensorflow as tf


class Test_SegmentationPipeline(unittest.TestCase):

    @classmethod
    def setUpClass(cls):

        zip_url = 'https://segmind-data.s3.ap-south-1.amazonaws.com/edge/data/segmentation/mini_ADE20K.zip'
        path_to_zip_file = tf.keras.utils.get_file(
            'mini_ADE20K.zip',
            zip_url,
            cache_dir=tempfile.gettempdir(),
            cache_subdir='',
            extract=False)
        directory_to_extract_to = os.path.join(tempfile.gettempdir(),
                                               'mini_ADE20K')
        with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
            zip_ref.extractall(directory_to_extract_to)

        cls.dataset = os.path.join(directory_to_extract_to, 'mini_ADE20K')

    def test_UNet(self):
        from cral.pipeline import SemanticSegPipe
        from cral.models.semantic_segmentation import UNetConfig

        pipe = SemanticSegPipe()

        pipe.add_data(
            train_images_dir=os.path.join(self.dataset, 'images'),
            train_anno_dir=os.path.join(self.dataset, 'annotations'),
            annotation_format='rgb',
            split=0.2)

        pipe.lock_data()

        pipe.set_algo(
            feature_extractor='mobilenet', config=UNetConfig(224, 224))

        pipe.train(
            num_epochs=2,
            snapshot_prefix='test_unet',
            snapshot_path='/tmp',
            snapshot_every_n=10,
            batch_size=1,
            steps_per_epoch=2)

        tf.keras.backend.clear_session()

    def test_fpnNet(self):
        from cral.pipeline import SemanticSegPipe
        from cral.models.semantic_segmentation import FpnNetConfig

        pipe = SemanticSegPipe()

        pipe.add_data(
            train_images_dir=os.path.join(self.dataset, 'images'),
            train_anno_dir=os.path.join(self.dataset, 'annotations'),
            annotation_format='rgb',
            split=0.2)

        pipe.lock_data()

        pipe.set_algo(
            feature_extractor='mobilenet', config=FpnNetConfig(224, 224))

        pipe.train(
            num_epochs=2,
            snapshot_prefix='test_fpnet',
            snapshot_path='/tmp',
            snapshot_every_n=10,
            batch_size=1,
            steps_per_epoch=2)

        tf.keras.backend.clear_session()

    def test_pspNet(self):
        from cral.pipeline import SemanticSegPipe
        from cral.models.semantic_segmentation import PspNetConfig

        pipe = SemanticSegPipe()

        pipe.add_data(
            train_images_dir=os.path.join(self.dataset, 'images'),
            train_anno_dir=os.path.join(self.dataset, 'annotations'),
            annotation_format='rgb',
            split=0.2)

        pipe.lock_data()

        pipe.set_algo(feature_extractor='mobilenet', config=PspNetConfig())

        pipe.train(
            num_epochs=2,
            snapshot_prefix='test_pspnet',
            snapshot_path='/tmp',
            snapshot_every_n=10,
            batch_size=1,
            steps_per_epoch=2)

        tf.keras.backend.clear_session()

    def test_segNet(self):
        from cral.pipeline import SemanticSegPipe
        from cral.models.semantic_segmentation import SegNetConfig

        pipe = SemanticSegPipe()

        pipe.add_data(
            train_images_dir=os.path.join(self.dataset, 'images'),
            train_anno_dir=os.path.join(self.dataset, 'annotations'),
            annotation_format='rgb',
            split=0.2)

        pipe.lock_data()

        pipe.set_algo(
            feature_extractor='mobilenet', config=SegNetConfig(224, 224))

        pipe.train(
            num_epochs=2,
            snapshot_prefix='test_segnet',
            snapshot_path='/tmp',
            snapshot_every_n=10,
            batch_size=1,
            steps_per_epoch=2)

        tf.keras.backend.clear_session()

    def test_UNetPlusPlus(self):
        from cral.pipeline import SemanticSegPipe
        from cral.models.semantic_segmentation import UnetPlusPlusConfig

        pipe = SemanticSegPipe()

        pipe.add_data(
            train_images_dir=os.path.join(self.dataset, 'images'),
            train_anno_dir=os.path.join(self.dataset, 'annotations'),
            annotation_format='rgb',
            split=0.2)

        pipe.lock_data()

        pipe.set_algo(
            feature_extractor='mobilenet',
            config=UnetPlusPlusConfig(224, 224))

        pipe.train(
            num_epochs=2,
            snapshot_prefix='test_unetplusplus',
            snapshot_path='/tmp',
            snapshot_every_n=10,
            batch_size=1,
            steps_per_epoch=2)

        tf.keras.backend.clear_session()

    def test_deeplabv3(self):
        from cral.pipeline import SemanticSegPipe
        from cral.models.semantic_segmentation import Deeplabv3Config

        pipe = SemanticSegPipe()

        pipe.add_data(
            train_images_dir=os.path.join(self.dataset, 'images'),
            train_anno_dir=os.path.join(self.dataset, 'annotations'),
            annotation_format='rgb',
            split=0.2)

        pipe.lock_data()

        pipe.set_algo(
            feature_extractor='resnet50', config=Deeplabv3Config(224, 224))

        pipe.train(
            num_epochs=2,
            snapshot_prefix='test_deeplabv3',
            snapshot_path='/tmp',
            snapshot_every_n=10,
            batch_size=1,
            steps_per_epoch=2)

        tf.keras.backend.clear_session()

    def test_linkNet(self):
        from cral.pipeline import SemanticSegPipe
        from cral.models.semantic_segmentation import LinkNetConfig

        pipe = SemanticSegPipe()

        pipe.add_data(
            train_images_dir=os.path.join(self.dataset, 'images'),
            train_anno_dir=os.path.join(self.dataset, 'annotations'),
            annotation_format='rgb',
            split=0.2)

        pipe.lock_data()

        pipe.set_algo(
            feature_extractor='mobilenet', config=LinkNetConfig(224, 224))

        pipe.train(
            num_epochs=2,
            snapshot_prefix='test_linknet',
            snapshot_path='/tmp',
            snapshot_every_n=10,
            batch_size=1,
            steps_per_epoch=2)

        tf.keras.backend.clear_session()


if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/cral/models/object_detection/SSD/tfrecord_parser.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras

from .helpers import SSD300Config
from .ssd_input_encoder import SSDInputEncoder


def pad_resize(image, height, width, resize_width, resize_height):
    """Pads `image` onto a (height, width) canvas, then resizes it.

    Args:
        image: numpy image to pad and resize
        height: canvas height to pad to (the batch max height)
        width: canvas width to pad to (the batch max width)
        resize_width: output width after resizing
        resize_height: output height after resizing

    Returns:
        numpy nd.array: the padded and resized image
    """
    padded_image = np.zeros(
        shape=(height.astype(int), width.astype(int), 3), dtype=image.dtype)
    h, w, _ = image.shape
    padded_image[:h, :w, :] = image
    resized_image = cv2.resize(padded_image,
                               (resize_width, resize_height)).astype(
                                   keras.backend.floatx())
    return resized_image
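

# Editor's note: a small illustration (not in the original file) of why the
# pad-then-resize step keeps a batch consistent: every image is first
# placed on the batch-max canvas, so one resize maps all boxes in the batch
# with the same scale factors.
# >>> import numpy as np
# >>> img = np.ones((2, 3, 3), dtype=np.uint8)
# >>> out = pad_resize(img, np.float64(4), np.float64(6), 300, 300)
# >>> out.shape
# (300, 300, 3)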


@tf.function
def decode_pad_resize(image_string, pad_height, pad_width, resize_width,
                      resize_height):
    """Decodes a jpeg string, pads it to (pad_height, pad_width) and
    resizes the result to (resize_height, resize_width).

    Args:
        image_string: serialized jpeg bytes
        pad_height: canvas height to pad to
        pad_width: canvas width to pad to
        resize_width: output width
        resize_height: output height

    Returns:
        tf.tensor: the decoded, padded and resized image
    """
    image = tf.image.decode_jpeg(image_string)
    image = tf.numpy_function(
        pad_resize,
        [image, pad_height, pad_width, resize_width, resize_height],
        Tout=keras.backend.floatx())
    return image


class SSD300Generator(object):
    """tf.data parser that turns detection tfrecords into SSD300 training
    batches (preprocessed images plus encoded y_true tensors)."""

    def __init__(self, config, num_classes, predictor_sizes, batch_size,
                 preprocess_input):

        assert isinstance(
            config, SSD300Config), 'please provide a `SSD300Config()` object'
        self.config = config
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.max_box_per_image = config.max_boxes_per_image
        self.boxes_list = []
        self.variances = config.variances

        self.img_height = config.height
        self.img_width = config.width

        self.ssd_encoder_layer = SSDInputEncoder(
            img_height=self.config.height,
            img_width=self.config.width,
            n_classes=num_classes,
            predictor_sizes=predictor_sizes,
            scales=self.config.scales,
            aspect_ratios_per_layer=self.config.aspect_ratios,
            two_boxes_for_ar1=self.config.two_boxes_for_ar1,
            strides=self.config.strides,
            offsets=self.config.offsets,
            clip_boxes=self.config.clip_boxes,
            variances=self.config.variances,
            matching_type='multi',
            pos_iou_threshold=self.config.pos_iou_threshold,
            neg_iou_limit=self.config.neg_iou_limit,
            normalize_coords=self.config.normalize_coords)

        self.preprocess_input = preprocess_input

    def _parse_fn(self, serialized):
        """Parses a batch of serialized examples into a
        (preprocessed image batch, encoded y_true batch) pair.
        """
        features = {
            'image/height': tf.io.FixedLenFeature([], tf.int64),
            'image/width': tf.io.FixedLenFeature([], tf.int64),
            'image/encoded': tf.io.FixedLenFeature([], tf.string),
            'image/object/bbox/xmin':
            tf.io.VarLenFeature(keras.backend.floatx()),
            'image/object/bbox/xmax':
            tf.io.VarLenFeature(keras.backend.floatx()),
            'image/object/bbox/ymin':
            tf.io.VarLenFeature(keras.backend.floatx()),
            'image/object/bbox/ymax':
            tf.io.VarLenFeature(keras.backend.floatx()),
            'image/f_id': tf.io.FixedLenFeature([], tf.int64),
            'image/object/class/label': tf.io.VarLenFeature(tf.int64)
        }

        parsed_example = tf.io.parse_example(
            serialized=serialized, features=features)

        max_height = tf.cast(
            tf.keras.backend.max(parsed_example['image/height']), tf.int32)
        max_width = tf.cast(
            tf.keras.backend.max(parsed_example['image/width']), tf.int32)

        # scale factors from the padded batch canvas to the model input
        height_scale = self.img_height / max_height
        width_scale = self.img_width / max_width

        height_scale = keras.backend.cast_to_floatx(height_scale)
        width_scale = keras.backend.cast_to_floatx(width_scale)

        image_batch = tf.map_fn(
            lambda x: decode_pad_resize(x, max_height, max_width, self.
                                        config.width, self.config.height),
            parsed_example['image/encoded'],
            dtype=keras.backend.floatx())

        # pad the ragged box coordinates with -1 so they batch properly
        xmin_batch = tf.expand_dims(
            tf.sparse.to_dense(
                parsed_example['image/object/bbox/xmin'] * width_scale,
                default_value=-1),
            axis=-1)
        xmax_batch = tf.expand_dims(
            tf.sparse.to_dense(
                parsed_example['image/object/bbox/xmax'] * width_scale,
                default_value=-1),
            axis=-1)
        ymin_batch = tf.expand_dims(
            tf.sparse.to_dense(
                parsed_example['image/object/bbox/ymin'] * height_scale,
                default_value=-1),
            axis=-1)
        ymax_batch = tf.expand_dims(
            tf.sparse.to_dense(
                parsed_example['image/object/bbox/ymax'] * height_scale,
                default_value=-1),
            axis=-1)

        label_batch = tf.expand_dims(
            tf.sparse.to_dense(
                parsed_example['image/object/class/label'], default_value=-1),
            axis=-1)
        label_batch = keras.backend.cast_to_floatx(label_batch)

        annotation_batch = tf.concat(
            [label_batch, xmin_batch, ymin_batch, xmax_batch, ymax_batch],
            axis=-1)

        y_true = tf.numpy_function(
            self.ssd_encoder_layer.generate_ytrue, [annotation_batch],
            Tout=keras.backend.floatx())

        return self.preprocess_input(image_batch), y_true

    def parse_tfrecords(self, filename):

        dataset = tf.data.Dataset.list_files(filename).shuffle(
            buffer_size=8).repeat(-1)
        dataset = dataset.interleave(
            tf.data.TFRecordDataset,
            num_parallel_calls=tf.data.experimental.AUTOTUNE,
            deterministic=False)

        dataset = dataset.batch(self.batch_size, drop_remainder=True)

        dataset = dataset.map(
            self._parse_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)

        dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

        return dataset
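

# Editor's note: a standalone illustration (not from this file) of the
# `default_value=-1` padding used in `_parse_fn`: ragged per-image box
# lists become one dense tensor, with empty slots filled by a sentinel
# value the downstream encoder can ignore.
def _example_sparse_box_padding():
    # two images with 2 and 1 boxes respectively
    sp = tf.sparse.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[0.1, 0.4, 0.7],
        dense_shape=[2, 2])
    # -> [[0.1, 0.4], [0.7, -1.0]]
    return tf.sparse.to_dense(sp, default_value=-1.0)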
--------------------------------------------------------------------------------
/cral/data_feeder/classification_parallel_data_feeder.py:
--------------------------------------------------------------------------------
import concurrent.futures
import json
import os
from io import BytesIO
from itertools import repeat
from math import ceil, floor

import pandas as pd
import tensorflow as tf
import tqdm
from cral.data_feeder.utils import _bytes_feature, _int64_feature
from PIL import Image

_NUM_SHARDS = 4
_PARALLEL_READS = 16


def get_label_dict(label_list):
    label_list.sort()
    label_dict = dict()
    for index, label in enumerate(label_list):
        label_dict[label] = index

    return label_dict


def _float_feature(value):
    """Returns a float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))


def statistics_example(data):
    # Mirrors parallel_data_feeder.statistics_example; reconstructed here
    # because the shard writer below expects a per-image statistics record
    # for every training row.
    return tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/height': _float_feature(data['height']),
                'image/width': _float_feature(data['width']),
            }))


def image_example(image_string, label, image_shape):
    feature = {
        'image_raw': _bytes_feature(image_string),
        'image/height': _int64_feature(image_shape[0]),
        'image/width': _int64_feature(image_shape[1]),
        'image_label': _int64_feature(label),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature))


def create_tfrecords(meta_json, dataset_csv, out_path):
    assert os.path.isdir(out_path)

    with open(meta_json, 'r') as f:
        meta_info = json.loads(f.read())

    meta_info['num_training_images'] = None
    meta_info['num_test_images'] = None
    meta_info['tfrecord_path'] = out_path

    class_list = meta_info['class_names']
    class_dict = get_label_dict(class_list)
    label_file_pointer = dict()

    def make_tf_records_parallel(dataset, tfrecord_paths, img_dir,
                                 train=True):

        writers = [tf.io.TFRecordWriter(path) for path in tfrecord_paths]
        total_images = len(dataset)
        num_seen_images = 0
        num_images_per_shard = ceil(total_images / _NUM_SHARDS)
        rows = []
        indexs = []
        for index, row in tqdm.tqdm(
                dataset.iterrows(), total=total_images, ncols=100):
            if train:
                label_str = row['annotation_name']
                if label_str not in label_file_pointer:
                    os.makedirs(
                        os.path.join(out_path, 'statistics'), exist_ok=True)
                    label_file_pointer[label_str] = tf.io.TFRecordWriter(
                        os.path.join(out_path, 'statistics', label_str) +
                        '.tfrecord')

            rows.append(row)
            indexs.append(num_seen_images)
            num_seen_images += 1
            if len(rows) == _PARALLEL_READS:
                with concurrent.futures.ThreadPoolExecutor() as executer:
                    results = executer.map(make_tf_record_row, rows,
                                           repeat(train, len(rows)),
                                           repeat(img_dir, len(rows)))
                for i, result in enumerate(results):
                    if result is not None:
                        writer = writers[floor(indexs[i] /
                                               num_images_per_shard)]
                        writer.write(result[0])
                        if train:
                            label_file_pointer[result[1]].write(result[2])
                rows = []
                indexs = []

        if len(rows) > 0:
            with concurrent.futures.ThreadPoolExecutor() as executer:
                results = executer.map(make_tf_record_row, rows,
                                       repeat(train, len(rows)),
                                       repeat(img_dir, len(rows)))
            for i, result in enumerate(results):
                if result is not None:
                    writer = writers[floor(indexs[i] / num_images_per_shard)]
                    writer.write(result[0])
                    if train:
                        label_file_pointer[result[1]].write(result[2])
            rows = []
            indexs = []

        for writer in writers:
            writer.close()

        if train:
            for label_stats_pointer_file in label_file_pointer.values():
                label_stats_pointer_file.close()

    def make_tf_record_row(row, train, img_dir):
        label_str = row['annotation_name']
        image_file = os.path.join(img_dir, label_str, row['image_name'])
        image_shape = None
        image_string = None
        assert image_file.endswith(
            ('.jpg', '.jpeg', '.png')
        ), 'expected a `.jpg`, `.jpeg` or `.png` image but got {}'.format(
            image_file)
        if image_file.endswith('.png'):
            # convert PNGs to jpeg in-memory so every record stores jpeg
            # bytes; a per-call BytesIO avoids sharing temp files between
            # worker threads
            im = Image.open(image_file)
            image_file = BytesIO()
            im.save(image_file, format='jpeg')

        with Image.open(image_file) as img:
            if img.mode != 'RGB':
                img = img.convert('RGB')
                byts = BytesIO()
                img.save(byts, format='jpeg')
                image_string = byts.getvalue()
            else:
                if isinstance(image_file, str):
                    with open(image_file, 'rb') as imgfile:
                        image_string = imgfile.read()
                else:
                    image_string = image_file.getvalue()
            # PIL's `size` is (width, height); image_example expects
            # (height, width)
            image_shape = (img.height, img.width)

        if image_shape is None or image_string is None:
            print(f"Not able to load {row['image_name']}")
            return None

        tf_example = image_example(
            image_string=image_string,
            label=class_dict[label_str],
            image_shape=image_shape)

        if train:
            # the consumer above reads (example, label, statistics) triples
            # for training rows
            stats_example = statistics_example({
                'height': image_shape[0],
                'width': image_shape[1]
            })
            return [
                tf_example.SerializeToString(), label_str,
                stats_example.SerializeToString()
            ]
        return [tf_example.SerializeToString()]
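
    # Editor's note (not in the original source): rows are assigned to
    # shards by position, writer = floor(row_index / ceil(N / _NUM_SHARDS)),
    # so each shard is a contiguous run of the csv rather than an
    # interleaving. For N = 10 rows and 4 shards, ceil(10 / 4) = 3 gives
    # shard ids floor(i / 3) = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3]: three shards
    # of three rows and one of a single row.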
    dataset_df = pd.read_csv(dataset_csv)
    print('\nprocessing training set ...')
    train_set = dataset_df[dataset_df['train_only'] == True]  # noqa: E712
    training_images_num = len(train_set)
    meta_info['num_training_images'] = training_images_num
    image_dir = meta_info['train_images_dir']
    tfrecord_paths = []
    for shard_id in range(_NUM_SHARDS):
        tfrecord_paths.append(
            os.path.join(
                out_path, '%s-%05d-of-%05d.tfrecord' %
                ('train', shard_id + 1, _NUM_SHARDS)))

    make_tf_records_parallel(train_set, tfrecord_paths, image_dir, train=True)

    test_set = dataset_df[dataset_df['train_only'] == False]  # noqa: E712
    test_images_num = len(test_set)
    meta_info['num_test_images'] = test_images_num
    tfrecord_paths = []
    image_dir = meta_info['val_images_dir']
    if image_dir is None:
        image_dir = meta_info['train_images_dir']
    if test_images_num > 0:
        print('\nprocessing testing set ...')
        for shard_id in range(_NUM_SHARDS):
            tfrecord_paths.append(
                os.path.join(
                    out_path, '%s-%05d-of-%05d.tfrecord' %
                    ('test', shard_id + 1, _NUM_SHARDS)))

        make_tf_records_parallel(
            test_set, tfrecord_paths, image_dir, train=False)

    return meta_info
--------------------------------------------------------------------------------