├── .gitattributes ├── .gitignore ├── CNTK_faster-rcnn ├── Detection │ ├── Dockerfile │ ├── FastRCNN │ │ ├── FastRCNN_config.py │ │ ├── FastRCNN_eval.py │ │ ├── FastRCNN_train.py │ │ ├── README.md │ │ ├── install_data_and_model.py │ │ └── run_fast_rcnn.py │ ├── FasterRCNN │ │ ├── FasterRCNN_config.py │ │ ├── FasterRCNN_eval.py │ │ ├── FasterRCNN_train.py │ │ ├── README.md │ │ ├── run_faster_rcnn.py │ │ └── run_sweep_parameters.py │ ├── __init__.py │ ├── app.py │ ├── predict.py │ ├── run.sh │ ├── server.py │ └── utils │ │ ├── Readme.md │ │ ├── annotations │ │ ├── C1_DrawBboxesOnImages.py │ │ ├── C2_AssignLabelsToBboxes.py │ │ ├── LabelMeConverter.py │ │ └── annotations_helper.py │ │ ├── caffe_layers │ │ ├── anchor_target_layer.py │ │ ├── bbox_transform.py │ │ ├── default_config.py │ │ ├── proposal_layer.py │ │ └── proposal_target_layer.py │ │ ├── config_helpers.py │ │ ├── configs │ │ ├── AlexNet_config.py │ │ ├── CM_config.py │ │ └── VGG16_config.py │ │ ├── cython_modules │ │ ├── cpu_nms.cp35-win_amd64.pyd │ │ ├── cpu_nms.cpython-34m.so │ │ ├── cpu_nms.cpython-35m-x86_64-linux-gnu.so │ │ ├── cpu_nms.cpython-35m.so.bak │ │ ├── cpu_nms.cpython-36m.so │ │ ├── cython_bbox.cp35-win_amd64.pyd │ │ ├── cython_bbox.cpython-34m.so │ │ ├── cython_bbox.cpython-35m-x86_64-linux-gnu.so │ │ ├── cython_bbox.cpython-35m.so.bak │ │ ├── cython_bbox.cpython-36m.so │ │ └── gpu_nms.cp35-win_amd64.pyd │ │ ├── map_helpers.py │ │ ├── misc │ │ ├── azure_utils.py │ │ └── zip_helper.py │ │ ├── nms_wrapper.py │ │ ├── od_mb_source.py │ │ ├── od_reader.py │ │ ├── od_utils.py │ │ ├── plot_helpers.py │ │ ├── proposal_helpers.py │ │ └── rpn │ │ ├── anchor_target_layer.py │ │ ├── bbox_transform.py │ │ ├── cntk_smoothL1_loss.py │ │ ├── generate_anchors.py │ │ ├── proposal_layer.py │ │ ├── proposal_target_layer.py │ │ └── rpn_helpers.py └── aml_config │ ├── conda_dependencies.yml │ ├── docker.compute │ ├── docker.runconfig │ ├── jupyter_notebook_config.py │ ├── local.compute │ ├── local.runconfig │ ├── sampledsvm.compute │ ├── sampledsvm.runconfig │ └── spark_dependencies.yml ├── README.md ├── Tensorflow-Object-Detection ├── .gitignore ├── aml_config │ ├── .gitignore │ ├── conda_dependencies.yml │ ├── conda_dependencies_ws.yml │ ├── docker.compute │ ├── docker.runconfig │ ├── local.compute │ ├── local.runconfig │ └── spark_dependencies.yml ├── create_pascal_tf_record.py ├── driver.py ├── faster_rcnn_resnet101_voc07.config ├── kw_data │ ├── faster_rcnn_resnet101_aug1_adam.config │ ├── faster_rcnn_resnet101_aug2_adam.config │ ├── faster_rcnn_resnet101_aug3_adam.config │ ├── faster_rcnn_resnet101_no_aug.config │ ├── faster_rcnn_resnet101_no_aug_adam.config │ └── kw_label_map.pbtxt ├── misc │ ├── azure_utils.py │ ├── results_logger.py │ └── zip_helper.py ├── object_detection_tutorial.ipynb ├── pascal_label_map.pbtxt └── tf_train_eval.py ├── assets └── README.md └── sample-visualization ├── .eslintrc.js ├── .gitignore ├── Dockerfile ├── README.md ├── package.json ├── public ├── favicon.ico ├── index.html └── manifest.json ├── src ├── App.js ├── Container.js ├── Drawer.js ├── Image.css ├── Image.js ├── index.css ├── index.js ├── lib.js └── registerServiceWorker.js └── yarn.lock /.gitattributes: -------------------------------------------------------------------------------- 1 | *.zip filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Detection/FastRCNN/Output/ 2 | 
Detection/FasterRCNN/Output/ 3 | 4 | # Python 5 | *.pyc 6 | __pycache__/ 7 | .idea 8 | 9 | .azuremlhistory_git 10 | .azuremlhistory_git 11 | .ipynb_checkpoints 12 | .azureml/project.json 13 | azureml-logs -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM hsienting/dl_az 2 | 3 | COPY ./ /app 4 | ADD run.sh /app/ 5 | RUN chmod +x /app/run.sh 6 | 7 | ENV STORAGE_ACCOUNT_NAME 8 | ENV STORAGE_ACCOUNT_KEY 9 | ENV AZUREML_NATIVE_SHARE_DIRECTORY /cmcntk 10 | ENV TESTIMAGESCONTAINER data 11 | 12 | EXPOSE 80 13 | 14 | ENTRYPOINT ["/app/run.sh"] 15 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FastRCNN/FastRCNN_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import os.path as osp 9 | import numpy as np 10 | from easydict import EasyDict as edict 11 | # `pip install easydict` if you don't have it 12 | 13 | __C = edict() 14 | cfg = __C 15 | __C.CNTK = edict() 16 | __C.TRAIN = edict() 17 | 18 | # If set to 'True' training will be skipped if a trained model exists already 19 | __C.CNTK.MAKE_MODE = True 20 | # set to 'True' to use deterministic algorithms 21 | __C.CNTK.FORCE_DETERMINISTIC = False 22 | # set to 'True' to run only a single epoch 23 | __C.CNTK.FAST_MODE = False 24 | # Debug parameters 25 | __C.CNTK.DEBUG_OUTPUT = False 26 | __C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf" 27 | 28 | # Learning parameters 29 | __C.CNTK.L2_REG_WEIGHT = 0.0005 30 | __C.CNTK.MOMENTUM_PER_MB = 0.9 31 | __C.CNTK.MAX_EPOCHS = 20 32 | __C.CNTK.LR_FACTOR = 10.0 # 10.0 is used for the Grocery example data. Start with 1.0 for other data sets. 33 | __C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001] 34 | # The learning rate multiplier for all bias weights 35 | __C.CNTK.BIAS_LR_MULT = 2.0 36 | 37 | # Number of regions of interest [ROIs] proposals 38 | __C.NUM_ROI_PROPOSALS = 200 # use 2000 or more for good results 39 | # the minimum IoU (overlap) of a proposal to qualify for training regression targets 40 | __C.BBOX_THRESH = 0.5 41 | 42 | # Normalize the targets using "precomputed" (or made up) means and stdevs 43 | __C.BBOX_NORMALIZE_TARGETS = True 44 | __C.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 45 | __C.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 46 | 47 | # Maximum number of ground truth annotations per image 48 | __C.INPUT_ROIS_PER_IMAGE = 50 49 | __C.IMAGE_WIDTH = 850 50 | __C.IMAGE_HEIGHT = 850 51 | 52 | # Use horizontally-flipped images during training? 
53 | __C.TRAIN.USE_FLIPPED = True 54 | # If set to 'True' conv layers weights from the base model will be trained, too 55 | __C.TRAIN_CONV_LAYERS = True 56 | # Sigma parameter for smooth L1 loss in the RPN and the detector (DET) 57 | __C.SIGMA_DET_L1 = 1.0 58 | 59 | # NMS threshold used to discard overlapping predicted bounding boxes 60 | __C.RESULTS_NMS_THRESHOLD = 0.5 61 | # all bounding boxes with a score lower than this threshold will be considered background 62 | __C.RESULTS_NMS_CONF_THRESHOLD = 0.0 63 | 64 | # Enable plotting of results generally / also plot background boxes / also plot unregressed boxes 65 | __C.VISUALIZE_RESULTS = False 66 | __C.DRAW_NEGATIVE_ROIS = False 67 | __C.DRAW_UNREGRESSED_ROIS = False 68 | # only for plotting results: boxes with a score lower than this threshold will be considered background 69 | __C.RESULTS_BGR_PLOT_THRESHOLD = 0.1 70 | 71 | 72 | # If set to True the following two parameters need to point to the corresponding files that contain the proposals: 73 | # __C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE 74 | # __C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE 75 | __C.USE_PRECOMPUTED_PROPOSALS = False 76 | 77 | # roi proposal parameters for selective search, grid and filtering 78 | # The first three parameters are for dlib's selective search. For details see 79 | # http://dlib.net/dlib/image_transforms/segment_image_abstract.h.html#find_candidate_object_locations 80 | # 81 | # The basic segmentation is performed kvals.size() times. The k parameter is set (from, to, step_size) 82 | __C.roi_ss_kvals = (10, 500, 5) 83 | # When doing the basic segmentations prior to any box merging, all 84 | # rectangles that have an area < min_size are discarded. Therefore, all outputs and 85 | # subsequent merged rectangles are built out of rectangles that contain at 86 | # least min_size pixels. Note that setting min_size to a smaller value than 87 | # you might otherwise be interested in using can be useful since it allows a 88 | # larger number of possible merged boxes to be created 89 | __C.roi_ss_min_size = 9 90 | # There are max_merging_iterations rounds of neighboring blob merging. 91 | # Therefore, this parameter has some effect on the number of output rectangles 92 | # you get, with larger values of the parameter giving more output rectangles. 
93 | # Hint: set __C.CNTK.DEBUG_OUTPUT=True to see the number of ROIs from selective search 94 | __C.roi_ss_mm_iterations = 30 95 | # 96 | # image size used for ROI generation 97 | __C.roi_ss_img_size = 200 98 | # minimum relative width/height of an ROI 99 | __C.roi_min_side_rel = 0.01 100 | # maximum relative width/height of an ROI 101 | __C.roi_max_side_rel = 1.0 102 | # minimum relative area of an ROI 103 | __C.roi_min_area_rel = 0.0001 104 | # maximum relative area of an ROI 105 | __C.roi_max_area_rel = 0.9 106 | # maximum aspect ratio of an ROI vertically and horizontally 107 | __C.roi_max_aspect_ratio = 4.0 108 | # aspect ratios of ROIs for uniform grid ROIs 109 | __C.roi_grid_aspect_ratios = [1.0, 2.0, 0.5] 110 | 111 | # For reproducibility 112 | __C.RND_SEED = 3 113 | 114 | # Use GPU implementation of non-maximum suppression 115 | __C.USE_GPU_NMS = False 116 | 117 | # Default GPU device id 118 | __C.GPU_ID = 0 119 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FastRCNN/README.md: -------------------------------------------------------------------------------- 1 | # CNTK Examples: Image/Detection/Fast R-CNN 2 | 3 | ## Overview 4 | 5 | This folder contains an end-to-end solution for using Fast R-CNN to perform object detection. 6 | The original research paper for Fast R-CNN can be found at [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083). 7 | Base models that are supported by the current configuration are AlexNet and VGG16. 8 | Two image sets that are preconfigured are Pascal VOC 2007 and Grocery. 9 | Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_fast_rcnn.py`. 10 | 11 | ## Running the example 12 | 13 | ### Setup 14 | 15 | To run Fast R-CNN you need a CNTK Python environment. Install the following additional packages: 16 | 17 | ``` 18 | pip install opencv-python easydict pyyaml dlib 19 | ``` 20 | 21 | The code uses prebuild Cython modules for parts of the region proposal network. These binaries are contained in the folder (`Examples/Image/Detection/utils/cython_modules`) for Python 3.5 for Windows and Python 3.4, 3.5, and 3.6 for Linux. 22 | If you require other versions please follow the instructions at [https://github.com/rbgirshick/py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#installation-sufficient-for-the-demo). 23 | 24 | If you want to use the debug output you need to run `pip install pydot_ng` ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) (GraphViz executable has to be in the system’s PATH) to be able to plot the CNTK graphs. 25 | 26 | ### Getting the data and AlexNet model 27 | 28 | We use a toy dataset of images captured from a refrigerator to demonstrate Fast R-CNN. Both the dataset and the pre-trained AlexNet model can be downloaded by running the following Python command from the Examples/Image/Detection/FastRCNN folder: 29 | 30 | `python install_data_and_model.py` 31 | 32 | After running the script, the toy dataset will be installed under the `Image/DataSets/Grocery` folder. The AlexNet model will be downloaded to the `Image/PretrainedModels` folder. 33 | We recommend you to keep the downloaded data in the respective folder while downloading, as the configuration files expect that by default. 
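The last step of the script generates the CNTK map and ROI files via `utils/annotations/annotations_helper.py`. As a reference, the sketch below reproduces the line format that `create_map_files` writes; the image path and box coordinates are made-up example values, not taken from the Grocery data:

```
# Illustrative sketch of the map/ROI text format written by
# utils/annotations/annotations_helper.py (example values only).
img_index = 0
img_path = "positive/img_001.jpg"          # hypothetical image under the data folder

# <index> TAB <relative image path> TAB 0
img_line = "{}\t{}\t0".format(img_index, img_path)

# <index> |roiAndLabel x1 y1 x2 y2 label ... (one 5-tuple per ground-truth box)
roi_line = "{} |roiAndLabel".format(img_index)
for val in [100.0, 120.0, 280.0, 360.0, 3.0]:  # one example box with class index 3
    roi_line += " {}".format(val)

print(img_line)
print(roi_line)
```

These are the files (`train_img_file.txt`, `train_roi_file.txt`, and their test counterparts) that the dataset configuration shown later in this README points to.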
34 | 35 | ### Running Fast R-CNN on the example data 36 | 37 | To train and evaluate Fast R-CNN run 38 | 39 | `python run_fast_rcnn.py` 40 | 41 | ### Running Fast R-CNN on Pascal VOC data 42 | 43 | To download the Pascal data and create the annotation file for Pascal in CNTK format run the following scripts: 44 | 45 | ``` 46 | python Examples/Image/DataSets/Pascal/install_pascalvoc.py 47 | python Examples/Image/DataSets/Pascal/mappings/create_mappings.py 48 | ``` 49 | 50 | Change the `dataset_cfg` in the `get_configuration()` method of `run_fast_rcnn.py` to 51 | 52 | ``` 53 | from utils.configs.Pascal_config import cfg as dataset_cfg 54 | ``` 55 | 56 | Now you're set to train on the Pascal VOC 2007 data using `python run_fast_rcnn.py`. Beware that training might take a while. 57 | 58 | ### Running Fast R-CNN on your own data 59 | 60 | Preparing your own data and annotating it with ground truth bounding boxes is described [here](https://docs.microsoft.com/en-us/cognitive-toolkit/Object-Detection-using-Fast-R-CNN#train-on-your-own-data). 61 | After storing your images in the described folder structure and annotating them please run 62 | 63 | `python Examples/Image/Detection/utils/annotations/annotations_helper.py` 64 | 65 | after changing the folder in that script to your data folder. Finally, create a `MyDataSet_config.py` in the `utils\configs` folder following the existing examples: 66 | 67 | ``` 68 | __C.CNTK.DATASET == "YourDataSet": 69 | __C.CNTK.MAP_FILE_PATH = "../../DataSets/YourDataSet" 70 | __C.CNTK.CLASS_MAP_FILE = "class_map.txt" 71 | __C.CNTK.TRAIN_MAP_FILE = "train_img_file.txt" 72 | __C.CNTK.TEST_MAP_FILE = "test_img_file.txt" 73 | __C.CNTK.TRAIN_ROI_FILE = "train_roi_file.txt" 74 | __C.CNTK.TEST_ROI_FILE = "test_roi_file.txt" 75 | __C.CNTK.NUM_TRAIN_IMAGES = 500 76 | __C.CNTK.NUM_TEST_IMAGES = 200 77 | __C.CNTK.PROPOSAL_LAYER_SCALES = [8, 16, 32] 78 | ``` 79 | 80 | Change the `dataset_cfg` in the `get_configuration()` method of `run_fast_rcnn.py` to 81 | 82 | ``` 83 | from utils.configs.MyDataSet_config import cfg as dataset_cfg 84 | ``` 85 | 86 | and run `python run_fast_rcnn.py` to train and evaluate Fast R-CNN on your data. 87 | 88 | ## Technical details 89 | 90 | ### Parameters 91 | 92 | All options and parameters are in `FastRCNN_config.py` in the `FastRCNN` folder and all of them are explained there. These include 93 | 94 | ``` 95 | # learning parameters 96 | __C.CNTK.MAX_EPOCHS = 10 97 | __C.CNTK.LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001] 98 | 99 | # Number of regions of interest [ROIs] proposals 100 | __C.NUM_ROI_PROPOSALS = 1000 101 | # minimum relative width/height of an ROI 102 | __C.roi_min_side_rel = 0.01 103 | # maximum relative width/height of an ROI 104 | __C.roi_max_side_rel = 1.0 105 | ``` 106 | 107 | ### Fast R-CNN CNTK code 108 | 109 | Most of the code is in `FastRCNN_train.py` and `FastRCNN_eval.py` (and `Examples/Image/Detection/utils/*.py` for helper methods). Please see those files for details. 110 | 111 | ### Algorithm 112 | 113 | All details regarding the Fast R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083). 114 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FastRCNN/install_data_and_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. 
See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | from __future__ import print_function 8 | import zipfile 9 | import os, sys 10 | 11 | def create_grocery_mappings(grocery_folder): 12 | sys.path.append(os.path.join(grocery_folder, "..", "..", "Detection", "utils", "annotations")) 13 | from annotations_helper import create_class_dict, create_map_files 14 | abs_path = os.path.dirname(os.path.abspath(__file__)) 15 | data_set_path = os.path.join(abs_path, "..", "..", "DataSets", "Grocery") 16 | class_dict = create_class_dict(data_set_path) 17 | create_map_files(data_set_path, class_dict, training_set=True) 18 | create_map_files(data_set_path, class_dict, training_set=False) 19 | 20 | if __name__ == '__main__': 21 | base_folder = os.path.dirname(os.path.abspath(__file__)) 22 | 23 | sys.path.append(os.path.join(base_folder, "..", "..", "DataSets", "Grocery")) 24 | from install_grocery import download_grocery_data 25 | download_grocery_data() 26 | 27 | sys.path.append(os.path.join(base_folder, "..", "..", "..", "..", "PretrainedModels")) 28 | from download_model import download_model_by_name 29 | download_model_by_name("AlexNet_ImageNet_Caffe") 30 | 31 | print("Creating mapping files for Grocery data set..") 32 | create_grocery_mappings(base_folder) 33 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FastRCNN/run_fast_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import numpy as np 9 | from FastRCNN_train import prepare, train_fast_rcnn 10 | from FastRCNN_eval import compute_test_set_aps, FastRCNN_Evaluator 11 | from utils.config_helpers import merge_configs 12 | from utils.plot_helpers import plot_test_set_results 13 | 14 | def get_configuration(): 15 | # load configs for detector, base network and data set 16 | from FastRCNN_config import cfg as detector_cfg 17 | # for VGG16 base model use: from utils.configs.VGG16_config import cfg as network_cfg 18 | # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg 19 | from utils.configs.AlexNet_config import cfg as network_cfg 20 | # for Pascal VOC 2007 data set use: from utils.configs.Pascal_config import cfg as dataset_cfg 21 | # for the Grocery data set use: from utils.configs.Grocery_config import cfg as dataset_cfg 22 | from utils.configs.Grocery_config import cfg as dataset_cfg 23 | 24 | return merge_configs([detector_cfg, network_cfg, dataset_cfg]) 25 | 26 | # trains and evaluates a Fast R-CNN model. 
27 | if __name__ == '__main__': 28 | cfg = get_configuration() 29 | prepare(cfg, True) 30 | 31 | # train and test 32 | trained_model = train_fast_rcnn(cfg) 33 | eval_results = compute_test_set_aps(trained_model, cfg) 34 | 35 | # write AP results to output 36 | for class_name in eval_results: print('AP for {:>15} = {:.4f}'.format(class_name, eval_results[class_name])) 37 | print('Mean AP = {:.4f}'.format(np.nanmean(list(eval_results.values())))) 38 | 39 | # Plot results on test set images 40 | if cfg.VISUALIZE_RESULTS: 41 | num_eval = min(cfg["DATA"].NUM_TEST_IMAGES, 100) 42 | results_folder = os.path.join(cfg.OUTPUT_PATH, cfg["DATA"].DATASET) 43 | evaluator = FastRCNN_Evaluator(trained_model, cfg) 44 | plot_test_set_results(evaluator, num_eval, results_folder, cfg) 45 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FasterRCNN/FasterRCNN_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import os.path as osp 9 | import numpy as np 10 | from easydict import EasyDict as edict 11 | # `pip install easydict` if you don't have it 12 | 13 | __C = edict() 14 | cfg = __C 15 | 16 | __C.AZURE = edict() 17 | __C.AZURE.ACCOUNT_NAME = os.environ['STORAGE_ACCOUNT_NAME'] 18 | __C.AZURE.ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY'] 19 | __C.AZURE.PRETRAINED_MODELS = "pretrainedmodels" 20 | __C.AZURE.DATA="data" 21 | # 22 | # CNTK parameters 23 | # 24 | 25 | __C.CNTK = edict() 26 | 27 | # If set to 'True' training will be skipped if a trained model exists already 28 | __C.CNTK.MAKE_MODE = False 29 | # E2E or 4-stage training 30 | __C.CNTK.TRAIN_E2E = True 31 | # set to 'True' to use deterministic algorithms 32 | __C.CNTK.FORCE_DETERMINISTIC = False 33 | # set to 'True' to run only a single epoch 34 | __C.CNTK.FAST_MODE = False 35 | # Debug parameters 36 | __C.CNTK.DEBUG_OUTPUT = False 37 | __C.CNTK.GRAPH_TYPE = "png" # "png" or "pdf" 38 | # Set to True if you want to store an eval model with native UDFs (e.g. 
for inference using C++ or C#) 39 | __C.STORE_EVAL_MODEL_WITH_NATIVE_UDF = False 40 | 41 | # Learning parameters 42 | __C.CNTK.L2_REG_WEIGHT = 0.0005 43 | __C.CNTK.MOMENTUM_PER_MB = 0.9 44 | # The learning rate multiplier for all bias weights 45 | __C.CNTK.BIAS_LR_MULT = 2.0 46 | 47 | # E2E learning parameters 48 | __C.CNTK.E2E_MAX_EPOCHS = 20 49 | __C.CNTK.E2E_LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001] 50 | 51 | # 4-stage learning parameters (alternating training scheme) 52 | __C.CNTK.RPN_EPOCHS = 16 53 | __C.CNTK.RPN_LR_PER_SAMPLE = [0.001] * 12 + [0.0001] * 4 54 | __C.CNTK.FRCN_EPOCHS = 8 55 | __C.CNTK.FRCN_LR_PER_SAMPLE = [0.001] * 6 + [0.0001] * 2 56 | 57 | # Maximum number of ground truth annotations per image 58 | __C.INPUT_ROIS_PER_IMAGE = 50 59 | __C.IMAGE_WIDTH = 850 60 | __C.IMAGE_HEIGHT = 850 61 | 62 | # Sigma parameter for smooth L1 loss in the RPN and the detector (DET) 63 | __C.SIGMA_RPN_L1 = 3.0 64 | __C.SIGMA_DET_L1 = 1.0 65 | 66 | # NMS threshold used to discard overlapping predicted bounding boxes 67 | __C.RESULTS_NMS_THRESHOLD = 0.3 68 | # all bounding boxes with a score lower than this threshold will be considered background 69 | __C.RESULTS_NMS_CONF_THRESHOLD = 0.0 70 | 71 | # Enable plotting of results generally / also plot background boxes / also plot unregressed boxes 72 | __C.VISUALIZE_RESULTS = False 73 | __C.DRAW_NEGATIVE_ROIS = False 74 | __C.DRAW_UNREGRESSED_ROIS = False 75 | # only for plotting results: boxes with a score lower than this threshold will be considered background 76 | __C.RESULTS_BGR_PLOT_THRESHOLD = 0.1 77 | 78 | # 79 | # Training parameters 80 | # 81 | 82 | __C.TRAIN = edict() 83 | 84 | # Use horizontally-flipped images during training? 85 | __C.TRAIN.USE_FLIPPED = True 86 | # If set to 'True' conv layers weights from the base model will be trained, too 87 | __C.TRAIN_CONV_LAYERS = True 88 | 89 | # RPN parameters 90 | # IOU >= thresh: positive example 91 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 92 | # IOU < thresh: negative example 93 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 94 | # If an anchor statisfied by positive and negative conditions set to negative 95 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False 96 | # Max number of foreground examples 97 | __C.TRAIN.RPN_FG_FRACTION = 0.5 98 | # Total number of examples 99 | __C.TRAIN.RPN_BATCHSIZE = 256 100 | # NMS threshold used on RPN proposals 101 | __C.TRAIN.RPN_NMS_THRESH = 0.7 102 | # Number of top scoring boxes to keep before apply NMS to RPN proposals 103 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 104 | # Number of top scoring boxes to keep after applying NMS to RPN proposals 105 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000 106 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) 107 | __C.TRAIN.RPN_MIN_SIZE = 16 108 | 109 | # Detector parameters 110 | # Minibatch size (number of regions of interest [ROIs]) -- was: __C.TRAIN.BATCH_SIZE = 128 111 | __C.NUM_ROI_PROPOSALS = 128 112 | # Fraction of minibatch that is labeled foreground (i.e. 
class > 0) 113 | __C.TRAIN.FG_FRACTION = 0.25 114 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 115 | __C.TRAIN.FG_THRESH = 0.5 116 | # Overlap threshold for a ROI to be considered background (class = 0 if 117 | # overlap in [LO, HI)) 118 | __C.TRAIN.BG_THRESH_HI = 0.5 119 | __C.TRAIN.BG_THRESH_LO = 0.0 120 | 121 | # Normalize the targets using "precomputed" (or made up) means and stdevs 122 | __C.BBOX_NORMALIZE_TARGETS = True 123 | __C.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 124 | __C.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 125 | 126 | 127 | # 128 | # Testing parameters 129 | # 130 | 131 | __C.TEST = edict() 132 | 133 | ## NMS threshold used on RPN proposals 134 | __C.TEST.RPN_NMS_THRESH = 0.7 135 | ## Number of top scoring boxes to keep before apply NMS to RPN proposals 136 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000 137 | ## Number of top scoring boxes to keep after applying NMS to RPN proposals 138 | __C.TEST.RPN_POST_NMS_TOP_N = 300 139 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) 140 | __C.TEST.RPN_MIN_SIZE = 16 141 | 142 | 143 | # 144 | # MISC 145 | # 146 | 147 | # For reproducibility 148 | __C.RND_SEED = 3 149 | 150 | # Use GPU implementation of non-maximum suppression 151 | __C.USE_GPU_NMS = False 152 | 153 | # Default GPU device id 154 | __C.GPU_ID = 0 155 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FasterRCNN/FasterRCNN_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import numpy as np 9 | import cntk 10 | from cntk import input_variable, Axis 11 | from utils.map_helpers import evaluate_detections 12 | from utils.plot_helpers import load_resize_and_pad 13 | from utils.rpn.bbox_transform import regress_rois 14 | from utils.od_mb_source import ObjectDetectionMinibatchSource 15 | 16 | class FasterRCNN_Evaluator: 17 | def __init__(self, eval_model, cfg): 18 | # load model once in constructor and push images through the model in 'process_image()' 19 | self._img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH) 20 | image_input = input_variable(shape=self._img_shape, 21 | dynamic_axes=[Axis.default_batch_axis()], 22 | name=cfg["MODEL"].FEATURE_NODE_NAME) 23 | dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input') 24 | self._eval_model = eval_model(image_input, dims_input) 25 | 26 | def process_image(self, img_path): 27 | out_cls_pred, out_rpn_rois, out_bbox_regr, dims = self.process_image_detailed(img_path) 28 | labels = out_cls_pred.argmax(axis=1) 29 | regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims) 30 | 31 | return regressed_rois, out_cls_pred 32 | 33 | def process_image_detailed(self, img_path): 34 | print (img_path) 35 | _, cntk_img_input, dims = load_resize_and_pad(img_path, self._img_shape[2], self._img_shape[1]) 36 | 37 | cntk_dims_input = np.array(dims, dtype=np.float32) 38 | cntk_dims_input.shape = (1,) + cntk_dims_input.shape 39 | output = self._eval_model.eval({self._eval_model.arguments[0]: [cntk_img_input], 40 | self._eval_model.arguments[1]: cntk_dims_input}) 41 | 42 | out_dict = dict([(k.name, k) for k in output]) 43 | out_cls_pred = 
output[out_dict['cls_pred']][0] 44 | out_rpn_rois = output[out_dict['rpn_rois']][0] 45 | out_bbox_regr = output[out_dict['bbox_regr']][0] 46 | 47 | return out_cls_pred, out_rpn_rois, out_bbox_regr, dims 48 | 49 | def compute_test_set_aps(eval_model, cfg): 50 | num_test_images = cfg["DATA"].NUM_TEST_IMAGES 51 | classes = cfg["DATA"].CLASSES 52 | image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH), 53 | dynamic_axes=[Axis.default_batch_axis()], 54 | name=cfg["MODEL"].FEATURE_NODE_NAME) 55 | roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) 56 | dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) 57 | frcn_eval = eval_model(image_input, dims_input) 58 | 59 | # Create the minibatch source 60 | minibatch_source = ObjectDetectionMinibatchSource( 61 | cfg["DATA"].TEST_MAP_FILE, 62 | cfg["DATA"].TEST_ROI_FILE, 63 | max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE, 64 | pad_width=cfg.IMAGE_WIDTH, 65 | pad_height=cfg.IMAGE_HEIGHT, 66 | pad_value=cfg["MODEL"].IMG_PAD_COLOR, 67 | randomize=False, use_flipping=False, 68 | max_images=cfg["DATA"].NUM_TEST_IMAGES, 69 | num_classes=cfg["DATA"].NUM_CLASSES, 70 | proposal_provider=None) 71 | 72 | # define mapping from reader streams to network inputs 73 | input_map = { 74 | minibatch_source.image_si: image_input, 75 | minibatch_source.roi_si: roi_input, 76 | minibatch_source.dims_si: dims_input 77 | } 78 | 79 | # all detections are collected into: 80 | # all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score) 81 | all_boxes = [[[] for _ in range(num_test_images)] for _ in range(cfg["DATA"].NUM_CLASSES)] 82 | 83 | # evaluate test images and write netwrok output to file 84 | print("Evaluating Faster R-CNN model for %s images." % num_test_images) 85 | all_gt_infos = {key: [] for key in classes} 86 | for img_i in range(0, num_test_images): 87 | mb_data = minibatch_source.next_minibatch(1, input_map=input_map) 88 | 89 | gt_row = mb_data[roi_input].asarray() 90 | gt_row = gt_row.reshape((cfg.INPUT_ROIS_PER_IMAGE, 5)) 91 | all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)] 92 | 93 | for cls_index, cls_name in enumerate(classes): 94 | if cls_index == 0: continue 95 | cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)] 96 | all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes), 97 | 'difficult': [False] * len(cls_gt_boxes), 98 | 'det': [False] * len(cls_gt_boxes)}) 99 | 100 | output = frcn_eval.eval({image_input: mb_data[image_input], dims_input: mb_data[dims_input]}) 101 | out_dict = dict([(k.name, k) for k in output]) 102 | out_cls_pred = output[out_dict['cls_pred']][0] 103 | out_rpn_rois = output[out_dict['rpn_rois']][0] 104 | out_bbox_regr = output[out_dict['bbox_regr']][0] 105 | 106 | labels = out_cls_pred.argmax(axis=1) 107 | scores = out_cls_pred.max(axis=1) 108 | regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray()) 109 | 110 | labels.shape = labels.shape + (1,) 111 | scores.shape = scores.shape + (1,) 112 | coords_score_label = np.hstack((regressed_rois, scores, labels)) 113 | 114 | # shape of all_boxes: e.g. 
21 classes x 4952 images x 58 rois x 5 coords+score 115 | for cls_j in range(1, cfg["DATA"].NUM_CLASSES): 116 | coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)] 117 | all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False) 118 | 119 | if (img_i+1) % 100 == 0: 120 | print("Processed {} samples".format(img_i+1)) 121 | 122 | # calculate mAP 123 | aps = evaluate_detections(all_boxes, all_gt_infos, classes, 124 | use_gpu_nms = cfg.USE_GPU_NMS, 125 | device_id = cfg.GPU_ID, 126 | nms_threshold=cfg.RESULTS_NMS_THRESHOLD, 127 | conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD) 128 | 129 | return aps 130 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FasterRCNN/README.md: -------------------------------------------------------------------------------- 1 | # CNTK Examples: Image/Detection/Faster R-CNN 2 | 3 | ## Overview 4 | 5 | This folder contains an end-to-end solution for using Faster R-CNN to perform object detection. 6 | The original research paper for Faster R-CNN can be found at [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497). 7 | Base models that are supported by the current configuration are AlexNet and VGG16. 8 | Two image sets that are preconfigured are Pascal VOC 2007 and Grocery. 9 | Other base models or image sets can be used by adding a configuration file similar to the examples in `utils/configs` and importing it in `run_faster_rcnn.py`. 10 | 11 | ## Running the example 12 | 13 | ### Setup 14 | 15 | To run Faster R-CNN you need a CNTK Python environment. Install the following additional packages: 16 | 17 | ``` 18 | pip install opencv-python easydict pyyaml 19 | ``` 20 | 21 | The code uses prebuild Cython modules for parts of the region proposal network. 22 | These binaries are contained in the folder (`Examples/Image/Detection/utils/cython_modules`) for Python 3.5 for Windows and Python 3.4, 3.5, and 3.6 for Linux. 23 | If you require other versions please follow the instructions at [https://github.com/rbgirshick/py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#installation-sufficient-for-the-demo). 24 | 25 | If you want to use the debug output you need to run `pip install pydot_ng` ([website](https://pypi.python.org/pypi/pydot-ng)) and install [graphviz](http://graphviz.org/) to be able to plot the CNTK graphs (the GraphViz executable has to be in the system’s PATH). 26 | 27 | ### Getting the data and AlexNet model 28 | 29 | We use a toy dataset of images captured from a refrigerator to demonstrate Faster R-CNN (the same one is used in the Fast R-CNN example). Both the dataset and the pre-trained AlexNet model can be downloaded by running the following Python command from the Examples/Image/Detection/FastRCNN folder: 30 | 31 | `python install_data_and_model.py` 32 | 33 | After running the script, the toy dataset will be installed under the `Image/DataSets/Grocery` folder. The AlexNet model will be downloaded to the `Image/PretrainedModels` folder. 34 | We recommend you to keep the downloaded data in the respective folder while downloading, as the configuration files expect that by default. 
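Note that, unlike the Fast R-CNN example, `FasterRCNN_config.py` in this repository reads Azure storage credentials from environment variables, so they must be set before the configuration is imported. A minimal check along these lines can save a confusing `KeyError`; the variable names come from `FasterRCNN_config.py`, the `Dockerfile` and `predict.py`, while the check itself is only an illustrative sketch:

```
# Sketch: verify the environment variables used by this example before training.
import os

required = ["STORAGE_ACCOUNT_NAME", "STORAGE_ACCOUNT_KEY"]               # read in FasterRCNN_config.py
serving_only = ["AZUREML_NATIVE_SHARE_DIRECTORY", "TESTIMAGESCONTAINER"]  # used by predict.py

missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise RuntimeError("Missing environment variables: {}".format(", ".join(missing)))

for name in serving_only:
    print("{} = {}".format(name, os.environ.get(name, "<not set; only needed for the prediction service>")))
```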
35 | 36 | ### Running Faster R-CNN on the example data 37 | 38 | To train and evaluate Faster R-CNN run 39 | 40 | `python run_faster_rcnn.py` 41 | 42 | The results for end-to-end training on Grocery using AlexNet as the base model should look similar to these: 43 | 44 | ``` 45 | AP for eggBox = 1.0000 46 | AP for tomato = 1.0000 47 | AP for orangeJuice = 1.0000 48 | AP for ketchup = 0.6667 49 | AP for mustard = 1.0000 50 | AP for water = 0.5000 51 | AP for champagne = 1.0000 52 | AP for joghurt = 1.0000 53 | AP for pepper = 1.0000 54 | AP for avocado = 1.0000 55 | AP for onion = 1.0000 56 | AP for tabasco = 1.0000 57 | AP for milk = 1.0000 58 | AP for orange = 1.0000 59 | AP for gerkin = 1.0000 60 | AP for butter = 1.0000 61 | Mean AP = 0.9479 62 | ``` 63 | 64 | ### Running Faster R-CNN on Pascal VOC data 65 | 66 | To download the Pascal data and create the annotation file for Pascal in CNTK format run the following scripts: 67 | 68 | ``` 69 | python Examples/Image/DataSets/Pascal/install_pascalvoc.py 70 | python Examples/Image/DataSets/Pascal/mappings/create_mappings.py 71 | ``` 72 | 73 | Change the `dataset_cfg` in the `get_configuration()` method of `run_faster_rcnn.py` to 74 | 75 | ``` 76 | from utils.configs.Pascal_config import cfg as dataset_cfg 77 | ``` 78 | 79 | Now you're set to train on the Pascal VOC 2007 data using `python run_faster_rcnn.py`. Beware that training might take a while. 80 | 81 | ### Running Faster R-CNN on your own data 82 | 83 | Preparing your own data and annotating it with ground truth bounding boxes is described [here](https://docs.microsoft.com/en-us/cognitive-toolkit/Object-Detection-using-Fast-R-CNN#train-on-your-own-data). 84 | After storing your images in the described folder structure and annotating them, please run 85 | 86 | `python Examples/Image/Detection/utils/annotations/annotations_helper.py` 87 | 88 | after changing the folder in that script to your data folder. Finally, create a `MyDataSet_config.py` in the `utils\configs` folder following the existing examples: 89 | 90 | ``` 91 | __C.CNTK.DATASET == "YourDataSet": 92 | __C.CNTK.MAP_FILE_PATH = "../../DataSets/YourDataSet" 93 | __C.CNTK.CLASS_MAP_FILE = "class_map.txt" 94 | __C.CNTK.TRAIN_MAP_FILE = "train_img_file.txt" 95 | __C.CNTK.TEST_MAP_FILE = "test_img_file.txt" 96 | __C.CNTK.TRAIN_ROI_FILE = "train_roi_file.txt" 97 | __C.CNTK.TEST_ROI_FILE = "test_roi_file.txt" 98 | __C.CNTK.NUM_TRAIN_IMAGES = 500 99 | __C.CNTK.NUM_TEST_IMAGES = 200 100 | __C.CNTK.PROPOSAL_LAYER_SCALES = [8, 16, 32] 101 | ``` 102 | 103 | Change the `dataset_cfg` in the `get_configuration()` method of `run_faster_rcnn.py` to 104 | 105 | ``` 106 | from utils.configs.MyDataSet_config import cfg as dataset_cfg 107 | ``` 108 | 109 | and run `python run_faster_rcnn.py` to train and evaluate Faster R-CNN on your data. 110 | 111 | ## Technical details 112 | 113 | ### Parameters 114 | 115 | All options and parameters are in `FasterRCNN_config.py` in the `FasterRCNN` folder and all of them are explained there. 
These include 116 | 117 | ``` 118 | # E2E or 4-stage training 119 | __C.CNTK.TRAIN_E2E = True 120 | # If set to 'True' conv layers weights from the base model will be trained, too 121 | __C.TRAIN_CONV_LAYERS = True 122 | 123 | # E2E learning parameters 124 | __C.CNTK.E2E_MAX_EPOCHS = 20 125 | __C.CNTK.E2E_LR_PER_SAMPLE = [0.001] * 10 + [0.0001] * 10 + [0.00001] 126 | 127 | # NMS threshold used to discard overlapping predicted bounding boxes 128 | __C.RESULTS_NMS_THRESHOLD = 0.5 129 | ``` 130 | 131 | ### Faster R-CNN CNTK code 132 | 133 | Most of the code is in `FasterRCNN_train.py` and `FasterRCNN_eval.py` (and `Examples/Image/Detection/utils/rpn/rpn_helpers.py` for the region proposal network). Please see those files for details. 134 | 135 | ### Algorithm 136 | 137 | All details regarding the Faster R-CNN algorithm can be found in the original research paper: [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497). 138 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FasterRCNN/run_faster_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import numpy as np 9 | import cntk 10 | from FasterRCNN_train import prepare, train_faster_rcnn, store_eval_model_with_native_udf 11 | from FasterRCNN_eval import compute_test_set_aps, FasterRCNN_Evaluator 12 | from utils.config_helpers import merge_configs 13 | from utils.plot_helpers import plot_test_set_results 14 | from azureml.logging import get_azureml_logger 15 | 16 | def get_configuration(basemodel='AlexNet'): 17 | # load configs for detector, base network and data set 18 | from FasterRCNN_config import cfg as detector_cfg 19 | # for VGG16 base model use: from utils.configs.VGG16_config import cfg as network_cfg 20 | # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg 21 | if(basemodel == 'AlexNet'): 22 | from utils.configs.AlexNet_config import cfg as network_cfg 23 | elif (basemodel == 'VGG16'): 24 | from utils.configs.VGG16_config import cfg as network_cfg 25 | # for Pascal VOC 2007 data set use: from utils.configs.Pascal_config import cfg as dataset_cfg 26 | # for the Grocery data set use: from utils.configs.Grocery_config import cfg as dataset_cfg 27 | from utils.configs.CM_config import cfg as dataset_cfg 28 | 29 | return merge_configs([detector_cfg, network_cfg, dataset_cfg]) 30 | 31 | # trains and evaluates a Fast R-CNN model. 
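# sys is required for the sys.argv check in the main block below but is
# missing from the imports at the top of this file, so import it here.
import sys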
32 | if __name__ == '__main__': 33 | basemodel = '' 34 | projectfolder = os.getcwd() 35 | if len(sys.argv) > 1: 36 | basemodel = sys.argv[1] 37 | else: 38 | basemodel = 'AlexNet' 39 | cfg = get_configuration(basemodel) 40 | prepare(cfg, False) 41 | 42 | # train and test 43 | trained_model = train_faster_rcnn(cfg) 44 | eval_results = compute_test_set_aps(trained_model, cfg) 45 | 46 | # write AP results to output 47 | for class_name in eval_results: print('AP for {:>15} = {:.4f}'.format(class_name, eval_results[class_name])) 48 | print('Mean AP = {:.4f}'.format(np.nanmean(list(eval_results.values())))) 49 | os.chdir(projectfolder) 50 | 51 | run_logger = get_azureml_logger() 52 | run_logger.log("mAP", np.nanmean(list(eval_results.values()))) 53 | 54 | # Plot results on test set images 55 | if cfg.VISUALIZE_RESULTS: 56 | num_eval = min(cfg["DATA"].NUM_TEST_IMAGES, 100) 57 | results_folder = os.path.join(cfg.OUTPUT_PATH, cfg["DATA"].DATASET) 58 | evaluator = FasterRCNN_Evaluator(trained_model, cfg) 59 | plot_test_set_results(evaluator, num_eval, results_folder, cfg) 60 | 61 | if cfg.STORE_EVAL_MODEL_WITH_NATIVE_UDF: 62 | store_eval_model_with_native_udf(trained_model, cfg) 63 | 64 | 65 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/FasterRCNN/run_sweep_parameters.py: -------------------------------------------------------------------------------- 1 | # run run_faster_rcnn.py with trying different models 2 | # run this with just "python ./Detection/FasterRCNN/run_sweep_parameters.py your_context_name" 3 | 4 | import os, sys 5 | 6 | # set context 7 | context_name = '' 8 | if len(sys.argv) > 1: 9 | context_name = sys.argv[1] 10 | 11 | base_models = ['AlexNet', 'VGG16'] 12 | 13 | for base_model in base_models: 14 | os.system('az ml experiment submit -c {} ./Detection/FasterRCNN/run_faster_rcnn.py {}'.format(context_name, base_model)) 15 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/__init__.py -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/app.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from sanic import Sanic 3 | from sanic.response import json, text 4 | from sanic.config import Config 5 | from server import Server 6 | import server 7 | 8 | def main(): 9 | app = Sanic(__name__) 10 | Config.KEEP_ALIVE = False 11 | 12 | server = Server() 13 | server.set_model() 14 | 15 | @app.route('/') 16 | async def test(request): 17 | return text(server.server_running()) 18 | 19 | @app.route('/predict', methods=["POST",]) 20 | def post_json(request): 21 | return json(server.predict(request)) 22 | 23 | app.run(host= '0.0.0.0', port=80) 24 | print ('exiting...') 25 | sys.exit(0) 26 | 27 | if __name__ == '__main__': 28 | main() -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/predict.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
5 | # ============================================================================== 6 | 7 | import os, sys 8 | import numpy as np 9 | import argparse 10 | from utils.od_utils import train_object_detector, evaluate_single_image, filter_results 11 | from utils.config_helpers import merge_configs 12 | from utils.misc.azure_utils import load_file_from_blob 13 | 14 | available_detectors = ['FastRCNN', 'FasterRCNN'] 15 | 16 | def get_detector_name(args): 17 | detector_name = None 18 | default_detector = 'FasterRCNN' 19 | if len(args) != 2: 20 | print("Please provide a detector name as the single argument. Usage:") 21 | print(" python predict.py ") 22 | print("Available detectors: {}".format(available_detectors)) 23 | else: 24 | detector_name = args[1] 25 | if not any(detector_name == x for x in available_detectors): 26 | print("Unknown detector: {}.".format(detector_name)) 27 | print("Available detectors: {}".format(available_detectors)) 28 | detector_name = None 29 | 30 | if detector_name is None: 31 | print("Using default detector: {}".format(default_detector)) 32 | return default_detector 33 | else: 34 | return detector_name 35 | 36 | def get_configuration(detector_name): 37 | # load configs for detector, base network and data set 38 | if detector_name == "FastRCNN": 39 | from FastRCNN.FastRCNN_config import cfg as detector_cfg 40 | elif detector_name == "FasterRCNN": 41 | from FasterRCNN.FasterRCNN_config import cfg as detector_cfg 42 | else: 43 | print('Unknown detector: {}'.format(detector_name)) 44 | 45 | # for VGG16 base model use: 46 | from utils.configs.VGG16_config import cfg as network_cfg 47 | # for AlexNet base model use: from utils.configs.AlexNet_config import cfg as network_cfg 48 | #from utils.configs.AlexNet_config import cfg as network_cfg 49 | # for Pascal VOC 2007 data set use: from utils.configs.Pascal_config import cfg as dataset_cfg 50 | from utils.configs.CM_config import cfg as dataset_cfg 51 | 52 | return merge_configs([detector_cfg, network_cfg, dataset_cfg, {'DETECTOR': detector_name}]) 53 | 54 | def get_model(): 55 | cfg = get_configuration('FasterRCNN')#os.environ["DETECTOR_NAME"]) 56 | # train and test 57 | eval_model = train_object_detector(cfg) 58 | return eval_model 59 | 60 | def get_result(filename, eval_model): 61 | names = filename.split('/') 62 | name = names[len(names)-1] 63 | print (name) 64 | data_path = os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'] + '/output' 65 | if os.path.isdir(data_path) == False: 66 | os.makedirs(data_path) 67 | img_path = '' 68 | if load_file_from_blob(os.environ['STORAGE_ACCOUNT_NAME'], \ 69 | os.environ['TESTIMAGESCONTAINER'], filename, data_path + '/' + name) is True: 70 | print (name) 71 | img_path=data_path + '/' + name 72 | print (img_path) 73 | #print('detector_name: {}'.format(os.environ["DETECTOR_NAME"])) 74 | #print('img_path: {}'.format(img_path)) 75 | cfg = get_configuration('FasterRCNN')#os.environ["DETECTOR_NAME"]) 76 | 77 | # detect objects in single image 78 | regressed_rois, cls_probs = evaluate_single_image(eval_model, img_path, cfg) 79 | bboxes, labels, scores = filter_results(regressed_rois, cls_probs, cfg) 80 | 81 | # write detection results to output 82 | fg_boxes = np.where(labels > 0) 83 | print("#bboxes: before nms: {}, after nms: {}, foreground: {}".format(len(regressed_rois), len(bboxes), len(fg_boxes[0]))) 84 | result = [] 85 | for i in fg_boxes[0]: 86 | #print("{:<12} (label: {:<2}), score: {:.3f}, box: {}".format( 87 | #cfg["DATA"].CLASSES[labels[i]], labels[i], scores[i], [int(v) for v in 
bboxes[i]])) 88 | result.append({'label':cfg["DATA"].CLASSES[labels[i]], 'score':'%.3f'%(scores[i]), 'box':[int(v) for v in bboxes[i]]}) 89 | 90 | return result -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source /root/anaconda3/bin/activate root 3 | pip install easydict 4 | pip install azure-ml-api-sdk==0.1.0a9 5 | pip install sanic 6 | python3 /app/app.py 7 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/server.py: -------------------------------------------------------------------------------- 1 | import json 2 | from predict import get_result, get_model 3 | 4 | class Server: 5 | model = None 6 | 7 | def set_model(self): 8 | self.model = get_model() 9 | 10 | def server_running(self): 11 | return 'Server is running...' 12 | 13 | def predict(self, request): 14 | incoming = request.json 15 | filename = incoming['filename'] 16 | print (filename) 17 | prediction = get_result(filename, self.model) 18 | print (prediction) 19 | return prediction -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/Readme.md: -------------------------------------------------------------------------------- 1 | ## Detection utils 2 | 3 | This folder contains Python modules that are utilities for object detection networks. 4 | 5 | ### Cython modules 6 | 7 | To use the rpn component you need precompiled cython modules for nms (at least cpu_nms.cpXX-win_amd64.pyd for Windows or cpu_nms.cpython-XXm.so for Linux) and bbox (cython_bbox.cpXX-win_amd64.pyd for Windows or cython_bbox.cpython-XXm.so for Linux). 8 | To compile the cython modules for windows see (https://github.com/MrGF/py-faster-rcnn-windows): 9 | ``` 10 | git clone https://github.com/MrGF/py-faster-rcnn-windows 11 | cd $FRCN_ROOT/lib 12 | python setup.py build_ext --inplace 13 | ``` 14 | For Linux see (https://github.com/rbgirshick/py-faster-rcnn): 15 | ``` 16 | git clone https://github.com/rbgirshick/py-faster-rcnn 17 | cd $FRCN_ROOT/lib 18 | python setup.py build_ext --inplace 19 | ``` 20 | Copy the compiled `.pyd` (Windows) or `.so` (Linux) files into the `cython_modules` subfolder of this utils folder. 21 | 22 | ### `rpn` module overview 23 | 24 | The rpn module contains helper methods and required layers to generate region proposal networks for object detection. 25 | 26 | ##### `rpn_helpers` 27 | 28 | Contains helper methods to create a region proposal network (rpn) and a proposal target layer for training the rpn. 29 | 30 | ##### `generate_anchors.py` 31 | 32 | Generates a regular grid of multi-scale, multi-aspect anchor boxes. 33 | 34 | ##### `proposal_layer.py` 35 | 36 | Converts RPN outputs (per-anchor scores and bbox regression estimates) into object proposals. 37 | 38 | ##### `anchor_target_layer.py` 39 | 40 | Generates training targets/labels for each anchor. Classification labels are 1 (object), 0 (not object) or -1 (ignore). 41 | Bbox regression targets are specified when the classification label is > 0. 42 | 43 | ##### `proposal_target_layer.py` 44 | 45 | Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ... , K) 46 | and bbox regression targets in that case that the label is > 0. 
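
For readers new to region proposal networks, the sketch below illustrates how a multi-scale, multi-aspect anchor set of the kind `generate_anchors.py` produces can be constructed around a square base box. It is a simplified, self-contained example rather than the module's actual code; the default scales `[8, 16, 32]` mirror the `PROPOSAL_LAYER_SCALES` values used in the example configurations.

```
# Illustrative sketch of multi-scale, multi-aspect anchor generation
# (simplified; see generate_anchors.py for the implementation used here).
import numpy as np

def make_anchors(base_size=16, ratios=(0.5, 1.0, 2.0), scales=(8, 16, 32)):
    """Return anchors as (x1, y1, x2, y2) boxes centered on the base box."""
    cx = cy = (base_size - 1) / 2.0
    anchors = []
    for ratio in ratios:
        # change the aspect ratio while keeping the area roughly constant
        w = np.round(np.sqrt(base_size * base_size / ratio))
        h = np.round(w * ratio)
        for scale in scales:
            ws, hs = w * scale, h * scale
            anchors.append([cx - 0.5 * (ws - 1), cy - 0.5 * (hs - 1),
                            cx + 0.5 * (ws - 1), cy + 0.5 * (hs - 1)])
    return np.array(anchors)

print(make_anchors().astype(int))  # 9 anchors: 3 aspect ratios x 3 scales
```

The anchor target and proposal target layers described above then assign classification labels and regression targets to these boxes during training.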
47 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/annotations/C1_DrawBboxesOnImages.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | from __future__ import print_function 8 | from builtins import chr 9 | import os, sys, importlib, shutil 10 | from cntk_helpers import * 11 | 12 | 13 | #################################### 14 | # Parameters 15 | #################################### 16 | imgDir = "C:/Users/chazhang/Desktop/newImgs/" 17 | 18 | # no need to change these params 19 | drawingImgSize = 1000.0 20 | 21 | 22 | #################################### 23 | # Functions 24 | #################################### 25 | def event_cv2_drawRectangles(event, x, y, flags, param): 26 | global global_image 27 | global global_bboxes 28 | global global_leftButtonDownPoint 29 | 30 | # draw all previous bounding boxes, and the most recent box in a different color 31 | imgCopy = global_image.copy() 32 | drawRectangles(imgCopy, global_bboxes) 33 | if len(global_bboxes)>0: 34 | drawRectangles(imgCopy, [global_bboxes[-1]], color = (255, 0, 0)) 35 | 36 | # handle mouse events 37 | if event == cv2.EVENT_LBUTTONDOWN: 38 | global_leftButtonDownPoint = (x, y) 39 | 40 | elif event == cv2.EVENT_LBUTTONUP: 41 | pt1 = global_leftButtonDownPoint 42 | pt2 = (x, y) 43 | minPt = (min(pt1[0], pt2[0]), min(pt1[1], pt2[1])) 44 | maxPt = (max(pt1[0], pt2[0]), max(pt1[1], pt2[1])) 45 | imgWidth, imgHeight = imArrayWidthHeight(global_image) 46 | minPt = ptClip(minPt, imgWidth, imgHeight) 47 | maxPt = ptClip(maxPt, imgWidth, imgHeight) 48 | global_bboxes.append(minPt + maxPt) 49 | 50 | elif flags == cv2.EVENT_FLAG_LBUTTON: #if left mouse button is held down 51 | cv2.rectangle(imgCopy, global_leftButtonDownPoint, (x, y), (255, 255, 0), 1) 52 | 53 | else: 54 | drawCrossbar(imgCopy, (x, y)) 55 | cv2.imshow("AnnotationWindow", imgCopy) 56 | 57 | def scaleCropBboxes(rectsIn, scaleFactor, imgWidth, imgHeight): 58 | if len(rectsIn) <= 0: 59 | return rectsIn 60 | else: 61 | rects = [ [int(round(rect[i]/scaleFactor)) for i in range(4)] 62 | for rect in rectsIn] 63 | rects = [Bbox(*rect).crop(imgWidth, imgHeight).rect() for rect in rects] 64 | for rect in rects: 65 | assert (Bbox(*rect).isValid()) 66 | return rects 67 | 68 | 69 | #################################### 70 | # Main 71 | #################################### 72 | imgFilenames = [f for f in os.listdir(imgDir) if f.lower().endswith(".jpg")] 73 | 74 | # loop over each image and get annotation 75 | for imgFilenameIndex,imgFilename in enumerate(imgFilenames): 76 | print (imgFilenameIndex, imgFilename) 77 | imgPath = os.path.join(imgDir, imgFilename) 78 | bBoxPath = imgPath[:-4] + ".bboxes.tsv" 79 | 80 | # skip image if ground truth already exists 81 | if os.path.exists(bBoxPath): 82 | print ("Skipping image {0} since ground truth already exists".format(imgFilename)) 83 | continue 84 | else: 85 | print ("Processing image {0} of {1}: {2}".format(imgFilenameIndex, len(imgFilenames), imgPath)) 86 | 87 | # prepare image window and callback 88 | global_bboxes = [] 89 | global_image, scaleFactor = imresizeMaxDim(imread(imgPath), drawingImgSize) 90 | cv2.namedWindow("AnnotationWindow") 91 | 
cv2.setMouseCallback("AnnotationWindow", event_cv2_drawRectangles) 92 | cv2.imshow("AnnotationWindow", global_image) 93 | 94 | # process user input 95 | while True: 96 | key = chr(cv2.waitKey()) 97 | 98 | # undo/remove last rectangle 99 | if key == "u": 100 | if len(global_bboxes) >= 1: 101 | global_bboxes = global_bboxes[:-1] 102 | imgCopy = global_image.copy() 103 | drawRectangles(imgCopy, global_bboxes) 104 | cv2.imshow("AnnotationWindow", imgCopy) 105 | 106 | # skip image 107 | elif key == "s": 108 | if os.path.exists(bBoxPath): 109 | print ("Skipping image hence deleting existing bbox file: " + bBoxPath) 110 | os.remove(bBoxPath) 111 | break 112 | 113 | # next image 114 | elif key == "n": 115 | bboxes = scaleCropBboxes(global_bboxes, scaleFactor, imWidth(imgPath), imHeight(imgPath)) 116 | writeTable(bBoxPath, bboxes) 117 | break 118 | 119 | # quit 120 | elif key == "q": 121 | sys.exit() 122 | 123 | cv2.destroyAllWindows() 124 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/annotations/C2_AssignLabelsToBboxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | from __future__ import print_function 8 | try: 9 | # for Python2 10 | from Tkinter import * 11 | except ImportError: 12 | # for Python3 13 | from tkinter import * 14 | from PIL import ImageTk 15 | from cntk_helpers import * 16 | 17 | 18 | #################################### 19 | # Parameters 20 | #################################### 21 | imgDir = "C:/Users/chazhang/Desktop/newImgs/" 22 | classes = ("avocado", "orange", "butter", "champagne", "cheese", "eggBox", "gerkin", "joghurt", "ketchup", 23 | "orangeJuice", "onion", "pepper", "sausage", "tomato", "water", "apple", "milk", 24 | "tabasco", "soySauce", "mustard", "beer") 25 | 26 | #no need to change these 27 | drawingImgSize = 1000 28 | boxWidth = 10 29 | boxHeight = 2 30 | 31 | 32 | #################################### 33 | # Main 34 | #################################### 35 | # define callback function for tk button 36 | def buttonPressedCallback(s): 37 | global global_lastButtonPressed 38 | global_lastButtonPressed = s 39 | 40 | # create UI 41 | objectNames = np.sort(classes).tolist() 42 | objectNames += ["UNDECIDED", "EXCLUDE"] 43 | tk = Tk() 44 | w = Canvas(tk, width=len(objectNames) * boxWidth, height=len(objectNames) * boxHeight, bd = boxWidth, bg = 'white') 45 | w.grid(row = len(objectNames), column = 0, columnspan = 2) 46 | for objectIndex,objectName in enumerate(objectNames): 47 | b = Button(width=boxWidth, height=boxHeight, text=objectName, command=lambda s = objectName: buttonPressedCallback(s)) 48 | b.grid(row = objectIndex, column = 0) 49 | 50 | # loop over all images 51 | imgFilenames = getFilesInDirectory(imgDir, ".jpg") 52 | for imgIndex, imgFilename in enumerate(imgFilenames): 53 | print (imgIndex, imgFilename) 54 | labelsPath = os.path.join(imgDir, imgFilename[:-4] + ".bboxes.labels.tsv") 55 | if os.path.exists(labelsPath): 56 | print ("Skipping image {:3} ({}) since annotation file already exists: {}".format(imgIndex, imgFilename, labelsPath)) 57 | continue 58 | 59 | # load image and ground truth rectangles 60 | img = imread(os.path.join(imgDir,imgFilename)) 61 | rectsPath = os.path.join(imgDir, 
imgFilename[:-4] + ".bboxes.tsv") 62 | rects = [ToIntegers(rect) for rect in readTable(rectsPath)] 63 | 64 | # annotate each rectangle in turn 65 | labels = [] 66 | for rectIndex,rect in enumerate(rects): 67 | imgCopy = img.copy() 68 | drawRectangles(imgCopy, [rect], thickness = 15) 69 | 70 | # draw image in tk window 71 | imgTk, _ = imresizeMaxDim(imgCopy, drawingImgSize, boUpscale = True) 72 | imgTk = ImageTk.PhotoImage(imconvertCv2Pil(imgTk)) 73 | label = Label(tk, image=imgTk) 74 | label.grid(row=0, column=1, rowspan=drawingImgSize) 75 | tk.update_idletasks() 76 | tk.update() 77 | 78 | # busy-wait until button pressed 79 | global_lastButtonPressed = None 80 | while not global_lastButtonPressed: 81 | tk.update_idletasks() 82 | tk.update() 83 | 84 | # store result 85 | print ("Button pressed = ", global_lastButtonPressed) 86 | labels.append(global_lastButtonPressed) 87 | 88 | writeFile(labelsPath, labels) 89 | tk.destroy() 90 | print ("DONE.") -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/annotations/LabelMeConverter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import xml.etree.ElementTree as ET 3 | import csv 4 | 5 | filepath = "C:/Your/Folder/Labelme/Files/" # set path of Labelme XML Files here include slash at end of path 6 | 7 | for filename in os.listdir(filepath): 8 | try: 9 | file = filepath + filename 10 | 11 | tree = ET.parse(file) 12 | root = tree.getroot() 13 | 14 | outputpath = filepath + "Parsed/" 15 | 16 | if not os.path.exists(outputpath): 17 | os.makedirs(outputpath) 18 | 19 | imagename = os.path.splitext(filename)[0] 20 | 21 | ## create output files 22 | outputFile_label = outputpath + imagename + ".bboxes.labels.tsv" 23 | outputFile_ROI = outputpath + imagename + ".bboxes.tsv" 24 | 25 | labelFile = open(outputFile_label, 'w') 26 | ROIFile = open(outputFile_ROI, 'w') 27 | 28 | # loop through to get objects 29 | for child in root: 30 | if str(child.tag) == 'object': 31 | 32 | label = "" 33 | xlist = [] 34 | ylist = [] 35 | 36 | # loop through to get name and BBox values from object 37 | for child in child: 38 | if str(child.tag) == 'name': 39 | label = child.text 40 | if str(child.tag) == 'polygon' or str(child.tag) == 'segm': 41 | for child in child: 42 | if str(child.tag) == 'box' or str(child.tag) == 'pt': 43 | for child in child: 44 | if str(child.tag) == 'xmin' or str(child.tag) == 'xmax' or str(child.tag) == 'x': 45 | xlist.append(int(child.text)) 46 | if str(child.tag) == 'ymin' or str(child.tag) == 'ymax' or str(child.tag) == 'y': 47 | ylist.append(int(child.text)) 48 | 49 | xmin = min(xlist) 50 | xmax = max(xlist) 51 | 52 | ymin = min(ylist) 53 | ymax = max(ylist) 54 | 55 | # output object roi based on cntk format of xmin ymin xmax ymax 56 | obj_ROI = str(xmin) + "\t" + str(ymin) + "\t" +str(xmax) + "\t" + str(ymax) 57 | 58 | labelFile.write(label + '\n') 59 | ROIFile.write(obj_ROI + '\n') 60 | 61 | labelFile.close() 62 | ROIFile.close() 63 | 64 | except Exception: 65 | pass 66 | 67 | print("Done") 68 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/annotations/annotations_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
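# Illustrative sketch (not part of the original helper): the functions below expect, next to
# every image, the annotation files produced by the scripts above -- "<img>.bboxes.tsv" with
# one tab-separated "xmin ymin xmax ymax" row per object, and "<img>.bboxes.labels.tsv" with
# the matching class name per row. A minimal example of writing that format for one image
# (file names and labels here are hypothetical):
#
#   boxes = [(24, 38, 160, 200), (300, 52, 420, 180)]
#   labels = ["kittiwake", "kittiwake"]
#   img_path = "positive/example.jpg"
#   with open(img_path[:-4] + ".bboxes.tsv", "w") as f:
#       f.writelines("\t".join(str(v) for v in b) + "\n" for b in boxes)
#   with open(img_path[:-4] + ".bboxes.labels.tsv", "w") as f:
#       f.writelines(l + "\n" for l in labels)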
5 | # ============================================================================== 6 | 7 | import numpy as np 8 | import os 9 | 10 | def _getFilesInDirectory(directory, postfix = ""): 11 | fileNames = [s for s in os.listdir(directory) if not os.path.isdir(os.path.join(directory, s))] 12 | if not postfix or postfix == "": 13 | return fileNames 14 | else: 15 | return [s for s in fileNames if s.lower().endswith(postfix)] 16 | 17 | def _get_image_paths(img_dir, training_set): 18 | if training_set: 19 | subDirs = ['positive', 'negative'] 20 | else: 21 | subDirs = ['testImages'] 22 | 23 | image_paths = [] 24 | for subdir in subDirs: 25 | sub_dir_path = os.path.join(img_dir, subdir) 26 | imgFilenames = _getFilesInDirectory(sub_dir_path, ".jpg") 27 | for img in imgFilenames: 28 | image_paths.append("{}/{}".format(subdir, img)) 29 | 30 | return image_paths 31 | 32 | def _removeLineEndCharacters(line): 33 | if line.endswith(b'\r\n'): 34 | return line[:-2] 35 | elif line.endswith(b'\n'): 36 | return line[:-1] 37 | else: 38 | return line 39 | 40 | def _load_annotation(imgPath, class_dict): 41 | bboxesPaths = imgPath[:-4] + ".bboxes.tsv" 42 | labelsPaths = imgPath[:-4] + ".bboxes.labels.tsv" 43 | # if no ground truth annotations are available, return None 44 | if not os.path.exists(bboxesPaths) or not os.path.exists(labelsPaths): 45 | return None 46 | bboxes = np.loadtxt(bboxesPaths, np.float32) 47 | 48 | # in case there's only one annotation and numpy read the array as single array, 49 | # we need to make sure the input is treated as a multi dimensional array instead of a list/ 1D array 50 | if len(bboxes.shape) == 1: 51 | bboxes = np.array([bboxes]) 52 | 53 | with open(labelsPaths, 'rb') as f: 54 | lines = f.readlines() 55 | labels = [_removeLineEndCharacters(s) for s in lines] 56 | 57 | label_idxs = np.asarray([class_dict[l.decode('utf-8')] for l in labels]) 58 | label_idxs.shape = label_idxs.shape + (1,) 59 | annotations = np.hstack((bboxes, label_idxs)) 60 | 61 | return annotations 62 | 63 | def create_map_files(data_folder, class_dict, training_set): 64 | # get relative paths for map files 65 | img_file_paths = _get_image_paths(data_folder, training_set) 66 | 67 | out_map_file_path = os.path.join(data_folder, "{}_img_file.txt".format("train" if training_set else "test")) 68 | roi_file_path = os.path.join(data_folder, "{}_roi_file.txt".format("train" if training_set else "test")) 69 | 70 | counter = 0 71 | with open(out_map_file_path, 'w') as img_file: 72 | with open(roi_file_path, 'w') as roi_file: 73 | for img_path in img_file_paths: 74 | abs_img_path = os.path.join(data_folder, img_path) 75 | gt_annotations = _load_annotation(abs_img_path, class_dict) 76 | if gt_annotations is None: 77 | continue 78 | 79 | img_line = "{}\t{}\t0\n".format(counter, img_path) 80 | img_file.write(img_line) 81 | 82 | roi_line = "{} |roiAndLabel".format(counter) 83 | for val in gt_annotations.flatten(): 84 | roi_line += " {}".format(val) 85 | 86 | roi_file.write(roi_line + "\n") 87 | counter += 1 88 | if counter % 500 == 0: 89 | print("Processed {} images".format(counter)) 90 | 91 | def create_class_dict(data_folder): 92 | # get relative paths for map files 93 | img_file_paths = _get_image_paths(data_folder, True) 94 | train_classes = ["__background__"] 95 | 96 | for img_path in img_file_paths: 97 | abs_img_path = os.path.join(data_folder, img_path) 98 | labelsPaths = abs_img_path[:-4] + ".bboxes.labels.tsv" 99 | if not os.path.exists(labelsPaths): 100 | continue 101 | with open(labelsPaths, 'rb') as f: 102 | lines 
= f.readlines() 103 | labels = [_removeLineEndCharacters(s).decode('utf-8') for s in lines] 104 | 105 | for label in labels: 106 | if not label in train_classes: 107 | train_classes.append(label) 108 | 109 | class_dict = {k: v for v, k in enumerate(train_classes)} 110 | class_list = [None]*len(class_dict) 111 | for k in class_dict: 112 | class_list[class_dict[k]]=k 113 | class_map_file_path = os.path.join(data_folder, "class_map.txt") 114 | with open(class_map_file_path, 'w') as class_map_file: 115 | for i in range(len(class_list)): 116 | class_map_file.write("{}\t{}\n".format(class_list[i], i)) 117 | 118 | return class_dict 119 | 120 | def parse_class_map_file(class_map_file): 121 | with open(class_map_file, "r") as f: 122 | lines = f.readlines() 123 | class_list = [None]*len(lines) 124 | for line in lines: 125 | tab_pos = line.find('\t') 126 | class_name = line[:tab_pos] 127 | class_id = int(line[tab_pos+1:-1]) 128 | class_list[class_id] = class_name 129 | 130 | return class_list 131 | 132 | if __name__ == '__main__': 133 | abs_path = os.path.dirname(os.path.abspath(__file__)) 134 | data_set_path = os.path.join(abs_path, "../../../DataSets/CM_kittiwake") 135 | 136 | class_dict = create_class_dict(data_set_path) 137 | create_map_files(data_set_path, class_dict, training_set=True) 138 | create_map_files(data_set_path, class_dict, training_set=False) 139 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/caffe_layers/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # 
y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip boxes to image boundaries. 66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/caffe_layers/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | #import caffe 9 | import numpy as np 10 | import yaml 11 | from utils.caffe_layers.default_config import cfg 12 | from utils.rpn.generate_anchors import generate_anchors 13 | from utils.caffe_layers.bbox_transform import bbox_transform_inv, clip_boxes 14 | from utils.nms_wrapper import nms 15 | 16 | DEBUG = False 17 | 18 | class ProposalLayer: #(caffe.Layer): 19 | """ 20 | Outputs object detection proposals by applying estimated bounding-box 21 | transformations to a set of regular boxes (called "anchors"). 22 | """ 23 | 24 | def set_param_str(self, param_str): 25 | self.param_str_ = param_str 26 | 27 | def setup(self, bottom, top): 28 | # parse the layer parameter string, which must be valid YAML 29 | layer_params = yaml.load(self.param_str_) 30 | 31 | self._feat_stride = layer_params['feat_stride'] 32 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 33 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 34 | self._num_anchors = self._anchors.shape[0] 35 | self.phase = "TEST" 36 | 37 | #if DEBUG: 38 | #print 'feat_stride: {}'.format(self._feat_stride) 39 | #print 'anchors:' 40 | #print self._anchors 41 | 42 | # rois blob: holds R regions of interest, each is a 5-tuple 43 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 44 | # rectangle (x1, y1, x2, y2) 45 | #top[0].reshape(1, 5) 46 | 47 | # scores blob: holds scores for R regions of interest 48 | #if len(top) > 1: 49 | # top[1].reshape(1, 1, 1, 1) 50 | 51 | def forward(self, bottom, top): 52 | # Algorithm: 53 | # 54 | # for each (H, W) location i 55 | # generate A anchor boxes centered on cell i 56 | # apply predicted bbox deltas at cell i to each of the A anchors 57 | # clip predicted boxes to image 58 | # remove predicted boxes with either height or width < threshold 59 | # sort all (proposal, score) pairs by score from highest to lowest 60 | # take top pre_nms_topN proposals before NMS 61 | # apply NMS with threshold 0.7 to remaining proposals 62 | # take after_nms_topN proposals after NMS 63 | # return the top proposals (-> RoIs top, scores top) 64 | 65 | assert bottom[0].shape[0] == 1, \ 66 | 'Only single item batches are supported' 67 | 68 | cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 69 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 70 | 
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 71 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 72 | min_size = cfg[cfg_key].RPN_MIN_SIZE 73 | 74 | # the first set of _num_anchors channels are bg probs 75 | # the second set are the fg probs, which we want 76 | scores = bottom[0][:, self._num_anchors:, :, :] 77 | bbox_deltas = bottom[1] 78 | im_info = bottom[2][0, :] 79 | 80 | #if DEBUG: 81 | # print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 82 | # print 'scale: {}'.format(im_info[2]) 83 | 84 | # 1. Generate proposals from bbox deltas and shifted anchors 85 | height, width = scores.shape[-2:] 86 | 87 | #if DEBUG: 88 | # print 'score map size: {}'.format(scores.shape) 89 | 90 | # Enumerate all shifts 91 | shift_x = np.arange(0, width) * self._feat_stride 92 | shift_y = np.arange(0, height) * self._feat_stride 93 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 94 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 95 | shift_x.ravel(), shift_y.ravel())).transpose() 96 | 97 | # Enumerate all shifted anchors: 98 | # 99 | # add A anchors (1, A, 4) to 100 | # cell K shifts (K, 1, 4) to get 101 | # shift anchors (K, A, 4) 102 | # reshape to (K*A, 4) shifted anchors 103 | A = self._num_anchors 104 | K = shifts.shape[0] 105 | anchors = self._anchors.reshape((1, A, 4)) + \ 106 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 107 | anchors = anchors.reshape((K * A, 4)) 108 | 109 | # Transpose and reshape predicted bbox transformations to get them 110 | # into the same order as the anchors: 111 | # 112 | # bbox deltas will be (1, 4 * A, H, W) format 113 | # transpose to (1, H, W, 4 * A) 114 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 115 | # in slowest to fastest order 116 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 117 | 118 | # Same story for the scores: 119 | # 120 | # scores are (1, A, H, W) format 121 | # transpose to (1, H, W, A) 122 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 123 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 124 | 125 | # Convert anchors into proposals via bbox transformations 126 | proposals = bbox_transform_inv(anchors, bbox_deltas) 127 | 128 | # 2. clip predicted boxes to image 129 | proposals = clip_boxes(proposals, im_info[:2]) 130 | 131 | # 3. remove predicted boxes with either height or width < threshold 132 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 133 | keep = _filter_boxes(proposals, min_size * im_info[2]) 134 | proposals = proposals[keep, :] 135 | scores = scores[keep] 136 | 137 | # 4. sort all (proposal, score) pairs by score from highest to lowest 138 | # 5. take top pre_nms_topN (e.g. 6000) 139 | order = scores.ravel().argsort(kind='mergesort')[::-1] 140 | if pre_nms_topN > 0: 141 | order = order[:pre_nms_topN] 142 | proposals = proposals[order, :] 143 | scores = scores[order] 144 | 145 | # 6. apply nms (e.g. threshold = 0.7) 146 | # 7. take after_nms_topN (e.g. 300) 147 | # 8. 
return the top proposals (-> RoIs top) 148 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 149 | if post_nms_topN > 0: 150 | keep = keep[:post_nms_topN] 151 | proposals = proposals[keep, :] 152 | scores = scores[keep] 153 | 154 | # Output rois blob 155 | # Our RPN implementation only supports a single input image, so all 156 | # batch inds are 0 157 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 158 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 159 | 160 | return blob 161 | #top[0].reshape(*(blob.shape)) 162 | #top[0].data[...] = blob 163 | 164 | # [Optional] output scores blob 165 | #if len(top) > 1: 166 | # top[1].reshape(*(scores.shape)) 167 | # top[1].data[...] = scores 168 | 169 | def backward(self, top, propagate_down, bottom): 170 | """This layer does not propagate gradients.""" 171 | pass 172 | 173 | def reshape(self, bottom, top): 174 | """Reshaping happens during the call to forward.""" 175 | pass 176 | 177 | def _filter_boxes(boxes, min_size): 178 | """Remove all boxes with any side smaller than min_size.""" 179 | ws = boxes[:, 2] - boxes[:, 0] + 1 180 | hs = boxes[:, 3] - boxes[:, 1] + 1 181 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 182 | return keep 183 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/config_helpers.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict 2 | 3 | def merge_configs(config_list): 4 | if config_list == None or len(config_list) == 0: 5 | return None 6 | 7 | base_config = config_list[0] 8 | if type(base_config) is dict: 9 | base_config = EasyDict(base_config) 10 | 11 | if type(base_config) is not EasyDict: 12 | print("The argument given to 'merge_configs' have to be of type dict or EasyDict.") 13 | return None 14 | 15 | for i in range(len(config_list) - 1): 16 | config_to_merge = config_list[i+1] 17 | if type(config_to_merge) is dict: 18 | config_to_merge = EasyDict(config_to_merge) 19 | _merge_add_a_into_b(config_to_merge, base_config) 20 | return base_config 21 | 22 | 23 | def _merge_add_a_into_b(a, b): 24 | """ 25 | Merge config dictionary a into config dictionary b, 26 | clobbering the options in b whenever they are also specified in a. 27 | New options that are only in a will be added to b. 28 | """ 29 | if type(a) is not EasyDict: 30 | return 31 | 32 | for k, v in a.items(): 33 | # if the key from a is new to b simply add it 34 | if not k in b: 35 | b[k] = v 36 | continue 37 | 38 | # the types must match 39 | old_type = type(b[k]) 40 | if old_type is not type(v): 41 | if isinstance(b[k], np.ndarray): 42 | v = np.array(v, dtype=b[k].dtype) 43 | else: 44 | raise ValueError(('Type mismatch ({} vs. {}) for config key: {}').format(type(b[k]), type(v), k)) 45 | 46 | # recursively merge dicts 47 | if type(v) is EasyDict: 48 | try: 49 | _merge_add_a_into_b(a[k], b[k]) 50 | except: 51 | print('Error under config key: {}'.format(k)) 52 | raise 53 | else: 54 | b[k] = v 55 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/configs/AlexNet_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
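# Illustrative usage sketch (an assumption about how these configs are wired together, not
# code taken from this repository): model configs like the one below are merged with the
# detector and data-set configs via utils/config_helpers.merge_configs, where later list
# entries override earlier ones, e.g.:
#
#   from utils.config_helpers import merge_configs
#   from utils.configs.AlexNet_config import cfg as network_cfg
#   from utils.configs.CM_config import cfg as dataset_cfg
#   # detector_cfg would be e.g. the cfg from FasterRCNN/FasterRCNN_config.py
#   cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
#   print(cfg.MODEL.BASE_MODEL)   # -> "AlexNet"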
5 | # ============================================================================== 6 | 7 | # `pip install easydict` if you don't have it 8 | from easydict import EasyDict as edict 9 | 10 | __C = edict() 11 | __C.MODEL = edict() 12 | cfg = __C 13 | 14 | # model config 15 | __C.MODEL.BASE_MODEL = "AlexNet" 16 | __C.MODEL.BASE_MODEL_FILE = "AlexNet_ImageNet_Caffe.model" 17 | __C.MODEL.IMG_PAD_COLOR = [114, 114, 114] 18 | __C.MODEL.FEATURE_NODE_NAME = "data" 19 | __C.MODEL.LAST_CONV_NODE_NAME = "relu5" 20 | __C.MODEL.START_TRAIN_CONV_NODE_NAME = __C.MODEL.FEATURE_NODE_NAME 21 | __C.MODEL.POOL_NODE_NAME = "pool5" 22 | __C.MODEL.LAST_HIDDEN_NODE_NAME = "drop7" 23 | __C.MODEL.FEATURE_STRIDE = 16 24 | __C.MODEL.RPN_NUM_CHANNELS = 256 25 | __C.MODEL.ROI_DIM = 6 26 | __C.MODEL.E2E_LR_FACTOR = 1.0 27 | __C.MODEL.RPN_LR_FACTOR = 1.0 28 | __C.MODEL.FRCN_LR_FACTOR = 1.0 29 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/configs/CM_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | # `pip install easydict` if you don't have it 8 | from easydict import EasyDict as edict 9 | import os 10 | 11 | __C = edict() 12 | __C.DATA = edict() 13 | __C.CNTK = edict() 14 | cfg = __C 15 | 16 | # data set config 17 | __C.DATA.DATASET = "CNTK_train_eval"#"new_VOTT" #"CM_kittiwake" 18 | __C.DATA.MAP_FILE_PATH = os.path.join(os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'], "train_images") 19 | __C.DATA.CLASS_MAP_FILE = "class_map.txt" 20 | __C.DATA.TRAIN_MAP_FILE = "train_img_file.txt" 21 | __C.DATA.TRAIN_ROI_FILE = "train_roi_file.txt" 22 | __C.DATA.TEST_MAP_FILE = "test_img_file.txt" 23 | __C.DATA.TEST_ROI_FILE = "test_roi_file.txt" 24 | __C.DATA.NUM_TRAIN_IMAGES = 160 25 | __C.DATA.NUM_TEST_IMAGES = 53 26 | __C.DATA.PROPOSAL_LAYER_SCALES = [8, 16, 32] 27 | 28 | # overwriting proposal parameters for Fast R-CNN 29 | # minimum relative width/height of an ROI 30 | __C.roi_min_side_rel = 0.04 31 | # maximum relative width/height of an ROI 32 | __C.roi_max_side_rel = 0.4 33 | # minimum relative area of an ROI 34 | __C.roi_min_area_rel = 2 * __C.roi_min_side_rel * __C.roi_min_side_rel 35 | # maximum relative area of an ROI 36 | __C.roi_max_area_rel = 0.33 * __C.roi_max_side_rel * __C.roi_max_side_rel 37 | # maximum aspect ratio of an ROI vertically and horizontally 38 | __C.roi_max_aspect_ratio = 4.0 39 | 40 | # For this data set use the following lr factor for Fast R-CNN: 41 | # __C.CNTK.LR_FACTOR = 10.0 42 | 43 | __C.CNTK.MAKE_MODE = True 44 | __C.MODEL_PATH = "FasterRCNN/Output/faster_rcnn_eval_AlexNet_e2e.model" 45 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/configs/VGG16_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
5 | # ============================================================================== 6 | 7 | # `pip install easydict` if you don't have it 8 | from easydict import EasyDict as edict 9 | 10 | __C = edict() 11 | __C.MODEL = edict() 12 | cfg = __C 13 | 14 | # model config 15 | __C.MODEL.BASE_MODEL = "VGG16" 16 | __C.MODEL.BASE_MODEL_FILE = "VGG16_ImageNet_Caffe.model" 17 | __C.MODEL.IMG_PAD_COLOR = [103, 116, 123] 18 | __C.MODEL.FEATURE_NODE_NAME = "data" 19 | __C.MODEL.LAST_CONV_NODE_NAME = "relu5_3" 20 | __C.MODEL.START_TRAIN_CONV_NODE_NAME = "pool2" # __C.MODEL.FEATURE_NODE_NAME 21 | __C.MODEL.POOL_NODE_NAME = "pool5" 22 | __C.MODEL.LAST_HIDDEN_NODE_NAME = "drop7" 23 | __C.MODEL.FEATURE_STRIDE = 16 24 | __C.MODEL.RPN_NUM_CHANNELS = 512 25 | __C.MODEL.ROI_DIM = 7 26 | __C.MODEL.E2E_LR_FACTOR = 1.0 27 | __C.MODEL.RPN_LR_FACTOR = 1.0 28 | __C.MODEL.FRCN_LR_FACTOR = 1.0 29 | 30 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cp35-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cp35-win_amd64.pyd -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cpython-34m.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cpython-34m.so -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cpython-35m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cpython-35m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cpython-35m.so.bak: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cpython-35m.so.bak -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cpython-36m.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cpu_nms.cpython-36m.so -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cp35-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cp35-win_amd64.pyd -------------------------------------------------------------------------------- 
/CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cpython-34m.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cpython-34m.so -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cpython-35m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cpython-35m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cpython-35m.so.bak: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cpython-35m.so.bak -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cpython-36m.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/cython_bbox.cpython-36m.so -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/cython_modules/gpu_nms.cp35-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/CNTK_faster-rcnn/Detection/utils/cython_modules/gpu_nms.cp35-win_amd64.pyd -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/map_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | from utils.nms_wrapper import apply_nms_to_test_set_results 9 | 10 | def evaluate_detections(all_boxes, all_gt_infos, classes, 11 | use_gpu_nms, device_id, 12 | apply_mms=True, nms_threshold=0.5, conf_threshold=0.0, 13 | use_07_metric=False): 14 | ''' 15 | Computes per-class average precision. 16 | 17 | Args: 18 | all_boxes: shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score 19 | all_gt_infos: a dictionary that contains all ground truth annoations in the following form: 20 | {'class_A': [{'bbox': array([[ 376., 210., 456., 288., 10.]], dtype=float32), 'det': [False], 'difficult': [False]}, ... ]} 21 | 'class_B': [ ], } 22 | classes: a list of class name, e.g. 
['__background__', 'avocado', 'orange', 'butter'] 23 | use_07_metric: whether to use VOC07's 11 point AP computation (default False) 24 | apply_mms: whether to apply non maximum suppression before computing average precision values 25 | nms_threshold: the threshold for discarding overlapping ROIs in nms 26 | conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded 27 | 28 | Returns: 29 | aps - average precision value per class in a dictionary {classname: ap} 30 | ''' 31 | 32 | if apply_mms: 33 | print ("Number of rois before non-maximum suppression: %d" % sum([len(all_boxes[i][j]) for i in range(len(all_boxes)) for j in range(len(all_boxes[0]))])) 34 | nms_dets,_ = apply_nms_to_test_set_results(all_boxes, nms_threshold, conf_threshold, use_gpu_nms, device_id) 35 | print ("Number of rois after non-maximum suppression: %d" % sum([len(nms_dets[i][j]) for i in range(len(all_boxes)) for j in range(len(all_boxes[0]))])) 36 | else: 37 | print ("Skipping non-maximum suppression") 38 | nms_dets = all_boxes 39 | 40 | aps = {} 41 | for classIndex, className in enumerate(classes): 42 | if className != '__background__': 43 | rec, prec, ap = _evaluate_detections(classIndex, nms_dets, all_gt_infos[className], use_07_metric=use_07_metric) 44 | aps[className] = ap 45 | 46 | return aps 47 | 48 | def _evaluate_detections(classIndex, all_boxes, gtInfos, overlapThreshold=0.5, use_07_metric=False): 49 | ''' 50 | Top level function that does the PASCAL VOC evaluation. 51 | ''' 52 | 53 | # parse detections for this class 54 | # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score 55 | num_images = len(all_boxes[0]) 56 | detBboxes = [] 57 | detImgIndices = [] 58 | detConfidences = [] 59 | for imgIndex in range(num_images): 60 | dets = all_boxes[classIndex][imgIndex] 61 | if dets != []: 62 | for k in range(dets.shape[0]): 63 | detImgIndices.append(imgIndex) 64 | detConfidences.append(dets[k, -1]) 65 | # the VOCdevkit expects 1-based indices 66 | detBboxes.append([dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1]) 67 | detBboxes = np.array(detBboxes) 68 | detConfidences = np.array(detConfidences) 69 | 70 | # compute precision / recall / ap 71 | rec, prec, ap = _voc_computePrecisionRecallAp( 72 | class_recs=gtInfos, 73 | confidence=detConfidences, 74 | image_ids=detImgIndices, 75 | BB=detBboxes, 76 | ovthresh=overlapThreshold, 77 | use_07_metric=use_07_metric) 78 | return rec, prec, ap 79 | 80 | def computeAveragePrecision(recalls, precisions, use_07_metric=False): 81 | ''' 82 | Computes VOC AP given precision and recall. 83 | ''' 84 | if use_07_metric: 85 | # 11 point metric 86 | ap = 0. 87 | for t in np.arange(0., 1.1, 0.1): 88 | if np.sum(recalls >= t) == 0: 89 | p = 0 90 | else: 91 | p = np.max(precisions[recalls >= t]) 92 | ap = ap + p / 11. 
93 | else: 94 | # correct AP calculation 95 | # first append sentinel values at the end 96 | mrecalls = np.concatenate(([0.], recalls, [1.])) 97 | mprecisions = np.concatenate(([0.], precisions, [0.])) 98 | 99 | # compute the precision envelope 100 | for i in range(mprecisions.size - 1, 0, -1): 101 | mprecisions[i - 1] = np.maximum(mprecisions[i - 1], mprecisions[i]) 102 | 103 | # to calculate area under PR curve, look for points 104 | # where X axis (recall) changes value 105 | i = np.where(mrecalls[1:] != mrecalls[:-1])[0] 106 | 107 | # and sum (\Delta recall) * prec 108 | ap = np.sum((mrecalls[i + 1] - mrecalls[i]) * mprecisions[i + 1]) 109 | return ap 110 | 111 | def _voc_computePrecisionRecallAp(class_recs, confidence, image_ids, BB, ovthresh=0.5, use_07_metric=False): 112 | ''' 113 | Computes precision, recall. and average precision 114 | ''' 115 | if len(BB) == 0: 116 | return 0.0, 0.0, 0.0 117 | 118 | # sort by confidence 119 | sorted_ind = np.argsort(-confidence) 120 | 121 | BB = BB[sorted_ind, :] 122 | image_ids = [image_ids[x] for x in sorted_ind] 123 | 124 | # go down dets and mark TPs and FPs 125 | nd = len(image_ids) 126 | tp = np.zeros(nd) 127 | fp = np.zeros(nd) 128 | for d in range(nd): 129 | R = class_recs[image_ids[d]] 130 | bb = BB[d, :].astype(float) 131 | ovmax = -np.inf 132 | BBGT = R['bbox'].astype(float) 133 | 134 | if BBGT.size > 0: 135 | # compute overlaps 136 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 137 | iymin = np.maximum(BBGT[:, 1], bb[1]) 138 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 139 | iymax = np.minimum(BBGT[:, 3], bb[3]) 140 | iw = np.maximum(ixmax - ixmin + 1., 0.) 141 | ih = np.maximum(iymax - iymin + 1., 0.) 142 | inters = iw * ih 143 | 144 | # union 145 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 146 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 147 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 148 | 149 | overlaps = inters / uni 150 | ovmax = np.max(overlaps) 151 | jmax = np.argmax(overlaps) 152 | 153 | if ovmax > ovthresh: 154 | if not R['difficult'][jmax]: 155 | if not R['det'][jmax]: 156 | tp[d] = 1. 157 | R['det'][jmax] = 1 158 | else: 159 | fp[d] = 1. 160 | else: 161 | fp[d] = 1. 
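    # Worked micro-example of the cumulative-sum step below (illustrative only): with
    # detections sorted by confidence and marked tp = [1, 0, 1], fp = [0, 1, 0] against
    # npos = 2 ground-truth boxes, np.cumsum gives tp = [1, 1, 2] and fp = [0, 1, 1], so
    # recall = [0.5, 0.5, 1.0] and precision = [1.0, 0.5, 2/3]; those two arrays are then
    # passed to computeAveragePrecision above.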
162 | 163 | # compute precision recall 164 | npos = sum([len(cr['bbox']) for cr in class_recs]) 165 | fp = np.cumsum(fp) 166 | tp = np.cumsum(tp) 167 | rec = tp / float(npos) 168 | # avoid divide by zero in case the first detection matches a difficult ground truth 169 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 170 | ap = computeAveragePrecision(rec, prec, use_07_metric) 171 | return rec, prec, ap 172 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/misc/azure_utils.py: -------------------------------------------------------------------------------- 1 | from os.path import join, isfile 2 | import glob 3 | import os 4 | from azure.storage.blob import BlockBlobService 5 | import datetime 6 | 7 | def load_file_from_blob(account, container, fileName, dest): 8 | print("Starting download of {}...".format(fileName)) 9 | if os.path.isfile(dest): 10 | print("File {} already exists, skipping download from Azure Blob.".format(dest)) 11 | return False 12 | 13 | blob_service = BlockBlobService(account_name=account) 14 | print("container {0}, fileName {1}, dest {2}".format(container, fileName, dest)) 15 | blob_service.get_blob_to_path(container, fileName, dest) 16 | return True 17 | 18 | def get_blob_service(): 19 | storage_account_name = os.environ['STORAGE_ACCOUNT_NAME'] 20 | storage_account_key = os.environ['STORAGE_ACCOUNT_KEY'] 21 | return BlockBlobService(account_name=storage_account_name, account_key=storage_account_key) 22 | 23 | def upload_checkpoint_files(dir_path): 24 | blob_service = get_blob_service() 25 | files = os.listdir(dir_path) 26 | for file in files: 27 | blob_service.create_blob_from_path('checkpoints', file, os.path.join(dir_path, file)) 28 | 29 | 30 | def upload_checkpoint_file(file_path, file_name, add_timestamp=True): 31 | blob_service = get_blob_service() 32 | if add_timestamp: 33 | splited = file_name.rsplit('.', 1) 34 | file_name = ('_' + datetime.datetime.now().isoformat() + '.').join(splited) 35 | blob_service.create_blob_from_path('checkpoints', file_name , file_path) 36 | print("Uploaded eval model at checkpoints/%s" % file_name) -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/misc/zip_helper.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | 3 | def unzip_file(filepath, destination): 4 | print("Unzipping {0} to {1}".format(filepath, destination)) 5 | with zipfile.ZipFile(filepath,"r") as zip_ref: 6 | zip_ref.extractall(destination) -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
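# Reference sketch (illustrative, not used by the code below): the compiled cpu_nms/gpu_nms
# modules implement standard greedy non-maximum suppression. A pure-NumPy equivalent,
# assuming each row of dets is [x1, y1, x2, y2, score], would look roughly like this:
#
#   import numpy as np
#
#   def greedy_nms_reference(dets, thresh):
#       x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
#       areas = (x2 - x1 + 1) * (y2 - y1 + 1)
#       order = scores.argsort()[::-1]          # highest score first
#       keep = []
#       while order.size > 0:
#           i = order[0]
#           keep.append(i)                      # keep the current best box
#           # IoU of the kept box with all remaining boxes
#           xx1 = np.maximum(x1[i], x1[order[1:]])
#           yy1 = np.maximum(y1[i], y1[order[1:]])
#           xx2 = np.minimum(x2[i], x2[order[1:]])
#           yy2 = np.minimum(y2[i], y2[order[1:]])
#           w = np.maximum(0.0, xx2 - xx1 + 1)
#           h = np.maximum(0.0, yy2 - yy1 + 1)
#           inter = w * h
#           iou = inter / (areas[i] + areas[order[1:]] - inter)
#           # drop boxes that overlap the kept box by more than thresh
#           order = order[np.where(iou <= thresh)[0] + 1]
#       return keep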
5 | # ============================================================================== 6 | 7 | import numpy as np 8 | from utils.cython_modules.cpu_nms import cpu_nms 9 | try: 10 | from utils.cython_modules.gpu_nms import gpu_nms 11 | gpu_nms_available = True 12 | except ImportError: 13 | gpu_nms_available = False 14 | 15 | def nms(dets, thresh, use_gpu_nms=True, device_id=0): 16 | ''' 17 | Dispatches the call to either CPU or GPU NMS implementations 18 | ''' 19 | if dets.shape[0] == 0: 20 | return [] 21 | if gpu_nms_available and use_gpu_nms: 22 | return gpu_nms(dets, thresh, device_id=device_id) 23 | else: 24 | return cpu_nms(dets, thresh) 25 | 26 | def apply_nms_to_single_image_results(coords, labels, scores, use_gpu_nms, device_id, nms_threshold=0.5, conf_threshold=0.0): 27 | ''' 28 | Applies nms to the results for a single image. 29 | 30 | Args: 31 | coords: (x_min, y_min, x_max, y_max) coordinates for n rois. shape = (n, 4) 32 | labels: the predicted label per roi. shape = (n, 1) 33 | scores: the predicted score per roi. shape = (n, 1) 34 | nms_threshold: the threshold for discarding overlapping ROIs in nms 35 | conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded 36 | 37 | Returns: 38 | nmsKeepIndices - the indices of the ROIs to keep after nms 39 | ''' 40 | 41 | # generate input for nms 42 | allIndices = [] 43 | nmsRects = [[[]] for _ in range(max(labels) + 1)] 44 | coordsWithScores = np.hstack((coords, np.array([scores]).T)) 45 | for i in range(max(labels) + 1): 46 | indices = np.where(np.array(labels) == i)[0] 47 | nmsRects[i][0] = coordsWithScores[indices,:] 48 | allIndices.append(indices) 49 | 50 | # call nms 51 | _, nmsKeepIndicesList = apply_nms_to_test_set_results(nmsRects, nms_threshold, conf_threshold, use_gpu_nms, device_id) 52 | 53 | # map back to original roi indices 54 | nmsKeepIndices = [] 55 | for i in range(max(labels) + 1): 56 | for keepIndex in nmsKeepIndicesList[i][0]: 57 | nmsKeepIndices.append(allIndices[i][keepIndex]) # for keepIndex in nmsKeepIndicesList[i][0]] 58 | assert (len(nmsKeepIndices) == len(set(nmsKeepIndices))) # check if no roi indices was added >1 times 59 | return nmsKeepIndices 60 | 61 | def apply_nms_to_test_set_results(all_boxes, nms_threshold, conf_threshold, use_gpu_nms, device_id): 62 | ''' 63 | Applies nms to the results of multiple images. 64 | 65 | Args: 66 | all_boxes: shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score 67 | nms_threshold: the threshold for discarding overlapping ROIs in nms 68 | conf_threshold: a minimum value for the score of an ROI. 
ROIs with lower score will be discarded 69 | 70 | Returns: 71 | nms_boxes - the reduced set of rois after nms 72 | nmsKeepIndices - the indices of the ROIs to keep after nms 73 | ''' 74 | 75 | num_classes = len(all_boxes) 76 | num_images = len(all_boxes[0]) 77 | nms_boxes = [[[] for _ in range(num_images)] 78 | for _ in range(num_classes)] 79 | nms_keepIndices = [[[] for _ in range(num_images)] 80 | for _ in range(num_classes)] 81 | for cls_ind in range(num_classes): 82 | for im_ind in range(num_images): 83 | dets = all_boxes[cls_ind][im_ind] 84 | if len(dets) == 0: 85 | continue 86 | if len(dets) == 1: 87 | keep = [0] 88 | else: 89 | keep = nms(dets.astype(np.float32), nms_threshold, use_gpu_nms, device_id) 90 | 91 | # also filter out low confidences 92 | if conf_threshold > 0: 93 | keep_conf_idx = np.where(dets[:, -1] > conf_threshold) 94 | keep = list(set(keep_conf_idx[0]).intersection(keep)) 95 | 96 | if len(keep) == 0: 97 | continue 98 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 99 | nms_keepIndices[cls_ind][im_ind] = keep 100 | return nms_boxes, nms_keepIndices 101 | 102 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/od_mb_source.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | from cntk.io import UserMinibatchSource, StreamInformation, MinibatchData 8 | from cntk.core import Value 9 | from utils.od_reader import ObjectDetectionReader 10 | import numpy as np 11 | 12 | class ObjectDetectionMinibatchSource(UserMinibatchSource): 13 | def __init__(self, img_map_file, roi_map_file, num_classes, 14 | max_annotations_per_image, pad_width, pad_height, pad_value, 15 | randomize, use_flipping, proposal_provider, proposal_iou_threshold=0.5, 16 | provide_targets=False, normalize_means=None, normalize_stds=None, max_images=None): 17 | 18 | self.image_si = StreamInformation("image", 0, 'dense', np.float32, (3, pad_height, pad_width,)) 19 | self.roi_si = StreamInformation("annotation", 1, 'dense', np.float32, (max_annotations_per_image, 5,)) 20 | self.dims_si = StreamInformation("dims", 1, 'dense', np.float32, (4,)) 21 | 22 | if proposal_provider is not None: 23 | num_proposals = proposal_provider.num_proposals() 24 | self.proposals_si = StreamInformation("proposals", 1, 'dense', np.float32, (num_proposals, 4)) 25 | self.label_targets_si = StreamInformation("label_targets", 1, 'dense', np.float32, (num_proposals, num_classes)) 26 | self.bbox_targets_si = StreamInformation("bbox_targets", 1, 'dense', np.float32, (num_proposals, num_classes*4)) 27 | self.bbiw_si = StreamInformation("bbiw", 1, 'dense', np.float32, (num_proposals, num_classes*4)) 28 | else: 29 | self.proposals_si = None 30 | 31 | self.od_reader = ObjectDetectionReader(img_map_file, roi_map_file, num_classes, 32 | max_annotations_per_image, pad_width, pad_height, pad_value, 33 | randomize, use_flipping, proposal_provider, proposal_iou_threshold, 34 | provide_targets, normalize_means, normalize_stds, max_images) 35 | 36 | super(ObjectDetectionMinibatchSource, self).__init__() 37 | 38 | def stream_infos(self): 39 | if self.proposals_si is None: 40 | return [self.image_si, self.roi_si, self.dims_si] 41 | else: 42 | return [self.image_si, self.roi_si, self.dims_si, 
self.proposals_si, self.label_targets_si, self.bbox_targets_si, self.bbiw_si] 43 | 44 | def image_si(self): 45 | return self.image_si 46 | 47 | def roi_si(self): 48 | return self.roi_si 49 | 50 | def dims_si(self): 51 | return self.dims_si 52 | 53 | def proposals_si(self): 54 | return self.proposals_si 55 | 56 | def label_targets_si(self): 57 | return self.label_targets_si 58 | 59 | def bbox_targets_si(self): 60 | return self.bbox_targets_si 61 | 62 | def bbiw_si(self): 63 | return self.bbiw_si 64 | 65 | def next_minibatch(self, num_samples, number_of_workers=1, worker_rank=1, device=None, input_map=None): 66 | if num_samples > 1: 67 | print("Only single item mini batches are supported currently by od_mb_source.py") 68 | exit(1) 69 | 70 | img_data, roi_data, img_dims, proposals, label_targets, bbox_targets, bbox_inside_weights = self.od_reader.get_next_input() 71 | sweep_end = self.od_reader.sweep_end() 72 | 73 | if input_map is None: 74 | result = { 75 | self.image_si: MinibatchData(Value(batch=img_data), 1, 1, sweep_end), 76 | self.roi_si: MinibatchData(Value(batch=roi_data), 1, 1, sweep_end), 77 | self.dims_si: MinibatchData(Value(batch=np.asarray(img_dims, dtype=np.float32)), 1, 1, sweep_end), 78 | self.proposals_si: MinibatchData(Value(batch=np.asarray(proposals, dtype=np.float32)), 1, 1, sweep_end), 79 | self.label_targets_si: MinibatchData(Value(batch=np.asarray(label_targets, dtype=np.float32)), 1, 1, sweep_end), 80 | self.bbox_targets_si: MinibatchData(Value(batch=np.asarray(bbox_targets, dtype=np.float32)), 1, 1, sweep_end), 81 | self.bbiw_si: MinibatchData(Value(batch=np.asarray(bbox_inside_weights, dtype=np.float32)), 1, 1, sweep_end), 82 | } 83 | else: 84 | result = { 85 | input_map[self.image_si]: MinibatchData(Value(batch=np.asarray(img_data, dtype=np.float32)), 1, 1, sweep_end) 86 | } 87 | if self.roi_si in input_map: 88 | result[input_map[self.roi_si]] = MinibatchData(Value(batch=np.asarray(roi_data, dtype=np.float32)), 1, 1, sweep_end) 89 | if self.dims_si in input_map: 90 | result[input_map[self.dims_si]] = MinibatchData(Value(batch=np.asarray(img_dims, dtype=np.float32)), 1, 1, sweep_end) 91 | if self.proposals_si in input_map: 92 | result[input_map[self.proposals_si]] = MinibatchData(Value(batch=np.asarray(proposals, dtype=np.float32)), 1, 1, sweep_end) 93 | if self.label_targets_si in input_map: 94 | result[input_map[self.label_targets_si]] = MinibatchData(Value(batch=np.asarray(label_targets, dtype=np.float32)), 1, 1, sweep_end) 95 | if self.bbox_targets_si in input_map: 96 | result[input_map[self.bbox_targets_si]] = MinibatchData(Value(batch=np.asarray(bbox_targets, dtype=np.float32)), 1, 1, sweep_end) 97 | if self.bbiw_si in input_map: 98 | result[input_map[self.bbiw_si]] = MinibatchData(Value(batch=np.asarray(bbox_inside_weights, dtype=np.float32)), 1, 1, sweep_end) 99 | 100 | return result 101 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/od_utils.py: -------------------------------------------------------------------------------- 1 | import easydict 2 | from utils.nms_wrapper import apply_nms_to_single_image_results 3 | 4 | def train_object_detector(cfg): 5 | """ 6 | Trains an object detector as specified in the configuration 7 | :param cfg: the configuration 8 | :return: the eval model of the trained detector 9 | """ 10 | 11 | detector_name = _get_detector_name(cfg) 12 | eval_model = None 13 | print("training {}".format(detector_name)) 14 | if detector_name == 'FastRCNN': 15 | from 
FastRCNN.FastRCNN_train import prepare, train_fast_rcnn 16 | prepare(cfg, use_arg_parser=False) 17 | eval_model = train_fast_rcnn(cfg) 18 | elif detector_name == 'FasterRCNN': 19 | from FasterRCNN.FasterRCNN_train import prepare, train_faster_rcnn 20 | prepare(cfg, use_arg_parser=False) 21 | eval_model = train_faster_rcnn(cfg) 22 | else: 23 | print('Unknown detector: {}'.format(detector_name)) 24 | 25 | return eval_model 26 | 27 | def evaluate_test_set(model, cfg): 28 | """ 29 | Evaluates the given model on the test set as specified in the configuration 30 | :param model: the model 31 | :param cfg: the configuration 32 | :return: AP (average precision) per class 33 | """ 34 | 35 | detector_name = _get_detector_name(cfg) 36 | aps = None 37 | print("evaluating {}".format(detector_name)) 38 | if detector_name == 'FastRCNN': 39 | from FastRCNN.FastRCNN_eval import compute_test_set_aps 40 | aps = compute_test_set_aps(model, cfg) 41 | elif detector_name == 'FasterRCNN': 42 | from FasterRCNN.FasterRCNN_eval import compute_test_set_aps 43 | aps = compute_test_set_aps(model, cfg) 44 | else: 45 | print('Unknown detector: {}'.format(detector_name)) 46 | 47 | return aps 48 | 49 | def evaluate_single_image(model, img_path, cfg): 50 | """ 51 | Computes detection results for the given model on the provided image 52 | :param model: the model 53 | :param img_path: the path to the image 54 | :param cfg: the configuration 55 | :return: 56 | regressed_rois - the predicted bounding boxes 57 | cls_probs - class probabilities per bounding box 58 | """ 59 | 60 | detector_name = _get_detector_name(cfg) 61 | regressed_rois = None 62 | cls_probs = None 63 | print("detecting objects in image {}".format(img_path)) 64 | if detector_name == 'FastRCNN': 65 | from FastRCNN.FastRCNN_eval import FastRCNN_Evaluator 66 | evaluator = FastRCNN_Evaluator(model, cfg) 67 | regressed_rois, cls_probs = evaluator.process_image(img_path) 68 | elif detector_name == 'FasterRCNN': 69 | from FasterRCNN.FasterRCNN_eval import FasterRCNN_Evaluator 70 | evaluator = FasterRCNN_Evaluator(model, cfg) 71 | regressed_rois, cls_probs = evaluator.process_image(img_path) 72 | else: 73 | print('Unknown detector: {}'.format(detector_name)) 74 | 75 | return regressed_rois, cls_probs 76 | 77 | def filter_results(regressed_rois, cls_probs, cfg): 78 | """ 79 | Filters the provided results by performing NMS (non maximum suppression) 80 | :param regressed_rois: the predicted bounding boxes 81 | :param cls_probs: class probabilities per bounding box 82 | :param cfg: the configuration 83 | :return: 84 | bboxes - the filtered list of bounding boxes 85 | labels - the single class label per bounding box 86 | scores - the probability for the assigned class label per bounding box 87 | """ 88 | 89 | labels = cls_probs.argmax(axis=1) 90 | scores = cls_probs.max(axis=1) 91 | nmsKeepIndices = apply_nms_to_single_image_results( 92 | regressed_rois, labels, scores, 93 | use_gpu_nms=cfg.USE_GPU_NMS, 94 | device_id=cfg.GPU_ID, 95 | nms_threshold=cfg.RESULTS_NMS_THRESHOLD, 96 | conf_threshold=cfg.RESULTS_NMS_CONF_THRESHOLD) 97 | 98 | filtered_bboxes = regressed_rois[nmsKeepIndices] 99 | filtered_labels = labels[nmsKeepIndices] 100 | filtered_scores = scores[nmsKeepIndices] 101 | 102 | return filtered_bboxes, filtered_labels, filtered_scores 103 | 104 | def visualize_results(img_path, bboxes, labels, scores, cfg, store_to_path=None): 105 | """ 106 | Renders the detection results (bboxes and labels) onto the image. 
107 | :param img_path: the path to the image 108 | :param bboxes: the predicted bounding boxes 109 | :param labels: the single class label per bounding box 110 | :param scores: the probability for the assigned class label per bounding box 111 | :param cfg: the configuration 112 | :param store_to_path: optional: a path where to store the rendered image. 113 | If set to 'None' the image will be displayed on screen. 114 | :return: 115 | """ 116 | 117 | from matplotlib.pyplot import imsave, imshow, show 118 | from utils.plot_helpers import visualize_detections 119 | img = visualize_detections(img_path, bboxes, labels, scores, 120 | cfg.IMAGE_WIDTH, cfg.IMAGE_HEIGHT, 121 | classes = cfg["DATA"].CLASSES, 122 | draw_negative_rois = cfg.DRAW_NEGATIVE_ROIS) 123 | 124 | if store_to_path is not None: 125 | imsave(store_to_path, img) 126 | else: 127 | imshow(img) 128 | show() 129 | 130 | def _get_detector_name(cfg): 131 | try: 132 | detector = cfg['DETECTOR'] 133 | except: 134 | print("Please specify a 'DETECTOR' in your configuration.") 135 | detector = None 136 | return detector 137 | 138 | def measure_inference_time(model, img_path, cfg, num_repetitions=100): 139 | """ 140 | Computes detection results for the given model on the provided image 141 | :param model: the model 142 | :param img_path: the path to the image 143 | :param cfg: the configuration 144 | :return: 145 | regressed_rois - the predicted bounding boxes 146 | cls_probs - class probabilities per bounding box 147 | """ 148 | 149 | detector_name = _get_detector_name(cfg) 150 | print("Measuring inference time (seconds per image) as average over {} runs".format(num_repetitions)) 151 | if detector_name == 'FastRCNN': 152 | from FastRCNN.FastRCNN_eval import FastRCNN_Evaluator 153 | evaluator = FastRCNN_Evaluator(model, cfg) 154 | elif detector_name == 'FasterRCNN': 155 | from FasterRCNN.FasterRCNN_eval import FasterRCNN_Evaluator 156 | evaluator = FasterRCNN_Evaluator(model, cfg) 157 | else: 158 | print('Unknown detector: {}'.format(detector_name)) 159 | return 160 | 161 | from time import time 162 | start = time() 163 | for i in range(num_repetitions): 164 | _,_ = evaluator.process_image(img_path) 165 | total = time() - start 166 | print("seconds per image: {:2f} (total for {} images: {:2f})".format(total/num_repetitions, num_repetitions, total)) 167 | 168 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/rpn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
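# Worked example (illustrative): for a proposal ex_roi = [0, 0, 9, 9] (width = height = 10,
# centre = (5.0, 5.0)) and a ground-truth box gt_roi = [5, 5, 24, 24] (width = height = 20,
# centre = (15.0, 15.0)), bbox_transform below yields the regression targets
#   dx = (15.0 - 5.0) / 10 = 1.0,  dy = 1.0,
#   dw = log(20 / 10) ~= 0.693,    dh ~= 0.693,
# and bbox_transform_inv applies such deltas back onto proposals to obtain predicted boxes.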
5 | # ============================================================================== 6 | 7 | import numpy as np 8 | 9 | # compute example and gt width ctr, width and height 10 | # and returns optimal target deltas 11 | def bbox_transform(ex_rois, gt_rois): 12 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 13 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 14 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 15 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 16 | 17 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 18 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 19 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 20 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 21 | 22 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 23 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 24 | targets_dw = np.log(gt_widths / ex_widths) 25 | targets_dh = np.log(gt_heights / ex_heights) 26 | 27 | targets = np.vstack( 28 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 29 | return targets 30 | 31 | # gets 32 | # - boxes (n, 4) as [x_low, y_low, x_high, y_high] 33 | # - deltas (n, 4) as [dx, dy, dw, dh] 34 | # returns 35 | # - pred_boxes (n, 4) as [x_low, y_low, x_high, y_high] 36 | # where 37 | # pred_ctr_x = dx * widths + ctr_x 38 | # --> pred_x_low = pred_ctr_x - 0.5 * pred_w 39 | # and 40 | # pred_w = np.exp(dw) * widths 41 | def bbox_transform_inv(boxes, deltas): 42 | if boxes.shape[0] == 0: 43 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 44 | 45 | boxes = boxes.astype(deltas.dtype, copy=False) 46 | 47 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 48 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 49 | ctr_x = boxes[:, 0] + 0.5 * widths 50 | ctr_y = boxes[:, 1] + 0.5 * heights 51 | 52 | dx = deltas[:, 0::4] 53 | dy = deltas[:, 1::4] 54 | dw = deltas[:, 2::4] 55 | dh = deltas[:, 3::4] 56 | 57 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 58 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 59 | pred_w = np.exp(dw) * widths[:, np.newaxis] 60 | pred_h = np.exp(dh) * heights[:, np.newaxis] 61 | 62 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 63 | # x1 64 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 65 | # y1 66 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 67 | # x2 68 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 69 | # y2 70 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 71 | 72 | return pred_boxes 73 | 74 | def clip_boxes(boxes, im_info): 75 | ''' 76 | Clip boxes to image boundaries. 
77 | :param boxes: boxes 78 | :param im_info: (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 79 | e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 80 | ''' 81 | 82 | im_info.shape = (6) 83 | padded_wh = im_info[0:2] 84 | scaled_wh = im_info[2:4] 85 | xy_offset = (padded_wh - scaled_wh) / 2 86 | xy_min = xy_offset 87 | xy_max = xy_offset + scaled_wh 88 | 89 | # x_min <= x1 <= x_max 90 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], xy_max[0] - 1), xy_min[0]) 91 | # y_min <= y1 <= y_max 92 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], xy_max[1] - 1), xy_min[1]) 93 | # x_min <= x2 <= x_max 94 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], xy_max[0] - 1), xy_min[0]) 95 | # y_min <= y2 <= y_max 96 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], xy_max[1] - 1), xy_min[1]) 97 | return boxes 98 | 99 | def regress_rois(roi_proposals, roi_regression_factors, labels, dims_input): 100 | for i in range(len(labels)): 101 | label = labels[i] 102 | if label > 0: 103 | deltas = roi_regression_factors[i:i+1,label*4:(label+1)*4] 104 | roi_coords = roi_proposals[i:i+1,:] 105 | regressed_rois = bbox_transform_inv(roi_coords, deltas) 106 | roi_proposals[i,:] = regressed_rois 107 | 108 | if dims_input is not None: 109 | # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 110 | pad_width, pad_height, scaled_image_width, scaled_image_height, _, _ = dims_input 111 | left = (pad_width - scaled_image_width) / 2 112 | right = pad_width - left - 1 113 | top = (pad_height - scaled_image_height) / 2 114 | bottom = pad_height - top - 1 115 | 116 | roi_proposals[:,0] = roi_proposals[:,0].clip(left, right) 117 | roi_proposals[:,1] = roi_proposals[:,1].clip(top, bottom) 118 | roi_proposals[:,2] = roi_proposals[:,2].clip(left, right) 119 | roi_proposals[:,3] = roi_proposals[:,3].clip(top, bottom) 120 | 121 | return roi_proposals 122 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/rpn/cntk_smoothL1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
5 | # ============================================================================== 6 | 7 | import numpy as np 8 | import cntk as C 9 | 10 | def SmoothL1Loss(sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights): 11 | """ 12 | From https://github.com/smallcorgi/Faster-RCNN_TF/blob/master/lib/fast_rcnn/train.py 13 | 14 | ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets)) 15 | SmoothL1(x) = 0.5 * (sigma * x)^2, if |x| < 1 / sigma^2 16 | |x| - 0.5 / sigma^2, otherwise 17 | """ 18 | sigma2 = sigma * sigma 19 | 20 | inside_mul_abs = C.abs(C.element_times(bbox_inside_weights, C.minus(bbox_pred, bbox_targets))) 21 | 22 | smooth_l1_sign = C.less(inside_mul_abs, 1.0 / sigma2) 23 | smooth_l1_option1 = C.element_times(C.element_times(inside_mul_abs, inside_mul_abs), 0.5 * sigma2) 24 | smooth_l1_option2 = C.minus(inside_mul_abs, 0.5 / sigma2) 25 | smooth_l1_result = C.plus(C.element_times(smooth_l1_option1, smooth_l1_sign), 26 | C.element_times(smooth_l1_option2, C.minus(1.0, smooth_l1_sign))) 27 | 28 | return C.element_times(bbox_outside_weights, smooth_l1_result) 29 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/Detection/utils/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | 9 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 10 | scales=2**np.arange(3, 6)): 11 | """ 12 | Generate anchor (reference) windows by enumerating aspect ratios X 13 | scales wrt a reference (0, 0, 15, 15) window. 14 | """ 15 | 16 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 17 | ratio_anchors = _ratio_enum(base_anchor, ratios) 18 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 19 | for i in range(ratio_anchors.shape[0])]) # was xrange 20 | return anchors 21 | 22 | def _whctrs(anchor): 23 | """ 24 | Return width, height, x center, and y center for an anchor (window). 25 | """ 26 | 27 | w = anchor[2] - anchor[0] + 1 28 | h = anchor[3] - anchor[1] + 1 29 | x_ctr = anchor[0] + 0.5 * (w - 1) 30 | y_ctr = anchor[1] + 0.5 * (h - 1) 31 | return w, h, x_ctr, y_ctr 32 | 33 | def _mkanchors(ws, hs, x_ctr, y_ctr): 34 | """ 35 | Given a vector of widths (ws) and heights (hs) around a center 36 | (x_ctr, y_ctr), output a set of anchors (windows). 37 | """ 38 | 39 | ws = ws[:, np.newaxis] 40 | hs = hs[:, np.newaxis] 41 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 42 | y_ctr - 0.5 * (hs - 1), 43 | x_ctr + 0.5 * (ws - 1), 44 | y_ctr + 0.5 * (hs - 1))) 45 | return anchors 46 | 47 | def _ratio_enum(anchor, ratios): 48 | """ 49 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 50 | """ 51 | 52 | w, h, x_ctr, y_ctr = _whctrs(anchor) 53 | size = w * h 54 | size_ratios = size / ratios 55 | ws = np.round(np.sqrt(size_ratios)) 56 | hs = np.round(ws * ratios) 57 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 58 | return anchors 59 | 60 | def _scale_enum(anchor, scales): 61 | """ 62 | Enumerate a set of anchors for each scale wrt an anchor. 
63 | """ 64 | 65 | w, h, x_ctr, y_ctr = _whctrs(anchor) 66 | ws = w * scales 67 | hs = h * scales 68 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 69 | return anchors 70 | 71 | if __name__ == '__main__': 72 | import time 73 | t = time.time() 74 | a = generate_anchors() 75 | print (time.time() - t) 76 | print (a) 77 | from IPython import embed; embed() 78 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/aml_config/conda_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Conda environment specification. The dependencies defined in this file will 2 | # be automatically provisioned for managed runs. These include runs against 3 | # the localdocker, remotedocker, and cluster compute targets. 4 | 5 | # Note that this file is NOT used to automatically manage dependencies for the 6 | # local compute target. To provision these dependencies locally, run: 7 | # conda env update --file conda_dependencies.yml 8 | 9 | # Details about the Conda environment file format: 10 | # https://conda.io/docs/using/envs.html#create-environment-file-by-hand 11 | 12 | # For managing Spark packages and configuration, see spark_dependencies.yml. 13 | 14 | name: project_environment 15 | channels: 16 | - conda-forge 17 | - jdreaver 18 | - defaults 19 | dependencies: 20 | # The python interpreter version. 21 | # Currently Azure ML Workbench only supports 3.5.2. 22 | - python=3.5.2 23 | 24 | # Required for Jupyter Notebooks. 25 | - ipykernel=4.6.1 26 | - glib=2.50.2 27 | - xorg-libsm 28 | - xorg-libxrender 29 | - xorg-libxext 30 | - xorg-libxau 31 | - libxdmcp 32 | - cryptography 33 | 34 | # Required for CNTK 2.2 35 | - libpng=1.2 36 | - jasper 37 | 38 | - pip: 39 | # The API for Azure Machine Learning Model Management Service. 40 | # Details: https://github.com/Azure/Machine-Learning-Operationalization 41 | - azure-ml-api-sdk==0.1.0a10 42 | - azureml.datacollector==0.1.0a13 43 | - azure-common==1.1.8 44 | - azure-storage==0.36.0 45 | - opencv-python 46 | - easydict 47 | - pyyaml 48 | - pillow 49 | # GPU 50 | - https://cntk.ai/PythonWheel/GPU/cntk-2.2-cp35-cp35m-linux_x86_64.whl 51 | # Helper utilities for dealing with Azure ML Workbench Assets. 52 | - https://azuremldownloads.blob.core.windows.net/wheels/latest/azureml.assets-1.0.0-py3-none-any.whl?sv=2016-05-31&si=ro-2017&sr=c&sig=xnUdTm0B%2F%2FfknhTaRInBXyu2QTTt8wA3OsXwGVgU%2BJk%3D 53 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/aml_config/docker.compute: -------------------------------------------------------------------------------- 1 | # Defines a localdocker compute target that uses a local Docker container. 2 | type: "localdocker" 3 | 4 | # The base image for the Docker container. This is used to provision Spark and 5 | # the Conda package manager. Supported based images are microsoft/mmlspark:plus 6 | # variants. The default 0.7.91 version includes Spark 2.1.1. 7 | # Available mmlspark images: https://hub.docker.com/r/microsoft/mmlspark/tags/ 8 | baseDockerImage: "microsoft/mmlspark:plus-0.7.91" 9 | 10 | # Azure ML Workbench uses the Docker shared volumes feature to improve run 11 | # performance and to enable the automatic mounting of the shared directory. 12 | # This Docker features isn't completely stable yet on Windows, and so it's 13 | # disabled by default to ensure compatibility. 
14 | sharedVolumes: false 15 | 16 | # The $AZUREML_NATIVE_SHARE_DIRECTORY environment variable inside runs points 17 | # at a persistent directory that is shared between all runs of the same project 18 | # on the same target. This specifies the base path for those directories. 19 | # Note that this is not available if sharedVolumes is false. 20 | nativeSharedDirectory: "~/.azureml/share/" 21 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/aml_config/docker.runconfig: -------------------------------------------------------------------------------- 1 | # The program name and arguments to run when they aren't specified through 2 | # other means. The $file token is replaced with the currently selected file 3 | # by the Workbench application. 4 | ArgumentVector: 5 | - "$file" 6 | 7 | # The name of the compute target to use for this run. 8 | Target: "docker" 9 | 10 | # Environment variables set for the run. 11 | EnvironmentVariables: 12 | "EXAMPLE_ENV_VAR": "Example Value" 13 | 14 | # Framework to execute inside. Allowed values are "Python" and "PySpark". 15 | Framework: "PySpark" 16 | 17 | # Path to the Conda dependencies file to use for this run. If a project 18 | # contains multiple programs with different sets of dependencies, it may be 19 | # convenient to manage those environments with separate files. 20 | CondaDependenciesFile: "aml_config/conda_dependencies.yml" 21 | 22 | # Path to the Spark dependencies file to use for this run. If a project 23 | # contains multiple programs with different sets of dependencies, it may be 24 | # convenient to manage those environments with separate files. 25 | SparkDependenciesFile: "aml_config/spark_dependencies.yml" 26 | 27 | # Automatically prepare the run environment as part of the run itself. 28 | # Manual preparation of a compute target can be perfomed with: 29 | # az ml experiment prepare --run-configuration 30 | PrepareEnvironment: false 31 | 32 | # Enable history tracking -- this allows status, logs, metrics, and outputs 33 | # to be collected by Azure ML Workbench and uploaded to the cloud project. 34 | TrackedRun: true 35 | 36 | # The UseSampling setting controls the use of sample data or complete data on 37 | # all .dsource and .dprep files. Setting this to false will read all the data 38 | # when loading data using either a .dsource or .dprep file. 39 | UseSampling: true 40 | 41 | # For each data source (.dsource or .dprep file), the Sample setting allows a 42 | # specific named sample to be used, overriding the active sample set in the 43 | # .dsource or .dprep file. 44 | # DataSourceSettings: 45 | # my.dsource: 46 | # Sampling: 47 | # Sample: MySampleName 48 | 49 | # Data source substitutions allows all references to a .dsource, be it in a 50 | # .dprep file or in your code to load data, to instead use a different .dsource. 51 | # It can be useful to setup two .dsources and then substitute the second one 52 | # when running in some compute targets. 53 | # DataSourceSubstitutions: 54 | # my.dsource: replacement.dsource 55 | 56 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/aml_config/local.compute: -------------------------------------------------------------------------------- 1 | # Defines a local compute target that uses an existing python environment. 2 | type: "local" 3 | 4 | # Specifies the user-managed python environment for the run. By default this 5 | # is "python" which uses the currently active python environment. 
The Azure ML 6 | # Workbench will use the python environment installed with it and the Azure ML 7 | # CLI will use whatever python environment it was installed into. 8 | # 9 | # You can change this to point at any python environment on your system, 10 | # including virtual environments and Conda environments. Note that backslashes 11 | # need to be escaped in this path, so it's easier to use forward slashes. 12 | pythonLocation: "python" 13 | 14 | # The $AZUREML_NATIVE_SHARE_DIRECTORY environment variable inside runs points 15 | # at a persistent directory that is shared between all runs of the same project 16 | # on the same target. This specifies the base path for those directories. 17 | nativeSharedDirectory: "~/.azureml/share/" -------------------------------------------------------------------------------- /CNTK_faster-rcnn/aml_config/local.runconfig: -------------------------------------------------------------------------------- 1 | # The program name and arguments to run when they aren't specified through 2 | # other means. The $file token is replaced with the currently selected file 3 | # by the Workbench application. 4 | ArgumentVector: 5 | - "$file" 6 | 7 | # The name of the compute target to use for this run. 8 | Target: "local" 9 | 10 | # Environment variables set for the run. 11 | EnvironmentVariables: 12 | "EXAMPLE_ENV_VAR": "Example Value" 13 | 14 | # Framework to execute inside. Allowed values are "Python" and "PySpark". 15 | Framework: "Python" 16 | 17 | # Path to the Conda dependencies file to use for this run. If a project 18 | # contains multiple programs with different sets of dependencies, it may be 19 | # convenient to manage those environments with separate files. 20 | CondaDependenciesFile: "aml_config/conda_dependencies.yml" 21 | 22 | # Path to the Spark dependencies file to use for this run. If a project 23 | # contains multiple programs with different sets of dependencies, it may be 24 | # convenient to manage those environments with separate files. 25 | SparkDependenciesFile: "aml_config/spark_dependencies.yml" 26 | 27 | # Automatically prepare the run environment as part of the run itself. 28 | # Manual preparation of a compute target can be perfomed with: 29 | # az ml experiment prepare --run-configuration 30 | PrepareEnvironment: false 31 | 32 | # Enable history tracking -- this allows status, logs, metrics, and outputs 33 | # to be collected by Azure ML Workbench and uploaded to the cloud project. 34 | TrackedRun: true 35 | 36 | # The UseSampling setting controls the use of sample data or complete data on 37 | # all .dsource and .dprep files. Setting this to false will read all the data 38 | # when loading data using either a .dsource or .dprep file. 39 | UseSampling: true 40 | 41 | # For each data source (.dsource or .dprep file), the Sample setting allows a 42 | # specific named sample to be used, overriding the active sample set in the 43 | # .dsource or .dprep file. 44 | # DataSourceSettings: 45 | # my.dsource: 46 | # Sampling: 47 | # Sample: MySampleName 48 | 49 | # Data source substitutions allows all references to a .dsource, be it in a 50 | # .dprep file or in your code to load data, to instead use a different .dsource. 51 | # It can be useful to setup two .dsources and then substitute the second one 52 | # when running in some compute targets. 
53 | # DataSourceSubstitutions:
54 | #   my.dsource: replacement.dsource
55 |
56 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/aml_config/sampledsvm.compute: -------------------------------------------------------------------------------- 1 | address:
2 | baseDockerImage: microsoft/mmlspark:plus-gpu-0.7.91
3 | nvidiaDocker: true
4 | nativeSharedDirectory: ~/.azureml/share/
5 | password:
6 | sharedVolumes: true
7 | type: remotedocker
8 | username:
9 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/aml_config/sampledsvm.runconfig: -------------------------------------------------------------------------------- 1 | ArgumentVector:
2 | - $file
3 | CondaDependenciesFile: aml_config/conda_dependencies.yml
4 | EnvironmentVariables:
5 | "STORAGE_ACCOUNT_NAME": ""
6 | "STORAGE_ACCOUNT_KEY": ""
7 | Framework: Python
8 | PrepareEnvironment: true
9 | SparkDependenciesFile: aml_config/spark_dependencies.yml
10 | Target: sampledsvm
11 | TrackedRun: true
12 | UseSampling: true
13 | -------------------------------------------------------------------------------- /CNTK_faster-rcnn/aml_config/spark_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Spark configuration and packages specification. The dependencies defined in
2 | # this file will be automatically provisioned for runs that use Spark.
3 |
4 | # For managing third-party python libraries, see conda_dependencies.yml.
5 |
6 | # Spark configuration properties.
7 | configuration:
8 | "spark.app.name": "Azure ML Experiment"
9 | "spark.yarn.maxAppAttempts": 1
10 |
11 | # Repositories to search for the specified Spark packages.
12 | repositories:
13 | - "https://mmlspark.azureedge.net/maven"
14 |
15 | # Spark packages to include in the run.
16 | packages:
17 | # Microsoft Machine Learning for Apache Spark provides a number of deep
18 | # learning and data science tools, including seamless integration of Spark
19 | # Machine Learning pipelines with Microsoft Cognitive Toolkit (CNTK) and
20 | # OpenCV, enabling you to quickly create powerful, highly-scalable
21 | # predictive and analytical models for large image and text datasets.
22 | # Details: https://github.com/Azure/mmlspark
23 | - group: "com.microsoft.ml.spark"
24 | artifact: "mmlspark_2.11"
25 | version: "0.7.91"
26 |
27 | # Required for SQL Server data sources.
28 | - group: "com.microsoft.sqlserver"
29 | artifact: "mssql-jdbc"
30 | version: "6.2.1.jre8"
31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bird Detection with Azure ML Workbench
2 |
3 | Object detection using Faster R-CNN APIs in CNTK and Tensorflow.
4 | For a more detailed walkthrough, see the code story [Bird Detection with Azure ML Workbench](https://www.microsoft.com/developerblog/2017/10/24/bird-detection-with-azure-ml-workbench/).
5 |
6 | ## Train
7 |
8 | If you are using `az ml` and want to push models to Azure Blob Storage, add the following to your .runconfig file:
9 |
10 | ```
11 | EnvironmentVariables:
12 | "STORAGE_ACCOUNT_NAME": ""
13 | "STORAGE_ACCOUNT_KEY": ""
14 | ```
15 |
16 | For more info on Azure ML Workbench compute targets, see the [documentation](https://docs.microsoft.com/en-us/azure/machine-learning/preview/how-to-create-dsvm-hdi). 
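These storage variables are what the scripts read at run time to talk to Blob Storage; for example, `misc/azure_utils.py` in the Tensorflow project builds a `BlockBlobService` from them. Below is a minimal sketch of that pattern for pushing a trained model to a container — the `upload_model` helper, the `models` container name, and the example paths are illustrative, not part of this repo:

```
# Minimal sketch (illustrative): read the storage credentials declared in the
# .runconfig and upload a trained model file to Azure Blob Storage.
import os
from azure.storage.blob import BlockBlobService  # azure-storage==0.36.0

def upload_model(local_path, blob_name, container='models'):
    blob_service = BlockBlobService(
        account_name=os.environ['STORAGE_ACCOUNT_NAME'],
        account_key=os.environ['STORAGE_ACCOUNT_KEY'])
    # create the container on first use, then push the file
    blob_service.create_container(container, fail_on_exist=False)
    blob_service.create_blob_from_path(container, blob_name, local_path)

# e.g. upload_model('Output/faster_rcnn.model', 'faster_rcnn.model')
```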
17 | ### CNTK
18 | To train on different pre-trained base models, use the command below.
19 | It submits one job per registered pre-trained model, in a row:
20 |
21 | ```
22 | python Detection/FasterRCNN/run_sweep_parameters.py
23 | ```
24 |
25 | You can also switch the base model by passing an input argument, for example:
26 |
27 | ```
28 | az ml experiment submit -c Detection/FasterRCNN/run_faster_rcnn.py AlexNet
29 | az ml experiment submit -c Detection/FasterRCNN/run_faster_rcnn.py VGG16
30 | ```
31 | ### Tensorflow
32 | Here is how to submit a training experiment using the Tensorflow Object Detection APIs:
33 |
34 | ```
35 | az ml experiment submit -c tf_train_eval.py --logtostderr --train_dir=/azureml-share/traindir_no_aug \
36 | --input_type image_tensor --pipeline_config_path=./kw_data/faster_rcnn_resnet101_no_aug.config \
37 | --eval_every_n_steps=500 --eval_dir=/azureml-share/eval_no_aug
38 | ```
39 |
40 | Use different config files in ./kw_data to train with different parameters.
41 | Change _eval_every_n_steps_ if you'd like to run evaluation more often.
42 |
43 | > Note: you will need to update conda_dependencies.yml to point to the location of the TF object detection dist packages.
44 |
45 | ## Predict
46 |
47 | ### CNTK
48 |
49 | Run a prediction web service:
50 |
51 | ```
52 | $ cd CNTK_faster-rcnn/Detection/
53 | $ docker build -t cmcntk .
54 | $ docker run -v /:/cmcntk -p 80:80 -it cmcntk:latest
55 | # note this will persist the model files in /
56 |
57 | ```
58 | Now you should have a service running at
59 | http://localhost
60 |
61 | To get predictions for an image:
62 | 1. In the same storage account, upload your test image into a container named `data`
63 | 2. Make a POST request against the running prediction web service by providing the path to the file in the Azure blob storage container, e.g. birds/testimages/IMG_0010.JPG
64 |
65 | ```
66 | $ curl -X POST http:///predict -H 'content-type: application/json' -d '{"filename": "birds/testimages/IMG_0010.JPG"}'
67 |
68 | ```
69 |
70 | ### TensorFlow
71 |
72 | Register the resource provider:
73 |
74 | ```
75 | az provider register -n Microsoft.MachineLearningCompute
76 | ```
77 |
78 | Create the environment:
79 |
80 | VM:
81 | ```
82 | az ml env setup -l [Azure Region, e.g. eastus2] -n [your environment name] [-g [existing resource group]]
83 | ```
84 |
85 | Azure Container Service (ACS) Kubernetes cluster:
86 |
87 | ```
88 | az ml env setup --cluster -n [your environment name] -l [Azure region e.g. eastus2] [-g [resource group]]
89 | ```
90 |
91 | Set the environment:
92 |
93 | ```
94 | az ml env set -n [environment name] -g [resource group]
95 | ```
96 |
97 | Create a model management account:
98 |
99 | ```
100 | az ml account modelmanagement create -l [Azure region, e.g. eastus2] -n [your account name] -g [resource group name] --sku-instances [number of instances, e.g. 1] --sku-name [Pricing tier for example S1]
101 | ```
102 |
103 | Create the web service:
104 |
105 | ```
106 | az ml service create realtime --model-file [model file/folder path] -f [scoring file e.g. score.py] -n [your service name] -s [schema file e.g. service_schema.json] -r [runtime for the Docker container e.g. spark-py or python] -c [conda dependencies file for additional python packages] -d [additional files]
107 | ```
108 |
109 | To test the service:
110 |
111 | ```
112 | curl -X POST -H "Content-Type:application/json" --data '{"input": "[IMAGE URL]"}' http:///score
113 | ```
114 |
115 | ## Data Credit
116 | The data used in the experiments was collected by [Dr. 
Rachael Orben](https://rachaelorben.dunked.com/red-legged-kittiwake-incubation) of Oregon State University and Abram Fleishman of San Jose State University and [Conservation Metrics](http://conservationmetrics.com), Inc. 117 | It was collected as part of a large project investigating early breeding season responses of red-legged kittiwakes to changes in prey availability and linkages to the non-breeding stage in the Bering Sea, Alaska. 118 | 119 | Data for CMTK training is [here](https://olgalicodestory.blob.core.windows.net/kittiwakedata/kittiwake_labled.zip). 120 | Data in TF Records format for training in Tensorflow is [here](https://olgalicodestory.blob.core.windows.net/kittiwakedata/kittiwake_labled_tfrecords.zip) 121 | 122 | 123 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/.gitignore: -------------------------------------------------------------------------------- 1 | .azuremlhistory_git 2 | .azureml 3 | .ipynb_checkpoints 4 | azureml-logs 5 | 6 | traindir/ 7 | 8 | # Python 9 | *.pyc 10 | __pycache__/ 11 | .idea 12 | 13 | .azuremlhistory_git 14 | .azuremlhistory_git 15 | .ipynb_checkpoints 16 | .azureml/project.json 17 | azureml-logs -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/aml_config/.gitignore: -------------------------------------------------------------------------------- 1 | pct-ds1_2* 2 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/aml_config/conda_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Conda environment specification. The dependencies defined in this file will be 2 | # automatically provisioned for runs against docker, VM, and HDI cluster targets. 3 | 4 | # Details about the Conda environment file format: 5 | # https://conda.io/docs/using/envs.html#create-environment-file-by-hand 6 | 7 | # For Spark packages and configuration, see spark_dependencies.yml. 8 | 9 | # NOTE: replace stubs with appropriate values 10 | 11 | 12 | name: project_environment 13 | dependencies: 14 | - python=3.5.2 15 | - pandas 16 | - tensorflow-gpu 17 | - pip: 18 | - azure-common==1.1.8 19 | - azure-storage==0.36.0 20 | - argparse 21 | - matplotlib 22 | # The API for Azure Machine Learning Model Management Service. 23 | # Details: https://github.com/Azure/Machine-Learning-Operationalization 24 | - azure-ml-api-sdk==0.1.0a6 25 | # Helper utilities for calculating dataprofiles from Pandas DataFrames. 26 | #- https://azuremldownloads.blob.core.windows.net/wheels/latest/azureml.pyrecipes.dataframe-1.0.12-py3-none-any.whl?sv=2016-05-31&si=ro-2017&sr=c&sig=xnUdTm0B%2F%2FfknhTaRInBXyu2QTTt8wA3OsXwGVgU%2BJk%3D 27 | # Helper utilities for dealing with Azure ML Workbench Assets. 28 | - https://azuremldownloads.blob.core.windows.net/wheels/latest/azureml.assets-1.0.0-py3-none-any.whl?sv=2016-05-31&si=ro-2017&sr=c&sig=xnUdTm0B%2F%2FfknhTaRInBXyu2QTTt8wA3OsXwGVgU%2BJk%3D 29 | # TF object detection 30 | - https:///object_detection-0.1_3.tar.gz 31 | - https:///slim-0.1.tar.gz -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/aml_config/conda_dependencies_ws.yml: -------------------------------------------------------------------------------- 1 | # Conda environment specification. 
The dependencies defined in this file will be 2 | # automatically provisioned for runs against docker, VM, and HDI cluster targets. 3 | 4 | # Details about the Conda environment file format: 5 | # https://conda.io/docs/using/envs.html#create-environment-file-by-hand 6 | 7 | # For Spark packages and configuration, see spark_dependencies.yml. 8 | # NOTE: replace stubs with appropriate values 9 | 10 | 11 | name: project_environment 12 | dependencies: 13 | - python=3.5.2 14 | - pandas 15 | - tensorflow 16 | - pip: 17 | - azure-common==1.1.8 18 | - azure-storage==0.36.0 19 | # The API for Azure Machine Learning Model Management Service. 20 | # Details: https://github.com/Azure/Machine-Learning-Operationalization 21 | - azure-ml-api-sdk==0.1.0a9 22 | # Helper utilities for dealing with Azure ML Workbench Assets. 23 | - https://azuremldownloads.blob.core.windows.net/wheels/latest/azureml.assets-1.0.0-py3-none-any.whl?sv=2016-05-31&si=ro-2017&sr=c&sig=xnUdTm0B%2F%2FfknhTaRInBXyu2QTTt8wA3OsXwGVgU%2BJk%3D 24 | # TF object detection 25 | - https:///object_detection-0.1.tar.gz 26 | - https:///tfobj/slim-0.1.tar.gz 27 | - scipy -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/aml_config/docker.compute: -------------------------------------------------------------------------------- 1 | type: "localdocker" 2 | baseDockerImage: "microsoft/mmlspark:plus-0.7.dev7_2.gcfbc920" 3 | 4 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/aml_config/docker.runconfig: -------------------------------------------------------------------------------- 1 | ArgumentVector: 2 | - "$file" 3 | Target: "docker" 4 | EnvironmentVariables: 5 | "EXAMPLE_ENV_VAR": "Example Value" 6 | Framework: "Python" 7 | CondaDependenciesFile: "aml_config/conda_dependencies.yml" 8 | SparkDependenciesFile: "aml_config/spark_dependencies.yml" 9 | PrepareEnvironment: true 10 | TrackedRun: true 11 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/aml_config/local.compute: -------------------------------------------------------------------------------- 1 | type: "local" 2 | pythonLocation: "python" 3 | sparkSubmitLocation: "spark-submit" -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/aml_config/local.runconfig: -------------------------------------------------------------------------------- 1 | ArgumentVector: 2 | - "$file" 3 | Target: "local" 4 | Framework: "Python" 5 | CondaDependenciesFile: "aml_config/conda_dependencies.yml" 6 | SparkDependenciesFile: "aml_config/spark_dependencies.yml" 7 | PrepareEnvironment: true 8 | TrackedRun: true 9 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/aml_config/spark_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Spark configuration and packages specification. The dependencies defined in 2 | # this file will be automatically provisioned for each run that uses Spark. 3 | 4 | # For third-party python libraries, see conda_dependencies.yml. 5 | 6 | configuration: {} 7 | repositories: 8 | - "https://mmlspark.azureedge.net/maven" 9 | packages: 10 | - group: "com.microsoft.ml.spark" 11 | artifact: "mmlspark_2.11" 12 | version: "0.7" 13 | 14 | # Required for SQL Server data sources. 
15 | - group: "com.microsoft.sqlserver" 16 | artifact: "mssql-jdbc" 17 | version: "6.2.1.jre8" 18 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/create_pascal_tf_record.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | r"""Convert raw PASCAL dataset to TFRecord for object_detection. 17 | 18 | Example usage: 19 | ./create_pascal_tf_record --data_dir=/home/user/VOCdevkit \ 20 | --output_path=/home/user/pascal.record 21 | """ 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import hashlib 27 | import io 28 | import logging 29 | import os 30 | 31 | from lxml import etree 32 | import PIL.Image 33 | import tensorflow as tf 34 | 35 | from object_detection.utils import dataset_util 36 | from object_detection.utils import label_map_util 37 | 38 | 39 | flags = tf.app.flags 40 | flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.') 41 | flags.DEFINE_string('set', 'train', 'Convert training set, validation set or ' 42 | 'merged set.') 43 | flags.DEFINE_string('annotations_dir', 'Annotations', 44 | '(Relative) path to annotations directory.') 45 | flags.DEFINE_string('output_path', '', 'Path to output TFRecord') 46 | flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt', 47 | 'Path to label map proto') 48 | flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore ' 49 | 'difficult instances') 50 | FLAGS = flags.FLAGS 51 | 52 | SETS = ['train', 'val', 'trainval', 'test'] 53 | 54 | 55 | def dict_to_tf_example(data, 56 | dataset_directory, 57 | label_map_dict, 58 | ignore_difficult_instances=False, 59 | image_subdirectory='JPEGImages'): 60 | """Convert XML derived dict to tf.Example proto. 61 | 62 | Notice that this function normalizes the bounding box coordinates provided 63 | by the raw data. 64 | 65 | Args: 66 | data: dict holding PASCAL XML fields for a single image (obtained by 67 | running dataset_util.recursive_parse_xml_to_dict) 68 | dataset_directory: Path to root directory holding PASCAL dataset 69 | label_map_dict: A map from string label names to integers ids. 70 | ignore_difficult_instances: Whether to skip difficult instances in the 71 | dataset (default: False). 72 | image_subdirectory: String specifying subdirectory within the 73 | PASCAL dataset directory holding the actual image data. 74 | 75 | Returns: 76 | example: The converted tf.Example. 
77 | 78 | Raises: 79 | ValueError: if the image pointed to by data['filename'] is not a valid JPEG 80 | """ 81 | full_path = os.path.join(dataset_directory, image_subdirectory, data['filename']+'.JPG') 82 | with tf.gfile.GFile(full_path, 'rb') as fid: 83 | encoded_jpg = fid.read() 84 | encoded_jpg_io = io.BytesIO(encoded_jpg) 85 | image = PIL.Image.open(encoded_jpg_io) 86 | if image.format != 'JPEG': 87 | raise ValueError('Image format not JPEG') 88 | key = hashlib.sha256(encoded_jpg).hexdigest() 89 | 90 | width = int(data['size']['width']) 91 | height = int(data['size']['height']) 92 | 93 | xmin = [] 94 | ymin = [] 95 | xmax = [] 96 | ymax = [] 97 | classes = [] 98 | classes_text = [] 99 | truncated = [] 100 | poses = [] 101 | difficult_obj = [] 102 | try: 103 | for obj in data['object']: 104 | xmin.append(float(obj['bndbox']['xmin']) / width) 105 | ymin.append(float(obj['bndbox']['ymin']) / height) 106 | xmax.append(float(obj['bndbox']['xmax']) / width) 107 | ymax.append(float(obj['bndbox']['ymax']) / height) 108 | classes_text.append(obj['name'].encode('utf8')) 109 | classes.append(label_map_dict[obj['name']]) 110 | poses.append(obj['pose'].encode('utf8')) 111 | except: 112 | print("object not found in file") 113 | 114 | example = tf.train.Example(features=tf.train.Features(feature={ 115 | 'image/height': dataset_util.int64_feature(height), 116 | 'image/width': dataset_util.int64_feature(width), 117 | 'image/filename': dataset_util.bytes_feature( 118 | data['filename'].encode('utf8')), 119 | 'image/source_id': dataset_util.bytes_feature( 120 | data['filename'].encode('utf8')), 121 | 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 122 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 123 | 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 124 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 125 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 126 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 127 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 128 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 129 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 130 | 'image/object/view': dataset_util.bytes_list_feature(poses), 131 | })) 132 | return example 133 | 134 | 135 | def main(_): 136 | if FLAGS.set not in SETS: 137 | raise ValueError('set must be in : {}'.format(SETS)) 138 | 139 | data_dir = FLAGS.data_dir 140 | 141 | writer = tf.python_io.TFRecordWriter(FLAGS.output_path) 142 | 143 | label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path) 144 | 145 | 146 | examples_path = os.path.join(data_dir, 'ImageSets', 'Main', 147 | 'Kittiwake_' + FLAGS.set + '.txt') 148 | annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir) 149 | examples_list = dataset_util.read_examples_list(examples_path) 150 | for idx, example in enumerate(examples_list): 151 | if idx % 100 == 0: 152 | logging.info('On image %d of %d', idx, len(examples_list)) 153 | path = os.path.join(annotations_dir, example + '.xml') 154 | with tf.gfile.GFile(path, 'r') as fid: 155 | xml_str = fid.read() 156 | xml = etree.fromstring(xml_str) 157 | data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] 158 | 159 | tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict, 160 | FLAGS.ignore_difficult_instances) 161 | writer.write(tf_example.SerializeToString()) 162 | 163 | writer.close() 164 | 165 | 166 | if __name__ == 
'__main__': 167 | tf.app.run() 168 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/driver.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tensorflow as tf 3 | import numpy as np 4 | import os 5 | from PIL import Image 6 | import urllib.request 7 | 8 | PATH_TO_LABELS = os.path.join("./pascal_label_map.pbtxt") 9 | 10 | def init(): 11 | 12 | global detection_graph 13 | 14 | detection_graph = tf.Graph() 15 | with detection_graph.as_default(): 16 | od_graph_def = tf.GraphDef() 17 | with tf.gfile.GFile('frozen_inference_graph.pb', 'rb') as fid: 18 | serialized_graph = fid.read() 19 | od_graph_def.ParseFromString(serialized_graph) 20 | tf.import_graph_def(od_graph_def, name='') 21 | 22 | def load_image_into_numpy_array(image): 23 | (im_width, im_height) = image.size 24 | return np.array(image.getdata()).reshape( 25 | (im_height, im_width, 3)).astype(np.uint8) 26 | 27 | def process_bounding_box(box): 28 | ymin, xmin, ymax, xmax = box 29 | (left, right, top, bottom) = (xmin * 850, xmax * 850, ymin * 850, ymax * 850) 30 | return (left, right, top, bottom) 31 | 32 | def run(input_string): 33 | try: 34 | input_list = json.loads(input_string) 35 | except ValueError: 36 | return "Bad input: Expecting a json encoded list of lists." 37 | 38 | with detection_graph.as_default(): 39 | with tf.Session(graph=detection_graph) as sess: 40 | # Definite input and output Tensors for detection_graph 41 | image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 42 | # Each box represents a part of the image where a particular object was detected. 43 | detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 44 | # Each score represent how level of confidence for each of the objects. 45 | # Score is shown on the result image, together with the class label. 46 | detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') 47 | detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') 48 | num_detections = detection_graph.get_tensor_by_name('num_detections:0') 49 | 50 | urllib.request.urlretrieve(input_list['input'], "image.jpg") 51 | image = Image.open("./image.jpg") 52 | # the array based representation of the image will be used later in order to prepare the 53 | # result image with boxes and labels on it. 54 | image_np = load_image_into_numpy_array(image) 55 | # Expand dimensions since the model expects images to have shape: [1, None, None, 3] 56 | image_np_expanded = np.expand_dims(image_np, axis=0) 57 | # Actual detection. 58 | (boxes, scores, classes, num_detections) = sess.run( 59 | [detection_boxes, detection_scores, detection_classes, num_detections], 60 | feed_dict={image_tensor: image_np_expanded}) 61 | 62 | boxes, scores, classes, num_detections = map(np.squeeze, [boxes, scores, classes, num_detections]) 63 | 64 | results = [] 65 | 66 | for i in range(int(num_detections)): 67 | if scores[i] < 0.05: 68 | continue 69 | left, right, top, bottom = process_bounding_box(boxes[i]) 70 | results.append([left, right, top, bottom]) 71 | 72 | return '{"output":' + '"' + json.dumps(results) + '"}' 73 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/faster_rcnn_resnet101_voc07.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset. 
2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 1 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 850 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.5 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 10 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0001 91 | schedule { 92 | step: 0 93 | learning_rate: .0001 94 | } 95 | schedule { 96 | step: 500000 97 | learning_rate: .00001 98 | } 99 | schedule { 100 | step: 700000 101 | learning_rate: .000001 102 | } 103 | } 104 | } 105 | momentum_optimizer_value: 0.9 106 | } 107 | use_moving_average: false 108 | } 109 | gradient_clipping_by_norm: 10.0 110 | fine_tune_checkpoint: "./faster_rcnn_resnet101_coco_11_06_2017/model.ckpt" 111 | from_detection_checkpoint: true 112 | num_steps: 800000 113 | data_augmentation_options { 114 | random_horizontal_flip { 115 | } 116 | } 117 | } 118 | 119 | train_input_reader: { 120 | tf_record_input_reader { 121 | input_path: "./pascal_train.record" 122 | } 123 | label_map_path: "./pascal_label_map.pbtxt" 124 | } 125 | 126 | eval_config: { 127 | num_examples: 53 128 | } 129 | 130 | eval_input_reader: { 131 | tf_record_input_reader { 132 | input_path: "./pascal_val.record" 133 | } 134 | label_map_path: "./pascal_label_map.pbtxt" 135 | shuffle: false 136 | num_readers: 1 137 | } 138 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/kw_data/faster_rcnn_resnet101_aug1_adam.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset. 
2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 2 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | adam_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0001 91 | schedule { 92 | step: 0 93 | learning_rate: .0001 94 | } 95 | schedule { 96 | step: 500000 97 | learning_rate: .00001 98 | } 99 | schedule { 100 | step: 700000 101 | learning_rate: .000001 102 | } 103 | } 104 | } 105 | } 106 | use_moving_average: false 107 | } 108 | gradient_clipping_by_norm: 10.0 109 | fine_tune_checkpoint: "/azureml-share/kw_data/model.ckpt" 110 | from_detection_checkpoint: true 111 | num_steps: 7000 112 | data_augmentation_options { 113 | random_horizontal_flip { 114 | } 115 | # random_pixel_value_scale { 116 | # } 117 | # random_crop_image { 118 | # } 119 | } 120 | } 121 | 122 | train_input_reader: { 123 | tf_record_input_reader { 124 | input_path: "/azureml-share/kw_data/kw_train.record" 125 | } 126 | label_map_path: "./kw_data/kw_label_map.pbtxt" 127 | } 128 | 129 | eval_config: { 130 | num_examples: 54 131 | max_evals: 1 132 | } 133 | 134 | eval_input_reader: { 135 | tf_record_input_reader { 136 | input_path: "/azureml-share/kw_data/kw_val.record" 137 | } 138 | label_map_path: "./kw_data/kw_label_map.pbtxt" 139 | shuffle: false 140 | num_readers: 1 141 | } 142 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/kw_data/faster_rcnn_resnet101_aug2_adam.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), 
configured for Pascal VOC Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 2 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | adam_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0001 91 | schedule { 92 | step: 0 93 | learning_rate: .0001 94 | } 95 | schedule { 96 | step: 500000 97 | learning_rate: .00001 98 | } 99 | schedule { 100 | step: 700000 101 | learning_rate: .000001 102 | } 103 | } 104 | } 105 | } 106 | use_moving_average: false 107 | } 108 | gradient_clipping_by_norm: 10.0 109 | fine_tune_checkpoint: "/azureml-share/kw_data/model.ckpt" 110 | from_detection_checkpoint: true 111 | num_steps: 7000 112 | data_augmentation_options { 113 | random_horizontal_flip { 114 | } 115 | random_pixel_value_scale { 116 | } 117 | random_crop_image { 118 | } 119 | } 120 | } 121 | 122 | train_input_reader: { 123 | tf_record_input_reader { 124 | input_path: "/azureml-share/kw_data/kw_train.record" 125 | } 126 | label_map_path: "./kw_data/kw_label_map.pbtxt" 127 | } 128 | 129 | eval_config: { 130 | num_examples: 54 131 | max_evals: 1 132 | } 133 | 134 | eval_input_reader: { 135 | tf_record_input_reader { 136 | input_path: "/azureml-share/kw_data/kw_val.record" 137 | } 138 | label_map_path: "./kw_data/kw_label_map.pbtxt" 139 | shuffle: false 140 | num_readers: 1 141 | } 142 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/kw_data/faster_rcnn_resnet101_aug3_adam.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN 
with Resnet-101 (v1), configured for Pascal VOC Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 2 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | adam_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0001 91 | schedule { 92 | step: 0 93 | learning_rate: .0001 94 | } 95 | schedule { 96 | step: 500000 97 | learning_rate: .00001 98 | } 99 | schedule { 100 | step: 700000 101 | learning_rate: .000001 102 | } 103 | } 104 | } 105 | } 106 | use_moving_average: false 107 | } 108 | gradient_clipping_by_norm: 10.0 109 | fine_tune_checkpoint: "/azureml-share/kw_data/model.ckpt" 110 | from_detection_checkpoint: true 111 | num_steps: 7000 112 | data_augmentation_options { 113 | random_horizontal_flip { 114 | } 115 | random_crop_image { 116 | } 117 | } 118 | } 119 | 120 | train_input_reader: { 121 | tf_record_input_reader { 122 | input_path: "/azureml-share/kw_data/kw_train.record" 123 | } 124 | label_map_path: "./kw_data/kw_label_map.pbtxt" 125 | } 126 | 127 | eval_config: { 128 | num_examples: 54 129 | max_evals: 1 130 | } 131 | 132 | eval_input_reader: { 133 | tf_record_input_reader { 134 | input_path: "/azureml-share/kw_data/kw_val.record" 135 | } 136 | label_map_path: "./kw_data/kw_label_map.pbtxt" 137 | shuffle: false 138 | num_readers: 1 139 | } 140 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/kw_data/faster_rcnn_resnet101_no_aug.config: -------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), 
configured for Pascal VOC Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 2 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | momentum_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0001 91 | schedule { 92 | step: 0 93 | learning_rate: .0001 94 | } 95 | schedule { 96 | step: 500000 97 | learning_rate: .00001 98 | } 99 | schedule { 100 | step: 700000 101 | learning_rate: .000001 102 | } 103 | } 104 | } 105 | momentum_optimizer_value: 0.9 106 | } 107 | use_moving_average: false 108 | } 109 | gradient_clipping_by_norm: 10.0 110 | fine_tune_checkpoint: "/azureml-share/kw_data/model.ckpt" 111 | from_detection_checkpoint: true 112 | num_steps: 7000 113 | # data_augmentation_options { 114 | # random_horizontal_flip { 115 | # } 116 | # random_pixel_value_scale { 117 | # } 118 | # random_crop_image { 119 | # } 120 | # } 121 | } 122 | 123 | train_input_reader: { 124 | tf_record_input_reader { 125 | input_path: "/azureml-share/kw_data/kw_train.record" 126 | } 127 | label_map_path: "./kw_data/kw_label_map.pbtxt" 128 | } 129 | 130 | eval_config: { 131 | num_examples: 54 132 | max_evals: 1 133 | } 134 | 135 | eval_input_reader: { 136 | tf_record_input_reader { 137 | input_path: "/azureml-share/kw_data/kw_val.record" 138 | } 139 | label_map_path: "./kw_data/kw_label_map.pbtxt" 140 | shuffle: false 141 | num_readers: 1 142 | } 143 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/kw_data/faster_rcnn_resnet101_no_aug_adam.config: 
-------------------------------------------------------------------------------- 1 | # Faster R-CNN with Resnet-101 (v1), configured for Pascal VOC Dataset. 2 | # Users should configure the fine_tune_checkpoint field in the train config as 3 | # well as the label_map_path and input_path fields in the train_input_reader and 4 | # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that 5 | # should be configured. 6 | 7 | model { 8 | faster_rcnn { 9 | num_classes: 2 10 | image_resizer { 11 | keep_aspect_ratio_resizer { 12 | min_dimension: 600 13 | max_dimension: 1024 14 | } 15 | } 16 | feature_extractor { 17 | type: 'faster_rcnn_resnet101' 18 | first_stage_features_stride: 16 19 | } 20 | first_stage_anchor_generator { 21 | grid_anchor_generator { 22 | scales: [0.25, 0.5, 1.0, 2.0] 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | height_stride: 16 25 | width_stride: 16 26 | } 27 | } 28 | first_stage_box_predictor_conv_hyperparams { 29 | op: CONV 30 | regularizer { 31 | l2_regularizer { 32 | weight: 0.0 33 | } 34 | } 35 | initializer { 36 | truncated_normal_initializer { 37 | stddev: 0.01 38 | } 39 | } 40 | } 41 | first_stage_nms_score_threshold: 0.0 42 | first_stage_nms_iou_threshold: 0.7 43 | first_stage_max_proposals: 300 44 | first_stage_localization_loss_weight: 2.0 45 | first_stage_objectness_loss_weight: 1.0 46 | initial_crop_size: 14 47 | maxpool_kernel_size: 2 48 | maxpool_stride: 2 49 | second_stage_box_predictor { 50 | mask_rcnn_box_predictor { 51 | use_dropout: false 52 | dropout_keep_probability: 1.0 53 | fc_hyperparams { 54 | op: FC 55 | regularizer { 56 | l2_regularizer { 57 | weight: 0.0 58 | } 59 | } 60 | initializer { 61 | variance_scaling_initializer { 62 | factor: 1.0 63 | uniform: true 64 | mode: FAN_AVG 65 | } 66 | } 67 | } 68 | } 69 | } 70 | second_stage_post_processing { 71 | batch_non_max_suppression { 72 | score_threshold: 0.0 73 | iou_threshold: 0.6 74 | max_detections_per_class: 100 75 | max_total_detections: 300 76 | } 77 | score_converter: SOFTMAX 78 | } 79 | second_stage_localization_loss_weight: 2.0 80 | second_stage_classification_loss_weight: 1.0 81 | } 82 | } 83 | 84 | train_config: { 85 | batch_size: 1 86 | optimizer { 87 | adam_optimizer: { 88 | learning_rate: { 89 | manual_step_learning_rate { 90 | initial_learning_rate: 0.0001 91 | schedule { 92 | step: 0 93 | learning_rate: .0001 94 | } 95 | schedule { 96 | step: 500000 97 | learning_rate: .00001 98 | } 99 | schedule { 100 | step: 700000 101 | learning_rate: .000001 102 | } 103 | } 104 | } 105 | } 106 | use_moving_average: false 107 | } 108 | gradient_clipping_by_norm: 10.0 109 | fine_tune_checkpoint: "/azureml-share/kw_data/model.ckpt" 110 | from_detection_checkpoint: true 111 | num_steps: 7000 112 | # data_augmentation_options { 113 | # random_horizontal_flip { 114 | # } 115 | # random_pixel_value_scale { 116 | # } 117 | # random_crop_image { 118 | # } 119 | # } 120 | } 121 | 122 | train_input_reader: { 123 | tf_record_input_reader { 124 | input_path: "/azureml-share/kw_data/kw_train.record" 125 | } 126 | label_map_path: "./kw_data/kw_label_map.pbtxt" 127 | } 128 | 129 | eval_config: { 130 | num_examples: 54 131 | max_evals: 1 132 | } 133 | 134 | eval_input_reader: { 135 | tf_record_input_reader { 136 | input_path: "/azureml-share/kw_data/kw_val.record" 137 | } 138 | label_map_path: "./kw_data/kw_label_map.pbtxt" 139 | shuffle: false 140 | num_readers: 1 141 | } 142 | -------------------------------------------------------------------------------- 
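Note: the kw_data pipeline variants above share the same Faster R-CNN / ResNet-101 model definition and differ only in the optimizer block (momentum_optimizer vs. adam_optimizer) and in whether data_augmentation_options is enabled. As their header comments note, retargeting them to a new dataset mainly means editing the checkpoint and input paths; a minimal sketch of those fields follows, where the /path/to/... values are illustrative placeholders rather than paths from this repository:

train_config: {
  fine_tune_checkpoint: "/path/to/model.ckpt"   # pretrained detection checkpoint
  from_detection_checkpoint: true
}
train_input_reader: {
  tf_record_input_reader { input_path: "/path/to/train.record" }
  label_map_path: "/path/to/label_map.pbtxt"
}
eval_input_reader: {
  tf_record_input_reader { input_path: "/path/to/val.record" }
  label_map_path: "/path/to/label_map.pbtxt"
}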
/Tensorflow-Object-Detection/kw_data/kw_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 1 3 | name: 'Kittiwake' 4 | } 5 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/misc/azure_utils.py: -------------------------------------------------------------------------------- 1 | from os.path import join, isfile 2 | import glob 3 | import os 4 | from azure.storage.blob import BlockBlobService 5 | import datetime 6 | 7 | def load_file_from_blob(container, fileName, dest): 8 | print("Starting download of {0} to {1}...".format(fileName, dest)) 9 | if os.path.isfile(dest): 10 | print("File {} already exists, skipping download from Azure Blob.".format(dest)) 11 | return False 12 | 13 | blob_service = get_blob_service() 14 | print("container {0}, fileName {1}, dest {2}".format(container, fileName, dest)) 15 | blob_service.get_blob_to_path(container, fileName, dest) 16 | return True 17 | 18 | def get_blob_service(): 19 | storage_account_name = os.environ['STORAGE_ACCOUNT_NAME'] 20 | storage_account_key = os.environ['STORAGE_ACCOUNT_KEY'] 21 | return BlockBlobService(account_name=storage_account_name, account_key=storage_account_key) 22 | 23 | def upload_checkpoint_files(dir_path): 24 | blob_service = get_blob_service() 25 | files = os.listdir(dir_path) 26 | for file in files: 27 | blob_service.create_blob_from_path('tf_checkpoints', file, os.path.join(dir_path, file)) 28 | 29 | 30 | def upload_checkpoint_file(file_path, file_name, add_timestamp=True): 31 | blob_service = get_blob_service() 32 | if add_timestamp: 33 | file_name += datetime.datetime.now().isoformat() 34 | print("file name {0}, file path {1}".format(file_name, file_path)) 35 | blob_service.create_blob_from_path('tfcheckpoints', file_name , file_path) 36 | print("Uploaded eval model at tfcheckpoints/%s" % file_name) 37 | 38 | 39 | def delete_existing_blobs(): 40 | blob_service = get_blob_service() 41 | generator = blob_service.list_blobs('tfcheckpoints') 42 | for blob in generator: 43 | blob_service.delete_blob('tfcheckpoints', blob.name) 44 | 45 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/misc/results_logger.py: -------------------------------------------------------------------------------- 1 | try: 2 | import matplotlib 3 | matplotlib.use('agg') 4 | import matplotlib.pyplot as plt 5 | except ImportError: 6 | print("Library matplotlib missing. 
Can't plot.") 7 | 8 | import pandas as pd 9 | from PIL import Image 10 | from io import BytesIO 11 | from tensorboard.backend.event_processing import event_accumulator 12 | from azureml.logging import get_azureml_logger 13 | 14 | #matplotlib.style.use('ggplot') 15 | 16 | def record_results(eval_path): 17 | print("Starting logging results, using eval dir {0}".format(eval_path)) 18 | ea = event_accumulator.EventAccumulator(eval_path, 19 | size_guidance={ # see below regarding this argument 20 | event_accumulator.COMPRESSED_HISTOGRAMS: 500, 21 | event_accumulator.IMAGES: 30, 22 | event_accumulator.AUDIO: 4, 23 | event_accumulator.SCALARS: 0, 24 | event_accumulator.HISTOGRAMS: 1 }) 25 | ea.Reload() 26 | 27 | # Plot mAP vs Iterations 28 | df = pd.DataFrame(ea.Scalars('Precision/mAP@0.5IOU')) 29 | max_vals = df.loc[df["value"].idxmax()] # max value of mAP 30 | 31 | fig = plt.figure(figsize=(6, 5), dpi=75) 32 | plt.plot(df["step"], df["value"]) 33 | plt.plot(max_vals["step"], max_vals["value"], "g+", mew=2, ms=10) 34 | plt.title("Precision") 35 | plt.ylabel("mAP") 36 | plt.xlabel("iterations") 37 | fig.savefig("./outputs/mAP.png", bbox_inches='tight') 38 | 39 | # Plot detection results for model with max mAP 40 | IMAGE_ID = "image-1" # Use blurry image as a baseline across runs 41 | IMG_OUTPUT = "./outputs/kittiwake.png" 42 | match = [x for x in ea.Images(IMAGE_ID) if x.step == max_vals["step"]] 43 | if len(match) > 0: 44 | img_encoded = match[0].encoded_image_string 45 | img_file = BytesIO(img_encoded) 46 | img = Image.open(img_file) 47 | print(img.size) 48 | img.save(IMG_OUTPUT, "PNG") 49 | else: 50 | message = "Did not find images summary for step {0} with max mAP {1}. Need to increase event_accumulator.IMAGES?" 51 | print(message.format(max_vals["step"], max_vals["value"])) 52 | 53 | run_logger = get_azureml_logger() 54 | run_logger.log("max_mAP", max_vals["value"]) 55 | run_logger.log("max_mAP_iteration#", max_vals["step"]) 56 | print("Done logging results") -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/misc/zip_helper.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import shutil 3 | import time 4 | from os import rename 5 | 6 | def unzip_file(filepath, destination): 7 | print("Unzipping {0} to {1}".format(filepath, destination)) 8 | with zipfile.ZipFile(filepath,"r") as zip_ref: 9 | zip_ref.extractall(destination) 10 | 11 | def zip_dir(source, add_timestamp=True): 12 | output_filename = source 13 | zip = output_filename + ".zip" 14 | print("Zipping {0} to {1}".format(source, zip)) 15 | shutil.make_archive(output_filename, 'zip', source) 16 | if add_timestamp: 17 | timestr = time.strftime("%Y%m%d%H%M%S") 18 | newName = output_filename + timestr + ".zip" 19 | print("Add timestamp {}".format(newName)) 20 | print(newName) 21 | rename(zip, newName) 22 | zip = newName 23 | return zip 24 | 25 | 26 | def append_it(filename, it): 27 | return "{0}_{2}.{1}".format(*(filename.rsplit('.', 1) + [it])) 28 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/pascal_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 1 3 | name: 'Kittiwake' 4 | } 5 | -------------------------------------------------------------------------------- /Tensorflow-Object-Detection/tf_train_eval.py: -------------------------------------------------------------------------------- 1 | import
os 2 | import argparse 3 | from misc.azure_utils import load_file_from_blob 4 | from misc.zip_helper import unzip_file 5 | from object_detection.train_eval import main as train_obj 6 | from misc.results_logger import record_results 7 | 8 | BLOB = "tfobj" 9 | MODELS_ZIP = "models.zip" 10 | TRAIN_DATA_ZIP = "kw_train_eval.zip" 11 | data_path = os.path.join(os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'], "kw_data") 12 | 13 | load_file_from_blob(BLOB, MODELS_ZIP, os.path.join(data_path, MODELS_ZIP)) 14 | unzip_file(os.path.join(data_path, MODELS_ZIP), data_path) 15 | 16 | load_file_from_blob(BLOB, TRAIN_DATA_ZIP, os.path.join(data_path, TRAIN_DATA_ZIP)) 17 | unzip_file(os.path.join(data_path, TRAIN_DATA_ZIP), data_path) 18 | print("Starting training") 19 | train_obj("") 20 | 21 | # Get directory where all TF logging events are 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--eval_dir', type=str) 24 | args = parser.parse_known_args() 25 | result_dir = args[0].eval_dir 26 | print("Eval dir {0}".format(result_dir)) 27 | record_results(result_dir) -------------------------------------------------------------------------------- /assets/README.md: -------------------------------------------------------------------------------- 1 | ## Data Credit 2 | The data used in experiments was collected by [Dr. Rachael Orben](https://rachaelorben.dunked.com/red-legged-kittiwake-incubation) of Oregon State University and Abram Fleishman of San Jose State University and [Conservation Metrics](http://conservationmetrics.com), Inc. 3 | It was collected as part of a large project investigating early breeding season responses of red-legged kittiwakes to changes in prey availability and linkages to the non-breeding stage in the Bering Sea, Alaska. 4 | 5 | Data for CNTK training is [here](https://olgalicodestory.blob.core.windows.net/kittiwakedata/kittiwake_labled.zip). 6 | Data in TF Records format for training in Tensorflow is [here](https://olgalicodestory.blob.core.windows.net/kittiwakedata/kittiwake_labled_tfrecords.zip). 7 | -------------------------------------------------------------------------------- /sample-visualization/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: 'airbnb', 3 | parser: 'babel-eslint', 4 | env: { 5 | browser: true, 6 | }, 7 | rules: { 8 | 'react/jsx-filename-extension': [1, { extensions: ['.js'] }], 9 | 'no-await-in-loop': [0], 10 | 'no-restricted-syntax': ['error', 'ForInStatement', 'LabeledStatement', 'WithStatement'], 11 | }, 12 | }; 13 | -------------------------------------------------------------------------------- /sample-visualization/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | 6 | # testing 7 | /coverage 8 | 9 | # production 10 | /build 11 | 12 | # misc 13 | .DS_Store 14 | .env.local 15 | .env.development.local 16 | .env.test.local 17 | .env.production.local 18 | 19 | npm-debug.log* 20 | yarn-debug.log* 21 | yarn-error.log* 22 | -------------------------------------------------------------------------------- /sample-visualization/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:latest 2 | 3 | RUN apt-get update 4 | RUN npm install -g n yarn 5 | RUN n latest 6 | 7 | ADD . .
8 | RUN yarn 9 | RUN yarn build 10 | RUN yarn global add serve 11 | 12 | CMD serve -s build -p 80 13 | -------------------------------------------------------------------------------- /sample-visualization/README.md: -------------------------------------------------------------------------------- 1 | # Conservation Metrics - Birds Eye View 2 | 3 | ## Getting Started 4 | 5 | - Fill in `SUBSCRIPTION_KEY`, `API_HOST`, `AZURE_STORAGE_ACCOUNT`, `AZURE_STORAGE_ACCESS_KEY` in `src/lib.js` 6 | - `npm install -g yarn` 7 | - `yarn` 8 | - `yarn start` 9 | 10 | Constant Values: 11 | 12 | | Constant | Description | 13 | | --- | --- | 14 | | `SUBSCRIPTION_KEY` | Your [Azure API Management](https://azure.microsoft.com/en-us/services/api-management/) Subscription Key | 15 | | `API_HOST` | The root URL for your Azure API Management API | 16 | | `AZURE_STORAGE_ACCOUNT` | Your [Azure Blob storage](https://azure.microsoft.com/en-us/services/storage/blobs/) account | 17 | | `AZURE_STORAGE_ACCESS_KEY` | Your Azure Blob storage account access key | 18 | | `TENSORFLOW_AUTHORIZATION_BEARER` | If configured, your Authorization Bearer header value | 19 | -------------------------------------------------------------------------------- /sample-visualization/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nest", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "azure-storage": "^2.5.0", 7 | "babel-eslint": "^8.0.0", 8 | "material-ui": "^1.0.0-beta.10", 9 | "material-ui-icons": "^1.0.0-beta.5", 10 | "prop-types": "^15.5.10", 11 | "react": "^16.0.0", 12 | "react-dom": "^16.0.0", 13 | "react-router-dom": "^4.2.2", 14 | "react-scripts": "1.0.13", 15 | "request": "^2.81.0" 16 | }, 17 | "scripts": { 18 | "start": "react-scripts start", 19 | "build": "react-scripts build", 20 | "test": "react-scripts test --env=jsdom", 21 | "eject": "react-scripts eject" 22 | }, 23 | "devDependencies": { 24 | "@types/material-ui": "^0.18.2", 25 | "@types/prop-types": "^15.5.2", 26 | "@types/react": "^16.0.9", 27 | "@types/react-dom": "^16.0.0", 28 | "@types/react-router-dom": "^4.0.8", 29 | "@types/request": "^2.0.4", 30 | "eslint-config-airbnb": "^15.1.0", 31 | "eslint-plugin-import": "^2.7.0", 32 | "eslint-plugin-jsx-a11y": "^5.1.1", 33 | "eslint-plugin-react": "^7.3.0", 34 | "tslint-config-prettier": "^1.5.0" 35 | }, 36 | "main": "index.js", 37 | "license": "MIT" 38 | } 39 | -------------------------------------------------------------------------------- /sample-visualization/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/detection-amlworkbench/a486fb930d518b69e809934d7f747bb301f785ff/sample-visualization/public/favicon.ico -------------------------------------------------------------------------------- /sample-visualization/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 14 | 23 | 24 | Conservation Metrics | BirdsEyeView 25 | 26 | 27 | 28 | 31 |
32 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /sample-visualization/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "Birds Eye View", 3 | "name": "Conservation Metrics | Birds Eye View", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "192x192", 8 | "type": "image/png" 9 | } 10 | ], 11 | "start_url": "./index.html", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /sample-visualization/src/App.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import { BrowserRouter as Router } from 'react-router-dom'; 3 | import Drawer from './Drawer'; 4 | import { getContainers } from './lib'; 5 | 6 | class App extends Component { 7 | constructor(props) { 8 | super(props); 9 | this.state = { 10 | containers: [], 11 | }; 12 | this.state = { ...props }; 13 | } 14 | 15 | componentDidMount() { 16 | getContainers().then((containers) => { 17 | this.setState({ containers }); 18 | }); 19 | } 20 | 21 | render() { 22 | return ( 23 | 24 | 25 | 26 | ); 27 | } 28 | } 29 | 30 | export default App; 31 | -------------------------------------------------------------------------------- /sample-visualization/src/Container.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import Card, { CardContent, CardHeader } from 'material-ui/Card'; 4 | import Table, { TableBody, TableCell, TableHead, TableRow } from 'material-ui/Table'; 5 | import Button from 'material-ui/Button'; 6 | import Image from './Image'; 7 | import { cntk, getBlobs, generateAzureBlobURL } from './lib'; 8 | 9 | class Container extends Component { 10 | constructor(props) { 11 | super(props); 12 | this.state = { 13 | blobs: [], 14 | loadingBlobs: [], // blob names 15 | labels: {}, // dictionary of file URL -> label array 16 | maxConcurrency: 4, 17 | search: '', 18 | currentBlob: null, // blob name 19 | runningCNTK: false, 20 | searchedBlobs: [], // blob objects 21 | }; 22 | } 23 | 24 | componentDidMount() { 25 | this.getBlobs().then((blobs) => { 26 | this.setState({ 27 | blobs, 28 | }); 29 | }); 30 | } 31 | 32 | getBlobs = async () => getBlobs(this.props.name); 33 | 34 | runCNTK = async () => { 35 | this.setState( 36 | { 37 | runningCNTK: true, 38 | }, 39 | () => { 40 | let loadingBlobs = [...this.state.loadingBlobs]; 41 | const blobs = this.state.search ? 
[...this.state.searchedBlobs] : [...this.state.blobs]; 42 | const cntkPromise = () => { 43 | if (blobs.length <= 0) { 44 | return Promise.resolve(); 45 | } 46 | const blob = blobs.shift(); 47 | loadingBlobs.push(blob.name); 48 | this.setState({ loadingBlobs }); 49 | return cntk(blob.name) 50 | .then((blobLabels) => { 51 | const labels = { ...this.state.labels }; 52 | labels[blob.name] = blobLabels; 53 | loadingBlobs = this.state.loadingBlobs.filter( 54 | loadingBlobName => loadingBlobName !== blob.name, 55 | ); 56 | this.setState({ labels, loadingBlobs }); 57 | }) 58 | .then(() => cntkPromise()) 59 | .catch((err) => { 60 | console.error(err); 61 | loadingBlobs = this.state.loadingBlobs.filter( 62 | loadingBlob => loadingBlob.name !== blob.name, 63 | ); 64 | this.setState({ loadingBlobs }); 65 | }); 66 | }; 67 | 68 | // Run a max of maxConcurrency 69 | Promise.all( 70 | Array(this.state.maxConcurrency) 71 | .fill(null) 72 | .map(() => cntkPromise()), 73 | ) 74 | .then(() => this.setState({ runningCNTK: false })) 75 | .catch((err) => { 76 | console.error(err); 77 | this.setState({ runningCNTK: false }); 78 | }); 79 | }, 80 | ); 81 | }; 82 | 83 | downloadJson = () => { 84 | // Blob string to download 85 | const dataStr = `data:text/json;charset=utf-8,${encodeURIComponent( 86 | JSON.stringify(this.state.labels), 87 | )}`; 88 | 89 | // Create fake download element and click 90 | const dlAnchorElem = document.createElement('a'); 91 | dlAnchorElem.setAttribute('href', dataStr); 92 | dlAnchorElem.setAttribute('download', 'labels.json'); 93 | dlAnchorElem.click(); 94 | dlAnchorElem.remove(); 95 | }; 96 | 97 | openDialog = (blob) => { 98 | this.setState({ 99 | currentBlob: blob.name, 100 | }); 101 | }; 102 | 103 | closeDialog = () => { 104 | this.setState({ 105 | currentBlob: null, 106 | }); 107 | }; 108 | 109 | runSearch = (searchString) => { 110 | const search = new RegExp(searchString, 'i'); 111 | const searchedBlobs = this.state.blobs.filter(blob => search.exec(blob.name)); 112 | this.setState({ searchedBlobs, search: searchString }); 113 | }; 114 | 115 | render() { 116 | const blobs = this.state.search ? this.state.searchedBlobs : this.state.blobs; 117 | 118 | return ( 119 |
120 | 121 | 122 | 123 | 124 | 125 | 126 | Filename 127 | Size (Bytes) 128 | Operations 129 | 130 | 131 | 132 | {blobs.map(blob => ( 133 | 134 | {blob.name} 135 | {blob.contentLength} 136 | 137 | 138 | {this.state.currentBlob === blob.name ? ( 139 | { 144 | this.setState({ currentBlob: null }); 145 | }} 146 | /> 147 | ) : null} 148 | 149 | 150 | ))} 151 | 152 |
153 |
154 |
155 |
156 | ); 157 | } 158 | } 159 | 160 | Container.propTypes = { 161 | name: PropTypes.string.isRequired, 162 | lastModified: PropTypes.string, 163 | }; 164 | 165 | Container.defaultProps = { 166 | lastModified: '', 167 | }; 168 | 169 | export default Container; 170 | -------------------------------------------------------------------------------- /sample-visualization/src/Drawer.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import { withStyles } from 'material-ui/styles'; 4 | import Drawer from 'material-ui/Drawer'; 5 | import AppBar from 'material-ui/AppBar'; 6 | import Toolbar from 'material-ui/Toolbar'; 7 | import Typography from 'material-ui/Typography'; 8 | import Divider from 'material-ui/Divider'; 9 | import Paper from 'material-ui/Paper'; 10 | import List, { ListItem, ListItemIcon, ListItemText } from 'material-ui/List'; 11 | import FolderIcon from 'material-ui-icons/Folder'; 12 | import { Route, Link } from 'react-router-dom'; 13 | import Container from './Container'; 14 | 15 | const drawerWidth = 240; 16 | const styles = theme => ({ 17 | root: { 18 | width: '100%', 19 | // height: 430, 20 | height: '100vh', 21 | zIndex: 1, 22 | overflow: 'hidden', 23 | }, 24 | appFrame: { 25 | position: 'relative', 26 | display: 'flex', 27 | width: '100%', 28 | height: '100%', 29 | }, 30 | appBar: { 31 | position: 'absolute', 32 | width: `calc(100% - ${drawerWidth}px)`, 33 | marginLeft: drawerWidth, 34 | overflow: 'scroll', 35 | }, 36 | drawerPaper: { 37 | position: 'relative', 38 | height: '100%', 39 | width: drawerWidth, 40 | }, 41 | drawerHeader: theme.mixins.toolbar, 42 | content: { 43 | backgroundColor: theme.palette.background.default, 44 | width: '100%', 45 | padding: theme.spacing.unit * 3, 46 | height: 'calc(100% - 56px)', 47 | marginTop: 56, 48 | overflow: 'scroll', 49 | [theme.breakpoints.up('sm')]: { 50 | height: 'calc(100% - 64px)', 51 | marginTop: 64, 52 | }, 53 | }, 54 | }); 55 | 56 | function PermanentDrawer(props) { 57 | const { classes } = props; 58 | 59 | return ( 60 |
61 |
62 | 63 | 64 | 65 | Project Birds-Eye-View 66 | 67 | 68 | 69 | 75 |
76 | 77 | 78 | {props.containers.map(container => ( 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | ))} 88 | 89 | 90 |
91 | { 94 | const container = props.containers.find( 95 | potential => potential.name === match.params.container, 96 | ); 97 | return container ? ( 98 | 103 | ) : null; 104 | }} 105 | /> 106 | ( 110 | 118 | 119 | Welcome to Birds Eye View! 120 | 121 | 122 | Choose a container on the left sidebar 123 | 124 | 125 | Happy Predicting! 126 | 127 | 128 | )} 129 | /> 130 |
131 |
132 |
133 | ); 134 | } 135 | 136 | PermanentDrawer.propTypes = { 137 | classes: PropTypes.object.isRequired, 138 | containers: PropTypes.array, 139 | }; 140 | 141 | PermanentDrawer.defaultProps = { 142 | containers: [], 143 | }; 144 | 145 | export default withStyles(styles)(PermanentDrawer); 146 | -------------------------------------------------------------------------------- /sample-visualization/src/Image.css: -------------------------------------------------------------------------------- 1 | canvas { 2 | display: block; 3 | margin: 1em auto; 4 | border: 1px solid black; 5 | } 6 | -------------------------------------------------------------------------------- /sample-visualization/src/Image.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import Table, { TableBody, TableCell, TableHead, TableRow } from 'material-ui/Table'; 4 | import Button from 'material-ui/Button'; 5 | import Dialog, { DialogActions, DialogContent, DialogTitle } from 'material-ui/Dialog'; 6 | import Slide from 'material-ui/transitions/Slide'; 7 | import { LinearProgress } from 'material-ui/Progress'; 8 | import Tooltip from 'material-ui/Tooltip'; 9 | import { cntk, tensorflow } from './lib'; 10 | import './Image.css'; 11 | 12 | class Image extends Component { 13 | constructor(props) { 14 | super(props); 15 | this.state = { 16 | open: true, 17 | labels: props.labels, 18 | url: props.url, 19 | isLoading: false, 20 | errorOccured: false, 21 | }; 22 | } 23 | 24 | componentDidMount() { 25 | this.updateCanvas(); 26 | } 27 | 28 | componentDidUpdate() { 29 | this.updateCanvas(); 30 | } 31 | 32 | handleRequestClose = () => { 33 | this.setState({ open: false }, () => { 34 | this.props.onClose(); 35 | }); 36 | }; 37 | 38 | handleOpen = () => { 39 | this.setState({ open: true }); 40 | }; 41 | 42 | downloadJson = () => { 43 | // Blob string to download 44 | const dataStr = `data:text/json;charset=utf-8,${encodeURIComponent( 45 | JSON.stringify(this.state.labels), 46 | )}`; 47 | 48 | // Create fake download element and click 49 | const dlAnchorElem = document.createElement('a'); 50 | dlAnchorElem.setAttribute('href', dataStr); 51 | dlAnchorElem.setAttribute('download', 'labels.json'); 52 | dlAnchorElem.click(); 53 | dlAnchorElem.remove(); 54 | }; 55 | 56 | downloadImage = () => { 57 | this.canvas.toBlob((blob) => { 58 | const url = URL.createObjectURL(blob); 59 | 60 | const dlAnchorElem = document.createElement('a'); 61 | dlAnchorElem.setAttribute('href', url); 62 | dlAnchorElem.setAttribute('download', this.props.filename); 63 | dlAnchorElem.click(); 64 | dlAnchorElem.remove(); 65 | }); 66 | }; 67 | 68 | runCNTK = async () => { 69 | try { 70 | this.setState({ isLoading: true }, async () => { 71 | const labels = await cntk(this.props.filename); 72 | this.setState( 73 | { 74 | isLoading: false, 75 | labels: [...this.state.labels, ...labels], 76 | }, 77 | () => { 78 | this.updateCanvas(); 79 | }, 80 | ); 81 | }); 82 | } catch (err) { 83 | this.setState({ 84 | errorOccured: true, 85 | }); 86 | console.error(err); 87 | } 88 | }; 89 | 90 | runTensorFlow = async () => { 91 | this.setState({ isLoading: true }, async () => { 92 | try { 93 | const labels = await tensorflow(this.props.url); 94 | this.setState( 95 | { 96 | isLoading: false, 97 | labels: [...this.state.labels, ...labels], 98 | }, 99 | () => { 100 | this.updateCanvas(); 101 | }, 102 | ); 103 | } catch (err) { 104 | this.setState({ 105 | errorOccured: true, 
106 | }); 107 | console.error(err); 108 | } 109 | }); 110 | }; 111 | 112 | updateCanvas = () => { 113 | const image = document.createElement('img'); 114 | image.setAttribute('crossOrigin', 'Anonymous'); 115 | image.onload = () => { 116 | if (this.canvas) { 117 | const canvasWidth = 850; 118 | const scale = canvasWidth / image.width; 119 | const canvasHeight = image.height * scale; 120 | this.canvas.width = canvasWidth; 121 | this.canvas.height = canvasHeight; 122 | const ctx = this.canvas.getContext('2d'); 123 | 124 | // render image on convas and draw the square labels 125 | ctx.drawImage(image, 0, 0, canvasWidth, canvasHeight); 126 | ctx.lineWidth = 5; 127 | this.state.labels.forEach((label) => { 128 | ctx.strokeStyle = label.color || 'black'; 129 | ctx.strokeRect(label.x, label.y, label.width, label.height); 130 | }); 131 | } 132 | }; 133 | image.src = this.state.url; 134 | }; 135 | 136 | render() { 137 | return ( 138 |
139 | } 144 | > 145 | 146 | {this.props.url} 147 | 148 | 149 |
150 | 151 | 152 | 153 | Label 154 | Box 155 | Score 156 | 157 | 158 | 159 | {this.state.labels.length > 0 ? ( 160 | this.state.labels.map(label => ( 161 | 162 | {label.text} 163 | 164 | [{label.x}, {label.y}, {label.width}, {label.height}] 165 | 166 | {label.score} 167 | 168 | )) 169 | ) : ( 170 | 171 | 172 | {this.state.isLoading ? ( 173 | 174 | ) : ( 175 | 'Label data unavailable or not yet loaded' 176 | )} 177 | 178 | --- 179 | --- 180 | 181 | )} 182 | 183 |
184 | { 187 | this.canvas = ref; 188 | }} 189 | /> 190 |
191 |
192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 |
201 |
202 | ); 203 | } 204 | } 205 | 206 | Image.propTypes = { 207 | url: PropTypes.string.isRequired, 208 | filename: PropTypes.string.isRequired, 209 | labels: PropTypes.arrayOf( 210 | PropTypes.shape({ 211 | x: PropTypes.number.isRequired, 212 | y: PropTypes.number.isRequired, 213 | width: PropTypes.number.isRequired, 214 | height: PropTypes.number.isRequired, 215 | text: PropTypes.string.isRequired, 216 | score: PropTypes.number.isRequired, 217 | color: PropTypes.string, 218 | }), 219 | ), 220 | onClose: PropTypes.func, 221 | }; 222 | 223 | Image.defaultProps = { 224 | labels: [], 225 | onClose: () => {}, 226 | }; 227 | 228 | export default Image; 229 | -------------------------------------------------------------------------------- /sample-visualization/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | padding: 0; 4 | font-family: sans-serif; 5 | } 6 | -------------------------------------------------------------------------------- /sample-visualization/src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom'; 3 | import './index.css'; 4 | import App from './App'; 5 | import registerServiceWorker from './registerServiceWorker'; 6 | 7 | ReactDOM.render(, document.getElementById('root')); 8 | registerServiceWorker(); 9 | -------------------------------------------------------------------------------- /sample-visualization/src/lib.js: -------------------------------------------------------------------------------- 1 | import * as azure from 'azure-storage'; 2 | 3 | const SUBSCRIPTION_KEY = ''; 4 | const API_HOST = ''; 5 | const AZURE_STORAGE_ACCOUNT = ''; 6 | const AZURE_STORAGE_ACCESS_KEY = ''; 7 | const TENSORFLOW_AUTHORIZATION_BEARER = ''; 8 | 9 | /* 10 | * Create a blob storage URL 11 | * @param {string} containerName 12 | * @param {string} filename 13 | */ 14 | export function generateAzureBlobURL(containerName, filename) { 15 | const url = `https://${AZURE_STORAGE_ACCOUNT}.blob.core.windows.net/${containerName}/${filename}`; 16 | return url; 17 | } 18 | 19 | export async function getBlobService() { 20 | try { 21 | const blobService = new azure.BlobService(AZURE_STORAGE_ACCOUNT, AZURE_STORAGE_ACCESS_KEY); 22 | return blobService; 23 | } catch (err) { 24 | console.error(err); 25 | window.localStorage.clear(); 26 | return null; 27 | } 28 | } 29 | 30 | /** 31 | * @returns {Promise} 32 | */ 33 | export async function getContainers() { 34 | const blobService = await getBlobService(); 35 | if (!blobService) { 36 | return []; 37 | } 38 | 39 | try { 40 | let continuationToken; 41 | let containers = []; 42 | const fetchListSegment = token => 43 | new Promise((resolve, reject) => { 44 | blobService.listContainersSegmented(token, (err, result) => { 45 | if (err) { 46 | return reject(err); 47 | } 48 | return resolve(result); 49 | }); 50 | }); 51 | do { 52 | const listContainerResult = await fetchListSegment(continuationToken); 53 | continuationToken = listContainerResult.continuationToken; 54 | containers = [...containers, ...listContainerResult.entries]; 55 | } while (continuationToken); 56 | return containers; 57 | } catch (err) { 58 | console.error(err); 59 | throw err; 60 | } 61 | } 62 | 63 | /** 64 | * Given the container, return the list of blobs in the container 65 | * @param {string} container 66 | * @return {Array} 67 | */ 68 | export async function getBlobs(container) { 69 | const blobService = await 
getBlobService(); 70 | if (!blobService) { 71 | return []; 72 | } 73 | 74 | try { 75 | let continuationToken; 76 | let blobs = []; 77 | const fetchListSegment = token => 78 | new Promise((resolve, reject) => { 79 | blobService.listBlobsSegmented(container, token, (err, result) => { 80 | if (err) { 81 | return reject(err); 82 | } 83 | return resolve(result); 84 | }); 85 | }); 86 | do { 87 | const listBlobResult = await fetchListSegment(continuationToken); 88 | continuationToken = listBlobResult.continuationToken; 89 | blobs = [...blobs, ...listBlobResult.entries]; 90 | } while (continuationToken); 91 | 92 | // only return images 93 | return blobs.filter(blob => /\.(gif|jpg|jpeg|tiff|png)$/i.exec(blob.name)); 94 | } catch (err) { 95 | console.error(err); 96 | return []; 97 | } 98 | } 99 | 100 | /** 101 | * @param {string} fileurl - full url to the image to be processed 102 | * @returns {Array} 103 | */ 104 | export async function tensorflow(fileurl) { 105 | return fetch(`${API_HOST}/tensorflow/`, { 106 | method: 'post', 107 | headers: { 108 | Accept: 'application/json', 109 | Authorization: TENSORFLOW_AUTHORIZATION_BEARER, 110 | 'Content-Type': 'application/json', 111 | 'Cache-Control': 'no-cache', 112 | 'Ocp-Apim-Trace': 'true', 113 | 'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY, 114 | }, 115 | body: JSON.stringify({ 116 | input: fileurl, 117 | }), 118 | }) 119 | .then(response => response.json()) 120 | .then((body) => { 121 | const jsonBody = JSON.parse(body); 122 | const parsed = { output: JSON.parse(jsonBody.output) }; 123 | const labels = parsed.output.map((label) => { 124 | const x = Number.parseInt(label[0], 10); 125 | const y = Number.parseInt(label[2], 10); 126 | const width = Number.parseInt(label[1] - label[0], 10); 127 | const height = Number.parseInt(label[3] - label[2], 10); 128 | const text = 'TensorFlow'; 129 | const score = Number.parseFloat('0.000'); 130 | const color = 'orange'; 131 | return { x, y, width, height, text, score, color }; 132 | }); 133 | 134 | return labels; 135 | }) 136 | .catch((err) => { 137 | console.error(err); 138 | return []; 139 | }); 140 | } 141 | 142 | /** 143 | * @param {string} filename - filename of file in 'data' container 144 | * @returns {Array