├── .gitignore ├── LICENSE ├── Legacy ├── README.md ├── config.py ├── convert_vott.py ├── create_knots_tf_record.py ├── detect_object.py ├── filter_images_bysize.py ├── images │ └── VOTT_knot_defect.PNG ├── process_images.py ├── run_detection.py ├── run_process_images.py └── utils │ └── Utils.ipynb ├── README.md ├── cli └── src │ ├── cli.py │ ├── operations.py │ └── test_operations.py ├── config.ini ├── config_description.md ├── images ├── VOTT_animal.PNG ├── VOTT_knot_defect.PNG ├── init_predict.PNG └── semi_automated.png ├── init_pred_desription.md ├── tag ├── download_vott_json.py └── upload_vott_json.py ├── test ├── Images_source.json ├── Images_source_workdir90.json ├── all_predictions.npy ├── all_predictions_cow.npy ├── board_images_png │ ├── st1026.png │ ├── st1194.png │ ├── st1578.png │ ├── st1611.png │ └── st1840.png ├── camera_images │ ├── IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG │ ├── IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG │ ├── IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG │ ├── IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG │ ├── IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG │ ├── IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG │ ├── IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG │ ├── IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG │ ├── IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG │ └── IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG ├── camera_images_source.json ├── init_class_get_rows_min.npy ├── init_classes_map.json ├── run_all_tests.py ├── test_create_init_predictions.py ├── test_create_predictions.py ├── test_download_vott_json.py ├── test_init_download_vott_json.py ├── test_init_tf_detector.py ├── test_make_vott_output.py ├── test_tf_detector.py ├── test_workdir_init_pred │ └── camera_images │ │ ├── IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG │ │ ├── IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG │ │ ├── IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG │ │ ├── IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG │ │ ├── IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG │ │ ├── IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG │ │ ├── IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG │ │ ├── IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG │ │ ├── IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG │ │ └── IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG ├── test_workdir_train │ └── board_images_png │ │ ├── st1026.png │ │ ├── st1194.png │ │ ├── st1578.png │ │ └── st1611.png ├── testconfig.ini ├── totag_no_folder_source.csv ├── totag_source.csv ├── totag_source2.csv ├── untagged_cow.csv └── untagged_source.csv ├── train ├── active_learning_init_pred.sh ├── active_learning_initialize.sh ├── active_learning_train.sh ├── convert_tf_record.py ├── create_predictions.py ├── cv_train.py ├── export_inference_graph.py ├── initialize_vott_pull.py ├── map_validation.py ├── pipeline.config ├── repartition_test_set.py ├── repartition_test_set_script.sh ├── tf_detector.py ├── update_blob_folder.py └── validation.py └── utils ├── blob_utils.py ├── config.py ├── convert_to_jpeg.py ├── decode_tf_record.py └── repartition_test_set.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *tfevents* 2 | *weights*/ 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | config.ini 9 | 10 | # C extensions 11 | *.so 12 | 13 | #PyCharm 14 | .idea/ 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | 
develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | .static_storage/ 63 | .media/ 64 | local_settings.py 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | input/ 113 | weights/*.h5 114 | models/*.h5 115 | 116 | # csv files 117 | tag/*.csv 118 | 119 | # TF exported graph files 120 | *.pb -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Olga Liakhovich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Legacy/README.md: -------------------------------------------------------------------------------- 1 | # Active learning + object detection. 2 | Let's use the wood knots dataset as an example: the goal is to label the data and to train a model that can detect wood knots. 3 | Here is a [link](https://olgaliakrepo.blob.core.windows.net/woodknots/board_images_png.zip) to the dataset: a zip file with 800+ board png images.
4 | As we label the data we want to optimize human effort by leveraging ML to pre-label the data. 5 | The flow will be: 6 | 1) Label a small set of data (Set #1) to train a simple but already somewhat useful model that can detect objects of interest (wood knots). 7 | In the case of wood knots, spending around 1h and going through 150 images will be enough. I used the [VOTT](https://github.com/Microsoft/VoTT) tool for drawing bounding boxes. 8 | I've shared my labeling results for Set #1 [here](https://olgaliakrepo.blob.core.windows.net/woodknots/labeled_set1_VOTT.7z): import the board_images_set1 folder into VOTT while making sure that board_images_set1.json is located at the same level as the folder with images. The json file contains info about the bounding box locations that I have marked. 9 | 10 | Here is an example of the labeling flow in VOTT: I've labeled wood "knots" (round shapes) and "defects" (pretty much any non-round shaped type of defect): 11 | 12 | ![Labeling](Legacy/images/VOTT_knot_defect.PNG) 13 | 14 | 2) Train Model #1. 15 | 3) Now select a bigger set of images that were not used in Set #1. This will be our Set #2. 16 | 4) Use Model #1 for inference on Set #2: the model should be able to detect quite a few objects of interest. 17 | 5) Load the object detection results from the previous step into the labeling tool (VOTT). 18 | 6) Now, instead of "labeling from scratch", a human will much more quickly review the detection results from Model #1 and make small adjustments. 19 | 7) Combine training Set #1 and Set #2. Train Model #2. Observe how Model #2's overall performance increases. Celebrate :). 20 | 21 | # Code setup 22 | The process in this repo heavily relies on the Tensorflow Object Detection API. So as a pre-req, make sure you have it working on the samples. 23 | Further below I assume you cloned TF object detection to a location like: 24 | `repo\models` 25 | 1) Create TF records for training (and evaluation: use the --set=val param) 26 | 27 | `python create_knots_tf_record.py --data_dir=C:\data\woodknots\board_images_png_output --output_path=knots_train.record --set=train --label_map_path=C:\data\woodknots\board_images_png_output\pascal_label_map.pbtxt` 28 | 29 | As you run VOTT for labeling and export the result you will have YOUR_FOLDER_WITH_IMAGES_output and inside it there will be pascal_label_map.pbtxt 30 | 31 | Note: I'm running _create_knots_tf_record.py_ from within _repo\models\research_ 32 | 33 | 2) Train Model #1 using TF Object Detection 34 | `python train.py --logtostderr --train_dir=.object_detection\knot_models --pipeline_config_path=object_detection\samples\configs\faster_rcnn_resnet50_knots.config` 35 | 36 | Note: I based _faster_rcnn_resnet50_knots.config_ on _faster_rcnn_resnet50_pets.config_. I made the following changes: 37 | 38 | a) Set _num_classes_ appropriately (3 in my case: knots, other wood defects and background) 39 | 40 | b) Set _fine_tune_checkpoint_ to point to the location of the _faster_rcnn_resnet50_coco_2018_01_28_ model in my file system. Something like this: 41 | `C:\\repo\\models\\research\\object_detection\\faster_rcnn_resnet50_coco_2018_01_28\\faster_rcnn_resnet50_coco_2018_01_28\\model.ckpt` 42 | 43 | c) Lowered _num_steps_ to 10000 44 | 45 | d) Added more data augmentation options: 46 | `data_augmentation_options { 47 | random_horizontal_flip { 48 | } 49 | random_crop_image { 50 | } 51 | random_image_scale { 52 | } 53 | }` 54 | 55 | e) Updated _input_path_ and _label_map_path_ for training and evaluation. A sketch of what the label map file looks like is shown below.
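For reference, a minimal _pascal_label_map.pbtxt_ for this two-class setup would look something like the sketch below (VOTT generates the actual file on export, so treat this as an illustration -- ids start at 1, with 0 reserved for the background class):

`item {
  id: 1
  name: 'knot'
}
item {
  id: 2
  name: 'defect'
}`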
56 | 57 | 3) Export the inference graph 58 | 59 | `python export_inference_graph.py --input_type image_tensor --pipeline_config_path object_detection\samples\configs\faster_rcnn_resnet50_knots.config --trained_checkpoint_prefix object_detection\knot_models\model.ckpt-10000 --output_directory .\fine_tuned_model_set1_10k` 60 | 61 | 4) Run evaluation (so later you can compare Model #1 and Model #2) 62 | 63 | `python eval.py --logtostderr --pipeline_config_path object_detection\samples\configs\faster_rcnn_resnet50_knots.config --checkpoint_dir=object_detection\knot_models_10000 --eval_dir=eval_10000` 64 | 65 | 5) Copy the _active_learning_ folder to _repo\models\research\object_detection_ 66 | 67 | 6) Set active-learning-detect\detect_object.py to use Model #1 68 | 69 | ` PATH_Fold = "C:\\repo\\models\\research\\fine_tuned_model_set1_10k\\" 70 | PATH_TO_CKPT = PATH_Fold + 'frozen_inference_graph.pb'` 71 | 72 | 7) Use Model #1 to detect objects in Set #2: execute _object_detection\active-learning-detect\run_detection.py_. Set the input to be the folder with Set #2 images. 73 | 74 | The results of detection will be saved in a csv file: woodknots_detection_log.csv. Each row in the csv file contains the location of a detected object and its class, as illustrated below.
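For illustration, the first lines of _woodknots_detection_log.csv_ look something like this (the header is exactly what _detect_object.py_ writes; the data row is a made-up example, with bbox_0..bbox_3 holding relative ymin, xmin, ymax and xmax):

`image,class,score,bbox_0,bbox_1,bbox_2,bbox_3,im_width,im_height
st1026.png,knot,0.92,0.12,0.30,0.25,0.41,1024,512`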
75 | 76 | 8) The next step is converting the csv file with detection results to the format supported by the labeling tool of choice. 77 | 78 | For using VOTT run this script: 79 | 80 | `\object_detection\active-learning-detect\convert_vott.py --input "C:\data\woodknots\set2_res\woodknots_detection_log.csv" --output "C:\data\woodknots\set2_res\vott.json"` 81 | 82 | 9) Now you have Set #2 pre-labeled. To load it into VOTT copy _vott.json_ from the previous step to the same level as your FOLDER_WITH_SET2_IMAGES and rename vott.json to _FOLDER_WITH_SET2_IMAGES.json_. Then start VOTT, load FOLDER_WITH_SET2_IMAGES for tagging, review the pre-labeled info, fix labels. 83 | 84 | 10) Once you get your Set #2 labeled you will repeat the step of converting the dataset (now with Set #2) to tfrecords. 85 | 86 | Then train Model #2 on the bigger dataset (Set #1 combined with Set #2). You can specify multiple source files for training in _faster_rcnn_resnet50_knots.config_ by reading all files in a train directory: 87 | 88 | `tf_record_input_reader { 89 | input_path: "C:\\knots_data\\train\\*" 90 | }` 91 | 92 | 93 | -------------------------------------------------------------------------------- /Legacy/config.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class config: 4 | DETS_FILE = 'woodknots_detection_log.csv' 5 | METADATA_JSON = 'metadata.json' 6 | CROPS_DIR = "cropped" 7 | REDACTED_DIR = "redacted" 8 | OUTPUT_DIR = "output" 9 | MIN_CROP_DIM = 20 10 | CROP_CLASS = "knot" 11 | 12 | OUTPUT_CLASSES = ['knot','defect'] 13 | ID_CLASSES = [1,2] 14 | CLASSES_DICT = dict(zip(ID_CLASSES, OUTPUT_CLASSES)) 15 | 16 | SCALE_RATIO = 1 17 | CAR_ID = "car1" 18 | DEVICE_ID = "dev0" 19 | FILE_FORMAT = "{0}_{1}_{2}_{3}{4}" # carName, deviceName, timestamp, orig_fn, extension 20 | 21 | 22 | # class VideoConfig(config): 23 | # # dpi = 100 24 | # # COL_CYAN = (0, 255, 255) 25 | # # OUT_COLORS_CV2 = [COL_CYAN] 26 | # # TH_MODE = 0 27 | 28 | 29 | class DetectConfig(config): 30 | MIN_SCORE_THRESH = 0.7 31 | BATCH_SIZE = 1 32 | SAMPLE_SIZE = 10 33 | MIN_DETECT_HEIGHT = 15 34 | use_sample = 0 35 | use_relative_size = 1 36 | CLASS_NA = 'na' 37 | 38 | -------------------------------------------------------------------------------- /Legacy/convert_vott.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import json 4 | import argparse 5 | import config 6 | 7 | def vis_all_detections_cv2(im, dets): 8 | 9 | classes = dets['class'].tolist() 10 | scores = dets['score'].tolist() 11 | bboxes_0 = dets['bbox_0'].tolist() 12 | bboxes_1 = dets['bbox_1'].tolist() 13 | bboxes_2 = dets['bbox_2'].tolist() 14 | bboxes_3 = dets['bbox_3'].tolist() 15 | im_widths = dets['im_width'].tolist() 16 | im_heights = dets['im_height'].tolist() 17 | filenames = dets['image'].tolist() 18 | img_boxes = [] 19 | for i in range(0, len(dets)): 20 | # Handle images that did not have any detections 21 | if classes[i] == config.DetectConfig.CLASS_NA: 22 | continue 23 | 24 | box_dict = {} 25 | im_width = im_widths[i] 26 | im_height = im_heights[i] 27 | box_dict['x1'] = int(bboxes_1[i]*im_width) 28 | box_dict['y1'] = int(bboxes_0[i]*im_height) 29 | box_dict['x2'] = int(bboxes_3[i]*im_width) 30 | box_dict['y2'] = int(bboxes_2[i]*im_height) 31 | box_dict['id'] = i 32 | box_dict['width'] = im_width 33 | box_dict['height'] = im_height 34 | box_dict['type'] = 'Rectangle' 35 | box_dict['tags'] = [classes[i]] 36 | box_dict['name'] = i+1 37 | box_dict['fname'] = filenames[i] 38 | img_boxes.append(box_dict) 39 | 40 | return img_boxes 41 | 42 | 43 | parser = argparse.ArgumentParser(description='convert object detection results to VOTT json') 44 | parser.add_argument('--input', 45 | help='object detection log file') 46 | parser.add_argument('--output', 47 | help='vott json file') 48 | args = parser.parse_args() 49 | 50 | # logfile = '/home/tinzha/Projects/Audi/object_detection/output-v2/audi_video_log.csv' 51 | # raw_images = '/home/tinzha/Projects/Audi/object_detection/data-v2/raw' 52 | # out_json_filename = 'big-person.json' 53 | input_file = args.input 54 | out_json_filename = args.output 55 | dets = pd.read_csv(input_file, delimiter=',', header=0) 56 | image_names = list(dets.image.unique()) 57 | 58 | #image_names = sorted(image_names, key =lambda x: int(x[-11:-5])) 59 | #print (image_names) 60 | # dets =
log[log['score'] >= CONF_THRESH] 61 | # image_names = [x for i,x in enumerate(list(dets.image.unique())) if x not in name_list_v1] 62 | 63 | 64 | num = 0 65 | id_num = 0 66 | metadata = {} 67 | name_list = sorted(image_names, key=lambda name: name.lower()) 68 | print (name_list) 69 | for i in name_list: 70 | print (i) 71 | test = dets[dets['image'] == i] 72 | metadata[str(num)] = vis_all_detections_cv2(i, test) 73 | num = num+1 74 | 75 | vott_meta = {} 76 | vott_meta["frames"] = metadata 77 | vott_meta["framerate"] = "1" 78 | vott_meta["inputTags"] = "knot,defect" 79 | vott_meta["suggestiontype"] = "track" 80 | vott_meta["scd"] = 'false' 81 | vott_meta["visitedFrames"] = list(range(len(image_names))) 82 | 83 | with open(out_json_filename, 'w') as f: 84 | json.dump(vott_meta, f) 85 | print("All done. Metadata is saved to {}". format(out_json_filename)) 86 | -------------------------------------------------------------------------------- /Legacy/create_knots_tf_record.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | r"""Convert raw PASCAL dataset to TFRecord for object_detection. 17 | 18 | Example usage: 19 | python object_detection/dataset_tools/create_knot_tf_record \ 20 | --data_dir=/home/user/knots \ 21 | --output_path=/home/user/pascal.record 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import hashlib 28 | import io 29 | import logging 30 | import os 31 | 32 | from lxml import etree 33 | import PIL.Image 34 | import tensorflow as tf 35 | 36 | from object_detection.utils import dataset_util 37 | from object_detection.utils import label_map_util 38 | 39 | 40 | flags = tf.app.flags 41 | flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.') 42 | flags.DEFINE_string('set', 'train', 'Convert training set, validation set or ' 43 | 'merged set.') 44 | flags.DEFINE_string('annotations_dir', 'Annotations', 45 | '(Relative) path to annotations directory.') 46 | flags.DEFINE_string('output_path', '', 'Path to output TFRecord') 47 | flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt', 48 | 'Path to label map proto') 49 | flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore ' 50 | 'difficult instances') 51 | FLAGS = flags.FLAGS 52 | 53 | SETS = ['train', 'val', 'trainval', 'test'] 54 | 55 | 56 | def dict_to_tf_example(data, 57 | dataset_directory, 58 | label_map_dict, 59 | ignore_difficult_instances=False, 60 | image_subdirectory='JPEGImages'): 61 | """Convert XML derived dict to tf.Example proto. 62 | 63 | Notice that this function normalizes the bounding box coordinates provided 64 | by the raw data. 
65 | 66 | Args: 67 | data: dict holding PASCAL XML fields for a single image (obtained by 68 | running dataset_util.recursive_parse_xml_to_dict) 69 | dataset_directory: Path to root directory holding PASCAL dataset 70 | label_map_dict: A map from string label names to integer ids. 71 | ignore_difficult_instances: Whether to skip difficult instances in the 72 | dataset (default: False). 73 | image_subdirectory: String specifying subdirectory within the 74 | PASCAL dataset directory holding the actual image data. 75 | 76 | Returns: 77 | example: The converted tf.Example. 78 | 79 | Raises: 80 | ValueError: if the image pointed to by data['filename'] is not a valid PNG 81 | """ 82 | full_path = os.path.join(dataset_directory, image_subdirectory, data['filename']+'.png') 83 | with tf.gfile.GFile(full_path, 'rb') as fid: 84 | encoded_png = fid.read() 85 | encoded_png_io = io.BytesIO(encoded_png) 86 | image = PIL.Image.open(encoded_png_io) 87 | if image.format != 'PNG':  # the wood board images are PNG files 88 | raise ValueError('Image format not PNG') 89 | key = hashlib.sha256(encoded_png).hexdigest() 90 | 91 | width = int(data['size']['width']) 92 | height = int(data['size']['height']) 93 | 94 | xmin = [] 95 | ymin = [] 96 | xmax = [] 97 | ymax = [] 98 | classes = [] 99 | classes_text = [] 100 | truncated = [] 101 | poses = [] 102 | difficult_obj = [] 103 | try: 104 | for obj in data['object']: 105 | xmin.append(float(obj['bndbox']['xmin']) / width) 106 | ymin.append(float(obj['bndbox']['ymin']) / height) 107 | xmax.append(float(obj['bndbox']['xmax']) / width) 108 | ymax.append(float(obj['bndbox']['ymax']) / height) 109 | classes_text.append(obj['name'].encode('utf8')) 110 | classes.append(label_map_dict[obj['name']]) 111 | poses.append(obj['pose'].encode('utf8')) 112 | except KeyError: 113 | print("object not found in file") 114 | 115 | example = tf.train.Example(features=tf.train.Features(feature={ 116 | 'image/height': dataset_util.int64_feature(height), 117 | 'image/width': dataset_util.int64_feature(width), 118 | 'image/filename': dataset_util.bytes_feature( 119 | data['filename'].encode('utf8')), 120 | 'image/source_id': dataset_util.bytes_feature( 121 | data['filename'].encode('utf8')), 122 | 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 123 | 'image/encoded': dataset_util.bytes_feature(encoded_png), 124 | 'image/format': dataset_util.bytes_feature('png'.encode('utf8')), 125 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 126 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 127 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 128 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 129 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 130 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 131 | 'image/object/view': dataset_util.bytes_list_feature(poses), 132 | })) 133 | return example 134 | 135 | def get_examples_list(data_dir, prefix, setName): 136 | examples_path = os.path.join(data_dir, 'ImageSets', 'Main', 137 | prefix + setName+ '.txt') 138 | examples_list = dataset_util.read_examples_list(examples_path) 139 | return examples_list 140 | 141 | def main(_): 142 | if FLAGS.set not in SETS: 143 | raise ValueError('set must be in : {}'.format(SETS)) 144 | print("Getting data for {} set".format(FLAGS.set)) 145 | 146 | data_dir = FLAGS.data_dir 147 | 148 | writer = tf.python_io.TFRecordWriter(FLAGS.output_path) 149 | 150 | label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
151 | 152 | if FLAGS.set == 'trainval': 153 | examples_list_knot1 = get_examples_list(data_dir, 'knot_', 'train') 154 | examples_list_knot2 = get_examples_list(data_dir, 'knot_', 'val') 155 | #print(len(examples_list_knot1), len(examples_list_knot2), len(examples_list_defect1), len(examples_list_defect2)) 156 | examples_list = examples_list_knot1 + examples_list_knot2 # + examples_list_defect1 + examples_list_defect2 157 | else: 158 | examples_list = get_examples_list(data_dir, 'knot_', FLAGS.set) 159 | 160 | print("About to parse {} examples".format(len(examples_list))) 161 | 162 | annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir) 163 | for idx, example in enumerate(examples_list): 164 | if idx % 10 == 0: 165 | logging.info('On image %d of %d', idx, len(examples_list)) 166 | print('On image {0} of {1}'.format(idx, len(examples_list))) 167 | path = os.path.join(annotations_dir, example + '.xml') 168 | with tf.gfile.GFile(path, 'r') as fid: 169 | xml_str = fid.read() 170 | xml = etree.fromstring(xml_str) 171 | data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation'] 172 | 173 | tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict, 174 | FLAGS.ignore_difficult_instances) 175 | writer.write(tf_example.SerializeToString()) 176 | 177 | writer.close() 178 | 179 | 180 | if __name__ == '__main__': 181 | tf.app.run() 182 | -------------------------------------------------------------------------------- /Legacy/detect_object.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import glob 4 | import cv2 5 | import six.moves.urllib as urllib 6 | import sys 7 | import tarfile 8 | import tensorflow as tf 9 | import config 10 | import time 11 | 12 | sys.path.append("..") 13 | 14 | from object_detection.utils import label_map_util 15 | 16 | # set to one GPU machine 17 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 18 | 19 | def detect(input_dir, 20 | output_dir, 21 | model_name, 22 | use_sample = config.DetectConfig.use_sample, 23 | use_relative_size = config.DetectConfig.use_relative_size): 24 | detection_metadata_fn = os.path.join(output_dir, config.config.DETS_FILE) 25 | print("Starting detection {}, output {}, scale {}". format(input_dir, detection_metadata_fn, config.DetectConfig.SCALE_RATIO)) 26 | 27 | # Path to frozen detection graph. This is the actual model that is used for the object detection. 28 | PATH_Fold = "C:\\Users\\olgali\\repo\\models\\research\\fine_tuned_model_set1_10k\\" 29 | PATH_TO_CKPT = PATH_Fold + 'frozen_inference_graph.pb' 30 | 31 | # List of the strings that is used to add correct label for each box. 
32 | PATH_TO_LABELS = os.path.join('C:\\data\\woodknots\\board_images_set1_output', 'pascal_label_map.pbtxt') 33 | 34 | NUM_CLASSES = 2 35 | 36 | ## Load a (frozen) Tensorflow model into memory 37 | detection_graph = tf.Graph() 38 | with detection_graph.as_default(): 39 | od_graph_def = tf.GraphDef() 40 | with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: 41 | serialized_graph = fid.read() 42 | od_graph_def.ParseFromString(serialized_graph) 43 | tf.import_graph_def(od_graph_def, name='') 44 | 45 | ## Loading label map 46 | label_map = label_map_util.load_labelmap(PATH_TO_LABELS) 47 | categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) 48 | category_index = label_map_util.create_category_index(categories) 49 | 50 | ## Helper code 51 | def load_image_into_numpy_array(image): 52 | (im_width, im_height) = image.size 53 | return np.array(image.getdata()).reshape( 54 | (im_height, im_width, 3)).astype(np.uint8) 55 | 56 | #============================ Detection ============================ 57 | start_time = time.time() 58 | raw_img_name = next(os.walk(input_dir))[2][0] 59 | fn_img, ext = os.path.splitext(os.path.basename(raw_img_name)) 60 | raw_img = cv2.imread(os.path.join(input_dir, raw_img_name)) 61 | raw_im_height, raw_im_width = raw_img.shape[0:2] 62 | 63 | print("Reading frames...") 64 | if config.DetectConfig.SCALE_RATIO < 1: 65 | im_height, im_width = int(raw_im_height*config.DetectConfig.SCALE_RATIO), int(raw_im_width*config.DetectConfig.SCALE_RATIO) 66 | images, image_names = zip(*[(cv2.resize(cv2.imread(file), (im_width, im_height)), file.split('/')[-1]) for file in glob.glob(input_dir + '/*' + ext)])  # cv2.resize expects dsize as (width, height) 67 | else: 68 | im_height, im_width = raw_im_height, raw_im_width 69 | images, image_names = zip(*[(cv2.imread(file), file.split('/')[-1]) for file in glob.glob(input_dir + '/*' + ext)]) 70 | 71 | images = np.array(images) 72 | image_names = list(image_names) 73 | if (use_sample == 1): 74 | print("Using sample size: ", config.DetectConfig.SAMPLE_SIZE) 75 | images = images[:config.DetectConfig.SAMPLE_SIZE] 76 | image_names = image_names[:config.DetectConfig.SAMPLE_SIZE] 77 | 78 | 79 | boxes_total = np.zeros(shape = (len(images), 300, 4)) 80 | scores_total = np.zeros(shape = (len(images), 300)) 81 | classes_total = np.zeros(shape = (len(images), 300)) 82 | 83 | with detection_graph.as_default(): 84 | with tf.Session(graph=detection_graph) as sess: 85 | # Definite input and output Tensors for detection_graph 86 | image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 87 | # Each box represents a part of the image where a particular object was detected. 88 | detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 89 | # Each score represents the level of confidence for each of the objects. 90 | # The score is shown on the result image, together with the class label. 91 | detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') 92 | detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') 93 | num_detections = detection_graph.get_tensor_by_name('num_detections:0') 94 | 95 | for i in range(0, len(images), config.DetectConfig.BATCH_SIZE): 96 | feed_images = np.array(images[i:i+config.DetectConfig.BATCH_SIZE]) 97 | (boxes, scores, classes, num) = sess.run( 98 | [detection_boxes, detection_scores, detection_classes, num_detections], 99 | feed_dict={image_tensor: feed_images}) 100 | boxes_total[i:i+config.DetectConfig.BATCH_SIZE] = boxes 101 | scores_total[i:i+config.DetectConfig.BATCH_SIZE] = scores 102 | classes_total[i:i+config.DetectConfig.BATCH_SIZE] = classes 103 | 104 | print("--- %s seconds ---" % (time.time() - start_time)) 105 | #============================ Post-processing ============================ 106 | all_dets = [] 107 | skipped_dets = 0 108 | for i in range(len(images)): 109 | image_name = os.path.basename(image_names[i]) 110 | per_score = scores_total[i] 111 | per_class = classes_total[i] 112 | per_box = boxes_total[i] 113 | ind = np.where(per_score >= config.DetectConfig.MIN_SCORE_THRESH) 114 | #print("Found {} boxes for class {}".format(len(ind[0]), config.config.CLASSES_DICT)) 115 | 116 | saved_detection = 0 117 | for j in ind[0]:  # separate index to avoid shadowing the image loop variable i 118 | bbox_height = abs(per_box[j][0] - per_box[j][2])*im_height 119 | if bbox_height < config.DetectConfig.MIN_DETECT_HEIGHT: 120 | skipped_dets = skipped_dets + 1 121 | continue 122 | if use_relative_size: 123 | bbox = per_box[j] 124 | else: 125 | bbox = [ per_box[j][1]*im_width, per_box[j][0]*im_height, per_box[j][3]*im_width, per_box[j][2]*im_height] 126 | 127 | all_dets.append(image_name+','+str(config.config.CLASSES_DICT[per_class[j]]) + ',' + str(per_score[j]) + ',' + 128 | ','.join(map(str,bbox)) + ',' 129 | + str(im_width) + ',' + str(im_height)) 130 | saved_detection = 1 131 | # Handle images that did not have any detections 132 | if not saved_detection: 133 | bbox = [0, 0, 0, 0] 134 | all_dets.append( 135 | image_name + ',' + config.DetectConfig.CLASS_NA + ',' + str(0) + ',' + 136 | ','.join(map(str, bbox)) + ',' 137 | + str(im_width) + ',' + str(im_height)) 138 | 139 | ## Save to log file 140 | out = open(detection_metadata_fn, 'w') 141 | line = 'image,class,score,bbox_0,bbox_1,bbox_2,bbox_3,im_width,im_height' 142 | out.write("%s\n" % line) 143 | 144 | for line in all_dets: 145 | out.write("%s\n" % line) 146 | out.flush() 147 | 148 | out.close() 149 | 150 | print("Skipped {0} detections whose height is less than {1}".
format(skipped_dets, config.DetectConfig.MIN_DETECT_HEIGHT)) 151 | -------------------------------------------------------------------------------- /Legacy/filter_images_bysize.py: -------------------------------------------------------------------------------- 1 | #source: https://www.geeksforgeeks.org/filtering-images-based-size-attributes-python/ 2 | from PIL import Image 3 | from shutil import copyfile 4 | import os, os.path 5 | 6 | 7 | def filterImages(path, thresholdWidth, thresholdHeight): 8 | # Defining images array for 9 | # identifying only image files 10 | imgs = [] 11 | 12 | # List of possible images extensions 13 | # add if you want more 14 | valid_images = [".jpg", ".gif", ".png", ".tga", 15 | ".jpeg", ".PNG", ".JPG", ".JPEG"] 16 | 17 | # Storing all images in images array (imgs) 18 | for f in os.listdir(path): 19 | ext = os.path.splitext(f)[1] 20 | 21 | if ext.lower() not in valid_images: 22 | continue 23 | imgs.append(f) 24 | 25 | # Creating the filteredImages 26 | # directory if it does not exist 27 | directory = os.path.join(path, 'filteredImages') 28 | if not os.path.exists(directory): 29 | os.makedirs(directory) 30 | 31 | # Defining filteredImages array for 32 | # storing all the images we need 33 | filteredImages = [] 34 | 35 | for i in imgs: 36 | image = Image.open(os.path.join(path, i)) 37 | 38 | # Storing width and height of an image 39 | width, height = image.size 40 | 41 | # if only width exceeds the thresholdWidth 42 | if (width > thresholdWidth and 43 | height <= thresholdHeight): 44 | 45 | image.resize((thresholdWidth, 46 | (thresholdWidth * height) 47 | // width)).save(os.path.join(directory, i)) 48 | 49 | # if only height exceeds the thresholdHeight 50 | elif (width <= thresholdWidth and 51 | height > thresholdHeight): 52 | 53 | image.resize(((thresholdHeight * width) 54 | // height, thresholdHeight)).save(os.path.join(directory, i)) 55 | 56 | # if both the parameters exceed 57 | # the threshold attributes 58 | elif (width > thresholdWidth and 59 | height > thresholdHeight): 60 | 61 | image.resize((thresholdWidth, thresholdHeight)).save(os.path.join(directory, i)) 62 | else:  # image within bounds -- copy it over unchanged 63 | copyfile(os.path.join(path, i), 64 | os.path.join(directory, i)) 65 | 66 | filteredImages.append(i) 67 | 68 | # returning the filteredImages array 69 | return filteredImages 70 | 71 | 72 | # Driver Code 73 | if __name__ == '__main__': 74 | filteredImages = [] 75 | 76 | # Enter the path of the images folder, 77 | # the thresholdWidth (in pixels) and 78 | # thresholdHeight (in pixels) 79 | filteredImages = filterImages("path", 1000, 1000) 80 | -------------------------------------------------------------------------------- /Legacy/images/VOTT_knot_defect.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/Legacy/images/VOTT_knot_defect.PNG -------------------------------------------------------------------------------- /Legacy/process_images.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import pandas as pd 3 | import os 4 | import cv2 5 | import json 6 | import config 7 | from datetime import datetime, timezone 8 | 9 | def crop_objects(input_dir, 10 | output_dir): 11 | print("Starting processing, input {0}, output {1}".format(input_dir, output_dir)) 12 | 13 | df = pd.read_csv(os.path.join(output_dir, config.config.DETS_FILE)) 14 | images_data = df.groupby(["image"]) 15 | cropped_dir_path = os.path.join(output_dir,
config.config.CROPS_DIR) 16 | if not os.path.exists(cropped_dir_path): 17 | os.makedirs(cropped_dir_path) 18 | 19 | redacted_dir_path = os.path.join(output_dir, config.config.REDACTED_DIR) 20 | if not os.path.exists(redacted_dir_path): 21 | os.makedirs(redacted_dir_path) 22 | 23 | skipped_small = 0 24 | cropped_cnt = 0 25 | # Loop through all frames 26 | for image_data in images_data: 27 | image_fn = image_data[0] 28 | image_path = os.path.join(input_dir, image_fn) 29 | image_data_df = image_data[1] 30 | img_cv2 = cv2.imread(image_path) 31 | im_height, im_width = img_cv2.shape[0:2] 32 | 33 | fn_image, ext = os.path.splitext(os.path.basename(image_path)) 34 | dict_image_data = {} 35 | dict_image_data["frame"] = fn_image 36 | # Loop through all bboxes for the image 37 | 38 | for index, row in image_data_df.iterrows(): 39 | if row["class"] != config.config.CROP_CLASS: 40 | continue 41 | ymin, xmin, ymax, xmax = row["bbox_0"], row["bbox_1"], row["bbox_2"], row["bbox_3"] 42 | # save crops and metadata 43 | # blurring 44 | (left, right, top, bottom) = (int(xmin * im_width), int(xmax * im_width), 45 | int(ymin * im_height), int(ymax * im_height)) 46 | ic = img_cv2[top:bottom, left:right] 47 | ic_height, ic_width = ic.shape[0:2] 48 | 49 | if (ic_height < config.config.MIN_CROP_DIM) or (ic_width < config.config.MIN_CROP_DIM): 50 | skipped_small = skipped_small + 1 51 | continue 52 | 53 | crop_height = int(ic_height + 5) 54 | img_cropped = img_cv2[top:top + crop_height, left:right] 55 | img_cropped_fn = '{}_{}{}'.format(fn_image, index, ext) 56 | path_image_cropped = os.path.join(cropped_dir_path, img_cropped_fn) 57 | 58 | # save cropped image 59 | cv2.imwrite(path_image_cropped, img_cropped) 60 | cropped_cnt = cropped_cnt + 1 61 | 62 | 63 | print("Skipped {0} objects, height or width is less than {1}.".format(skipped_small, config.config.MIN_CROP_DIM)) 64 | print("Created {0} crops.".format(cropped_cnt)) -------------------------------------------------------------------------------- /Legacy/run_detection.py: -------------------------------------------------------------------------------- 1 | from detect_object import detect 2 | 3 | input_dir = 'C:\\data\\woodknots\\board_images_set2' 4 | output_dir = 'C:\\data\\woodknots\\set2_res' 5 | model_name = 'bla' 6 | detect(input_dir, output_dir, model_name) 7 | -------------------------------------------------------------------------------- /Legacy/run_process_images.py: -------------------------------------------------------------------------------- 1 | from process_images import crop_objects 2 | input = r'C:\data\woodknots\board_images_set2' 3 | output = r'C:\data\woodknots\set2_res' 4 | crop_objects(input, output) -------------------------------------------------------------------------------- /Legacy/utils/Utils.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Split data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 16, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import os\n", 19 | "import shutil" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 17, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "DATA_PATH = r'C:\\data\\woodknots\\board_images_png'\n", 31 | "DATA_SET1_PATH = r'C:\\data\\woodknots\\board_images_set1'\n", 32 | "DATA_SET2_PATH =
r'C:\\data\\woodknots\\board_images_set2'\n", 33 | "DATA_SET_PRED_PATH = r'C:\\data\\woodknots\\board_images_set_pred'\n", 34 | "SET1_FILE_CNT = 150\n", 35 | "SET2_FILE_CNT = 150" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 20, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "def do_copy(src, file_name, dest):\n", 47 | " full_file_name = os.path.join(src, file_name)\n", 48 | " if (os.path.isfile(full_file_name)):\n", 49 | " shutil.copy(full_file_name, dest)\n", 50 | " \n", 51 | "def loop_copy(src_files, src, dest):\n", 52 | " i = 0\n", 53 | " for file_name in src_files: \n", 54 | " do_copy(src, file_name, dest)\n", 55 | " i = i +1\n", 56 | " print(\"Copied {0} files to {1}\".format(i, dest))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 22, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "Copied 150 files to C:\\data\\woodknots\\board_images_set1\n", 71 | "Copied 150 files to C:\\data\\woodknots\\board_images_set2\n", 72 | "Copied 539 files to C:\\data\\woodknots\\board_images_set_pred\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "src_files = os.listdir(DATA_PATH)\n", 78 | "os.makedirs(DATA_SET1_PATH, exist_ok=True)\n", 79 | "os.makedirs(DATA_SET2_PATH, exist_ok=True)\n", 80 | "os.makedirs(DATA_SET_PRED_PATH, exist_ok=True)\n", 81 | "i = 0\n", 82 | "src_files_set1 = src_files[0:SET1_FILE_CNT]\n", 83 | "src_files_set2 = src_files[SET1_FILE_CNT: SET1_FILE_CNT + SET2_FILE_CNT]\n", 84 | "src_files_set3 = src_files[SET1_FILE_CNT + SET2_FILE_CNT:]\n", 85 | "loop_copy(src_files_set1, DATA_PATH, DATA_SET1_PATH)\n", 86 | "loop_copy(src_files_set2, DATA_PATH, DATA_SET2_PATH)\n", 87 | "loop_copy(src_files_set3, DATA_PATH, DATA_SET_PRED_PATH)\n" 88 | ] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "tf-py352", 94 | "language": "python", 95 | "name": "tf-py352" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": "3.5.2" 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 0 112 | } 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Active learning + object detection 2 | Labeling images for object detection is a commonly required task for getting started with a Computer Vision related project. 3 | The good news is that you do not have to label all images (draw bounding boxes) from scratch --- the goal of this project is to add (semi)automation to the process. 4 | Please refer to this blog post that describes Active Learning and the semi-automated flow: 5 | [Active Learning for Object Detection in Partnership with Conservation Metrics](https://www.microsoft.com/developerblog/2018/11/06/active-learning-for-object-detection/) 6 | We will use Transfer Learning and Active Learning as the core Machine Learning components of the pipeline. 7 | -- Transfer Learning: use a powerful model pre-trained on a big dataset (COCO) as a starting point for fine-tuning for the needed classes.
8 | -- Active Learning: a human annotator labels a small set of images (set1) and trains an Object Detection Model (model1) on this set1, then uses model1 to predict bounding boxes on more images (thus pre-labeling those). The human annotator reviews model1's predictions where the model was less confident -- and thus comes up with a new set of images -- set2. The next phase will be to train a more powerful model2 on a bigger train set that includes set1 and set2 and use model2's prediction results as a draft of labeled set3… 9 | The plan is to have 2 versions of the pipeline set up. 10 | 11 | # Semi-automated pipeline 12 | 13 | ![Flow](images/semi_automated.png) 14 | 15 | This one (ideally) requires minimal setup. The core components here are: 16 | 1) Azure Blob Storage with images to be labeled. 17 | It will also be used to save "progress" logs of labeling activities. 18 | 2) "Tagger" machine(s) 19 | These are the computer(s) that human annotator(s) use as the environment for labeling a portion of the images -- for example with [VOTT](https://github.com/Microsoft/VoTT). 20 | Here is an example of the labeling flow in VOTT: I've labeled wood "knots" (round shapes) and "defects" (pretty much any non-round shaped type of defect): 21 | 22 | ![Labeling](images/VOTT_knot_defect.PNG) 23 | 24 | 25 | 3) Model re-training machine (or service) 26 | This is the environment where the Object Detection model is retrained on the growing train set and where bounding boxes are predicted on unlabeled images. 27 | There is a config.ini that needs to be updated with details like the blob storage connection and the model retraining configuration. 28 | 29 | # Automated pipeline 30 | More details TBD. 31 | Basically the idea is to kick off an Active Learning cycle with model retraining as soon as a human annotator revises a new set of images. 32 | 33 | # Notes before we get started 34 | - The steps below refer to updating config.ini. You can find a detailed description of the config [here](config_description.md) 35 | - Got several thousand images (or many more) and not sure if random sampling will be helpful to get rolling with labeling data? 36 | Take a look at the [Guide to "initialization" predictions](init_pred_desription.md). 37 | 38 | # How to run the semi-automated pipeline 39 | The flow below assumes the following: 40 | 1) We use the Tensorflow Object Detection API (Faster RCNN with Resnet 50 as the default option) to fine-tune object detection. 41 | 2) The Tensorflow Object Detection API is set up on a Linux box (Azure DSVM is an option) that you can ssh to. See the docs for the Tensorflow Object Detection API regarding its general config. 42 | 3) Data (images) is in Azure blob storage 43 | 4) Human annotators use [VOTT](https://github.com/Microsoft/VoTT) to label\revise images. To support another tagging tool its output (bounding boxes) needs to be converted to csv form -- pull requests are welcomed! 44 | 45 | The general flow has 2 steps: 46 | 1) Environments setup 47 | 2) Active Learning cycle: labeling data and running scripts to update the model and feed results back for the human annotator to review. 48 | The whole flow is currently automated with **4 scripts** the user needs to run. 49 | 50 | 51 | ### General prep 52 | 1) Provision Azure Blob storage. Create 2 containers: _"activelearningimages"_ and _"activelearninglabels"_ 53 | 2) Upload the unzipped folder with images to the _"activelearningimages"_ container. 54 | 55 | 56 | ### On Linux box aka Model (re)training env 57 | 1) Setup the Tensorflow Object Detection API if you have not already. 58 | This will include cloning of https://github.com/tensorflow/models. (On my machine I have it cloned to `/home/olgali/repos/models`). 59 | Run `research/object_detection/object_detection_tutorial.ipynb` to make sure the Tensorflow Object Detection API is functioning. 60 | 2) Clone this repo to the machine (for example: `/home/olgali/repos/models/research/active-learning-detect/`) 61 | 3) Update _config.ini_ (a sample of the filled-in values is sketched below): 62 | - set values for _AZURE_STORAGE_ACCOUNT_ and _AZURE_STORAGE_KEY_ 63 | - set (update if needed) values for the _# Data Information_ section 64 | - set values for the _# Training Machine_ and _# Tensorflow_ sections of the config.ini 65 | The _"python_file_directory"_ config value should point to the _"train"_ scripts from this project. 66 | Example: 67 | `python_file_directory=/home/olgali/repos/models/research/active-learning-detect/train` 68 | 4) pip install azure-blob packages: azure.storage.blob 69 |
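For orientation, a filled-in _config.ini_ on this box might start like the sketch below (the storage account name and key are placeholders -- substitute your own; the remaining values mirror this repo's sample config.ini):

`AZURE_STORAGE_ACCOUNT=mystorageaccount
AZURE_STORAGE_KEY=<your-storage-access-key>
image_container_name=activelearningimages
label_container_name=activelearninglabels
user_folders=True
classes=knots,defect
ideal_class_balance=0.6,0.3,0.1
filetype=*.png
python_file_directory=/home/olgali/repos/models/research/active-learning-detect/train`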
70 | ### Tagger machine(s) (could be same as Linux box or separate boxes\vms) 71 | 1) Have Python 3.6 up and running. 72 | 2) Pip install azure-blob packages: azure.storage.blob 73 | 3) Clone this repo, copy the updated config.ini from the Model re-training box (as it already has the Azure Blob Storage and other generic info). 74 | 4) Update the _config.ini_ values for the _# Tagger Machine_ section: 75 | `tagging_location=D:\temp\NewTag` 76 | 77 | # Label data, run the scripts! 78 | Overview: you will run **4 scripts** in total: 79 | - two scripts on the machine where model (re)training happens and 80 | - two scripts where human annotators label images (or review images pre-labeled by the model). 81 | 82 | ### On Linux box aka Model (re)training env 83 | Run the bash script to init the pipeline: 84 | `~/repos/models/research/active-learning-detect/train$ . ./active_learning_initialize.sh ../config.ini` 85 | This step will: 86 | - Download all images to the box. 87 | - Create totag_xyz.csv on the blob storage (the "activelearninglabels" container by default). 88 | This is the snapshot of image file names that need tagging (labeling). As human annotators make progress on labeling the data, the list will get smaller and smaller. 89 | 90 | ### Tagger machine(s) 91 | 1) Make sure that the tagging_location is empty. 92 | 2) Start each "phase" by downloading images to label (or to review pre-labeled images). 93 | The sample cmd below requests 40 images for tagging: 94 | `D:\repo\active-learning-detect\tag>python download_vott_json.py 40 ..\config.ini` 95 | This step will create a new version of totag_xyz.csv on blob storage that will have 40 images excluded from the list. 96 | The file tagging_abc.csv will hold the list of the 40 images being tagged. 97 | 3) Start [VOTT](https://github.com/Microsoft/VoTT), load the folder for labeling\review (in my case it will be `D:\temp\NewTag\images`) 98 | 4) Once done with labeling, push the results back to central storage: 99 | `D:\repo\active-learning-detect\tag>python upload_vott_json.py ..\config.ini` 100 | This step will push tagged_123.csv to blob storage: this file contains the actual bounding box coordinates for every image. 101 | Tagging_abc.csv will contain the list of files that are "work in progress" -- the ones to be tagged soon. 102 | 103 | 104 | Now the model can be trained. 105 | 106 | ### Model (re)training on Linux box 107 | Before your first time running the model, and at any later time if you would like to repartition the test set, run: 108 | 109 | `~/repos/models/research/active-learning-detect/train$ . 
./repartition_test_set_script.sh ../config.ini` 110 | 111 | This script will take all the tagged data and split some of it into a test set, which will not be trained/validated on and will then be used by evaluation code to return mAP values. 112 | 113 | Run the bash script: 114 | `~/repos/models/research/active-learning-detect/train$ . ./active_learning_train.sh ../config.ini` 115 | 116 | This script will kick off training based on the available labeled data. 117 | 118 | The model will be evaluated on the test set and the perf numbers will be saved in blob storage (performance.csv). 119 | 120 | The latest totag.csv will have predictions for all available images made by the newly trained model -- bounding box locations that can be used by the human annotator as a starter. 121 | 122 | ### Reviewing of pre-labeled results (on Tagger machine) 123 | Human annotator(s) delete any leftovers from previous predictions (csv files in active-learning-detect\tag, image dirs) and run again the sequence of: 124 | 1) Downloading the next batch of pre-labeled images for review (`active-learning-detect\tag\download_vott_json.py`) 125 | 2) Going through the pre-labeled images with [VOTT](https://github.com/Microsoft/VoTT) and fixing bounding boxes when needed. 126 | 3) Pushing the new set of labeled images back to storage (`active-learning-detect\tag\upload_vott_json.py`) 127 | 128 | The training cycle can now be repeated on a bigger training set, and a dataset with higher quality pre-labeled bounding boxes can be obtained. 129 | 130 | 131 | # Using Custom Vision service for training 132 | 133 | The Custom Vision service can be used instead of Tensorflow in case you do not have access to an Azure Data Science VM or another GPU-enabled machine. The steps for Custom Vision are pretty similar to those for Tensorflow, although the training step is slightly different: 134 | 135 | ### Model (re)training on Custom Vision 136 | If you would like to repartition the test set, run: 137 | 138 | `~/repos/models/research/active-learning-detect/train$ . ./repartition_test_set_script.sh ../config.ini` 139 | 140 | This script will take all the tagged data and split some of it into a test set, which will not be trained/validated on and will then be used by evaluation code to return mAP values. 141 | 142 | To train the model: 143 | `python cv_train.py ../config.ini` 144 | 145 | This python script will train a Custom Vision model based on the available labeled data. 146 | 147 | The model will be evaluated on the test set and the perf numbers will be saved in blob storage (performance.csv). 148 | 149 | The latest totag.csv will have predictions for all available images made by the newly trained model -- bounding box locations that can be used by the human annotator as a starter. 150 | 151 | 152 | # Sample dataset 153 | I'm using the wood knots dataset mentioned in this [blog](http://blog.revolutionanalytics.com/2017/09/wood-knots.html) 154 | Here is a [link](https://olgaliakrepo.blob.core.windows.net/woodknots/board_images_png.zip) to the dataset: a zip file with 800+ board png images. 155 | 156 | # Custom Vision HttpOperationError 'Bad Request' 157 | 158 | The current Custom Vision SDK is in preview mode, and one of the limitations is that an error while training does not return an error message, just a generic 'Bad Request' response. Common reasons for this error include: 159 | 1) Having a tag with fewer than 15 images. Custom Vision requires a minimum of 15 images per tag and will throw an error if it finds any tag with fewer than that many. 160 | 2) Having a tag out of bounds.
If for some reason you attempt to add a tag through the API which is out of bounds, it will accept the request but will throw an error while training. 161 | 3) No new images since last training session. If you try to train without adding additional images Custom Vision will return a bad request exception. 162 | The best way to debug these is to go into the Custom Vision website (customvision.ai) and click the train button, which should then tell you what the error was. 163 | -------------------------------------------------------------------------------- /cli/src/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from operations import ( 4 | init, 5 | download, 6 | upload, 7 | abandon, 8 | LOWER_LIMIT, 9 | UPPER_LIMIT 10 | ) 11 | 12 | if __name__ == "__main__": 13 | 14 | # how i want to use the tool: 15 | # python3 cli.py init --config /path/to/config.ini 16 | # python3 cli.py download --num-images 40 17 | # python3 cli.py upload 18 | # python3 cli.py abandon 19 | parser = argparse.ArgumentParser() 20 | 21 | parser.add_argument( 22 | 'operation', 23 | choices=['init', 'download', 'upload', 'abandon'] 24 | ) 25 | 26 | parser.add_argument('-n', '--num-images', type=int) 27 | 28 | parser.add_argument('-c', '--config') 29 | 30 | args = parser.parse_args() 31 | 32 | operation = args.operation 33 | 34 | if operation == 'init': 35 | init(args.config) 36 | elif operation == 'download': 37 | download(args.num_images) 38 | elif operation == 'upload': 39 | upload() 40 | else: 41 | abandon() 42 | -------------------------------------------------------------------------------- /cli/src/operations.py: -------------------------------------------------------------------------------- 1 | DEFAULT_NUM_IMAGES = 40 2 | LOWER_LIMIT = 0 3 | UPPER_LIMIT = 100 4 | 5 | 6 | class MissingConfigException(Exception): 7 | pass 8 | 9 | 10 | class ImageLimitException(Exception): 11 | pass 12 | 13 | 14 | def init(config): 15 | if (config is None): 16 | raise MissingConfigException() 17 | 18 | raise NotImplementedError 19 | 20 | 21 | def download(num_images): 22 | images_to_download = num_images 23 | 24 | if num_images is None: 25 | images_to_download = DEFAULT_NUM_IMAGES 26 | 27 | if images_to_download <= LOWER_LIMIT or images_to_download > UPPER_LIMIT: 28 | raise ImageLimitException() 29 | 30 | return images_to_download 31 | 32 | 33 | def upload(): 34 | raise NotImplementedError() 35 | 36 | 37 | def abandon(): 38 | raise NotImplementedError() 39 | -------------------------------------------------------------------------------- /cli/src/test_operations.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from operations import ( 3 | init, 4 | download, 5 | upload, 6 | abandon, 7 | MissingConfigException, 8 | ImageLimitException, 9 | DEFAULT_NUM_IMAGES, 10 | LOWER_LIMIT, 11 | UPPER_LIMIT 12 | ) 13 | 14 | 15 | class TestCLIOperations(unittest.TestCase): 16 | def test_init(self): 17 | with self.assertRaises(NotImplementedError): 18 | init("fakeconfig") 19 | 20 | def test_init_missing_config(self): 21 | with self.assertRaises(MissingConfigException): 22 | init(None) 23 | 24 | def test_download_under_limit(self): 25 | with self.assertRaises(ImageLimitException): 26 | download(LOWER_LIMIT) 27 | 28 | def test_download_over_limit(self): 29 | with self.assertRaises(ImageLimitException): 30 | download(UPPER_LIMIT + 1) 31 | 32 | def test_download_missing_image_count(self): 33 | downloaded_image_count = 
download(None) 34 | self.assertEqual(DEFAULT_NUM_IMAGES, downloaded_image_count) 35 | 36 | def test_download_with_image_count(self): 37 | downloaded_image_count = download(10) 38 | self.assertEqual(10, downloaded_image_count) 39 | 40 | def test_upload(self): 41 | with self.assertRaises(NotImplementedError): 42 | upload() 43 | 44 | def test_abandon(self): 45 | with self.assertRaises(NotImplementedError): 46 | abandon() 47 | 48 | if __name__ == '__main__': 49 | unittest.main() -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | # AZURE STORAGE ACCOUNT INFORMATION 2 | AZURE_STORAGE_ACCOUNT= 3 | AZURE_STORAGE_KEY= 4 | image_container_name=activelearningimages 5 | label_container_name=activelearninglabels 6 | # IMAGE INFORMATION 7 | user_folders=True 8 | classes=knots,defect 9 | # Provide the preferred distribution of classes among the images picked for review. 10 | # The last value corresponds to images where no objects were detected. 11 | # In the example below: 60% of the images that the user will be reviewing have at least one bbox with object class1 (knot), 12 | # 30% are images that have bboxes for class2 (defect), 13 | # and 10% of images get class "NULL" -- where neither knots nor defects were detected by the model 14 | ideal_class_balance=0.6,0.3,0.1 15 | filetype=*.png 16 | # TAGGING MACHINE 17 | tagging_location=C:\Users\t-yapand\Desktop\NewTag 18 | pick_max=False 19 | max_tags_per_pixel=2 20 | # 21 | # CUSTOM VISION 22 | # Uncomment lines below if using Azure Custom Vision Service 23 | # training_key= 24 | # prediction_key= 25 | # project_id= 26 | # 27 | # TRAINING MACHINE 28 | # Locations 29 | python_file_directory=/home/yashpande/active-learning-detect/train 30 | data_dir=/home/yashpande/data 31 | train_dir=/home/yashpande/data/training 32 | inference_output_dir=knots_inference_graphs 33 | tf_models_location=/home/yashpande/models/research 34 | download_location=/home/yashpande/downloads 35 | # Training 36 | train_iterations=200 37 | eval_iterations=10 38 | min_confidence=.5 39 | test_percentage=.2 40 | model_name=faster_rcnn_resnet50_coco_2018_01_28 41 | optional_pipeline_url=https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/samples/configs/faster_rcnn_resnet50_pets.config 42 | #Init Predictions 43 | init_model_name=faster_rcnn_resnet101_coco_2018_01_28 44 | # Config File Details 45 | old_label_path=PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt 46 | old_train_path=PATH_TO_BE_CONFIGURED/pet_faces_train.record-?????-of-00010 47 | old_val_path=PATH_TO_BE_CONFIGURED/pet_faces_val.record-?????-of-00010 48 | old_checkpoint_path=PATH_TO_BE_CONFIGURED/model.ckpt 49 | num_examples_marker=num_examples: 50 | num_steps_marker=num_steps: 51 | num_classes_marker=num_classes: 52 | # Calculated 53 | num_classes="$(awk -F ',' '{print NF}' <<< ${classes})" 54 | image_dir=${data_dir}/AllImages 55 | untagged_output=${data_dir}/untagged.csv 56 | tagged_output=${data_dir}/tagged.csv 57 | tagged_predictions=${data_dir}/tagged_preds.csv 58 | test_output=${data_dir}/test.csv 59 | validation_output=${data_dir}/val.csv 60 | tf_location=${tf_models_location}/object_detection 61 | tf_location_legacy=${tf_models_location}/object_detection/legacy 62 | PYTHONPATH=$PYTHONPATH:${tf_models_location}:${tf_models_location}/slim/ 63 | label_map_path=${data_dir}/pascal_label_map.pbtxt 64 | tf_record_location=${data_dir}/stamps.record 65 | tf_train_record=${tf_record_location%.*}_train.${tf_record_location##*.}
66 | tf_val_record=${tf_record_location%.*}_val.${tf_record_location##*.} 67 | tf_url=http://download.tensorflow.org/models/object_detection/${model_name}.tar.gz 68 | pipeline_file=${download_location}/${model_name}/pipeline.config 69 | fine_tune_checkpoint=${download_location}/${model_name}/model.ckpt 70 | init_pred_tf_url=http://download.tensorflow.org/models/object_detection/${init_model_name}.tar.gz 71 | init_model_graph=${download_location}/${init_model_name}/frozen_inference_graph.pb 72 | 73 | -------------------------------------------------------------------------------- /config_description.md: -------------------------------------------------------------------------------- 1 | # Guide to config.ini 2 | As config.ini has many values to fill out, this guide is intended to provide more detail about what each variable means and how to choose the right value. The guide is organized by sections in the config file. Note that the Azure Storage Account Information and Image Information need to be filled in for both tagging and training machines, while for the remaining sections only those corresponding to the type of machine must be filled. 3 | ## Azure Storage Account Information 4 | As described in the readme, azure blob storage containers are used to store image and label data as well as trained model inference graphs and performance files. To connect to the azure storage account, both the account name and an access key are required. Furthermore, the storage account needs to have separate containers to store images and to store label data. 5 | - AZURE_STORAGE_ACCOUNT: 6 | This is the azure blob storage account name. 7 | - AZURE_STORAGE_KEY: 8 | This is a valid access key for the blob storage account. 9 | - image_container_name: 10 | This is the name of the container within the blob storage account that holds all images. 11 | - label_container_name: 12 | This is the name of the container within the blob storage account that holds all label and model data. 13 | ## Image Information 14 | Certain information about the images is required before the tagging and training process can begin. This is mainly so that the right images can be found and so the right tag names are used. 15 | - user_folders: 16 | This determines whether or not the images in blob storage are within separate folders (e.g. by date or by weather condition). Set to True if they are, False if not. 17 | - classes: 18 | This is a comma separated list of all classes that are being tagged. Please ensure that there are no spaces in the list and only commas are used to separate names. 19 | - ideal_class_balance: 20 | This is a comma separated list giving the requested class distribution in the images being reviewed by the human expert. 21 | Example (for 2-class scenario): 22 | `ideal_class_balance=0.6,0.3,0.1` 23 | In this example: 24 | 60% of the images that the user will be reviewing will have at least one bbox with object class1 (knot), 25 | 30% of the images will have bboxes for class2 (defect), 26 | 10% of the images get class "NULL" -- where neither knots nor defects were detected by the model. 27 | 28 | - filetype: 29 | This is the type of image file used. The format is a glob pattern, so *.jpg for a .jpg file or *.png for a .png file. Note that only JPEG or PNG filetypes can be used with tensorflow. 30 | ## Tagging Machine 31 | These are variables that must be set (along with azure storage information and image information) on the config.ini file for the tagging machine. They are not needed on a training machine. 
32 | - tagging_location: 33 | This is the folder where all images will be downloaded and the VOTT .json file will be created. Please ensure that this folder is empty so that there are no conflicts while downloading/uploading. The folder will be created automatically if it does not exist. 34 | - pick_max: 35 | This determines whether the images with lowest or highest confidence are chosen to be labelled. Only set it to True if you wish to evaluate images where your model is very confident to ensure that it is not learning incorrect patterns with high confidence. Otherwise, keep it at the default value of False. 36 | - max_tags_per_pixel: 37 | This limits the number of tags per pixel, preventing too many duplicate tags. 38 | ## Custom Vision 39 | These variables are all available at customvision.ai if you select the settings icon. 40 | - training_key: 41 | This is your Custom Vision training key. 42 | - prediction_key: 43 | This is your Custom Vision prediction key. 44 | - project_id: 45 | This is the project ID for an **object detection** project in custom vision. You can create a new one by selecting the New Project button. 46 | ## Training Machine 47 | These are variables that must be set (along with azure storage information and image information) on the config.ini file for the training machine. They are not needed on a tagging machine. 48 | ### Locations 49 | These variables deal with the locations where data and files are kept on the training machine. These are needed to ensure that the right python files are being called and that active learning data is not interfering with other data on the machine. 50 | - python_file_directory: 51 | This is the directory housing all the python files from the active-learning-detect repository. It will be the train folder within the directory where the active-learning-detect repository was cloned. 52 | - data_dir: 53 | This is where all images and label files will be downloaded. Please ensure that this directory is empty / does not currently exist and that the disk it is hosted on has enough free space for all the images in blob storage. 54 | - train_dir: 55 | This is where tensorflow will update information while it is training. It can be used to visualize training using tensorboard through the command tensorboard --logdir ${train_dir}. This directory will be deleted if it already exists, so please ensure that it either does not exist or is empty. 56 | - inference_output_dir: 57 | This is where tensorflow will export the inference graph that saves the state of the model and is used for inference. Again, please ensure that the location is empty and that it contains enough space for an inference graph (up to ~100mb for large models). 58 | - tf_models_location: 59 | This is where the tensorflow models repository has been cloned. Please point to the research folder within the repository, as that is where all the necessary files are located. 60 | - download_location: 61 | This is where the tensorflow pretrained model and pipeline.config are stored. If this does not already exist, then this is where the model is downloaded from the tensorflow model zoo. 62 | ### Training 63 | This information relates to training the model. It includes the type of model to use and how to train and predict using the model. 64 | - train_iterations: 65 | This is how many iterations to train the model. More iterations can improve performance of the model, but may take longer and lead to model overfitting. 
66 | - eval_iterations: 67 | This is how many iterations of evaluation to run on the model. Evaluation is run using the validation set. 68 | - min_confidence: 69 | This is the minimum confidence with which a prediction is considered a valid prediction. This is to prevent predictions with very low confidence from being considered valid. 70 | - test_percentage: 71 | This is the percentage of the data to split into a separate test set. Note that the test set can be repartitioned using the repartition_test_set.py file within the train folder. 72 | - model_name: 73 | This is the name of the pretrained model to use for transfer learning. This model is automatically downloaded from the [tensorflow model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) if it does not already exist in the download_location. 74 | - optional_pipeline_url: 75 | This is an optional parameter that can be used to override the pipeline.config file in the pretrained model. We suggest using the [sample config files](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs) over the ones from the model zoo, as the model zoo files are not kept up to date with newer releases of tensorflow and may cause training to fail. 76 | ### Config File Details 77 | These parameters are used to replace certain parts of the pipeline.config file to match current settings. You may need to change some parameters every time you choose a different pipeline.config file. The last three values will likely not change. 78 | - old_label_path: 79 | This is the label path previously set in the pipeline.config file. It will be replaced with the current path to the label map file. 80 | - old_train_path: 81 | This is the path to the TF record file containing training data previously set in the pipeline.config file. It will be replaced with the path to a TF record file generated from the most recent set of tagged data. 82 | - old_val_path: 83 | This is the path to the TF record file containing validation data previously set in the pipeline.config file. It will be replaced with the path to a TF record file generated from the most recent set of tagged data. 84 | - old_checkpoint_path: 85 | This is the model checkpoint path previously set in the pipeline.config file. It will be replaced with a path to the model that is being used for transfer learning. 86 | - num_examples_marker: 87 | This is the marker used to designate the number of examples. The number after it will be changed to the number of examples in the evaluation file. 88 | - num_steps_marker: 89 | This is the marker used to designate the number of training steps. The number after it will be changed to the number of steps defined in train_iterations. 90 | - num_classes_marker: 91 | This is the marker used to designate the number of classes. The number after it will be replaced with the number of classes specified in classes. 
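To make this concrete, here is roughly what the substitution amounts to for the sample values in config.ini above (classes=knots,defect so num_classes becomes 2, and train_iterations=200; the "before" numbers are illustrative placeholders of the kind found in a pets sample config, not values taken from this repository):

```
# pipeline.config fragment before substitution
num_classes: 37
num_steps: 200000
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"

# the same fragment after substitution
num_classes: 2
num_steps: 200
label_map_path: "/home/yashpande/data/pascal_label_map.pbtxt"
```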
92 | -------------------------------------------------------------------------------- /images/VOTT_animal.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/images/VOTT_animal.PNG -------------------------------------------------------------------------------- /images/VOTT_knot_defect.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/images/VOTT_knot_defect.PNG -------------------------------------------------------------------------------- /images/init_predict.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/images/init_predict.PNG -------------------------------------------------------------------------------- /images/semi_automated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/images/semi_automated.png -------------------------------------------------------------------------------- /init_pred_desription.md: -------------------------------------------------------------------------------- 1 | # Guide to "initialization" predictions 2 | Assuming you have a dataset containing many thousands of images -- how do you get started with labeling the first 3 | few hundred images? 4 | What about the unbalanced case, when most of the pictures do not have much going on? 5 | If you just sample pictures randomly and _blindly_, it may take quite a few Active Learning cycles to set your model and 6 | training set onto the right path. 7 | 8 | ## Let's get "metadata" about each image 9 | We could use a pretrained model that can decently detect a few dozen or more object classes to get an idea of what kind 10 | of objects are in the images. The model might not provide super-accurate results, however some of those might be 11 | useful for more targeted image sampling. 12 | For example, if your dataset has common scenes of nature or city life, then using a model trained on the [COCO dataset](https://github.com/amikelive/coco-labels/blob/master/coco-labels-paper.txt) 13 | might give you an idea which images have objects that _resemble_ a person, car, deer and so on. And depending on your 14 | scenario you might focus your initial labeling efforts on images that have or don't have a particular class. 15 | 16 | ![Flow](images/init_predict.PNG) 17 | 18 | ## Settings in config.ini 19 | The following settings control what model is going to be used for the "initialization" predictions. 20 | - init_model_name=faster_rcnn_resnet101_coco_2018_01_28 21 | Model name to be used for predictions. The current code assumes it's a COCO-based model. 22 | - init_pred_tf_url=http://download.tensorflow.org/models/object_detection/${init_model_name}.tar.gz 23 | URL for downloading the model from the Tensorflow Object Detection model zoo. 24 | - init_model_graph=${download_location}/${init_model_name}/frozen_inference_graph.pb 25 | Location (on the DSVM) of the inference graph that's used for producing "initialization" predictions. 
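Under the hood, these settings feed the same detection pipeline that regular Active Learning iterations use. Here is a minimal sketch of what the flow does, mirroring the calls exercised by test_create_init_predictions.py in this repository (the graph path and image directory below are assumptions, not the script's actual values):

```python
import sys
sys.path.append("train")  # assumption: run from the repository root

from tf_detector import TFDetector
from create_predictions import get_suggestions

# A COCO-based model reports 90 numeric class IDs; they are passed in as strings "1".."90".
coco_classes = [str(class_id) for class_id in range(1, 91)]
frozen_graph = "downloads/faster_rcnn_resnet101_coco_2018_01_28/frozen_inference_graph.pb"
detector = TFDetector(coco_classes, frozen_graph)

# Scores every image under the image directory and writes one bbox per row to the CSVs;
# download_vott_json.py later filters these rows through the class mapping json.
get_suggestions(detector, "data/AllImages", "untagged.csv", "tagged_preds.csv",
                None, None,  # no tagged/tagging CSVs exist yet on a fresh dataset
                filetype="*.jpg", min_confidence=0.5, user_folders=True)
```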
26 | 27 | ## Running "initialization" predictions flow 28 | Once the config settings are set (and images are on blob storage) the user needs to do the following: 29 | - SSH to the DSVM and run the script that actually produces the predictions 30 | - provide the desired mapping (and merging) of detected classes to the classes of interest (more details below) 31 | - download the specified number of images to the client machine and review the tags 32 | 33 | *Produce predictions* 34 | SSH to the DSVM, activate the needed Tensorflow virtual environment if needed and run: 35 | `. ./active_learning_init_pred.sh ../config.ini` 36 | The output _init_totag*.csv_ contains bbox information for all detected objects. It's probably worth spending 37 | the time analyzing those results. 38 | 39 | *Class mapping json* 40 | Please refer to _sample_init_classes_map.json_ for reference. 41 | First we define that we want class "1" to be shown as class "person" in VOTT when the user is doing label review. 42 | We also want 60% of the images that will be pulled for review to have class "person" present in them: 43 | `{` 44 | `"initclass": "1",` 45 | `"map": "person",` 46 | `"balance": "0.6"` 47 | `}` 48 | 49 | Then we want to _merge_ several classes: "19" (horse) and "21" (cow) will be displayed in VOTT as "animal". 50 | `{` 51 | `"initclass": "19",` 52 | `"map": "animal",` 53 | `"balance": "0.2"` 54 | `},` 55 | `{` 56 | `"initclass": "21",` 57 | `"map": "animal",` 58 | `"balance": "0.2"` 59 | `}` 60 | 61 | We specify that 20% of each _animal_ class (40% in total) is present in the dataset that the user will be reviewing in VOTT. 62 | Also we specifically request not to include images where no known COCO classes were detected. Given that a COCO-based 63 | model may miss quite a few objects, it's still good practice to review some of those. 64 | The model might also detect classes that will be cluttering the image during the review process. For example, the dataset 65 | may have basket images that are wrongly classified as a "vase". In a scenario where we are not interested in detecting 66 | baskets or vases, we may want to simply "drop" the bboxes for the "vase" class (class 86 in COCO): 67 | ` "unmapclass":["64","86", "15"],` 68 | Finally, for _everything else_ -- classes we are not sure what to do with at this stage but whose bboxes we still want to preserve -- 69 | we will map them to a "default" class. We can set the name of the "default" class in the mapping json. 70 | 71 | *Review predictions in VOTT* 72 | On a client (tagger) machine run the usual script to download images. 
The only difference is that you'd be providing the 73 | "class mapping json" as the 3rd parameter: 74 | ` D:\repo\active-learning-detect\tag>python download_vott_json.py 200 ..\config.ini ..\sample_init_classes_map.json` 75 | 76 | ![Flow](images/VOTT_animal.PNG) -------------------------------------------------------------------------------- /tag/upload_vott_json.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import json 3 | import csv 4 | import time 5 | import cv2 6 | import shutil 7 | import re 8 | 9 | def extract_data(filename):  # returns (filename.name, height, width) for an image on disk 10 | height, width, _ = cv2.imread(str(filename),1).shape 11 | return filename.name, height, width 12 | 13 | def select_jsons(image_directory, user_folders, file_location):  # merges reviewed VOTT .json tags into tagged.csv and prunes those images from tagging.csv 14 | if user_folders: 15 | image_directory = Path(image_directory) 16 | all_images = [] 17 | all_jsons = [] 18 | for subfolder in image_directory.iterdir(): 19 | if subfolder.is_dir(): 20 | all_images.append([extract_data(filename) for filename in sorted(subfolder.iterdir(), 21 | key=lambda fullname: str(fullname.name).lower())]) 22 | all_jsons.append(str(subfolder)+".json") 23 | else: 24 | image_directory = Path(image_directory) 25 | all_images = [[extract_data(filename) for filename in sorted((image_directory/"Images").iterdir(), 26 | key=lambda fullname: str(fullname.name).lower())]] 27 | all_jsons = [str(image_directory/"Images")+".json"] 28 | 29 | for json_file, sorted_images in zip(all_jsons, all_images): 30 | 31 | image_directory = Path(json_file.rsplit(".", 1)[0]).stem 32 | with open(json_file, "r") as read_file: 33 | json_file = json.load(read_file)["frames"] 34 | 35 | if (file_location/"tagging.csv").is_file(): 36 | with (file_location/"tagging.csv").open(mode='r') as file: 37 | reader = csv.reader(file) 38 | header = next(reader) 39 | tagging_list = list(reader) 40 | else: 41 | header, tagging_list = [], []  # default the header too, so the tagging.csv rewrite below cannot hit an undefined name 42 | file_exists = (file_location/"tagged.csv").is_file() 43 | tagged = set() 44 | with (file_location/"tagged.csv").open(mode='a', newline='') as csv_file: 45 | csv_writer = csv.writer(csv_file) 46 | if not file_exists: 47 | if user_folders: 48 | csv_writer.writerow(["filename","class","xmin","xmax","ymin","ymax","height","width","folder"]) 49 | else: 50 | csv_writer.writerow(["filename","class","xmin","xmax","ymin","ymax","height","width"]) 51 | for index,(filename,true_height,true_width) in enumerate(sorted_images): 52 | tagged.add(filename) 53 | if filename in json_file: 54 | all_frames = json_file[filename] 55 | if all_frames: 56 | for cur_frame in all_frames: 57 | if cur_frame: 58 | vott_width = float(cur_frame["width"]) 59 | vott_height = float(cur_frame["height"]) 60 | x1 = float(cur_frame["x1"])/vott_width 61 | x2 = float(cur_frame["x2"])/vott_width 62 | y1 = float(cur_frame["y1"])/vott_height 63 | y2 = float(cur_frame["y2"])/vott_height 64 | for tag in cur_frame["tags"]: 65 | if user_folders: 66 | csv_writer.writerow([filename,tag,x1,x2,y1,y2,true_height,true_width,image_directory]) 67 | else: 68 | csv_writer.writerow([filename,tag,x1,x2,y1,y2,true_height,true_width]) 69 | else: 70 | if user_folders: 71 | csv_writer.writerow([filename,"NULL",0,0,0,0,true_height,true_width,image_directory]) 72 | else: 73 | csv_writer.writerow([filename,"NULL",0,0,0,0,true_height,true_width]) 74 | with (file_location/"tagging.csv").open(mode='w', newline='') as tagging: 75 | tagging_writer = csv.writer(tagging) 76 | if header: tagging_writer.writerow(header) 77 | # Does nothing if tagging_list is empty 78 | for row in filter(lambda x: x[0] not in 
tagged, tagging_list): 79 | tagging_writer.writerow(row) 80 | 81 | if __name__ == "__main__": 82 | from azure.storage.blob import BlockBlobService 83 | import sys 84 | import os 85 | # Allow us to import utils 86 | config_dir = str(Path.cwd().parent / "utils") 87 | if config_dir not in sys.path: 88 | sys.path.append(config_dir) 89 | from config import Config 90 | if len(sys.argv)<2: 91 | raise ValueError("Need to specify config file") 92 | config_file = Config.parse_file(sys.argv[1]) 93 | block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"]) 94 | container_name = config_file["label_container_name"] 95 | csv_file_loc = Path(config_file["tagging_location"]) 96 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagged_(.*).csv', blob.name)] 97 | if file_date: 98 | block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"tagged.csv")) 99 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)] 100 | if file_date: 101 | block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"tagging.csv")) 102 | #TODO: Ensure this parses folder recursively when given tagging location. Remove the .json part 103 | select_jsons(config_file["tagging_location"],config_file["user_folders"]=="True",csv_file_loc) 104 | block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagged",int(time.time() * 1000),"csv"), str(csv_file_loc/"tagged.csv")) 105 | block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagging",int(time.time() * 1000),"csv"), str(csv_file_loc/"tagging.csv")) 106 | -------------------------------------------------------------------------------- /test/Images_source.json: -------------------------------------------------------------------------------- 1 | {"framerate": "1", "frames": {"st1026.png": [{"height": 512.0, "id": 1, "name": 1, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 144, "x2": 174, "y1": 205, "y2": 254}, {"height": 512.0, "id": 2, "name": 2, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 142, "x2": 183, "y1": 213, "y2": 248}, {"height": 512.0, "id": 3, "name": 3, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 337, "x2": 361, "y1": 172, "y2": 202}], "st1578.png": [{"height": 512.0, "id": 1, "name": 1, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 293, "x2": 330, "y1": 188, "y2": 223}, {"height": 512.0, "id": 2, "name": 2, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 265, "x2": 293, "y1": 401, "y2": 438}], "st1611.png": [{"height": 512.0, "id": 1, "name": 1, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 317, "x2": 348, "y1": 440, "y2": 494}, {"height": 512.0, "id": 2, "name": 2, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 37, "x2": 55, "y1": 170, "y2": 189}], "st1840.png": [{"height": 512.0, "id": 1, "name": 1, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 292, "x2": 313, "y1": 134, "y2": 164}, {"height": 512.0, "id": 2, "name": 2, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 354, "x2": 377, "y1": 319, "y2": 342}, {"height": 512.0, "id": 3, "name": 3, "tags": ["knot"], "type": "Rectangle", "width": 488.0, "x1": 60, "x2": 92, "y1": 392, "y2": 423}]}, 
"inputTags": "knot,defect", "scd": false, "suggestiontype": "track", "tag_colors": ["#e9f1fe", "#f3e9ff"]} -------------------------------------------------------------------------------- /test/Images_source_workdir90.json: -------------------------------------------------------------------------------- 1 | {"framerate": "1", "frames": {"": [], "IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["1"], "type": "Rectangle", "width": 600.0, "x1": 155, "x2": 338, "y1": 50, "y2": 258}, {"height": 480.0, "id": 2, "name": 2, "tags": ["1"], "type": "Rectangle", "width": 600.0, "x1": 0, "x2": 190, "y1": 3, "y2": 268}], "IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["19"], "type": "Rectangle", "width": 600.0, "x1": 50, "x2": 233, "y1": 143, "y2": 293}], "IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["16"], "type": "Rectangle", "width": 600.0, "x1": 291, "x2": 568, "y1": 223, "y2": 455}], "IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["1"], "type": "Rectangle", "width": 600.0, "x1": 164, "x2": 264, "y1": 69, "y2": 285}, {"height": 480.0, "id": 2, "name": 2, "tags": ["27"], "type": "Rectangle", "width": 600.0, "x1": 155, "x2": 196, "y1": 102, "y2": 193}], "IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["16"], "type": "Rectangle", "width": 600.0, "x1": 0, "x2": 475, "y1": 127, "y2": 385}, {"height": 480.0, "id": 2, "name": 2, "tags": ["22"], "type": "Rectangle", "width": 600.0, "x1": 26, "x2": 463, "y1": 125, "y2": 388}], "IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["19"], "type": "Rectangle", "width": 853.0, "x1": 21, "x2": 403, "y1": 14, "y2": 447}], "IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["19"], "type": "Rectangle", "width": 853.0, "x1": 385, "x2": 644, "y1": 29, "y2": 327}], "IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["18"], "type": "Rectangle", "width": 853.0, "x1": 477, "x2": 710, "y1": 134, "y2": 307}]}, "inputTags": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90", "scd": false, "suggestiontype": "track", "tag_colors": ["#a5b2f6", "#f6fee6", "#f9a9dd", "#fbc1b4", "#fcbdb1", "#b4e8fb", "#ef9cfd", "#cdc3f8", "#f8c3d1", "#fff9f1", "#fae5fe", "#fcc3cd", "#fed9ff", "#e5f1fc", "#e3f7bf", "#d9f7ac", "#c4cefa", "#ecffb5", "#dcdffb", "#ccb0f9", "#fddddf", "#f1f0ff", "#d8f7a5", "#cfffaf", "#fcbcec", "#f7fffa", "#d3fbb7", "#e7ffd6", "#b0ffcf", "#adf0f4", "#fff2da", "#a7f8d2", "#ffcfd1", "#fc9eec", "#d8d4fb", "#abb7f9", "#fbfffe", "#e3fad2", "#fef3f9", "#dce1fb", "#ebd2fe", "#a2e7f7", "#fff9f8", "#f7c1f7", "#fffbfe", "#a3fdf5", "#daacfa", "#b5e9fe", "#b3fbda", "#ceb5f8", "#fddedf", "#aeeff7", "#d9fbda", "#dffba5", "#fcfff6", "#fcd3b5", "#e6ffa7", "#cbe8fe", "#ecb6f6", "#c8fce6", "#ebdeff", "#fae6c7", "#f8baed", "#cafaeb", "#fcfff8", "#fad7f4", "#ffefef", "#fef2f7", "#b5f9f4", "#ffd6c0", "#f3feeb", "#fbfffd", "#e6f5fc", "#fdfffc", "#e9f7fc", "#cee6ff", "#fffffc", "#f9fed8", "#dafea7", "#ecfed8", "#d7fdeb", "#fdb1cf", "#e4fdc4", "#c9fdbc", "#ffe0c8", 
"#f6aaac", "#fbfff9", "#f9c6eb", "#fee3fe", "#fbe0e3"]} -------------------------------------------------------------------------------- /test/all_predictions.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/all_predictions.npy -------------------------------------------------------------------------------- /test/all_predictions_cow.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/all_predictions_cow.npy -------------------------------------------------------------------------------- /test/board_images_png/st1026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/board_images_png/st1026.png -------------------------------------------------------------------------------- /test/board_images_png/st1194.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/board_images_png/st1194.png -------------------------------------------------------------------------------- /test/board_images_png/st1578.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/board_images_png/st1578.png -------------------------------------------------------------------------------- /test/board_images_png/st1611.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/board_images_png/st1611.png -------------------------------------------------------------------------------- /test/board_images_png/st1840.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/board_images_png/st1840.png -------------------------------------------------------------------------------- /test/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG -------------------------------------------------------------------------------- /test/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG -------------------------------------------------------------------------------- /test/camera_images_source.json: -------------------------------------------------------------------------------- 1 | {"framerate": "1", "frames": {"IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["person"], "type": "Rectangle", "width": 600.0, "x1": 155, "x2": 338, "y1": 50, "y2": 258}, {"height": 480.0, "id": 2, "name": 2, "tags": ["person"], "type": 
"Rectangle", "width": 600.0, "x1": 0, "x2": 190, "y1": 3, "y2": 268}], "IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["horse"], "type": "Rectangle", "width": 600.0, "x1": 50, "x2": 233, "y1": 143, "y2": 293}], "IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["person"], "type": "Rectangle", "width": 600.0, "x1": 164, "x2": 264, "y1": 69, "y2": 285}, {"height": 480.0, "id": 2, "name": 2, "tags": ["default"], "type": "Rectangle", "width": 600.0, "x1": 155, "x2": 196, "y1": 102, "y2": 193}], "IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG": [{"height": 480.0, "id": 1, "name": 1, "tags": ["horse"], "type": "Rectangle", "width": 853.0, "x1": 385, "x2": 644, "y1": 29, "y2": 327}]}, "inputTags": "person,horse,default", "scd": false, "suggestiontype": "track", "tag_colors": ["#e9f1fe", "#33BBFF", "#FFFF19"]} -------------------------------------------------------------------------------- /test/init_class_get_rows_min.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/init_class_get_rows_min.npy -------------------------------------------------------------------------------- /test/init_classes_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "classmap": [ 3 | { 4 | "initclass": "1", 5 | "map": "person", 6 | "balance": "0.8" 7 | }, 8 | { 9 | "initclass": "19", 10 | "map": "horse", 11 | "balance": "0.2" 12 | }, 13 | { 14 | "initclass": "NULL", 15 | "map": "NULL", 16 | "balance": "0" 17 | } 18 | ], 19 | "unmapclass":["64","86", "15"], 20 | "default_class":"default" 21 | } -------------------------------------------------------------------------------- /test/run_all_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | loader = unittest.TestLoader() 3 | start_dir = '.' 
4 | suite = loader.discover(start_dir) 5 | 6 | runner = unittest.TextTestRunner() 7 | runner.run(suite) -------------------------------------------------------------------------------- /test/test_create_init_predictions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import shutil 3 | import sys 4 | import os 5 | import cv2 6 | import numpy as np 7 | from pathlib import Path 8 | from collections import defaultdict 9 | import filecmp 10 | 11 | 12 | 13 | # Allow us to import files from "train' 14 | 15 | train_dir = str(Path.cwd().parent / "train") 16 | if train_dir not in sys.path: 17 | sys.path.append(train_dir) 18 | from create_predictions import get_suggestions, make_csv_output 19 | from tf_detector import TFDetector 20 | import six.moves.urllib as urllib 21 | import tarfile 22 | TEST_WORKDIR = "test_workdir" 23 | 24 | class CreateInitPredictionsTestCase(unittest.TestCase): 25 | def setUp(self): 26 | DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/' 27 | MODEL_NAME = 'faster_rcnn_resnet101_coco_2018_01_28' # 'ssd_mobilenet_v1_coco_2017_11_17' 28 | MODEL_FILE = MODEL_NAME + '.tar.gz' 29 | url = DOWNLOAD_BASE + MODEL_FILE 30 | MODEL_FILE_DST = os.path.join(TEST_WORKDIR, MODEL_FILE) 31 | self.froz_graph = os.path.join(TEST_WORKDIR, MODEL_NAME, "frozen_inference_graph.pb") 32 | if not os.path.exists(self.froz_graph): 33 | if not os.path.exists(MODEL_FILE_DST): 34 | print("Downloading model: ", url) 35 | opener = urllib.request.URLopener() 36 | opener.retrieve(url, MODEL_FILE_DST) 37 | print("Unzipping: ", MODEL_FILE_DST) 38 | tar_file = tarfile.open(MODEL_FILE_DST) 39 | for file in tar_file.getmembers(): 40 | file_name = os.path.basename(file.name) 41 | if 'frozen_inference_graph.pb' in file_name: 42 | tar_file.extract(file, TEST_WORKDIR) 43 | 44 | def tearDown(self): 45 | if os.path.exists("untagged.csv"): 46 | os.remove("untagged.csv") 47 | if os.path.exists("tagged_preds.csv"): 48 | os.remove("tagged_preds.csv") 49 | #print("TBD tear down") 50 | 51 | def test_make_csv_output(self): 52 | all_predictions = np.load('all_predictions_cow.npy') 53 | basedir = Path("camera_images") 54 | 55 | CV2_COLOR_LOAD_FLAG = 1 56 | all_image_files = list(basedir.rglob("*.JPG")) 57 | all_names = [] 58 | all_names += [("camera_images", filename.name) for filename in all_image_files ] 59 | 60 | all_sizes = [cv2.imread(str(image), CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] 61 | untagged_output = 'untagged.csv' 62 | tagged_output = 'tagged_preds.csv' 63 | already_tagged = defaultdict(set) 64 | make_csv_output(all_predictions, all_names, all_sizes, untagged_output, tagged_output, already_tagged, 65 | user_folders = True) 66 | 67 | self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_cow.csv'), True, "generated untagged.csv is correct") 68 | 69 | 70 | def test_get_suggestions(self): 71 | classesIDs = list(range(1, 91)) 72 | classes = [str(x) for x in classesIDs] 73 | cur_detector = TFDetector(classes, self.froz_graph) 74 | image_dir = "test_workdir_init_pred" 75 | untagged_output = 'untagged.csv' 76 | tagged_output = 'tagged_preds.csv' 77 | cur_tagged = None 78 | cur_tagging = None 79 | get_suggestions(cur_detector, image_dir, untagged_output, tagged_output, cur_tagged, cur_tagging, 80 | filetype="*.jpg", min_confidence=0.5, 81 | user_folders=True) 82 | self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_cow.csv'), True, "generated untagged.csv is correct") 83 | 84 | 85 | if __name__ == '__main__': 86 | 
unittest.main() 87 | -------------------------------------------------------------------------------- /test/test_create_predictions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import shutil 3 | import sys 4 | import os 5 | import cv2 6 | import numpy as np 7 | from pathlib import Path 8 | from collections import defaultdict 9 | import filecmp 10 | import six.moves.urllib as urllib 11 | 12 | 13 | 14 | # Allow us to import files from "train' 15 | 16 | train_dir = str(Path.cwd().parent / "train") 17 | if train_dir not in sys.path: 18 | sys.path.append(train_dir) 19 | from create_predictions import get_suggestions, make_csv_output 20 | from tf_detector import TFDetector 21 | 22 | class CreatePredictionsTestCase(unittest.TestCase): 23 | 24 | def setUp(self): 25 | url = "https://olgaliakrepo.blob.core.windows.net/woodknots/model_knots.pb" 26 | model_file = "model_knots.pb" 27 | if not os.path.exists(model_file): 28 | print("Downloading model: ", url) 29 | opener = urllib.request.URLopener() 30 | opener.retrieve(url, model_file) 31 | 32 | def tearDown(self): 33 | if os.path.exists("untagged.csv"): 34 | os.remove("untagged.csv") 35 | if os.path.exists("tagged_preds.csv"): 36 | os.remove("tagged_preds.csv") 37 | 38 | def test_make_csv_output(self): 39 | all_predictions = np.load('all_predictions.npy') 40 | basedir = Path("board_images_png") 41 | N_IMAGES = 4 42 | CV2_COLOR_LOAD_FLAG = 1 43 | all_image_files = list(basedir.rglob("*.png"))[0:N_IMAGES] 44 | all_names = [] 45 | all_names += [("board_images_png", filename.name) for filename in all_image_files ] 46 | 47 | all_sizes = [cv2.imread(str(image), CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] 48 | untagged_output = 'untagged.csv' 49 | tagged_output = 'tagged_preds.csv' 50 | already_tagged = defaultdict(set) 51 | make_csv_output(all_predictions, all_names, all_sizes, untagged_output, tagged_output, already_tagged, 52 | user_folders = True) 53 | 54 | self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_source.csv'), True, "generated untagged.csv is correct") 55 | 56 | def test_get_suggestions(self): 57 | classes = 'knot,defect' 58 | cur_detector = TFDetector(classes.split(','), 'model_knots.pb') 59 | image_dir = "test_workdir_train" 60 | untagged_output = 'untagged.csv' 61 | tagged_output = 'tagged_preds.csv' 62 | cur_tagged = None 63 | cur_tagging = None 64 | get_suggestions(cur_detector, image_dir, untagged_output, tagged_output, cur_tagged, cur_tagging, 65 | filetype="*.png", min_confidence=0.5, 66 | user_folders=True) 67 | self.assertEqual(filecmp.cmp('untagged.csv', 'untagged_source.csv'), True, "generated untagged.csv is correct") 68 | 69 | 70 | if __name__ == '__main__': 71 | unittest.main() 72 | -------------------------------------------------------------------------------- /test/test_download_vott_json.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import shutil 3 | import sys 4 | import os 5 | from pathlib import Path 6 | import filecmp 7 | 8 | # Allow us to import utils 9 | config_dir = str(Path.cwd().parent / "utils") 10 | if config_dir not in sys.path: 11 | sys.path.append(config_dir) 12 | from config import Config 13 | 14 | tag_dir = str(Path.cwd().parent / "tag") 15 | if tag_dir not in sys.path: 16 | sys.path.append(tag_dir) 17 | from download_vott_json import create_vott_json, get_top_rows, add_bkg_class_name, parse_class_balance_setting, make_vott_output 18 | 19 | 20 | class 
DownloadVOTTJSONTestCase(unittest.TestCase): 21 | def setUp(self): 22 | self.config_file = Config.parse_file("testconfig.ini") 23 | 24 | self.tagging_location = self.config_file["tagging_location"] + "_test" 25 | shutil.rmtree(self.tagging_location, ignore_errors=True) 26 | self.totag_csv_file_loc = Path(self.config_file["tagging_location"])/"totag.csv" 27 | 28 | Path(self.config_file["tagging_location"]).mkdir(parents=True, exist_ok=True) 29 | self. max_tags_per_pixel = self.config_file.get("max_tags_per_pixel") 30 | self.tag_names = self.config_file["classes"].split(",") 31 | self.user_folders = self.config_file["user_folders"] == "True" 32 | self.pick_max = self.config_file["pick_max"] == "True" 33 | 34 | 35 | 36 | def tearDown(self): 37 | shutil.rmtree(self.tagging_location, ignore_errors=True) 38 | shutil.rmtree("Images", ignore_errors=True) 39 | 40 | if os.path.exists("totag.csv"): 41 | os.remove("totag.csv") 42 | 43 | if os.path.exists("tagging.csv"): 44 | os.remove("tagging.csv") 45 | if os.path.exists("Images.json"): 46 | os.remove("Images.json") 47 | 48 | print("Tear down") 49 | 50 | def test_get_top_rows(self): 51 | # prepare file 52 | shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) 53 | 54 | N_ROWS = 3 55 | N_FILES = 3 56 | EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], 57 | ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], 58 | ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], 59 | [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], 60 | [['st1026.png', 'knot', '0.2674017', '0.35383838', '0.39859554', '0.50976944', '512', '488', 'board_images_png', '0.9884343', '0.96366304'], 61 | ['st1026.png', 'knot', '0.69417506', '0.744075', '0.34379873', '0.39051458', '512', '488', 'board_images_png', '0.97863936', '0.96366304'], 62 | ['st1026.png', 'defect', '0.70078284', '0.9907891', '0.5857268', '0.6470487', '512', '488', 'board_images_png', '0.96366304', '0.96366304']]] 63 | 64 | class_balance = "0.7,0.3,0" 65 | 66 | tag_names = add_bkg_class_name(self.tag_names) 67 | ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) 68 | 69 | all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, 70 | self.pick_max, tag_names, ideal_class_balance) 71 | self.assertEqual(len(all_rows), N_FILES, 'number of rows') 72 | self.assertEqual(all_rows, EXPECTED, 'raw values') 73 | 74 | def test_get_top_rows_no_folder(self): 75 | # prepare file 76 | shutil.copyfile("./totag_no_folder_source.csv", str(self.totag_csv_file_loc)) 77 | 78 | N_ROWS = 3 79 | N_FILES = 3 80 | EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 81 | '0.986', '0.986'], 82 | ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 83 | '0.986', '0.986'], 84 | ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 85 | '0.986', '0.986']], 86 | [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 87 | '0.98448783', '0.98448783']], 88 | [['st1026.png', 'knot', '0.2674017', '0.35383838', '0.39859554', '0.50976944', '512', '488', 89 | '0.9884343', '0.96366304'], 90 | 
['st1026.png', 'knot', '0.69417506', '0.744075', '0.34379873', '0.39051458', '512', '488', 91 | '0.97863936', '0.96366304'], 92 | ['st1026.png', 'defect', '0.70078284', '0.9907891', '0.5857268', '0.6470487', '512', '488', 93 | '0.96366304', '0.96366304']]] 94 | 95 | class_balance = "0.7,0.3,0" 96 | user_folders = False 97 | 98 | tag_names = add_bkg_class_name(self.tag_names) 99 | ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) 100 | 101 | all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, user_folders, 102 | self.pick_max, tag_names, ideal_class_balance) 103 | self.assertEqual(len(all_rows), N_FILES, 'number of rows') 104 | self.assertEqual(all_rows, EXPECTED, 'raw values') 105 | 106 | def test_get_top_rows_empty_class_balance(self): 107 | # prepare file 108 | shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) 109 | 110 | N_ROWS = 3 111 | N_FILES = 3 112 | EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], 113 | ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], 114 | ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], 115 | [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], 116 | [['st1611.png', 'knot', '0.6326234', '0.7054164', '0.86741334', '0.96444726', '512', '488', 'board_images_png', '0.99616516', '0.9843567'], 117 | ['st1611.png', 'knot', '0.07399843', '0.11282173', '0.32572043', '0.36819047', '512', '488', 'board_images_png', '0.9843567', '0.9843567']]] 118 | class_balance = '' 119 | tag_names = add_bkg_class_name(self.tag_names) 120 | ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) 121 | all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, 122 | self.pick_max, tag_names, ideal_class_balance) 123 | self.assertEqual(len(all_rows), N_FILES, 'number of rows') 124 | self.assertEqual(all_rows, EXPECTED, 'raw values') 125 | 126 | def test_get_top_rows_invalid_class_balance1(self): 127 | # prepare file 128 | shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) 129 | 130 | N_ROWS = 3 131 | N_FILES = 3 132 | EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], 133 | ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], 134 | ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], 135 | [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], 136 | [['st1611.png', 'knot', '0.6326234', '0.7054164', '0.86741334', '0.96444726', '512', '488', 'board_images_png', '0.99616516', '0.9843567'], 137 | ['st1611.png', 'knot', '0.07399843', '0.11282173', '0.32572043', '0.36819047', '512', '488', 'board_images_png', '0.9843567', '0.9843567']]] 138 | class_balance = 'Random' 139 | tag_names = add_bkg_class_name(self.tag_names) 140 | ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) 141 | 142 | all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, 143 | 
self.pick_max, tag_names, ideal_class_balance) 144 | self.assertEqual(len(all_rows), N_FILES, 'number of rows') 145 | self.assertEqual(all_rows, EXPECTED, 'raw values') 146 | 147 | def test_get_top_rows_invalid_class_balance2(self): 148 | # prepare file 149 | shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) 150 | 151 | N_ROWS = 3 152 | N_FILES = 3 153 | EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], 154 | ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], 155 | ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], 156 | [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], 157 | [['st1611.png', 'knot', '0.6326234', '0.7054164', '0.86741334', '0.96444726', '512', '488', 'board_images_png', '0.99616516', '0.9843567'], 158 | ['st1611.png', 'knot', '0.07399843', '0.11282173', '0.32572043', '0.36819047', '512', '488', 'board_images_png', '0.9843567', '0.9843567']]] 159 | 160 | class_balance = '0.1, 0.2, 0.3' 161 | tag_names = add_bkg_class_name(self.tag_names) 162 | ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) 163 | all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, 164 | self.pick_max, tag_names, ideal_class_balance) 165 | self.assertEqual(len(all_rows), N_FILES, 'number of rows') 166 | self.assertEqual(all_rows, EXPECTED, 'raw values') 167 | 168 | def test_get_top_rows_class_balance_min(self): 169 | # prepare file 170 | shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) 171 | 172 | N_ROWS = 3 173 | EXPECTED = [[['st1091.png', 'knot', '0.20989896', '0.251748', '0.34986168', '0.3921352', '512', '488', 'board_images_png', '0.99201256', '0.70161'], 174 | ['st1091.png', 'knot', '0.696119', '0.7461088', '0.27078417', '0.33086362', '512', '488', 'board_images_png', '0.9827361', '0.70161'], 175 | ['st1091.png', 'knot', '0.89531857', '0.93743694', '0.4605299', '0.5066802', '512', '488', 'board_images_png', '0.9794672', '0.70161'], 176 | ['st1091.png', 'defect', '0.7629506', '1.0', '0.6205898', '0.67307687', '512', '488', 'board_images_png', '0.74762243', '0.70161'], 177 | ['st1091.png', 'knot', '0.14214082', '0.247842', '0.7355515', '0.8967391', '512', '488', 'board_images_png', '0.7072498', '0.70161'], 178 | ['st1091.png', 'defect', '0.0', '0.1281265', '0.55038965', '0.59755194', '512', '488', 'board_images_png', '0.70161', '0.70161']], 179 | [['st1185.png', 'knot', '0.6978268', '0.7582275', '0.66821593', '0.7535644', '512', '488', 'board_images_png', '0.97257924', '0.7035888'], 180 | ['st1185.png', 'defect', '0.35780182', '0.60781866', '0.27580062', '0.32093963', '512', '488', 'board_images_png', '0.9720861', '0.7035888'], 181 | ['st1185.png', 'knot', '0.5183983', '0.57071316', '0.84764653', '0.91617334', '512', '488', 'board_images_png', '0.9241496', '0.7035888'], 182 | ['st1185.png', 'knot', '0.55567926', '0.5904746', '0.51832056', '0.5461106', '512', '488', 'board_images_png', '0.7035888', '0.7035888']], 183 | [['st1192.png', 'knot', '0.39846605', '0.45543727', '0.36765742', '0.4488806', '512', '488', 'board_images_png', '0.99612194', '0.7127546'], 184 | ['st1192.png', 'defect', '0.07790943', '0.44866413', '0.5975798', '0.640683', '512', '488', 'board_images_png', 
'0.80447847', '0.7127546'], 185 | ['st1192.png', 'defect', '0.47953823', '0.7499259', '0.5517361', '0.59940904', '512', '488', 'board_images_png', '0.7127546', '0.7127546']]] 186 | 187 | pick_max = False 188 | class_balance = "0.7,0.3,0" 189 | tag_names = add_bkg_class_name(self.tag_names) 190 | ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) 191 | all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, 192 | pick_max, tag_names, ideal_class_balance) 193 | #self.assertEqual(len(all_rows), N_FILES, 'number of rows') 194 | self.assertEqual(all_rows, EXPECTED, 'raw values') 195 | 196 | def test_create_vott_json(self): 197 | # prepare file 198 | shutil.copyfile("./totag_source.csv", "./totag.csv") 199 | 200 | N_ROWS = 3 201 | N_FILES = 3 202 | FOLDER_NAME = "board_images_png" 203 | class_balance = "0.7,0.3,0" 204 | tag_names = add_bkg_class_name(self.tag_names) 205 | ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) 206 | 207 | create_vott_json(self.totag_csv_file_loc, N_ROWS, self.user_folders, 208 | self.pick_max, "", 209 | self.tagging_location, blob_credentials=None, 210 | tag_names= tag_names, 211 | max_tags_per_pixel=self. max_tags_per_pixel, 212 | config_class_balance= ideal_class_balance) 213 | 214 | res_folder = os.path.join(self.tagging_location, FOLDER_NAME) 215 | res_immages_cnt = sum([len(files) for r, d, files in os.walk(res_folder)]) 216 | self.assertEqual(N_FILES, res_immages_cnt) 217 | 218 | def test_get_top_rows_with_bkg(self): 219 | # prepare file 220 | shutil.copyfile("./totag_source.csv", str(self.totag_csv_file_loc)) 221 | 222 | N_ROWS = 5 223 | N_FILES = 5 224 | EXPECTED = [[['st1840.png', 'knot', '0.12036637', '0.18497443', '0.7618415', '0.8283344', '512', '488', 'board_images_png', '0.986', '0.986'], 225 | ['st1840.png', 'knot', '0.7297609', '0.7755673', '0.62443626', '0.6670296', '512', '488', 'board_images_png', '0.986', '0.986'], 226 | ['st1840.png', 'defect', '0.76513', '0.9952971', '0.6075407', '0.6546806', '512', '488', 'board_images_png', '0.986', '0.986']], 227 | [['st1578.png', 'knot', '0.594302', '0.6663906', '0.35276932', '0.43525606', '512', '488', 'board_images_png', '0.98448783', '0.98448783']], 228 | [['st1611.png', 'knot', '0.6326234', '0.7054164', '0.86741334', '0.96444726', '512', '488', 229 | 'board_images_png', '0.99616516', '0.9843567'], 230 | ['st1611.png', 'knot', '0.07399843', '0.11282173', '0.32572043', '0.36819047', '512', '488', 231 | 'board_images_png', '0.9843567', '0.9843567']], 232 | [['st1026.png', 'knot', '0.2674017', '0.35383838', '0.39859554', '0.50976944', '512', '488', 'board_images_png', '0.9884343', '0.96366304'], 233 | ['st1026.png', 'knot', '0.69417506', '0.744075', '0.34379873', '0.39051458', '512', '488', 'board_images_png', '0.97863936', '0.96366304'], 234 | ['st1026.png', 'defect', '0.70078284', '0.9907891', '0.5857268', '0.6470487', '512', '488', 'board_images_png', '0.96366304', '0.96366304']], 235 | [['st1524.png', 'NULL', '0', '0', '0', '0', '512', '488', 'board_images_png', '0', '0.05']]] 236 | 237 | class_balance = "0.6, 0.29, 0.11" 238 | tag_names = add_bkg_class_name(self.tag_names) 239 | ideal_class_balance = parse_class_balance_setting(class_balance, len(tag_names)) 240 | 241 | all_rows, _, _ = get_top_rows(self.totag_csv_file_loc, N_ROWS, self.user_folders, 242 | self.pick_max, tag_names, ideal_class_balance) 243 | self.assertEqual(len(all_rows), N_FILES, 'number of rows') 244 | self.assertEqual(all_rows, EXPECTED, 'raw 
values') 245 | 246 | def test_create_vott_json_no_folder(self):  # renamed: was a duplicate of test_create_vott_json above, which Python would silently shadow 247 | # prepare file 248 | shutil.copyfile("./totag_source2.csv", "totag.csv") 249 | 250 | csv_file_loc = Path('.') 251 | N_IMAGES = 4 252 | user_folders = False 253 | pick_max = True 254 | tagging_location = "." 255 | tag_names = add_bkg_class_name(self.tag_names) 256 | ideal_class_balance = parse_class_balance_setting(None, len(tag_names)) 257 | create_vott_json(csv_file_loc, N_IMAGES, user_folders, 258 | pick_max, "board_images_png", 259 | tagging_location, blob_credentials = None, 260 | tag_names=tag_names, 261 | max_tags_per_pixel= 2, 262 | config_class_balance=ideal_class_balance, 263 | colors = ["#e9f1fe", "#f3e9ff"]) 264 | self.assertEqual(filecmp.cmp('Images.json', 'Images_source.json'), True, "generated VOTT json is correct") 265 | 266 | 267 | 268 | if __name__ == '__main__': 269 | unittest.main() 270 | -------------------------------------------------------------------------------- /test/test_init_download_vott_json.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import shutil 3 | import sys 4 | import os 5 | from pathlib import Path 6 | import filecmp 7 | import json 8 | import numpy as np 9 | 10 | # Allow us to import utils 11 | config_dir = str(Path.cwd().parent / "utils") 12 | if config_dir not in sys.path: 13 | sys.path.append(config_dir) 14 | from config import Config 15 | 16 | tag_dir = str(Path.cwd().parent / "tag") 17 | if tag_dir not in sys.path: 18 | sys.path.append(tag_dir) 19 | from download_vott_json import create_init_vott_json, create_vott_json, get_top_rows, filter_top, add_bkg_class_name, remove_bkg_class_name, parse_class_balance_setting 20 | 21 | 22 | class DownloadInitVOTTJSONTestCase(unittest.TestCase): 23 | def setUp(self): 24 | self.config_file = Config.parse_file("testconfig.ini") 25 | 26 | self.tagging_location = self.config_file["tagging_location"] + "_test" 27 | shutil.rmtree(self.tagging_location, ignore_errors=True) 28 | self.csv_file_loc = Path(self.config_file["tagging_location"]) 29 | 30 | self.csv_file_loc.mkdir(parents=True, exist_ok=True) 31 | self.max_tags_per_pixel = self.config_file.get("max_tags_per_pixel") 32 | self.tag_names = self.config_file["classes"].split(",") 33 | self.user_folders = self.config_file["user_folders"] == "True" 34 | self.pick_max = self.config_file["pick_max"] == "True" 35 | 36 | 37 | 38 | def tearDown(self): 39 | shutil.rmtree(self.tagging_location, ignore_errors=True) 40 | shutil.rmtree("Images", ignore_errors=True) 41 | 42 | shutil.rmtree("test_workdir/camera_images", ignore_errors=True) 43 | shutil.rmtree("test_workdir90", ignore_errors=True) 44 | if os.path.exists(r"test_workdir/camera_images.json"): 45 | os.remove(r"test_workdir/camera_images.json") 46 | 47 | if os.path.exists("totag.csv"): 48 | os.remove("totag.csv") 49 | 50 | if os.path.exists("tagging.csv"): 51 | os.remove("tagging.csv") 52 | if os.path.exists("Images.json"): 53 | os.remove("Images.json") 54 | 55 | if os.path.exists("init_totag.csv"): 56 | os.remove("init_totag.csv") 57 | 58 | print("Tear down") 59 | 60 | 61 | def test_create_vott_json_90(self): 62 | # prepare file 63 | shutil.copyfile("./untagged_cow.csv", "totag.csv") 64 | 65 | csv_file_loc = Path('.') 66 | FOLDER = "camera_images" 67 | N_IMAGES = sum([len(files) for r, d, files in os.walk(FOLDER)]) 68 | user_folders = False 69 | pick_max = True 70 | tagging_location = "." 
71 | tagging_location = "test_workdir90" 72 | classesIDs = [str(i) for i in range(1, 91)] 73 | tag_names = add_bkg_class_name(classesIDs) 74 | 75 | ideal_class_balance = parse_class_balance_setting(None, len(tag_names)) 76 | create_vott_json(csv_file_loc, N_IMAGES, user_folders, 77 | pick_max, FOLDER, 78 | tagging_location, blob_credentials=None, 79 | tag_names=tag_names, 80 | max_tags_per_pixel=2, 81 | config_class_balance=ideal_class_balance 82 | ) 83 | 84 | 85 | 86 | self.assertEqual(filecmp.cmp(os.path.join(tagging_location, 'Images.json'), 'Images_source_workdir90.json'), True, "generated VOTT json is correct") 87 | 88 | def test_get_filtered(self): 89 | shutil.copyfile("./untagged_cow.csv", "init_totag.csv") 90 | json_fn = "init_classes_map.json" 91 | json_config = None 92 | with open(json_fn, "r") as read_file: 93 | json_config = json.load(read_file) 94 | classmap = json_config["classmap"] 95 | ideal_balance_list = [] 96 | new_tag_names = [] 97 | init_tag_names = [] 98 | class_map_dict = {} 99 | for m in classmap: 100 | ideal_balance_list.append(m['balance']) 101 | new_tag_names.append(m['map']) 102 | init_tag_names.append(m['initclass']) 103 | class_map_dict[m['initclass']] = m['map'] 104 | ideal_balance = ','.join(ideal_balance_list) 105 | unmapclass_list = json_config["unmapclass"] 106 | default_class = json_config["default_class"] 107 | file_location_totag = Path('.')/"init_totag.csv" 108 | new_tag_names = add_bkg_class_name(new_tag_names) 109 | ideal_class_balance = parse_class_balance_setting(ideal_balance, len(new_tag_names)) 110 | 111 | rows, _, _ = get_top_rows(file_location_totag, 10, True, False, 112 | init_tag_names, ideal_class_balance, 113 | filter_top, 114 | unmapclass_list, init_tag_names, class_map_dict, default_class) 115 | 116 | expected_rows = np.load("init_class_get_rows_min.npy") 117 | self.assertEqual((rows == expected_rows).all(), True) 118 | 119 | 120 | def test_create_vott_json(self): 121 | # prepare file 122 | shutil.copyfile("./untagged_cow.csv", "init_totag.csv") 123 | 124 | csv_file_loc = Path('.') 125 | FOLDER = "camera_images" 126 | N_IMAGES = 10 127 | user_folders = True 128 | pick_max = False 129 | tagging_location = "test_workdir" 130 | 131 | json_fn = "init_classes_map.json" 132 | json_config = None 133 | with open(json_fn, "r") as read_file: 134 | json_config = json.load(read_file) 135 | classmap = json_config["classmap"] 136 | ideal_balance_list = [] 137 | new_tag_names = [] 138 | init_tag_names = [] 139 | class_map_dict = {} 140 | for m in classmap: 141 | ideal_balance_list.append(m['balance']) 142 | new_tag_names.append(m['map']) 143 | init_tag_names.append(m['initclass']) 144 | class_map_dict[m['initclass']] = m['map'] 145 | 146 | unmapclass_list = json_config["unmapclass"] 147 | default_class = json_config["default_class"] 148 | ideal_balance = ','.join(ideal_balance_list) 149 | new_tag_names.append(default_class) 150 | new_tag_names = remove_bkg_class_name(new_tag_names) 151 | ideal_class_balance = parse_class_balance_setting(ideal_balance, len(init_tag_names)) 152 | 153 | create_init_vott_json(csv_file_loc, N_IMAGES, user_folders, 154 | pick_max, 155 | "", #image loc 156 | tagging_location, 157 | None, #blob creds 158 | init_tag_names, 159 | new_tag_names, 160 | 2, #max pix 161 | ideal_class_balance, 162 | ["#e9f1fe", "#33BBFF", "#FFFF19"], #colors 163 | unmapclass_list, init_tag_names, class_map_dict, default_class) 164 | 165 | self.assertEqual(filecmp.cmp(os.path.join(tagging_location, FOLDER + '.json'), FOLDER + 
'_source.json'), True, "generated VOTT json is correct") 166 | 167 | if __name__ == '__main__': 168 | unittest.main() 169 | -------------------------------------------------------------------------------- /test/test_init_tf_detector.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import shutil 3 | import sys 4 | import os 5 | import numpy as np 6 | import cv2 7 | from pathlib import Path 8 | import six.moves.urllib as urllib 9 | import tarfile 10 | 11 | 12 | # Allow us to import utils 13 | config_dir = str(Path.cwd().parent / "utils") 14 | if config_dir not in sys.path: 15 | sys.path.append(config_dir) 16 | from config import Config 17 | 18 | train_dir = str(Path.cwd().parent / "train") 19 | if train_dir not in sys.path: 20 | sys.path.append(train_dir) 21 | from tf_detector import TFDetector 22 | 23 | TEST_WORKDIR = "test_workdir" 24 | 25 | class TFDetectorTestCase(unittest.TestCase): 26 | def setUp(self): 27 | DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/' 28 | MODEL_NAME = 'faster_rcnn_resnet101_coco_2018_01_28' # 'ssd_mobilenet_v1_coco_2017_11_17' 29 | MODEL_FILE = MODEL_NAME + '.tar.gz' 30 | url = DOWNLOAD_BASE + MODEL_FILE 31 | MODEL_FILE_DST = os.path.join(TEST_WORKDIR, MODEL_FILE) 32 | self.froz_graph = os.path.join(TEST_WORKDIR, MODEL_NAME, "frozen_inference_graph.pb") 33 | if not os.path.exists(self.froz_graph): 34 | if not os.path.exists(MODEL_FILE_DST): 35 | print("Downloading model: ", url) 36 | opener = urllib.request.URLopener() 37 | opener.retrieve(url, MODEL_FILE_DST) 38 | print("Unzipping: ", MODEL_FILE_DST) 39 | tar_file = tarfile.open(MODEL_FILE_DST) 40 | for file in tar_file.getmembers(): 41 | file_name = os.path.basename(file.name) 42 | if 'frozen_inference_graph.pb' in file_name: 43 | tar_file.extract(file, TEST_WORKDIR) 44 | 45 | def tearDown(self): 46 | #shutil.rmtree(self.tagging_location, ignore_errors=True) 47 | print("Tear down") 48 | 49 | def test_predict(self): 50 | classesIDs = list(range(1,91)) 51 | classes = ','.join(str(x) for x in classesIDs ) 52 | detector = TFDetector(classes.split(','),self.froz_graph) 53 | basedir = Path("camera_images") 54 | 55 | all_image_files = list(basedir.rglob("*.JPG")) 56 | image_size = (1000,750) 57 | NUM_CHANNELS = 3 58 | CV2_COLOR_LOAD_FLAG = 1 59 | all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) 60 | for curindex, image in enumerate(all_image_files): 61 | all_images[curindex] = cv2.resize(cv2.imread(str(image), CV2_COLOR_LOAD_FLAG), image_size) 62 | all_predictions = detector.predict(all_images, min_confidence=0.5) 63 | 64 | self.assertEqual(len(all_predictions), len(all_image_files)) 65 | 66 | expected_allpred = np.load('all_predictions_cow.npy') 67 | 68 | self.assertEqual((all_predictions == expected_allpred).all(), True, 69 | "(expected_allpred == all_predictions).all()") 70 | 71 | #np.save('all_predictions_cow', all_predictions) 72 | 73 | 74 | 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /test/test_make_vott_output.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import shutil 3 | import sys 4 | import os 5 | from pathlib import Path 6 | import filecmp 7 | from azure.storage.blob import BlockBlobService 8 | 9 | # Allow us to import utils 10 | config_dir = str(Path.cwd().parent / "utils") 11 | if config_dir not in sys.path: 12 | 
sys.path.append(config_dir) 13 | from config import Config 14 | 15 | tag_dir = str(Path.cwd().parent / "tag") 16 | if tag_dir not in sys.path: 17 | sys.path.append(tag_dir) 18 | from download_vott_json import create_vott_json, get_top_rows, add_bkg_class_name, parse_class_balance_setting, make_vott_output 19 | 20 | class MakeVOTTOutputTestCase(unittest.TestCase): 21 | def setUp(self): 22 | print("no-op") 23 | 24 | # Uncomment the code below for "on-demand" VOTT json creation using data on blob storage 25 | # def test_download_catdata(self): 26 | # #download data from tagged_Abram_small 27 | # config_file = Config.parse_file( r'../workconfig.ini') 28 | # 29 | # block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], 30 | # account_key=config_file["AZURE_STORAGE_KEY"]) 31 | # container_name = config_file["image_container_name"] 32 | # file_location = Path('D://temp') 33 | # image_loc = 'D://temp' 34 | # 35 | # file_location_totag = (file_location / "totag.csv") 36 | # create_vott_json(file_location, num_rows=1024, user_folders = True, pick_max = True, image_loc = "", output_location = file_location, 37 | # blob_credentials=(block_blob_service, container_name), 38 | # tag_names=["human","iguana"], max_tags_per_pixel=None, config_class_balance=None, colors=None) 39 | # self.assertEqual(True, True) 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /test/test_tf_detector.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import shutil 3 | import sys 4 | import os 5 | import numpy as np 6 | import cv2 7 | from pathlib import Path 8 | import six.moves.urllib as urllib 9 | 10 | 11 | # Allow us to import utils 12 | config_dir = str(Path.cwd().parent / "utils") 13 | if config_dir not in sys.path: 14 | sys.path.append(config_dir) 15 | from config import Config 16 | 17 | train_dir = str(Path.cwd().parent / "train") 18 | if train_dir not in sys.path: 19 | sys.path.append(train_dir) 20 | from tf_detector import TFDetector 21 | 22 | class TFDetectorTestCase(unittest.TestCase): 23 | def setUp(self): 24 | url = "https://olgaliakrepo.blob.core.windows.net/woodknots/model_knots.pb" 25 | model_file = "model_knots.pb" 26 | if not os.path.exists(model_file): 27 | print("Downloading model: ", url) 28 | opener = urllib.request.URLopener() 29 | opener.retrieve(url, model_file) 30 | 31 | def tearDown(self): 32 | #shutil.rmtree(self.tagging_location, ignore_errors=True) 33 | print("Tear down") 34 | 35 | def test_predict(self): 36 | classes = 'knot,defect' 37 | detector = TFDetector(classes.split(','),'model_knots.pb') 38 | 39 | basedir = Path("board_images_png") 40 | N_IMAGES = 4 41 | all_image_files = list(basedir.rglob("*.png"))[0:N_IMAGES] 42 | image_size = (1000,750) 43 | NUM_CHANNELS = 3 44 | CV2_COLOR_LOAD_FLAG = 1 45 | all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) 46 | for curindex, image in enumerate(all_image_files): 47 | all_images[curindex] = cv2.resize(cv2.imread(str(image), CV2_COLOR_LOAD_FLAG), image_size) 48 | all_predictions = detector.predict(all_images, min_confidence=0.5) 49 | 50 | self.assertEqual(len(all_predictions), N_IMAGES) 51 | 52 | expected_allpred = np.load('all_predictions.npy') 53 | self.assertEqual((all_predictions == expected_allpred).all(), True, "(expected_allpred == all_predictions).all()") 54 | 55 | 56 | if __name__ == '__main__': 57 | 
unittest.main() 58 | -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG -------------------------------------------------------------------------------- /test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_init_pred/camera_images/IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG -------------------------------------------------------------------------------- /test/test_workdir_train/board_images_png/st1026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_train/board_images_png/st1026.png -------------------------------------------------------------------------------- /test/test_workdir_train/board_images_png/st1194.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_train/board_images_png/st1194.png -------------------------------------------------------------------------------- /test/test_workdir_train/board_images_png/st1578.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_train/board_images_png/st1578.png -------------------------------------------------------------------------------- /test/test_workdir_train/board_images_png/st1611.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olgaliak/active-learning-detect/58b17e37271db29ee16fb8d65c0b6a529d8a2c6c/test/test_workdir_train/board_images_png/st1611.png -------------------------------------------------------------------------------- /test/testconfig.ini: -------------------------------------------------------------------------------- 1 | # AZURE STORAGE ACCOUNT INFORMATION 2 | AZURE_STORAGE_ACCOUNT=test 3 | AZURE_STORAGE_KEY=test== 4 | image_container_name=testimages 5 | label_container_name=testabels 6 | # IMAGE INFORMATION 7 | user_folders=True 8 | classes=knot,defect 9 | ideal_class_balance=0.7,0.3,0 10 | filetype=*.png 11 | # TAGGING MACHINE 12 | tagging_location=D:\\temp\\test 13 | pick_max=True 14 | max_tags_per_pixel=2 15 | # CUSTOM VISION 16 | #training_key= 17 | #prediction_key= 18 | #project_id= 19 | # TRAINING MACHINE 20 | # Locations 21 | python_file_directory=/home/olgali/repos/active-learning-detect/train 22 | data_dir=/home/olgali/ActiveLearning/Data_cat 23 | train_dir=/home/olgali/ActiveLearning/training_cat 24 | inference_output_dir=cat_inference_graphs 25 | 
tf_models_location=/home/olgali/repos/tf_model/models/research 26 | download_location=/home/olgali/downloads_cat 27 | # Training 28 | train_iterations=2000 29 | eval_iterations=10 30 | min_confidence=.5 31 | test_percentage=.2 32 | model_name=faster_rcnn_resnet50_coco_2018_01_28 33 | optional_pipeline_url=https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/samples/configs/faster_rcnn_resnet50_pets.config 34 | #Init Predictions 35 | init_model_name=faster_rcnn_resnet101_coco_2018_01_28 36 | # Config File Details 37 | old_label_path=PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt 38 | old_train_path=PATH_TO_BE_CONFIGURED/pet_faces_train.record-?????-of-00010 39 | old_val_path=PATH_TO_BE_CONFIGURED/pet_faces_val.record-?????-of-00010 40 | old_checkpoint_path=PATH_TO_BE_CONFIGURED/model.ckpt 41 | num_examples_marker=num_examples: 42 | num_steps_marker=num_steps: 43 | num_classes_marker=num_classes: 44 | # Calculated 45 | num_classes="$(awk -F ',' '{print NF}' <<< ${classes})" 46 | image_dir=${data_dir}/AllImages 47 | untagged_output=${data_dir}/untagged.csv 48 | tagged_output=${data_dir}/tagged.csv 49 | tagged_predictions=${data_dir}/tagged_preds.csv 50 | test_output=${data_dir}/test.csv 51 | validation_output=${data_dir}/val.csv 52 | tf_location_legacy=${tf_models_location}/object_detection/legacy 53 | tf_location=${tf_models_location}/object_detection 54 | PYTHONPATH=$PYTHONPATH:${tf_models_location}:${tf_models_location}/slim/ 55 | label_map_path=${data_dir}/pascal_label_map.pbtxt 56 | tf_record_location=${data_dir}/stamps.record 57 | tf_train_record=${tf_record_location%.*}_train.${tf_record_location##*.} 58 | tf_val_record=${tf_record_location%.*}_val.${tf_record_location##*.} 59 | tf_url=http://download.tensorflow.org/models/object_detection/${model_name}.tar.gz 60 | pipeline_file=${download_location}/${model_name}/pipeline.config 61 | fine_tune_checkpoint=${download_location}/${model_name}/model.ckpt 62 | init_model_graph=${download_location}/${init_model_name}/frozen_inference_graph.pb 63 | -------------------------------------------------------------------------------- /test/totag_source2.csv: -------------------------------------------------------------------------------- 1 | filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence 2 | st1026.png,knot,0.69150746,0.7407868,0.3375946,0.39474854,512,488,board_images_png,0.9990602,0.54169416 3 | st1026.png,knot,0.29255274,0.37531677,0.41773036,0.48604906,512,488,board_images_png,0.74185294,0.54169416 4 | st1026.png,knot,0.29603952,0.35703427,0.40142354,0.49790853,512,488,board_images_png,0.54169416,0.54169416 5 | st1578.png,knot,0.54391885,0.60184073,0.7846939,0.85633487,512,488,board_images_png,0.9994636,0.9942725 6 | st1578.png,knot,0.60079277,0.6762777,0.36906424,0.4369791,512,488,board_images_png,0.9942725,0.9942725 7 | st1611.png,knot,0.65116334,0.7139255,0.86043906,0.9666604,512,488,board_images_png,0.99822897,0.9488958 8 | st1611.png,knot,0.07768918,0.1141083,0.332125,0.36988598,512,488,board_images_png,0.9488958,0.9488958 9 | st1840.png,knot,0.12473148,0.18879795,0.76679623,0.8271259,512,488,board_images_png,0.99770314,0.5600077 10 | st1840.png,knot,0.72636276,0.7735574,0.62331045,0.66846347,512,488,board_images_png,0.98784816,0.5600077 11 | st1840.png,knot,0.59846735,0.6426683,0.2617435,0.32195628,512,488,board_images_png,0.5600077,0.5600077 -------------------------------------------------------------------------------- /test/untagged_cow.csv: 
-------------------------------------------------------------------------------- 1 | filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence 2 | IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG,1,0.0,0.31701845,0.0077313487,0.55989933,480,600,camera_images,0.99540824,0.7061533 3 | IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG,1,0.25898764,0.5642854,0.104834646,0.53862405,480,600,camera_images,0.9025605,0.7061533 4 | IC_Cabritos_2018_R1_cam01_20150421_200634_SUNP0063.JPG,35,0.18555662,0.5546974,0.48929358,0.5928695,480,600,camera_images,0.7061533,0.7061533 5 | IC_Cabritos_2018_R1_cam01_20150510_022358_SUNP0017.JPG,19,0.08357386,0.3895763,0.29830444,0.6114975,480,600,camera_images,0.90615094,0.90615094 6 | IC_Cabritos_2018_R1_cam01_20150510_114950_SUNP0020.JPG,NULL,0,0,0,0,480,600,camera_images,0,0.0 7 | IC_Cabritos_2018_R1_cam01_20150516_201936_SUNP0036.JPG,16,0.48581073,0.9471678,0.46597427,0.9487598,480,600,camera_images,0.6689594,0.6689594 8 | IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG,1,0.2735326,0.44143108,0.14391276,0.59441584,480,600,camera_images,0.9997955,0.9007767 9 | IC_Cabritos_2018_R1_cam01_20150604_090322_SUNP0070.JPG,27,0.25880748,0.32737294,0.2144454,0.40248972,480,600,camera_images,0.9007767,0.9007767 10 | IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG,16,0.0,0.7922343,0.2661451,0.80265963,480,600,camera_images,0.849446,0.7052012 11 | IC_Cabritos_2018_R1_cam01_20151027_170516_SUNP0014.JPG,22,0.044430044,0.7727591,0.2618165,0.8102997,480,600,camera_images,0.7052012,0.7052012 12 | IC_Mona_2018_cam1A_20150522_164552_IMG_0856.JPG,NULL,0,0,0,0,480,640,camera_images,0,0.0 13 | IC_Mona_2018_cam20_20141121_112319_IMG_0091.JPG,19,0.02553936,0.47359988,0.02941447,0.93185467,480,853,camera_images,0.9975829,0.9975829 14 | IC_Mona_2018_cam20_20141124_180256_IMG_0101.JPG,19,0.45203093,0.75573874,0.061686885,0.681505,480,853,camera_images,0.9460018,0.9460018 15 | IC_Mona_2018_cam20_20141206_191101_IMG_0153.JPG,18,0.5594412,0.8323967,0.2793807,0.6407886,480,853,camera_images,0.7723789,0.7723789 16 | -------------------------------------------------------------------------------- /test/untagged_source.csv: -------------------------------------------------------------------------------- 1 | filename,class,xmin,xmax,ymin,ymax,height,width,folder,box_confidence,image_confidence 2 | st1026.png,knot,0.69150746,0.7407868,0.3375946,0.39474854,512,488,board_images_png,0.9990602,0.54169416 3 | st1026.png,knot,0.29255274,0.37531677,0.41773036,0.48604906,512,488,board_images_png,0.74185294,0.54169416 4 | st1026.png,knot,0.29603952,0.35703427,0.40142354,0.49790853,512,488,board_images_png,0.54169416,0.54169416 5 | st1194.png,knot,0.6518282,0.70353997,0.7374667,0.80387944,512,488,board_images_png,0.99921286,0.99921286 6 | st1578.png,knot,0.54391885,0.60184073,0.7846939,0.85633487,512,488,board_images_png,0.9994636,0.9942725 7 | st1578.png,knot,0.60079277,0.6762777,0.36906424,0.4369791,512,488,board_images_png,0.9942725,0.9942725 8 | st1611.png,knot,0.65116334,0.7139255,0.86043906,0.9666604,512,488,board_images_png,0.99822897,0.9488958 9 | st1611.png,knot,0.07768918,0.1141083,0.332125,0.36988598,512,488,board_images_png,0.9488958,0.9488958 10 | -------------------------------------------------------------------------------- /train/active_learning_init_pred.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Source environmental variables 3 | set -a 4 | sed -i 's/\r//g' $1 5 | . 
$1 6 | set +a 7 | # Updating vars in config file 8 | envsubst < $1 > cur_config.ini 9 | # Update images from blob storage 10 | echo "Updating Blob Folder" 11 | python ${python_file_directory}/update_blob_folder.py cur_config.ini 12 | # Run initial predictions using a pretrained MS COCO model 13 | echo "Download MS COCO tf model if it doesn't exist" 14 | # Download tf model if it doesn't exist 15 | if [ ! -d "$download_location/${init_model_name}" ]; then 16 | mkdir -p $download_location 17 | curl $init_pred_tf_url --create-dirs -o ${download_location}/${init_model_name}.tar.gz 18 | tar -xzf ${download_location}/${init_model_name}.tar.gz -C $download_location 19 | fi 20 | 21 | 22 | echo "Running pretrained model on the images" 23 | python ${python_file_directory}/create_predictions.py cur_config.ini init_pred $init_model_graph 24 | # Rename predictions and inference graph based on timestamp and upload 25 | echo "Uploading new data" 26 | 27 | az storage blob upload --container-name $label_container_name --file $untagged_output --name init_totag_$(date +%s).csv --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY -------------------------------------------------------------------------------- /train/active_learning_initialize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Source environmental variables 3 | set -a 4 | sed -i 's/\r//g' $1 5 | . $1 6 | set +a 7 | # Make all necessary directories 8 | mkdir -p $image_dir 9 | # Download all images 10 | az storage blob download-batch --source $image_container_name --destination $image_dir 11 | # Create TFRecord from images + csv file on blob storage 12 | # TODO: Try to import create_predictions into this 13 | envsubst < $1 > cur_config.ini 14 | python ${python_file_directory}/initialize_vott_pull.py cur_config.ini -------------------------------------------------------------------------------- /train/active_learning_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Source environmental variables 3 | set -a 4 | sed -i 's/\r//g' $1 5 | . $1 6 | set +a 7 | # Updating vars in config file 8 | envsubst < $1 > cur_config.ini 9 | # Update images from blob storage 10 | echo "Updating Blob Folder" 11 | python ${python_file_directory}/update_blob_folder.py cur_config.ini 12 | # Create TFRecord from images + csv file on blob storage 13 | echo "Creating TF Record" 14 | python ${python_file_directory}/convert_tf_record.py cur_config.ini 15 | # Download tf model if it doesn't exist 16 | if [ ! -d "$download_location/${model_name}" ]; then 17 | mkdir -p $download_location 18 | curl $tf_url --create-dirs -o ${download_location}/${model_name}.tar.gz 19 | tar -xzf ${download_location}/${model_name}.tar.gz -C $download_location 20 | fi 21 | if [ ! -z "$optional_pipeline_url" ]; then 22 | curl $optional_pipeline_url -o $pipeline_file 23 | elif [ ! 
-f $pipeline_file ]; then 24 | cp ${download_location}/${model_name}/pipeline.config $pipeline_file 25 | fi 26 | echo "Making pipeline file from env vars" 27 | temp_pipeline=${pipeline_file%.*}_temp.${pipeline_file##*.} 28 | sed "s/${old_label_path//\//\\/}/${label_map_path//\//\\/}/g" $pipeline_file > $temp_pipeline 29 | sed -i "s/${old_train_path//\//\\/}/${tf_train_record//\//\\/}/g" $temp_pipeline 30 | sed -i "s/${old_val_path//\//\\/}/${tf_val_record//\//\\/}/g" $temp_pipeline 31 | sed -i "s/${old_checkpoint_path//\//\\/}/${fine_tune_checkpoint//\//\\/}/g" $temp_pipeline 32 | sed -i "s/$num_steps_marker[[:space:]]*[[:digit:]]*/$num_steps_marker $train_iterations/g" $temp_pipeline 33 | sed -i "s/$num_examples_marker[[:space:]]*[[:digit:]]*/$num_examples_marker $eval_iterations/g" $temp_pipeline 34 | sed -i "s/$num_classes_marker[[:space:]]*[[:digit:]]*/$num_classes_marker $num_classes/g" $temp_pipeline 35 | # Train model on TFRecord 36 | echo "Training model" 37 | rm -rf $train_dir 38 | python ${tf_location_legacy}/train.py --train_dir=$train_dir --pipeline_config_path=$temp_pipeline --logtostderr 39 | # Export inference graph of model 40 | echo "Exporting inference graph" 41 | rm -rf $inference_output_dir 42 | python ${tf_location}/export_inference_graph.py --input_type "image_tensor" --pipeline_config_path "$temp_pipeline" --trained_checkpoint_prefix "${train_dir}/model.ckpt-$train_iterations" --output_directory "$inference_output_dir" 43 | # TODO: Validation on Model, keep track of MAP etc. 44 | # Use inference graph to create predictions on untagged images 45 | echo "Creating new predictions" 46 | python ${python_file_directory}/create_predictions.py cur_config.ini 47 | echo "Calculating performance" 48 | python ${python_file_directory}/map_validation.py cur_config.ini 49 | # Rename predictions and inference graph based on timestamp and upload 50 | echo "Uploading new data" 51 | az storage blob upload --container-name $label_container_name --file ${inference_output_dir}/frozen_inference_graph.pb --name model_$(date +%s).pb --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY 52 | az storage blob upload --container-name $label_container_name --file $untagged_output --name totag_$(date +%s).csv --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY 53 | az storage blob upload --container-name $label_container_name --file $validation_output --name performance_$(date +%s).csv --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY 54 | -------------------------------------------------------------------------------- /train/convert_tf_record.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import tensorflow as tf 3 | import numpy as np 4 | import csv 5 | import hashlib 6 | from pathlib import Path 7 | import re 8 | 9 | FOLDER_LOCATION = 8 10 | HEIGHT_LOCATION = 6 11 | WIDTH_LOCATION = 7 12 | 13 | def int64_feature(value): 14 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 15 | 16 | def bytes_feature(value): 17 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 18 | 19 | def float_feature(value): 20 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 21 | 22 | def create_tf_example(predictions, raw_img, tag_map): 23 | filename = predictions[0][0] 24 | height = int(predictions[0][HEIGHT_LOCATION]) 25 | width = int(predictions[0][WIDTH_LOCATION]) 26 | key = hashlib.sha256(raw_img).hexdigest() 27 | xmin = 
[] 28 | ymin = [] 29 | xmax = [] 30 | ymax = [] 31 | classes = [] 32 | classes_text = [] 33 | truncated = [] 34 | poses = [] 35 | difficult_obj = [] 36 | for prediction in predictions: 37 | if prediction[1]!="NULL": 38 | ymin.append(float(prediction[4])) 39 | xmin.append(float(prediction[2])) 40 | ymax.append(float(prediction[5])) 41 | xmax.append(float(prediction[3])) 42 | tag_name = prediction[1] 43 | classes_text.append(tag_name.encode('utf8')) 44 | classes.append(tag_map[tag_name]) 45 | truncated.append(0) 46 | poses.append("Unspecified".encode('utf8')) 47 | difficult_obj.append(0) 48 | 49 | example = tf.train.Example(features=tf.train.Features(feature={ 50 | 'image/height': int64_feature([height]), 51 | 'image/width': int64_feature([width]), 52 | 'image/filename': bytes_feature([ 53 | filename.encode('utf8')]), 54 | 'image/source_id': bytes_feature([ 55 | filename.encode('utf8')]), 56 | 'image/key/sha256': bytes_feature([key.encode('utf8')]), 57 | 'image/encoded': bytes_feature([raw_img]), 58 | 'image/format': bytes_feature(['jpeg'.encode('utf8')]), 59 | 'image/object/bbox/xmin': float_feature(xmin), 60 | 'image/object/bbox/xmax': float_feature(xmax), 61 | 'image/object/bbox/ymin': float_feature(ymin), 62 | 'image/object/bbox/ymax': float_feature(ymax), 63 | 'image/object/class/text': bytes_feature(classes_text), 64 | 'image/object/class/label': int64_feature(classes), 65 | 'image/object/difficult': int64_feature(difficult_obj), 66 | 'image/object/truncated': int64_feature(truncated), 67 | 'image/object/view': bytes_feature(poses), 68 | })) 69 | return example 70 | 71 | def create_tf_record(pred_file, record_file, image_loc, user_folders, split_names=["train","val"], 72 | split_percent=[.7,.3], tag_names = ["stamp"], test_file=None): 73 | 74 | record_file = Path(record_file) 75 | with open(pred_file, 'r') as file: 76 | reader = csv.reader(file) 77 | next(reader, None) 78 | all_preds = list(reader) 79 | 80 | all_files = defaultdict(list) 81 | if test_file is not None: 82 | with open(test_file, 'r') as file: 83 | reader = csv.reader(file) 84 | next(reader, None) 85 | all_test = set((row[0] for row in reader)) 86 | for row in all_preds: 87 | if row[0] not in all_test: 88 | all_files[row[0]].append(row) 89 | else: 90 | for row in all_preds: 91 | all_files[row[0]].append(row) 92 | 93 | rand_list = list(all_files) 94 | np.random.shuffle(rand_list) 95 | split_percent = np.cumsum(split_percent) 96 | split_percent = split_percent[:-1] 97 | split_percent *= len(rand_list) 98 | split_percent = split_percent.round().astype(np.int) 99 | split_preds = np.split(rand_list,split_percent) 100 | 101 | tag_map = {name: index for index, name in enumerate(tag_names, 1)} 102 | 103 | for name, filenames in zip(split_names, split_preds): 104 | writer = tf.python_io.TFRecordWriter("{}_{}".format(record_file.with_suffix(''), name) + record_file.suffix) 105 | for filename in filenames: 106 | predictions = all_files[filename] 107 | if user_folders: 108 | file_loc = str(Path(image_loc)/predictions[0][FOLDER_LOCATION]/filename) 109 | else: 110 | file_loc = str(Path(image_loc)/filename) 111 | with open(file_loc, "rb") as img_file: 112 | raw_img = img_file.read() 113 | tf_example = create_tf_example(predictions, raw_img, tag_map) 114 | writer.write(tf_example.SerializeToString()) 115 | 116 | writer.close() 117 | 118 | if __name__ == "__main__": 119 | #select_jsons(r"C:\Users\t-yapand\Desktop\GAUCC",r"C:\Users\t-yapand\Desktop\GAUCC.json",True,r"C:\Users\t-yapand\Desktop\GAUCC1_1533070038606.csv") 120 | from 
azure.storage.blob import BlockBlobService 121 | import sys 122 | import os 123 | # Allow us to import utils 124 | config_dir = str(Path.cwd().parent / "utils") 125 | if config_dir not in sys.path: 126 | sys.path.append(config_dir) 127 | from config import Config 128 | if len(sys.argv)<2: 129 | raise ValueError("Need to specify config file") 130 | config_file = Config.parse_file(sys.argv[1]) 131 | block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"]) 132 | container_name = config_file["label_container_name"] 133 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagged_(.*).csv', blob.name)] 134 | if file_date: 135 | block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], config_file["tagged_output"]) 136 | else: 137 | raise ValueError("No tagged data exists. Cannot train model without any tagged data.") 138 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'test_(.*).csv', blob.name)] 139 | if file_date: 140 | block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], config_file["test_output"]) 141 | create_tf_record(config_file["tagged_output"],config_file["tf_record_location"],config_file["image_dir"], 142 | config_file["user_folders"]=="True", tag_names=config_file["classes"].split(","), test_file=config_file["test_output"]) 143 | else: 144 | create_tf_record(config_file["tagged_output"],config_file["tf_record_location"],config_file["image_dir"], 145 | config_file["user_folders"]=="True", tag_names=config_file["classes"].split(",")) 146 | -------------------------------------------------------------------------------- /train/create_predictions.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from pathlib import Path 3 | from typing import List, Tuple, Dict, AbstractSet 4 | import json 5 | import cv2 6 | import csv 7 | from collections import defaultdict 8 | import numpy as np 9 | 10 | NUM_CHANNELS=3 11 | FOLDER_LOCATION=8 12 | 13 | PREDICTIONS_SCHEMA = \ 14 | ["filename", "class", "xmin","xmax","ymin","ymax","height","width","folder", "box_confidence", "image_confidence"] 15 | PREDICTIONS_SCHEMA_NO_FOLDER =\ 16 | ["filename", "class", "xmin","xmax","ymin","ymax","height","width","box_confidence", "image_confidence"] 17 | 18 | #name,prediction[CLASS_IDX],prediction[XMIN_IDX],prediction[XMAX_IDX],prediction[YMIN_IDX],prediction[YMAX_IDX],height,width,folder,prediction[BOX_CONFID_IDX], confidence 19 | BOX_CONFID_IDX = 0 20 | CLASS_IDX = 1 21 | XMIN_IDX = 3 22 | XMAX_IDX = 5 23 | YMIN_IDX = 2 24 | YMAX_IDX = 4 25 | 26 | 27 | def calculate_confidence(predictions): 28 | return min([float(prediction[0]) for prediction in predictions]) 29 | 30 | def make_csv_output(all_predictions: List[List[List[int]]], all_names: List[str], all_sizes: List[Tuple[int]], 31 | untagged_output: str, tagged_output: str, file_set: AbstractSet, user_folders: bool = True): 32 | ''' 33 | Write a list of Detector class predictions, along with each image's size, 34 | to tagged/untagged CSV files using the prediction schemas above. 
35 | ''' 36 | with open(tagged_output, 'w', newline='') as tagged_file, open(untagged_output, 'w', newline='') as untagged_file: 37 | tagged_writer = csv.writer(tagged_file) 38 | untagged_writer = csv.writer(untagged_file) 39 | if user_folders: 40 | tagged_writer.writerow(PREDICTIONS_SCHEMA) 41 | untagged_writer.writerow(PREDICTIONS_SCHEMA) 42 | else: 43 | tagged_writer.writerow(PREDICTIONS_SCHEMA_NO_FOLDER) 44 | untagged_writer.writerow(PREDICTIONS_SCHEMA_NO_FOLDER) 45 | if user_folders: 46 | for (folder, name), predictions, (height, width) in zip(all_names, all_predictions, all_sizes): 47 | if not predictions: 48 | predictions = [[0,"NULL",0,0,0,0]] 49 | confidence = calculate_confidence(predictions) 50 | for prediction in predictions: 51 | (tagged_writer if name in file_set[folder] else untagged_writer).writerow([ 52 | name, 53 | prediction[CLASS_IDX],prediction[XMIN_IDX],prediction[XMAX_IDX], 54 | prediction[YMIN_IDX],prediction[YMAX_IDX],height,width, 55 | folder, 56 | prediction[BOX_CONFID_IDX], confidence]) 57 | else: 58 | for name, predictions, (height,width) in zip(all_names, all_predictions, all_sizes): 59 | if not predictions: 60 | predictions = [[0,"NULL",0,0,0,0]] 61 | confidence = calculate_confidence(predictions) 62 | for prediction in predictions: 63 | (tagged_writer if name in file_set else untagged_writer).writerow([ 64 | name, 65 | prediction[CLASS_IDX], prediction[XMIN_IDX], prediction[XMAX_IDX], 66 | prediction[YMIN_IDX], prediction[YMAX_IDX], height, width, 67 | prediction[BOX_CONFID_IDX], confidence]) 68 | 69 | def get_suggestions(detector, basedir: str, untagged_output: str, 70 | tagged_output: str, cur_tagged: str, cur_tagging: str, min_confidence: float =.2, 71 | image_size: Tuple=(1000,750), filetype: str="*.jpg", minibatchsize: int=50, 72 | user_folders: bool=True): 73 | '''Gets suggestions from a given detector and writes them out for tagging 74 | 75 | The function takes an instance of the Detector class along with a directory, 76 | and optionally a confidence interval, image size, and tag information (name and color). 77 | It records, for each file in that directory's subfolders, how confident the 78 | given Detector was in predicting bounding boxes on that file. 79 | The predictions are written to the tagged/untagged CSV files, from which VOTT JSON tags are later generated. 
80 | The optional confidence interval and image size correspond to the matching optional 81 | arguments to the Detector class 82 | ''' 83 | basedir = Path(basedir) 84 | CV2_COLOR_LOAD_FLAG = 1 85 | all_predictions = [] 86 | all_tagged = [] 87 | if user_folders: 88 | # TODO: Cross reference with ToTag 89 | # download latest tagging and tagged 90 | if cur_tagged is not None: 91 | with open(cur_tagged, 'r') as file: 92 | reader = csv.reader(file) 93 | next(reader, None) 94 | all_tagged = list(reader) 95 | if cur_tagging is not None: 96 | with open(cur_tagging, 'r') as file: 97 | reader = csv.reader(file) 98 | next(reader, None) 99 | all_tagged.extend(list(reader)) 100 | already_tagged = defaultdict(set) 101 | for row in all_tagged: 102 | already_tagged[row[FOLDER_LOCATION]].add(row[0]) 103 | subdirs = [subfile for subfile in basedir.iterdir() if subfile.is_dir()] 104 | print("subdirs: ", subdirs) 105 | all_names = [] 106 | all_image_files = [] 107 | all_sizes = [] 108 | for subdir in subdirs: 109 | cur_image_names = list(subdir.rglob(filetype)) 110 | print("Total image names: ", len(cur_image_names)) 111 | all_image_files += [str(image_name) for image_name in cur_image_names] 112 | foldername = subdir.stem 113 | all_names += [(foldername, filename.name) for filename in cur_image_names] 114 | # Reversed because numpy is row-major 115 | all_sizes = [cv2.imread(image, CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] 116 | all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) 117 | for curindex, image in enumerate(all_image_files): 118 | all_images[curindex] = cv2.resize(cv2.imread(image, CV2_COLOR_LOAD_FLAG), image_size) 119 | print("Shape of all_images: ", all_images.shape) 120 | all_predictions = detector.predict(all_images, min_confidence=min_confidence) 121 | else: 122 | with open(cur_tagged, 'r') as file: 123 | reader = csv.reader(file) 124 | next(reader, None) 125 | already_tagged = {row[0] for row in reader} 126 | with open(cur_tagging, 'r') as file: 127 | reader = csv.reader(file) 128 | next(reader, None) 129 | already_tagged |= {row[0] for row in reader} 130 | all_image_files = list(basedir.rglob(filetype)) 131 | all_names = [filename.name for filename in all_image_files] 132 | all_sizes = [cv2.imread(str(image), CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] 133 | all_images = np.zeros((len(all_image_files), *reversed(image_size), NUM_CHANNELS), dtype=np.uint8) 134 | for curindex, image in enumerate(all_image_files): 135 | all_images[curindex] = cv2.resize(cv2.imread(str(image), CV2_COLOR_LOAD_FLAG), image_size) 136 | all_predictions = detector.predict(all_images, batch_size=2, min_confidence=min_confidence) 137 | make_csv_output(all_predictions, all_names, all_sizes, untagged_output, tagged_output, already_tagged, user_folders) 138 | 139 | if __name__ == "__main__": 140 | from azure.storage.blob import BlockBlobService 141 | from tf_detector import TFDetector 142 | import re 143 | import sys 144 | import os 145 | # Allow us to import utils 146 | config_dir = str(Path.cwd().parent / "utils") 147 | if config_dir not in sys.path: 148 | sys.path.append(config_dir) 149 | from config import Config 150 | if len(sys.argv)<2: 151 | raise ValueError("Need to specify config file") 152 | config_file = Config.parse_file(sys.argv[1]) 153 | image_dir = config_file["image_dir"] 154 | untagged_output = config_file["untagged_output"] 155 | tagged_output = config_file["tagged_predictions"] 156 | block_blob_service = 
BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"]) 157 | container_name = config_file["label_container_name"] 158 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagged_(.*).csv', blob.name)] 159 | cur_tagged = None 160 | cur_tagging = None 161 | classes = [] 162 | model = None 163 | if len(sys.argv) > 3 and (sys.argv[2].lower() =='init_pred'): 164 | print("Using MS COCO pretrained model to detect known 90 classes. For class id <-> name mapping check this file: https://github.com/tensorflow/models/blob/master/research/object_detection/data/mscoco_label_map.pbtxt") 165 | model = sys.argv[3] 166 | print("Using model: " + model) 167 | classesIDs = list(range(1, 91)) 168 | classes = [str(x) for x in classesIDs] 169 | else: 170 | classes = config_file["classes"].split(",") 171 | model = str(Path(config_file["inference_output_dir"])/"frozen_inference_graph.pb") 172 | if file_date: 173 | block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], "tagged.csv") 174 | cur_tagged = "tagged.csv" 175 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)] 176 | if file_date: 177 | block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], "tagging.csv") 178 | cur_tagging = "tagging.csv" 179 | 180 | cur_detector = TFDetector(classes, model) 181 | get_suggestions(cur_detector, image_dir, untagged_output, tagged_output, cur_tagged, cur_tagging, filetype=config_file["filetype"], min_confidence=float(config_file["min_confidence"]), user_folders=config_file["user_folders"]=="True") 182 | -------------------------------------------------------------------------------- /train/cv_train.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import csv 3 | from pathlib import Path 4 | import re 5 | import functools 6 | from typing import List, Tuple, Dict, AbstractSet 7 | import time 8 | import cv2 9 | 10 | from azure.cognitiveservices.vision.customvision.training import training_api 11 | from azure.cognitiveservices.vision.customvision.prediction import prediction_endpoint 12 | from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, Region 13 | 14 | # Make sure this is less than all the API limits 15 | IMAGE_BATCH_SIZE = 64 16 | IMAGE_NAME_LOCATION = 0 17 | TAG_NAME_LOCATION = 1 18 | FOLDER_LOCATION = 8 19 | HEIGHT_LOCATION = 6 20 | WIDTH_LOCATION = 7 21 | X_MIN_LOCATION = 2 22 | X_MAX_LOCATION = 3 23 | Y_MIN_LOCATION = 4 24 | Y_MAX_LOCATION = 5 25 | 26 | def calculate_confidence(predictions): 27 | return min([float(prediction[0]) for prediction in predictions]) 28 | 29 | def convert_row_to_region(tag_map, row): 30 | tag = tag_map[row[TAG_NAME_LOCATION]] 31 | x = float(row[X_MIN_LOCATION]) 32 | y = float(row[Y_MIN_LOCATION]) 33 | width = float(row[X_MAX_LOCATION]) - x 34 | height = float(row[Y_MAX_LOCATION])- y 35 | return Region(tag_id=tag.id, left=x,top=y,width=width,height=height) 36 | 37 | def pred_to_list(prediction): 38 | x_min = prediction.bounding_box.left 39 | x_max = x_min + prediction.bounding_box.width 40 | y_min = prediction.bounding_box.top 41 | y_max = y_min + prediction.bounding_box.height 42 | return [prediction.probability, prediction.tag_name, y_min, x_min, y_max, x_max] 43 | 
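# Worked example for the two helpers above (values taken from test/totag_source2.csv;
# the walk-through itself is illustrative, not part of the training flow): a CSV row
# in the schema (filename, class, xmin, xmax, ymin, ymax, height, width, folder,
# box_confidence, image_confidence), such as
#   ['st1026.png', 'knot', '0.69150746', '0.7407868', '0.3375946', '0.39474854',
#    '512', '488', 'board_images_png', '0.9990602', '0.54169416']
# is converted by convert_row_to_region() into a Region with
#   left  = xmin = 0.69150746,            top    = ymin = 0.3375946,
#   width = xmax - xmin = 0.04927934...,  height = ymax - ymin = 0.05715394...
# pred_to_list() performs the inverse mapping for Custom Vision predictions, returning
# [probability, tag_name, y_min, x_min, y_max, x_max] -- the confidence-first,
# y-before-x ordering expected by calculate_confidence() above and make_csv_output() below.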
44 | def make_csv_output(all_predictions: List[List[List[int]]], all_names: List[str], all_sizes: List[Tuple[int]], 45 | untagged_output: str, tagged_output: str, file_set: AbstractSet, user_folders: bool = True): 46 | ''' 47 | Write a list of Custom Vision predictions, along with each image's size, 48 | to tagged/untagged CSV files (same schema as in create_predictions.py). 49 | ''' 50 | with open(tagged_output, 'w', newline='') as tagged_file, open(untagged_output, 'w', newline='') as untagged_file: 51 | tagged_writer = csv.writer(tagged_file) 52 | untagged_writer = csv.writer(untagged_file) 53 | if user_folders: 54 | tagged_writer.writerow(["filename", "class", "xmin","xmax","ymin","ymax","height","width","folder", "box_confidence", "image_confidence"]) 55 | untagged_writer.writerow(["filename", "class", "xmin","xmax","ymin","ymax","height","width","folder", "box_confidence", "image_confidence"]) 56 | else: 57 | tagged_writer.writerow(["filename", "class", "xmin","xmax","ymin","ymax","height","width","box_confidence", "image_confidence"]) 58 | untagged_writer.writerow(["filename", "class", "xmin","xmax","ymin","ymax","height","width","box_confidence", "image_confidence"]) 59 | if user_folders: 60 | for (folder, name), predictions, (height, width) in zip(all_names, all_predictions, all_sizes): 61 | if not predictions: 62 | predictions = [[0,"NULL",0,0,0,0]] 63 | else: 64 | predictions = [pred_to_list(prediction) for prediction in predictions] 65 | confidence = calculate_confidence(predictions) 66 | for prediction in predictions: 67 | (tagged_writer if name in file_set[folder] else untagged_writer).writerow([name,prediction[1],prediction[3],prediction[5],prediction[2],prediction[4],height,width,folder,prediction[0], confidence]) 68 | else: 69 | for name, predictions, (height,width) in zip(all_names, all_predictions, all_sizes): 70 | # mirror the user_folders branch above: convert Custom Vision predictions to lists 71 | predictions = [pred_to_list(prediction) for prediction in predictions] if predictions else [[0,"NULL",0,0,0,0]] 72 | confidence = calculate_confidence(predictions) 73 | for prediction in predictions: 74 | (tagged_writer if name in file_set else untagged_writer).writerow([name,prediction[1],prediction[3],prediction[5],prediction[2],prediction[4],height,width,prediction[0], confidence]) 75 | 76 | def create_cv_predictions(image_loc, predictor, project_id, output_file_tagged, output_file_untagged, tagged_images, tagging_images, 77 | filetype, min_confidence=.2, user_folders=True): 78 | basedir = Path(image_loc) 79 | CV2_COLOR_LOAD_FLAG = 1 80 | all_predictions, all_tagged = [], []  # all_tagged must exist even if no tagged csv was passed 81 | if user_folders: 82 | if tagged_images is not None: 83 | with open(tagged_images, 'r') as file: 84 | reader = csv.reader(file) 85 | next(reader, None) 86 | all_tagged = list(reader) 87 | if tagging_images is not None: 88 | with open(tagging_images, 'r') as file: 89 | reader = csv.reader(file) 90 | next(reader, None) 91 | all_tagged.extend(list(reader)) 92 | already_tagged = defaultdict(set) 93 | for row in all_tagged: 94 | already_tagged[row[FOLDER_LOCATION]].add(row[0]) 95 | subdirs = [subfile for subfile in basedir.iterdir() if subfile.is_dir()] 96 | all_names = [] 97 | all_image_files = [] 98 | all_sizes = [] 99 | all_predictions = [] 100 | for subdir in subdirs: 101 | cur_image_names = list(subdir.rglob(filetype)) 102 | all_image_files += [str(image_name) for image_name in cur_image_names] 103 | foldername = subdir.stem 104 | all_names += [(foldername, filename.name) for filename in cur_image_names] 105 | for image in cur_image_names: 106 | with image.open(mode="rb") as img_data: 107 | all_predictions.append(predictor.predict_image(project_id, img_data).predictions) 108 | 
all_sizes = [cv2.imread(image, CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] 109 | 110 | else: 111 | with open(tagged_images, 'r') as file: 112 | reader = csv.reader(file) 113 | next(reader, None) 114 | already_tagged = {row[0] for row in reader} 115 | with open(tagging_images, 'r') as file: 116 | reader = csv.reader(file) 117 | next(reader, None) 118 | already_tagged |= {row[0] for row in reader} 119 | all_image_files = list(basedir.rglob(filetype)) 120 | all_names = [filename.name for filename in all_image_files] 121 | all_predictions = [] 122 | for image in all_image_files: 123 | with image.open(mode="rb") as img_data: 124 | all_predictions.append(predictor.predict_image(project_id, img_data).predictions) 125 | all_sizes = [cv2.imread(str(image), CV2_COLOR_LOAD_FLAG).shape[:2] for image in all_image_files] 126 | make_csv_output(all_predictions, all_names, all_sizes, output_file_untagged, output_file_tagged, already_tagged, user_folders) 127 | 128 | 129 | def train_cv_model(tags_file, trainer, project_id, image_loc, user_folders, tag_names = ["stamp"], test_file=None): 130 | 131 | # Make sure tag_names are in custom vision and create tag_map 132 | all_tag_names = {tag.name:tag for tag in trainer.get_tags(project_id)} 133 | for tag_name in tag_names: 134 | if tag_name not in all_tag_names: 135 | all_tag_names[tag_name] = trainer.create_tag(project_id, tag_name) 136 | get_region = functools.partial(convert_row_to_region, all_tag_names) 137 | 138 | num_tagged_images = trainer.get_tagged_image_count(project_id) 139 | all_images = [] 140 | for num_to_skip in range(0, num_tagged_images, IMAGE_BATCH_SIZE): 141 | all_images+=trainer.get_tagged_images(project_id, take=IMAGE_BATCH_SIZE, skip=num_to_skip) 142 | all_existing_image_names = set(image.id for image in all_images) 143 | 144 | with open(tags_file, 'r') as file: 145 | reader = csv.reader(file) 146 | next(reader, None) 147 | all_tags = list(reader) 148 | 149 | all_train = defaultdict(list) 150 | if user_folders: 151 | if test_file is not None: 152 | with open(test_file, 'r') as file: 153 | testreader = csv.reader(file) 154 | next(testreader, None) 155 | all_test = set(row[IMAGE_NAME_LOCATION]+"/"+row[FOLDER_LOCATION] for row in testreader) 156 | for row in all_tags: 157 | if row[0] not in all_test: 158 | all_train[row[IMAGE_NAME_LOCATION]+"/"+row[FOLDER_LOCATION]].append(row) 159 | 160 | # Remove images from test set that are in custom vision training set 161 | images_to_delete = [] 162 | for image_name in all_test: 163 | if image_name in all_existing_image_names: 164 | images_to_delete.append(image_name) 165 | for cur_index in range(0,len(images_to_delete),IMAGE_BATCH_SIZE): 166 | trainer.delete_images(project_id, images_to_delete[cur_index:cur_index+IMAGE_BATCH_SIZE]) 167 | 168 | else: 169 | for row in all_tags: 170 | all_train[row[IMAGE_NAME_LOCATION]+"/"+row[FOLDER_LOCATION]].append(row) 171 | 172 | # Add images from training set that are not yet in custom vision training set 173 | to_upload = [] 174 | for image_name, information in all_train.items(): 175 | regions = [get_region(row) for row in information] 176 | with (Path(image_loc)/information[0][FOLDER_LOCATION]/information[0][IMAGE_NAME_LOCATION]).open(mode="rb") as image_contents: 177 | to_upload.append(ImageFileCreateEntry(name=image_name, contents=image_contents.read(), regions=regions)) 178 | for cur_index in range(0,len(to_upload),IMAGE_BATCH_SIZE): 179 | trainer.create_images_from_files(project_id, images=to_upload[cur_index:cur_index+IMAGE_BATCH_SIZE]) 180 | 
181 | else: 182 | if test_file is not None: 183 | with open(test_file, 'r') as file: 184 | testreader = csv.reader(file) 185 | next(testreader, None) 186 | all_test = set(row[IMAGE_NAME_LOCATION] for row in testreader) 187 | for row in all_tags: 188 | if row[IMAGE_NAME_LOCATION] not in all_test: 189 | all_train[row[IMAGE_NAME_LOCATION]].append(row) 190 | 191 | # Remove images from test set that are in custom vision training set 192 | images_to_delete = [] 193 | for image_name in all_test: 194 | if image_name in all_existing_image_names: 195 | images_to_delete.append(image_name) 196 | for cur_index in range(0,len(images_to_delete),IMAGE_BATCH_SIZE): 197 | trainer.delete_images(project_id, images_to_delete[cur_index:cur_index+IMAGE_BATCH_SIZE]) 198 | 199 | else: 200 | for row in all_tags: 201 | all_train[row[IMAGE_NAME_LOCATION]].append(row) 202 | 203 | # Add images from training set that are not yet in custom vision training set 204 | to_upload = [] 205 | for image_name, information in all_train.items(): 206 | regions = [get_region(row) for row in information] 207 | with (Path(image_loc)/image_name).open(mode="rb") as image_contents: 208 | to_upload.append(ImageFileCreateEntry(name=image_name, contents=image_contents.read(), regions=regions)) 209 | for cur_index in range(0,len(to_upload),IMAGE_BATCH_SIZE): 210 | trainer.create_images_from_files(project_id, images=to_upload[cur_index:cur_index+IMAGE_BATCH_SIZE]) 211 | print("trainer works") 212 | 213 | print ("Training...") 214 | iteration = trainer.train_project(project_id) 215 | while (iteration.status != "Completed"): 216 | iteration = trainer.get_iteration(project_id, iteration.id) 217 | print ("Training status: " + iteration.status) 218 | time.sleep(1) 219 | 220 | # The iteration is now trained. Make it the default project endpoint 221 | trainer.update_iteration(project_id, iteration.id, is_default=True) 222 | print ("Done!") 223 | 224 | 225 | if __name__ == "__main__": 226 | from azure.storage.blob import BlockBlobService 227 | import sys 228 | # Allow us to import utils 229 | config_dir = str(Path.cwd().parent / "utils") 230 | if config_dir not in sys.path: 231 | sys.path.append(config_dir) 232 | from config import Config 233 | if len(sys.argv)<2: 234 | raise ValueError("Need to specify config file") 235 | config_file = Config.parse_file(sys.argv[1]) 236 | block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"]) 237 | label_container_name = config_file["label_container_name"] 238 | img_container_name = config_file["image_container_name"] 239 | image_folder_name = config_file["image_dir"] 240 | from update_blob_folder import update_folder 241 | update_folder(image_folder_name, block_blob_service, img_container_name) 242 | 243 | 244 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(label_container_name) if re.match(r'tagged_(.*).csv', blob.name)] 245 | if file_date: 246 | block_blob_service.get_blob_to_path(label_container_name, max(file_date, key=lambda x:x[1])[0], config_file["tagged_output"]) 247 | else: 248 | raise ValueError("No tagged data exists. 
249 | 
250 |     from map_validation import detectortest
251 |     trainer = training_api.TrainingApi(config_file["training_key"])
252 |     predictor = prediction_endpoint.PredictionEndpoint(config_file["prediction_key"])
253 |     file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(label_container_name) if re.match(r'test_(.*).csv', blob.name)]
254 |     if file_date:
255 |         block_blob_service.get_blob_to_path(label_container_name, max(file_date, key=lambda x:x[1])[0], config_file["test_output"])
256 |         train_cv_model(config_file["tagged_output"], trainer, config_file["project_id"], config_file["image_dir"], config_file["user_folders"]=="True",
257 |             tag_names=config_file["classes"].split(","), test_file=config_file["test_output"])
258 | 
259 |         file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(label_container_name) if re.match(r'tagging_(.*).csv', blob.name)]
260 |         cur_tagging = None
261 |         if file_date:
262 |             block_blob_service.get_blob_to_path(label_container_name, max(file_date, key=lambda x:x[1])[0], "tagging.csv")
263 |             cur_tagging = "tagging.csv"
264 | 
265 |         create_cv_predictions(config_file["image_dir"], predictor, config_file["project_id"], config_file["tagged_predictions"], config_file["untagged_output"],
266 |             config_file["tagged_output"], cur_tagging, filetype=config_file["filetype"], min_confidence=float(config_file["min_confidence"]), user_folders=config_file["user_folders"]=="True")
267 |         detectortest(config_file["tagged_predictions"], config_file["test_output"], config_file["validation_output"], config_file["user_folders"]=="True")
268 |     else:
269 |         train_cv_model(config_file["tagged_output"], trainer, config_file["project_id"], config_file["image_dir"], config_file["user_folders"]=="True",
270 |             tag_names=config_file["classes"].split(","))
271 | 
272 |         file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(label_container_name) if re.match(r'tagging_(.*).csv', blob.name)]
273 |         cur_tagging = None
274 |         if file_date:
275 |             block_blob_service.get_blob_to_path(label_container_name, max(file_date, key=lambda x:x[1])[0], "tagging.csv")
276 |             cur_tagging = "tagging.csv"
277 | 
278 |         create_cv_predictions(config_file["image_dir"], predictor, config_file["project_id"], config_file["tagged_predictions"], config_file["untagged_output"],
279 |             config_file["tagged_output"], cur_tagging, filetype=config_file["filetype"], min_confidence=float(config_file["min_confidence"]), user_folders=config_file["user_folders"]=="True")
280 | 
281 |         detectortest(config_file["tagged_predictions"], config_file["tagged_output"], config_file["validation_output"], config_file["user_folders"]=="True")
282 |     # Upload validation files and new tags
283 |     block_blob_service.create_blob_from_path(label_container_name, "{}_{}.{}".format("performance",int(time.time() * 1000),"csv"), config_file["validation_output"])
284 |     block_blob_service.create_blob_from_path(label_container_name, "{}_{}.{}".format("totag",int(time.time() * 1000),"csv"), config_file["untagged_output"])
285 | 
--------------------------------------------------------------------------------
/train/export_inference_graph.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | r"""Tool to export an object detection model for inference. 17 | 18 | Prepares an object detection tensorflow graph for inference using model 19 | configuration and an optional trained checkpoint. Outputs inference 20 | graph, associated checkpoint files, a frozen inference graph and a 21 | SavedModel (https://tensorflow.github.io/serving/serving_basic.html). 22 | 23 | The inference graph contains one of three input nodes depending on the user 24 | specified option. 25 | * `image_tensor`: Accepts a uint8 4-D tensor of shape [None, None, None, 3] 26 | * `encoded_image_string_tensor`: Accepts a 1-D string tensor of shape [None] 27 | containing encoded PNG or JPEG images. Image resolutions are expected to be 28 | the same if more than 1 image is provided. 29 | * `tf_example`: Accepts a 1-D string tensor of shape [None] containing 30 | serialized TFExample protos. Image resolutions are expected to be the same 31 | if more than 1 image is provided. 32 | 33 | and the following output nodes returned by the model.postprocess(..): 34 | * `num_detections`: Outputs float32 tensors of the form [batch] 35 | that specifies the number of valid boxes per image in the batch. 36 | * `detection_boxes`: Outputs float32 tensors of the form 37 | [batch, num_boxes, 4] containing detected boxes. 38 | * `detection_scores`: Outputs float32 tensors of the form 39 | [batch, num_boxes] containing class scores for the detections. 40 | * `detection_classes`: Outputs float32 tensors of the form 41 | [batch, num_boxes] containing classes for the detections. 42 | * `detection_masks`: Outputs float32 tensors of the form 43 | [batch, num_boxes, mask_height, mask_width] containing predicted instance 44 | masks for each box if its present in the dictionary of postprocessed 45 | tensors returned by the model. 46 | 47 | Notes: 48 | * This tool uses `use_moving_averages` from eval_config to decide which 49 | weights to freeze. 50 | 51 | Example Usage: 52 | -------------- 53 | python export_inference_graph \ 54 | --input_type image_tensor \ 55 | --pipeline_config_path path/to/ssd_inception_v2.config \ 56 | --trained_checkpoint_prefix path/to/model.ckpt \ 57 | --output_directory path/to/exported_model_directory 58 | 59 | The expected output would be in the directory 60 | path/to/exported_model_directory (which is created if it does not exist) 61 | with contents: 62 | - graph.pbtxt 63 | - model.ckpt.data-00000-of-00001 64 | - model.ckpt.info 65 | - model.ckpt.meta 66 | - frozen_inference_graph.pb 67 | + saved_model (a directory) 68 | 69 | Config overrides (see the `config_override` flag) are text protobufs 70 | (also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override 71 | certain fields in the provided pipeline_config_path. 
These are useful for 72 | making small changes to the inference graph that differ from the training or 73 | eval config. 74 | 75 | Example Usage (in which we change the second stage post-processing score 76 | threshold to be 0.5): 77 | 78 | python export_inference_graph \ 79 | --input_type image_tensor \ 80 | --pipeline_config_path path/to/ssd_inception_v2.config \ 81 | --trained_checkpoint_prefix path/to/model.ckpt \ 82 | --output_directory path/to/exported_model_directory \ 83 | --config_override " \ 84 | model{ \ 85 | faster_rcnn { \ 86 | second_stage_post_processing { \ 87 | batch_non_max_suppression { \ 88 | score_threshold: 0.5 \ 89 | } \ 90 | } \ 91 | } \ 92 | }" 93 | """ 94 | import tensorflow as tf 95 | from google.protobuf import text_format 96 | from object_detection import exporter 97 | from object_detection.protos import pipeline_pb2 98 | 99 | slim = tf.contrib.slim 100 | flags = tf.app.flags 101 | 102 | flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be ' 103 | 'one of [`image_tensor`, `encoded_image_string_tensor`, ' 104 | '`tf_example`]') 105 | flags.DEFINE_string('input_shape', None, 106 | 'If input_type is `image_tensor`, this can explicitly set ' 107 | 'the shape of this input tensor to a fixed size. The ' 108 | 'dimensions are to be provided as a comma-separated list ' 109 | 'of integers. A value of -1 can be used for unknown ' 110 | 'dimensions. If not specified, for an `image_tensor, the ' 111 | 'default shape will be partially specified as ' 112 | '`[None, None, None, 3]`.') 113 | flags.DEFINE_string('pipeline_config_path', None, 114 | 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' 115 | 'file.') 116 | flags.DEFINE_string('trained_checkpoint_prefix', None, 117 | 'Path to trained checkpoint, typically of the form ' 118 | 'path/to/model.ckpt') 119 | flags.DEFINE_string('output_directory', None, 'Path to write outputs.') 120 | flags.DEFINE_string('config_override', '', 121 | 'pipeline_pb2.TrainEvalPipelineConfig ' 122 | 'text proto to override pipeline_config_path.') 123 | tf.app.flags.mark_flag_as_required('pipeline_config_path') 124 | tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix') 125 | tf.app.flags.mark_flag_as_required('output_directory') 126 | FLAGS = flags.FLAGS 127 | 128 | 129 | def main(_): 130 | pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() 131 | with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f: 132 | text_format.Merge(f.read(), pipeline_config) 133 | text_format.Merge(FLAGS.config_override, pipeline_config) 134 | if FLAGS.input_shape: 135 | input_shape = [ 136 | int(dim) if dim != '-1' else None 137 | for dim in FLAGS.input_shape.split(',') 138 | ] 139 | else: 140 | input_shape = None 141 | exporter.export_inference_graph(FLAGS.input_type, pipeline_config, 142 | FLAGS.trained_checkpoint_prefix, 143 | FLAGS.output_directory, input_shape) 144 | 145 | 146 | if __name__ == '__main__': 147 | tf.app.run() 148 | -------------------------------------------------------------------------------- /train/initialize_vott_pull.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import cv2 3 | from pathlib import Path 4 | import time 5 | def extract_data(filename): 6 | height, width, _ = cv2.imread(str(filename),1).shape 7 | return filename.name, height, width 8 | 9 | def select_jsons(image_directory, user_folders, classes, csv_filename, map_filename): 10 | with open(map_filename, "w") as map_file: 11 | for index, name in enumerate(classes, 1): 12 | 
map_file.write("item {{\n id: {}\n name: '{}'\n}}".format(index, name)) 13 | 14 | image_directory = Path(image_directory) 15 | if user_folders: 16 | all_images = [(extract_data(filename),filename.parent) for filename in image_directory.glob('**/*') if filename.is_file()] 17 | else: 18 | all_images = [extract_data(filename) for filename in image_directory.iterdir()] 19 | 20 | with open(csv_filename, 'w', newline='') as csv_file: 21 | csv_writer = csv.writer(csv_file) 22 | if user_folders: 23 | csv_writer.writerow(["filename","class","xmin","xmax","ymin","ymax","height","width","folder","box_confidence", "image_confidence"]) 24 | for (filename,true_height,true_width),folder in all_images: 25 | csv_writer.writerow([filename,"NULL",0,0,0,0,true_height,true_width,folder,0,0]) 26 | else: 27 | csv_writer.writerow(["filename","class","xmin","xmax","ymin","ymax","height","width","box_confidence", "image_confidence"]) 28 | for filename,true_height,true_width in all_images: 29 | csv_writer.writerow([filename,"NULL",0,0,0,0,true_height,true_width,0,0]) 30 | 31 | if __name__ == "__main__": 32 | from azure.storage.blob import BlockBlobService 33 | import sys 34 | import os 35 | # Allow us to import utils 36 | config_dir = str(Path.cwd().parent / "utils") 37 | if config_dir not in sys.path: 38 | sys.path.append(config_dir) 39 | from config import Config 40 | if len(sys.argv)<2: 41 | raise ValueError("Need to specify config file") 42 | config_file = Config.parse_file(sys.argv[1]) 43 | block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"]) 44 | container_name = config_file["label_container_name"] 45 | select_jsons(config_file["image_dir"],config_file["user_folders"]=="True", config_file["classes"].split(","), "totag.csv", config_file["label_map_path"]) 46 | block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("totag",int(time.time() * 1000),"csv"), "totag.csv") 47 | -------------------------------------------------------------------------------- /train/map_validation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import csv 4 | from collections import defaultdict 5 | from functools import partial 6 | from pathlib import Path 7 | from pandas._libs.hashtable import unique_label_indices 8 | HEIGHT, WIDTH = 1000, 1000 9 | FILENAME_LOCATION=0 10 | FOLDER_LOCATION=8 11 | CLASS_LOCATION=1 12 | PREDS_START=2 13 | PREDS_END=5 14 | BOX_CONFIDENCE_LOCATION=-2 15 | 16 | def get_map_for_class(zipped_data_arr, min_ious=np.linspace(.50, 0.95, 10, endpoint=True), 17 | avg_recalls = np.linspace(0.00, 1.00, 101, endpoint=True), nms_iou=.7): 18 | # Used linspace over arange for min_ious/avg_recalls due to issues with endpoints 19 | all_confs = [] 20 | all_correct_preds = [] 21 | num_total_detections = 0 22 | num_total_gtruths = 0 23 | for ground_arr, detector_arr in zipped_data_arr: 24 | num_gtruths = len(ground_arr) 25 | if not detector_arr: 26 | num_total_gtruths+=num_gtruths 27 | continue 28 | detector_arr = np.asarray(detector_arr, dtype=np.float64) 29 | # Sort by descending confidence, use mergesort to match COCO evaluation 30 | detector_arr = detector_arr[detector_arr[:,-1].argsort(kind='mergesort')[::-1]] 31 | det_x_min, det_x_max, det_y_min, det_y_max, confs = detector_arr.transpose() 32 | if nms_iou is not None: 33 | # Code for NMS 34 | all_indices_to_keep = [] 35 | cur_indices_to_keep = np.arange(len(detector_arr)) 36 | # Repeat until 
37 |             while cur_indices_to_keep.size>1:
38 |                 # Add the most confident element
39 |                 all_indices_to_keep.append(cur_indices_to_keep[0])
40 |                 cur_x_min = det_x_min[cur_indices_to_keep]
41 |                 cur_x_max = det_x_max[cur_indices_to_keep]
42 |                 cur_y_min = det_y_min[cur_indices_to_keep]
43 |                 cur_y_max = det_y_max[cur_indices_to_keep]
44 |                 intersect_widths = (np.minimum(cur_x_max[0], cur_x_max[1:]) - np.maximum(cur_x_min[0], cur_x_min[1:])).clip(min=0)
45 |                 intersect_heights = (np.minimum(cur_y_max[0], cur_y_max[1:]) - np.maximum(cur_y_min[0], cur_y_min[1:])).clip(min=0)
46 |                 intersect_areas = intersect_widths*intersect_heights
47 |                 # Inclusion exclusion principle!
48 |                 union_areas = ((cur_x_max[0]-cur_x_min[0])*(cur_y_max[0]-cur_y_min[0]) + (cur_x_max[1:]-cur_x_min[1:])*(cur_y_max[1:]-cur_y_min[1:])) - intersect_areas
49 |                 # Guard against zero-area boxes producing a zero union
50 |                 cur_ious = np.divide(intersect_areas, union_areas, out=union_areas, where=union_areas!=0)
51 |                 # Drop the box just kept, then keep only the remaining candidates
52 |                 # whose IOU with it is below the NMS threshold
53 |                 cur_indices_to_keep = cur_indices_to_keep[1:]
54 |                 cur_indices_to_keep = np.intersect1d(cur_indices_to_keep, cur_indices_to_keep[np.nonzero(cur_ious < nms_iou)[0]], assume_unique=True)
55 |             if cur_indices_to_keep.size==1:
56 |                 all_indices_to_keep.append(cur_indices_to_keep[0])
57 |             detector_arr = detector_arr[np.asarray(all_indices_to_keep)]
58 |             det_x_min, det_x_max, det_y_min, det_y_max, confs = detector_arr.transpose()
59 |         num_detections = len(detector_arr)
60 |         if not ground_arr:
61 |             num_total_detections+=num_detections
62 |             all_confs.append(confs)
63 |             continue
64 |         ground_arr = np.asarray(ground_arr, dtype=np.float64)
65 |         ground_x_min, ground_x_max, ground_y_min, ground_y_max = ground_arr.transpose()
66 |         # Clip negative since negative implies no overlap
67 |         intersect_widths = (np.minimum(det_x_max[:, np.newaxis], ground_x_max) - np.maximum(det_x_min[:, np.newaxis], ground_x_min)).clip(min=0)
68 |         intersect_heights = (np.minimum(det_y_max[:, np.newaxis], ground_y_max) - np.maximum(det_y_min[:, np.newaxis], ground_y_min)).clip(min=0)
69 |         intersect_areas = intersect_widths*intersect_heights
70 |         # Inclusion exclusion principle!
71 |         union_areas = ((det_x_max-det_x_min)*(det_y_max-det_y_min))[:, np.newaxis] + ((ground_x_max-ground_x_min)*(ground_y_max-ground_y_min)) - intersect_areas
72 |         # Just in case a ground truth has zero area
73 |         iou = np.divide(intersect_areas, union_areas, out=union_areas, where=union_areas!=0)
74 |         # The best ground truth for a prediction is the one with highest IOU. This is an array of size num_detections, where
75 |         # best_gtruths[i] is the index of the ground truth to which prediction i is most similar (highest IOU)
76 |         best_gtruths = np.argmax(iou, axis=1)
77 |         # valid_preds is a generator where each element is a numpy int array. Each numpy array corresponds to
78 |         # a min_iou in the min_ious array, and has indices corresponding to the predictions whose
79 |         # prediction-ground truth pairs have IOU greater than that min_iou.
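        # Illustrative example (not from the original source): with min_ious = [0.5, 0.75]
        # and three detections whose best-match IOUs are [0.6, 0.8, 0.4], the line below
        # yields indices [0, 1] at the 0.5 threshold and [1] at the 0.75 threshold.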
80 |         valid_preds = map(np.nonzero, iou[np.arange(num_detections), best_gtruths]>min_ious[:, np.newaxis])
81 |         #
82 |         ## Useful for standard precision/recall metrics
83 |         # num_true_positives = np.count_nonzero(np.bincount(best_gtruths[valid_preds]))
84 |         # num_false_positives = num_detections - detected_gtruths
85 |         # num_false_negatives = num_gtruths - detected_gtruths
86 |         #
87 |         # best_gtruths[valid_preds] uses the previously calculated valid_preds array to return an array
88 |         # containing the ground-truth indices for each prediction whenever the ground truth-prediction
89 |         # IOU was greater than min_iou. Then unique_label_indices is used to find the leftmost occurring
90 |         # ground truth index for each ground truth index, which corresponds to finding the true positives
91 |         # (since we only consider the highest confidence prediction for each ground truth to be a true
92 |         # positive, rest are false positives)
93 |         # Note that pandas unique_label_indices is equivalent to np.unique(labels, return_index=True)[1] but
94 |         # is considerably faster due to using a hashtable instead of sorting
95 |         # Once the indices of the true positive predictions are found in the smaller array containing only
96 |         # predictions with IOU > min_iou, they are converted back into indices for the original array
97 |         # using valid_preds.
98 |         correct_preds = [valid_pred[0][unique_label_indices(best_gtruths[valid_pred[0]])]+num_total_detections for valid_pred in valid_preds]
99 |         all_correct_preds.append(correct_preds)
100 |         all_confs.append(confs)
101 |         num_total_detections += num_detections
102 |         num_total_gtruths += num_gtruths
103 |     # Edge case of no predictions for a class
104 |     if not all_confs:
105 |         return 0
106 |     # Concatenates all predictions and confidences together to find class MAP
107 |     all_confs = np.concatenate(all_confs)
108 |     all_correct_preds = [np.concatenate(cur_pred) for cur_pred in zip(*all_correct_preds)]
109 |     # Sets only correct prediction indices to true, rest to false.
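    # true_positives has shape (len(min_ious), num_total_detections); entry [i, j] is
    # True when detection j (indexed in per-image processing order, before the global
    # confidence sort below) was the highest-confidence match for some ground truth at
    # threshold min_ious[i].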
110 |     true_positives = np.zeros((len(min_ious), num_total_detections), dtype=bool)
111 |     for iou_index, positive_locs in enumerate(all_correct_preds):
112 |         true_positives[iou_index][positive_locs]=True
113 |     # Mergesort is chosen to be consistent with coco/matlab results
114 |     sort_order = all_confs.argsort(kind='mergesort')[::-1]
115 |     true_positives = true_positives[:,sort_order]
116 |     # Keeps track of number of true positives until each given point
117 |     all_true_positives = np.cumsum(true_positives, axis=1)
118 |     # PASCAL VOC 2012-style AP: exact area under the precision-recall curve
119 |     if avg_recalls is None:
120 |         # Zero pad both sides to calculate area under curve
121 |         precision = np.zeros((len(min_ious), num_total_detections+2), dtype=np.float64)
122 |         # Pad one side with zeros and the other with ones for area under curve
123 |         recall = np.zeros((len(min_ious), num_total_detections+2), dtype=np.float64)
124 |         recall[:,-1] = np.ones(len(min_ious), dtype=np.float64)
125 |         # In python >=3 this is equivalent to np.true_divide
126 |         precision[:,1:-1] = all_true_positives / np.arange(1, num_total_detections+1)
127 |         # Makes each element in precision list max of all elements to right (ignores endpoints)
128 |         precision[:,1:-1] = np.maximum.accumulate(precision[:,-2:0:-1], axis=1)[:,::-1]
129 |         recall[:,1:-1] = all_true_positives / num_total_gtruths
130 |         # Calculate area under P-R curve for each IOU
131 |         # Should only be one IOU at .5 for PASCAL
132 |         all_areas = []
133 |         for cur_recall, cur_precision in zip(recall, precision):
134 |             # Find indices where value of recall changes
135 |             change_points = np.nonzero(cur_recall[1:]!=cur_recall[:-1])[0]
136 |             # Calculate sum of dw * dh as area and append to all areas
137 |             all_areas.append(np.sum((cur_recall[change_points+1] - cur_recall[change_points]) * cur_precision[change_points+1]))
138 |         return np.mean(all_areas)
139 |     # PASCAL VOC 2007-style AP: precision averaged at fixed recall sample points
140 |     else:
141 |         # The extra zero is to deal with a recall larger than is achieved by model
142 |         precision = np.zeros((len(min_ious), num_total_detections+1), dtype=np.float64)
143 |         # In python >=3 this is equivalent to np.true_divide
144 |         precision[:,:-1] = all_true_positives / np.arange(1, num_total_detections+1)
145 |         # Makes each element in precision list max of all elements to right (extra zero at right doesn't matter)
146 |         precision = np.maximum.accumulate(precision[:,::-1], axis=1)[:,::-1]
147 |         recall = all_true_positives / num_total_gtruths
148 |         # For each recall, finds leftmost index (i.e. largest precision) greater than it
149 |         indices_to_average = np.apply_along_axis(np.searchsorted, 1, recall, avg_recalls)
150 |         # Finds matching largest prediction for each recall and turns it into an array
151 |         precs_to_average = precision[np.arange(len(precision))[:,np.newaxis], indices_to_average]
152 |         # Returns average precision over each recall and over each IOU. Can specify an axis
153 |         # if separate average precision is wanted for each IOU (e.g. to do more precise statistics)
154 |         return np.mean(precs_to_average)
155 | 
156 | def detectortest(predictions, ground_truths, output, user_folders):
157 |     '''Reads ground-truth boxes from the ground_truths CSV and predicted
158 |     boxes (with confidences) from the predictions CSV, groups them by class
159 |     (and by folder when user_folders is True), then computes per-class
160 |     average precision at an IOU threshold of .5 as the area under the
161 |     precision-recall curve. Writes one row per class, plus an overall
162 |     average (mAP) row, to the output CSV.'''
163 |     # First defaultdict corresponds to class name, inner one corresponds to filename, first list in tuple
164 |     # corresponds to ground truths for that class+file and second list corresponds to predictions
165 |     all_boxes = defaultdict(lambda: defaultdict(lambda: ([],[])))
166 |     files_in_ground_truth = set()
167 |     with open(ground_truths, 'r') as truths_file:
168 |         reader = csv.reader(truths_file)
169 |         next(reader, None)
170 |         if user_folders:
171 |             for row in reader:
172 |                 all_boxes[row[CLASS_LOCATION]][(row[FOLDER_LOCATION], row[FILENAME_LOCATION])][0].append(row[PREDS_START:PREDS_END+1])
173 |                 files_in_ground_truth.add((row[FOLDER_LOCATION], row[FILENAME_LOCATION]))
174 |         else:
175 |             for row in reader:
176 |                 all_boxes[row[CLASS_LOCATION]][row[FILENAME_LOCATION]][0].append(row[PREDS_START:PREDS_END+1])
177 |                 files_in_ground_truth.add(row[FILENAME_LOCATION])
178 |     with open(predictions, 'r') as preds_file:
179 |         reader = csv.reader(preds_file)
180 |         next(reader, None)
181 |         if user_folders:
182 |             for row in reader:
183 |                 if (row[FOLDER_LOCATION], row[FILENAME_LOCATION]) in files_in_ground_truth:
184 |                     all_boxes[row[CLASS_LOCATION]][(row[FOLDER_LOCATION], row[FILENAME_LOCATION])][1].append(row[PREDS_START:PREDS_END+1]+row[BOX_CONFIDENCE_LOCATION:BOX_CONFIDENCE_LOCATION+1])
185 |         else:
186 |             for row in reader:
187 |                 if row[FILENAME_LOCATION] in files_in_ground_truth:
188 |                     all_boxes[row[CLASS_LOCATION]][row[FILENAME_LOCATION]][1].append(row[PREDS_START:PREDS_END+1]+row[BOX_CONFIDENCE_LOCATION:BOX_CONFIDENCE_LOCATION+1])
189 |     all_class_maps = {}
190 |     for classname, all_file_preds in all_boxes.items():
191 |         class_map = get_map_for_class(all_file_preds.values(), avg_recalls=None, min_ious=np.asarray([.5]))
192 |         all_class_maps[classname] = class_map
193 |     # Calculates average over all classes. This is the mAP for the test set.
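    # For example, per-class APs of {"cow": 0.62, "goat": 0.48} give a mAP of 0.55; a
    # class with ground-truth boxes but no predictions contributes an AP of 0 through
    # the empty-confidence early return in get_map_for_class.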
194 | avg_map = sum(all_class_maps.values())/len(all_class_maps) if all_class_maps else 0 195 | print('Class Name: Average, AP: {}'.format(avg_map)) 196 | print('\n'.join('Class Name: {}, AP: {}'.format(*classdata) for classdata in all_class_maps.items())) 197 | with open(output, 'w') as out_file: 198 | csv_writer=csv.writer(out_file) 199 | csv_writer.writerow(['Class Name','AP']) 200 | csv_writer.writerow(['Average', avg_map]) 201 | for classdata in all_class_maps.items(): 202 | csv_writer.writerow(classdata) 203 | if __name__ == "__main__": 204 | import re 205 | from azure.storage.blob import BlockBlobService 206 | import sys 207 | import os 208 | # Allow us to import utils 209 | config_dir = str(Path.cwd().parent / "utils") 210 | if config_dir not in sys.path: 211 | sys.path.append(config_dir) 212 | from config import Config 213 | if len(sys.argv)<2: 214 | raise ValueError("Need to specify config file") 215 | config_file = Config.parse_file(sys.argv[1]) 216 | block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"]) 217 | container_name = config_file["label_container_name"] 218 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'test_(.*).csv', blob.name)] 219 | if file_date: 220 | block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], config_file["test_output"]) 221 | detectortest(config_file["tagged_predictions"], config_file["test_output"], config_file["validation_output"], config_file["user_folders"]=="True") 222 | else: 223 | # TODO: If we keep track of val/train we can calc prec/f-score for that too 224 | detectortest(config_file["tagged_predictions"], config_file["tagged_output"], config_file["validation_output"], config_file["user_folders"]=="True") 225 | -------------------------------------------------------------------------------- /train/pipeline.config: -------------------------------------------------------------------------------- 1 | model { 2 | faster_rcnn { 3 | num_classes: 90 4 | image_resizer { 5 | keep_aspect_ratio_resizer { 6 | min_dimension: 600 7 | max_dimension: 1024 8 | } 9 | } 10 | feature_extractor { 11 | type: "faster_rcnn_resnet50" 12 | first_stage_features_stride: 16 13 | } 14 | first_stage_anchor_generator { 15 | grid_anchor_generator { 16 | height_stride: 16 17 | width_stride: 16 18 | scales: 0.25 19 | scales: 0.5 20 | scales: 1.0 21 | scales: 2.0 22 | aspect_ratios: 0.5 23 | aspect_ratios: 1.0 24 | aspect_ratios: 2.0 25 | } 26 | } 27 | first_stage_box_predictor_conv_hyperparams { 28 | op: CONV 29 | regularizer { 30 | l2_regularizer { 31 | weight: 0.0 32 | } 33 | } 34 | initializer { 35 | truncated_normal_initializer { 36 | stddev: 0.009999999776482582 37 | } 38 | } 39 | } 40 | first_stage_nms_score_threshold: 0.0 41 | first_stage_nms_iou_threshold: 0.699999988079071 42 | first_stage_max_proposals: 100 43 | first_stage_localization_loss_weight: 2.0 44 | first_stage_objectness_loss_weight: 1.0 45 | initial_crop_size: 14 46 | maxpool_kernel_size: 2 47 | maxpool_stride: 2 48 | second_stage_box_predictor { 49 | mask_rcnn_box_predictor { 50 | fc_hyperparams { 51 | op: FC 52 | regularizer { 53 | l2_regularizer { 54 | weight: 0.0 55 | } 56 | } 57 | initializer { 58 | variance_scaling_initializer { 59 | factor: 1.0 60 | uniform: true 61 | mode: FAN_AVG 62 | } 63 | } 64 | } 65 | use_dropout: false 66 | dropout_keep_probability: 1.0 67 | } 68 | } 69 | 
    second_stage_post_processing {
70 |       batch_non_max_suppression {
71 |         score_threshold: 0.30000001192092896
72 |         iou_threshold: 0.6000000238418579
73 |         max_detections_per_class: 100
74 |         max_total_detections: 100
75 |       }
76 |       score_converter: SOFTMAX
77 |     }
78 |     second_stage_localization_loss_weight: 2.0
79 |     second_stage_classification_loss_weight: 1.0
80 |   }
81 | }
82 | train_config {
83 |   batch_size: 1
84 |   data_augmentation_options {
85 |     random_crop_pad_image {
86 |     }
87 |   }
88 |   optimizer {
89 |     momentum_optimizer {
90 |       learning_rate {
91 |         manual_step_learning_rate {
92 |           initial_learning_rate: 0.00019999999494757503
93 |           schedule {
94 |             step: 900000
95 |             learning_rate: 1.9999999494757503e-05
96 |           }
97 |           schedule {
98 |             step: 1200000
99 |             learning_rate: 1.9999999949504854e-06
100 |           }
101 |         }
102 |       }
103 |       momentum_optimizer_value: 0.8999999761581421
104 |     }
105 |     use_moving_average: false
106 |   }
107 |   gradient_clipping_by_norm: 10.0
108 |   fine_tune_checkpoint: "$fine_tune_checkpoint"
109 |   from_detection_checkpoint: true
110 |   num_steps: $train_iterations
111 | }
112 | train_input_reader {
113 |   label_map_path: "$label_map_path"
114 |   tf_record_input_reader {
115 |     input_path: "$tf_train_record"
116 |   }
117 | }
118 | eval_config {
119 |   num_examples: 5
120 |   max_evals: 10
121 |   use_moving_averages: false
122 | }
123 | eval_input_reader {
124 |   label_map_path: "$label_map_path"
125 |   shuffle: false
126 |   num_readers: 1
127 |   tf_record_input_reader {
128 |     input_path: "$tf_val_record"
129 |   }
130 | }
131 | 
--------------------------------------------------------------------------------
/train/repartition_test_set.py:
--------------------------------------------------------------------------------
1 | import re
2 | import csv
3 | import random
4 | import time
5 | from collections import defaultdict
6 | from azure.storage.blob import BlockBlobService
7 | from pathlib import Path
8 | import sys
9 | import os
10 | 
11 | random.seed(42)
12 | 
13 | # Allow us to import utils
14 | config_dir = str(Path.cwd().parent / "utils")
15 | if config_dir not in sys.path:
16 |     sys.path.append(config_dir)
17 | from config import Config
18 | if len(sys.argv)<2:
19 |     raise ValueError("Need to specify config file")
20 | config_file = Config.parse_file(sys.argv[1])
21 | block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"])
22 | container_name = config_file["label_container_name"]
23 | file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagged_(.*).csv', blob.name)]
24 | block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], config_file["tagged_output"])
25 | with open(config_file["tagged_output"], 'r') as file:
26 |     reader = csv.reader(file)
27 |     header = next(reader)
28 |     all_preds = list(reader)
29 | all_files=defaultdict(list)
30 | for row in all_preds:
31 |     all_files[row[0]].append(row)
32 | test_num=int(len(all_files)*float(config_file["test_percentage"]))
33 | test_tags = random.sample(list(all_files.items()), test_num)
34 | flat_tags = [cur_tag for tag_list in test_tags for cur_tag in tag_list[1]]
35 | with open(config_file["test_output"], 'w', newline='') as test_file:
36 |     csv_writer = csv.writer(test_file)
37 |     csv_writer.writerow(header)
38 |     csv_writer.writerows(flat_tags)
39 | block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("test",int(time.time() * 1000),"csv"), config_file["test_output"])
40 | 
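# Usage sketch (mirrors repartition_test_set_script.sh, which generates cur_config.ini):
#   python repartition_test_set.py cur_config.ini
# The config must define AZURE_STORAGE_ACCOUNT, AZURE_STORAGE_KEY, label_container_name,
# tagged_output, test_output and test_percentage, all of which are read above.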
-------------------------------------------------------------------------------- /train/repartition_test_set_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Source environmental variables 3 | set -a 4 | sed -i 's/\r//g' $1 5 | . $1 6 | set +a 7 | envsubst < $1 > cur_config.ini 8 | python repartition_test_set.py cur_config.ini 9 | -------------------------------------------------------------------------------- /train/tf_detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | class TFDetector(): 6 | 7 | def __init__(self, classes, inference_graph="frozen_graph.pb"): 8 | '''Initialize Detector Object''' 9 | super().__init__() 10 | self.label_arr = np.asarray(["NULL"]+classes) 11 | path_to_ckpt = inference_graph 12 | self.detection_graph = tf.Graph() 13 | with self.detection_graph.as_default(): 14 | od_graph_def = tf.GraphDef() 15 | with tf.gfile.GFile(path_to_ckpt, 'rb') as fid: 16 | serialized_graph = fid.read() 17 | od_graph_def.ParseFromString(serialized_graph) 18 | tf.import_graph_def(od_graph_def, name='') 19 | 20 | def predict(self, images_data, batch_size=10, min_confidence=.7): 21 | '''Predict results from list of images to list of boxes''' 22 | with self.detection_graph.as_default(): 23 | with tf.Session() as sess: 24 | ops = tf.get_default_graph().get_operations() 25 | all_tensor_names = {output.name for op in ops for output in op.outputs} 26 | tensor_dict = {} 27 | for key in ['detection_boxes','detection_scores','detection_classes']: 28 | tensor_name = key + ':0' 29 | if tensor_name in all_tensor_names: 30 | tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( 31 | tensor_name) 32 | image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') 33 | split_data = [images_data[i:i+batch_size] for i in range(0,images_data.shape[0],batch_size)] 34 | split_data = [sess.run(tensor_dict, feed_dict={image_tensor: batch}) for batch in split_data] 35 | split_data = [np.dstack((batch['detection_scores'], 36 | self.label_arr[batch['detection_classes'].astype(np.uint8)], 37 | batch['detection_boxes'])) for batch in split_data] 38 | combined = np.concatenate(split_data) 39 | non_zero = combined[:,:,0].astype(np.float)>min_confidence 40 | return [sorted(cur_combined[cur_non_zero].tolist(), reverse=True) for cur_combined, cur_non_zero in zip(combined, non_zero)] 41 | 42 | 43 | -------------------------------------------------------------------------------- /train/update_blob_folder.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | def update_folder(folder_name, block_blob_service, container_name): 4 | existing_files = {os.path.relpath(os.path.join(directory, cur_file), folder_name) for (directory, _, filenames) 5 | in os.walk(folder_name) for cur_file in filenames} 6 | folder_name = Path(folder_name) 7 | for blob in block_blob_service.list_blobs(container_name): 8 | if blob.name not in existing_files: 9 | (folder_name/blob.name).parent.mkdir(parents=True, exist_ok=True) 10 | block_blob_service.get_blob_to_path(container_name, blob.name, str(folder_name/blob.name)) 11 | # TODO: Append onto totag 12 | 13 | if __name__ == "__main__": 14 | from azure.storage.blob import BlockBlobService 15 | import sys 16 | import os 17 | module_dir = os.path.split(os.getcwd())[0] 18 | # Allow us to import utils 19 | config_dir = str(Path.cwd().parent 
/ "utils") 20 | if config_dir not in sys.path: 21 | sys.path.append(config_dir) 22 | from config import Config 23 | if len(sys.argv)<2: 24 | raise ValueError("Need to specify config file") 25 | config_file = Config.parse_file(sys.argv[1]) 26 | block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"]) 27 | container_name = config_file["image_container_name"] 28 | image_folder_name = config_file["image_dir"] 29 | update_folder(image_folder_name, block_blob_service, container_name) 30 | -------------------------------------------------------------------------------- /train/validation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import csv 4 | from collections import defaultdict 5 | from functools import partial 6 | from pathlib import Path 7 | HEIGHT, WIDTH = 1000, 1000 8 | FILENAME_LOCATION=0 9 | FOLDER_LOCATION=8 10 | CLASS_LOCATION=1 11 | PREDS_START=2 12 | PREDS_END=5 13 | BOX_CONFIDENCE_LOCATION=-2 14 | def detectortest(predictions, ground_truths, output, user_folders): 15 | '''Inputs test_detector that follows the Detector ABC, images which is 16 | a list of image filenames, image_size which is the resized image size 17 | necessary for inputting and ground_truths which is the correct labels 18 | for the images. Optionally takes in min_fscore. 19 | Outputs a boolean based on whether or not the F1-Score 20 | of test_detector was greater than min_fscore''' 21 | all_detector_preds = defaultdict(lambda: defaultdict(list)) 22 | with open(predictions, 'r') as preds_file: 23 | reader = csv.reader(preds_file) 24 | next(reader, None) 25 | if user_folders: 26 | for row in reader: 27 | all_detector_preds[(row[FOLDER_LOCATION], row[FILENAME_LOCATION])][row[CLASS_LOCATION]].append(row[PREDS_START:PREDS_END+1]+row[BOX_CONFIDENCE_LOCATION:BOX_CONFIDENCE_LOCATION+1]) 28 | else: 29 | for row in reader: 30 | all_detector_preds[row[FILENAME_LOCATION]][row[CLASS_LOCATION]].append(row[PREDS_START:PREDS_END+1]+row[BOX_CONFIDENCE_LOCATION:BOX_CONFIDENCE_LOCATION+1]) 31 | all_gtruths = defaultdict(lambda: defaultdict(list)) 32 | with open(ground_truths, 'r') as truths_file: 33 | reader = csv.reader(truths_file) 34 | next(reader, None) 35 | if user_folders: 36 | for row in reader: 37 | all_gtruths[(row[FOLDER_LOCATION], row[FILENAME_LOCATION])][row[CLASS_LOCATION]].append(row[PREDS_START:PREDS_END+1]) 38 | else: 39 | for row in reader: 40 | all_gtruths[row[FILENAME_LOCATION]][row[CLASS_LOCATION]].append(row[PREDS_START:PREDS_END+1]) 41 | precisions = [] 42 | recalls = [] 43 | for filename in all_gtruths: 44 | file_precisions = [] 45 | file_recalls = [] 46 | for classname, ground_preds in all_gtruths[filename].items(): 47 | ground_truth = np.zeros((HEIGHT, WIDTH)) 48 | for xmin,xmax,ymin,ymax in map(partial(map, float), ground_preds): 49 | ground_truth[int(ymin*HEIGHT):int(ymax*HEIGHT), int(xmin*WIDTH):int(xmax*WIDTH)] = 1 50 | det_preds = all_detector_preds[filename][classname] 51 | detection = np.zeros((HEIGHT, WIDTH)) 52 | for xmin,xmax,ymin,ymax in map(partial(map, float), det_preds): 53 | detection[int(ymin*HEIGHT):int(ymax*HEIGHT), int(xmin*WIDTH):int(xmax*WIDTH)] = 1 54 | ground_area = ground_truth.sum() 55 | detect_area = detection.sum() 56 | inter_area = (ground_truth * detection).sum() 57 | precision = inter_area / detect_area if detect_area!=0 else 1 58 | recall = inter_area / ground_area 59 | file_precisions.append(precision) 60 | 
61 |         precisions.append(np.mean(file_precisions))
62 |         recalls.append(np.mean(file_recalls))
63 |     avg_prec = np.mean(precisions)
64 |     avg_recall = np.mean(recalls)
65 |     f1_score = 2 * avg_prec * avg_recall/(avg_prec + avg_recall)
66 |     print("Average Precision: {}, Recall: {}, F1-Score: {}".format(avg_prec, avg_recall, f1_score))
67 |     with open(output, 'w') as out_file:
68 |         out_file.write("Average Precision: {}, Recall: {}, F1-Score: {}".format(avg_prec, avg_recall, f1_score))
69 | if __name__ == "__main__":
70 |     import re
71 |     from azure.storage.blob import BlockBlobService
72 |     import sys
73 |     import os
74 |     # Allow us to import utils
75 |     config_dir = str(Path.cwd().parent / "utils")
76 |     if config_dir not in sys.path:
77 |         sys.path.append(config_dir)
78 |     from config import Config
79 |     if len(sys.argv)<2:
80 |         raise ValueError("Need to specify config file")
81 |     config_file = Config.parse_file(sys.argv[1])
82 |     block_blob_service = BlockBlobService(account_name=config_file["AZURE_STORAGE_ACCOUNT"], account_key=config_file["AZURE_STORAGE_KEY"])
83 |     container_name = config_file["label_container_name"]
84 |     file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'test_(.*).csv', blob.name)]
85 |     if file_date:
86 |         block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], config_file["test_output"])
87 |         detectortest(config_file["tagged_predictions"], config_file["test_output"], config_file["validation_output"], config_file["user_folders"]=="True")
88 |     else:
89 |         # TODO: If we keep track of val/train we can calc prec/f-score for that too
90 |         detectortest(config_file["tagged_predictions"], config_file["tagged_output"], config_file["validation_output"], config_file["user_folders"]=="True")
91 | 
--------------------------------------------------------------------------------
/utils/blob_utils.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | def attempt_get_blob(blob_credentials, blob_name, blob_dest):
4 |     if blob_credentials is None:
5 |         print("blob_credentials is None, cannot get blob")
6 |         return False
7 |     blob_service, container_name = blob_credentials
8 |     is_successful = False
9 |     print("Dest: {0}".format(blob_dest))
10 |     try:
11 |         blob_service.get_blob_to_path(container_name, blob_name, blob_dest)
12 |         is_successful = True
13 |     except Exception as exc:
14 |         print("Error when getting blob: {0}".format(exc))
15 |         print("Src: {0} {1}".format(container_name, blob_name))
16 | 
17 | 
18 |     return is_successful
19 | 
--------------------------------------------------------------------------------
/utils/config.py:
--------------------------------------------------------------------------------
1 | 
2 | class Config():
3 |     @staticmethod
4 |     def parse_file(file_name):
5 |         config = {}
6 |         with open(file_name) as file_:
7 |             for line in file_:
8 |                 line = line.strip()
9 |                 if line and line[0] != "#":
10 |                     var,value = line.split('=', 1)
11 |                     config[var.strip()] = value.strip()
12 | 
13 |         return config
14 | 
--------------------------------------------------------------------------------
/utils/convert_to_jpeg.py:
--------------------------------------------------------------------------------
1 | import os
2 | from PIL import Image
3 | 
4 | curpath = os.getcwd()
5 | for root, dirs, files in os.walk(curpath, topdown=False):
6 |     for name in files:
7 |         if os.path.splitext(os.path.join(root, name))[1].lower() == ".tif":
8 |             reldir = os.path.relpath(root, curpath)
9 |             if os.path.isfile(os.path.splitext(os.path.join(curpath, "JPEG", reldir, name))[0] + ".jpg"):
10 |                 print("A jpeg file already exists for %s" % name)
11 |             else:
12 |                 outfile = os.path.splitext(os.path.join(curpath, "JPEG", reldir, name))[0] + ".jpg"
13 |                 if not os.path.exists(os.path.join(curpath, "JPEG", reldir)):
14 |                     os.makedirs(os.path.join(curpath, "JPEG", reldir))
15 |                 try:
16 |                     im = Image.open(os.path.join(root, name))
17 |                     print("Generating jpeg for %s" % name)
18 |                     im.thumbnail(im.size)
19 |                     im.save(outfile, "JPEG", quality=95)
20 |                 except Exception as exc:
21 |                     print("Error converting %s: %s" % (name, exc))
22 |         else:
23 |             print(os.path.join(root, name) + " is not a tiff file.")
--------------------------------------------------------------------------------
/utils/decode_tf_record.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from pathlib import Path
4 | import cv2
5 | import csv
6 | 
7 | def decode_record(record_file, output_folder):
8 |     output_folder = Path(output_folder)
9 |     output_folder.mkdir(parents=True, exist_ok=True)
10 |     output_file = output_folder/"output.csv"
11 |     record_iterator = tf.python_io.tf_record_iterator(record_file)
12 |     for string_record in record_iterator:
13 |         example = tf.train.Example()
14 |         example.ParseFromString(string_record)
15 |         filename = example.features.feature['image/filename'].bytes_list.value[0].decode("utf-8")
16 |         height = int(example.features.feature['image/height'].int64_list.value[0])
17 |         width = int(example.features.feature['image/width'].int64_list.value[0])
18 |         xmins = example.features.feature['image/object/bbox/xmin'].float_list.value
19 |         ymins = example.features.feature['image/object/bbox/ymin'].float_list.value
20 |         xmaxs = example.features.feature['image/object/bbox/xmax'].float_list.value
21 |         ymaxs = example.features.feature['image/object/bbox/ymax'].float_list.value
22 |         classes = example.features.feature['image/object/class/text'].bytes_list.value
23 |         img_raw = (example.features.feature['image/encoded'].bytes_list.value[0])
24 |         img_raw = np.frombuffer(img_raw, dtype=np.uint8)
25 |         cv2_image = cv2.imdecode(img_raw, cv2.IMREAD_COLOR)
26 |         cv2.imwrite(str(output_folder/(filename+".JPG")),cv2_image)
27 |         with output_file.open('a') as out_csv:
28 |             tagwriter = csv.writer(out_csv)
29 |             for xmin, ymin, xmax, ymax, class_raw in zip(xmins, ymins, xmaxs, ymaxs, classes):
30 |                 tagwriter.writerow([filename,class_raw.decode("utf-8"),float(xmin),float(xmax),float(ymin),float(ymax),height,width])
31 | if __name__ == "__main__":
32 |     import sys
33 |     if len(sys.argv)<3:
34 |         raise ValueError("Need to specify input file and output folder")
35 |     input_file = sys.argv[1]
36 |     output_folder = sys.argv[2]
37 |     decode_record(input_file, output_folder)
38 | 
--------------------------------------------------------------------------------
/utils/repartition_test_set.sh:
--------------------------------------------------------------------------------
1 | # Separate test set
2 | filearr=($(shuf -e $(find "$image_dir" -type f -name $filetype)))
3 | test_num=$(echo "scale=0;${#filearr[@]}*${test_percentage}/1" | bc)
4 | 
5 | mkdir -p ${image_dir}/test
6 | mkdir -p ${image_dir}/train
7 | # Shuffle the top-level images and split them into train/ and test/
8 | filearr=($(shuf -e $(ls -pL $image_dir | grep -v /)))
9 | split=$(echo "scale=0;${#filearr[@]}*${train_percentage}/1" | bc)
10 | for i in "${!filearr[@]}"; do
11 |     if (("$i" < "$split")); then
12 |         mv ${image_dir}/${filearr[$i]} ${image_dir}/train/${filearr[$i]}
13 |     else
14 |         mv ${image_dir}/${filearr[$i]} ${image_dir}/test/${filearr[$i]}
15 |     fi
16 | done
17 | 
18 | printf "%s\n" "${filearr[@]:$split}" > ${image_dir}/test.txt
19 | az storage blob upload --container-name activelearninglabels --file ${image_dir}/test.txt --name test_$(date +%s).csv --account-name $AZURE_STORAGE_ACCOUNT --account-key $AZURE_STORAGE_KEY
--------------------------------------------------------------------------------