├── images
    ├── aims.png
    ├── data.png
    ├── masks.png
    └── gtVsPred.png
├── requirements.txt
├── actionCLSS_config.py
├── utilities
    ├── printPR.py
    ├── evaluation.py
    └── create_masks.py
├── test.py
├── actionCLSS_training.py
├── README.md
├── config.py
├── actionCLSS_dataset.py
├── testList.txt
├── validationList.txt
├── actionCLSS_dataset_partitioned.py
├── visualize.py
└── utils.py


/images/aims.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gabarlacchi/MASK-CNN-for-actions-recognition-/HEAD/images/aims.png


--------------------------------------------------------------------------------
/images/data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gabarlacchi/MASK-CNN-for-actions-recognition-/HEAD/images/data.png


--------------------------------------------------------------------------------
/images/masks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gabarlacchi/MASK-CNN-for-actions-recognition-/HEAD/images/masks.png


--------------------------------------------------------------------------------
/images/gtVsPred.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gabarlacchi/MASK-CNN-for-actions-recognition-/HEAD/images/gtVsPred.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy
 2 | scipy
 3 | Pillow
 4 | cython
 5 | matplotlib
 6 | scikit-image
 7 | tensorflow>=1.3.0
 8 | keras>=2.0.8
 9 | opencv-python
10 | h5py
11 | imgaug
12 | IPython[all]


--------------------------------------------------------------------------------
/actionCLSS_config.py:
--------------------------------------------------------------------------------
 1 | from config import Config
 2 | 
 3 | class actionCLSS_Config(Config):
 4 | 	""" Configuration for training on the action oriented images dataset.
 5 | 		Derives from the base config class and overrides values specific to our own dataset.
 6 | 	"""
 7 | 
 8 | 	NAME = "SPORTS"
 9 | 
10 | 	# Train on 1 GPU and 8 images. We can put few images in a single GPU only if the images are small
11 | 	# At the start use 1 GPU and small images for a faster training. Set values in different way later
12 | 	GPU_COUNT = 1
13 | 	IMAGEG_PER_GPU = 8
14 | 
15 | 	DETECTION_MIN_CONFIDENCE = 0.7
16 | 	LEARNING_RATE = 0.0005
17 | 
18 | 	NUM_CLASSES = 1+24
19 | 
20 | 	# Use small images for faster training. Set values in different way later
21 | 	IMAGE_MIN_DIM = 240
22 | 	IMAGE_MAX_DIM = 320
23 | 
24 | 	# The anchors for training bounding boxes
25 | 	RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)
26 | 
27 | 	# Reduce ROIS 'cause the smaller images and we have only few objects in the images
28 | 	TRAIN_ROIS_PER_IMAGE = 32
29 | 
30 | 	# for now we set few steps for a faster training
31 | 	STEPS_PER_EPOCH = 10000
32 | 
33 | 	# the same as above
34 | 	VALIDATION_STEPS = 8000
35 | 
36 | 
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/utilities/printPR.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | # class_ids = pickle.load(open("evaluationVars/class_ids.pkl", "rb"))
 6 | # scores = pickle.load(open("evaluationVars/scores.pkl", "rb"))
 7 | # pred_matches = pickle.load(open("evaluationVars/pred_matches.pkl", "rb"))
 8 | # gt_matches = pickle.load(open("evaluationVars/gt_matches.pkl", "rb"))
 9 | 
10 | class_ids = pickle.load(open("evaluationVarsM/class_ids.pkl", "rb"))
11 | scores = pickle.load(open("evaluationVarsM/scores.pkl", "rb"))
12 | pred_matches = pickle.load(open("evaluationVarsM/pred_matches.pkl", "rb"))
13 | gt_matches = pickle.load(open("evaluationVarsM/gt_matches.pkl", "rb"))
14 | 
15 | class_names = ['Background', 'WalkingWithDog', 'BasketballDunk', 'Biking', 'CliffDiving', 'CricketBowling', 'Diving',
16 |                 'Fencing', 'FloorGymnastics', 'GolfSwing','HorseRiding', 'IceDancing', 'LongJump',
17 |                 'PoleVault', 'RopeClimbing', 'SalsaSpin','SkateBoarding', 'Skiing', 'Skijet',
18 |                 'SoccerJuggling', 'Surfing', 'TennisSwing','TrampolineJumping', 'VolleyballSpiking', 'Basketball']
19 | 
20 | targetClass = 3
21 | 
22 | target_ids = []
23 | target_scores = []
24 | target_pred_m = []
25 | target_gt_len = 0
26 | 
27 | # fro every image
28 | for i in range( len(class_ids) ):
29 | 	# for evry detected subject
30 | 	aBool = False
31 | 	for j in range( len(class_ids[i]) ):
32 | 		# if is of the target class
33 | 		if class_ids[i][j] == targetClass:
34 | 			# append the relative score and pred_match to global vectors
35 | 			target_scores.append(scores[i][j])
36 | 			target_pred_m.append(pred_matches[i][j])
37 | 			# TO CHECK... Lot's of doubt here
38 | 			aBool = True
39 | 
40 | 	if aBool:
41 | 		target_gt_len = target_gt_len + len(gt_matches[i])
42 | 
43 | 
44 | # sort prediction and scores by scores
45 | score, pm = zip(*sorted(zip(target_scores, target_pred_m)))
46 | # Revert array cause the sort is ascending
47 | score = list(reversed(score))
48 | pm = list(reversed(pm))
49 | 
50 | # Compute precision and recall
51 | precisions = np.cumsum(pm) / (np.arange(len(pm)) + 1)
52 | recalls = np.cumsum(pm).astype(np.float32) / target_gt_len
53 | 
54 | #pad with starting and finisching values
55 | precisions = np.concatenate([[0], precisions, [0]])
56 | recalls = np.concatenate([[0], recalls, [1]])
57 | 
58 | # print(precisions)
59 | # print(recalls)
60 | 
61 | # Plot Graph
62 | plt.plot(recalls, precisions)
63 | plt.title( class_names[targetClass] )
64 | plt.ylabel('Precision')
65 | plt.xlabel('Recall')
66 | plt.show()


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | import matplotlib
  2 | matplotlib.use('TkAgg')
  3 | 
  4 | 
  5 | import os
  6 | import sys
  7 | import random
  8 | import math
  9 | import re
 10 | import time
 11 | import numpy as np
 12 | import cv2
 13 | #import matplotlib
 14 | import matplotlib.pyplot as plt
 15 | 
 16 | from actionCLSS_config import actionCLSS_Config
 17 | from actionCLSS_dataset import ShapesDataset
 18 | import utils
 19 | import config
 20 | import model as modellib
 21 | import visualize
 22 | from model import log
 23 | from PIL import Image
# Process-wide CUDA settings. Per the CUDA documentation, PCI_BUS_ID makes
# device numbering follow the PCI bus order.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# An empty device list exposes no GPU to this process (CPU-only inference).
os.environ["CUDA_VISIBLE_DEVICES"]=""
# Root directory of the project: taken from the current working directory,
# so the script has to be launched from the repository root.
ROOT_DIR = os.getcwd()
 28 | 
 29 | 
 30 | def get_ax(rows=1, cols=1, size=8):
 31 |     """Return a Matplotlib Axes array to be used in
 32 |     all visualizations in the notebook. Provide a
 33 |     central point to control graph sizes.
 34 | 
 35 |     Change the default size attribute to control the size
 36 |     of rendered images
 37 |     """
 38 |     _, ax = plt.subplots(rows, cols, figsize=(size * cols, size * rows))
 39 |     return ax
 40 | 
 41 | 
# Qualitative check: load the latest checkpoint of one training run, run
# inference on a single image and display the detected instances.

# Directory holding the logs and checkpoints of the run under test
MODEL_DIR = os.path.join(ROOT_DIR, "logs/tr_heads_nativ_nativ_0.0005||2018-07-16")
# MODEL_DIR = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# NOTE: this rebinds the name `config`, shadowing the `config` module
# imported at the top of the file.
config = actionCLSS_Config()
config.display()

# Small test dataset (10 samples) sized after the configured image shape
dataset_test = ShapesDataset()
dataset_test.load_shapes(10, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], purpose='test')
dataset_test.prepare()

# Same settings as training, but one image at a time
class InferenceConfig(actionCLSS_Config):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference",
                           config=inference_config,
                           model_dir=MODEL_DIR)

# The second element returned by find_last() is used as the weights path
model_path = model.find_last()[1]
model.load_weights(model_path, by_name=True)

# Load a random test sample with its ground truth.
# NOTE(review): none of these values is used below; detection runs on the
# HD image instead.
image_id = random.choice(dataset_test.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_test, inference_config,
                            image_id, use_mini_mask=False)

# USE HD IMAGES
# hd_img = plt.imread('HDimages_2/'+random.choice([img for img in os.listdir('HDimages_2/') if not img.endswith('.DS_Store')]))

hd_img = plt.imread('HDimages_2/skate.jpg')

# Resize the image to the dimensions and padding given by the configuration
hd_img , hd_window, hd_scale, hd_padding = utils.resize_image(
                                                hd_img,
                                                min_dim=config.IMAGE_MIN_DIM,
                                                max_dim=config.IMAGE_MAX_DIM,
                                                padding=config.IMAGE_PADDING)
# END HD IMAGES
results = model.detect([hd_img], verbose=1)


# detect() was given one image, so only the first result is inspected
r = results[0]
# Number of detected instances (one ROI per instance)
N = r['rois'].shape[0]

# Print "<label> – <score>" for every detection.
# NOTE(review): labels here come from dataset_test.class_names, while the
# figure below uses the local class_names list - confirm both agree.
for i in range(N):
    class_id = r['class_ids'][i]
    score = r['scores'][i] if r['scores'] is not None else None
    label = dataset_test.class_names[class_id]
    print(str(label) + " – " +str(score))


# Class names indexed by class id, background first
class_names = ['Background', 'WalkingWithDog', 'BasketballDunk', 'Biking', 'CliffDiving', 'CricketBowling', 'Diving',
                'Fencing', 'FloorGymnastics', 'GolfSwing','HorseRiding', 'IceDancing', 'LongJump',
                'PoleVault', 'RopeClimbing', 'SalsaSpin','SkateBoarding', 'Skiing', 'Skijet',
                'SoccerJuggling', 'Surfing', 'TennisSwing','TrampolineJumping', 'VolleyballSpiking', 'Basketball']

# Draw boxes, masks, class names and scores over the image
visualize.display_instances(hd_img, r['rois'], r['masks'], r['class_ids'],
                              class_names, r['scores'], ax=get_ax())
102 | 
103 | 


--------------------------------------------------------------------------------
/utilities/evaluation.py:
--------------------------------------------------------------------------------
  1 | import matplotlib
  2 | matplotlib.use('TkAgg')
  3 | 
  4 | import os
  5 | import pickle
  6 | import sys
  7 | import random
  8 | import math
  9 | import re
 10 | import time
 11 | import numpy as np
 12 | import cv2
 13 | #import matplotlib
 14 | import matplotlib.pyplot as plt
 15 | 
 16 | from actionCLSS_config import actionCLSS_Config
 17 | from actionCLSS_dataset_partitioned import ShapesDatasetPartitioned
 18 | import utils
 19 | import config
 20 | import model as modellib
 21 | import visualize
 22 | from model import log
 23 | from PIL import Image
# Process-wide CUDA settings. Per the CUDA documentation, PCI_BUS_ID makes
# device numbering follow the PCI bus order; only device "1" is exposed.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"
# Root directory of the project: taken from the current working directory
ROOT_DIR = os.getcwd()

# Directory holding the logs and checkpoints of the run to evaluate
MODEL_DIR = os.path.join(ROOT_DIR, "logs/tr_heads_nativ_nativ_0.0005||2018-07-16")
# NOTE: this rebinds the name `config`, shadowing the `config` module
# imported at the top of the file.
config = actionCLSS_Config()

dataset_test = ShapesDatasetPartitioned()
# How large is the test set?
dataset_test.load_test_shapes()
dataset_test.prepare()

# NOTE(review): this list puts 'Background' last, unlike the class lists in
# the other scripts, and it is not referenced again in this script.
class_names = ['WalkingWithDog', 'BasketballDunk', 'Biking', 'CliffDiving', 'CricketBowling', 'Diving',
                'Fencing', 'FloorGymnastics', 'GolfSwing','HorseRiding', 'IceDancing', 'LongJump',
                'PoleVault', 'RopeClimbing', 'SalsaSpin','SkateBoarding', 'Skiing', 'Skijet',
                'SoccerJuggling', 'Surfing', 'TennisSwing','TrampolineJumping', 'VolleyballSpiking', 'Basketball', 'Background']

# Same settings as training, but one image at a time
class InferenceConfig(actionCLSS_Config):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference",
                           config=inference_config,
                           model_dir=MODEL_DIR)

# The second element returned by find_last() is used as the weights path
model_path = model.find_last()[1]
model.load_weights(model_path, by_name=True)
 55 | 
 56 | 
 57 | # Compute VOC-Style mAP @ IoU=0.5
 58 | # compute the entire dataset
 59 | image_ids = dataset_test.image_ids
 60 | APs = []
 61 | 
 62 | ev_ids = []
 63 | ev_scores = []
 64 | ev_pred_match = []
 65 | ev_gt_match = []
 66 | for image_id in image_ids:
 67 |     # Load image and ground truth data
 68 |     image, image_meta, gt_class_id, gt_bbox, gt_mask = \
 69 |         modellib.load_image_gt(dataset_test, inference_config,
 70 |                                image_id, use_mini_mask=False)
 71 |     molded_images = np.expand_dims(modellib.mold_image(image, inference_config), 0)
 72 |     # Run object detection
 73 |     results = model.detect([image], verbose=0)
 74 |     r = results[0]
 75 | 
 76 |     #IF PREDICT SOMETHING
 77 |     if not r['masks'].shape[0] == 0:
 78 | 
 79 |         # Compute AP
 80 |         AP, precisions, recalls, overlaps, pred_match, gt_match = \
 81 |             utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
 82 |                              r["rois"], r["class_ids"], r["scores"], r["masks"], iou_threshold=0.25)
 83 | 
 84 |         ev_ids.append(r['class_ids'])
 85 |         ev_scores.append(r['scores'])
 86 |         ev_pred_match.append(pred_match)
 87 |         ev_gt_match.append(gt_match)
 88 |         # print((gt_match))
 89 |         APs.append(AP)
 90 | 
 91 |     if image_id%500 == 1 or image_id == len(image_ids):
 92 |         print(image_id)
 93 |         with open('evaluationVarsM/class_ids.pkl', 'wb') as f:
 94 |             pickle.dump(ev_ids, f)
 95 | 
 96 |         with open('evaluationVarsM/scores.pkl', 'wb') as f:
 97 |             pickle.dump(ev_scores, f)
 98 | 
 99 |         with open('evaluationVarsM/pred_matches.pkl', 'wb') as f:
100 |             pickle.dump(ev_pred_match, f)
101 | 
102 |         with open('evaluationVarsM/gt_matches.pkl', 'wb') as f:
103 |             pickle.dump(ev_gt_match, f)
104 | 
105 | 
106 | 
107 | print("mAP @ IoU=50: ", np.mean(APs))
108 | 
109 | 


--------------------------------------------------------------------------------
/actionCLSS_training.py:
--------------------------------------------------------------------------------
  1 | import matplotlib
  2 | matplotlib.use('TkAgg')
  3 | import os
  4 | import sys
  5 | import random
  6 | import math
  7 | import re
  8 | import time
  9 | import numpy as np
 10 | import cv2
 11 | import matplotlib.pyplot as plt
 12 | import utils
 13 | import config
 14 | from PIL import Image
 15 | from datetime import date
 16 | 
 17 | # from the project
 18 | from actionCLSS_config import actionCLSS_Config
 19 | from actionCLSS_dataset import ShapesDataset
 20 | from actionCLSS_dataset_partitioned import ShapesDatasetPartitioned
 21 | import model as modellib
 22 | import visualize
 23 | from model import log
 24 | 
# Process-wide CUDA settings. Per the CUDA documentation, PCI_BUS_ID makes
# device numbering follow the PCI bus order; only device "0" is exposed.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"
# Root directory of the project: taken from the current working directory,
# so the script has to be launched from the repository root.
ROOT_DIR = os.getcwd()
 29 | 
 30 | # ! SAVING MODELS ! #
 31 | #   passing data to the function after loaded the images
 32 | #   If fold_name is empty ('') a default one are given. See below
 33 | 
 34 | def save_model(LAYERS, N_TR_IMGS, N_VAL_IMGS, learninRate, fold_name='', dir='logs/'):
 35 |     path = []
 36 |     # compose the name with defaults values. Add info or not below
 37 |     path.append(dir)
 38 |     path.append('tr_')
 39 |     path.append(str(LAYERS))
 40 |     path.append('_')
 41 |     path.append(str(N_TR_IMGS))
 42 |     path.append('_')
 43 |     path.append(str(N_VAL_IMGS))
 44 |     path.append('_')
 45 |     path.append(str(learninRate))
 46 |     path.append('||')
 47 |     if fold_name == '':
 48 |         path.append(str(date.today()))
 49 |     else:
 50 |         path.append(fold_name)
 51 |     full_path = ''.join(path)
 52 | 
 53 |     try:
 54 |         os.makedirs(full_path)
 55 |     except OSError:
 56 |         if not os.path.isdir(full_path):
 57 |             raise
 58 | 
 59 | 
 60 |     return full_path
 61 | 
 62 | 
 63 | def get_ax(rows=1, cols=1, size=8):
 64 |     """Return a Matplotlib Axes array to be used in
 65 |     all visualizations in the notebook. Provide a
 66 |     central point to control graph sizes.
 67 | 
 68 |     Change the default size attribute to control the size
 69 |     of rendered images
 70 |     """
 71 |     _, ax = plt.subplots(rows, cols, figsize=(size * cols, size * rows))
 72 |     return ax
 73 | 
# Fine-tuning entry point: build the datasets, create the run directory,
# load the starting weights and train the selected layers.

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# NOTE: this rebinds the name `config`, shadowing the `config` module
# imported at the top of the file.
config = actionCLSS_Config()
config.display()

# Layer selection passed to model.train(); also part of the run directory name
TARGET_LAYERS = 'heads'


# Training dataset. N_TRAIN_IMGS is only a label used in the run directory
# name, not a sample count.
N_TRAIN_IMGS = 'nativ'
dataset_train = ShapesDatasetPartitioned()
dataset_train.load_train_shapes()
dataset_train.prepare()

# Validation dataset. N_VAL_IMGS is likewise only a label.
N_VAL_IMGS = 'nativ'
dataset_val = ShapesDatasetPartitioned()
dataset_val.load_val_shapes()
dataset_val.prepare()

# Create directory to save logs and trained model
full_path = save_model(TARGET_LAYERS, N_TRAIN_IMGS, N_VAL_IMGS, config.LEARNING_RATE, '')
MODEL_DIR = os.path.join(ROOT_DIR, full_path)
print('MODEL SAVED PATH:' + str(full_path))

# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start with? Any value other than "coco" or "last" loads
# nothing and training starts from the model's initial weights.
init_with = "coco"
#init_with = "last"
if init_with == "coco":
    # Load the COCO weights but skip the listed head layers (presumably
    # because their shapes depend on the number of classes - confirm).
	model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                 "mrcnn_bbox", "mrcnn_mask"])


elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last()[1], by_name=True)

# Train the selected layers for 20 epochs at the configured learning rate
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=20,
            layers=TARGET_LAYERS)
121 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # MASK R-CNN  sport actions fine tuning
  2 | 
  3 | Mask R-CNN is a powerful framework developed by Facebook ([more here](https://arxiv.org/abs/1703.06870)); its main features are:
  4 | - General and flexible for object instance segmentation 
  5 | - Part of Detectron, a state-of-the-art object detection algorithm collection
  6 | - Written in Python and natively powered by Caffe2
  7 | - Also available a [Keras + TensorFlow version](https://github.com/matterport/Mask_RCNN) (here you can find all the requirements)
  8 | 
  9 | ## Aims of this repo
 10 | 
 11 | Steer the network towards sport-activity detection using the fine-tuning technique.
 12 | We want the new network to detect only the main subjects (i.e. only people), each labelled with the name of the activity being performed.
 13 | 
 14 | ![Aims](https://github.com/barloccia/MASK-CNN-for-actions-recognition-/blob/master/images/aims.png)
 15 | 
 16 | ## Dataset
 17 | 
 18 | The dataset used is ucf24 (a subset of ucf101): a set for Action Recognition that consists of 3194 videos covering 24 kinds of action. The resolution of each video is 320x240 px.
 19 | We intend to work frame by frame, so we also need an annotated ground truth:
 20 | a frame-annotated version of this dataset is available from this [repo by Gurkirt](https://github.com/gurkirt/realtime-action-detection)
 21 | #### but:
 22 | The dataset is not annotated in full, only the “frames of interest”: this leaves about 70% of the data unusable.
 23 | Only the bounding-box ground truth is available and no masks are annotated, so we produced the mask ground truth ourselves.
 24 | 
 25 | ![Data distribution](https://github.com/barloccia/MASK-CNN-for-actions-recognition-/blob/master/images/data.png)
 26 | 
 27 | 
 28 | ## Getting Started
 29 | Here we give a brief explanation of the files and their usage (we refer heavily to the ucf24 dataset mentioned above!):
 30 | The COCO weights used are available [here](https://github.com/matterport/Mask_RCNN/releases)
 31 | 
 32 | - **actionCLSS_config.py**: extends and overrides the network configuration.
 33 | - **actionCLSS_dataset.py** and **actionCLSS_dataset_partitioned.py**: offer two dataset classes: the first can be instantiated by specifying the number of samples that compose it, the second reads the samples from `testList.txt` and `validationList.txt`.
 34 | - **actionCLSS_training.py**: the routine that manages the training.
 35 | - **evaluation.py**: evaluates the model on the whole test set and periodically saves the results locally.
 36 | - **printPR.py**: uses the results produced by `evaluation.py` to compute and plot Precision and Recall for the selected class.
 37 | - **create_masks.py**: produces person masks for each frame of the dataset, as shown below.
 38 | 
 39 | ![Mask ground-truth generation](https://github.com/barloccia/MASK-CNN-for-actions-recognition-/blob/master/images/masks.png)
 40 | 
 41 | ## Results
 42 | 
 43 | A brief remark is in order on the divergence between the quantitative and the qualitative analysis of the masks and bounding boxes produced.
 44 | An example is shown below: the prediction outperforms the ground truth, yet numerically this counts as a penalty!
 45 | ![Qualitative Vs Quantitative](https://github.com/barloccia/MASK-CNN-for-actions-recognition-/blob/master/images/gtVsPred.png)
 46 | 
 47 | - mAP without considering masks:  84.5%
 48 | - mAP considering masks IoU=25: 37.4%
 49 | - mAP considering masks IoU=50:  28.7%
 50 | 
 51 | | Class        | No Mask           | IoU = 25  | IoU = 50  |
 52 | | ------------ |:-----------------:| :--------:| :--------:|
 53 | | WalkingWithDog	| 85.8% | 57.2% | 48.9% |
 54 | | BasketballDunk	| 62.1% | 1.7% | 0.2% |
 55 | | Biking	| 92.4% | 38.3% | 27.5% |
 56 | | CliffDiving	| 22.7% | 3.2% | 0.0% |
 57 | | CricketBowling	| 47.2% | 3.8% | 2.7% |
 58 | | Diving	| 83.0% | 2.3% | 1.4% |
 59 | | Fencing	| 97.9% | 19.5% | 14.0% |
 60 | | FloorGymnastics	| 64.8% | 34.0% | 28.5% |
 61 | | GolfSwing	| 81.0% | 71.8% | 67.6% |
 62 | | HorseRiding	| 95.3% | 27.7% | 16.2% |
 63 | | IceDancing	| 93.7% | 68.8% | 64.3% |
 64 | | LongJump	| 59.9% | 25.1% | 22.1% |
 65 | | PoleVault	| 54.7% | 2.6% | 1.6% |
 66 | | RopeClimbing	| 90.6% | 30.8% | 20.5% |
 67 | | SalsaSpin	| 86.4% | 48.5% | 22.7% |
 68 | | SkateBoarding	| 86.7% | 46.9% | 34.2% |
 69 | | Skiing	| 80.7% | 46.3% | 37.2% |
 70 | | Skijet	| 87.8% | 21.9% | 13.0% |
 71 | | SoccerJuggling	| 85.8% | 58.3% | 52.8% |
 72 | | Surfing	| 78.2% | 18.1% | 12.7% |
 73 | | TennisSwing	| 64.9% | 59.3% | 56.1% |
 74 | | TrampolineJumping	| 83.5% | 16.3% | 13.8% |
 75 | | VolleyballSpiking	| 39.5% | 0.7% | 0.3% |
 76 | 
 77 | ## Requirements
 78 | Python 3.4, TensorFlow 1.3, Keras 2.0.8 and other common packages listed in `requirements.txt`.
 79 | 
 80 | ### MS COCO Requirements:
 81 | To train or test on MS COCO, you'll also need:
 82 | * pycocotools (installation instructions below)
 83 | * [MS COCO Dataset](http://cocodataset.org/#home)
 84 | * Download the 5K [minival](https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0)
 85 |   and the 35K [validation-minus-minival](https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0)
 86 |   subsets. More details in the original [Faster R-CNN implementation](https://github.com/rbgirshick/py-faster-rcnn/blob/master/data/README.md).
 87 | 
 88 | If you use Docker, the code has been verified to work on
 89 | [this Docker container](https://hub.docker.com/r/waleedka/modern-deep-learning/).
 90 | 
 91 | 
 92 | ## Installation
 93 | 1. Install dependencies
 94 |    ```bash
 95 |    pip3 install -r requirements.txt
 96 |    ```
 97 | 2. Clone this repository
 98 | 3. Run setup from the repository root directory
 99 |     ```bash
100 |     python3 setup.py install
101 |     ``` 
102 | 3. Download pre-trained COCO weights (mask_rcnn_coco.h5) from the [releases page](https://github.com/matterport/Mask_RCNN/releases).
103 | 4. (Optional) To train or test on MS COCO install `pycocotools` from one of these repos. They are forks of the original pycocotools with fixes for Python3 and Windows (the official repo doesn't seem to be active anymore).
104 | 
105 |     * Linux: https://github.com/waleedka/coco
106 |     * Windows: https://github.com/philferriere/cocoapi.
107 |     You must have the Visual C++ 2015 build tools on your path (see the repo for additional details)


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Mask R-CNN
  3 | Base Configurations class.
  4 | 
  5 | Copyright (c) 2017 Matterport, Inc.
  6 | Licensed under the MIT License (see LICENSE for details)
  7 | Written by Waleed Abdulla
  8 | """
  9 | 
 10 | import math
 11 | import numpy as np
 12 | 
 13 | 
 14 | # Base Configuration Class
 15 | # Don't use this class directly. Instead, sub-class it and override
 16 | # the configurations you need to change.
 17 | 
 18 | class Config(object):
 19 |     """Base configuration class. For custom configurations, create a
 20 |     sub-class that inherits from this one and override properties
 21 |     that need to be changed.
 22 |     """
 23 |     # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
 24 |     # Useful if your code needs to do things differently depending on which
 25 |     # experiment is running.
 26 |     NAME = None  # Override in sub-classes
 27 | 
 28 |     # NUMBER OF GPUs to use. For CPU training, use 1
 29 |     GPU_COUNT = 1
 30 | 
 31 |     # Number of images to train with on each GPU. A 12GB GPU can typically
 32 |     # handle 2 images of 1024x1024px.
 33 |     # Adjust based on your GPU memory and image sizes. Use the highest
 34 |     # number that your GPU can handle for best performance.
 35 |     IMAGES_PER_GPU = 1
 36 | 
 37 |     # Number of training steps per epoch
 38 |     # This doesn't need to match the size of the training set. Tensorboard
 39 |     # updates are saved at the end of each epoch, so setting this to a
 40 |     # smaller number means getting more frequent TensorBoard updates.
 41 |     # Validation stats are also calculated at each epoch end and they
 42 |     # might take a while, so don't set this too small to avoid spending
 43 |     # a lot of time on validation stats.
 44 |     STEPS_PER_EPOCH = 10
 45 | 
 46 |     # Number of validation steps to run at the end of every training epoch.
 47 |     # A bigger number improves accuracy of validation stats, but slows
 48 |     # down the training.
 49 |     VALIDATION_STEPS = 50
 50 | 
 51 |     # Backbone network architecture
 52 |     # Supported values are: resnet50, resnet101
 53 |     BACKBONE = "resnet101"
 54 | 
 55 |     # The strides of each layer of the FPN Pyramid. These values
 56 |     # are based on a Resnet101 backbone.
 57 |     BACKBONE_STRIDES = [4, 8, 16, 32, 64]
 58 | 
 59 |     # Number of classification classes (including background)
 60 |     NUM_CLASSES = 1  # Override in sub-classes
 61 | 
 62 |     # Length of square anchor side in pixels
 63 |     RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
 64 | 
 65 |     # Ratios of anchors at each cell (width/height)
 66 |     # A value of 1 represents a square anchor, and 0.5 is a wide anchor
 67 |     RPN_ANCHOR_RATIOS = [0.5, 1, 2]
 68 | 
 69 |     # Anchor stride
 70 |     # If 1 then anchors are created for each cell in the backbone feature map.
 71 |     # If 2, then anchors are created for every other cell, and so on.
 72 |     RPN_ANCHOR_STRIDE = 1
 73 | 
 74 |     # Non-max suppression threshold to filter RPN proposals.
 75 |     # You can reduce this during training to generate more propsals.
 76 |     RPN_NMS_THRESHOLD = 0.7
 77 | 
 78 |     # How many anchors per image to use for RPN training
 79 |     RPN_TRAIN_ANCHORS_PER_IMAGE = 256
 80 | 
 81 |     # ROIs kept after non-maximum supression (training and inference)
 82 |     POST_NMS_ROIS_TRAINING = 2000
 83 |     POST_NMS_ROIS_INFERENCE = 1000
 84 | 
 85 |     # If enabled, resizes instance masks to a smaller size to reduce
 86 |     # memory load. Recommended when using high-resolution images.
 87 |     USE_MINI_MASK = True
 88 |     MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask
 89 | 
 90 |     # Input image resing
 91 |     # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
 92 |     # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
 93 |     # be satisfied together the IMAGE_MAX_DIM is enforced.
 94 |     IMAGE_MIN_DIM = 240
 95 |     IMAGE_MAX_DIM = 320
 96 |     # If True, pad images with zeros such that they're (max_dim by max_dim)
 97 |     IMAGE_PADDING = True  # currently, the False option is not supported
 98 | 
 99 |     # Image mean (RGB)
100 |     MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
101 | 
102 |     # Number of ROIs per image to feed to classifier/mask heads
103 |     # The Mask RCNN paper uses 512 but often the RPN doesn't generate
104 |     # enough positive proposals to fill this and keep a positive:negative
105 |     # ratio of 1:3. You can increase the number of proposals by adjusting
106 |     # the RPN NMS threshold.
107 |     TRAIN_ROIS_PER_IMAGE = 200
108 | 
109 |     # Percent of positive ROIs used to train classifier/mask heads
110 |     ROI_POSITIVE_RATIO = 0.33
111 | 
112 |     # Pooled ROIs
113 |     POOL_SIZE = 7
114 |     MASK_POOL_SIZE = 14
115 |     MASK_SHAPE = [28, 28]
116 | 
117 |     # Maximum number of ground truth instances to use in one image
118 |     MAX_GT_INSTANCES = 100
119 | 
120 |     # Bounding box refinement standard deviation for RPN and final detections.
121 |     RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
122 |     BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
123 | 
124 |     # Max number of final detections
125 |     DETECTION_MAX_INSTANCES = 100
126 | 
127 |     # Minimum probability value to accept a detected instance
128 |     # ROIs below this threshold are skipped
129 |     DETECTION_MIN_CONFIDENCE = 0.7
130 | 
131 |     # Non-maximum suppression threshold for detection
132 |     DETECTION_NMS_THRESHOLD = 0.3
133 | 
134 |     # Learning rate and momentum
135 |     # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
136 |     # weights to explode. Likely due to differences in optimzer
137 |     # implementation.
138 |     LEARNING_RATE = 0.001
139 |     LEARNING_MOMENTUM = 0.9
140 | 
141 |     # Weight decay regularization
142 |     WEIGHT_DECAY = 0.0001
143 | 
144 |     # Use RPN ROIs or externally generated ROIs for training
145 |     # Keep this True for most situations. Set to False if you want to train
146 |     # the head branches on ROI generated by code rather than the ROIs from
147 |     # the RPN. For example, to debug the classifier head without having to
148 |     # train the RPN.
149 |     USE_RPN_ROIS = True
150 | 
151 |     def __init__(self):
152 |         """Set values of computed attributes."""
153 |         # Effective batch size
154 |         self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
155 | 
156 |         # Input image size
157 |         self.IMAGE_SHAPE = np.array(
158 |             [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
159 | 
160 |         # Compute backbone size from input image size
161 |         self.BACKBONE_SHAPES = np.array(
162 |             [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
163 |               int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
164 |              for stride in self.BACKBONE_STRIDES])
165 | 
166 |     def display(self):
167 |         """Display Configuration values."""
168 |         print("\nConfigurations:")
169 |         for a in dir(self):
170 |             if not a.startswith("__") and not callable(getattr(self, a)):
171 |                 print("{:30} {}".format(a, getattr(self, a)))
172 |         print("\n")
173 | 


--------------------------------------------------------------------------------
/utilities/create_masks.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import random
  4 | import math
  5 | import numpy as np
  6 | import skimage.io
  7 | import matplotlib
  8 | import matplotlib.pyplot as plt
  9 | import scipy.misc
 10 | from PIL import Image
 11 | import cv2
 12 | 
 13 | # import coco
 14 | import utils
 15 | import model as modellib
 16 | import visualize
 17 | 
 18 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
 19 | os.environ["CUDA_VISIBLE_DEVICES"]="0"
 20 | # Root directory of the project
 21 | ROOT_DIR = os.getcwd()
 22 | 
 23 | # Directory to save logs and trained model
 24 | MODEL_DIR = os.path.join(ROOT_DIR, "logs")
 25 | 
 26 | # Local path to trained weights file
 27 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
 28 | # Download COCO trained weights from Releases if needed
 29 | if not os.path.exists(COCO_MODEL_PATH):
 30 |     utils.download_trained_weights(COCO_MODEL_PATH)
 31 | 
 32 | # Directory of images to run detection on
 33 | # not interesting
 34 | IMAGE_DIR = os.path.join(ROOT_DIR, "images")
 35 | 
 36 | class InferenceConfig():
 37 |     NAME = "coco"
 38 |     GPU_COUNT = 1
 39 |     IMAGES_PER_GPU = 1
 40 |     STEPS_PER_EPOCH = 1000
 41 |     VALIDATION_STEPS = 50
 42 |     BACKBONE = "resnet101"
 43 |     BACKBONE_STRIDES = [4, 8, 16, 32, 64]
 44 |     NUM_CLASSES = 1 + 80  # Override in sub-classes
 45 |     # Length of square anchor side in pixels
 46 |     RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
 47 |     RPN_ANCHOR_RATIOS = [0.5, 1, 2]
 48 |     RPN_ANCHOR_STRIDE = 1
 49 |     # Non-max suppression threshold to filter RPN proposals.
 50 |     # You can reduce this during training to generate more propsals.
 51 |     RPN_NMS_THRESHOLD = 0.7
 52 |     # How many anchors per image to use for RPN training
 53 |     RPN_TRAIN_ANCHORS_PER_IMAGE = 256
 54 |     # ROIs kept after non-maximum supression (training and inference)
 55 |     POST_NMS_ROIS_TRAINING = 2000
 56 |     POST_NMS_ROIS_INFERENCE = 1000
 57 |     USE_MINI_MASK = True
 58 |     MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask
 59 |     IMAGE_MIN_DIM = 800
 60 |     IMAGE_MAX_DIM = 1024
 61 |     # If True, pad images with zeros such that they're (max_dim by max_dim)
 62 |     IMAGE_PADDING = True  # currently, the False option is not supported
 63 |     # Image mean (RGB)
 64 |     MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
 65 |     TRAIN_ROIS_PER_IMAGE = 200
 66 |     # Percent of positive ROIs used to train classifier/mask heads
 67 |     ROI_POSITIVE_RATIO = 0.33
 68 |     # Pooled ROIs
 69 |     POOL_SIZE = 7
 70 |     MASK_POOL_SIZE = 14
 71 |     MASK_SHAPE = [28, 28]
 72 |     # Maximum number of ground truth instances to use in one image
 73 |     MAX_GT_INSTANCES = 100
 74 |     # Bounding box refinement standard deviation for RPN and final detections.
 75 |     RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
 76 |     BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
 77 |     # Max number of final detections
 78 |     DETECTION_MAX_INSTANCES = 100
 79 |     # Minimum probability value to accept a detected instance
 80 |     # ROIs below this threshold are skipped
 81 |     DETECTION_MIN_CONFIDENCE = 0.7
 82 |     # Non-maximum suppression threshold for detection
 83 |     DETECTION_NMS_THRESHOLD = 0.3
 84 |     LEARNING_RATE = 0.001
 85 |     LEARNING_MOMENTUM = 0.9
 86 |     WEIGHT_DECAY = 0.0001
 87 |     USE_RPN_ROIS = True
 88 | 
 89 |     def __init__(self):
 90 |         """Set values of computed attributes."""
 91 |         # Effective batch size
 92 |         self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
 93 | 
 94 |         # Input image size
 95 |         self.IMAGE_SHAPE = np.array(
 96 |             [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
 97 | 
 98 |         # Compute backbone size from input image size
 99 |         self.BACKBONE_SHAPES = np.array(
100 |             [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
101 |               int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
102 |              for stride in self.BACKBONE_STRIDES])
103 | 
104 |     def display(self):
105 |         """Display Configuration values."""
106 |         print("\nConfigurations:")
107 |         for a in dir(self):
108 |             if not a.startswith("__") and not callable(getattr(self, a)):
109 |                 print("{:30} {}".format(a, getattr(self, a)))
110 |         print("\n")
111 | 
def draw_shape(image, x1, y1, x2, y2, color):
    """Fill the rectangle spanned by (x1, y1) and (x2, y2) with 255.

    Args:
        image: single-channel array of shape (H, W) or (H, W, 1); it is
            drawn on in place by ``cv2.rectangle``.
        x1, y1, x2, y2: integer pixel coordinates of two opposite corners.
        color: accepted for interface compatibility but ignored; the
            rectangle is always filled with the value 255.

    Returns:
        The drawn image reshaped to two dimensions (H, W).
    """
    # A negative thickness asks OpenCV for a filled rectangle.
    cv2.rectangle(image, (x1, y1), (x2, y2), 255, -1)
    # Use the array's own height/width instead of a hard-coded 240x320, so
    # frames of any size work; a 240x320 input gives the same result as before.
    return image.reshape(image.shape[0], image.shape[1])
118 | 
119 | 
config = InferenceConfig()
#config.display()

# Create model object in inference mode.
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load weights trained on MS-COCO
model.load_weights(COCO_MODEL_PATH, by_name=True)


# COCO Class names
# Index of the class in the list is its ID. For example, to get ID of
# the teddy bear class, use: class_names.index('teddy bear')
class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
               'bus', 'train', 'truck', 'boat', 'traffic light',
               'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
               'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
               'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard',
               'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
               'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
               'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
               'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
               'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
               'teddy bear', 'hair drier', 'toothbrush']

# Only detections of this class are exported as masks (ID 1).
person_class_id = class_names.index('person')

labels_root = 'ucf24_project/labels'
images_root = 'ucf24_project/rgb-images'

# For every labelled frame (one .txt label file per frame), run the
# detector on the matching RGB image and save one mask image per detected
# person next to the label file, as "<frame>mask_<k>.jpg".
# activities = [d for d in os.listdir(labels_root) if not d.endswith('.DS_Store')]
activities = ['WalkingWithDog', 'VolleyballSpiking']
for act in activities:
    act_label_dir = os.path.join(labels_root, str(act))
    videos = [name for name in os.listdir(act_label_dir)
              if not name.endswith('.DS_Store')]
    for vid in videos:
        label_dir = os.path.join(act_label_dir, str(vid))
        frames = [name.replace('.txt', '.jpg')
                  for name in os.listdir(label_dir) if name.endswith('.txt')]
        print(str(act) + ' ' + str(vid))
        for frame in frames:
            # Read the target frame and run the detector on it.
            frame_path = os.path.join(images_root, str(act), str(vid), frame)
            image = skimage.io.imread(frame_path)
            r = model.detect([image], verbose=1)[0]

            # Positions, within the detection list, of the person detections.
            person_indexes = np.where(r['class_ids'] == person_class_id)[0]

            mask_prefix = os.path.join(
                label_dir, frame.replace('.jpg', '') + 'mask_')
            for k, ind in enumerate(person_indexes):
                # Index the mask stack with the detection index `ind`; the
                # previous running counter picked the first k masks of any
                # class instead of the person masks.
                # The uint8 cast keeps the array savable when the masks are
                # boolean (bool * 255 would otherwise become a wide int).
                person_mask = (r['masks'][:, :, ind] * 255).astype(np.uint8)
                Image.fromarray(person_mask).save(
                    mask_prefix + str(k) + ".jpg")
181 | 


--------------------------------------------------------------------------------
/actionCLSS_dataset.py:
--------------------------------------------------------------------------------
  1 | import matplotlib
  2 | 
  3 | matplotlib.use('TkAgg')
  4 | 
  5 | import utils
  6 | import random
  7 | import glob, os
  8 | # import math
  9 | import cv2
 10 | import csv
 11 | # from PIL import Image, ImageFont, ImageDraw, ImageEnhance
 12 | import numpy as np
 13 | import matplotlib.pyplot as plt
 14 | # import matplotlib.patches as patches
 15 | # import matplotlib.lines as lines
 16 | # from matplotlib.patches import Polygon
 17 | # import IPython.display
 18 | 
 19 | 
 20 | class ShapesDataset(utils.Dataset):
 21 |     """Generates the shapes synthetic dataset. The dataset consists of simple
 22 |     shapes (triangles, squares, circles) placed randomly on a blank surface.
 23 |     The images are generated on the fly. No file access required.
 24 |     """
 25 |     with open('validationList.txt') as file:
 26 |         reserved_for_val = [video.replace('\n', '') for video in file.readlines()]
 27 |     with open('testList.txt') as file:
 28 |         reserved_for_test = [video.replace('\n', '') for video in file.readlines()]
 29 | 
 30 |     def load_shapes(self, count, height, width, purpose):
 31 |         # Training is True if the dataset is for training, False if it's for validation set
 32 |         """Generate the requested number of synthetic images.
 33 |         count: number of images to generate.
 34 |         height, width: the size of the generated images.
 35 |         """
 36 |         # Add classes
 37 |         self.add_class("sport", 1, "WalkingWithDog")
 38 |         self.add_class("sport", 2, "BasketballDunk")
 39 |         self.add_class("sport", 3, "Biking")
 40 |         self.add_class("sport", 4, "CliffDiving")
 41 |         self.add_class("sport", 5, "CricketBowling")
 42 |         self.add_class("sport", 6, "Diving")
 43 |         self.add_class("sport", 7, "Fencing")
 44 |         self.add_class("sport", 8, "FloorGymnastics")
 45 |         self.add_class("sport", 9, "GolfSwing")
 46 |         self.add_class("sport", 10, "HorseRiding")
 47 |         self.add_class("sport", 11, "IceDancing")
 48 |         self.add_class("sport", 12, "LongJump")
 49 |         self.add_class("sport", 13, "PoleVault")
 50 |         self.add_class("sport", 14, "RopeClimbing")
 51 |         self.add_class("sport", 15, "SalsaSpin")
 52 |         self.add_class("sport", 16, "SkateBoarding")
 53 |         self.add_class("sport", 17, "Skiing")
 54 |         self.add_class("sport", 18, "Skijet")
 55 |         self.add_class("sport", 19, "SoccerJuggling")
 56 |         self.add_class("sport", 20, "Surfing")
 57 |         self.add_class("sport", 21, "TennisSwing")
 58 |         self.add_class("sport", 22, "TrampolineJumping")
 59 |         self.add_class("sport", 23, "VolleyballSpiking")
 60 |         self.add_class("sport", 24, "Basketball")
 61 | 
 62 | 
 63 |         # Carico count immagini a caso
 64 |         for i in range(count):
 65 |             # Select random path of the folders
 66 |             root_labels = 'ucf24_project/labels'
 67 |             root_images = 'ucf24_project/rgb-images'
 68 |             activity = random.choice(self.class_info)['name']
 69 |             if(activity == 'BackGround'):
 70 |                 i = i - 1
 71 |             else:
 72 |                 while True:
 73 |                     video = random.choice([dir for dir in os.listdir(root_labels + '/' + activity) if not dir.endswith('.DS_Store')])
 74 |                     if purpose == "train" and video not in self.reserved_for_val and video not in self.reserved_for_test: break
 75 |                     elif purpose == "val" and video in self.reserved_for_val: break
 76 |                     elif purpose == "test" and video not in self.reserved_for_test: break
 77 | 
 78 |                 frame = random.choice([f for f in os.listdir(root_labels + '/' + activity + '/' + video) if f.endswith('.txt')])
 79 | 
 80 |                 URL_label = root_labels + '/' + activity + '/' + video + '/' + frame
 81 |                 URL_image = root_images + '/' + activity + '/' + video + '/' + frame.replace('.txt', '.jpg')
 82 |                 #print(URL_image)
 83 |                 # open the file with the labels (class and bbox)
 84 |                 with open(URL_label) as file:
 85 |                     lines = file.readlines()
 86 |                     bounding_boxes = [str(line).replace('\n', '').split(' ') for line in lines]
 87 | 
 88 |                     # Orrible cast from string matrix to int matrix
 89 |                     for i in range(len(bounding_boxes)):
 90 |                         bounding_boxes[i] = [int(float(element)) for element in bounding_boxes[i]]
 91 | 
 92 |                 #add image
 93 |                 self.add_image("sport", image_id=i, path=URL_image,
 94 |                                    width=320, height=240,
 95 |                                    bbox=bounding_boxes, action=activity)
 96 | 
 97 | 
 98 |     def load_image(self, image_id):
 99 | 
100 |         info = self.image_info[image_id]
101 |         image = plt.imread(info['path'])
102 | 
103 |         return image
104 | 
105 | 
106 |     def image_reference(self, image_id):
107 |         """Return the shapes data of the image."""
108 |         info = self.image_info[image_id]
109 |         if info["source"] == "shapes":
110 |             return info["shapes"]
111 |         else:
112 |             super(self.__class__).image_reference(self, image_id)
113 | 
114 | 
115 |     def load_mask(self, image_id):
116 |         """Generate instance masks for shapes of the given image ID.
117 |         """
118 |         info = self.image_info[image_id]
119 |         shapes = info['bbox']
120 |         masks_path = info['path'].replace(".jpg", "mask_").replace("rgb-images", "labels")
121 |         count = len(shapes)
122 |         # si crea una matrice 3D per ogni bbox. La terza dimensione è il numero di bounding.
123 |         # ogni maschera è un area di 0 su un blocco di 1 grosso come l'immagine (320x240)
124 |         mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
125 | 
126 |         for i, (action, x1, y1, x2, y2) in enumerate(info['bbox']):
127 |             mask[:, :, i:i+1] = self.draw_shape(mask[:, :, i:i+1].copy(), x1, y1, x2, y2, 1).reshape(240, 320, 1)
128 |             # se ci sono le maschere...
129 |             if os.path.exists(masks_path +"0.jpg"):
130 |                 mask_counter, intersection_max = 0, -1
131 |                 # per ogni maschera
132 |                 while os.path.exists(masks_path + str(mask_counter) +".jpg"):
133 |                     mask_counter = mask_counter +1
134 |                     # leggo la maschera, essendo in jpg è compressa, ci sta che non tutti i valori siano 255
135 |                     # uso ">128" per filtrarla -> ottengo una matrice binaria
136 |                     current_mask = plt.imread(masks_path+"0.jpg") > 128
137 |                     # Calcolo l'intersezione col boundingbox e la percentuale di maschera dentro questo
138 |                     intersection = mask[:, :, i:i + 1] * current_mask.reshape(240, 320, 1)
139 |                     # the factors *1 and /255 needed in order to had a normalized score
140 |                     intersection_score = sum(sum(intersection/255)) / sum(sum(current_mask*1))
141 |                     # Se ho un nuovo massimo, metto da parte l'intersezione trovata
142 |                     if intersection_score > intersection_max:
143 |                         intersection_max = intersection_score
144 |                         final_mask = current_mask
145 |                 # Se ho avuto almeno un intersezione con il bbox
146 |                 if intersection_max > 0.1:
147 |                     # uncomment to save a mask, but seems good
148 |                     # print(intersection_max)
149 |                     aMask = mask[:, :, i:i+1]*final_mask.reshape(240, 320, 1)
150 |                     # plt.imsave("maskTest2/"+str(intersection_max)+"m.png", aMask.reshape(240, 320))
151 |                     # plt.imsave("maskTest2/"+str(intersection_max)+"i.png", plt.imread(info['path']))
152 |                     # plt.imsave("maskTest2/"+str(intersection_max)+"b.png", mask[:, :, i:i+1].reshape(240, 320))
153 |                     # aggiorno la maschera
154 |                     mask[:, :, i:i + 1] = mask[:, :, i:i+1]*final_mask.reshape(240, 320, 1)
155 | 
156 |         # prima poteva accadere che due maschere in scena fossero di classi diverse (e.g. tondo e quadrato)
157 |         # ora avremo che le maschere in scena fanno tutte parte della stessa attività
158 |         class_ids = np.array([self.class_names.index(info['action']) for i in range(count)])
159 | 
160 |         return mask, class_ids.astype(np.int32)
161 | 
162 | 
163 |     def draw_shape(self, image, x1, y1, x2, y2, color):
164 |         """Draws a shape from the given specs."""
165 |         # Get the center x, y and the size s
166 |         cv2.rectangle(image, (x1, y1), (x2, y2), 255, -1)
167 |         image = image.reshape(240, 320)
168 | 
169 |         return image
170 |     #
171 |     #
172 |     # def random_shape(self, height, width):
173 |     #     """Generates specifications of a random shape that lies within
174 |     #     the given height and width boundaries.
175 |     #     Returns a tuple of three valus:
176 |     #     * The shape name (square, circle, ...)
177 |     #     * Shape color: a tuple of 3 values, RGB.
178 |     #     * Shape dimensions: A tuple of values that define the shape size
179 |     #                         and location. Differs per shape type.
180 |     #     """
181 |     #     # Shape
182 |     #     shape = random.choice(["square", "circle", "triangle"])
183 |     #     # Color
184 |     #     color = tuple([random.randint(0, 255) for _ in range(3)])
185 |     #     # Center x, y
186 |     #     buffer = 20
187 |     #     y = random.randint(buffer, height - buffer - 1)
188 |     #     x = random.randint(buffer, width - buffer - 1)
189 |     #     # Size
190 |     #     s = random.randint(buffer, height // 4)
191 |     #     return shape, color, (x, y, s)
192 |     #
193 |     # def random_image(self, height, width):
194 |     #     """Creates random specifications of an image with multiple shapes.
195 |     #     Returns the background color of the image and a list of shape
196 |     #     specifications that can be used to draw the image.
197 |     #     """
198 |     #     # Pick random background color
199 |     #     bg_color = np.array([random.randint(0, 255) for _ in range(3)])
200 |     #     # Generate a few random shapes and record their
201 |     #     # bounding boxes
202 |     #     shapes = []
203 |     #     boxes = []
204 |     #     N = random.randint(1, 4)
205 |     #     for _ in range(N):
206 |     #         shape, color, dims = self.random_shape(height, width)
207 |     #         shapes.append((shape, color, dims))
208 |     #         x, y, s = dims
209 |     #         boxes.append([y - s, x - s, y + s, x + s])
210 |     #     # Apply non-max suppression wit 0.3 threshold to avoid
211 |     #     # shapes covering each other
212 |     #     keep_ixs = utils.non_max_suppression(np.array(boxes), np.arange(N), 0.3)
213 |     #     shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
214 |     #     return bg_color, shapes
215 | 


--------------------------------------------------------------------------------
/testList.txt:
--------------------------------------------------------------------------------
  1 | v_Basketball_g01_c01
  2 | v_Basketball_g01_c03
  3 | v_Basketball_g01_c05
  4 | v_Basketball_g01_c07
  5 | v_Basketball_g02_c02
  6 | v_Basketball_g02_c04
  7 | v_Basketball_g02_c06
  8 | v_Basketball_g03_c02
  9 | v_Basketball_g03_c04
 10 | v_Basketball_g03_c06
 11 | v_Basketball_g04_c02
 12 | v_Basketball_g04_c04
 13 | v_Basketball_g05_c02
 14 | v_Basketball_g05_c04
 15 | v_Basketball_g06_c02
 16 | v_Basketball_g06_c04
 17 | v_Basketball_g07_c02
 18 | v_Basketball_g07_c04
 19 | v_BasketballDunk_g01_c02
 20 | v_BasketballDunk_g01_c04
 21 | v_BasketballDunk_g01_c06
 22 | v_BasketballDunk_g02_c01
 23 | v_BasketballDunk_g02_c03
 24 | v_BasketballDunk_g03_c01
 25 | v_BasketballDunk_g03_c03
 26 | v_BasketballDunk_g03_c05
 27 | v_BasketballDunk_g04_c01
 28 | v_BasketballDunk_g04_c03
 29 | v_BasketballDunk_g05_c01
 30 | v_BasketballDunk_g05_c03
 31 | v_BasketballDunk_g05_c05
 32 | v_BasketballDunk_g06_c01
 33 | v_BasketballDunk_g06_c03
 34 | v_BasketballDunk_g07_c01
 35 | v_BasketballDunk_g07_c03
 36 | v_BasketballDunk_g07_c05
 37 | v_Biking_g01_c01
 38 | v_Biking_g01_c03
 39 | v_Biking_g02_c01
 40 | v_Biking_g02_c03
 41 | v_Biking_g02_c05
 42 | v_Biking_g02_c07
 43 | v_Biking_g03_c02
 44 | v_Biking_g03_c04
 45 | v_Biking_g04_c02
 46 | v_Biking_g04_c04
 47 | v_Biking_g05_c01
 48 | v_Biking_g05_c03
 49 | v_Biking_g05_c05
 50 | v_Biking_g05_c07
 51 | v_Biking_g06_c02
 52 | v_Biking_g06_c04
 53 | v_Biking_g07_c01
 54 | v_Biking_g07_c03
 55 | v_Biking_g07_c05
 56 | v_CliffDiving_g01_c01
 57 | v_CliffDiving_g01_c03
 58 | v_CliffDiving_g01_c05
 59 | v_CliffDiving_g02_c01
 60 | v_CliffDiving_g02_c03
 61 | v_CliffDiving_g03_c01
 62 | v_CliffDiving_g03_c03
 63 | v_CliffDiving_g03_c05
 64 | v_CliffDiving_g04_c02
 65 | v_CliffDiving_g04_c04
 66 | v_CliffDiving_g05_c02
 67 | v_CliffDiving_g05_c04
 68 | v_CliffDiving_g05_c06
 69 | v_CliffDiving_g06_c01
 70 | v_CliffDiving_g06_c03
 71 | v_CliffDiving_g06_c05
 72 | v_CliffDiving_g06_c07
 73 | v_CliffDiving_g07_c02
 74 | v_CliffDiving_g07_c04
 75 | v_CliffDiving_g07_c06
 76 | v_CricketBowling_g01_c02
 77 | v_CricketBowling_g01_c04
 78 | v_CricketBowling_g01_c06
 79 | v_CricketBowling_g02_c01
 80 | v_CricketBowling_g02_c03
 81 | v_CricketBowling_g02_c05
 82 | v_CricketBowling_g02_c07
 83 | v_CricketBowling_g03_c02
 84 | v_CricketBowling_g03_c04
 85 | v_CricketBowling_g04_c02
 86 | v_CricketBowling_g04_c04
 87 | v_CricketBowling_g05_c01
 88 | v_CricketBowling_g05_c03
 89 | v_CricketBowling_g06_c01
 90 | v_CricketBowling_g06_c03
 91 | v_CricketBowling_g06_c05
 92 | v_CricketBowling_g07_c02
 93 | v_CricketBowling_g07_c04
 94 | v_Diving_g01_c02
 95 | v_Diving_g01_c04
 96 | v_Diving_g01_c06
 97 | v_Diving_g02_c01
 98 | v_Diving_g02_c03
 99 | v_Diving_g02_c05
100 | v_Diving_g02_c07
101 | v_Diving_g03_c02
102 | v_Diving_g03_c04
103 | v_Diving_g03_c06
104 | v_Diving_g04_c01
105 | v_Diving_g04_c03
106 | v_Diving_g04_c05
107 | v_Diving_g04_c07
108 | v_Diving_g05_c02
109 | v_Diving_g05_c04
110 | v_Diving_g05_c06
111 | v_Diving_g06_c02
112 | v_Diving_g06_c04
113 | v_Diving_g06_c06
114 | v_Diving_g07_c01
115 | v_Diving_g07_c03
116 | v_Fencing_g01_c01
117 | v_Fencing_g01_c03
118 | v_Fencing_g01_c05
119 | v_Fencing_g02_c01
120 | v_Fencing_g02_c03
121 | v_Fencing_g02_c05
122 | v_Fencing_g03_c02
123 | v_Fencing_g03_c04
124 | v_Fencing_g04_c01
125 | v_Fencing_g04_c03
126 | v_Fencing_g04_c05
127 | v_Fencing_g05_c02
128 | v_Fencing_g05_c04
129 | v_Fencing_g06_c01
130 | v_Fencing_g06_c03
131 | v_Fencing_g07_c01
132 | v_Fencing_g07_c03
133 | v_FloorGymnastics_g01_c01
134 | v_FloorGymnastics_g01_c03
135 | v_FloorGymnastics_g01_c05
136 | v_FloorGymnastics_g02_c02
137 | v_FloorGymnastics_g02_c04
138 | v_FloorGymnastics_g03_c02
139 | v_FloorGymnastics_g03_c04
140 | v_FloorGymnastics_g04_c02
141 | v_FloorGymnastics_g04_c04
142 | v_FloorGymnastics_g05_c01
143 | v_FloorGymnastics_g05_c03
144 | v_FloorGymnastics_g06_c01
145 | v_FloorGymnastics_g06_c03
146 | v_FloorGymnastics_g06_c05
147 | v_FloorGymnastics_g06_c07
148 | v_FloorGymnastics_g07_c02
149 | v_FloorGymnastics_g07_c04
150 | v_FloorGymnastics_g07_c06
151 | v_GolfSwing_g01_c01
152 | v_GolfSwing_g01_c03
153 | v_GolfSwing_g01_c05
154 | v_GolfSwing_g02_c01
155 | v_GolfSwing_g02_c03
156 | v_GolfSwing_g03_c01
157 | v_GolfSwing_g03_c03
158 | v_GolfSwing_g03_c05
159 | v_GolfSwing_g03_c07
160 | v_GolfSwing_g04_c02
161 | v_GolfSwing_g04_c04
162 | v_GolfSwing_g04_c06
163 | v_GolfSwing_g05_c02
164 | v_GolfSwing_g05_c04
165 | v_GolfSwing_g05_c06
166 | v_GolfSwing_g06_c01
167 | v_GolfSwing_g06_c03
168 | v_GolfSwing_g07_c01
169 | v_GolfSwing_g07_c03
170 | v_GolfSwing_g07_c05
171 | v_HorseRiding_g01_c02
172 | v_HorseRiding_g01_c04
173 | v_HorseRiding_g01_c06
174 | v_HorseRiding_g02_c01
175 | v_HorseRiding_g02_c03
176 | v_HorseRiding_g02_c05
177 | v_HorseRiding_g02_c07
178 | v_HorseRiding_g03_c02
179 | v_HorseRiding_g03_c04
180 | v_HorseRiding_g03_c06
181 | v_HorseRiding_g04_c01
182 | v_HorseRiding_g04_c03
183 | v_HorseRiding_g04_c05
184 | v_HorseRiding_g04_c07
185 | v_HorseRiding_g05_c02
186 | v_HorseRiding_g05_c04
187 | v_HorseRiding_g05_c06
188 | v_HorseRiding_g06_c01
189 | v_HorseRiding_g06_c03
190 | v_HorseRiding_g06_c05
191 | v_HorseRiding_g06_c07
192 | v_HorseRiding_g07_c02
193 | v_HorseRiding_g07_c04
194 | v_HorseRiding_g07_c06
195 | v_IceDancing_g01_c01
196 | v_IceDancing_g01_c03
197 | v_IceDancing_g01_c05
198 | v_IceDancing_g01_c07
199 | v_IceDancing_g02_c02
200 | v_IceDancing_g02_c04
201 | v_IceDancing_g02_c06
202 | v_IceDancing_g03_c01
203 | v_IceDancing_g03_c03
204 | v_IceDancing_g03_c05
205 | v_IceDancing_g04_c01
206 | v_IceDancing_g04_c03
207 | v_IceDancing_g04_c05
208 | v_IceDancing_g04_c07
209 | v_IceDancing_g05_c02
210 | v_IceDancing_g05_c04
211 | v_IceDancing_g05_c06
212 | v_IceDancing_g06_c02
213 | v_IceDancing_g06_c04
214 | v_IceDancing_g06_c06
215 | v_IceDancing_g07_c02
216 | v_IceDancing_g07_c04
217 | v_IceDancing_g07_c06
218 | v_LongJump_g01_c01
219 | v_LongJump_g01_c03
220 | v_LongJump_g01_c05
221 | v_LongJump_g01_c07
222 | v_LongJump_g02_c02
223 | v_LongJump_g02_c04
224 | v_LongJump_g03_c01
225 | v_LongJump_g03_c03
226 | v_LongJump_g03_c05
227 | v_LongJump_g04_c01
228 | v_LongJump_g04_c03
229 | v_LongJump_g04_c05
230 | v_LongJump_g04_c07
231 | v_LongJump_g05_c02
232 | v_LongJump_g05_c04
233 | v_LongJump_g06_c01
234 | v_LongJump_g06_c03
235 | v_LongJump_g07_c01
236 | v_LongJump_g07_c03
237 | v_PoleVault_g01_c01
238 | v_PoleVault_g01_c03
239 | v_PoleVault_g01_c05
240 | v_PoleVault_g02_c02
241 | v_PoleVault_g02_c04
242 | v_PoleVault_g02_c06
243 | v_PoleVault_g03_c01
244 | v_PoleVault_g03_c03
245 | v_PoleVault_g03_c05
246 | v_PoleVault_g03_c07
247 | v_PoleVault_g04_c02
248 | v_PoleVault_g04_c04
249 | v_PoleVault_g04_c06
250 | v_PoleVault_g05_c01
251 | v_PoleVault_g05_c03
252 | v_PoleVault_g05_c05
253 | v_PoleVault_g06_c02
254 | v_PoleVault_g06_c04
255 | v_PoleVault_g07_c01
256 | v_PoleVault_g07_c03
257 | v_RopeClimbing_g01_c01
258 | v_RopeClimbing_g01_c03
259 | v_RopeClimbing_g02_c01
260 | v_RopeClimbing_g02_c03
261 | v_RopeClimbing_g02_c05
262 | v_RopeClimbing_g03_c01
263 | v_RopeClimbing_g03_c03
264 | v_RopeClimbing_g04_c01
265 | v_RopeClimbing_g04_c03
266 | v_RopeClimbing_g05_c01
267 | v_RopeClimbing_g05_c03
268 | v_RopeClimbing_g05_c05
269 | v_RopeClimbing_g05_c07
270 | v_RopeClimbing_g06_c02
271 | v_RopeClimbing_g06_c04
272 | v_RopeClimbing_g07_c02
273 | v_RopeClimbing_g07_c04
274 | v_SalsaSpin_g01_c01
275 | v_SalsaSpin_g01_c03
276 | v_SalsaSpin_g01_c05
277 | v_SalsaSpin_g01_c07
278 | v_SalsaSpin_g02_c02
279 | v_SalsaSpin_g02_c04
280 | v_SalsaSpin_g02_c06
281 | v_SalsaSpin_g03_c01
282 | v_SalsaSpin_g03_c03
283 | v_SalsaSpin_g03_c05
284 | v_SalsaSpin_g04_c01
285 | v_SalsaSpin_g04_c03
286 | v_SalsaSpin_g04_c05
287 | v_SalsaSpin_g05_c01
288 | v_SalsaSpin_g05_c03
289 | v_SalsaSpin_g05_c05
290 | v_SalsaSpin_g06_c01
291 | v_SalsaSpin_g06_c03
292 | v_SalsaSpin_g06_c05
293 | v_SalsaSpin_g07_c02
294 | v_SalsaSpin_g07_c04
295 | v_SalsaSpin_g07_c06
296 | v_SkateBoarding_g01_c02
297 | v_SkateBoarding_g01_c04
298 | v_SkateBoarding_g02_c02
299 | v_SkateBoarding_g02_c04
300 | v_SkateBoarding_g02_c06
301 | v_SkateBoarding_g03_c02
302 | v_SkateBoarding_g03_c04
303 | v_SkateBoarding_g04_c02
304 | v_SkateBoarding_g04_c04
305 | v_SkateBoarding_g05_c01
306 | v_SkateBoarding_g05_c03
307 | v_SkateBoarding_g06_c01
308 | v_SkateBoarding_g06_c03
309 | v_SkateBoarding_g07_c01
310 | v_SkateBoarding_g07_c03
311 | v_SkateBoarding_g07_c05
312 | v_Skiing_g01_c02
313 | v_Skiing_g01_c04
314 | v_Skiing_g01_c06
315 | v_Skiing_g02_c02
316 | v_Skiing_g02_c04
317 | v_Skiing_g03_c01
318 | v_Skiing_g03_c03
319 | v_Skiing_g03_c05
320 | v_Skiing_g03_c07
321 | v_Skiing_g04_c02
322 | v_Skiing_g04_c04
323 | v_Skiing_g04_c06
324 | v_Skiing_g05_c01
325 | v_Skiing_g05_c03
326 | v_Skiing_g06_c01
327 | v_Skiing_g06_c03
328 | v_Skiing_g06_c05
329 | v_Skiing_g06_c07
330 | v_Skiing_g07_c02
331 | v_Skiing_g07_c04
332 | v_Skijet_g01_c02
333 | v_Skijet_g01_c04
334 | v_Skijet_g02_c02
335 | v_Skijet_g02_c04
336 | v_Skijet_g03_c02
337 | v_Skijet_g03_c04
338 | v_Skijet_g04_c02
339 | v_Skijet_g04_c04
340 | v_Skijet_g05_c02
341 | v_Skijet_g05_c04
342 | v_Skijet_g06_c02
343 | v_Skijet_g06_c04
344 | v_Skijet_g07_c02
345 | v_Skijet_g07_c04
346 | v_SoccerJuggling_g01_c02
347 | v_SoccerJuggling_g01_c04
348 | v_SoccerJuggling_g02_c01
349 | v_SoccerJuggling_g02_c03
350 | v_SoccerJuggling_g02_c05
351 | v_SoccerJuggling_g03_c01
352 | v_SoccerJuggling_g03_c03
353 | v_SoccerJuggling_g04_c01
354 | v_SoccerJuggling_g04_c03
355 | v_SoccerJuggling_g04_c05
356 | v_SoccerJuggling_g05_c01
357 | v_SoccerJuggling_g05_c03
358 | v_SoccerJuggling_g05_c05
359 | v_SoccerJuggling_g06_c01
360 | v_SoccerJuggling_g06_c03
361 | v_SoccerJuggling_g06_c05
362 | v_SoccerJuggling_g07_c02
363 | v_SoccerJuggling_g07_c04
364 | v_SoccerJuggling_g07_c06
365 | v_Surfing_g01_c01
366 | v_Surfing_g01_c03
367 | v_Surfing_g01_c05
368 | v_Surfing_g01_c07
369 | v_Surfing_g02_c02
370 | v_Surfing_g02_c04
371 | v_Surfing_g02_c06
372 | v_Surfing_g03_c02
373 | v_Surfing_g03_c04
374 | v_Surfing_g04_c02
375 | v_Surfing_g04_c04
376 | v_Surfing_g05_c02
377 | v_Surfing_g05_c04
378 | v_Surfing_g06_c02
379 | v_Surfing_g06_c04
380 | v_Surfing_g07_c02
381 | v_Surfing_g07_c04
382 | v_TennisSwing_g01_c02
383 | v_TennisSwing_g01_c04
384 | v_TennisSwing_g01_c06
385 | v_TennisSwing_g02_c01
386 | v_TennisSwing_g02_c03
387 | v_TennisSwing_g02_c05
388 | v_TennisSwing_g02_c07
389 | v_TennisSwing_g03_c02
390 | v_TennisSwing_g03_c04
391 | v_TennisSwing_g03_c06
392 | v_TennisSwing_g04_c01
393 | v_TennisSwing_g04_c03
394 | v_TennisSwing_g04_c05
395 | v_TennisSwing_g04_c07
396 | v_TennisSwing_g05_c02
397 | v_TennisSwing_g05_c04
398 | v_TennisSwing_g05_c06
399 | v_TennisSwing_g06_c01
400 | v_TennisSwing_g06_c03
401 | v_TennisSwing_g06_c05
402 | v_TennisSwing_g06_c07
403 | v_TennisSwing_g07_c02
404 | v_TennisSwing_g07_c04
405 | v_TennisSwing_g07_c06
406 | v_TrampolineJumping_g01_c01
407 | v_TrampolineJumping_g01_c03
408 | v_TrampolineJumping_g02_c01
409 | v_TrampolineJumping_g02_c03
410 | v_TrampolineJumping_g02_c05
411 | v_TrampolineJumping_g03_c01
412 | v_TrampolineJumping_g03_c03
413 | v_TrampolineJumping_g04_c01
414 | v_TrampolineJumping_g04_c03
415 | v_TrampolineJumping_g04_c05
416 | v_TrampolineJumping_g05_c02
417 | v_TrampolineJumping_g05_c04
418 | v_TrampolineJumping_g06_c02
419 | v_TrampolineJumping_g07_c01
420 | v_TrampolineJumping_g07_c03
421 | v_TrampolineJumping_g07_c05
422 | v_VolleyballSpiking_g01_c02
423 | v_VolleyballSpiking_g01_c04
424 | v_VolleyballSpiking_g02_c04
425 | v_VolleyballSpiking_g03_c02
426 | v_VolleyballSpiking_g03_c04
427 | v_VolleyballSpiking_g04_c02
428 | v_VolleyballSpiking_g04_c04
429 | v_VolleyballSpiking_g04_c06
430 | v_VolleyballSpiking_g05_c01
431 | v_VolleyballSpiking_g05_c03
432 | v_VolleyballSpiking_g05_c05
433 | v_VolleyballSpiking_g06_c02
434 | v_VolleyballSpiking_g06_c04
435 | v_VolleyballSpiking_g07_c02
436 | v_VolleyballSpiking_g07_c04
437 | v_VolleyballSpiking_g07_c06
438 | v_WalkingWithDog_g01_c01
439 | v_WalkingWithDog_g01_c03
440 | v_WalkingWithDog_g02_c01
441 | v_WalkingWithDog_g02_c03
442 | v_WalkingWithDog_g02_c05
443 | v_WalkingWithDog_g03_c01
444 | v_WalkingWithDog_g03_c03
445 | v_WalkingWithDog_g03_c05
446 | v_WalkingWithDog_g04_c02
447 | v_WalkingWithDog_g04_c04
448 | v_WalkingWithDog_g05_c01
449 | v_WalkingWithDog_g05_c03
450 | v_WalkingWithDog_g05_c05
451 | v_WalkingWithDog_g06_c02
452 | v_WalkingWithDog_g06_c04
453 | v_WalkingWithDog_g07_c01
454 | v_WalkingWithDog_g07_c03
455 | v_WalkingWithDog_g07_c05


--------------------------------------------------------------------------------
/validationList.txt:
--------------------------------------------------------------------------------
  1 | v_Basketball_g01_c02
  2 | v_Basketball_g01_c04
  3 | v_Basketball_g01_c06
  4 | v_Basketball_g02_c01
  5 | v_Basketball_g02_c03
  6 | v_Basketball_g02_c05
  7 | v_Basketball_g03_c01
  8 | v_Basketball_g03_c03
  9 | v_Basketball_g03_c05
 10 | v_Basketball_g04_c01
 11 | v_Basketball_g04_c03
 12 | v_Basketball_g05_c01
 13 | v_Basketball_g05_c03
 14 | v_Basketball_g06_c01
 15 | v_Basketball_g06_c03
 16 | v_Basketball_g07_c01
 17 | v_Basketball_g07_c03
 18 | v_BasketballDunk_g01_c01
 19 | v_BasketballDunk_g01_c03
 20 | v_BasketballDunk_g01_c05
 21 | v_BasketballDunk_g01_c07
 22 | v_BasketballDunk_g02_c02
 23 | v_BasketballDunk_g02_c04
 24 | v_BasketballDunk_g03_c02
 25 | v_BasketballDunk_g03_c04
 26 | v_BasketballDunk_g03_c06
 27 | v_BasketballDunk_g04_c02
 28 | v_BasketballDunk_g04_c04
 29 | v_BasketballDunk_g05_c02
 30 | v_BasketballDunk_g05_c04
 31 | v_BasketballDunk_g05_c06
 32 | v_BasketballDunk_g06_c02
 33 | v_BasketballDunk_g06_c04
 34 | v_BasketballDunk_g07_c02
 35 | v_BasketballDunk_g07_c04
 36 | v_BasketballDunk_g07_c06
 37 | v_Biking_g01_c02
 38 | v_Biking_g01_c04
 39 | v_Biking_g02_c02
 40 | v_Biking_g02_c04
 41 | v_Biking_g02_c06
 42 | v_Biking_g03_c01
 43 | v_Biking_g03_c03
 44 | v_Biking_g04_c01
 45 | v_Biking_g04_c03
 46 | v_Biking_g04_c05
 47 | v_Biking_g05_c02
 48 | v_Biking_g05_c04
 49 | v_Biking_g05_c06
 50 | v_Biking_g06_c01
 51 | v_Biking_g06_c03
 52 | v_Biking_g06_c05
 53 | v_Biking_g07_c02
 54 | v_Biking_g07_c04
 55 | v_Biking_g07_c06
 56 | v_CliffDiving_g01_c02
 57 | v_CliffDiving_g01_c04
 58 | v_CliffDiving_g01_c06
 59 | v_CliffDiving_g02_c02
 60 | v_CliffDiving_g02_c04
 61 | v_CliffDiving_g03_c02
 62 | v_CliffDiving_g03_c04
 63 | v_CliffDiving_g04_c01
 64 | v_CliffDiving_g04_c03
 65 | v_CliffDiving_g05_c01
 66 | v_CliffDiving_g05_c03
 67 | v_CliffDiving_g05_c05
 68 | v_CliffDiving_g05_c07
 69 | v_CliffDiving_g06_c02
 70 | v_CliffDiving_g06_c04
 71 | v_CliffDiving_g06_c06
 72 | v_CliffDiving_g07_c01
 73 | v_CliffDiving_g07_c03
 74 | v_CliffDiving_g07_c05
 75 | v_CricketBowling_g01_c01
 76 | v_CricketBowling_g01_c03
 77 | v_CricketBowling_g01_c05
 78 | v_CricketBowling_g01_c07
 79 | v_CricketBowling_g02_c02
 80 | v_CricketBowling_g02_c04
 81 | v_CricketBowling_g02_c06
 82 | v_CricketBowling_g03_c01
 83 | v_CricketBowling_g03_c03
 84 | v_CricketBowling_g04_c01
 85 | v_CricketBowling_g04_c03
 86 | v_CricketBowling_g04_c05
 87 | v_CricketBowling_g05_c02
 88 | v_CricketBowling_g05_c04
 89 | v_CricketBowling_g06_c02
 90 | v_CricketBowling_g06_c04
 91 | v_CricketBowling_g07_c01
 92 | v_CricketBowling_g07_c03
 93 | v_Diving_g01_c01
 94 | v_Diving_g01_c03
 95 | v_Diving_g01_c05
 96 | v_Diving_g01_c07
 97 | v_Diving_g02_c02
 98 | v_Diving_g02_c04
 99 | v_Diving_g02_c06
100 | v_Diving_g03_c01
101 | v_Diving_g03_c03
102 | v_Diving_g03_c05
103 | v_Diving_g03_c07
104 | v_Diving_g04_c02
105 | v_Diving_g04_c04
106 | v_Diving_g04_c06
107 | v_Diving_g05_c01
108 | v_Diving_g05_c03
109 | v_Diving_g05_c05
110 | v_Diving_g06_c01
111 | v_Diving_g06_c03
112 | v_Diving_g06_c05
113 | v_Diving_g06_c07
114 | v_Diving_g07_c02
115 | v_Diving_g07_c04
116 | v_Fencing_g01_c02
117 | v_Fencing_g01_c04
118 | v_Fencing_g01_c06
119 | v_Fencing_g02_c02
120 | v_Fencing_g02_c04
121 | v_Fencing_g03_c01
122 | v_Fencing_g03_c03
123 | v_Fencing_g03_c05
124 | v_Fencing_g04_c02
125 | v_Fencing_g04_c04
126 | v_Fencing_g05_c01
127 | v_Fencing_g05_c03
128 | v_Fencing_g05_c05
129 | v_Fencing_g06_c02
130 | v_Fencing_g06_c04
131 | v_Fencing_g07_c02
132 | v_Fencing_g07_c04
133 | v_FloorGymnastics_g01_c02
134 | v_FloorGymnastics_g01_c04
135 | v_FloorGymnastics_g02_c01
136 | v_FloorGymnastics_g02_c03
137 | v_FloorGymnastics_g03_c01
138 | v_FloorGymnastics_g03_c03
139 | v_FloorGymnastics_g04_c01
140 | v_FloorGymnastics_g04_c03
141 | v_FloorGymnastics_g04_c05
142 | v_FloorGymnastics_g05_c02
143 | v_FloorGymnastics_g05_c04
144 | v_FloorGymnastics_g06_c02
145 | v_FloorGymnastics_g06_c04
146 | v_FloorGymnastics_g06_c06
147 | v_FloorGymnastics_g07_c01
148 | v_FloorGymnastics_g07_c03
149 | v_FloorGymnastics_g07_c05
150 | v_FloorGymnastics_g07_c07
151 | v_GolfSwing_g01_c02
152 | v_GolfSwing_g01_c04
153 | v_GolfSwing_g01_c06
154 | v_GolfSwing_g02_c02
155 | v_GolfSwing_g02_c04
156 | v_GolfSwing_g03_c02
157 | v_GolfSwing_g03_c04
158 | v_GolfSwing_g03_c06
159 | v_GolfSwing_g04_c01
160 | v_GolfSwing_g04_c03
161 | v_GolfSwing_g04_c05
162 | v_GolfSwing_g05_c01
163 | v_GolfSwing_g05_c03
164 | v_GolfSwing_g05_c05
165 | v_GolfSwing_g05_c07
166 | v_GolfSwing_g06_c02
167 | v_GolfSwing_g06_c04
168 | v_GolfSwing_g07_c02
169 | v_GolfSwing_g07_c04
170 | v_HorseRiding_g01_c01
171 | v_HorseRiding_g01_c03
172 | v_HorseRiding_g01_c05
173 | v_HorseRiding_g01_c07
174 | v_HorseRiding_g02_c02
175 | v_HorseRiding_g02_c04
176 | v_HorseRiding_g02_c06
177 | v_HorseRiding_g03_c01
178 | v_HorseRiding_g03_c03
179 | v_HorseRiding_g03_c05
180 | v_HorseRiding_g03_c07
181 | v_HorseRiding_g04_c02
182 | v_HorseRiding_g04_c04
183 | v_HorseRiding_g04_c06
184 | v_HorseRiding_g05_c01
185 | v_HorseRiding_g05_c03
186 | v_HorseRiding_g05_c05
187 | v_HorseRiding_g05_c07
188 | v_HorseRiding_g06_c02
189 | v_HorseRiding_g06_c04
190 | v_HorseRiding_g06_c06
191 | v_HorseRiding_g07_c01
192 | v_HorseRiding_g07_c03
193 | v_HorseRiding_g07_c05
194 | v_HorseRiding_g07_c07
195 | v_IceDancing_g01_c02
196 | v_IceDancing_g01_c04
197 | v_IceDancing_g01_c06
198 | v_IceDancing_g02_c01
199 | v_IceDancing_g02_c03
200 | v_IceDancing_g02_c05
201 | v_IceDancing_g02_c07
202 | v_IceDancing_g03_c02
203 | v_IceDancing_g03_c04
204 | v_IceDancing_g03_c06
205 | v_IceDancing_g04_c02
206 | v_IceDancing_g04_c04
207 | v_IceDancing_g04_c06
208 | v_IceDancing_g05_c01
209 | v_IceDancing_g05_c03
210 | v_IceDancing_g05_c05
211 | v_IceDancing_g06_c01
212 | v_IceDancing_g06_c03
213 | v_IceDancing_g06_c05
214 | v_IceDancing_g07_c01
215 | v_IceDancing_g07_c03
216 | v_IceDancing_g07_c05
217 | v_IceDancing_g07_c07
218 | v_LongJump_g01_c02
219 | v_LongJump_g01_c04
220 | v_LongJump_g01_c06
221 | v_LongJump_g02_c01
222 | v_LongJump_g02_c03
223 | v_LongJump_g02_c05
224 | v_LongJump_g03_c02
225 | v_LongJump_g03_c04
226 | v_LongJump_g03_c06
227 | v_LongJump_g04_c02
228 | v_LongJump_g04_c04
229 | v_LongJump_g04_c06
230 | v_LongJump_g05_c01
231 | v_LongJump_g05_c03
232 | v_LongJump_g05_c05
233 | v_LongJump_g06_c02
234 | v_LongJump_g06_c04
235 | v_LongJump_g07_c02
236 | v_LongJump_g07_c05
237 | v_PoleVault_g01_c02
238 | v_PoleVault_g01_c04
239 | v_PoleVault_g02_c01
240 | v_PoleVault_g02_c03
241 | v_PoleVault_g02_c05
242 | v_PoleVault_g02_c07
243 | v_PoleVault_g03_c02
244 | v_PoleVault_g03_c04
245 | v_PoleVault_g03_c06
246 | v_PoleVault_g04_c01
247 | v_PoleVault_g04_c03
248 | v_PoleVault_g04_c05
249 | v_PoleVault_g04_c07
250 | v_PoleVault_g05_c02
251 | v_PoleVault_g05_c04
252 | v_PoleVault_g06_c01
253 | v_PoleVault_g06_c03
254 | v_PoleVault_g06_c05
255 | v_PoleVault_g07_c02
256 | v_PoleVault_g07_c04
257 | v_RopeClimbing_g01_c02
258 | v_RopeClimbing_g01_c04
259 | v_RopeClimbing_g02_c02
260 | v_RopeClimbing_g02_c04
261 | v_RopeClimbing_g02_c06
262 | v_RopeClimbing_g03_c02
263 | v_RopeClimbing_g03_c04
264 | v_RopeClimbing_g04_c02
265 | v_RopeClimbing_g04_c04
266 | v_RopeClimbing_g05_c02
267 | v_RopeClimbing_g05_c04
268 | v_RopeClimbing_g05_c06
269 | v_RopeClimbing_g06_c01
270 | v_RopeClimbing_g06_c03
271 | v_RopeClimbing_g07_c01
272 | v_RopeClimbing_g07_c03
273 | v_RopeClimbing_g07_c05
274 | v_SalsaSpin_g01_c02
275 | v_SalsaSpin_g01_c04
276 | v_SalsaSpin_g01_c06
277 | v_SalsaSpin_g02_c01
278 | v_SalsaSpin_g02_c03
279 | v_SalsaSpin_g02_c05
280 | v_SalsaSpin_g02_c07
281 | v_SalsaSpin_g03_c02
282 | v_SalsaSpin_g03_c04
283 | v_SalsaSpin_g03_c06
284 | v_SalsaSpin_g04_c02
285 | v_SalsaSpin_g04_c04
286 | v_SalsaSpin_g04_c06
287 | v_SalsaSpin_g05_c02
288 | v_SalsaSpin_g05_c04
289 | v_SalsaSpin_g05_c06
290 | v_SalsaSpin_g06_c02
291 | v_SalsaSpin_g06_c04
292 | v_SalsaSpin_g07_c01
293 | v_SalsaSpin_g07_c03
294 | v_SalsaSpin_g07_c05
295 | v_SkateBoarding_g01_c01
296 | v_SkateBoarding_g01_c03
297 | v_SkateBoarding_g02_c01
298 | v_SkateBoarding_g02_c03
299 | v_SkateBoarding_g02_c05
300 | v_SkateBoarding_g03_c01
301 | v_SkateBoarding_g03_c03
302 | v_SkateBoarding_g04_c01
303 | v_SkateBoarding_g04_c03
304 | v_SkateBoarding_g04_c05
305 | v_SkateBoarding_g05_c02
306 | v_SkateBoarding_g05_c04
307 | v_SkateBoarding_g06_c02
308 | v_SkateBoarding_g06_c04
309 | v_SkateBoarding_g07_c02
310 | v_SkateBoarding_g07_c04
311 | v_Skiing_g01_c01
312 | v_Skiing_g01_c03
313 | v_Skiing_g01_c05
314 | v_Skiing_g02_c01
315 | v_Skiing_g02_c03
316 | v_Skiing_g02_c05
317 | v_Skiing_g03_c02
318 | v_Skiing_g03_c04
319 | v_Skiing_g03_c06
320 | v_Skiing_g04_c01
321 | v_Skiing_g04_c03
322 | v_Skiing_g04_c05
323 | v_Skiing_g04_c07
324 | v_Skiing_g05_c02
325 | v_Skiing_g05_c04
326 | v_Skiing_g06_c02
327 | v_Skiing_g06_c04
328 | v_Skiing_g06_c06
329 | v_Skiing_g07_c01
330 | v_Skiing_g07_c03
331 | v_Skijet_g01_c01
332 | v_Skijet_g01_c03
333 | v_Skijet_g02_c01
334 | v_Skijet_g02_c03
335 | v_Skijet_g03_c01
336 | v_Skijet_g03_c03
337 | v_Skijet_g04_c01
338 | v_Skijet_g04_c03
339 | v_Skijet_g05_c01
340 | v_Skijet_g05_c03
341 | v_Skijet_g06_c01
342 | v_Skijet_g06_c03
343 | v_Skijet_g07_c01
344 | v_Skijet_g07_c03
345 | v_SoccerJuggling_g01_c01
346 | v_SoccerJuggling_g01_c03
347 | v_SoccerJuggling_g01_c05
348 | v_SoccerJuggling_g02_c02
349 | v_SoccerJuggling_g02_c04
350 | v_SoccerJuggling_g02_c06
351 | v_SoccerJuggling_g03_c02
352 | v_SoccerJuggling_g03_c04
353 | v_SoccerJuggling_g04_c02
354 | v_SoccerJuggling_g04_c04
355 | v_SoccerJuggling_g04_c06
356 | v_SoccerJuggling_g05_c02
357 | v_SoccerJuggling_g05_c04
358 | v_SoccerJuggling_g05_c06
359 | v_SoccerJuggling_g06_c02
360 | v_SoccerJuggling_g06_c04
361 | v_SoccerJuggling_g07_c01
362 | v_SoccerJuggling_g07_c03
363 | v_SoccerJuggling_g07_c05
364 | v_SoccerJuggling_g07_c07
365 | v_Surfing_g01_c02
366 | v_Surfing_g01_c04
367 | v_Surfing_g01_c06
368 | v_Surfing_g02_c01
369 | v_Surfing_g02_c03
370 | v_Surfing_g02_c05
371 | v_Surfing_g03_c01
372 | v_Surfing_g03_c03
373 | v_Surfing_g04_c01
374 | v_Surfing_g04_c03
375 | v_Surfing_g05_c01
376 | v_Surfing_g05_c03
377 | v_Surfing_g06_c01
378 | v_Surfing_g06_c03
379 | v_Surfing_g07_c01
380 | v_Surfing_g07_c03
381 | v_TennisSwing_g01_c01
382 | v_TennisSwing_g01_c03
383 | v_TennisSwing_g01_c05
384 | v_TennisSwing_g01_c07
385 | v_TennisSwing_g02_c02
386 | v_TennisSwing_g02_c04
387 | v_TennisSwing_g02_c06
388 | v_TennisSwing_g03_c01
389 | v_TennisSwing_g03_c03
390 | v_TennisSwing_g03_c05
391 | v_TennisSwing_g03_c07
392 | v_TennisSwing_g04_c02
393 | v_TennisSwing_g04_c04
394 | v_TennisSwing_g04_c06
395 | v_TennisSwing_g05_c01
396 | v_TennisSwing_g05_c03
397 | v_TennisSwing_g05_c05
398 | v_TennisSwing_g05_c07
399 | v_TennisSwing_g06_c02
400 | v_TennisSwing_g06_c04
401 | v_TennisSwing_g06_c06
402 | v_TennisSwing_g07_c01
403 | v_TennisSwing_g07_c03
404 | v_TennisSwing_g07_c05
405 | v_TennisSwing_g07_c07
406 | v_TrampolineJumping_g01_c02
407 | v_TrampolineJumping_g01_c04
408 | v_TrampolineJumping_g02_c02
409 | v_TrampolineJumping_g02_c04
410 | v_TrampolineJumping_g02_c06
411 | v_TrampolineJumping_g03_c02
412 | v_TrampolineJumping_g03_c04
413 | v_TrampolineJumping_g04_c02
414 | v_TrampolineJumping_g04_c04
415 | v_TrampolineJumping_g05_c01
416 | v_TrampolineJumping_g05_c03
417 | v_TrampolineJumping_g06_c01
418 | v_TrampolineJumping_g06_c04
419 | v_TrampolineJumping_g07_c02
420 | v_TrampolineJumping_g07_c04
421 | v_VolleyballSpiking_g01_c01
422 | v_VolleyballSpiking_g01_c03
423 | v_VolleyballSpiking_g02_c01
424 | v_VolleyballSpiking_g03_c01
425 | v_VolleyballSpiking_g03_c03
426 | v_VolleyballSpiking_g04_c01
427 | v_VolleyballSpiking_g04_c03
428 | v_VolleyballSpiking_g04_c05
429 | v_VolleyballSpiking_g04_c07
430 | v_VolleyballSpiking_g05_c02
431 | v_VolleyballSpiking_g05_c04
432 | v_VolleyballSpiking_g06_c01
433 | v_VolleyballSpiking_g06_c03
434 | v_VolleyballSpiking_g07_c01
435 | v_VolleyballSpiking_g07_c03
436 | v_VolleyballSpiking_g07_c05
437 | v_VolleyballSpiking_g07_c07
438 | v_WalkingWithDog_g01_c02
439 | v_WalkingWithDog_g01_c04
440 | v_WalkingWithDog_g02_c02
441 | v_WalkingWithDog_g02_c04
442 | v_WalkingWithDog_g02_c06
443 | v_WalkingWithDog_g03_c02
444 | v_WalkingWithDog_g03_c04
445 | v_WalkingWithDog_g04_c01
446 | v_WalkingWithDog_g04_c03
447 | v_WalkingWithDog_g04_c05
448 | v_WalkingWithDog_g05_c02
449 | v_WalkingWithDog_g05_c04
450 | v_WalkingWithDog_g06_c01
451 | v_WalkingWithDog_g06_c03
452 | v_WalkingWithDog_g06_c05
453 | v_WalkingWithDog_g07_c02
454 | v_WalkingWithDog_g07_c04
455 | v_WalkingWithDog_g07_c06


--------------------------------------------------------------------------------
/actionCLSS_dataset_partitioned.py:
--------------------------------------------------------------------------------
  1 | import matplotlib
  2 | 
  3 | matplotlib.use('TkAgg')
  4 | 
  5 | import utils
  6 | import random
  7 | import glob, os
  8 | # import math
  9 | import cv2
 10 | import csv
 11 | # from PIL import Image, ImageFont, ImageDraw, ImageEnhance
 12 | import numpy as np
 13 | import matplotlib.pyplot as plt
 14 | 
 15 | 
 16 | # import matplotlib.patches as patches
 17 | # import matplotlib.lines as lines
 18 | # from matplotlib.patches import Polygon
 19 | # import IPython.display
 20 | 
 21 | 
 22 | class ShapesDatasetPartitioned(utils.Dataset):
 23 |     """Generates the shapes synthetic dataset. The dataset consists of simple
 24 |     shapes (triangles, squares, circles) placed randomly on a blank surface.
 25 |     The images are generated on the fly. No file access required.
 26 |     """
 27 |     with open('validationList.txt') as file:
 28 |         reserved_for_val = [video.replace('\n', '') for video in file.readlines()]
 29 |     with open('testList.txt') as file:
 30 |         reserved_for_test = [video.replace('\n', '') for video in file.readlines()]
 31 | 
 32 |     def load_train_shapes(self):
 33 |         # Training is True if the dataset is for training, False if it's for validation set
 34 |         """Generate the requested number of synthetic images.
 35 |         count: number of images to generate.
 36 |         height, width: the size of the generated images.
 37 |         """
 38 |         # Add classes
 39 |         self.add_class("sport", 1, "WalkingWithDog")
 40 |         self.add_class("sport", 2, "BasketballDunk")
 41 |         self.add_class("sport", 3, "Biking")
 42 |         self.add_class("sport", 4, "CliffDiving")
 43 |         self.add_class("sport", 5, "CricketBowling")
 44 |         self.add_class("sport", 6, "Diving")
 45 |         self.add_class("sport", 7, "Fencing")
 46 |         self.add_class("sport", 8, "FloorGymnastics")
 47 |         self.add_class("sport", 9, "GolfSwing")
 48 |         self.add_class("sport", 10, "HorseRiding")
 49 |         self.add_class("sport", 11, "IceDancing")
 50 |         self.add_class("sport", 12, "LongJump")
 51 |         self.add_class("sport", 13, "PoleVault")
 52 |         self.add_class("sport", 14, "RopeClimbing")
 53 |         self.add_class("sport", 15, "SalsaSpin")
 54 |         self.add_class("sport", 16, "SkateBoarding")
 55 |         self.add_class("sport", 17, "Skiing")
 56 |         self.add_class("sport", 18, "Skijet")
 57 |         self.add_class("sport", 19, "SoccerJuggling")
 58 |         self.add_class("sport", 20, "Surfing")
 59 |         self.add_class("sport", 21, "TennisSwing")
 60 |         self.add_class("sport", 22, "TrampolineJumping")
 61 |         self.add_class("sport", 23, "VolleyballSpiking")
 62 |         self.add_class("sport", 24, "Basketball")
 63 | 
 64 |         # Select ALL IMAGES
 65 |         root_labels = 'ucf24_project/labels'
 66 |         root_images = 'ucf24_project/rgb-images'
 67 |         for activity in [act['name'] for act in self.class_info if not act['name'] == 'BackGround']:
 68 |             for video in [vid for vid in os.listdir(root_labels + '/' + activity) if
 69 |                           not vid.endswith('.DS_Store')
 70 |                           and vid not in self.reserved_for_test
 71 |                           and vid not in self.reserved_for_val]:
 72 |                 for frame in [f for f in os.listdir(root_labels + '/' + activity + '/' + video) if
 73 |                               f.endswith('.txt')]:
 74 | 
 75 |                     URL_label = root_labels + '/' + activity + '/' + video + '/' + frame
 76 |                     URL_image = root_images + '/' + activity + '/' + video + '/' + frame.replace('.txt', '.jpg')
 77 |                     # open the file with the labels (class and bbox)
 78 |                     with open(URL_label) as file:
 79 |                         lines = file.readlines()
 80 |                         bounding_boxes = [str(line).replace('\n', '').split(' ') for line in lines]
 81 |                         # Orrible cast from string matrix to int matrix
 82 |                         for i in range(len(bounding_boxes)):
 83 |                             bounding_boxes[i] = [int(float(element)) for element in bounding_boxes[i]]
 84 | 
 85 |                     # add image
 86 |                     self.add_image("sport", image_id=i, path=URL_image,
 87 |                                    width=320, height=240,
 88 |                                    bbox=bounding_boxes, action=activity)
 89 | 
 90 | 
 91 |     def load_val_shapes(self):
 92 |         # Training is True if the dataset is for training, False if it's for validation set
 93 |         """Generate the requested number of synthetic images.
 94 |         count: number of images to generate.
 95 |         height, width: the size of the generated images.
 96 |         """
 97 |         # Add classes
 98 |         self.add_class("sport", 1, "WalkingWithDog")
 99 |         self.add_class("sport", 2, "BasketballDunk")
100 |         self.add_class("sport", 3, "Biking")
101 |         self.add_class("sport", 4, "CliffDiving")
102 |         self.add_class("sport", 5, "CricketBowling")
103 |         self.add_class("sport", 6, "Diving")
104 |         self.add_class("sport", 7, "Fencing")
105 |         self.add_class("sport", 8, "FloorGymnastics")
106 |         self.add_class("sport", 9, "GolfSwing")
107 |         self.add_class("sport", 10, "HorseRiding")
108 |         self.add_class("sport", 11, "IceDancing")
109 |         self.add_class("sport", 12, "LongJump")
110 |         self.add_class("sport", 13, "PoleVault")
111 |         self.add_class("sport", 14, "RopeClimbing")
112 |         self.add_class("sport", 15, "SalsaSpin")
113 |         self.add_class("sport", 16, "SkateBoarding")
114 |         self.add_class("sport", 17, "Skiing")
115 |         self.add_class("sport", 18, "Skijet")
116 |         self.add_class("sport", 19, "SoccerJuggling")
117 |         self.add_class("sport", 20, "Surfing")
118 |         self.add_class("sport", 21, "TennisSwing")
119 |         self.add_class("sport", 22, "TrampolineJumping")
120 |         self.add_class("sport", 23, "VolleyballSpiking")
121 |         self.add_class("sport", 24, "Basketball")
122 | 
123 |         # Select ALL IMAGES
124 |         root_labels = 'ucf24_project/labels'
125 |         root_images = 'ucf24_project/rgb-images'
126 |         for activity in [act['name'] for act in self.class_info if not act['name'] == 'BackGround']:
127 |             for video in [vid for vid in os.listdir(root_labels + '/' + activity) if
128 |                           not vid.endswith('.DS_Store')
129 |                           and vid in self.reserved_for_val]:
130 |                 for frame in [f for f in os.listdir(root_labels + '/' + activity + '/' + video) if
131 |                               f.endswith('.txt')]:
132 | 
133 |                     URL_label = root_labels + '/' + activity + '/' + video + '/' + frame
134 |                     URL_image = root_images + '/' + activity + '/' + video + '/' + frame.replace('.txt', '.jpg')
135 |                     # open the file with the labels (class and bbox)
136 |                     with open(URL_label) as file:
137 |                         lines = file.readlines()
138 |                         bounding_boxes = [str(line).replace('\n', '').split(' ') for line in lines]
139 |                         # Orrible cast from string matrix to int matrix
140 |                         for i in range(len(bounding_boxes)):
141 |                             bounding_boxes[i] = [int(float(element)) for element in bounding_boxes[i]]
142 | 
143 |                     # add image
144 |                     self.add_image("sport", image_id=i, path=URL_image,
145 |                                    width=320, height=240,
146 |                                    bbox=bounding_boxes, action=activity)
147 | 
148 | 
149 | 
150 |     def load_test_shapes(self):
151 |         # Training is True if the dataset is for training, False if it's for validation set
152 |         """Generate the requested number of synthetic images.
153 |         count: number of images to generate.
154 |         height, width: the size of the generated images.
155 |         """
156 |         # Add classes
157 |         self.add_class("sport", 1, "WalkingWithDog")
158 |         self.add_class("sport", 2, "BasketballDunk")
159 |         self.add_class("sport", 3, "Biking")
160 |         self.add_class("sport", 4, "CliffDiving")
161 |         self.add_class("sport", 5, "CricketBowling")
162 |         self.add_class("sport", 6, "Diving")
163 |         self.add_class("sport", 7, "Fencing")
164 |         self.add_class("sport", 8, "FloorGymnastics")
165 |         self.add_class("sport", 9, "GolfSwing")
166 |         self.add_class("sport", 10, "HorseRiding")
167 |         self.add_class("sport", 11, "IceDancing")
168 |         self.add_class("sport", 12, "LongJump")
169 |         self.add_class("sport", 13, "PoleVault")
170 |         self.add_class("sport", 14, "RopeClimbing")
171 |         self.add_class("sport", 15, "SalsaSpin")
172 |         self.add_class("sport", 16, "SkateBoarding")
173 |         self.add_class("sport", 17, "Skiing")
174 |         self.add_class("sport", 18, "Skijet")
175 |         self.add_class("sport", 19, "SoccerJuggling")
176 |         self.add_class("sport", 20, "Surfing")
177 |         self.add_class("sport", 21, "TennisSwing")
178 |         self.add_class("sport", 22, "TrampolineJumping")
179 |         self.add_class("sport", 23, "VolleyballSpiking")
180 |         self.add_class("sport", 24, "Basketball")
181 | 
182 |         # Select ALL IMAGES
183 |         root_labels = 'ucf24_project/labels'
184 |         root_images = 'ucf24_project/rgb-images'
185 |         for activity in [act['name'] for act in self.class_info if not act['name'] == 'BackGround']:
186 |             for video in [vid for vid in os.listdir(root_labels + '/' + activity) if
187 |                           not vid.endswith('.DS_Store')
188 |                           and vid in self.reserved_for_test]:
189 |                 for frame in [f for f in os.listdir(root_labels + '/' + activity + '/' + video) if
190 |                               f.endswith('.txt')]:
191 | 
192 |                     URL_label = root_labels + '/' + activity + '/' + video + '/' + frame
193 |                     URL_image = root_images + '/' + activity + '/' + video + '/' + frame.replace('.txt', '.jpg')
194 |                     # open the file with the labels (class and bbox)
195 |                     with open(URL_label) as file:
196 |                         lines = file.readlines()
197 |                         bounding_boxes = [str(line).replace('\n', '').split(' ') for line in lines]
198 |                         # Orrible cast from string matrix to int matrix
199 |                         for i in range(len(bounding_boxes)):
200 |                             bounding_boxes[i] = [int(float(element)) for element in bounding_boxes[i]]
201 | 
202 |                     # add image
203 |                     self.add_image("sport", image_id=i, path=URL_image,
204 |                                    width=320, height=240,
205 |                                    bbox=bounding_boxes, action=activity)
206 | 
207 | 
208 |     def load_image(self, image_id):
209 | 
210 |         info = self.image_info[image_id]
211 |         image = plt.imread(info['path'])
212 | 
213 |         return image
214 | 
215 |     def image_reference(self, image_id):
216 |         """Return the shapes data of the image."""
217 |         info = self.image_info[image_id]
218 |         if info["source"] == "shapes":
219 |             return info["shapes"]
220 |         else:
221 |             super(self.__class__).image_reference(self, image_id)
222 | 
223 |     def load_mask(self, image_id):
224 |         """Generate instance masks for shapes of the given image ID.
225 |         """
226 |         info = self.image_info[image_id]
227 |         shapes = info['bbox']
228 |         masks_path = info['path'].replace(".jpg", "mask_").replace("rgb-images", "labels")
229 |         count = len(shapes)
230 |         # si crea una matrice 3D per ogni bbox. La terza dimensione è il numero di bounding.
231 |         # ogni maschera è un area di 0 su un blocco di 1 grosso come l'immagine (320x240)
232 |         mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
233 | 
234 |         for i, (action, x1, y1, x2, y2) in enumerate(info['bbox']):
235 |             mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(), x1, y1, x2, y2, 1).reshape(240, 320, 1)
236 |             # se ci sono le maschere...
237 |             if os.path.exists(masks_path + "0.jpg"):
238 |                 mask_counter, intersection_max = 0, -1
239 |                 # per ogni maschera
240 |                 while os.path.exists(masks_path + str(mask_counter) + ".jpg"):
241 |                     mask_counter = mask_counter + 1
242 |                     # leggo la maschera, essendo in jpg è compressa, ci sta che non tutti i valori siano 255
243 |                     # uso ">128" per filtrarla -> ottengo una matrice binaria
244 |                     current_mask = plt.imread(masks_path + "0.jpg") > 128
245 |                     # Calcolo l'intersezione col boundingbox e la percentuale di maschera dentro questo
246 |                     intersection = mask[:, :, i:i + 1] * current_mask.reshape(240, 320, 1)
247 |                     # the factors *1 and /255 needed in order to had a normalized score
248 |                     intersection_score = sum(sum(intersection / 255)) / sum(sum(current_mask * 1))
249 |                     # Se ho un nuovo massimo, metto da parte l'intersezione trovata
250 |                     if intersection_score > intersection_max:
251 |                         intersection_max = intersection_score
252 |                         final_mask = current_mask
253 |                 # Se ho avuto almeno un intersezione con il bbox
254 |                 if intersection_max > 0.1:
255 |                     # uncomment to save a mask, but seems good
256 |                     # print(intersection_max)
257 |                     aMask = mask[:, :, i:i + 1] * final_mask.reshape(240, 320, 1)
258 |                     # plt.imsave("imgSaved/"+str(intersection_max)+"m.png", aMask.reshape(240, 320))
259 |                     # plt.imsave("imgSaved/"+str(intersection_max)+"i.png", plt.imread(info['path']))
260 |                     # plt.imsave("imgSaved/"+str(intersection_max)+"b.png", mask[:, :, i:i+1].reshape(240, 320))
261 |                     # aggiorno la maschera
262 |                     mask[:, :, i:i + 1] = mask[:, :, i:i + 1] * final_mask.reshape(240, 320, 1)
263 | 
264 |         # prima poteva accadere che due maschere in scena fossero di classi diverse (e.g. tondo e quadrato)
265 |         # ora avremo che le maschere in scena fanno tutte parte della stessa attività
266 |         class_ids = np.array([self.class_names.index(info['action']) for i in range(count)])
267 | 
268 |         return mask, class_ids.astype(np.int32)
269 | 
270 |     def draw_shape(self, image, x1, y1, x2, y2, color):
271 |         """Draws a shape from the given specs."""
272 |         # Get the center x, y and the size s
273 |         cv2.rectangle(image, (x1, y1), (x2, y2), 255, -1)
274 |         image = image.reshape(240, 320)
275 | 
276 |         return image
277 | 


--------------------------------------------------------------------------------
/visualize.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Mask R-CNN
  3 | Display and Visualization Functions.
  4 | 
  5 | Copyright (c) 2017 Matterport, Inc.
  6 | Licensed under the MIT License (see LICENSE for details)
  7 | Written by Waleed Abdulla
  8 | """
  9 | 
 10 | import random
 11 | import itertools
 12 | import colorsys
 13 | import numpy as np
 14 | from skimage.measure import find_contours
 15 | import matplotlib.pyplot as plt
 16 | import matplotlib.patches as patches
 17 | import matplotlib.lines as lines
 18 | from matplotlib.patches import Polygon
 19 | import IPython.display
 20 | from PIL import Image
 21 | import utils
 22 | 
 23 | 
 24 | ############################################################
 25 | #  Visualization
 26 | ############################################################
 27 | 
 28 | def display_images(images, titles=None, cols=4, cmap=None, norm=None,
 29 |                    interpolation=None):
 30 |     """Display the given set of images, optionally with titles.
 31 |     images: list or array of image tensors in HWC format.
 32 |     titles: optional. A list of titles to display with each image.
 33 |     cols: number of images per row
 34 |     cmap: Optional. Color map to use. For example, "Blues".
 35 |     norm: Optional. A Normalize instance to map values to colors.
 36 |     interpolation: Optional. Image interporlation to use for display.
 37 |     """
 38 |     titles = titles if titles is not None else [""] * len(images)
 39 |     rows = len(images) // cols + 1
 40 |     plt.figure(figsize=(14, 14 * rows // cols))
 41 |     i = 1
 42 |     for image, title in zip(images, titles):
 43 |         plt.subplot(rows, cols, i)
 44 |         plt.title(title, fontsize=9)
 45 |         plt.axis('off')
 46 |         plt.imshow(image.astype(np.uint8), cmap=cmap,
 47 |                    norm=norm, interpolation=interpolation)
 48 |         i += 1
 49 |     plt.show()
 50 | 
 51 | 
 52 | def random_colors(N, bright=True):
 53 |     """
 54 |     Generate random colors.
 55 |     To get visually distinct colors, generate them in HSV space then
 56 |     convert to RGB.
 57 |     """
 58 |     brightness = 1.0 if bright else 0.7
 59 |     hsv = [(i / N, 1, brightness) for i in range(N)]
 60 |     colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
 61 |     random.shuffle(colors)
 62 |     return colors
 63 | 
 64 | 
 65 | def apply_mask(image, mask, color, alpha=0.5):
 66 |     """Apply the given mask to the image.
 67 |     """
 68 |     for c in range(3):
 69 |         image[:, :, c] = np.where(mask == 1,
 70 |                                   image[:, :, c] *
 71 |                                   (1 - alpha) + alpha * color[c] * 255,
 72 |                                   image[:, :, c])
 73 |     return image
 74 | 
 75 | 
 76 | def display_instances(image, boxes, masks, class_ids, class_names,
 77 |                       scores=None, title="",
 78 |                       figsize=(16, 16), ax=None):
 79 |     '''
 80 |     im = Image.fromarray(image)
 81 |     im.save("testImgs/img.jpeg")
 82 |     im = Image.fromarray(masks[:, :, 0])
 83 |     im.save("testImgs/imgMasx.jpeg")
 84 | 
 85 | 
 86 |     return 0
 87 |     '''
 88 |     """
 89 |     boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
 90 |     masks: [height, width, num_instances]
 91 |     class_ids: [num_instances]
 92 |     class_names: list of class names of the dataset
 93 |     scores: (optional) confidence scores for each box
 94 |     figsize: (optional) the size of the image.
 95 |     """
 96 |     # Number of instances
 97 |     N = boxes.shape[0]
 98 |     if not N:
 99 |         print("\n*** No instances to display *** \n")
100 |     else:
101 |         assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
102 | 
103 |     if not ax:
104 |         _, ax = plt.subplots(1, figsize=figsize)
105 | 
106 |     # Generate random colors
107 |     colors = random_colors(N)
108 | 
109 |     # Show area outside image boundaries.
110 |     height, width = image.shape[:2]
111 |     ax.set_ylim(height + 10, -10)
112 |     ax.set_xlim(-10, width + 10)
113 |     ax.axis('off')
114 |     ax.set_title(title)
115 | 
116 |     masked_image = image.astype(np.uint32).copy()
117 |     for i in range(N):
118 |         color = colors[i]
119 | 
120 |         # Bounding box
121 |         if not np.any(boxes[i]):
122 |             # Skip this instance. Has no bbox. Likely lost in image cropping.
123 |             continue
124 |         y1, x1, y2, x2 = boxes[i]
125 |         p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
126 |                               alpha=0.7, linestyle="dashed",
127 |                               edgecolor=color, facecolor='none')
128 |         ax.add_patch(p)
129 | 
130 |         # Label
131 |         class_id = class_ids[i]
132 |         score = scores[i] if scores is not None else None
133 |         label = class_names[class_id]
134 |         x = random.randint(x1, (x1 + x2) // 2)
135 |         caption = "{} {:.3f}".format(label, score) if score else label
136 |         ax.text(x1, y1 + 8, caption,
137 |                 color='w', size=11, backgroundcolor="none")
138 | 
139 |         
140 |         # Mask
141 |         mask = masks[:, :, i]
142 |         masked_image = apply_mask(masked_image, mask, color)
143 | 
144 |         # Mask Polygon
145 |         # Pad to ensure proper polygons for masks that touch image edges.
146 |         padded_mask = np.zeros(
147 |             (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
148 |         padded_mask[1:-1, 1:-1] = mask
149 |         contours = find_contours(padded_mask, 0.5)
150 |         for verts in contours:
151 |             # Subtract the padding and flip (y, x) to (x, y)
152 |             verts = np.fliplr(verts) - 1
153 |             p = Polygon(verts, facecolor="none", edgecolor=color)
154 |             ax.add_patch(p)
155 | 
156 |     ax.imshow(masked_image.astype(np.uint8))
157 |     #ax.imshow(image)
158 |     plt.show()
159 |     
160 | 
def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10):
    """Draw a random sample of ROIs, their refinements and their masks.

    image: image array to draw on (a copy is tinted, not the original).
    rois: [n, (y1, x1, y2, x2, ...)] ROI boxes in image coordinates; only
        the first four values of each row are used.
    refined_rois: [n, (y1, x1, y2, x2)] the same ROIs after refinement.
    mask: per-ROI masks; mask[k] is handed to utils.unmold_mask together
        with ROI k (presumably a small mask relative to the ROI box --
        confirm against utils.unmold_mask).
    class_ids: [n] class ID per ROI; 0 marks a negative ROI, which is
        drawn in gray without refinement, label or mask.
    class_names: list of class names of the dataset.
    limit: maximum number of ROIs to draw.

    Also prints the positive/negative ROI counts and the positive ratio.
    """
    masked_image = image.copy()

    # Pick random ROIs in case there are too many.
    ids = np.arange(rois.shape[0], dtype=np.int32)
    ids = np.random.choice(
        ids, limit, replace=False) if ids.shape[0] > limit else ids

    _, ax = plt.subplots(1, figsize=(12, 12))
    if rois.shape[0] > limit:
        plt.title("Showing {} random ROIs out of {}".format(
            len(ids), rois.shape[0]))
    else:
        plt.title("{} ROIs".format(len(ids)))

    # Show area outside image boundaries.
    ax.set_ylim(image.shape[0] + 20, -20)
    ax.set_xlim(-50, image.shape[1] + 20)
    ax.axis('off')

    for roi_id in ids:
        color = np.random.rand(3)
        class_id = class_ids[roi_id]
        # ROI
        y1, x1, y2, x2 = rois[roi_id]
        p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                              edgecolor=color if class_id else "gray",
                              facecolor='none', linestyle="dashed")
        ax.add_patch(p)
        # Refined ROI
        if class_id:
            ry1, rx1, ry2, rx2 = refined_rois[roi_id]
            p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)
            # Connect the top-left corners of the ROI and its refinement
            # for easy visualization
            ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))

            # Label
            label = class_names[class_id]
            ax.text(rx1, ry1 + 8, "{}".format(label),
                    color='w', size=11, backgroundcolor="none")

            # Mask
            m = utils.unmold_mask(mask[roi_id], rois[roi_id]
                                  [:4].astype(np.int32), image.shape)
            masked_image = apply_mask(masked_image, m, color)

    ax.imshow(masked_image)

    # Print stats
    positive_count = class_ids[class_ids > 0].shape[0]
    negative_count = class_ids[class_ids == 0].shape[0]
    total_count = class_ids.shape[0]
    print("Positive ROIs: ", positive_count)
    print("Negative ROIs: ", negative_count)
    # Guard the ratio so an empty class_ids array reports 0.00 instead of
    # raising ZeroDivisionError.
    ratio = positive_count / total_count if total_count else 0.0
    print("Positive Ratio: {:.2f}".format(ratio))
220 | 
221 | 
222 | # TODO: Replace with matplotlib equivalent?
def draw_box(image, box, color):
    """Paint the outline of a bounding box directly into an image array.

    image: array indexed as [row, column, ...]; modified in place.
    box: (y1, x1, y2, x2).
    color: value (e.g. list of 3 ints for RGB) assigned to outline pixels.

    Each edge is written as a 2-pixel-wide strip starting at the box
    coordinate. Returns the same (mutated) image array.
    """
    top, left, bottom, right = box
    stroke = 2
    # Horizontal edges (top, then bottom).
    for row in (top, bottom):
        image[row:row + stroke, left:right] = color
    # Vertical edges (left, then right).
    for col in (left, right):
        image[top:bottom, col:col + stroke] = color
    return image
233 | 
234 | 
def display_top_masks(image, mask, class_ids, class_names, limit=4):
    """Show the image next to the masks of its most prominent classes.

    image: the image to display in the first panel.
    mask: [height, width, num_instances] instance masks.
    class_ids: [num_instances] class ID of each instance mask.
    class_names: list of class names of the dataset.
    limit: number of class panels to show; unused panels are blank and
        titled "-".

    Classes are ranked by total mask area (largest first); classes with
    zero area are ignored.
    """
    panels = [image]
    captions = ["H x W={}x{}".format(image.shape[0], image.shape[1])]

    # Rank the classes present in this image by the area their masks cover.
    present = np.unique(class_ids)
    areas = [np.sum(mask[:, :, np.where(class_ids == cid)[0]])
             for cid in present]
    ranked = sorted(zip(present, areas),
                    key=lambda pair: pair[1], reverse=True)
    top_ids = [pair[0] for pair in ranked if pair[1] > 0]

    # One panel per top class, padded with empty panels up to `limit`.
    for slot in range(limit):
        if slot < len(top_ids):
            class_id = top_ids[slot]
        else:
            class_id = -1
        # Instances of the same class are merged into one panel, each
        # weighted by its 1-based position so they get different shades.
        members = mask[:, :, np.where(class_ids == class_id)[0]]
        merged = np.sum(members * np.arange(1, members.shape[-1] + 1), -1)
        panels.append(merged)
        captions.append(class_names[class_id] if class_id != -1 else "-")

    display_images(panels, titles=captions, cols=limit + 1, cmap="Blues_r")
256 | 
257 | 
def plot_precision_recall(AP, precisions, recalls):
    """Plot precision (y axis) against recall (x axis) on a new figure.

    AP: average precision, printed in the title as "AP@50".
    precisions: list of precision values
    recalls: list of recall values
    """
    _, axes = plt.subplots(1)
    # Both axes run slightly past 1 so the curve is not clipped at the edge.
    axes.set_xlim(0, 1.1)
    axes.set_ylim(0, 1.1)
    axes.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP))
    axes.plot(recalls, precisions)
271 | 
272 | 
def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
                  overlaps, class_names, threshold=0.5):
    """Draw the IoU matrix between predictions (rows) and ground truth
    (columns), marking each sufficiently overlapping pair as a match or
    a misclassification.

    gt_class_ids: [N] int. Ground truth class IDs
    pred_class_ids: [N] int. Predicted class IDs
    pred_scores: [N] float. The probability scores of predicted classes
    overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
    class_names: list of all class names in the dataset
    threshold: Float. Cells with an overlap above this value are
        annotated with "match" or "wrong".

    Entries with class ID 0 are dropped from both ID arrays before
    labelling.
    """
    gt_class_ids = gt_class_ids[gt_class_ids != 0]
    pred_class_ids = pred_class_ids[pred_class_ids != 0]

    plt.figure(figsize=(12, 10))
    plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)

    # Rows: predicted class with its score. Columns: ground truth class.
    row_labels = ["{} ({:.2f})".format(class_names[int(cid)], pred_scores[row])
                  for row, cid in enumerate(pred_class_ids)]
    plt.yticks(np.arange(len(pred_class_ids)), row_labels)
    col_labels = [class_names[int(cid)] for cid in gt_class_ids]
    plt.xticks(np.arange(len(gt_class_ids)), col_labels, rotation=90)

    # Text colour flips to white on the darker half of the colour map.
    half_max = overlaps.max() / 2.
    for row in range(overlaps.shape[0]):
        for col in range(overlaps.shape[1]):
            value = overlaps[row, col]
            verdict = ""
            if value > threshold:
                if gt_class_ids[col] == pred_class_ids[row]:
                    verdict = "match"
                else:
                    verdict = "wrong"
            if value > half_max:
                shade = "white"
            elif value > 0:
                shade = "black"
            else:
                shade = "grey"
            plt.text(col, row, "{:.3f}\n{}".format(value, verdict),
                     horizontalalignment="center", verticalalignment="center",
                     fontsize=9, color=shade)

    plt.tight_layout()
    plt.xlabel("Ground Truth")
    plt.ylabel("Predictions")
310 | 
311 | 
def draw_boxes(image, boxes=None, refined_boxes=None,
               masks=None, captions=None, visibilities=None,
               title="", ax=None):
    """Draw bounding boxes and segmentation masks with different
    customizations.

    image: array whose first two dimensions are (height, width).
    boxes: [N, (y1, x1, y2, x2)] in image coordinates. An instance whose
        box is all zeros is skipped entirely.
    refined_boxes: Like boxes, but drawn with solid lines to show
        that they're the result of refining 'boxes'. Only drawn for
        instances with visibility 1 or 2.
    masks: [height, width, N]; instance i is read as masks[:, :, i].
    captions: List of N titles to display on each box. When refined
        boxes are given the caption is anchored on the refined box.
    visibilities: (optional) List of values of 0, 1, or 2. Determine how
        prominent each bounding box should be.
    title: An optional title to show over the image
    ax: (optional) Matplotlib axis to draw on.

    At least one of boxes / refined_boxes must be given.
    Raises ValueError for a visibility other than 0, 1 or 2.
    """
    # Number of boxes
    assert boxes is not None or refined_boxes is not None
    N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]

    # Matplotlib Axis
    if ax is None:
        _, ax = plt.subplots(1, figsize=(12, 12))

    # Generate random colors
    colors = random_colors(N)

    # Show area outside image boundaries.
    margin = image.shape[0] // 10
    ax.set_ylim(image.shape[0] + margin, -margin)
    ax.set_xlim(-margin, image.shape[1] + margin)
    ax.axis('off')

    ax.set_title(title)

    # Blend on a wider integer copy so the caller's image is untouched.
    masked_image = image.astype(np.uint32).copy()
    for i in range(N):
        # Box visibility
        visibility = visibilities[i] if visibilities is not None else 1
        if visibility == 0:
            color = "gray"
            style = "dotted"
            alpha = 0.5
        elif visibility == 1:
            color = colors[i]
            style = "dotted"
            alpha = 1
        elif visibility == 2:
            color = colors[i]
            style = "solid"
            alpha = 1
        else:
            # Fail loudly instead of silently reusing the previous
            # instance's colour and line style.
            raise ValueError(
                "visibility must be 0, 1 or 2, got {!r}".format(visibility))

        # Boxes
        if boxes is not None:
            if not np.any(boxes[i]):
                # Skip this instance. Has no bbox. Likely lost in cropping.
                continue
            y1, x1, y2, x2 = boxes[i]
            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                  alpha=alpha, linestyle=style,
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)

        # Refined boxes. The coordinates are read for every instance so
        # the caption below never picks up values left over from an
        # earlier iteration; the rectangle itself is only drawn for
        # visible instances.
        if refined_boxes is not None:
            ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
            if visibility > 0:
                p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1,
                                      linewidth=2,
                                      edgecolor=color, facecolor='none')
                ax.add_patch(p)
                # Connect the top-left corners of the anchor and proposal
                if boxes is not None:
                    ax.add_line(
                        lines.Line2D([x1, rx1], [y1, ry1], color=color))

        # Captions
        if captions is not None:
            caption = captions[i]
            # If there are refined boxes, display captions on them
            if refined_boxes is not None:
                text_x, text_y = rx1, ry1
            else:
                text_x, text_y = x1, y1
            ax.text(text_x, text_y, caption, size=11, verticalalignment='top',
                    color='w', backgroundcolor="none",
                    bbox={'facecolor': color, 'alpha': 0.5,
                          'pad': 2, 'edgecolor': 'none'})

        # Masks
        if masks is not None:
            mask = masks[:, :, i]
            # apply_mask needs numeric RGB components; the named colour
            # used for visibility 0 is replaced by an equivalent mid-gray.
            mask_color = (0.5, 0.5, 0.5) if visibility == 0 else color
            masked_image = apply_mask(masked_image, mask, mask_color)
            # Mask Polygon
            # Pad to ensure proper polygons for masks that touch image edges.
            padded_mask = np.zeros(
                (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
            padded_mask[1:-1, 1:-1] = mask
            contours = find_contours(padded_mask, 0.5)
            for verts in contours:
                # Subtract the padding and flip (y, x) to (x, y)
                verts = np.fliplr(verts) - 1
                p = Polygon(verts, facecolor="none", edgecolor=color)
                ax.add_patch(p)
    ax.imshow(masked_image.astype(np.uint8))
413 | 
414 | 
def display_table(table):
    """Render rows of values as an HTML table via IPython.display.

    table: an iterable of rows, and each row is an iterable of values.

    Each value is converted with str() and padded to 40 characters; it is
    inserted as-is, so HTML markup inside a value is rendered.
    """
    rows_html = "".join(
        "<tr>"
        + "".join("<td>{:40}</td>".format(str(cell)) for cell in row)
        + "</tr>"
        for row in table)
    markup = "<table>" + rows_html + "</table>"
    IPython.display.display(IPython.display.HTML(markup))
427 | 
428 | 
def display_weight_stats(model):
    """Display a table of statistics for every weight of the trainable
    layers of the model.

    model: object exposing get_trainable_layers(); each returned layer
        must provide get_weights() and a matching `weights` list.

    One row per weight array: name, shape, min, max and standard
    deviation. Suspicious weights get a red marker appended to the name.
    Nothing is returned; the table is shown with display_table().
    """
    table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
    for layer in model.get_trainable_layers():
        values = layer.get_weights()  # list of Numpy arrays
        tensors = layer.weights  # list of TF tensors
        is_conv = layer.__class__.__name__ == "Conv2D"
        for position, weight in enumerate(values):
            name = tensors[position].name
            lowest, highest = weight.min(), weight.max()

            markers = ""
            # A constant weight is flagged, except for the second weight
            # of a Conv2D layer (its bias).
            if lowest == highest and not (is_conv and position == 1):
                markers += "<span style='color:red'>*** dead?</span>"
            if np.abs(lowest) > 1000 or np.abs(highest) > 1000:
                markers += "<span style='color:red'>*** Overflow?</span>"

            table.append([
                name + markers,
                str(weight.shape),
                "{:+9.4f}".format(lowest),
                "{:+10.4f}".format(highest),
                "{:+9.4f}".format(weight.std()),
            ])
    display_table(table)
455 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Mask R-CNN
  3 | Common utility functions and classes.
  4 | 
  5 | Copyright (c) 2017 Matterport, Inc.
  6 | Licensed under the MIT License (see LICENSE for details)
  7 | Written by Waleed Abdulla
  8 | """
  9 | 
 10 | import sys
 11 | import os
 12 | import math
 13 | import random
 14 | import numpy as np
 15 | import tensorflow as tf
 16 | import scipy.misc
 17 | import skimage.color
 18 | import skimage.io
 19 | import urllib.request
 20 | import shutil
 21 | 
 22 | # URL from which to download the latest COCO trained weights
 23 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
 24 | 
 25 | 
 26 | ############################################################
 27 | #  Bounding Boxes
 28 | ############################################################
 29 | 
 30 | def extract_bboxes(mask):
 31 |     """Compute bounding boxes from masks.
 32 |     mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
 33 | 
 34 |     Returns: bbox array [num_instances, (y1, x1, y2, x2)].
 35 |     """
 36 |     boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
 37 |     for i in range(mask.shape[-1]):
 38 |         m = mask[:, :, i]
 39 |         # Bounding box.
 40 |         horizontal_indicies = np.where(np.any(m, axis=0))[0]
 41 |         vertical_indicies = np.where(np.any(m, axis=1))[0]
 42 |         if horizontal_indicies.shape[0]:
 43 |             x1, x2 = horizontal_indicies[[0, -1]]
 44 |             y1, y2 = vertical_indicies[[0, -1]]
 45 |             # x2 and y2 should not be part of the box. Increment by 1.
 46 |             x2 += 1
 47 |             y2 += 1
 48 |         else:
 49 |             # No mask for this instance. Might happen due to
 50 |             # resizing or cropping. Set bbox to zeros
 51 |             x1, x2, y1, y2 = 0, 0, 0, 0
 52 |         boxes[i] = np.array([y1, x1, y2, x2])
 53 |     return boxes.astype(np.int32)
 54 | 
 55 | 
 56 | def compute_iou(box, boxes, box_area, boxes_area):
 57 |     """Calculates IoU of the given box with the array of the given boxes.
 58 |     box: 1D vector [y1, x1, y2, x2]
 59 |     boxes: [boxes_count, (y1, x1, y2, x2)]
 60 |     box_area: float. the area of 'box'
 61 |     boxes_area: array of length boxes_count.
 62 | 
 63 |     Note: the areas are passed in rather than calculated here for
 64 |           efficency. Calculate once in the caller to avoid duplicate work.
 65 |     """
 66 |     # Calculate intersection areas
 67 |     y1 = np.maximum(box[0], boxes[:, 0])
 68 |     y2 = np.minimum(box[2], boxes[:, 2])
 69 |     x1 = np.maximum(box[1], boxes[:, 1])
 70 |     x2 = np.minimum(box[3], boxes[:, 3])
 71 |     intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
 72 |     union = box_area + boxes_area[:] - intersection[:]
 73 |     iou = intersection / union
 74 |     return iou
 75 | 
 76 | 
 77 | def compute_overlaps(boxes1, boxes2):
 78 |     """Computes IoU overlaps between two sets of boxes.
 79 |     boxes1, boxes2: [N, (y1, x1, y2, x2)].
 80 | 
 81 |     For better performance, pass the largest set first and the smaller second.
 82 |     """
 83 |     # Areas of anchors and GT boxes
 84 |     area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
 85 |     area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
 86 | 
 87 |     # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
 88 |     # Each cell contains the IoU value.
 89 |     overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
 90 |     for i in range(overlaps.shape[1]):
 91 |         box2 = boxes2[i]
 92 |         overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
 93 |     return overlaps
 94 | 
 95 | 
 96 | def compute_overlaps_masks(masks1, masks2):
 97 |     '''Computes IoU overlaps between two sets of masks.
 98 |     masks1, masks2: [Height, Width, instances]
 99 |     '''
100 |     # flatten masks
101 |     masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
102 |     masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
103 |     area1 = np.sum(masks1, axis=0)
104 |     area2 = np.sum(masks2, axis=0)
105 | 
106 |     # intersections and union
107 |     intersections = np.dot(masks1.T, masks2)
108 |     union = area1[:, None] + area2[None, :] - intersections
109 |     overlaps = intersections / union
110 | 
111 |     return overlaps
112 | 
113 | 
def non_max_suppression(boxes, scores, threshold):
    """Greedy non-maximum suppression; returns the indices of kept boxes.

    boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
    scores: 1-D array of box scores.
    threshold: Float. A box is dropped when its IoU with an already
        kept, higher-scoring box exceeds this value.

    Returns an int32 array of indices into `boxes`, best score first.
    """
    assert boxes.shape[0] > 0
    if boxes.dtype.kind != "f":
        boxes = boxes.astype(np.float32)

    area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # Candidate indices, highest score first.
    candidates = scores.argsort()[::-1]

    kept = []
    while len(candidates) > 0:
        best, rest = candidates[0], candidates[1:]
        kept.append(best)
        iou = compute_iou(boxes[best], boxes[rest], area[best], area[rest])
        # Carry on with the candidates that do not overlap the kept box
        # by more than the threshold.
        candidates = rest[~(iou > threshold)]
    return np.array(kept, dtype=np.int32)
149 | 
150 | 
def apply_box_deltas(boxes, deltas):
    """Shift and rescale boxes by the given refinement deltas.

    boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
    deltas: [N, (dy, dx, log(dh), log(dw))]

    The centre moves by (dy, dx) in units of the box size and the size is
    multiplied by exp of the log-scale deltas. Returns a new float32
    array [N, (y1, x1, y2, x2)]; the input is not modified.
    """
    boxes = boxes.astype(np.float32)

    # Corner form -> centre/size form.
    box_h = boxes[:, 2] - boxes[:, 0]
    box_w = boxes[:, 3] - boxes[:, 1]
    centre_y = boxes[:, 0] + 0.5 * box_h
    centre_x = boxes[:, 1] + 0.5 * box_w

    # Move the centre first (relative to the original size), then resize.
    # In-place updates keep everything in float32.
    centre_y += deltas[:, 0] * box_h
    centre_x += deltas[:, 1] * box_w
    box_h *= np.exp(deltas[:, 2])
    box_w *= np.exp(deltas[:, 3])

    # Centre/size form -> corner form.
    top = centre_y - 0.5 * box_h
    left = centre_x - 0.5 * box_w
    return np.stack([top, left, top + box_h, left + box_w], axis=1)
173 | 
174 | 
def box_refinement_graph(box, gt_box):
    """TensorFlow version of the box refinement computation.

    box and gt_box are [N, (y1, x1, y2, x2)]; both are cast to float32.

    Returns a tensor [N, (dy, dx, dh, dw)]: the centre offsets in units
    of the box size and the log ratios of the sizes.
    """
    def _size_and_centre(b):
        # Same op order as for the plain computation: height, width,
        # centre y, centre x.
        h = b[:, 2] - b[:, 0]
        w = b[:, 3] - b[:, 1]
        cy = b[:, 0] + 0.5 * h
        cx = b[:, 1] + 0.5 * w
        return h, w, cy, cx

    height, width, center_y, center_x = _size_and_centre(
        tf.cast(box, tf.float32))
    gt_height, gt_width, gt_center_y, gt_center_x = _size_and_centre(
        tf.cast(gt_box, tf.float32))

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    return tf.stack([dy, dx, dh, dw], axis=1)
199 | 
200 | 
def box_refinement(box, gt_box):
    """Compute the deltas that transform each box into its gt_box.

    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
    assumed to be outside the box.

    Returns a float32 array [N, (dy, dx, dh, dw)]: the centre offsets in
    units of the box size and the log ratios of the sizes.
    """
    def _size_and_centre(b):
        b = b.astype(np.float32)
        h = b[:, 2] - b[:, 0]
        w = b[:, 3] - b[:, 1]
        return h, w, b[:, 0] + 0.5 * h, b[:, 1] + 0.5 * w

    height, width, center_y, center_x = _size_and_centre(box)
    gt_height, gt_width, gt_center_y, gt_center_x = _size_and_centre(gt_box)

    deltas = [
        (gt_center_y - center_y) / height,
        (gt_center_x - center_x) / width,
        np.log(gt_height / height),
        np.log(gt_width / width),
    ]
    return np.stack(deltas, axis=1)
225 | 
226 | 
227 | ############################################################
228 | #  Dataset
229 | ############################################################
230 | 
class Dataset(object):
    """Base class for datasets.

    Subclass it and add the functions specific to your data, for example:

    class CatsAndDogsDataset(Dataset):
        def load_cats_and_dogs(self):
            ...
        def load_mask(self, image_id):
            ...
        def image_reference(self, image_id):
            ...

    Register classes and images with add_class() / add_image(), then call
    prepare() before using the dataset.
    """

    def __init__(self, class_map=None):
        # class_map is accepted but not used here.
        self._image_ids = []
        self.image_info = []
        # Background is always the first class
        self.class_info = [{"source": "bg", "id": 0, "name": "BackGround"}]
        self.source_class_ids = {}

    def add_class(self, source, class_id, class_name):
        """Register a class; a (source, class_id) pair that is already
        registered is left untouched."""
        assert "." not in source, "Source name cannot contain a dot"
        already_registered = any(
            entry['source'] == source and entry["id"] == class_id
            for entry in self.class_info)
        if already_registered:
            return
        self.class_info.append(
            {"source": source, "id": class_id, "name": class_name})

    def add_image(self, source, image_id, path, **kwargs):
        """Register an image; extra keyword arguments are stored alongside
        its id, source and path."""
        record = {"id": image_id, "source": source, "path": path}
        record.update(kwargs)
        self.image_info.append(record)

    def image_reference(self, image_id):
        """Return a link or description that helps locate the image in
        its source for look-up or debugging.

        Override for your dataset, but pass to this function
        if you encounter images not in your dataset. The base
        implementation returns an empty string.
        """
        return ""

    def prepare(self, class_map=None):
        """Build the lookup attributes from the registered classes and
        images. Must be called before the dataset is used.

        TODO: class map is not supported yet. When done, it should handle
              mapping classes from different datasets to the same class ID.
        """
        def clean_name(name):
            """Keep only the part of the name before the first comma."""
            return name.split(",")[0]

        # Classes
        self.num_classes = len(self.class_info)
        self.class_ids = np.arange(self.num_classes)
        self.class_names = [clean_name(entry["name"])
                            for entry in self.class_info]
        # Images
        self.num_images = len(self.image_info)
        self._image_ids = np.arange(self.num_images)

        # "source.id" -> internal class ID
        self.class_from_source_map = {
            "{}.{}".format(entry['source'], entry['id']): internal_id
            for entry, internal_id in zip(self.class_info, self.class_ids)}

        # For every source, the internal class IDs it supports. The
        # background class (index 0) is included for all sources.
        self.sources = list(set([entry['source']
                                 for entry in self.class_info]))
        self.source_class_ids = {}
        for source in self.sources:
            self.source_class_ids[source] = [
                index for index, entry in enumerate(self.class_info)
                if index == 0 or source == entry['source']]

    def map_source_class_id(self, source_class_id):
        """Translate a "source.id" string into the internal class ID.

        For example:
        dataset.map_source_class_id("coco.12") -> 23
        """
        return self.class_from_source_map[source_class_id]

    def get_source_class_id(self, class_id, source):
        """Translate an internal class ID back to the class ID used by
        the given source dataset."""
        entry = self.class_info[class_id]
        assert entry['source'] == source
        return entry['id']

    def append_data(self, class_info, image_info):
        # NOTE(review): the arguments are not used, and this relies on
        # "map" / "ds" keys that add_class() and add_image() do not set --
        # looks unfinished; confirm before calling.
        self.external_to_class_id = {}
        for internal_id, entry in enumerate(self.class_info):
            for ds, external_id in entry["map"]:
                self.external_to_class_id[ds + str(external_id)] = internal_id

        # Map external image IDs to internal ones.
        self.external_to_image_id = {}
        for internal_id, record in enumerate(self.image_info):
            key = record["ds"] + str(record["id"])
            self.external_to_image_id[key] = internal_id

    @property
    def image_ids(self):
        """Internal image IDs (0..num_images-1 once prepare() has run)."""
        return self._image_ids

    def source_image_link(self, image_id):
        """Return the stored path of the image.

        Override this to return a URL to the image if it's available
        online for easy debugging.
        """
        return self.image_info[image_id]["path"]

    def load_image(self, image_id):
        """Read the image from its stored path and return it as an array.

        The path is printed before loading. An image that does not come
        back with 3 dimensions (grayscale) is converted to RGB.
        """
        path = self.image_info[image_id]['path']
        print(path)
        image = skimage.io.imread(path)
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        return image

    def load_mask(self, image_id):
        """Load the instance masks of the given image.

        Datasets store masks in different ways, so subclasses override
        this method and return the masks as an array of binary masks of
        shape [height, width, instances].

        Returns:
            masks: A bool array of shape [height, width, instance count]
                with a binary mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.

        The base implementation returns an empty mask and no class IDs.
        """
        empty_mask = np.empty([0, 0, 0])
        no_class_ids = np.empty([0], np.int32)
        return empty_mask, no_class_ids
384 | 
385 | 
def resize_image(image, min_dim=None, max_dim=None, padding=False):
    """
    Resizes an image keeping the aspect ratio.

    min_dim: if provided, resizes the image such that its smaller
        dimension == min_dim (images are scaled up, never down, by this rule)
    max_dim: if provided, ensures that the image's longest side doesn't
        exceed this value.
    padding: If true, pads image with zeros so its size is max_dim x max_dim.
        max_dim must be provided in that case.

    Returns:
    image: the resized image
    window: (y1, x1, y2, x2). If max_dim is provided, padding might
        be inserted in the returned image. If so, this window is the
        coordinates of the image part of the full image (excluding
        the padding). The x2, y2 pixels are not included.
    scale: The scale factor used to resize the image
    padding: Padding added to the image [(top, bottom), (left, right), (0, 0)].
        All zeros when no padding was requested, so the value can always be
        handed to resize_mask() / np.pad.

    NOTE(review): the resize itself relies on scipy.misc.imresize, which newer
    SciPy releases no longer ship - confirm the pinned SciPy version.
    """
    # Default window (y1, x1, y2, x2) and default scale == 1.
    h, w = image.shape[:2]
    window = (0, 0, h, w)
    scale = 1
    # Default "no padding" result. Previously the boolean argument itself was
    # returned here, which is not a valid pad width for np.pad.
    pad_widths = [(0, 0), (0, 0), (0, 0)]

    # Scale?
    if min_dim:
        # Scale up but not down
        scale = max(1, min_dim / min(h, w))
    # Does it exceed max dim?
    if max_dim:
        image_max = max(h, w)
        if round(image_max * scale) > max_dim:
            scale = max_dim / image_max
    # Resize image and mask
    if scale != 1:
        image = scipy.misc.imresize(
            image, (round(h * scale), round(w * scale)))
    # Need padding?
    if padding:
        # Get new height and width
        h, w = image.shape[:2]
        top_pad = (max_dim - h) // 2
        bottom_pad = max_dim - h - top_pad
        left_pad = (max_dim - w) // 2
        right_pad = max_dim - w - left_pad
        pad_widths = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
        image = np.pad(image, pad_widths, mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad)
    return image, window, scale, pad_widths
435 | 
436 | 
def resize_mask(mask, scale, padding):
    """Resizes a mask using the given scale and padding.
    Typically, you get the scale and padding from resize_image() to
    ensure both, the image and the mask, are resized consistently.

    mask: array whose first two axes are scaled and whose third axis is
        left untouched (zoom factors are [scale, scale, 1]).
    scale: mask scaling factor
    padding: Padding to add to the mask in the form
            [(top, bottom), (left, right), (0, 0)].
            False or None means "no padding".

    Returns the zoomed (nearest-neighbour, order=0) and zero-padded mask.
    """
    mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
    # resize_image() may report "no padding" as False; np.pad only accepts
    # integral pad widths, so skip the call instead of forwarding a boolean.
    if padding is None or padding is False:
        return mask
    return np.pad(mask, padding, mode='constant', constant_values=0)
450 | 
451 | 
def minimize_mask(bbox, mask, mini_shape):
    """Shrink each instance mask to `mini_shape` to cut memory load.

    For every instance the mask is cropped to its bounding box, resized to
    `mini_shape` and re-binarised (resized value >= 128). The result can be
    brought back to image scale with expand_mask().

    bbox: per-instance boxes; the first four values of each row are read as
        (y1, x1, y2, x2).
    mask: [height, width, instances] array.
    mini_shape: tuple (height, width) of the mini mask.

    Raises Exception when a box crops to an empty region.

    See inspect_data.ipynb notebook for more details.

    NOTE(review): relies on scipy.misc.imresize, which newer SciPy releases
    no longer ship - confirm the pinned SciPy version.
    """
    instance_count = mask.shape[-1]
    mini_mask = np.zeros(mini_shape + (instance_count,), dtype=bool)
    for idx in range(instance_count):
        plane = mask[:, :, idx]
        top, left, bottom, right = bbox[idx][:4]
        crop = plane[top:bottom, left:right]
        if crop.size == 0:
            raise Exception("Invalid bounding box with area of zero")
        resized = scipy.misc.imresize(
            crop.astype(float), mini_shape, interp='bilinear')
        mini_mask[:, :, idx] = np.where(resized >= 128, 1, 0)
    return mini_mask
468 | 
469 | 
def expand_mask(bbox, mini_mask, image_shape):
    """Bring mini masks back to image size, reversing minimize_mask().

    Each mini mask is resized to the height and width of its bounding box,
    re-binarised (resized value >= 128) and written into a full-size boolean
    canvas at the box location.

    bbox: per-instance boxes; the first four values of each row are read as
        (y1, x1, y2, x2).
    mini_mask: [mini_height, mini_width, instances] array.
    image_shape: tuple whose first two entries are the image height and width.

    See inspect_data.ipynb notebook for more details.

    NOTE(review): relies on scipy.misc.imresize, which newer SciPy releases
    no longer ship - confirm the pinned SciPy version.
    """
    instance_count = mini_mask.shape[-1]
    full = np.zeros(image_shape[:2] + (instance_count,), dtype=bool)
    for idx in range(instance_count):
        small = mini_mask[:, :, idx]
        top, left, bottom, right = bbox[idx][:4]
        box_size = (bottom - top, right - left)
        resized = scipy.misc.imresize(
            small.astype(float), box_size, interp='bilinear')
        full[top:bottom, left:right, idx] = np.where(resized >= 128, 1, 0)
    return full
485 | 
486 | 
487 | # TODO: Build and use this function to reduce code duplication
def mold_mask(mask, config):
    """Placeholder for a shared mask-molding helper; not implemented yet.

    Both arguments are currently ignored and None is returned.
    """
    return None
490 | 
491 | 
def unmold_mask(mask, bbox, image_shape):
    """Place a network-generated mask back into a full-size image canvas.

    mask: [height, width] of type float. A small, typically 28x28 mask.
    bbox: [y1, x1, y2, x2]. The box to fit the mask in.
    image_shape: shape of the original image; only the first two entries
        (height, width) are used.

    The mask is resized to the box size, rescaled by 1/255, thresholded at
    0.5 and written into a zero-filled uint8 array of the image size.

    Returns a binary mask with the same size as the original image.

    NOTE(review): relies on scipy.misc.imresize, which newer SciPy releases
    no longer ship - confirm the pinned SciPy version.
    """
    threshold = 0.5
    top, left, bottom, right = bbox
    box_size = (bottom - top, right - left)
    resized = scipy.misc.imresize(mask, box_size, interp='bilinear')
    scores = resized.astype(np.float32) / 255.0
    binary = np.where(scores >= threshold, 1, 0).astype(np.uint8)

    # Put the mask in the right location.
    canvas = np.zeros(image_shape[:2], dtype=np.uint8)
    canvas[top:bottom, left:right] = binary
    return canvas
510 | 
511 | 
512 | ############################################################
513 | #  Anchors
514 | ############################################################
515 | 
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    """Build anchor boxes for one feature map.

    scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
    ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
    shape: [height, width] spatial shape of the feature map over which
            to generate anchors.
    feature_stride: Stride of the feature map relative to the image in pixels.
    anchor_stride: Stride of anchors on the feature map. For example, if the
        value is 2 then generate anchors for every other feature map pixel.

    Returns an [N, 4] array of boxes in corner form (y1, x1, y2, x2).
    """
    # Every (scale, ratio) pairing, flattened to 1D.
    scale_grid, ratio_grid = np.meshgrid(np.array(scales), np.array(ratios))
    flat_scales = scale_grid.flatten()
    root_ratios = np.sqrt(ratio_grid.flatten())

    # Box dimensions for each pairing.
    heights = flat_scales / root_ratios
    widths = flat_scales * root_ratios

    # Anchor centre positions, expressed in image pixels.
    rows = np.arange(0, shape[0], anchor_stride) * feature_stride
    cols = np.arange(0, shape[1], anchor_stride) * feature_stride
    grid_x, grid_y = np.meshgrid(cols, rows)

    # Cross every centre with every box dimension.
    box_widths, centers_x = np.meshgrid(widths, grid_x)
    box_heights, centers_y = np.meshgrid(heights, grid_y)

    # Flatten into parallel lists of (y, x) centres and (h, w) half sizes.
    centers = np.stack([centers_y, centers_x], axis=2).reshape([-1, 2])
    half_sizes = 0.5 * np.stack(
        [box_heights, box_widths], axis=2).reshape([-1, 2])

    # Corner coordinates (y1, x1, y2, x2).
    return np.concatenate([centers - half_sizes, centers + half_sizes], axis=1)
553 | 
554 | 
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
                             anchor_stride):
    """Generate anchors for every level of a feature pyramid.

    Level i uses scales[i], feature_shapes[i] and feature_strides[i]; every
    ratio is applied at every level.

    Returns:
    anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array, in
        the order of the given scales: anchors of scales[0] come first, then
        anchors of scales[1], and so on.
    """
    per_level = [
        generate_anchors(scale, ratios, feature_shapes[level],
                         feature_strides[level], anchor_stride)
        for level, scale in enumerate(scales)
    ]
    return np.concatenate(per_level, axis=0)
573 | 
574 | 
575 | ############################################################
576 | #  Miscellaneous
577 | ############################################################
578 | 
def trim_zeros(x):
    """Drop the all-zero rows of a 2D array.

    Tensors are often allocated larger than the available data and padded
    with zeros; this removes that padding.

    x: [rows, columns].
    """
    assert x.ndim == 2
    has_data = np.any(x != 0, axis=1)
    return x[has_data]
587 | 
588 | 
def compute_ap(gt_boxes, gt_class_ids, gt_masks,
               pred_boxes, pred_class_ids, pred_scores, pred_masks,
               iou_threshold=0.5):
    """Compute Average Precision at a set IoU threshold (default 0.5).

    Predictions are matched to ground truth greedily, in descending score
    order, using the overlaps returned by compute_overlaps_masks() on the
    masks; the boxes are only used to strip zero padding and to count
    instances.

    gt_boxes, pred_boxes: 2D arrays; all-zero rows are treated as padding
        and removed with trim_zeros().
    gt_class_ids, pred_class_ids: class IDs compared for equality when a
        prediction overlaps a ground-truth instance enough.
    gt_masks, pred_masks: arrays with instances on the last axis.
    pred_scores: per-prediction scores used for sorting.
    iou_threshold: minimum overlap for a prediction to count as a match.

    Returns:
    mAP: Average precision (area under the precision envelope over recall).
    precisions: Precision at each prediction step, padded with a leading and
        trailing 0 and made monotonically non-increasing.
    recalls: Recall at each prediction step, padded with a leading 0 and a
        trailing 1.
    overlaps: [pred, gt] overlaps as returned by compute_overlaps_masks(),
        with predictions in score-sorted order.
    pred_match: 1D array, 1 where the (sorted) prediction was matched.
    gt_match: 1D array, 1 where the ground-truth instance was matched.
    """
    # Trim zero padding and sort predictions by score from high to low
    # TODO: cleaner to do zero unpadding upstream
    gt_boxes = trim_zeros(gt_boxes)
    gt_masks = gt_masks[..., :gt_boxes.shape[0]]
    pred_boxes = trim_zeros(pred_boxes)
    pred_scores = pred_scores[:pred_boxes.shape[0]]
    indices = np.argsort(pred_scores)[::-1]
    pred_boxes = pred_boxes[indices]
    pred_class_ids = pred_class_ids[indices]
    pred_scores = pred_scores[indices]
    pred_masks = pred_masks[..., indices]

    # Compute IoU overlaps [pred_masks, gt_masks]
    overlaps = compute_overlaps_masks(pred_masks, gt_masks)

    # Loop through predictions (highest score first) and find a matching
    # ground truth instance for each.
    # NOTE: match_count is incremented below but not returned or read.
    match_count = 0
    pred_match = np.zeros([pred_boxes.shape[0]])
    gt_match = np.zeros([gt_boxes.shape[0]])
    for i in range(len(pred_boxes)):
        # Visit ground truth instances from highest to lowest overlap
        sorted_ixs = np.argsort(overlaps[i])[::-1]
        for j in sorted_ixs:
            # If ground truth box is already matched, go to next one
            if gt_match[j] == 1:
                continue
            # If we reach IoU smaller than the threshold, end the loop
            iou = overlaps[i, j]
            if iou < iou_threshold:
                break
            # Do we have a match?
            if pred_class_ids[i] == gt_class_ids[j]:
                match_count += 1
                gt_match[j] = 1
                pred_match[i] = 1
                break

    # Compute precision and recall at each prediction box step
    precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
    recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match)

    # Pad with start and end values to simplify the math
    precisions = np.concatenate([[0], precisions, [0]])
    recalls = np.concatenate([[0], recalls, [1]])

    # Ensure precision values decrease but don't increase. This way, the
    # precision value at each recall threshold is the maximum it can be
    # for all following recall thresholds, as specified by the VOC paper.
    for i in range(len(precisions) - 2, -1, -1):
        precisions[i] = np.maximum(precisions[i], precisions[i + 1])

    # Sum precision over the steps where recall changes
    indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
    mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
                 precisions[indices])

    return mAP, precisions, recalls, overlaps, pred_match, gt_match
658 | 
659 | 
def compute_recall(pred_boxes, gt_boxes, iou):
    """Compute the recall at the given IoU threshold.

    Recall here is the fraction of GT boxes that are the best-overlapping
    GT box of at least one prediction whose best overlap reaches `iou`.

    pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
    gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates

    Returns (recall, positive_ids), where positive_ids are the indices of
    the predictions whose best overlap is >= iou.
    """
    # Overlap of every prediction with every GT box.
    overlaps = compute_overlaps(pred_boxes, gt_boxes)
    best_gt = np.argmax(overlaps, axis=1)
    best_iou = np.max(overlaps, axis=1)
    positive_ids = np.where(best_iou >= iou)[0]

    found_gt = set(best_gt[positive_ids])
    recall = len(found_gt) / gt_boxes.shape[0]
    return recall, positive_ids
676 | 
677 | 
678 | # ## Batch Slicing
679 | # Some custom layers support a batch size of 1 only, and require a lot of work
680 | # to support batches greater than 1. This function slices an input tensor
681 | # across the batch dimension and feeds batches of size 1. Effectively,
682 | # an easy way to support batches > 1 quickly with little code modification.
# In the long run, it's more efficient to modify the code to support large
# batches and get rid of this function. Consider this a temporary solution.
def batch_slice(inputs, graph_fn, batch_size, names=None):
    """Run a single-instance graph function over each item of a batch.

    The inputs are indexed along their first dimension, each slice is fed
    to `graph_fn`, and the per-slice results are stacked back together
    with tf.stack along axis 0.

    inputs: list of tensors. All must have the same first dimension length.
        A single non-list input is wrapped in a list.
    graph_fn: A function that returns a TF tensor that's part of a graph
        (or a tuple/list of them).
    batch_size: number of slices to divide the data into.
    names: If provided, assigns names to the resulting tensors.

    Returns the stacked tensor when graph_fn yields a single output,
    otherwise a list with one stacked tensor per output.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    per_slice = []
    for idx in range(batch_size):
        produced = graph_fn(*[tensor[idx] for tensor in inputs])
        if not isinstance(produced, (tuple, list)):
            produced = [produced]
        per_slice.append(produced)

    # Transpose: from one entry per slice (each holding all outputs) to one
    # entry per output (each holding all slices).
    grouped = list(zip(*per_slice))

    if names is None:
        names = [None] * len(grouped)

    stacked = [tf.stack(parts, axis=0, name=label)
               for parts, label in zip(grouped, names)]
    return stacked[0] if len(stacked) == 1 else stacked
720 | 
721 | 
def download_trained_weights(coco_model_path, verbose=1):
    """Download COCO trained weights from Releases.

    The content at COCO_MODEL_URL is streamed into the file at
    `coco_model_path`, which is opened for binary writing.

    coco_model_path: local path of COCO trained weights
    verbose: progress messages are printed when this is greater than 0.
    """
    chatty = verbose > 0
    if chatty:
        print("Downloading pretrained model to " + coco_model_path + " ...")
    with urllib.request.urlopen(COCO_MODEL_URL) as response:
        with open(coco_model_path, 'wb') as target:
            shutil.copyfileobj(response, target)
    if chatty:
        print("... done downloading pretrained model!")
733 | 


--------------------------------------------------------------------------------