├── test_final_noI3d_vis.py ├── preprocess_script.py ├── RoiPoolingConv_i3d2.py ├── evaluate_i3d_classifier.py ├── evaluate_sample.py ├── test_final_i3d_bk.py ├── test_i3d_whole.py ├── test_frcnn_ava_bk.py ├── test_ava_concat.py ├── test_final_i3d.py ├── test_frcnn_AVA.py ├── train_frcnn_i3d_cls.py ├── test_final_noI3d.py ├── train_frcnn_i3d_batch.py ├── train_whole_noI3d.py ├── train_frcnn_i3d_whole.py ├── train_frcnn_i3d_v2.py ├── test_with_vis.py ├── extract_i3d_features.py └── train_frcnn_noI3d.py /test_final_noI3d_vis.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preprocess_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | 5 | # f = open(action_csv,'r') 6 | # # train_lines = f.read().splitlines() 7 | # df = pd.read_csv(csv_file) 8 | # df.columns = ['1','2','3','4','5','6','7','8'] 9 | # 10 | # ac_id_dic = get_action_dic() 11 | # # print ac_idf 12 | # actions = f.read().splitlines() 13 | # action_list = set() 14 | # for action in actions[1:]: 15 | # tags = action.split(',') 16 | # tags = tags[:-1] 17 | # ac_id = int(tags[0]) 18 | # ac = ''.join(tags[1:]) 19 | # ac = ac.replace('"','') 20 | # action_list.add(ac) 21 | # 22 | # print action_list 23 | # final_df = pd.DataFrame() 24 | # ac_vid = {} 25 | # vids_list = [] 26 | # for ac in tqdm(action_list): 27 | # # print ac 28 | # id = ac_id_dic[ac] 29 | # vids_df = df.loc[df['7'] == id] 30 | # vid_name = vids_df.iloc[1,0] 31 | # vids_list.append(vid_name) 32 | # vid_df = df.loc[df['1']== vid_name] 33 | # 34 | # final_df = final_df.append(vid_df,ignore_index=True) 35 | # df = df.drop(df[df['1']==vids_df.iloc[1,0]].index) 36 | # 37 | # final_df['2'] = final_df['2'].apply(lambda x: str(x).zfill(4)) 38 | # 39 | # final_df.to_csv(csv_file_subset, header = False, index = False, float_format='%.3f') 40 | 41 | 42 | 43 | 44 | 45 | def form_multi_data(): 46 | dataset = open('/home/subha/hoi_vid/keras-kinetics-i3d/data/ava/ava_data_subset_new.txt','r') 47 | f = open('/home/subha/hoi_vid/keras-kinetics-i3d/data/ava/ava_data_subset_multi.txt','w+') 48 | # df = pd.read_csv(dataset) 49 | # df.columns = ['1','2','3','4','5','6'] 50 | lines = dataset.read().splitlines() 51 | while lines: 52 | # print len(lines) 53 | 54 | ann = lines[0] 55 | tags =ann.split(',') 56 | bbx = ','.join(tags[:-1]) 57 | anns = [l for l in lines if bbx in l] 58 | # print anns 59 | [lines.remove(l) for l in anns] 60 | # [f.write(l+'\n') for l in anns] 61 | for a in anns: 62 | ac = a.split(',')[-1] 63 | bbx = bbx+','+ac 64 | 65 | f.write(bbx+'\n') 66 | # print bbx 67 | 68 | form_multi_data() 69 | -------------------------------------------------------------------------------- /RoiPoolingConv_i3d2.py: -------------------------------------------------------------------------------- 1 | from keras.engine.topology import Layer 2 | import keras.backend as K 3 | 4 | if K.backend() == 'tensorflow': 5 | import tensorflow as tf 6 | 7 | class RoiPoolingConv(Layer): 8 | '''ROI pooling layer for 2D inputs. 9 | See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition, 10 | K. He, X. Zhang, S. Ren, J. Sun 11 | # Arguments 12 | pool_size: int 13 | Size of pooling region to use. pool_size = 7 will result in a 7x7 region. 
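        (Each ROI is pooled to a fixed pool_size x pool_size output, regardless of the size of the region itself.)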
14 | num_rois: number of regions of interest to be used 15 | # Input shape 16 | list of two 4D tensors [X_img,X_roi] with shape: 17 | X_img: 18 | `(1, channels, rows, cols)` if dim_ordering='th' 19 | or 4D tensor with shape: 20 | `(1, rows, cols, channels)` if dim_ordering='tf'. 21 | X_roi: 22 | `(1,num_rois,4)` list of rois, with ordering (x,y,w,h) 23 | # Output shape 24 | 3D tensor with shape: 25 | `(1, num_rois, channels, pool_size, pool_size)` 26 | ''' 27 | def __init__(self, pool_size, num_rois, **kwargs): 28 | 29 | self.dim_ordering = K.image_dim_ordering() 30 | assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}' 31 | 32 | self.pool_size = pool_size 33 | self.num_rois = num_rois 34 | 35 | super(RoiPoolingConv, self).__init__(**kwargs) 36 | 37 | def build(self, input_shape): 38 | if self.dim_ordering == 'th': 39 | self.nb_channels = input_shape[0][1] 40 | elif self.dim_ordering == 'tf': 41 | self.nb_channels = input_shape[0][3] 42 | 43 | def compute_output_shape(self, input_shape): 44 | if self.dim_ordering == 'th': 45 | return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size 46 | else: 47 | return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels 48 | 49 | def call(self, x, mask=None): 50 | 51 | assert(len(x) == 2) 52 | 53 | img = x[0] 54 | rois = x[1] 55 | 56 | input_shape = K.shape(img) 57 | print(img.shape.as_list()) 58 | outputs = [] 59 | 60 | for roi_idx in range(self.num_rois): 61 | 62 | x = rois[0, roi_idx, 0] 63 | y = rois[0, roi_idx, 1] 64 | w = rois[0, roi_idx, 2] 65 | h = rois[0, roi_idx, 3] 66 | # print(x,y,w,h) 67 | row_length = w / float(self.pool_size) 68 | col_length = h / float(self.pool_size) 69 | 70 | num_pool_regions = self.pool_size 71 | 72 | #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op 73 | # in theano. 
The theano implementation is much less efficient and leads to long compile times 74 | 75 | if self.dim_ordering == 'th': 76 | for jy in range(num_pool_regions): 77 | for ix in range(num_pool_regions): 78 | x1 = x + ix * row_length 79 | x2 = x1 + row_length 80 | y1 = y + jy * col_length 81 | y2 = y1 + col_length 82 | 83 | x1 = K.cast(x1, 'int32') 84 | x2 = K.cast(x2, 'int32') 85 | y1 = K.cast(y1, 'int32') 86 | y2 = K.cast(y2, 'int32') 87 | 88 | x2 = x1 + K.maximum(1,x2-x1) 89 | y2 = y1 + K.maximum(1,y2-y1) 90 | 91 | new_shape = [input_shape[0], input_shape[1],input_shape[2], 92 | y2 - y1, x2 - x1] 93 | 94 | x_crop = img[:,:, :, y1:y2, x1:x2] 95 | xm = K.reshape(x_crop, new_shape) 96 | pooled_val = K.max(xm, axis=(3,4)) 97 | outputs.append(pooled_val) 98 | 99 | elif self.dim_ordering == 'tf': 100 | x = K.cast(x, 'int32') 101 | y = K.cast(y, 'int32') 102 | w = K.cast(w, 'int32') 103 | h = K.cast(h, 'int32') 104 | 105 | rs = tf.image.resize_images(img[:,:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size)) 106 | outputs.append(rs) 107 | 108 | final_output = K.concatenate(outputs, axis=0) 109 | print final_output.shape.as_list() 110 | print self.nb_channels 111 | final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels)) 112 | 113 | if self.dim_ordering == 'th': 114 | final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3)) 115 | else: 116 | final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4)) 117 | 118 | print("final shape",final_output.shape.as_list()) 119 | return final_output 120 | 121 | 122 | def get_config(self): 123 | config = {'pool_size': self.pool_size, 124 | 'num_rois': self.num_rois} 125 | base_config = super(RoiPoolingConv, self).get_config() 126 | return dict(list(base_config.items()) + list(config.items())) 127 | -------------------------------------------------------------------------------- /evaluate_i3d_classifier.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Loads pretrained model of I3d Inception architecture for the paper: 'https://arxiv.org/abs/1705.07750' 3 | Evaluates a RGB and Flow sample similar to the paper's github repo: 'https://github.com/deepmind/kinetics-i3d' 4 | ''' 5 | 6 | import numpy as np 7 | import argparse 8 | 9 | from check_i3d import Inception_Inflated3d 10 | # from i3d_inception import Inception_Inflated3d 11 | from tensorflow.python import keras 12 | from keras.utils import plot_model 13 | import os 14 | import pdb 15 | 16 | 17 | 18 | 19 | 20 | NUM_FRAMES = 79 21 | FRAME_HEIGHT = 224 22 | FRAME_WIDTH = 224 23 | NUM_RGB_CHANNELS = 3 24 | NUM_FLOW_CHANNELS = 2 25 | 26 | NUM_CLASSES = 400 27 | 28 | #'/groups/jbhuang_lab/data/action/UCF101/npy/Diving/v_Diving_g01_c01.npy' 29 | 30 | SAMPLE_DATA_PATH = { 31 | 'rgb' :'data/v_CricketShot_g04_c01_rgb.npy', 32 | 'flow' : 'data/v_CricketShot_g04_c01_flow.npy' 33 | } 34 | 35 | LABEL_MAP_PATH = 'data/label_map.txt' 36 | 37 | def main(args): 38 | # load the kinetics classes 39 | kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')] 40 | 41 | 42 | if args.eval_type in ['rgb', 'joint']: 43 | if args.no_imagenet_pretrained: 44 | # build model for RGB data 45 | # and load pretrained weights (trained on kinetics dataset only) 46 | rgb_model = Inception_Inflated3d( 47 | include_top=False, 48 | weights='rgb_kinetics_only', 49 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 50 | classes=NUM_CLASSES) 51 | else: 52 | # build model for RGB data 53 | # and load 
pretrained weights (trained on imagenet and kinetics dataset) 54 | rgb_model = Inception_Inflated3d( 55 | include_top=False, 56 | weights='rgb_imagenet_and_kinetics', 57 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 58 | classes=NUM_CLASSES) 59 | # pdb.set_trace() 60 | # print rgb_model.summary() 61 | plot_model(rgb_model, to_file='model_without_top.png', show_shapes = True) 62 | # print rgb_model.summary() 63 | 64 | 65 | # load RGB sample (just one example) 66 | ''' 67 | rgb_sample = np.load(SAMPLE_DATA_PATH['rgb']) 68 | # # 69 | # # # make prediction 70 | rgb_features = rgb_model.predict(rgb_sample) 71 | # rgb_features 72 | # print rgb_features.shape.as_list() 73 | # 74 | # print rgb_logits.shape 75 | features = rgb_features[:,11,:,:,:] 76 | features = np.array(features) 77 | print features.shape 78 | ''' 79 | ''' 80 | if args.eval_type in ['flow', 'joint']: 81 | if args.no_imagenet_pretrained: 82 | # build model for optical flow data 83 | # and load pretrained weights (trained on kinetics dataset only) 84 | flow_model = Inception_Inflated3d( 85 | include_top=True, 86 | weights='flow_kinetics_only', 87 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 88 | classes=NUM_CLASSES) 89 | else: 90 | # build model for optical flow data 91 | # and load pretrained weights (trained on imagenet and kinetics dataset) 92 | flow_model = Inception_Inflated3d( 93 | include_top=True, 94 | weights='flow_imagenet_and_kinetics', 95 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 96 | classes=NUM_CLASSES) 97 | 98 | 99 | # load flow sample (just one example) 100 | flow_sample = np.load(SAMPLE_DATA_PATH['flow']) 101 | 102 | # make prediction 103 | flow_logits = flow_model.predict(flow_sample) 104 | 105 | 106 | # produce final model logits 107 | if args.eval_type == 'rgb': 108 | sample_logits = rgb_logits 109 | elif args.eval_type == 'flow': 110 | sample_logits = flow_logits 111 | else: # joint 112 | sample_logits = rgb_logits + flow_logits 113 | 114 | # produce softmax output from model logit for class probabilities 115 | sample_logits = sample_logits[0] # we are dealing with just one example 116 | sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits)) 117 | 118 | sorted_indices = np.argsort(sample_predictions)[::-1] 119 | 120 | print('\nNorm of logits: %f' % np.linalg.norm(sample_logits)) 121 | print('\nTop classes and probabilities') 122 | for index in sorted_indices[:20]: 123 | print(sample_predictions[index], sample_logits[index], kinetics_classes[index]) 124 | 125 | 126 | return 127 | 128 | ''' 129 | if __name__ == '__main__': 130 | # parse arguments 131 | parser = argparse.ArgumentParser() 132 | parser.add_argument('--eval-type', 133 | help='specify model type. 1 stream (rgb or flow) or 2 stream (joint = rgb and flow).', 134 | type=str, choices=['rgb', 'flow', 'joint'], default='joint') 135 | 136 | parser.add_argument('--no-imagenet-pretrained', 137 | help='If set, load model weights trained only on kinetics dataset. 
Otherwise, load model weights trained on imagenet and kinetics dataset.', 138 | action='store_true') 139 | 140 | 141 | args = parser.parse_args() 142 | main(args) 143 | -------------------------------------------------------------------------------- /evaluate_sample.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Loads pretrained model of I3d Inception architecture for the paper: 'https://arxiv.org/abs/1705.07750' 3 | Evaluates a RGB and Flow sample similar to the paper's github repo: 'https://github.com/deepmind/kinetics-i3d' 4 | ''' 5 | 6 | import numpy as np 7 | import argparse 8 | 9 | from i3d_inception import Inception_Inflated3d 10 | # from i3d_inception import Inception_Inflated3d 11 | from tensorflow.python import keras 12 | from keras.utils import plot_model 13 | import os 14 | import pdb 15 | from keras.layers import Input 16 | 17 | 18 | 19 | 20 | 21 | 22 | NUM_FRAMES = 79 23 | FRAME_HEIGHT = 224 24 | FRAME_WIDTH = 224 25 | NUM_RGB_CHANNELS = 3 26 | NUM_FLOW_CHANNELS = 2 27 | 28 | NUM_CLASSES = 400 29 | 30 | #'/groups/jbhuang_lab/data/action/UCF101/npy/Diving/v_Diving_g01_c01.npy' 31 | 32 | SAMPLE_DATA_PATH = { 33 | 'rgb' :'data/v_CricketShot_g04_c01_rgb.npy', 34 | 'flow' : 'data/v_CricketShot_g04_c01_flow.npy' 35 | } 36 | 37 | LABEL_MAP_PATH = 'data/label_map.txt' 38 | 39 | def main(args): 40 | # load the kinetics classes 41 | kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')] 42 | 43 | 44 | if args.eval_type in ['rgb', 'joint']: 45 | if args.no_imagenet_pretrained: 46 | # build model for RGB data 47 | # and load pretrained weights (trained on kinetics dataset only) 48 | rgb_model = Inception_Inflated3d( 49 | include_top=False, 50 | weights='rgb_kinetics_only', 51 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 52 | classes=NUM_CLASSES) 53 | else: 54 | # build model for RGB data 55 | # and load pretrained weights (trained on imagenet and kinetics dataset) 56 | rgb_model = Inception_Inflated3d( 57 | include_top=False, 58 | weights='rgb_imagenet_and_kinetics', 59 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 60 | classes=NUM_CLASSES) 61 | # pdb.set_trace() 62 | # print rgb_model.summary() 63 | # plot_model(rgb_model, to_file='model_without_top.png', show_shapes = True) 64 | # print rgb_model.summary() 65 | 66 | 67 | # load RGB sample (just one example) 68 | vid_input = Input(shape =(79, 224,224, 3)) 69 | features = rgb_model(vid_input) 70 | print features.shape.as_list() 71 | feature_shape = features.shape.as_list() 72 | mid_slice_no = (feature_shape[1]/2)+1 73 | print mid_slice_no 74 | rgb_features = features[:,mid_slice_no,:,:,:] 75 | 76 | print rgb_features.shape.as_list() 77 | ''' 78 | rgb_sample = np.load(SAMPLE_DATA_PATH['rgb']) 79 | # # 80 | # # # make prediction 81 | rgb_features = rgb_model.predict(rgb_sample) 82 | # rgb_features 83 | # print rgb_features.shape.as_list() 84 | # 85 | # print rgb_logits.shape 86 | features = rgb_features[:,11,:,:,:] 87 | features = np.array(features) 88 | print features.shape 89 | ''' 90 | ''' 91 | if args.eval_type in ['flow', 'joint']: 92 | if args.no_imagenet_pretrained: 93 | # build model for optical flow data 94 | # and load pretrained weights (trained on kinetics dataset only) 95 | flow_model = Inception_Inflated3d( 96 | include_top=True, 97 | weights='flow_kinetics_only', 98 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 99 | classes=NUM_CLASSES) 100 | else: 101 | # build model for optical 
flow data 102 | # and load pretrained weights (trained on imagenet and kinetics dataset) 103 | flow_model = Inception_Inflated3d( 104 | include_top=True, 105 | weights='flow_imagenet_and_kinetics', 106 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 107 | classes=NUM_CLASSES) 108 | 109 | 110 | # load flow sample (just one example) 111 | flow_sample = np.load(SAMPLE_DATA_PATH['flow']) 112 | 113 | # make prediction 114 | flow_logits = flow_model.predict(flow_sample) 115 | 116 | 117 | # produce final model logits 118 | if args.eval_type == 'rgb': 119 | sample_logits = rgb_logits 120 | elif args.eval_type == 'flow': 121 | sample_logits = flow_logits 122 | else: # joint 123 | sample_logits = rgb_logits + flow_logits 124 | 125 | # produce softmax output from model logit for class probabilities 126 | sample_logits = sample_logits[0] # we are dealing with just one example 127 | sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits)) 128 | 129 | sorted_indices = np.argsort(sample_predictions)[::-1] 130 | 131 | print('\nNorm of logits: %f' % np.linalg.norm(sample_logits)) 132 | print('\nTop classes and probabilities') 133 | for index in sorted_indices[:20]: 134 | print(sample_predictions[index], sample_logits[index], kinetics_classes[index]) 135 | 136 | 137 | return 138 | 139 | ''' 140 | if __name__ == '__main__': 141 | # parse arguments 142 | parser = argparse.ArgumentParser() 143 | parser.add_argument('--eval-type', 144 | help='specify model type. 1 stream (rgb or flow) or 2 stream (joint = rgb and flow).', 145 | type=str, choices=['rgb', 'flow', 'joint'], default='joint') 146 | 147 | parser.add_argument('--no-imagenet-pretrained', 148 | help='If set, load model weights trained only on kinetics dataset. Otherwise, load model weights trained on imagenet and kinetics dataset.', 149 | action='store_true') 150 | 151 | 152 | args = parser.parse_args() 153 | main(args) 154 | -------------------------------------------------------------------------------- /test_final_i3d_bk.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | import time 31 | 32 | 33 | ''' 34 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 35 | 36 | ''' 37 | sys.setrecursionlimit(40000) 38 | 39 | parser = OptionParser() 40 | 41 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 42 | parser.add_option("-n", "--num_rois", 
type="int", dest="num_rois", 43 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 44 | 45 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 46 | help="Number of ROIs per iteration. Higher means more memory use.", default='ava_val_subset_80.csv') 47 | parser.add_option("-m", "--model_name", dest="model_name", 48 | help="Path to model.") 49 | parser.add_option("-o", "--output", dest="output", 50 | help="csv to save predictions.") 51 | parser.add_option("--config_filename", dest="config_filename", help= 52 | "Location to read the metadata related to the training (generated when training).", 53 | default="config_noI3d_cheating8_AVA.pickle") 54 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 55 | 56 | (options, args) = parser.parse_args() 57 | 58 | if not options.test_path: # if filename is not given 59 | parser.error('Error: path to test data must be specified. Pass --path to command line') 60 | 61 | 62 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 63 | config_output_filename = options.config_filename 64 | with open(config_output_filename, 'rb') as f_in: 65 | C = pickle.load(f_in) 66 | 67 | if C.network == 'resnet50': 68 | import keras_frcnn.resnet as nn 69 | elif C.network == 'vgg': 70 | import keras_frcnn.vgg as nn 71 | 72 | # turn off any data augmentation at test time 73 | C.use_horizontal_flips = False 74 | C.use_vertical_flips = False 75 | C.rot_90 = False 76 | resized_width = 320 77 | resized_height = 400 78 | img_path = options.test_path 79 | output_csv_file = os.path.join('evaluation','phase2',options.output) 80 | fc = open(output_csv_file,'w+') 81 | def format_img_size(img, C): 82 | """ formats the image size based on config """ 83 | img_min_side = float(C.im_size) 84 | (height,width,_) = img.shape 85 | ratio_w = resized_width/width 86 | ratio_h = resized_height/height 87 | new_width = resized_width 88 | new_height = resized_height 89 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 90 | return img, ratio_w, ratio_h 91 | 92 | def format_img(img, C): 93 | """ formats an image for model prediction based on config """ 94 | img, ratio_w, ratio_h= format_img_size(img, C) 95 | img = format_img_channels(img, C) 96 | return img, ratio_w, ratio_h 97 | 98 | # Method to transform the coordinates of the bounding box to its original size 99 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 100 | 101 | real_x1 = int(round(x1 // ratio_w)) 102 | real_y1 = int(round(y1 // ratio_h)) 103 | real_x2 = int(round(x2 // ratio_w)) 104 | real_y2 = int(round(y2 // ratio_h)) 105 | 106 | return (real_x1, real_y1, real_x2 ,real_y2) 107 | 108 | class_mapping = C.class_mapping 109 | classes_count = class_mapping 110 | if 'bg' not in class_mapping: 111 | class_mapping['bg'] = len(class_mapping) 112 | 113 | class_mapping = {v: k for k, v in class_mapping.items()} 114 | # print(class_mapping) 115 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 116 | C.num_rois = int(options.num_rois) 117 | # print "Num rois originally",C.num_rois 118 | if C.network == 'resnet50': 119 | num_features = 1024 120 | elif C.network == 'vgg': 121 | num_features = 512 122 | 123 | 124 | def extract_numpy_single_frame(img,C): 125 | 126 | img = (img/255.)*2 - 1 127 | return img 128 | 129 | def get_frame_idx(img_path): 130 | winSize = 64 131 | tags = img_path.split(os.path.sep) 132 | vid_folder = 
'/'+'/'.join(tags[1:-1]) 133 | frames = os.listdir(vid_folder) 134 | if 'CAD' in img_path: 135 | frames = [f for f in frames if f.startswith('RGB')] 136 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 137 | else: 138 | frames.sort(key = lambda x: int(x.split('.')[0])) 139 | frame_index = frames.index(tags[-1]) 140 | fi = get_frames_index(frames,frame_index,winSize) 141 | seq =[frames[k] if k!=-1 else k for k in fi] 142 | # print(seq[0],seq[31]) 143 | # print seq 144 | return seq 145 | optimizer_classifier = Adam(lr=1e-5) 146 | 147 | if K.image_dim_ordering() == 'th': 148 | input_shape_img = (3, None, None) 149 | # input_shape_features = (num_features, None, None) 150 | else: 151 | input_shape_img = (None, None, 3) 152 | 153 | # shared_layers_input= Input(shape=( None,None,832)) 154 | roi_input = Input(shape=(None, 4)) 155 | vid_input = Input(shape =(None, None, None, 3)) 156 | vid_input_shape = (64, 400,320, 3) 157 | feature_map_input = Input(shape=(None, None, None,None,832)) 158 | img_input = Input(shape=(None, None, 3)) 159 | shared_layers_image = nn.nn_base(img_input, trainable=False) 160 | rgb_model = Inception_Inflated3d( 161 | include_top=False, 162 | weights='rgb_kinetics_only', 163 | input_shape=vid_input_shape, 164 | classes=classes_count) 165 | 166 | 167 | # classifier = nn.classifier_i3d_concat_new(shared_layers_input, 1, nb_classes=num_classes, trainable=True) 168 | # model_classifier = Model([vid_input], classifier) 169 | 170 | 171 | classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image,roi_input, 1, nb_classes=len(classes_count), trainable=True) 172 | model_classifier = Model([feature_map_input, roi_input], classifier) 173 | 174 | model_name = os.path.join(options.model_name,'model.hdf5') 175 | print('Loading weights from {}'.format(model_name)) 176 | model_classifier.load_weights(model_name, by_name=True) 177 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 178 | ''' 179 | 180 | all_imgs = [] 181 | classes = {} 182 | bbox_threshold = 0.7 183 | visualise = True 184 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 185 | df = pd.read_csv(f_val) 186 | final_predictions = [] 187 | 188 | 189 | indices = range(len(df)) 190 | print len(df) 191 | for i in tqdm(range(len(df))): 192 | try: 193 | # ind = indices[i:i+bs] 194 | row = df.iloc[i,:] 195 | val_vid = row[0] 196 | vid_path = os.path.join(img_path,val_vid) 197 | img_name = str(int(row[1]))+'.jpg' 198 | filepath = os.path.join(vid_path,img_name) 199 | k = 0 200 | k+=1 201 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 202 | continue 203 | st = time.time() 204 | filepath = os.path.join(vid_path,img_name) 205 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 206 | img = cv2.imread(filepath) 207 | tags = filepath.split(os.path.sep) 208 | img_folder = '/'+'/'.join(tags[1:-1]) 209 | seq = get_frame_idx(filepath) 210 | s1 = time.time() 211 | vid_numpy = [] 212 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 213 | for frame in seq: 214 | if frame!=-1: 215 | fr_name = os.path.join(img_folder, frame) 216 | np_name = fr_name.replace('.jpg','.npy') 217 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 218 | fr_npy = np.load(np_name) 219 | vid_numpy.append(fr_npy) 220 | else: 221 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 222 | e1 = time.time() 223 | vid_numpy = np.array(vid_numpy) 
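        # The box below is mapped from normalized AVA coordinates into the
        # coordinate frame of the I3D feature map: scaling by resized_width /
        # resized_height gives pixel coordinates, and the division by 16
        # corresponds to the spatial stride of the Mixed_4f (832-channel)
        # output of Inception_Inflated3d relative to the 400x320 input frames.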
224 |         vid_numpy = np.expand_dims(vid_numpy,axis=0)
225 |         x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16)
226 |         w = x2-x1
227 |         h = y2-y1
228 |         roi = np.array([x1,y1,w,h])
229 |         rois = np.expand_dims(roi,axis=0)
230 |         rois = np.expand_dims(rois,axis=0)
231 |         s2 = time.time()
232 |         shared_layers_orig = rgb_model.predict(vid_numpy)
233 |         print shared_layers_orig.shape
234 |         s3 = time.time()
235 |         [P_cls]= model_classifier.predict([shared_layers_orig, rois])
236 |         seq_name = filepath.split(os.path.sep)[-2]
237 |         f_predicted = open(output_csv_file,'a+')
238 |         [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(10) if class_mapping[cn]!='bg']
239 |         f_predicted.close()
240 |
241 |     except Exception as e:
242 |         exc_type, exc_obj, exc_tb = sys.exc_info()
243 |         fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
244 |         print(exc_type, fname, exc_tb.tb_lineno)
245 |         print('Exception: {}'.format(e))
246 |         print(filepath)
247 |         continue
248 | '''
--------------------------------------------------------------------------------
/test_i3d_whole.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import cv2
4 | import numpy as np
5 | import sys
6 | import pickle
7 | from optparse import OptionParser
8 | import time
9 | from keras_frcnn import config
10 | from keras import backend as K
11 | from keras.layers import Input
12 | from keras.models import Model
13 | from keras_frcnn import roi_helpers
14 | from keras_frcnn import config, data_generators
15 | from keras_frcnn import losses as losses
16 | import keras_frcnn.roi_helpers as roi_helpers
17 | from keras.utils import generic_utils
18 | from keras.layers import Lambda
19 | from i3d_inception import Inception_Inflated3d
20 | import collections
21 | from keras_frcnn.utils import *
22 | from pdb import set_trace as bp
23 | from tqdm import tqdm
24 | from keras_frcnn import losses as losses
25 | from keras.optimizers import Adam, SGD, RMSprop
26 | import pandas as pd
27 | from tqdm import tqdm
28 | from pdb import set_trace as bp
29 | from keras.utils.training_utils import multi_gpu_model
30 |
31 |
32 | '''
33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/
34 |
35 | '''
36 | sys.setrecursionlimit(40000)
37 |
38 | parser = OptionParser()
39 |
40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.")
41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
42 |     help="Number of ROIs per iteration. Higher means more memory use.", default=4)
43 |
44 | parser.add_option("-v", "--val_data", type="str", dest="val_data",
45 |     help="CSV file with the validation annotations to evaluate.", default='ava_val_subset_80.csv')
46 | parser.add_option("-m", "--model_name", dest="model_name",
47 |     help="Path to model.")
48 | parser.add_option("-o", "--output", dest="output",
49 |     help="csv to save predictions.")
50 | parser.add_option("--config_filename", dest="config_filename", help=
51 |     "Location to read the metadata related to the training (generated when training).",
52 |     default="config_subset_AVA.pickle")
53 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
54 |
55 | (options, args) = parser.parse_args()
56 |
57 | if not options.test_path: # if filename is not given
58 |     parser.error('Error: path to test data must be specified. Pass --path to command line')
59 |
60 |
61 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/'
62 | config_output_filename = options.config_filename
63 | with open(config_output_filename, 'rb') as f_in:
64 |     C = pickle.load(f_in)
65 |
66 | if C.network == 'resnet50':
67 |     import keras_frcnn.resnet as nn
68 | elif C.network == 'vgg':
69 |     import keras_frcnn.vgg as nn
70 |
71 | # turn off any data augmentation at test time
72 | C.use_horizontal_flips = False
73 | C.use_vertical_flips = False
74 | C.rot_90 = False
75 | resized_width = 320
76 | resized_height = 400
77 | img_path = options.test_path
78 | output_csv_file = os.path.join('evaluation','phase2',options.output)
79 | fc = open(output_csv_file,'w+')
80 | def format_img_size(img, C):
81 |     """ formats the image size based on config """
82 |     img_min_side = float(C.im_size)
83 |     (height,width,_) = img.shape
84 |     ratio_w = resized_width/width
85 |     ratio_h = resized_height/height
86 |     new_width = resized_width
87 |     new_height = resized_height
88 |     img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
89 |     return img, ratio_w, ratio_h
90 |
91 | def format_img(img, C):
92 |     """ formats an image for model prediction based on config """
93 |     img, ratio_w, ratio_h= format_img_size(img, C)
94 |     img = format_img_channels(img, C)
95 |     return img, ratio_w, ratio_h
96 |
97 | # Method to transform the coordinates of the bounding box to its original size
98 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2):
99 |
100 |     real_x1 = int(round(x1 // ratio_w))
101 |     real_y1 = int(round(y1 // ratio_h))
102 |     real_x2 = int(round(x2 // ratio_w))
103 |     real_y2 = int(round(y2 // ratio_h))
104 |
105 |     return (real_x1, real_y1, real_x2 ,real_y2)
106 |
107 | class_mapping = C.class_mapping
108 | classes_count = class_mapping
109 | print len(class_mapping)
110 | # bp()
111 | # if 'bg' not in class_mapping:
112 | #     class_mapping['bg'] = len(class_mapping)
113 |
114 | class_mapping = {v: k for k, v in class_mapping.items()}
115 | # print(class_mapping)
116 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
117 | C.num_rois = int(options.num_rois)
118 | # print "Num rois originally",C.num_rois
119 | if C.network == 'resnet50':
120 |     num_features = 1024
121 | elif C.network == 'vgg':
122 |     num_features = 512
123 |
124 | if K.image_dim_ordering() == 'th':
125 |     input_shape_img = (3, None, None)
126 |     # input_shape_features = (num_features, None, None)
127 | else:
128 |     input_shape_img = (None, None, 3)
129 |
130 | shared_layers_input= Input(shape=( None,None,None,832))
131 | roi_input = Input(shape=(None, 4))
132 | vid_input = Input(shape =(None, None, None, 3))
133 | vid_input_shape = (64, 400,320, 3)
134 | img_input = Input(shape=(None, None, 3))
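# Inputs to the classifier head: `feature_map_input` carries the precomputed
# 832-channel I3D feature map, `img_input` feeds the 2D ResNet base
# (`nn.nn_base`) for per-frame appearance features, and `roi_input` carries
# (x, y, w, h) person boxes already scaled to feature-map coordinates.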
135 |
136 | feature_map_input = Input(shape=(None, None,None,832))
137 | shared_layers_image = nn.nn_base(img_input, trainable=False)
138 |
139 |
140 | rgb_model = Inception_Inflated3d(
141 |     include_top=False,
142 |     weights='rgb_kinetics_only',
143 |     input_shape=vid_input_shape,
144 |     classes=classes_count)
145 | def get_new_img_size(width, height, img_min_side, C):
146 |     img_min_side = 448  # overrides the img_min_side argument
147 |     if width <= height:
148 |         f = float(img_min_side) / width
149 |         resized_height = int(f * height)
150 |         resized_width = img_min_side
151 |     else:
152 |         f = float(img_min_side) / height
153 |         resized_width = int(f * width)
154 |         resized_height = img_min_side
155 |
156 |     if C.dataset == 'AVA':
157 |         return resized_width, resized_height
158 |     else:
159 |         return 640, 480
160 |
161 | def extract_numpy_single_frame(img,C):
162 |
163 |     img = (img/255.)*2 - 1
164 |     return img
165 |
166 | def get_frame_idx(img_path):
167 |     winSize = 64
168 |     tags = img_path.split(os.path.sep)
169 |     vid_folder = '/'+'/'.join(tags[1:-1])
170 |     frames = os.listdir(vid_folder)
171 |     if 'CAD' in img_path:
172 |         frames = [f for f in frames if f.startswith('RGB')]
173 |         frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1]))
174 |     else:
175 |         frames.sort(key = lambda x: int(x.split('.')[0]))
176 |     frame_index = frames.index(tags[-1])
177 |     fi = get_frames_index(frames,frame_index,winSize)
178 |     seq =[frames[k] if k!=-1 else k for k in fi]
179 |     # print(seq[0],seq[31])
180 |     # print seq
181 |     return seq
182 | optimizer_classifier = Adam(lr=1e-5)
183 | # classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count), trainable=True)
184 | print len(classes_count)
185 | # bp()
186 | classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count))
187 |
188 | # model_classifier_only = Model([feature_map_input, roi_input], classifier)
# NOTE: the predict call further down passes [x_img, shared_layers_orig, rois],
# so the model presumably needs the image and ROI inputs as well; compare
# test_ava_concat.py, which builds Model([img_input, feature_map_input, roi_input], ...).
189 | model_classifier = Model([feature_map_input], classifier)
190 | # model_classifier = multi_gpu_model(model_classifier, gpus=2)
191 | model_name = os.path.join(options.model_name,'model.hdf5')
192 | print('Loading weights from {}'.format(model_name))
193 | model_classifier.load_weights(model_name, by_name=True)
194 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label])
195 |
196 |
197 | all_imgs = []
198 | classes = {}
199 | bbox_threshold = 0.7
200 | visualise = True
201 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data)
202 | df = pd.read_csv(f_val)
203 | # ac_id = get_action_dic()
204 |
205 | # for val_vid in val_vids:
206 | final_predictions = []
207 |
208 |
209 | indices = range(len(df))
210 | print len(df)
211 | # bp()
212 | bs = 8
213 | for i in tqdm(range(len(df))):
214 |
215 |     try:
216 |         # ind = indices[i:i+bs]
217 |         row = df.iloc[i,:]
218 |         val_vid = row[0]
219 |         vid_path = os.path.join(img_path,val_vid)
220 |         img_name = str(int(row[1]))+'.jpg'
221 |         filepath = os.path.join(vid_path,img_name)
222 |
223 |         val_vid = row[0]
224 |         vid_path = os.path.join(img_path,val_vid)
225 |         img_name = str(int(row[1]))+'.jpg'
226 |         k = 0
227 |         k+=1
228 |
229 |         if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
230 |             continue
231 |         st = time.time()
232 |         filepath = os.path.join(vid_path,img_name)
233 |         # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg'
234 |         fr_num = filepath.split(os.path.sep)[-1].split('.')[0]
235 |         img = cv2.imread(filepath)
236 |
# x_img = 237 | tags = filepath.split(os.path.sep) 238 | img_folder = '/'+'/'.join(tags[1:-1]) 239 | seq = get_frame_idx(filepath) 240 | 241 | # print filepath, seq 242 | vid_numpy = [] 243 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 244 | x_img = np.expand_dims(x_img, axis = 0) 245 | # print filepath, seq 246 | # bp() 247 | for frame in seq: 248 | if frame!=-1: 249 | fr_name = os.path.join(img_folder, frame) 250 | np_name = fr_name.replace('.jpg','.npy') 251 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 252 | # print np_name 253 | fr_npy = np.load(np_name) 254 | vid_numpy.append(fr_npy) 255 | else: 256 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 257 | 258 | vid_numpy = np.array(vid_numpy) 259 | vid_numpy = np.expand_dims(vid_numpy,axis=0) 260 | # print vid_numpy.shape 261 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 262 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 263 | w = x2-x1 264 | h = y2-y1 265 | roi = np.array([x1,y1,w,h]) 266 | rois = np.expand_dims(roi,axis=0) 267 | rois = np.expand_dims(rois,axis=0) 268 | # try: 269 | # rois, vid_numpy = get_batch(df, ind) 270 | shared_layers_orig = rgb_model.predict(vid_numpy) 271 | 272 | # print rois, shared_layers_orig.shape 273 | [P_cls]= model_classifier.predict([x_img,shared_layers_orig, rois]) 274 | 275 | seq_name = filepath.split(os.path.sep)[-2] 276 | f_predicted = open(output_csv_file,'a+') 277 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) if class_mapping[cn]!='bg'] 278 | f_predicted.close() 279 | 280 | # if k==1: 281 | # break 282 | except Exception as e: 283 | exc_type, exc_obj, exc_tb = sys.exc_info() 284 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 285 | print(exc_type, fname, exc_tb.tb_lineno) 286 | print('Exception: {}'.format(e)) 287 | print(filepath) 288 | continue 289 | -------------------------------------------------------------------------------- /test_frcnn_ava_bk.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | import time 31 | 32 | 33 | ''' 34 | python test_frcnn_AVA.py -m 
/work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/
35 |
36 | '''
37 | sys.setrecursionlimit(40000)
38 |
39 | parser = OptionParser()
40 |
41 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.")
42 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
43 |     help="Number of ROIs per iteration. Higher means more memory use.", default=4)
44 |
45 | parser.add_option("-v", "--val_data", type="str", dest="val_data",
46 |     help="CSV file with the validation annotations to evaluate.", default='ava_val_subset_80.csv')
47 | parser.add_option("-m", "--model_name", dest="model_name",
48 |     help="Path to model.")
49 | parser.add_option("-o", "--output", dest="output",
50 |     help="csv to save predictions.")
51 | parser.add_option("--config_filename", dest="config_filename", help=
52 |     "Location to read the metadata related to the training (generated when training).",
53 |     default="config_subset_AVA.pickle")
54 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
55 |
56 | (options, args) = parser.parse_args()
57 |
58 | if not options.test_path: # if filename is not given
59 |     parser.error('Error: path to test data must be specified. Pass --path to command line')
60 |
61 |
62 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/'
63 | config_output_filename = options.config_filename
64 | with open(config_output_filename, 'rb') as f_in:
65 |     C = pickle.load(f_in)
66 |
67 | if C.network == 'resnet50':
68 |     import keras_frcnn.resnet as nn
69 | elif C.network == 'vgg':
70 |     import keras_frcnn.vgg as nn
71 |
72 | # turn off any data augmentation at test time
73 | C.use_horizontal_flips = False
74 | C.use_vertical_flips = False
75 | C.rot_90 = False
76 | resized_width = 320
77 | resized_height = 400
78 | img_path = options.test_path
79 | output_csv_file = os.path.join('evaluation','phase2',options.output)
80 | fc = open(output_csv_file,'w+')
81 | def format_img_size(img, C):
82 |     """ formats the image size based on config """
83 |     img_min_side = float(C.im_size)
84 |     (height,width,_) = img.shape
85 |     ratio_w = resized_width/width
86 |     ratio_h = resized_height/height
87 |     new_width = resized_width
88 |     new_height = resized_height
89 |     img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
90 |     return img, ratio_w, ratio_h
91 |
92 | def format_img(img, C):
93 |     """ formats an image for model prediction based on config """
94 |     img, ratio_w, ratio_h= format_img_size(img, C)
95 |     img = format_img_channels(img, C)
96 |     return img, ratio_w, ratio_h
97 |
98 | # Method to transform the coordinates of the bounding box to its original size
99 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2):
100 |
101 |     real_x1 = int(round(x1 // ratio_w))
102 |     real_y1 = int(round(y1 // ratio_h))
103 |     real_x2 = int(round(x2 // ratio_w))
104 |     real_y2 = int(round(y2 // ratio_h))
105 |
106 |     return (real_x1, real_y1, real_x2 ,real_y2)
107 |
108 | class_mapping = C.class_mapping
109 | classes_count = class_mapping
110 | if 'bg' not in class_mapping:
111 |     class_mapping['bg'] = len(class_mapping)
112 |
113 | class_mapping = {v: k for k, v in class_mapping.items()}
114 | # print(class_mapping)
115 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
116 | C.num_rois = int(options.num_rois)
117 | # print "Num rois originally",C.num_rois
118 |
if C.network == 'resnet50': 119 | num_features = 1024 120 | elif C.network == 'vgg': 121 | num_features = 512 122 | 123 | if K.image_dim_ordering() == 'th': 124 | input_shape_img = (3, None, None) 125 | # input_shape_features = (num_features, None, None) 126 | else: 127 | input_shape_img = (None, None, 3) 128 | 129 | shared_layers_input= Input(shape=( None,None,832)) 130 | roi_input = Input(shape=(None, 4)) 131 | vid_input = Input(shape =(None, None, None, 3)) 132 | vid_input_shape = (64, 400,320, 3) 133 | feature_map_input = Input(shape=(None, None,None,832)) 134 | 135 | rgb_model = Inception_Inflated3d( 136 | include_top=False, 137 | weights='rgb_kinetics_only', 138 | input_shape=vid_input_shape, 139 | classes=classes_count) 140 | def get_new_img_size(width, height, img_min_side, C): 141 | img_min_side =448 142 | if width <= height: 143 | f = float(img_min_side) / width 144 | resized_height = int(f * height) 145 | resized_width = img_min_side 146 | else: 147 | f = float(img_min_side) / height 148 | resized_width = int(f * width) 149 | resized_height = img_min_side 150 | 151 | if C.dataset == 'AVA': 152 | return resized_width, resized_height 153 | else: 154 | return 640, 480 155 | 156 | def extract_numpy_single_frame(img,C): 157 | 158 | img = (img/255.)*2 - 1 159 | return img 160 | 161 | def get_frame_idx(img_path): 162 | winSize = 64 163 | tags = img_path.split(os.path.sep) 164 | vid_folder = '/'+'/'.join(tags[1:-1]) 165 | frames = os.listdir(vid_folder) 166 | if 'CAD' in img_path: 167 | frames = [f for f in frames if f.startswith('RGB')] 168 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 169 | else: 170 | frames.sort(key = lambda x: int(x.split('.')[0])) 171 | frame_index = frames.index(tags[-1]) 172 | fi = get_frames_index(frames,frame_index,winSize) 173 | seq =[frames[k] if k!=-1 else k for k in fi] 174 | # print(seq[0],seq[31]) 175 | # print seq 176 | return seq 177 | optimizer_classifier = Adam(lr=1e-5) 178 | classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count), trainable=True) 179 | model_classifier = Model([feature_map_input, roi_input], classifier) 180 | 181 | model_name = os.path.join(options.model_name,'model.hdf5') 182 | print('Loading weights from {}'.format(model_name)) 183 | model_classifier.load_weights(model_name, by_name=True) 184 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 185 | 186 | 187 | all_imgs = [] 188 | classes = {} 189 | bbox_threshold = 0.7 190 | visualise = True 191 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 192 | df = pd.read_csv(f_val) 193 | final_predictions = [] 194 | 195 | def get_batch(df, ind): 196 | rows = df.iloc[ind] 197 | # print rows 198 | roi_batch = [] 199 | vid_numpy_batch = [] 200 | for r in range(len(rows)): 201 | row = rows.iloc[r,:] 202 | # print row 203 | # print row[1] 204 | # bp() 205 | val_vid = row[0] 206 | vid_path = os.path.join(img_path,val_vid) 207 | img_name = str(int(row[1]))+'.jpg' 208 | k = 0 209 | k+=1 210 | 211 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 212 | continue 213 | st = time.time() 214 | filepath = os.path.join(vid_path,img_name) 215 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 216 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 217 | img = cv2.imread(filepath) 218 | # x_img = 219 | tags = filepath.split(os.path.sep) 220 | img_folder = 
'/'+'/'.join(tags[1:-1]) 221 | seq = get_frame_idx(filepath) 222 | 223 | # print filepath, seq 224 | vid_numpy = [] 225 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 226 | for frame in seq: 227 | if frame!=-1: 228 | fr_name = os.path.join(img_folder, frame) 229 | np_name = fr_name.replace('.jpg','.npy') 230 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 231 | fr_npy = np.load(np_name) 232 | vid_numpy.append(fr_npy) 233 | else: 234 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 235 | 236 | vid_numpy = np.array(vid_numpy) 237 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 238 | w = x2-x1 239 | h = y2-y1 240 | roi = np.array([x1,y1,w,h]) 241 | rois = np.expand_dims(roi,axis=0) 242 | rois = np.expand_dims(rois,axis=0) 243 | roi_batch.append(rois) 244 | vid_numpy_batch.append(vid_numpy) 245 | return np.array(roi_batch), np.array(vid_numpy_batch) 246 | 247 | indices = range(len(df)) 248 | print len(df) 249 | for i in tqdm(range(len(df))): 250 | try: 251 | # ind = indices[i:i+bs] 252 | row = df.iloc[i,:] 253 | val_vid = row[0] 254 | vid_path = os.path.join(img_path,val_vid) 255 | img_name = str(int(row[1]))+'.jpg' 256 | filepath = os.path.join(vid_path,img_name) 257 | k = 0 258 | k+=1 259 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 260 | continue 261 | st = time.time() 262 | filepath = os.path.join(vid_path,img_name) 263 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 264 | img = cv2.imread(filepath) 265 | tags = filepath.split(os.path.sep) 266 | img_folder = '/'+'/'.join(tags[1:-1]) 267 | seq = get_frame_idx(filepath) 268 | s1 = time.time() 269 | vid_numpy = [] 270 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 271 | for frame in seq: 272 | if frame!=-1: 273 | fr_name = os.path.join(img_folder, frame) 274 | np_name = fr_name.replace('.jpg','.npy') 275 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 276 | fr_npy = np.load(np_name) 277 | vid_numpy.append(fr_npy) 278 | else: 279 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 280 | e1 = time.time() 281 | vid_numpy = np.array(vid_numpy) 282 | vid_numpy = np.expand_dims(vid_numpy,axis=0) 283 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 284 | w = x2-x1 285 | h = y2-y1 286 | roi = np.array([x1,y1,w,h]) 287 | rois = np.expand_dims(roi,axis=0) 288 | rois = np.expand_dims(rois,axis=0) 289 | s2 = time.time() 290 | shared_layers_orig = rgb_model.predict(vid_numpy) 291 | # print rois, shared_layers_orig.shape 292 | s3 = time.time() 293 | [P_cls]= model_classifier.predict([shared_layers_orig, rois]) 294 | seq_name = filepath.split(os.path.sep)[-2] 295 | f_predicted = open(output_csv_file,'a+') 296 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(10) if class_mapping[cn]!='bg'] 297 | f_predicted.close() 298 | 299 | except Exception as e: 300 | exc_type, exc_obj, exc_tb = sys.exc_info() 301 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 302 | print(exc_type, fname, exc_tb.tb_lineno) 303 | print('Exception: 
{}'.format(e))
304 |         print(filepath)
305 |         continue
306 |
--------------------------------------------------------------------------------
/test_ava_concat.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import cv2
4 | import numpy as np
5 | import sys
6 | import pickle
7 | from optparse import OptionParser
8 | import time
9 | from keras_frcnn import config
10 | from keras import backend as K
11 | from keras.layers import Input
12 | from keras.models import Model
13 | from keras_frcnn import roi_helpers
14 | from keras_frcnn import config, data_generators
15 | from keras_frcnn import losses as losses
16 | import keras_frcnn.roi_helpers as roi_helpers
17 | from keras.utils import generic_utils
18 | from keras.layers import Lambda
19 | from i3d_inception import Inception_Inflated3d
20 | import collections
21 | from keras_frcnn.utils import *
22 | from pdb import set_trace as bp
23 | from tqdm import tqdm
24 | from keras_frcnn import losses as losses
25 | from keras.optimizers import Adam, SGD, RMSprop
26 | import pandas as pd
27 | from tqdm import tqdm
28 | from pdb import set_trace as bp
29 | from keras.utils.training_utils import multi_gpu_model
30 |
31 |
32 | '''
33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/
34 |
35 | '''
36 | sys.setrecursionlimit(40000)
37 |
38 | parser = OptionParser()
39 |
40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.")
41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
42 |     help="Number of ROIs per iteration. Higher means more memory use.", default=4)
43 |
44 | parser.add_option("-v", "--val_data", type="str", dest="val_data",
45 |     help="CSV file with the validation annotations to evaluate.", default='ava_val_subset_80.csv')
46 | parser.add_option("-m", "--model_name", dest="model_name",
47 |     help="Path to model.")
48 | parser.add_option("-o", "--output", dest="output",
49 |     help="csv to save predictions.")
50 | parser.add_option("--config_filename", dest="config_filename", help=
51 |     "Location to read the metadata related to the training (generated when training).",
52 |     default="config_subset_AVA.pickle")
53 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
54 |
55 | (options, args) = parser.parse_args()
56 |
57 | if not options.test_path: # if filename is not given
58 |     parser.error('Error: path to test data must be specified. 
Pass --path to command line') 59 | 60 | 61 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 62 | config_output_filename = options.config_filename 63 | with open(config_output_filename, 'rb') as f_in: 64 | C = pickle.load(f_in) 65 | 66 | if C.network == 'resnet50': 67 | import keras_frcnn.resnet as nn 68 | elif C.network == 'vgg': 69 | import keras_frcnn.vgg as nn 70 | 71 | # turn off any data augmentation at test time 72 | C.use_horizontal_flips = False 73 | C.use_vertical_flips = False 74 | C.rot_90 = False 75 | resized_width = 320 76 | resized_height = 400 77 | img_path = options.test_path 78 | output_csv_file = os.path.join('evaluation','phase2',options.output) 79 | fc = open(output_csv_file,'w+') 80 | def format_img_size(img, C): 81 | """ formats the image size based on config """ 82 | img_min_side = float(C.im_size) 83 | (height,width,_) = img.shape 84 | ratio_w = resized_width/width 85 | ratio_h = resized_height/height 86 | new_width = resized_width 87 | new_height = resized_height 88 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 89 | return img, ratio_w, ratio_h 90 | 91 | def format_img(img, C): 92 | """ formats an image for model prediction based on config """ 93 | img, ratio_w, ratio_h= format_img_size(img, C) 94 | img = format_img_channels(img, C) 95 | return img, ratio_w, ratio_h 96 | 97 | # Method to transform the coordinates of the bounding box to its original size 98 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 99 | 100 | real_x1 = int(round(x1 // ratio_w)) 101 | real_y1 = int(round(y1 // ratio_h)) 102 | real_x2 = int(round(x2 // ratio_w)) 103 | real_y2 = int(round(y2 // ratio_h)) 104 | 105 | return (real_x1, real_y1, real_x2 ,real_y2) 106 | 107 | class_mapping = C.class_mapping 108 | classes_count = class_mapping 109 | print len(class_mapping) 110 | # bp() 111 | # if 'bg' not in class_mapping: 112 | # class_mapping['bg'] = len(class_mapping) 113 | 114 | class_mapping = {v: k for k, v in class_mapping.items()} 115 | # print(class_mapping) 116 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 117 | C.num_rois = int(options.num_rois) 118 | # print "Num rois originally",C.num_rois 119 | if C.network == 'resnet50': 120 | num_features = 1024 121 | elif C.network == 'vgg': 122 | num_features = 512 123 | 124 | if K.image_dim_ordering() == 'th': 125 | input_shape_img = (3, None, None) 126 | # input_shape_features = (num_features, None, None) 127 | else: 128 | input_shape_img = (None, None, 3) 129 | 130 | shared_layers_input= Input(shape=( None,None,None,832)) 131 | roi_input = Input(shape=(None, 4)) 132 | vid_input = Input(shape =(None, None, None, 3)) 133 | vid_input_shape = (64, 400,320, 3) 134 | img_input = Input(shape=(None, None, 3)) 135 | 136 | feature_map_input = Input(shape=(None, None,None,832)) 137 | shared_layers_image = nn.nn_base(img_input, trainable=False) 138 | 139 | 140 | rgb_model = Inception_Inflated3d( 141 | include_top=False, 142 | weights='rgb_kinetics_only', 143 | input_shape=vid_input_shape, 144 | classes=classes_count) 145 | def get_new_img_size(width, height, img_min_side, C): 146 | img_min_side =448 147 | if width <= height: 148 | f = float(img_min_side) / width 149 | resized_height = int(f * height) 150 | resized_width = img_min_side 151 | else: 152 | f = float(img_min_side) / height 153 | resized_width = int(f * width) 154 | resized_height = img_min_side 155 | 156 | if C.dataset == 'AVA': 157 | return resized_width, 
resized_height 158 | else: 159 | return 640, 480 160 | 161 | def extract_numpy_single_frame(img,C): 162 | 163 | img = (img/255.)*2 - 1 164 | return img 165 | 166 | def get_frame_idx(img_path): 167 | winSize = 64 168 | tags = img_path.split(os.path.sep) 169 | vid_folder = '/'+'/'.join(tags[1:-1]) 170 | frames = os.listdir(vid_folder) 171 | if 'CAD' in img_path: 172 | frames = [f for f in frames if f.startswith('RGB')] 173 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 174 | else: 175 | frames.sort(key = lambda x: int(x.split('.')[0])) 176 | frame_index = frames.index(tags[-1]) 177 | fi = get_frames_index(frames,frame_index,winSize) 178 | seq =[frames[k] if k!=-1 else k for k in fi] 179 | # print(seq[0],seq[31]) 180 | # print seq 181 | return seq 182 | optimizer_classifier = Adam(lr=1e-5) 183 | # classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count), trainable=True) 184 | print len(classes_count) 185 | # bp() 186 | classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True) 187 | 188 | # model_classifier_only = Model([feature_map_input, roi_input], classifier) 189 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 190 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 191 | model_name = os.path.join(options.model_name,'model.hdf5') 192 | print('Loading weights from {}'.format(model_name)) 193 | model_classifier.load_weights(model_name, by_name=True) 194 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 195 | 196 | 197 | all_imgs = [] 198 | classes = {} 199 | bbox_threshold = 0.7 200 | visualise = True 201 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 202 | df = pd.read_csv(f_val) 203 | # ac_id = get_action_dic() 204 | 205 | # for val_vid in val_vids: 206 | final_predictions = [] 207 | 208 | 209 | indices = range(len(df)) 210 | print len(df) 211 | # bp() 212 | bs = 8 213 | for i in tqdm(range(len(df))): 214 | 215 | try: 216 | # ind = indices[i:i+bs] 217 | row = df.iloc[i,:] 218 | val_vid = row[0] 219 | vid_path = os.path.join(img_path,val_vid) 220 | img_name = str(int(row[1]))+'.jpg' 221 | filepath = os.path.join(vid_path,img_name) 222 | 223 | val_vid = row[0] 224 | vid_path = os.path.join(img_path,val_vid) 225 | img_name = str(int(row[1]))+'.jpg' 226 | k = 0 227 | k+=1 228 | 229 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 230 | continue 231 | st = time.time() 232 | filepath = os.path.join(vid_path,img_name) 233 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 234 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 235 | img = cv2.imread(filepath) 236 | # x_img = 237 | tags = filepath.split(os.path.sep) 238 | img_folder = '/'+'/'.join(tags[1:-1]) 239 | seq = get_frame_idx(filepath) 240 | 241 | # print filepath, seq 242 | vid_numpy = [] 243 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 244 | x_img = np.expand_dims(x_img, axis = 0) 245 | # print filepath, seq 246 | # bp() 247 | for frame in seq: 248 | if frame!=-1: 249 | fr_name = os.path.join(img_folder, frame) 250 | np_name = fr_name.replace('.jpg','.npy') 251 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 252 | # print np_name 253 | fr_npy = np.load(np_name) 254 | vid_numpy.append(fr_npy) 255 | 
else: 256 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 257 | 258 | vid_numpy = np.array(vid_numpy) 259 | vid_numpy = np.expand_dims(vid_numpy,axis=0) 260 | # print vid_numpy.shape 261 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 262 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 263 | w = x2-x1 264 | h = y2-y1 265 | roi = np.array([x1,y1,w,h]) 266 | rois = np.expand_dims(roi,axis=0) 267 | rois = np.expand_dims(rois,axis=0) 268 | # try: 269 | # rois, vid_numpy = get_batch(df, ind) 270 | shared_layers_orig = rgb_model.predict(vid_numpy) 271 | 272 | # print rois, shared_layers_orig.shape 273 | [P_cls]= model_classifier.predict([x_img,shared_layers_orig, rois]) 274 | # print y 275 | # print y.shape 276 | 277 | # except: 278 | # pass 279 | 280 | seq_name = filepath.split(os.path.sep)[-2] 281 | # bp() 282 | # line = [seq_name,str(fr_num).zfill(4),str(float(row[2])),str(float(row[3])),str(float(row[4])),str(float(row[5])),P_cls] 283 | # final_predictions.append(line) 284 | # for cn in range(P_cls.shape[1]): 285 | # class_num = cn 286 | # prob = P_cls[0,cn] 287 | # class_name = class_mapping[cn] 288 | # if class_name!='bg': 289 | # line = seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(ac_id[class_name])+','+str(prob) 290 | # # print line 291 | # f_predicted = open('evaluation/ava_predicted_cheating_subset_latest.csv','a+') 292 | # f_predicted.write(line+'\n') 293 | # f_predicted.close() 294 | f_predicted = open(output_csv_file,'a+') 295 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) if class_mapping[cn]!='bg'] 296 | f_predicted.close() 297 | 298 | # if k==1: 299 | # break 300 | except Exception as e: 301 | exc_type, exc_obj, exc_tb = sys.exc_info() 302 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 303 | print(exc_type, fname, exc_tb.tb_lineno) 304 | print('Exception: {}'.format(e)) 305 | print(filepath) 306 | continue 307 | -------------------------------------------------------------------------------- /test_final_i3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | 31 | 32 | ''' 33 | 
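Example usage. Editor's note: the paths below are cluster-specific placeholders; the -m
directory must contain model.hdf5, and predictions are appended to
evaluation/8_actions/<output>. A fuller call, spelling out the remaining options
(hypothetical values shown), would look like:

python test_final_i3d.py -p <keyframes_dir> -m <model_dir> -v ava_val_subset_80.csv -o preds.csv --version concat -t v1 --config_filename config_subset_AVA.pickle

The minimal call recorded for this script was: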
python test_final_i3d.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 34 | 
35 | ''' 36 | sys.setrecursionlimit(40000) 37 | 
38 | parser = OptionParser() 39 | 
40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 42 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 43 | 
44 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 45 | help="Validation csv filename (relative to the AVA dataset tool directory).", default='ava_val_subset_80.csv') 46 | parser.add_option("-m", "--model_name", dest="model_name", 47 | help="Path to model.") 48 | parser.add_option("-t", "--type", dest="type", 49 | help="Classifier head variant: v1 or v2.", default = 'v1') 50 | parser.add_option("-o", "--output", dest="output", 51 | help="csv to save predictions.") 52 | parser.add_option("--config_filename", dest="config_filename", help= 53 | "Location to read the metadata related to the training (generated when training).", 54 | default="config_subset_AVA.pickle") 55 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 56 | parser.add_option("--version", dest="version", help="Classifier head to use: concat, whole, or roi.", default='concat') 57 | 
58 | (options, args) = parser.parse_args() 59 | 
60 | if not options.test_path: # if filename is not given 61 | parser.error('Error: path to test data must be specified. Pass --path to command line') 62 | 
63 | 
64 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 65 | config_output_filename = options.config_filename 66 | with open(config_output_filename, 'rb') as f_in: 67 | C = pickle.load(f_in) 68 | 
69 | if C.network == 'resnet50': 70 | import keras_frcnn.resnet as nn 71 | elif C.network == 'vgg': 72 | import keras_frcnn.vgg as nn 73 | 
74 | # turn off any data augmentation at test time 75 | C.use_horizontal_flips = False 76 | C.use_vertical_flips = False 77 | C.rot_90 = False 78 | resized_width = 320 79 | resized_height = 400 80 | version = options.version 81 | img_path = options.test_path 82 | output_csv_file = os.path.join('evaluation','8_actions',options.output) 83 | fc = open(output_csv_file,'w+') 84 | fc.close() 85 | def format_img_size(img, C): 86 | """ formats the image size based on config """ 87 | img_min_side = float(C.im_size) 88 | (height,width,_) = img.shape 89 | ratio_w = resized_width/width 90 | ratio_h = resized_height/height 91 | new_width = resized_width 92 | new_height = resized_height 93 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 94 | return img, ratio_w, ratio_h 95 | 
96 | def format_img(img, C): 97 | """ formats an image for model prediction based on config """ 98 | img, ratio_w, ratio_h= format_img_size(img, C) 99 | img = format_img_channels(img, C) 100 | return img, ratio_w, ratio_h 101 | 
102 | # Method to transform the coordinates of the bounding box to its original size 103 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 104 | 
105 | real_x1 = int(round(x1 // ratio_w)) 106 | real_y1 = int(round(y1 // ratio_h)) 107 | real_x2 = int(round(x2 // ratio_w)) 108 | real_y2 = int(round(y2 // ratio_h)) 109 | 
110 | return (real_x1, real_y1, real_x2 ,real_y2) 111 | 
112 | class_mapping = C.class_mapping 113 | classes_count = class_mapping 114 | print len(class_mapping) 115 | type = options.type 116 | # bp() 117 | # 
if 'bg' not in class_mapping: 118 | # class_mapping['bg'] = len(class_mapping) 119 | 120 | class_mapping = {v: k for k, v in class_mapping.items()} 121 | # print(class_mapping) 122 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 123 | C.num_rois = int(options.num_rois) 124 | # print "Num rois originally",C.num_rois 125 | if C.network == 'resnet50': 126 | num_features = 1024 127 | elif C.network == 'vgg': 128 | num_features = 512 129 | 130 | if K.image_dim_ordering() == 'th': 131 | input_shape_img = (3, None, None) 132 | else: 133 | input_shape_img = (None, None, 3) 134 | 135 | # shared_layers_input= Input(shape=( None,None,None,832)) 136 | roi_input = Input(shape=(None, 4)) 137 | vid_input = Input(shape =(None, None, None, 3)) 138 | vid_input_shape = (64, 400,320, 3) 139 | img_input = Input(shape=(None, None, 3)) 140 | 141 | feature_map_input = Input(shape=( None,None,None,832)) 142 | shared_layers_image = nn.nn_base(img_input, trainable=False) 143 | 144 | 145 | rgb_model = Inception_Inflated3d( 146 | include_top=False, 147 | weights='rgb_kinetics_only', 148 | input_shape=vid_input_shape, 149 | classes=classes_count) 150 | 151 | def extract_numpy_single_frame(img,C): 152 | 153 | img = (img/255.)*2 - 1 154 | return img 155 | 156 | def get_frame_idx(img_path): 157 | winSize = 64 158 | tags = img_path.split(os.path.sep) 159 | vid_folder = '/'+'/'.join(tags[1:-1]) 160 | frames = os.listdir(vid_folder) 161 | if 'CAD' in img_path: 162 | frames = [f for f in frames if f.startswith('RGB')] 163 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 164 | else: 165 | frames.sort(key = lambda x: int(x.split('.')[0])) 166 | frame_index = frames.index(tags[-1]) 167 | fi = get_frames_index(frames,frame_index,winSize) 168 | seq =[frames[k] if k!=-1 else k for k in fi] 169 | # print(seq[0],seq[31]) 170 | # print seq 171 | return seq 172 | optimizer_classifier = Adam(lr=1e-5) 173 | print len(classes_count) 174 | 175 | if version == 'concat' and type == 'v1': 176 | classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True) 177 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 178 | elif version == 'whole' and type == 'v1': 179 | classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version) 180 | model_classifier = Model([feature_map_input], classifier) 181 | elif version == 'concat' and type == 'v2': 182 | classifier = nn.classifier_i3d_concat_v2(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True) 183 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 184 | elif version == 'roi': 185 | classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count)) 186 | model_classifier = Model([roi_input, feature_map_input], classifier) 187 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 188 | model_name = os.path.join(options.model_name,'model.hdf5') 189 | print('Loading weights from {}'.format(model_name)) 190 | model_classifier.load_weights(model_name, by_name=True) 191 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 192 | 193 | all_imgs = [] 194 | classes = {} 195 | bbox_threshold = 0.7 196 | visualise = True 197 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 198 | df = 
pd.read_csv(f_val) 199 | final_predictions = [] 200 | 201 | indices = range(len(df)) 202 | print len(df) 203 | # bp() 204 | bs = 8 205 | for i in tqdm(range(len(df))): 206 | 207 | try: 208 | # ind = indices[i:i+bs] 209 | row = df.iloc[i,:] 210 | val_vid = row[0] 211 | vid_path = os.path.join(img_path,val_vid) 212 | img_name = str(int(row[1]))+'.jpg' 213 | filepath = os.path.join(vid_path,img_name) 214 | 215 | val_vid = row[0] 216 | vid_path = os.path.join(img_path,val_vid) 217 | img_name = str(int(row[1]))+'.jpg' 218 | k = 0 219 | k+=1 220 | 221 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 222 | continue 223 | st = time.time() 224 | filepath = os.path.join(vid_path,img_name) 225 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 226 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 227 | img = cv2.imread(filepath) 228 | # x_img = 229 | tags = filepath.split(os.path.sep) 230 | img_folder = '/'+'/'.join(tags[1:-1]) 231 | seq = get_frame_idx(filepath) 232 | 233 | # print filepath, seq 234 | vid_numpy = [] 235 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 236 | x_img = np.expand_dims(x_img, axis = 0) 237 | # print filepath, seq 238 | # bp() 239 | for frame in seq: 240 | if frame!=-1: 241 | fr_name = os.path.join(img_folder, frame) 242 | np_name = fr_name.replace('.jpg','.npy') 243 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 244 | # print np_name 245 | fr_npy = np.load(np_name) 246 | vid_numpy.append(fr_npy) 247 | else: 248 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 249 | 250 | vid_numpy = np.array(vid_numpy) 251 | vid_numpy = np.expand_dims(vid_numpy,axis=0) 252 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 253 | shared_layers_orig = rgb_model.predict(vid_numpy) 254 | 255 | # print vid_numpy.shape 256 | if version == 'concat': 257 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 258 | w = x2-x1 259 | h = y2-y1 260 | roi = np.array([x1,y1,w,h]) 261 | rois = np.expand_dims(roi,axis=0) 262 | rois = np.expand_dims(rois,axis=0) 263 | # try: 264 | # rois, vid_numpy = get_batch(df, ind) 265 | # print rois, shared_layers_orig.shape 266 | [P_cls]= model_classifier.predict([x_img,shared_layers_orig, rois]) 267 | elif version == 'whole': 268 | [P_cls]= model_classifier.predict([shared_layers_orig]) 269 | elif version == 'roi': 270 | w = x2-x1 271 | h = y2-y1 272 | roi = np.array([x1,y1,w,h]) 273 | rois = np.expand_dims(roi,axis=0) 274 | rois = np.expand_dims(rois,axis=0) 275 | [P_cls]= model_classifier.predict([rois, shared_layers_orig]) 276 | seq_name = filepath.split(os.path.sep)[-2] 277 | # print(P_cls) 278 | # bp() 279 | # print output_csv_file 280 | # print(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[5])+','+str(P_cls[0,5])+'\n') 281 | et = time.time()-st 282 | print "The per frame time is {}".format(et) 283 | with open(output_csv_file,'a+') as f_predicted: 284 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) if class_mapping[cn]!='bg'] 285 | 286 | 
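# NOTE (editor): the broad handler below logs the exception type, source file, and
# line number, then skips to the next csv row, so a missing .npy feature file or a
# malformed annotation does not abort the whole evaluation run.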
except Exception as e: 287 | exc_type, exc_obj, exc_tb = sys.exc_info() 288 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 289 | print(exc_type, fname, exc_tb.tb_lineno) 290 | print('Exception: {}'.format(e)) 291 | print(filepath) 292 | continue 293 | -------------------------------------------------------------------------------- /test_frcnn_AVA.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | from keras.utils.training_utils import multi_gpu_model 30 | 31 | 32 | ''' 33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 34 | 35 | ''' 36 | sys.setrecursionlimit(40000) 37 | 38 | parser = OptionParser() 39 | 40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 42 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 43 | 44 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 45 | help="Number of ROIs per iteration. Higher means more memory use.", default='ava_val_subset_80.csv') 46 | parser.add_option("-m", "--model_name", dest="model_name", 47 | help="Path to model.") 48 | parser.add_option("-o", "--output", dest="output", 49 | help="csv to save predictions.") 50 | parser.add_option("--config_filename", dest="config_filename", help= 51 | "Location to read the metadata related to the training (generated when training).", 52 | default="config_subset_AVA.pickle") 53 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 54 | 55 | (options, args) = parser.parse_args() 56 | 57 | if not options.test_path: # if filename is not given 58 | parser.error('Error: path to test data must be specified. 
Pass --path to command line') 59 | 60 | 61 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 62 | config_output_filename = options.config_filename 63 | with open(config_output_filename, 'rb') as f_in: 64 | C = pickle.load(f_in) 65 | 66 | if C.network == 'resnet50': 67 | import keras_frcnn.resnet as nn 68 | elif C.network == 'vgg': 69 | import keras_frcnn.vgg as nn 70 | 71 | # turn off any data augmentation at test time 72 | C.use_horizontal_flips = False 73 | C.use_vertical_flips = False 74 | C.rot_90 = False 75 | resized_width = 320 76 | resized_height = 400 77 | img_path = options.test_path 78 | output_csv_file = os.path.join('evaluation',options.output) 79 | fc = open(output_csv_file,'w+') 80 | def format_img_size(img, C): 81 | """ formats the image size based on config """ 82 | img_min_side = float(C.im_size) 83 | (height,width,_) = img.shape 84 | ratio_w = resized_width/width 85 | ratio_h = resized_height/height 86 | # if width <= height: 87 | # ratio = img_min_side/width 88 | # new_height = int(ratio * height) 89 | # new_width = int(img_min_side) 90 | # else: 91 | # ratio = img_min_side/height 92 | # new_width = int(ratio * width) 93 | # new_height = int(img_min_side) 94 | new_width = resized_width 95 | new_height = resized_height 96 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 97 | return img, ratio_w, ratio_h 98 | 99 | def format_img(img, C): 100 | """ formats an image for model prediction based on config """ 101 | img, ratio_w, ratio_h= format_img_size(img, C) 102 | img = format_img_channels(img, C) 103 | return img, ratio_w, ratio_h 104 | 105 | # Method to transform the coordinates of the bounding box to its original size 106 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 107 | 108 | real_x1 = int(round(x1 // ratio_w)) 109 | real_y1 = int(round(y1 // ratio_h)) 110 | real_x2 = int(round(x2 // ratio_w)) 111 | real_y2 = int(round(y2 // ratio_h)) 112 | 113 | return (real_x1, real_y1, real_x2 ,real_y2) 114 | 115 | class_mapping = C.class_mapping 116 | classes_count = class_mapping 117 | if 'bg' not in class_mapping: 118 | class_mapping['bg'] = len(class_mapping) 119 | 120 | class_mapping = {v: k for k, v in class_mapping.items()} 121 | # print(class_mapping) 122 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 123 | C.num_rois = int(options.num_rois) 124 | # print "Num rois originally",C.num_rois 125 | if C.network == 'resnet50': 126 | num_features = 1024 127 | elif C.network == 'vgg': 128 | num_features = 512 129 | 130 | if K.image_dim_ordering() == 'th': 131 | input_shape_img = (3, None, None) 132 | # input_shape_features = (num_features, None, None) 133 | else: 134 | input_shape_img = (None, None, 3) 135 | 136 | shared_layers_input= Input(shape=( None,None,832)) 137 | roi_input = Input(shape=(None, 4)) 138 | vid_input = Input(shape =(None, None, None, 3)) 139 | vid_input_shape = (64, 400,320, 3) 140 | feature_map_input = Input(shape=(None, None,None,832)) 141 | 142 | rgb_model = Inception_Inflated3d( 143 | include_top=False, 144 | weights='rgb_kinetics_only', 145 | input_shape=vid_input_shape, 146 | classes=classes_count) 147 | def get_new_img_size(width, height, img_min_side, C): 148 | img_min_side =448 149 | if width <= height: 150 | f = float(img_min_side) / width 151 | resized_height = int(f * height) 152 | resized_width = img_min_side 153 | else: 154 | f = float(img_min_side) / height 155 | resized_width = int(f * width) 156 | resized_height = 
img_min_side 157 | 158 | if C.dataset == 'AVA': 159 | return resized_width, resized_height 160 | else: 161 | return 640, 480 162 | 163 | def extract_numpy_single_frame(img,C): 164 | 165 | img = (img/255.)*2 - 1 166 | return img 167 | 168 | def get_frame_idx(img_path): 169 | winSize = 64 170 | tags = img_path.split(os.path.sep) 171 | vid_folder = '/'+'/'.join(tags[1:-1]) 172 | frames = os.listdir(vid_folder) 173 | if 'CAD' in img_path: 174 | frames = [f for f in frames if f.startswith('RGB')] 175 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 176 | else: 177 | frames.sort(key = lambda x: int(x.split('.')[0])) 178 | frame_index = frames.index(tags[-1]) 179 | fi = get_frames_index(frames,frame_index,winSize) 180 | seq =[frames[k] if k!=-1 else k for k in fi] 181 | # print(seq[0],seq[31]) 182 | # print seq 183 | return seq 184 | optimizer_classifier = Adam(lr=1e-5) 185 | classifier = nn.classifier_i3d_batch(feature_map_input, roi_input, 1, nb_classes=len(classes_count), trainable=True) 186 | # model_classifier_only = Model([feature_map_input, roi_input], classifier) 187 | model_classifier = Model([feature_map_input, roi_input], classifier) 188 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 189 | 190 | 191 | 192 | model_name = os.path.join(options.model_name,'model.hdf5') 193 | print('Loading weights from {}'.format(model_name)) 194 | model_classifier.load_weights(model_name, by_name=True) 195 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 196 | 197 | 198 | all_imgs = [] 199 | classes = {} 200 | bbox_threshold = 0.7 201 | visualise = True 202 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 203 | df = pd.read_csv(f_val) 204 | # ac_id = get_action_dic() 205 | 206 | # for val_vid in val_vids: 207 | final_predictions = [] 208 | 209 | def get_batch(df, ind): 210 | rows = df.iloc[ind] 211 | # print rows 212 | roi_batch = [] 213 | vid_numpy_batch = [] 214 | for r in range(len(rows)): 215 | row = rows.iloc[r,:] 216 | # print row 217 | # print row[1] 218 | # bp() 219 | val_vid = row[0] 220 | vid_path = os.path.join(img_path,val_vid) 221 | img_name = str(int(row[1]))+'.jpg' 222 | k = 0 223 | k+=1 224 | 225 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 226 | continue 227 | st = time.time() 228 | filepath = os.path.join(vid_path,img_name) 229 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 230 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 231 | img = cv2.imread(filepath) 232 | # x_img = 233 | tags = filepath.split(os.path.sep) 234 | img_folder = '/'+'/'.join(tags[1:-1]) 235 | seq = get_frame_idx(filepath) 236 | 237 | # print filepath, seq 238 | vid_numpy = [] 239 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 240 | for frame in seq: 241 | if frame!=-1: 242 | fr_name = os.path.join(img_folder, frame) 243 | np_name = fr_name.replace('.jpg','.npy') 244 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 245 | # print np_name 246 | fr_npy = np.load(np_name) 247 | # fr_img = cv2.imread(fr_name) 248 | # fr_img = cv2.resize(fr_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 249 | # fr_npy = extract_numpy_single_frame(fr_img,C) 250 | vid_numpy.append(fr_npy) 251 | else: 252 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 253 | 254 | vid_numpy = np.array(vid_numpy) 255 | # 
vid_numpy = np.expand_dims(vid_numpy,axis=0) 256 | # print vid_numpy.shape 257 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 258 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 259 | w = x2-x1 260 | h = y2-y1 261 | roi = np.array([x1,y1,w,h]) 262 | rois = np.expand_dims(roi,axis=0) 263 | # rois = np.expand_dims(rois,axis=0) 264 | roi_batch.append(rois) 265 | vid_numpy_batch.append(vid_numpy) 266 | # rois = np.expand_dims(rois,axis=0) 267 | return np.array(roi_batch), np.array(vid_numpy_batch) 268 | 269 | indices = range(len(df)) 270 | print len(df) 271 | # bp() 272 | bs = 4 273 | for i in tqdm(range(0,len(df),bs)): 274 | 275 | try: 276 | ind = indices[i:i+bs] 277 | # row = df.iloc[i,:] 278 | # val_vid = row[0] 279 | # vid_path = os.path.join(img_path,val_vid) 280 | # img_name = str(int(row[1]))+'.jpg' 281 | # filepath = os.path.join(vid_path,img_name) 282 | # try: 283 | rois, vid_numpy = get_batch(df, ind) 284 | shared_layers_orig = rgb_model.predict(vid_numpy) 285 | 286 | # print rois, shared_layers_orig.shape 287 | y= model_classifier.predict([shared_layers_orig, rois]) 288 | # print y 289 | # print y.shape 290 | 291 | # except: 292 | # pass 293 | 294 | # seq_name = filepath.split(os.path.sep)[-2] 295 | # # bp() 296 | # # line = [seq_name,str(fr_num).zfill(4),str(float(row[2])),str(float(row[3])),str(float(row[4])),str(float(row[5])),P_cls] 297 | # # final_predictions.append(line) 298 | # # for cn in range(P_cls.shape[1]): 299 | # # class_num = cn 300 | # # prob = P_cls[0,cn] 301 | # # class_name = class_mapping[cn] 302 | # # if class_name!='bg': 303 | # # line = seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(ac_id[class_name])+','+str(prob) 304 | # # # print line 305 | # # f_predicted = open('evaluation/ava_predicted_cheating_subset_latest.csv','a+') 306 | # # f_predicted.write(line+'\n') 307 | # # f_predicted.close() 308 | # f_predicted = open(output_csv_file,'a+') 309 | # [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(81) if class_mapping[cn]!='bg'] 310 | # f_predicted.close() 311 | 312 | # if k==1: 313 | # break 314 | except Exception as e: 315 | exc_type, exc_obj, exc_tb = sys.exc_info() 316 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 317 | print(exc_type, fname, exc_tb.tb_lineno) 318 | print('Exception: {}'.format(e)) 319 | # print(filepath) 320 | continue 321 | -------------------------------------------------------------------------------- /train_frcnn_i3d_cls.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = 
tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | 
20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | # import tensorflow as tf 37 | # import keras.backend.tensorflow_backend as ktf 38 | sys.setrecursionlimit(40000) 39 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d') 40 | old_stdout = sys.stdout 41 | log_file = open("message.log","w") 42 | from i3d_inception import Inception_Inflated3d 43 | # from i3d_inception import Inception_Inflated3d 44 | # from tensorflow.python import keras 45 | from keras.utils import plot_model 46 | import os 47 | import pdb 48 | from keras.layers import Input 49 | import logging 50 | logging.basicConfig(filename='example.log',level=logging.DEBUG) 51 | parser = OptionParser() 52 | 
53 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 54 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", 55 | default="pascal_voc") 56 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4) 57 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Annotation index to start loading from.", default=0) 58 | 
59 | parser.add_option("-m", "--output_weight_path",dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 60 | parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to train on (e.g. AVA).", default='AVA') 61 | parser.add_option("-e","--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000) 62 | 
63 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 64 | parser.add_option("--aug", dest="aug", type = int,help="Whether to apply data augmentation (0 or 1).", default=0) 65 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 66 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False) 67 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 68 | action="store_true", default=False) 69 | parser.add_option("--config_filename", dest="config_filename", help= 70 | "Location to store all the metadata related to the training (to be used when testing).", 71 | default="config.pickle") 72 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. 
If not specified, will try to load default weights provided by keras.") 73 | parser.add_option("--j", dest="job", help="If the job output should be saved") 74 | 75 | (options, args) = parser.parse_args() 76 | 77 | if not options.train_path: # if filename is not given 78 | parser.error('Error: path to training data must be specified. Pass --path to command line') 79 | 80 | if options.parser == 'pascal_voc': 81 | from keras_frcnn.pascal_voc_parser import get_data 82 | elif options.parser == 'simple': 83 | from keras_frcnn.simple_parser import get_data 84 | else: 85 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 86 | 87 | # pass the settings from the command line, and persist them in the config object 88 | C = config.Config() 89 | 90 | C.use_horizontal_flips = bool(options.horizontal_flips) 91 | C.use_vertical_flips = bool(options.vertical_flips) 92 | C.rot_90 = bool(options.rot_90) 93 | C.dataset = options.dataset 94 | C.augment = options.aug 95 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 96 | C.model_path = output_weight_path 97 | C.num_rois = int(options.num_rois) 98 | 99 | if options.network == 'vgg': 100 | C.network = 'vgg' 101 | from keras_frcnn import vgg as nn 102 | elif options.network == 'resnet50': 103 | from keras_frcnn import resnet as nn 104 | C.network = 'resnet50' 105 | else: 106 | print('Not a valid model') 107 | raise ValueError 108 | 109 | 110 | # check if weight path was passed via command line 111 | if options.input_weight_path: 112 | C.base_net_weights = options.input_weight_path 113 | else: 114 | # set the path to weights based on backend and model 115 | C.base_net_weights = nn.get_weight_path() 116 | 117 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 118 | 119 | job = options.job 120 | print(len(classes_count)) 121 | if job: 122 | sys.stdout = log_file 123 | 124 | 125 | if 'bg' not in classes_count: 126 | classes_count['bg'] = 0 127 | class_mapping['bg'] = len(class_mapping) 128 | 129 | C.class_mapping = class_mapping 130 | 131 | inv_map = {v: k for k, v in class_mapping.items()} 132 | 133 | print('Training images per class:') 134 | pprint.pprint(classes_count) 135 | print('Num classes (including bg) = {}'.format(len(classes_count))) 136 | 137 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 138 | 139 | with open(config_output_filename, 'wb') as config_f: 140 | pickle.dump(C,config_f) 141 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 142 | 143 | random.shuffle(all_imgs) 144 | 145 | num_imgs = len(all_imgs) 146 | 147 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 148 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 149 | 150 | print('Num train samples {}'.format(len(train_imgs))) 151 | print('Num val samples {}'.format(len(val_imgs))) 152 | 153 | 154 | data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 155 | data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 156 | 157 | roi_input = Input(shape=(None, 4),name = 'roi_input') 158 | vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 159 | vid_input_shape = (64, 400,320, 3) 160 | rgb_model = Inception_Inflated3d( 161 | include_top=False, 162 | 
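# NOTE (editor): with include_top=False the Kinetics-pretrained I3D backbone returns
# a 5D spatio-temporal feature volume rather than class scores; the test scripts in
# this repo size that input as (None, None, None, 832) channels, and it is what the
# classifier_i3d head consumes alongside the RoIs.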
weights='rgb_kinetics_only', 163 | input_shape=vid_input_shape, 164 | classes=classes_count) 165 | shared_layers_orig = rgb_model(vid_input) 166 | classifier = nn.classifier_i3d(shared_layers_orig, roi_input, 1, nb_classes=len(classes_count), trainable=True) 167 | model_classifier = Model([roi_input, vid_input], classifier) 168 | 169 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 170 | model_all = Model([vid_input, roi_input], classifier) 171 | plot_model(model_all, to_file='model_all_i3d_cls_latest.png', show_shapes = True) 172 | log_folder = os.path.join(options.output_weight_path,'logs/') 173 | if not os.path.isdir(log_folder): 174 | os.makedirs(log_folder) 175 | tensorboard = TensorBoard(log_dir=log_folder) 176 | tensorboard.set_model(model_classifier) 177 | train_names = ['train_loss', 'train_mae'] 178 | def write_log(callback, names, logs, batch_no): 179 | for name, value in zip(names, logs): 180 | summary = tf.Summary() 181 | summary_value = summary.value.add() 182 | summary_value.simple_value = value 183 | summary_value.tag = name 184 | callback.writer.add_summary(summary, batch_no) 185 | callback.writer.flush() 186 | 187 | optimizer = Adam(lr=1e-5) 188 | optimizer_classifier = Adam(lr=1e-5) 189 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls]) 190 | model_all.compile(optimizer='sgd', loss='mae') 191 | 192 | epoch_length = 1000 193 | epoch_length_val = 100 194 | num_epochs = int(options.num_epochs) 195 | iter_num = 0 196 | iter_num_tensorboard = 0 197 | total_cur_loss = [] 198 | total_cur_loss_val = [] 199 | iter_num_val_tensorboard = 0 200 | losses = np.zeros((epoch_length, 1)) 201 | losses_val = np.zeros((epoch_length_val, 1)) 202 | rpn_accuracy_rpn_monitor = [] 203 | rpn_accuracy_for_epoch = [] 204 | start_time = time.time() 205 | ###### val ##### 206 | rpn_accuracy_rpn_monitor_val = [] 207 | rpn_accuracy_for_epoch_val = [] 208 | 209 | ################ 210 | best_loss = np.Inf 211 | 212 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 213 | print('Starting training') 214 | 215 | vis = True 216 | 217 | for epoch_num in range(num_epochs): 218 | progbar = generic_utils.Progbar(epoch_length) 219 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 220 | while True: 221 | try: 222 | img_data, seq_numpy, x_img = next(data_gen_train) 223 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 224 | 225 | loss_class = model_classifier.train_on_batch([X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 226 | losses[iter_num, 0] = loss_class 227 | iter_num += 1 228 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 229 | iter_num_tensorboard+=1 230 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 231 | 232 | if iter_num == epoch_length: 233 | loss_class_cls = np.mean(losses[:, 0]) 234 | curr_loss =loss_class_cls 235 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 236 | total_cur_loss.append(curr_loss) 237 | iter_num = 0 238 | start_time = time.time() 239 | 240 | #################### Val ######################################################### 241 | iter_num_val = 0 242 | 243 | while True: 244 | # try: 245 | img_data, seq_numpy, x_img = next(data_gen_val) 246 | # print("validation") 247 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 248 | loss_class = model_classifier.train_on_batch([ X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 249 | losses_val[iter_num_val,0] = loss_class 250 | iter_num_val += 1 
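# NOTE (editor): the validation pass above calls train_on_batch, which performs a
# gradient update on the held-out split. If a pure evaluation is intended, Keras's
# test_on_batch has the same signature and returns the loss without touching the weights:
# loss_class = model_classifier.test_on_batch([X2[:, :, :], seq_numpy], [Y1[:, :, :]])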
251 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 252 | iter_num_val_tensorboard+=1 253 | if iter_num_val == epoch_length_val: 254 | 255 | loss_class_cls = np.mean(losses_val[:, 0]) 256 | curr_loss_val = loss_class_cls 257 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 258 | total_cur_loss_val.append(curr_loss_val) 259 | # total_cur_loss.append(curr_loss) 260 | iter_num_val = 0 261 | break 262 | 263 | if curr_loss < best_loss: 264 | if C.verbose: 265 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 266 | best_loss = curr_loss 267 | model_all.save_weights(C.model_path) 268 | 269 | break 270 | 271 | except Exception as e: 272 | exc_type, exc_obj, exc_tb = sys.exc_info() 273 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 274 | print(exc_type, fname, exc_tb.tb_lineno) 275 | print('Exception: {}'.format(e)) 276 | # print(seq_numpy.shape) 277 | continue 278 | sys.stdout = old_stdout 279 | 280 | plt.plot(total_cur_loss) 281 | plt.plot(total_cur_loss_val) 282 | plt.legend(['train loss', 'val loss'], loc='upper left') 283 | 284 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 285 | plt.savefig(savefigure) 286 | print('Training complete, exiting.') 287 | log_file.close() 288 | -------------------------------------------------------------------------------- /test_final_noI3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | 31 | 32 | ''' 33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 34 | 35 | ''' 36 | sys.setrecursionlimit(40000) 37 | 38 | parser = OptionParser() 39 | 40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 42 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 43 | 44 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 45 | help="Number of ROIs per iteration. 
Higher means more memory use.", default='ava_val_subset_80.csv') 46 | parser.add_option("-m", "--model_name", dest="model_name", 47 | help="Path to model.") 48 | parser.add_option("-t", "--type", dest="type", 49 | help="type.", default = 'v1') 50 | parser.add_option("-o", "--output", dest="output", 51 | help="csv to save predictions.") 52 | parser.add_option("--config_filename", dest="config_filename", help= 53 | "Location to read the metadata related to the training (generated when training).", 54 | default="config_subset_AVA.pickle") 55 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 56 | parser.add_option("--version", dest="version", help="Base network to use. Supports vgg or resnet50.", default='concat') 57 | 58 | (options, args) = parser.parse_args() 59 | 60 | if not options.test_path: # if filename is not given 61 | parser.error('Error: path to test data must be specified. Pass --path to command line') 62 | 63 | 64 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 65 | config_output_filename = options.config_filename 66 | with open(config_output_filename, 'rb') as f_in: 67 | C = pickle.load(f_in) 68 | 69 | if C.network == 'resnet50': 70 | import keras_frcnn.resnet as nn 71 | elif C.network == 'vgg': 72 | import keras_frcnn.vgg as nn 73 | 74 | # turn off any data augmentation at test time 75 | C.use_horizontal_flips = False 76 | C.use_vertical_flips = False 77 | C.rot_90 = False 78 | resized_width = 320 79 | resized_height = 400 80 | version = options.version 81 | img_path = options.test_path 82 | output_csv_file = os.path.join('evaluation','8_actions',options.output) 83 | fc = open(output_csv_file,'w+') 84 | fc.close() 85 | def format_img_size(img, C): 86 | """ formats the image size based on config """ 87 | img_min_side = float(C.im_size) 88 | (height,width,_) = img.shape 89 | ratio_w = resized_width/width 90 | ratio_h = resized_height/height 91 | new_width = resized_width 92 | new_height = resized_height 93 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 94 | return img, ratio_w, ratio_h 95 | 96 | def format_img(img, C): 97 | """ formats an image for model prediction based on config """ 98 | img, ratio_w, ratio_h= format_img_size(img, C) 99 | img = format_img_channels(img, C) 100 | return img, ratio_w, ratio_h 101 | 102 | # Method to transform the coordinates of the bounding box to its original size 103 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 104 | 105 | real_x1 = int(round(x1 // ratio_w)) 106 | real_y1 = int(round(y1 // ratio_h)) 107 | real_x2 = int(round(x2 // ratio_w)) 108 | real_y2 = int(round(y2 // ratio_h)) 109 | 110 | return (real_x1, real_y1, real_x2 ,real_y2) 111 | 112 | class_mapping = C.class_mapping 113 | classes_count = class_mapping 114 | print len(class_mapping) 115 | type = options.type 116 | # bp() 117 | # if 'bg' not in class_mapping: 118 | # class_mapping['bg'] = len(class_mapping) 119 | 120 | class_mapping = {v: k for k, v in class_mapping.items()} 121 | # print(class_mapping) 122 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 123 | C.num_rois = int(options.num_rois) 124 | # print "Num rois originally",C.num_rois 125 | if C.network == 'resnet50': 126 | num_features = 1024 127 | elif C.network == 'vgg': 128 | num_features = 512 129 | 130 | if K.image_dim_ordering() == 'th': 131 | input_shape_img = (3, None, None) 132 | else: 133 | input_shape_img = 
(None, None, 3) 134 | 135 | # shared_layers_input= Input(shape=( None,None,None,832)) 136 | roi_input = Input(shape=(None, 4)) 137 | # vid_input = Input(shape =(None, None, None, 3)) 138 | # vid_input_shape = (64, 400,320, 3) 139 | img_input = Input(shape=(None, None, 3)) 140 | 141 | feature_map_input = Input(shape=( None,None,None,832)) 142 | shared_layers_image = nn.nn_base(img_input, trainable=False) 143 | 144 | 145 | # rgb_model = Inception_Inflated3d( 146 | # include_top=False, 147 | # weights='rgb_kinetics_only', 148 | # input_shape=vid_input_shape, 149 | # classes=classes_count) 150 | 151 | def extract_numpy_single_frame(img,C): 152 | 153 | img = (img/255.)*2 - 1 154 | return img 155 | 156 | def get_frame_idx(img_path): 157 | winSize = 64 158 | tags = img_path.split(os.path.sep) 159 | vid_folder = '/'+'/'.join(tags[1:-1]) 160 | frames = os.listdir(vid_folder) 161 | if 'CAD' in img_path: 162 | frames = [f for f in frames if f.startswith('RGB')] 163 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 164 | else: 165 | frames.sort(key = lambda x: int(x.split('.')[0])) 166 | frame_index = frames.index(tags[-1]) 167 | fi = get_frames_index(frames,frame_index,winSize) 168 | seq =[frames[k] if k!=-1 else k for k in fi] 169 | # print(seq[0],seq[31]) 170 | # print seq 171 | return seq 172 | optimizer_classifier = Adam(lr=1e-5) 173 | print len(classes_count) 174 | 175 | if version == 'concat' and type == 'v1': 176 | classifier = classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=8, trainable=True) 177 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 178 | elif version == 'whole' : 179 | classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version) 180 | model_classifier = Model([feature_map_input], classifier) 181 | elif version == 'concat' and type == 'v2': 182 | classifier = nn.classifier_i3d_concat_v2(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True) 183 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 184 | 185 | elif version == 'roi': 186 | classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count)) 187 | model_classifier = Model([roi_input, feature_map_input], classifier) 188 | 189 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 190 | # model_all = Model([i3d_features, roi_input], classifier) 191 | # elif version == 'whole' and type == 'v2': 192 | # classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version) 193 | # model_classifier = Model([feature_map_input], classifier) 194 | 195 | 196 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 197 | model_name = os.path.join(options.model_name,'model.hdf5') 198 | print('Loading weights from {}'.format(model_name)) 199 | model_classifier.load_weights(model_name, by_name=True) 200 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 201 | 202 | all_imgs = [] 203 | classes = {} 204 | bbox_threshold = 0.7 205 | visualise = True 206 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 207 | df = pd.read_csv(f_val) 208 | final_predictions = [] 209 | 210 | indices = range(len(df)) 211 | print len(df) 212 | # bp() 213 | bs = 8 214 | for i in 
tqdm(range(len(df))): 215 | 216 | try: 217 | # ind = indices[i:i+bs] 218 | row = df.iloc[i,:] 219 | val_vid = row[0] 220 | vid_path = os.path.join(img_path,val_vid) 221 | img_name = str(int(row[1]))+'.jpg' 222 | 223 | filepath = os.path.join(vid_path,img_name) 224 | fp = filepath.replace('keyframes','numpy_8_actions') 225 | fp = fp.replace('.jpg','.npy') 226 | val_vid = row[0] 227 | vid_path = os.path.join(img_path,val_vid) 228 | img_name = str(int(row[1]))+'.jpg' 229 | k = 0 230 | k+=1 231 | 232 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 233 | continue 234 | st = time.time() 235 | filepath = os.path.join(vid_path,img_name) 236 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 237 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 238 | img = cv2.imread(filepath) 239 | # x_img = 240 | tags = filepath.split(os.path.sep) 241 | img_folder = '/'+'/'.join(tags[1:-1]) 242 | shared_layers_orig = np.load(fp) 243 | # seq = get_frame_idx(filepath) 244 | # 245 | # # print filepath, seq 246 | # vid_numpy = [] 247 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 248 | x_img = np.expand_dims(x_img, axis = 0) 249 | # # print filepath, seq 250 | # # bp() 251 | # for frame in seq: 252 | # if frame!=-1: 253 | # fr_name = os.path.join(img_folder, frame) 254 | # np_name = fr_name.replace('.jpg','.npy') 255 | # np_name = np_name.replace('train/keyframes','numpy_arrays_val') 256 | # # print np_name 257 | # fr_npy = np.load(np_name) 258 | # vid_numpy.append(fr_npy) 259 | # else: 260 | # vid_numpy.append(np.zeros((resized_height,resized_width,3))) 261 | # 262 | # vid_numpy = np.array(vid_numpy) 263 | # vid_numpy = np.expand_dims(vid_numpy,axis=0) 264 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 265 | # shared_layers_orig = rgb_model.predict(vid_numpy) 266 | 267 | # print vid_numpy.shape 268 | if version == 'concat': 269 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 270 | w = x2-x1 271 | h = y2-y1 272 | roi = np.array([x1,y1,w,h]) 273 | rois = np.expand_dims(roi,axis=0) 274 | rois = np.expand_dims(rois,axis=0) 275 | # try: 276 | # rois, vid_numpy = get_batch(df, ind) 277 | # print rois, shared_layers_orig.shape 278 | [P_cls]= model_classifier.predict([x_img,shared_layers_orig, rois]) 279 | elif version == 'whole': 280 | [P_cls]= model_classifier.predict([shared_layers_orig]) 281 | 282 | elif version == 'roi': 283 | w = x2-x1 284 | h = y2-y1 285 | roi = np.array([x1,y1,w,h]) 286 | rois = np.expand_dims(roi,axis=0) 287 | rois = np.expand_dims(rois,axis=0) 288 | [P_cls]= model_classifier.predict([rois, shared_layers_orig]) 289 | 290 | seq_name = filepath.split(os.path.sep)[-2] 291 | # print(P_cls) 292 | # bp() 293 | # print output_csv_file 294 | # print(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[5])+','+str(P_cls[0,5])+'\n') 295 | et = time.time()-st 296 | # print "The per frame time is {}".format(et) 297 | with open(output_csv_file,'a+') as f_predicted: 298 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) 
if class_mapping[cn]!='bg'] 299 | 300 | # if k==1: 301 | # break 302 | except Exception as e: 303 | exc_type, exc_obj, exc_tb = sys.exc_info() 304 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 305 | print(exc_type, fname, exc_tb.tb_lineno) 306 | print('Exception: {}'.format(e)) 307 | print(filepath) 308 | continue 309 | -------------------------------------------------------------------------------- /train_frcnn_i3d_batch.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | 20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | from keras.utils.training_utils import multi_gpu_model 37 | 38 | 39 | # import tensorflow as tf 40 | # import keras.backend.tensorflow_backend as ktf 41 | 42 | sys.setrecursionlimit(40000) 43 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d') 44 | # def get_session(gpu_fraction=0.333): 45 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction, 46 | # allow_growth=True) 47 | # return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 48 | # ktf.set_session(get_session()) 49 | 50 | 51 | old_stdout = sys.stdout 52 | 53 | log_file = open("message.log","w") 54 | 55 | 56 | from i3d_inception import Inception_Inflated3d 57 | # from i3d_inception import Inception_Inflated3d 58 | # from tensorflow.python import keras 59 | from keras.utils import plot_model 60 | import os 61 | import pdb 62 | from keras.layers import Input 63 | import logging 64 | from pdb import set_trace as bp 65 | 66 | logging.basicConfig(filename='example.log',level=logging.DEBUG) 67 | parser = OptionParser() 68 | 69 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 70 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. 
One of simple or pascal_voc", 71 | default="pascal_voc") 72 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4) 73 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Annotation index to start loading from.", default=0) 74 | 
75 | parser.add_option("-m", "--output_weight_path",dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 76 | parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to train on (e.g. AVA).", default='AVA') 77 | parser.add_option("-e","--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000) 78 | 
79 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 80 | parser.add_option("--aug", dest="aug", type = int,help="Whether to apply data augmentation (0 or 1).", default=0) 81 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 82 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False) 83 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 84 | action="store_true", default=False) 85 | parser.add_option("--config_filename", dest="config_filename", help= 86 | "Location to store all the metadata related to the training (to be used when testing).", 87 | default="config.pickle") 88 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.") 89 | parser.add_option("--j", dest="job", help="If the job output should be saved") 90 | 
91 | (options, args) = parser.parse_args() 92 | 
93 | if not options.train_path: # if filename is not given 94 | parser.error('Error: path to training data must be specified. 
Pass --path to command line') 95 | 96 | if options.parser == 'pascal_voc': 97 | from keras_frcnn.pascal_voc_parser import get_data 98 | elif options.parser == 'simple': 99 | from keras_frcnn.simple_parser import get_data 100 | else: 101 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 102 | 103 | # pass the settings from the command line, and persist them in the config object 104 | C = config.Config() 105 | 106 | C.use_horizontal_flips = bool(options.horizontal_flips) 107 | C.use_vertical_flips = bool(options.vertical_flips) 108 | C.rot_90 = bool(options.rot_90) 109 | C.dataset = options.dataset 110 | C.augment = options.aug 111 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 112 | C.model_path = output_weight_path 113 | C.num_rois = int(options.num_rois) 114 | C.bs =16 115 | if options.network == 'vgg': 116 | C.network = 'vgg' 117 | from keras_frcnn import vgg as nn 118 | elif options.network == 'resnet50': 119 | from keras_frcnn import resnet as nn 120 | C.network = 'resnet50' 121 | else: 122 | print('Not a valid model') 123 | raise ValueError 124 | 125 | 126 | # check if weight path was passed via command line 127 | if options.input_weight_path: 128 | C.base_net_weights = options.input_weight_path 129 | else: 130 | # set the path to weights based on backend and model 131 | C.base_net_weights = nn.get_weight_path() 132 | 133 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 134 | 135 | job = options.job 136 | print(len(classes_count)) 137 | if job: 138 | sys.stdout = log_file 139 | 140 | 141 | if 'bg' not in classes_count: 142 | classes_count['bg'] = 0 143 | class_mapping['bg'] = len(class_mapping) 144 | 145 | C.class_mapping = class_mapping 146 | 147 | inv_map = {v: k for k, v in class_mapping.items()} 148 | 149 | print('Training images per class:') 150 | pprint.pprint(classes_count) 151 | print('Num classes (including bg) = {}'.format(len(classes_count))) 152 | 153 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 154 | 155 | with open(config_output_filename, 'wb') as config_f: 156 | pickle.dump(C,config_f) 157 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 158 | 159 | random.shuffle(all_imgs) 160 | 161 | num_imgs = len(all_imgs) 162 | 163 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 164 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 165 | 166 | print('Num train samples {}'.format(len(train_imgs))) 167 | print('Num val samples {}'.format(len(val_imgs))) 168 | 169 | indices = range(len(train_imgs)) 170 | val_indices = range(len(val_imgs)) 171 | data_gen_train = data_generators.get_anchor_gt_batch(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(),indices, mode='train') 172 | data_gen_val = data_generators.get_anchor_gt_batch_val(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), val_indices,mode='val') 173 | 174 | roi_input = Input(shape=(None, 4),name = 'roi_input') 175 | vid_input = Input(shape =(64, 400, 320, 3),name = 'vid_input') 176 | vid_input_shape = (64, 400,320, 3) 177 | rgb_model = Inception_Inflated3d( 178 | include_top=False, 179 | weights='rgb_kinetics_only', 180 | input_shape=vid_input_shape, 181 | classes=classes_count) 182 | 183 | # shared_layers_image = nn.nn_base(img_input, trainable=True) 184 | shared_layers_orig = rgb_model(vid_input) 185 | print 
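# [Editor's sketch] The block above pickles the Config object so the test
# scripts can restore identical preprocessing settings (class_mapping,
# im_size, augmentation flags, ...). The round-trip in isolation, assuming a
# Config object like keras_frcnn.config.Config:
import pickle

def save_config(C, path):
    with open(path, 'wb') as f:
        pickle.dump(C, f)

def load_config(path):
    with open(path, 'rb') as f:
        return pickle.load(f)

# At test time the unpickled object carries the exact training-time settings,
# which is why each script prints a reminder of where the pickle was written.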
shared_layers_orig.shape.as_list() 186 | # bp() 187 | def slice_tensor(shared_layers): 188 | 189 | feature_shape = shared_layers.shape.as_list() 190 | shared_layers = shared_layers[:,8,:,:,:] 191 | return shared_layers 192 | 193 | 194 | shared_layers = Lambda(slice_tensor)(shared_layers_orig) 195 | classifier = nn.classifier_i3d_batch(shared_layers_orig, roi_input, 1, nb_classes=len(classes_count), trainable=True) 196 | # model_rpn = Model(vid_input, rpn[:2]) 197 | model_classifier = Model([roi_input, vid_input], classifier) 198 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 199 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 200 | model_all = Model([vid_input, roi_input], classifier) 201 | 202 | # model_all = multi_gpu_model(model_all, gpus=2) 203 | plot_model(model_all, to_file='model_all_i3d_cls_batch.png', show_shapes = True) 204 | log_folder = os.path.join(options.output_weight_path,'logs/') 205 | if not os.path.isdir(log_folder): 206 | os.makedirs(log_folder) 207 | tensorboard = TensorBoard(log_dir=log_folder) 208 | tensorboard.set_model(model_classifier) 209 | train_names = ['train_loss', 'train_mae'] 210 | def write_log(callback, names, logs, batch_no): 211 | for name, value in zip(names, logs): 212 | summary = tf.Summary() 213 | summary_value = summary.value.add() 214 | summary_value.simple_value = value 215 | summary_value.tag = name 216 | callback.writer.add_summary(summary, batch_no) 217 | callback.writer.flush() 218 | 219 | optimizer = Adam(lr=1e-5) 220 | optimizer_classifier = Adam(lr=1e-5) 221 | if C.dataset == 'AVA': 222 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 223 | else: 224 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls]) 225 | 226 | model_all.compile(optimizer='sgd', loss='mae') 227 | 228 | 229 | 230 | epoch_length = 1000 231 | epoch_length_val = 100 232 | num_epochs = int(options.num_epochs) 233 | iter_num = 0 234 | iter_num_tensorboard = 0 235 | total_cur_loss = [] 236 | total_cur_loss_val = [] 237 | iter_num_val_tensorboard = 0 238 | losses = np.zeros((epoch_length, 1)) 239 | losses_val = np.zeros((epoch_length_val, 1)) 240 | rpn_accuracy_rpn_monitor = [] 241 | rpn_accuracy_for_epoch = [] 242 | start_time = time.time() 243 | ###### val ##### 244 | rpn_accuracy_rpn_monitor_val = [] 245 | rpn_accuracy_for_epoch_val = [] 246 | 247 | ################ 248 | best_loss = np.Inf 249 | 250 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 251 | print('Starting training') 252 | 253 | vis = True 254 | 255 | for epoch_num in range(num_epochs): 256 | 257 | progbar = generic_utils.Progbar(epoch_length) 258 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 259 | 260 | while True: 261 | try: 262 | img_data, seq_numpy, x_img = next(data_gen_train) 263 | print(seq_numpy.shape, x_img.shape) 264 | # print type(img_data) 265 | X2, Y1 = roi_helpers.calc_iou_batch(img_data, C, class_mapping) 266 | print(X2.shape, Y1.shape) 267 | # bp() 268 | loss_class = model_classifier.train_on_batch([X2[ :, :,:],seq_numpy], [Y1[:, :, :]]) 269 | # losses[iter_num, 0] = loss_class 270 | # 271 | iter_num += 1 272 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 273 | iter_num_tensorboard+=1 274 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 275 | 276 | if iter_num == epoch_length: 277 | loss_class_cls = np.mean(losses[:, 0]) 278 | curr_loss =loss_class_cls 279 | 
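# [Editor's sketch] slice_tensor above picks a single temporal index (8) out
# of the 5D I3D feature volume (batch, time, rows, cols, channels), i.e.
# roughly the centre of the temporally downsampled 64-frame clip, and is
# wrapped in a Lambda so Keras can treat it as a layer. The same slicing in
# NumPy, with a hypothetical feature shape for illustration:
import numpy as np

features = np.zeros((1, 16, 25, 20, 832))   # hypothetical I3D feature volume
centre = features[:, 8, :, :, :]            # -> (1, 25, 20, 832)
print(centre.shape)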
write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 280 | total_cur_loss.append(curr_loss) 281 | iter_num = 0 282 | start_time = time.time() 283 | 284 | #################### Val ######################################################### 285 | iter_num_val = 0 286 | 287 | while True: 288 | # try: 289 | img_data, seq_numpy, x_img = next(data_gen_val) 290 | # print("validation") 291 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 292 | loss_class = model_classifier.train_on_batch([ X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 293 | losses_val[iter_num_val,0] = loss_class 294 | iter_num_val += 1 295 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 296 | iter_num_val_tensorboard+=1 297 | if iter_num_val == epoch_length_val: 298 | 299 | loss_class_cls = np.mean(losses_val[:, 0]) 300 | curr_loss_val = loss_class_cls 301 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 302 | total_cur_loss_val.append(curr_loss_val) 303 | # total_cur_loss.append(curr_loss) 304 | iter_num_val = 0 305 | break 306 | 307 | if curr_loss < best_loss: 308 | if C.verbose: 309 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 310 | best_loss = curr_loss 311 | model_all.save_weights(C.model_path) 312 | 313 | break 314 | 315 | except Exception as e: 316 | exc_type, exc_obj, exc_tb = sys.exc_info() 317 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 318 | print(exc_type, fname, exc_tb.tb_lineno) 319 | print('Exception: {}'.format(e)) 320 | # print(seq_numpy.shape) 321 | continue 322 | sys.stdout = old_stdout 323 | 324 | plt.plot(total_cur_loss) 325 | plt.plot(total_cur_loss_val) 326 | plt.legend(['train loss', 'val loss'], loc='upper left') 327 | 328 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 329 | plt.savefig(savefigure) 330 | print('Training complete, exiting.') 331 | log_file.close() 332 | -------------------------------------------------------------------------------- /train_whole_noI3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | import cv2 20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | # 
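# [Editor's sketch] The plotting at the end of each training script depends
# on the 'Agg' backend selected at import time, which lets matplotlib write
# image files on a display-less training node. The pattern in isolation,
# with placeholder loss values:
import matplotlib
matplotlib.use('Agg')            # must run before pyplot is imported
import matplotlib.pyplot as plt

train_loss = [0.9, 0.7, 0.6]     # placeholder values
val_loss = [1.0, 0.8, 0.75]
plt.plot(train_loss)
plt.plot(val_loss)
plt.legend(['train loss', 'val loss'], loc='upper left')
plt.savefig('loss_plot.jpg')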
import tensorflow as tf
37 | # import keras.backend.tensorflow_backend as ktf
38 | 
39 | sys.setrecursionlimit(40000)
40 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d')
41 | # def get_session(gpu_fraction=0.333):
42 | #     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
43 | #                                 allow_growth=True)
44 | #     return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
45 | # ktf.set_session(get_session())
46 | 
47 | 
48 | old_stdout = sys.stdout
49 | 
50 | log_file = open("message.log", "w")
51 | 
52 | 
53 | from i3d_inception import Inception_Inflated3d
54 | 
55 | # from tensorflow.python import keras
56 | from keras.utils import plot_model
57 | import os
58 | import pdb
59 | from keras.layers import Input
60 | import logging
61 | logging.basicConfig(filename='example.log', level=logging.DEBUG)
62 | parser = OptionParser()
63 | 
64 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.")
65 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc",
66 |                   default="pascal_voc")
67 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4)
68 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Index in the training data to start from.", default=0)
69 | 
70 | parser.add_option("-m", "--output_weight_path", dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5')
71 | parser.add_option("-d", "--dataset", dest="dataset", help="Dataset name (e.g. AVA).", default='AVA')
72 | parser.add_option("-e", "--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)
73 | parser.add_option("-v", "--version", dest="version", help="Version of the classifier head to build (e.g. v1).", default='v1')
74 | 
75 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
76 | parser.add_option("--aug", dest="aug", type="int", help="Whether to apply data augmentation (0 or 1).", default=0)
77 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False)
78 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False)
79 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).",
80 |                   action="store_true", default=False)
81 | parser.add_option("--config_filename", dest="config_filename", help=
82 |                   "Location to store all the metadata related to the training (to be used when testing).",
83 |                   default="config.pickle")
84 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.")
85 | parser.add_option("--j", dest="job", help="If the job output should be saved.")
86 | 
87 | (options, args) = parser.parse_args()
88 | 
89 | if not options.train_path:  # if the training data path is not given
90 | 	parser.error('Error: path to training data must be specified. 
Pass --path to command line') 91 | 92 | if options.parser == 'pascal_voc': 93 | from keras_frcnn.pascal_voc_parser import get_data 94 | elif options.parser == 'simple': 95 | from keras_frcnn.simple_parser import get_data 96 | else: 97 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 98 | 99 | # pass the settings from the command line, and persist them in the config object 100 | C = config.Config() 101 | version = options.version 102 | C.use_horizontal_flips = bool(options.horizontal_flips) 103 | C.use_vertical_flips = bool(options.vertical_flips) 104 | C.rot_90 = bool(options.rot_90) 105 | C.dataset = options.dataset 106 | C.augment = options.aug 107 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 108 | C.model_path = output_weight_path 109 | C.num_rois = int(options.num_rois) 110 | 111 | if options.network == 'vgg': 112 | C.network = 'vgg' 113 | from keras_frcnn import vgg as nn 114 | elif options.network == 'resnet50': 115 | from keras_frcnn import resnet as nn 116 | C.network = 'resnet50' 117 | else: 118 | print('Not a valid model') 119 | raise ValueError 120 | 121 | 122 | # check if weight path was passed via command line 123 | if options.input_weight_path: 124 | C.base_net_weights = options.input_weight_path 125 | else: 126 | # set the path to weights based on backend and model 127 | C.base_net_weights = nn.get_weight_path() 128 | 129 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 130 | 131 | job = options.job 132 | print(len(classes_count)), len(class_mapping) 133 | if job: 134 | sys.stdout = log_file 135 | 136 | 137 | # if 'bg' not in classes_count: 138 | # classes_count['bg'] = 0 139 | # class_mapping['bg'] = len(class_mapping) 140 | 141 | C.class_mapping = class_mapping 142 | 143 | inv_map = {v: k for k, v in class_mapping.items()} 144 | 145 | print('Training images per class:') 146 | pprint.pprint(classes_count) 147 | print('Num classes (including bg) = {}'.format(len(classes_count))) 148 | 149 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 150 | 151 | with open(config_output_filename, 'wb') as config_f: 152 | pickle.dump(C,config_f) 153 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 154 | 155 | random.shuffle(all_imgs) 156 | 157 | num_imgs = len(all_imgs) 158 | 159 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 160 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 161 | 162 | print('Num train samples {}'.format(len(train_imgs))) 163 | print('Num val samples {}'.format(len(val_imgs))) 164 | 165 | 166 | data_gen_train = data_generators.get_i3d_feature(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 167 | data_gen_val = data_generators.get_i3d_feature(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 168 | 169 | 170 | def get_action_dic(): 171 | 172 | action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv' 173 | ac_dic = {} 174 | f = open(action_csv,'r') 175 | actions = f.read().splitlines() 176 | for action in actions[1:]: 177 | tags = action.split(',') 178 | tags = tags[:-1] 179 | ac_id = int(tags[0]) 180 | ac = ''.join(tags[1:]) 181 | if '"' in ac: 182 | ac =ac.replace('"','') 183 | # if ',' in ac: 184 | # ac = ''.join(ac.split(',')) 185 | 186 | ac_dic[ac_id] = ac 187 | if ac_id == 1: 188 | print 
ac 189 | return ac_dic 190 | 191 | ac_id = get_action_dic() 192 | print len(class_mapping) 193 | num_classes = len(class_mapping) 194 | # if C.dataset == 'AVA': 195 | shared_layers_orig = Input(shape=(None,None,None,832), name = 'shared_layers_orig') 196 | classifier = nn.classifier_i3d_concat(shared_layers_orig, 1, nb_classes=num_classes, trainable=True, version=version) 197 | 198 | model_classifier = Model([shared_layers_orig], classifier) 199 | 200 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 201 | model_all = Model([shared_layers_orig], classifier) 202 | plot_model(model_all, to_file='model_all_i3d_whole.png', show_shapes = True) 203 | log_folder = os.path.join(options.output_weight_path,'logs/') 204 | if not os.path.isdir(log_folder): 205 | os.makedirs(log_folder) 206 | tensorboard = TensorBoard(log_dir=log_folder) 207 | tensorboard.set_model(model_classifier) 208 | train_names = ['train_loss', 'train_mae'] 209 | def write_log(callback, names, logs, batch_no): 210 | for name, value in zip(names, logs): 211 | summary = tf.Summary() 212 | summary_value = summary.value.add() 213 | summary_value.simple_value = value 214 | summary_value.tag = name 215 | callback.writer.add_summary(summary, batch_no) 216 | callback.writer.flush() 217 | 218 | optimizer = Adam(lr=1e-5) 219 | optimizer_classifier = Adam(lr=1e-5) 220 | 221 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 222 | 223 | model_all.compile(optimizer='sgd', loss='mae') 224 | 225 | epoch_length = 1000 226 | epoch_length_val =100 227 | num_epochs = int(options.num_epochs) 228 | iter_num = 0 229 | iter_num_tensorboard = 0 230 | total_cur_loss = [] 231 | total_cur_loss_val = [] 232 | iter_num_val_tensorboard = 0 233 | losses = np.zeros((epoch_length, 1)) 234 | losses_val = np.zeros((epoch_length_val, 1)) 235 | rpn_accuracy_rpn_monitor = [] 236 | rpn_accuracy_for_epoch = [] 237 | start_time = time.time() 238 | ###### val ##### 239 | rpn_accuracy_rpn_monitor_val = [] 240 | rpn_accuracy_for_epoch_val = [] 241 | 242 | ################ 243 | best_loss = np.Inf 244 | 245 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 246 | print('Starting training') 247 | # os.makedirs('check_dataset') 248 | vis = True 249 | 250 | for epoch_num in range(num_epochs): 251 | 252 | progbar = generic_utils.Progbar(epoch_length) 253 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 254 | num = 0 255 | while True: 256 | try: 257 | img_data, i3d_f, x_img = next(data_gen_train) 258 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 259 | # print X2, Y1 260 | # x1= (X2[0][0][0]) 261 | # y1 = (X2[0][0][1]) 262 | # x2 = (x1 + X2[0][0][2]) 263 | # y2 = (y1 +X2[0][0][3]) 264 | # x1, y1, x2, y2 = x1*16 , y1*16, x2*16, y2*16 265 | # # print x1, y1, x2, y2 266 | # # if x1>320 or x2>320 or y1>400 or y2>400: 267 | # # print "yes" 268 | # im_temp =cv2.imread(img_data['filepath']) 269 | # im_temp = cv2.resize(im_temp,(320, 400), interpolation=cv2.INTER_CUBIC) 270 | # # print im_temp.shape 271 | # cv2.rectangle(im_temp, (x1,y1),(x2,y2),(0,255,0),3) 272 | # font = cv2.FONT_HERSHEY_SIMPLEX 273 | # cl = [i for i, e in enumerate(Y1[0][0]) if e == 1] 274 | # print cl 275 | # ind = cl[0] 276 | # ac = ac_id[int(class_mapping_inv[ind])] 277 | # 278 | # # cv2.putText(im_temp,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA) 279 | 280 | 281 | # cv2.imwrite(os.path.join('check_dataset',ac+str(num)+'.jpg'),im_temp) 282 | 283 | num+=1 284 | 285 | # 
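# [Editor's sketch] get_action_dic above splits each row of the AVA action
# list on ',' and strips quotes by hand, which mangles action names that
# themselves contain commas. Python's csv module handles the quoting
# correctly; a safer variant, assuming the same id,name,type column layout:
import csv

def get_action_dic_csv(action_csv):
    ac_dic = {}
    with open(action_csv) as f:
        reader = csv.reader(f)
        next(reader)                      # skip the header row
        for row in reader:
            ac_dic[int(row[0])] = row[1]  # quoted names come back intact
    return ac_dic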
img_features = 286 | 287 | loss_class = model_classifier.train_on_batch([i3d_f], [Y1[:, :, :]]) 288 | losses[iter_num, 0] = loss_class 289 | 290 | iter_num += 1 291 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 292 | iter_num_tensorboard+=1 293 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 294 | 295 | if iter_num == epoch_length: 296 | loss_class_cls = np.mean(losses[:, 0]) 297 | curr_loss =loss_class_cls 298 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 299 | total_cur_loss.append(curr_loss) 300 | iter_num = 0 301 | start_time = time.time() 302 | 303 | #################### Val ######################################################### 304 | iter_num_val = 0 305 | 306 | while True: 307 | # try: 308 | img_data, seq_numpy, x_img = next(data_gen_val) 309 | # print("validation") 310 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 311 | loss_class = model_classifier.train_on_batch([seq_numpy], [Y1[:, :, :]]) 312 | losses_val[iter_num_val,0] = loss_class 313 | iter_num_val += 1 314 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 315 | iter_num_val_tensorboard+=1 316 | if iter_num_val == epoch_length_val: 317 | 318 | loss_class_cls = np.mean(losses_val[:, 0]) 319 | curr_loss_val = loss_class_cls 320 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 321 | total_cur_loss_val.append(curr_loss_val) 322 | # total_cur_loss.append(curr_loss) 323 | iter_num_val = 0 324 | break 325 | 326 | if curr_loss < best_loss: 327 | if C.verbose: 328 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 329 | best_loss = curr_loss 330 | model_all.save_weights(C.model_path) 331 | 332 | break 333 | 334 | except Exception as e: 335 | exc_type, exc_obj, exc_tb = sys.exc_info() 336 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 337 | print(exc_type, fname, exc_tb.tb_lineno) 338 | print('Exception: {}'.format(e)) 339 | # print(seq_numpy.shape) 340 | continue 341 | sys.stdout = old_stdout 342 | 343 | plt.plot(total_cur_loss) 344 | plt.plot(total_cur_loss_val) 345 | plt.legend(['train loss', 'val loss'], loc='upper left') 346 | 347 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 348 | plt.savefig(savefigure) 349 | print('Training complete, exiting.') 350 | log_file.close() 351 | -------------------------------------------------------------------------------- /train_frcnn_i3d_whole.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | import cv2 20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers 
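# [Editor's sketch] These scripts compile with losses.class_loss_multi_label,
# defined in keras_frcnn/losses.py (not shown in this dump). For AVA, where
# one person box can carry several simultaneous action labels, the usual
# choice is element-wise sigmoid cross-entropy; a plausible stand-in with the
# same call signature (an assumption, not the repo's actual definition):
from keras import backend as K

def class_loss_multi_label(y_true, y_pred):
    # mean binary cross-entropy over the per-class sigmoid outputs
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)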
import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | # import tensorflow as tf 37 | # import keras.backend.tensorflow_backend as ktf 38 | 39 | sys.setrecursionlimit(40000) 40 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d') 41 | # def get_session(gpu_fraction=0.333): 42 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction, 43 | # allow_growth=True) 44 | # return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 45 | # ktf.set_session(get_session()) 46 | 47 | 48 | old_stdout = sys.stdout 49 | 50 | log_file = open("message.log","w") 51 | 52 | 53 | from i3d_inception import Inception_Inflated3d 54 | # from i3d_inception import Inception_Inflated3d 55 | # from tensorflow.python import keras 56 | from keras.utils import plot_model 57 | import os 58 | import pdb 59 | from keras.layers import Input 60 | import logging 61 | logging.basicConfig(filename='example.log',level=logging.DEBUG) 62 | parser = OptionParser() 63 | 64 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 65 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", 66 | default="pascal_voc") 67 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4) 68 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Number of RoIs to process at once.", default=0) 69 | 70 | parser.add_option("-m", "--output_weight_path",dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 71 | parser.add_option("-d", "--dataset", dest="dataset", help="Number of RoIs to process at once.", default='AVA') 72 | parser.add_option("-e","--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000) 73 | 74 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 75 | parser.add_option("--aug", dest="aug", type = int,help="Base network to use. Supports vgg or resnet50.", default=0) 76 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 77 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False) 78 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 79 | action="store_true", default=False) 80 | parser.add_option("--config_filename", dest="config_filename", help= 81 | "Location to store all the metadata related to the training (to be used when testing).", 82 | default="config.pickle") 83 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. 
If not specified, will try to load default weights provided by keras.") 84 | parser.add_option("--j", dest="job", help="If the job output should be saved") 85 | 86 | (options, args) = parser.parse_args() 87 | 88 | if not options.train_path: # if filename is not given 89 | parser.error('Error: path to training data must be specified. Pass --path to command line') 90 | 91 | if options.parser == 'pascal_voc': 92 | from keras_frcnn.pascal_voc_parser import get_data 93 | elif options.parser == 'simple': 94 | from keras_frcnn.simple_parser import get_data 95 | else: 96 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 97 | 98 | # pass the settings from the command line, and persist them in the config object 99 | C = config.Config() 100 | 101 | C.use_horizontal_flips = bool(options.horizontal_flips) 102 | C.use_vertical_flips = bool(options.vertical_flips) 103 | C.rot_90 = bool(options.rot_90) 104 | C.dataset = options.dataset 105 | C.augment = options.aug 106 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 107 | C.model_path = output_weight_path 108 | C.num_rois = int(options.num_rois) 109 | 110 | if options.network == 'vgg': 111 | C.network = 'vgg' 112 | from keras_frcnn import vgg as nn 113 | elif options.network == 'resnet50': 114 | from keras_frcnn import resnet as nn 115 | C.network = 'resnet50' 116 | else: 117 | print('Not a valid model') 118 | raise ValueError 119 | 120 | 121 | # check if weight path was passed via command line 122 | if options.input_weight_path: 123 | C.base_net_weights = options.input_weight_path 124 | else: 125 | # set the path to weights based on backend and model 126 | C.base_net_weights = nn.get_weight_path() 127 | 128 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 129 | 130 | job = options.job 131 | print(len(classes_count)), len(class_mapping) 132 | if job: 133 | sys.stdout = log_file 134 | 135 | 136 | # if 'bg' not in classes_count: 137 | # classes_count['bg'] = 0 138 | # class_mapping['bg'] = len(class_mapping) 139 | 140 | C.class_mapping = class_mapping 141 | 142 | inv_map = {v: k for k, v in class_mapping.items()} 143 | 144 | print('Training images per class:') 145 | pprint.pprint(classes_count) 146 | print('Num classes (including bg) = {}'.format(len(classes_count))) 147 | 148 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 149 | 150 | with open(config_output_filename, 'wb') as config_f: 151 | pickle.dump(C,config_f) 152 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 153 | 154 | random.shuffle(all_imgs) 155 | 156 | num_imgs = len(all_imgs) 157 | 158 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 159 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 160 | 161 | print('Num train samples {}'.format(len(train_imgs))) 162 | print('Num val samples {}'.format(len(val_imgs))) 163 | 164 | 165 | data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 166 | data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 167 | 168 | roi_input = Input(shape=(None, 4),name = 'roi_input') 169 | vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 170 | img_input = Input(shape=(None, None, 3), name = 'img_input') 171 | vid_input_shape = (64, 
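# [Editor's sketch] Although vid_input is declared with None dimensions, the
# I3D backbone below is built for fixed 64-frame, 400x320 RGB clips. Building
# it standalone and probing the output makes the temporal/spatial
# downsampling explicit; the exact shape depends on this repo's (possibly
# modified) i3d_inception implementation, so treat the comment as indicative:
from i3d_inception import Inception_Inflated3d
import numpy as np

rgb_model = Inception_Inflated3d(include_top=False,
                                 weights='rgb_kinetics_only',
                                 input_shape=(64, 400, 320, 3))
clip = np.zeros((1, 64, 400, 320, 3), dtype=np.float32)
feat = rgb_model.predict(clip)
print(feat.shape)   # a 5D (1, T', H', W', 832)-style feature volume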
400,320, 3) 172 | rgb_model = Inception_Inflated3d( 173 | include_top=False, 174 | weights='rgb_kinetics_only', 175 | input_shape=vid_input_shape, 176 | classes=classes_count) 177 | roi_input = Input(shape=(None, 4),name = 'roi_input') 178 | shared_layers_image = nn.nn_base(img_input, trainable=True) 179 | shared_layers_orig = rgb_model(vid_input) 180 | def slice_tensor(shared_layers): 181 | 182 | feature_shape = shared_layers.shape.as_list() 183 | shared_layers = shared_layers[:,8,:,:,:] 184 | return shared_layers 185 | 186 | def get_action_dic(): 187 | 188 | action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv' 189 | ac_dic = {} 190 | f = open(action_csv,'r') 191 | actions = f.read().splitlines() 192 | for action in actions[1:]: 193 | tags = action.split(',') 194 | tags = tags[:-1] 195 | ac_id = int(tags[0]) 196 | ac = ''.join(tags[1:]) 197 | if '"' in ac: 198 | ac =ac.replace('"','') 199 | # if ',' in ac: 200 | # ac = ''.join(ac.split(',')) 201 | 202 | ac_dic[ac_id] = ac 203 | if ac_id == 1: 204 | print ac 205 | return ac_dic 206 | 207 | ac_id = get_action_dic() 208 | shared_layers = Lambda(slice_tensor)(shared_layers_orig) 209 | print len(class_mapping) 210 | num_classes = len(class_mapping) 211 | # if C.dataset == 'AVA': 212 | classifier = nn.classifier_i3d_concat(shared_layers_orig, 1, nb_classes=num_classes, trainable=True) 213 | 214 | model_classifier = Model([vid_input], classifier) 215 | 216 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 217 | model_all = Model([vid_input], classifier) 218 | plot_model(model_all, to_file='model_all_i3d_whole.png', show_shapes = True) 219 | log_folder = os.path.join(options.output_weight_path,'logs/') 220 | if not os.path.isdir(log_folder): 221 | os.makedirs(log_folder) 222 | tensorboard = TensorBoard(log_dir=log_folder) 223 | tensorboard.set_model(model_classifier) 224 | train_names = ['train_loss', 'train_mae'] 225 | def write_log(callback, names, logs, batch_no): 226 | for name, value in zip(names, logs): 227 | summary = tf.Summary() 228 | summary_value = summary.value.add() 229 | summary_value.simple_value = value 230 | summary_value.tag = name 231 | callback.writer.add_summary(summary, batch_no) 232 | callback.writer.flush() 233 | 234 | optimizer = Adam(lr=1e-5) 235 | optimizer_classifier = Adam(lr=1e-5) 236 | 237 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 238 | 239 | model_all.compile(optimizer='sgd', loss='mae') 240 | 241 | epoch_length = 1000 242 | epoch_length_val =100 243 | num_epochs = int(options.num_epochs) 244 | iter_num = 0 245 | iter_num_tensorboard = 0 246 | total_cur_loss = [] 247 | total_cur_loss_val = [] 248 | iter_num_val_tensorboard = 0 249 | losses = np.zeros((epoch_length, 1)) 250 | losses_val = np.zeros((epoch_length_val, 1)) 251 | rpn_accuracy_rpn_monitor = [] 252 | rpn_accuracy_for_epoch = [] 253 | start_time = time.time() 254 | ###### val ##### 255 | rpn_accuracy_rpn_monitor_val = [] 256 | rpn_accuracy_for_epoch_val = [] 257 | 258 | ################ 259 | best_loss = np.Inf 260 | 261 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 262 | print('Starting training') 263 | # os.makedirs('check_dataset') 264 | vis = True 265 | 266 | for epoch_num in range(num_epochs): 267 | 268 | progbar = generic_utils.Progbar(epoch_length) 269 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 270 | num = 0 271 | while True: 272 | try: 273 | img_data, seq_numpy, x_img = 
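# [Editor's sketch] roi_helpers.calc_label (not shown in this dump) turns an
# image's ground-truth action ids into the Y1 tensor fed to train_on_batch.
# In a multi-label setup it is essentially a binary indicator vector per box;
# an illustrative reconstruction under that assumption:
import numpy as np

def make_multilabel_target(action_ids, class_mapping):
    y = np.zeros((1, 1, len(class_mapping)), dtype=np.float32)
    for ac in action_ids:
        y[0, 0, class_mapping[ac]] = 1.0
    return y

# e.g. a box annotated with actions '12' and '80':
#   Y1 = make_multilabel_target(['12', '80'], class_mapping)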
next(data_gen_train) 274 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 275 | # print X2, Y1 276 | # x1= (X2[0][0][0]) 277 | # y1 = (X2[0][0][1]) 278 | # x2 = (x1 + X2[0][0][2]) 279 | # y2 = (y1 +X2[0][0][3]) 280 | # x1, y1, x2, y2 = x1*16 , y1*16, x2*16, y2*16 281 | # # print x1, y1, x2, y2 282 | # # if x1>320 or x2>320 or y1>400 or y2>400: 283 | # # print "yes" 284 | # im_temp =cv2.imread(img_data['filepath']) 285 | # im_temp = cv2.resize(im_temp,(320, 400), interpolation=cv2.INTER_CUBIC) 286 | # # print im_temp.shape 287 | # cv2.rectangle(im_temp, (x1,y1),(x2,y2),(0,255,0),3) 288 | # font = cv2.FONT_HERSHEY_SIMPLEX 289 | # cl = [i for i, e in enumerate(Y1[0][0]) if e == 1] 290 | # print cl 291 | # ind = cl[0] 292 | # ac = ac_id[int(class_mapping_inv[ind])] 293 | # 294 | # # cv2.putText(im_temp,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA) 295 | 296 | 297 | # cv2.imwrite(os.path.join('check_dataset',ac+str(num)+'.jpg'),im_temp) 298 | 299 | num+=1 300 | 301 | # img_features = 302 | 303 | loss_class = model_classifier.train_on_batch([seq_numpy], [Y1[:, :, :]]) 304 | losses[iter_num, 0] = loss_class 305 | 306 | iter_num += 1 307 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 308 | iter_num_tensorboard+=1 309 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 310 | 311 | if iter_num == epoch_length: 312 | loss_class_cls = np.mean(losses[:, 0]) 313 | curr_loss =loss_class_cls 314 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 315 | total_cur_loss.append(curr_loss) 316 | iter_num = 0 317 | start_time = time.time() 318 | 319 | #################### Val ######################################################### 320 | iter_num_val = 0 321 | 322 | while True: 323 | # try: 324 | img_data, seq_numpy, x_img = next(data_gen_val) 325 | # print("validation") 326 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 327 | loss_class = model_classifier.train_on_batch([seq_numpy], [Y1[:, :, :]]) 328 | losses_val[iter_num_val,0] = loss_class 329 | iter_num_val += 1 330 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 331 | iter_num_val_tensorboard+=1 332 | if iter_num_val == epoch_length_val: 333 | 334 | loss_class_cls = np.mean(losses_val[:, 0]) 335 | curr_loss_val = loss_class_cls 336 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 337 | total_cur_loss_val.append(curr_loss_val) 338 | # total_cur_loss.append(curr_loss) 339 | iter_num_val = 0 340 | break 341 | 342 | if curr_loss < best_loss: 343 | if C.verbose: 344 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 345 | best_loss = curr_loss 346 | model_all.save_weights(C.model_path) 347 | 348 | break 349 | 350 | except Exception as e: 351 | exc_type, exc_obj, exc_tb = sys.exc_info() 352 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 353 | print(exc_type, fname, exc_tb.tb_lineno) 354 | print('Exception: {}'.format(e)) 355 | # print(seq_numpy.shape) 356 | continue 357 | sys.stdout = old_stdout 358 | 359 | plt.plot(total_cur_loss) 360 | plt.plot(total_cur_loss_val) 361 | plt.legend(['train loss', 'val loss'], loc='upper left') 362 | 363 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 364 | plt.savefig(savefigure) 365 | print('Training complete, exiting.') 366 | log_file.close() 367 | -------------------------------------------------------------------------------- /train_frcnn_i3d_v2.py: 
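# [Editor's sketch] Each training script accumulates per-iteration losses in
# a fixed NumPy buffer and reports the running mean through Keras' Progbar.
# The pattern in isolation, with a placeholder loss value:
import numpy as np
from keras.utils import generic_utils

epoch_length = 5
losses = np.zeros((epoch_length, 1))
progbar = generic_utils.Progbar(epoch_length)
for it in range(epoch_length):
    losses[it, 0] = 1.0 / (it + 1)   # placeholder for the train_on_batch loss
    progbar.update(it + 1, [('class_loss', float(np.mean(losses[:it + 1, 0])))])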
-------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | import cv2 20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | # import tensorflow as tf 37 | # import keras.backend.tensorflow_backend as ktf 38 | 39 | sys.setrecursionlimit(40000) 40 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d') 41 | # def get_session(gpu_fraction=0.333): 42 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction, 43 | # allow_growth=True) 44 | # return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 45 | # ktf.set_session(get_session()) 46 | 47 | 48 | old_stdout = sys.stdout 49 | 50 | log_file = open("message.log","w") 51 | 52 | 53 | from i3d_inception import Inception_Inflated3d 54 | # from i3d_inception import Inception_Inflated3d 55 | # from tensorflow.python import keras 56 | from keras.utils import plot_model 57 | import os 58 | import pdb 59 | from keras.layers import Input 60 | import logging 61 | logging.basicConfig(filename='example.log',level=logging.DEBUG) 62 | parser = OptionParser() 63 | 64 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 65 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", 66 | default="pascal_voc") 67 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4) 68 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Number of RoIs to process at once.", default=0) 69 | 70 | parser.add_option("-m", "--output_weight_path",dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 71 | parser.add_option("-d", "--dataset", dest="dataset", help="Number of RoIs to process at once.", default='AVA') 72 | parser.add_option("-e","--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000) 73 | 74 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 75 | parser.add_option("--aug", dest="aug", type = int,help="Base network to use. 
Supports vgg or resnet50.", default=0) 76 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 77 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False) 78 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 79 | action="store_true", default=False) 80 | parser.add_option("--config_filename", dest="config_filename", help= 81 | "Location to store all the metadata related to the training (to be used when testing).", 82 | default="config.pickle") 83 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.") 84 | parser.add_option("--j", dest="job", help="If the job output should be saved") 85 | 86 | (options, args) = parser.parse_args() 87 | 88 | if not options.train_path: # if filename is not given 89 | parser.error('Error: path to training data must be specified. Pass --path to command line') 90 | 91 | if options.parser == 'pascal_voc': 92 | from keras_frcnn.pascal_voc_parser import get_data 93 | elif options.parser == 'simple': 94 | from keras_frcnn.simple_parser import get_data 95 | else: 96 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 97 | 98 | # pass the settings from the command line, and persist them in the config object 99 | C = config.Config() 100 | 101 | C.use_horizontal_flips = bool(options.horizontal_flips) 102 | C.use_vertical_flips = bool(options.vertical_flips) 103 | C.rot_90 = bool(options.rot_90) 104 | C.dataset = options.dataset 105 | C.augment = options.aug 106 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 107 | C.model_path = output_weight_path 108 | C.num_rois = int(options.num_rois) 109 | 110 | if options.network == 'vgg': 111 | C.network = 'vgg' 112 | from keras_frcnn import vgg as nn 113 | elif options.network == 'resnet50': 114 | from keras_frcnn import resnet as nn 115 | C.network = 'resnet50' 116 | else: 117 | print('Not a valid model') 118 | raise ValueError 119 | 120 | 121 | # check if weight path was passed via command line 122 | if options.input_weight_path: 123 | C.base_net_weights = options.input_weight_path 124 | else: 125 | # set the path to weights based on backend and model 126 | C.base_net_weights = nn.get_weight_path() 127 | 128 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 129 | 130 | job = options.job 131 | print(len(classes_count)), len(class_mapping) 132 | if job: 133 | sys.stdout = log_file 134 | 135 | 136 | # if 'bg' not in classes_count: 137 | # classes_count['bg'] = 0 138 | # class_mapping['bg'] = len(class_mapping) 139 | 140 | C.class_mapping = class_mapping 141 | 142 | inv_map = {v: k for k, v in class_mapping.items()} 143 | 144 | print('Training images per class:') 145 | pprint.pprint(classes_count) 146 | print('Num classes (including bg) = {}'.format(len(classes_count))) 147 | 148 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 149 | 150 | with open(config_output_filename, 'wb') as config_f: 151 | pickle.dump(C,config_f) 152 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 153 | 154 | random.shuffle(all_imgs) 155 | 156 | 
num_imgs = len(all_imgs) 157 | 158 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 159 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 160 | 161 | print('Num train samples {}'.format(len(train_imgs))) 162 | print('Num val samples {}'.format(len(val_imgs))) 163 | 164 | 165 | data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 166 | data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 167 | 168 | roi_input = Input(shape=(None, 4),name = 'roi_input') 169 | vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 170 | img_input = Input(shape=(None, None, 3), name = 'img_input') 171 | vid_input_shape = (64, 400,320, 3) 172 | rgb_model = Inception_Inflated3d( 173 | include_top=False, 174 | weights='rgb_kinetics_only', 175 | input_shape=vid_input_shape, 176 | classes=classes_count) 177 | roi_input = Input(shape=(None, 4),name = 'roi_input') 178 | shared_layers_image = nn.nn_base(img_input, trainable=True) 179 | shared_layers_orig = rgb_model(vid_input) 180 | def slice_tensor(shared_layers): 181 | 182 | feature_shape = shared_layers.shape.as_list() 183 | shared_layers = shared_layers[:,8,:,:,:] 184 | return shared_layers 185 | 186 | def get_action_dic(): 187 | 188 | action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv' 189 | ac_dic = {} 190 | f = open(action_csv,'r') 191 | actions = f.read().splitlines() 192 | for action in actions[1:]: 193 | tags = action.split(',') 194 | tags = tags[:-1] 195 | ac_id = int(tags[0]) 196 | ac = ''.join(tags[1:]) 197 | if '"' in ac: 198 | ac =ac.replace('"','') 199 | # if ',' in ac: 200 | # ac = ''.join(ac.split(',')) 201 | 202 | ac_dic[ac_id] = ac 203 | if ac_id == 1: 204 | print ac 205 | return ac_dic 206 | 207 | ac_id = get_action_dic() 208 | shared_layers = Lambda(slice_tensor)(shared_layers_orig) 209 | print len(class_mapping) 210 | num_classes = len(class_mapping) 211 | # if C.dataset == 'AVA': 212 | classifier = nn.classifier_i3d_concat_v2(shared_layers_orig, shared_layers_image, roi_input, 1, nb_classes=num_classes, trainable=True) 213 | 214 | model_classifier = Model([img_input, roi_input, vid_input], classifier) 215 | 216 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 217 | model_all = Model([img_input,vid_input, roi_input], classifier) 218 | plot_model(model_all, to_file='model_all_i3d_cls.png', show_shapes = True) 219 | 220 | log_folder = os.path.join(options.output_weight_path,'logs/') 221 | if not os.path.isdir(log_folder): 222 | os.makedirs(log_folder) 223 | tensorboard = TensorBoard(log_dir=log_folder) 224 | tensorboard.set_model(model_classifier) 225 | train_names = ['train_loss', 'train_mae'] 226 | def write_log(callback, names, logs, batch_no): 227 | for name, value in zip(names, logs): 228 | summary = tf.Summary() 229 | summary_value = summary.value.add() 230 | summary_value.simple_value = value 231 | summary_value.tag = name 232 | callback.writer.add_summary(summary, batch_no) 233 | callback.writer.flush() 234 | 235 | optimizer = Adam(lr=1e-5) 236 | optimizer_classifier = Adam(lr=1e-5) 237 | 238 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 239 | 240 | model_all.compile(optimizer='sgd', loss='mae') 241 | 242 | epoch_length = 1000 243 | epoch_length_val =100 244 | num_epochs = 
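# [Editor's sketch] The v2 classifier consumes ROIs in feature-map
# coordinates: the test scripts later map boxes from the resized 320x400
# image onto the backbone feature map by dividing by the network stride
# (16 for the resnet50/I3D settings used here). As a helper:
def box_to_feature_coords(x1, y1, x2, y2, stride=16.0):
    """Convert (x1, y1, x2, y2) in resized-image pixels to an
    (x, y, w, h) ROI in feature-map cells, as the ROI pooling layer expects."""
    fx1, fy1 = x1 / stride, y1 / stride
    w = (x2 - x1) / stride
    h = (y2 - y1) / stride
    return fx1, fy1, w, h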
int(options.num_epochs) 245 | iter_num = 0 246 | iter_num_tensorboard = 0 247 | total_cur_loss = [] 248 | total_cur_loss_val = [] 249 | iter_num_val_tensorboard = 0 250 | losses = np.zeros((epoch_length, 1)) 251 | losses_val = np.zeros((epoch_length_val, 1)) 252 | rpn_accuracy_rpn_monitor = [] 253 | rpn_accuracy_for_epoch = [] 254 | start_time = time.time() 255 | ###### val ##### 256 | rpn_accuracy_rpn_monitor_val = [] 257 | rpn_accuracy_for_epoch_val = [] 258 | 259 | ################ 260 | best_loss = np.Inf 261 | 262 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 263 | print('Starting training') 264 | # os.makedirs('check_dataset') 265 | vis = True 266 | 267 | for epoch_num in range(num_epochs): 268 | 269 | progbar = generic_utils.Progbar(epoch_length) 270 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 271 | num = 0 272 | while True: 273 | try: 274 | img_data, seq_numpy, x_img = next(data_gen_train) 275 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 276 | # print X2, Y1 277 | # x1= (X2[0][0][0]) 278 | # y1 = (X2[0][0][1]) 279 | # x2 = (x1 + X2[0][0][2]) 280 | # y2 = (y1 +X2[0][0][3]) 281 | # x1, y1, x2, y2 = x1*16 , y1*16, x2*16, y2*16 282 | # # print x1, y1, x2, y2 283 | # # if x1>320 or x2>320 or y1>400 or y2>400: 284 | # # print "yes" 285 | # im_temp =cv2.imread(img_data['filepath']) 286 | # im_temp = cv2.resize(im_temp,(320, 400), interpolation=cv2.INTER_CUBIC) 287 | # # print im_temp.shape 288 | # cv2.rectangle(im_temp, (x1,y1),(x2,y2),(0,255,0),3) 289 | # font = cv2.FONT_HERSHEY_SIMPLEX 290 | # cl = [i for i, e in enumerate(Y1[0][0]) if e == 1] 291 | # print cl 292 | # ind = cl[0] 293 | # ac = ac_id[int(class_mapping_inv[ind])] 294 | # 295 | # # cv2.putText(im_temp,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA) 296 | 297 | 298 | # cv2.imwrite(os.path.join('check_dataset',ac+str(num)+'.jpg'),im_temp) 299 | 300 | num+=1 301 | 302 | # img_features = 303 | 304 | loss_class = model_classifier.train_on_batch([x_img,X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 305 | losses[iter_num, 0] = loss_class 306 | 307 | iter_num += 1 308 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 309 | iter_num_tensorboard+=1 310 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 311 | 312 | if iter_num == epoch_length: 313 | loss_class_cls = np.mean(losses[:, 0]) 314 | curr_loss =loss_class_cls 315 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 316 | total_cur_loss.append(curr_loss) 317 | iter_num = 0 318 | start_time = time.time() 319 | 320 | #################### Val ######################################################### 321 | iter_num_val = 0 322 | 323 | while True: 324 | # try: 325 | img_data, seq_numpy, x_img = next(data_gen_val) 326 | # print("validation") 327 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 328 | loss_class = model_classifier.train_on_batch([x_img, X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 329 | losses_val[iter_num_val,0] = loss_class 330 | iter_num_val += 1 331 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 332 | iter_num_val_tensorboard+=1 333 | if iter_num_val == epoch_length_val: 334 | 335 | loss_class_cls = np.mean(losses_val[:, 0]) 336 | curr_loss_val = loss_class_cls 337 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 338 | total_cur_loss_val.append(curr_loss_val) 339 | # total_cur_loss.append(curr_loss) 340 | iter_num_val = 0 341 | break 342 | 343 | if curr_loss < best_loss: 
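# [Editor's note] The checkpointing below keeps only the single best set of
# weights: the epoch's mean training loss is compared against best_loss and
# model_all.save_weights overwrites C.model_path on improvement. Note that
# the comparison gates on the *training* loss; comparing curr_loss_val
# instead would make the saved checkpoint track validation performance.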
344 | if C.verbose: 345 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 346 | best_loss = curr_loss 347 | model_all.save_weights(C.model_path) 348 | 349 | break 350 | 351 | except Exception as e: 352 | exc_type, exc_obj, exc_tb = sys.exc_info() 353 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 354 | print(exc_type, fname, exc_tb.tb_lineno) 355 | print('Exception: {}'.format(e)) 356 | # print(seq_numpy.shape) 357 | continue 358 | sys.stdout = old_stdout 359 | 360 | plt.plot(total_cur_loss) 361 | plt.plot(total_cur_loss_val) 362 | plt.legend(['train loss', 'val loss'], loc='upper left') 363 | 364 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 365 | plt.savefig(savefigure) 366 | print('Training complete, exiting.') 367 | log_file.close() 368 | -------------------------------------------------------------------------------- /test_with_vis.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | 31 | 32 | ''' 33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 34 | 35 | ''' 36 | sys.setrecursionlimit(40000) 37 | 38 | parser = OptionParser() 39 | 40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 42 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 43 | 44 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 45 | help="Number of ROIs per iteration. Higher means more memory use.", default='ava_val_subset_80.csv') 46 | 47 | parser.add_option("-i", "--vis_folder", type="str", dest="vis_folder", 48 | help="Number of ROIs per iteration. Higher means more memory use.", default='ava_val_subset_80.csv') 49 | parser.add_option("-m", "--model_name", dest="model_name", 50 | help="Path to model.") 51 | parser.add_option("-t", "--type", dest="type", 52 | help="type.", default = 'v1') 53 | parser.add_option("-o", "--output", dest="output", 54 | help="csv to save predictions.") 55 | parser.add_option("--config_filename", dest="config_filename", help= 56 | "Location to read the metadata related to the training (generated when training).", 57 | default="config_subset_AVA.pickle") 58 | parser.add_option("--network", dest="network", help="Base network to use. 
Supports vgg or resnet50.", default='resnet50') 59 | parser.add_option("--version", dest="version", help="Base network to use. Supports vgg or resnet50.", default='concat') 60 | 61 | (options, args) = parser.parse_args() 62 | 63 | if not options.test_path: # if filename is not given 64 | parser.error('Error: path to test data must be specified. Pass --path to command line') 65 | 66 | 67 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 68 | config_output_filename = options.config_filename 69 | with open(config_output_filename, 'rb') as f_in: 70 | C = pickle.load(f_in) 71 | 72 | if C.network == 'resnet50': 73 | import keras_frcnn.resnet as nn 74 | elif C.network == 'vgg': 75 | import keras_frcnn.vgg as nn 76 | 77 | # turn off any data augmentation at test time 78 | C.use_horizontal_flips = False 79 | C.use_vertical_flips = False 80 | C.rot_90 = False 81 | resized_width = 320 82 | resized_height = 400 83 | version = options.version 84 | img_path = options.test_path 85 | output_csv_file = os.path.join('evaluation','8_actions',options.output) 86 | fc = open(output_csv_file,'w+') 87 | fc.close() 88 | def format_img_size(img, C): 89 | """ formats the image size based on config """ 90 | img_min_side = float(C.im_size) 91 | (height,width,_) = img.shape 92 | ratio_w = resized_width/width 93 | ratio_h = resized_height/height 94 | new_width = resized_width 95 | new_height = resized_height 96 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 97 | return img, ratio_w, ratio_h 98 | 99 | def format_img(img, C): 100 | """ formats an image for model prediction based on config """ 101 | img, ratio_w, ratio_h= format_img_size(img, C) 102 | img = format_img_channels(img, C) 103 | return img, ratio_w, ratio_h 104 | 105 | # Method to transform the coordinates of the bounding box to its original size 106 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 107 | 108 | real_x1 = int(round(x1 // ratio_w)) 109 | real_y1 = int(round(y1 // ratio_h)) 110 | real_x2 = int(round(x2 // ratio_w)) 111 | real_y2 = int(round(y2 // ratio_h)) 112 | 113 | return (real_x1, real_y1, real_x2 ,real_y2) 114 | 115 | class_mapping = C.class_mapping 116 | classes_count = class_mapping 117 | print len(class_mapping) 118 | type = options.type 119 | # bp() 120 | # if 'bg' not in class_mapping: 121 | # class_mapping['bg'] = len(class_mapping) 122 | 123 | class_mapping = {v: k for k, v in class_mapping.items()} 124 | # print(class_mapping) 125 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 126 | C.num_rois = int(options.num_rois) 127 | # print "Num rois originally",C.num_rois 128 | if C.network == 'resnet50': 129 | num_features = 1024 130 | elif C.network == 'vgg': 131 | num_features = 512 132 | 133 | if K.image_dim_ordering() == 'th': 134 | input_shape_img = (3, None, None) 135 | else: 136 | input_shape_img = (None, None, 3) 137 | 138 | # shared_layers_input= Input(shape=( None,None,None,832)) 139 | roi_input = Input(shape=(None, 4)) 140 | # vid_input = Input(shape =(None, None, None, 3)) 141 | # vid_input_shape = (64, 400,320, 3) 142 | img_input = Input(shape=(None, None, 3)) 143 | 144 | feature_map_input = Input(shape=( None,None,None,832)) 145 | shared_layers_image = nn.nn_base(img_input, trainable=False) 146 | 147 | 148 | # rgb_model = Inception_Inflated3d( 149 | # include_top=False, 150 | # weights='rgb_kinetics_only', 151 | # input_shape=vid_input_shape, 152 | # classes=classes_count) 153 | 154 | def 
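# [Editor's sketch] get_real_coordinates above uses floor division (`//`)
# before rounding, so the round() is a no-op and sub-integer precision is
# discarded; plain division is presumably what was intended when mapping
# boxes back to the original image size:
def get_real_coordinates_fixed(ratio_w, ratio_h, x1, y1, x2, y2):
    real_x1 = int(round(x1 / ratio_w))
    real_y1 = int(round(y1 / ratio_h))
    real_x2 = int(round(x2 / ratio_w))
    real_y2 = int(round(y2 / ratio_h))
    return (real_x1, real_y1, real_x2, real_y2)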
175 | optimizer_classifier = Adam(lr=1e-5)
176 | print(len(classes_count))
177 | 
178 | if version == 'concat' and type == 'v1':
179 |     classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=8, trainable=True)
180 |     model_classifier = Model([img_input, feature_map_input, roi_input], classifier)
181 | elif version == 'whole':
182 |     classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version)
183 |     model_classifier = Model([feature_map_input], classifier)
184 | elif version == 'roi':
185 |     classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count))
186 |     model_classifier = Model([roi_input, feature_map_input], classifier)
187 | 
188 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
189 | # model_all = Model([i3d_features, roi_input], classifier)
190 | elif version == 'concat' and type == 'v2':
191 |     classifier = nn.classifier_i3d_concat_v2(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True)
192 |     model_classifier = Model([img_input, feature_map_input, roi_input], classifier)
193 | # elif version == 'whole' and type == 'v2':
194 | #     classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version)
195 | #     model_classifier = Model([feature_map_input], classifier)
196 | 
197 | 
198 | # model_classifier = multi_gpu_model(model_classifier, gpus=2)
199 | model_name = os.path.join(options.model_name, 'model.hdf5')
200 | print('Loading weights from {}'.format(model_name))
201 | model_classifier.load_weights(model_name, by_name=True)
202 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label])
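# A minimal sketch of the inputs the 'concat'/v1 head above expects; the
# arrays are dummies and only the shapes matter. The spatial size (1/16 of
# the resized frame) matches the stride-16 box scaling used in the loop
# below; the temporal extent of 8 is illustrative, not taken from the repo.
def _demo_concat_inputs(model):
    dummy_img = np.zeros((1, resized_height, resized_width, 3))    # one resized keyframe
    dummy_feat = np.zeros((1, 8, resized_height//16, resized_width//16, 832))  # I3D feature map
    dummy_rois = np.zeros((1, 1, 4))                               # one (x, y, w, h) box
    return model.predict([dummy_img, dummy_feat, dummy_rois])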
203 | vis_folder = options.vis_folder
204 | all_imgs = []
205 | classes = {}
206 | bbox_threshold = 0.7
207 | visualise = True
208 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool', options.val_data)
209 | df = pd.read_csv(f_val)
210 | final_predictions = []
211 | 
212 | indices = range(len(df))
213 | print(len(df))
214 | # bp()
215 | bs = 8
216 | for i in tqdm(range(len(df))):
217 | 
218 |     try:
219 |         # ind = indices[i:i+bs]
220 | 
221 |         row = df.iloc[i, :]
222 |         # if row[-2] in [62]:
223 | 
224 |         val_vid = row[0]
225 |         vid_path = os.path.join(img_path, val_vid)
226 |         img_name = str(int(row[1]))+'.jpg'
227 | 
228 |         filepath = os.path.join(vid_path, img_name)
229 |         fp = filepath.replace('keyframes', 'numpy_8_actions')  # precomputed I3D feature map for this keyframe
230 |         fp = fp.replace('.jpg', '.npy')
231 | 
232 | 
233 | 
234 | 
235 | 
236 | 
237 |         if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
238 |             continue
239 |         st = time.time()
240 | 
241 |         # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg'
242 |         fr_num = filepath.split(os.path.sep)[-1].split('.')[0]
243 |         img = cv2.imread(filepath)
244 |         # x_img =
245 |         tags = filepath.split(os.path.sep)
246 |         img_folder = '/'+'/'.join(tags[1:-1])
247 |         shared_layers_orig = np.load(fp)
248 |         # seq = get_frame_idx(filepath)
249 |         #
250 |         # # print filepath, seq
251 |         # vid_numpy = []
252 |         x_img_orig = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC)
253 |         x_img = np.expand_dims(x_img_orig, axis=0)
254 |         # # print filepath, seq
255 |         # # bp()
256 |         # for frame in seq:
257 |         #     if frame!=-1:
258 |         #         fr_name = os.path.join(img_folder, frame)
259 |         #         np_name = fr_name.replace('.jpg','.npy')
260 |         #         np_name = np_name.replace('train/keyframes','numpy_arrays_val')
261 |         #         # print np_name
262 |         #         fr_npy = np.load(np_name)
263 |         #         vid_numpy.append(fr_npy)
264 |         #     else:
265 |         #         vid_numpy.append(np.zeros((resized_height,resized_width,3)))
266 |         #
267 |         # vid_numpy = np.array(vid_numpy)
268 |         # vid_numpy = np.expand_dims(vid_numpy,axis=0)
269 |         x1, y1, x2, y2 = (float(row[2])*resized_width)/16., (float(row[3])*resized_height)/16., (float(row[4])*resized_width)/16., (float(row[5])*resized_height)/16.  # normalised AVA box -> feature-map coordinates (stride 16)
270 |         # shared_layers_orig = rgb_model.predict(vid_numpy)
271 | 
272 |         # print vid_numpy.shape
273 |         if version == 'concat':
274 |             # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)]
275 |             w = x2-x1
276 |             h = y2-y1
277 |             roi = np.array([x1, y1, w, h])
278 |             rois = np.expand_dims(roi, axis=0)
279 |             rois = np.expand_dims(rois, axis=0)
280 |             # try:
281 |             # rois, vid_numpy = get_batch(df, ind)
282 |             # print rois, shared_layers_orig.shape
283 |             [P_cls] = model_classifier.predict([x_img, shared_layers_orig, rois])
284 | 
285 |         elif version == 'roi':
286 |             w = x2-x1
287 |             h = y2-y1
288 |             roi = np.array([x1, y1, w, h])
289 |             rois = np.expand_dims(roi, axis=0)
290 |             rois = np.expand_dims(rois, axis=0)
291 |             # try:
292 |             # rois, vid_numpy = get_batch(df, ind)
293 |             # print rois, shared_layers_orig.shape
294 |             [P_cls] = model_classifier.predict([rois, shared_layers_orig])
295 | 
296 |         elif version == 'whole':
297 |             [P_cls] = model_classifier.predict([shared_layers_orig])
298 | 
299 |         real_x1, real_y1, real_x2, real_y2 = row[2]*resized_width, row[3]*resized_height, row[4]*resized_width, row[5]*resized_height
300 |         cv2.rectangle(x_img_orig, (int(real_x1), int(real_y1)), (int(real_x2), int(real_y2)), (255,255,255), 2)
301 |         # ind = np.argmax(P_cls[0,:])
302 |         gt = row[-2]
303 |         pred_prob = P_cls[0, :]
304 |         # pred_class = class_mapping[ind]
305 |         sort_prob = sorted(range(len(pred_prob)), key=lambda j: pred_prob[j])[-4:]  # indices of the four highest-scoring classes
306 |         ac_pred = [class_mapping[ind] for ind in sort_prob]
307 |         ac_pred = list(map(int, ac_pred))
308 |         # print ac_pred
309 |         # bp()
310 |         # print gt, pred_class
311 |         if int(gt) in ac_pred:
312 |             # pred_prob = P_cls[0,:]
313 |             # sort_prob = sorted(range(len(pred_prob)), key=lambda i: pred_prob[i])[-3:]
314 |             # prob_str = [str(pred_prob[ind]) for ind in sort_prob]
315 |             # prob_str = ','.join(prob_str)
316 |             #
317 |             # ac_pred = [class_mapping[ind] for ind in sort_prob]
318 |             # font = cv2.FONT_HERSHEY_SIMPLEX
319 |             # bottomLeftCornerOfText = (100,100)
320 |             # fontScale = 1
321 |             # fontColor = (255,255,255)
322 |             # lineType = 2
323 |             #
324 |             # cv2.putText(img,','.join(ac_pred)+'-'+prob_str,
325 |             #     bottomLeftCornerOfText,
326 |             #     font,
327 |             #     fontScale,
328 |             #     fontColor,
329 |             #     lineType)
330 |             seq_name = filepath.split(os.path.sep)[-2]
331 |             vid_dir = os.path.join(vis_folder, str(row[-2]), 'true_positives', seq_name)
332 |             if not os.path.isdir(vid_dir):
333 |                 os.makedirs(vid_dir)
334 | 
335 | 
336 |             save_img_name = os.path.join(vid_dir, img_name)
337 |             # print(save_img_name)
338 |             cv2.imwrite(save_img_name, x_img_orig)
339 |         else:
340 |             seq_name = filepath.split(os.path.sep)[-2]
341 |             vid_dir = os.path.join(vis_folder, str(row[-2]), 'false_positives', seq_name)
342 |             if not os.path.isdir(vid_dir):
343 |                 os.makedirs(vid_dir)
344 | 
345 | 
346 |             save_img_name = os.path.join(vid_dir, img_name)
347 |             # print(save_img_name)
348 |             cv2.imwrite(save_img_name, x_img_orig)
349 |         # print(P_cls)
350 |         # bp()
351 |         # print output_csv_file
352 |         # print(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[5])+','+str(P_cls[0,5])+'\n')
353 | 
354 | 
355 |         # with open(output_csv_file,'a+') as f_predicted:
356 |         #     [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) if class_mapping[cn]!='bg']
357 | 
358 | 
359 | 
360 |     except Exception as e:
361 |         exc_type, exc_obj, exc_tb = sys.exc_info()
362 |         fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
363 |         print(exc_type, fname, exc_tb.tb_lineno)
364 |         print('Exception: {}'.format(e))
365 |         # print(filepath)
366 |         continue
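# The top-k selection inside the loop above, rewritten with numpy as a
# minimal sketch: P_cls is the (1, num_classes) multi-label output and
# class_mapping maps a column index back to an AVA action id.
def _demo_topk(P_cls, k=4):
    top_idx = np.argsort(P_cls[0, :])[-k:]           # k highest-scoring columns
    return [int(class_mapping[j]) for j in top_idx]  # as AVA action ids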
--------------------------------------------------------------------------------
/extract_i3d_features.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import random
3 | import pprint
4 | import sys
5 | import time
6 | import numpy as np
7 | from optparse import OptionParser
8 | import pickle
9 | import tensorflow as tf
10 | from keras.backend.tensorflow_backend import set_session
11 | config = tf.ConfigProto()
12 | config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
13 | config.log_device_placement = True  # to log device placement (on which device the operation ran)
14 | # (nothing gets printed in Jupyter, only if you run it standalone)
15 | sess = tf.Session(config=config)
16 | set_session(sess)  # set this TensorFlow session as the default session for Keras
17 | from keras.utils import plot_model
18 | import os
19 | 
20 | from keras.callbacks import TensorBoard
21 | from keras import backend as K
22 | from keras.optimizers import Adam, SGD, RMSprop
23 | from keras.layers import Input
24 | from keras.models import Model
25 | from keras_frcnn import config, data_generators  # NOTE: rebinds 'config' from the ConfigProto above to keras_frcnn.config
26 | from keras_frcnn import losses as losses
27 | import keras_frcnn.roi_helpers as roi_helpers
28 | from keras.utils import generic_utils
29 | from keras.layers import Lambda
30 | from i3d_inception import Inception_Inflated3d
31 | import collections
32 | 
33 | import matplotlib
34 | matplotlib.use('Agg')
35 | import matplotlib.pyplot as plt
36 | from tqdm import tqdm
37 | import cv2
38 | 
39 | # import tensorflow as tf
40 | # import keras.backend.tensorflow_backend as ktf
41 | 
42 | sys.setrecursionlimit(40000)
43 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d')
44 | # def get_session(gpu_fraction=0.333):
45 | #     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
46 | #                                 allow_growth=True)
47 | #     return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
48 | # ktf.set_session(get_session())
49 | 
50 | 
51 | old_stdout = sys.stdout
52 | 
53 | log_file = open("message.log", "w")
54 | 
55 | 
56 | 
57 | # from i3d_inception import Inception_Inflated3d
58 | # from tensorflow.python import keras
59 | 
60 | 
61 | import pdb
62 | 
63 | import logging
64 | logging.basicConfig(filename='example.log', level=logging.DEBUG)
65 | parser = OptionParser()
66 | 
67 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.")
68 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc",
69 |                   default="pascal_voc")
70 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4)
71 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Index of the first sample to process.", default=0)
72 | 
73 | parser.add_option("-m", "--output_weight_path", dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5')
74 | parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to use.", default='AVA')
75 | parser.add_option("-e", "--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)
76 | 
77 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
78 | parser.add_option("--aug", dest="aug", type=int, help="Whether to augment the training data (0 or 1).", default=0)
79 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False)
80 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False)
81 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).",
82 |                   action="store_true", default=False)
83 | parser.add_option("--config_filename", dest="config_filename", help=
84 |                   "Location to store all the metadata related to the training (to be used when testing).",
85 |                   default="config.pickle")
86 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.")
87 | parser.add_option("--j", dest="job", help="If the job output should be saved")
88 | 
89 | (options, args) = parser.parse_args()
90 | 
91 | if not options.train_path:  # if filename is not given
92 |     parser.error('Error: path to training data must be specified. Pass --path to command line')
93 | 
94 | if options.parser == 'pascal_voc':
95 |     from keras_frcnn.pascal_voc_parser import get_data
96 | elif options.parser == 'simple':
97 |     from keras_frcnn.simple_parser import get_data
98 | else:
99 |     raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'")
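# Example invocation (the paths are illustrative, not taken from the repo):
#   python extract_i3d_features.py -p /path/to/annotations.txt -o simple \
#       -m /path/to/model_dir/ --config_filename config.pickle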
100 | 
101 | # pass the settings from the command line, and persist them in the config object
102 | C = config.Config()
103 | 
104 | C.use_horizontal_flips = bool(options.horizontal_flips)
105 | C.use_vertical_flips = bool(options.vertical_flips)
106 | C.rot_90 = bool(options.rot_90)
107 | C.dataset = options.dataset
108 | C.augment = options.aug
109 | output_weight_path = os.path.join(options.output_weight_path, 'model.hdf5')
110 | C.model_path = output_weight_path
111 | C.num_rois = int(options.num_rois)
112 | 
113 | if options.network == 'vgg':
114 |     C.network = 'vgg'
115 |     from keras_frcnn import vgg as nn
116 | elif options.network == 'resnet50':
117 |     from keras_frcnn import resnet as nn
118 |     C.network = 'resnet50'
119 | else:
120 |     print('Not a valid model')
121 |     raise ValueError
122 | 
123 | 
124 | # check if weight path was passed via command line
125 | if options.input_weight_path:
126 |     C.base_net_weights = options.input_weight_path
127 | else:
128 |     # set the path to weights based on backend and model
129 |     C.base_net_weights = nn.get_weight_path()
130 | 
131 | # all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx)
132 | 
133 | # job = options.job
134 | # print(len(classes_count)), len(class_mapping)
135 | # if job:
136 | #     sys.stdout = log_file
137 | #
138 | #
139 | # # if 'bg' not in classes_count:
140 | # #     classes_count['bg'] = 0
141 | # #     class_mapping['bg'] = len(class_mapping)
142 | #
143 | # C.class_mapping = class_mapping
144 | #
145 | # inv_map = {v: k for k, v in class_mapping.items()}
146 | #
147 | # print('Training images per class:')
148 | # pprint.pprint(classes_count)
149 | # print('Num classes (including bg) = {}'.format(len(classes_count)))
150 | #
151 | # config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle'
152 | #
153 | # with open(config_output_filename, 'wb') as config_f:
154 | #     pickle.dump(C,config_f)
155 | #     print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename))
156 | #
157 | # random.shuffle(all_imgs)
158 | #
159 | # num_imgs = len(all_imgs)
160 | #
161 | # train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
162 | # val_imgs = [s for s in all_imgs if s['imageset'] == 'test']
163 | #
164 | # imgs = train_imgs+val_imgs
165 | #
166 | # print('Num train samples {}'.format(len(train_imgs)))
167 | # print('Num val samples {}'.format(len(val_imgs)))
168 | 
169 | 
170 | # data_gen_train = data_generators.get_anchor_gt(imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
171 | # data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val')
172 | 
173 | vid_input = Input(shape=(None, None, None, 3), name='vid_input')
174 | vid_input_shape = (64, 400, 320, 3)
175 | rgb_model = Inception_Inflated3d(
176 |     include_top=False,
177 |     weights='rgb_kinetics_only',
178 |     input_shape=vid_input_shape,
179 |     classes=8)
180 | roi_input = Input(shape=(None, 4), name='roi_input')
181 | shared_layers_orig = rgb_model(vid_input)
182 | 
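# A minimal sketch (not part of the original script) of how one keyframe's
# I3D feature map can be cached as the .npy files that test_with_vis.py
# loads; 'window_paths' (64 frame paths, -1 for padding) and 'out_path' are
# illustrative names, and frames are assumed to need resizing to 320x400.
def _demo_save_features(window_paths, out_path):
    clip = []
    for p in window_paths:
        if p == -1:
            clip.append(np.zeros((400, 320, 3)))            # zero padding outside the clip
        else:
            frame = cv2.resize(cv2.imread(p), (320, 400))   # (height, width) = (400, 320)
            clip.append(extract_numpy_single_frame(frame))  # scale to [-1, 1]
    clip = np.expand_dims(np.array(clip), axis=0)           # (1, 64, 400, 320, 3)
    np.save(out_path, rgb_model.predict(clip))              # cache the feature map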
183 | 
184 | # epoch_length = len(imgs)
185 | # epoch_length_val = 100
186 | # num_epochs = int(options.num_epochs)
187 | # iter_num = 0
188 | # iter_num_tensorboard = 0
189 | # total_cur_loss = []
190 | # total_cur_loss_val = []
191 | # iter_num_val_tensorboard = 0
192 | # losses = np.zeros((epoch_length, 1))
193 | # losses_val = np.zeros((epoch_length_val, 1))
194 | # rpn_accuracy_rpn_monitor = []
195 | # rpn_accuracy_for_epoch = []
196 | # start_time = time.time()
197 | # ###### val #####
198 | # rpn_accuracy_rpn_monitor_val = []
199 | # rpn_accuracy_for_epoch_val = []
200 | 
201 | ################
202 | # best_loss = np.Inf
203 | #
204 | # class_mapping_inv = {v: k for k, v in class_mapping.items()}
205 | # print('Starting training')
206 | #
207 | # vis = True
208 | 
209 | 
210 | def extract_numpy_single_frame(img):
211 |     # scale pixel values from [0, 255] to [-1, 1], as expected by I3D
212 |     img = (img/255.)*2 - 1
213 |     return img
214 | 
215 | def get_frame_idx(img_path):
216 |     winSize = 64
217 |     tags = img_path.split(os.path.sep)
218 |     vid_folder = '/'+'/'.join(tags[1:-1])
219 |     frames = os.listdir(vid_folder)
220 |     if 'CAD' in img_path:
221 |         frames = [f for f in frames if f.startswith('RGB')]
222 |         frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1]))
223 |     else:
224 |         frames.sort(key = lambda x: int(x.split('.')[0]))
225 |     frame_index = frames.index(tags[-1])
226 |     # print frame_index
227 |     fi = get_frames_index(frames, frame_index, winSize)
228 |     seq = [frames[k] if k != -1 else k for k in fi]
229 |     # print(seq[0], seq[31])
230 |     # print seq
231 |     return seq
232 | 
233 | def get_frames_index(total_list, frame_idx, winSize):
234 |     nb = (winSize/2)-1  # frames kept before the keyframe (31 for winSize 64)
235 |     na = (winSize/2)+1  # exclusive bound: na-1 frames follow the keyframe
236 |     final_frame_idx = len(total_list)-1
237 |     sf = final_frame_idx-(winSize/2)+1  # first index whose window would overrun the clip end
238 |     ef = len(total_list)
239 |     seq = []
240 |     if frame_idx < (winSize/2)-1:  # keyframe near the clip start: left-pad with -1s
241 |         zp_frames = nb - frame_idx
242 |         seq = [-1 for number in range(int(zp_frames))]
243 |         seq.extend(range(0, int(frame_idx)+1))
244 |         seq.extend(range(int(frame_idx)+1, int(frame_idx)+int(na)))
245 |         if len(seq) != winSize:
246 |             print("No")  # sanity check: the window must hold exactly winSize entries
247 |     elif frame_idx >= (winSize/2)-1 and frame_idx < sf:  # full context on both sides
248 |         start_index = frame_idx-(winSize/2)+1
249 |         end_index = (winSize/2) + frame_idx
250 |         # print frame_idx
251 |         seq = range(int(start_index), int(end_index+1))
252 | 
253 |     elif frame_idx >=sf and frame_idx