├── RoiPoolingConv_i3d2.py
├── check_i3d.py
├── evaluate_i3d_classifier.py
├── evaluate_sample.py
├── extract_i3d_features.py
├── i3d_inception.py
├── preprocess_script.py
├── test_ava_concat.py
├── test_final_i3d.py
├── test_final_i3d_bk.py
├── test_final_noI3d.py
├── test_final_noI3d_vis.py
├── test_frcnn_AVA.py
├── test_frcnn_ava_bk.py
├── test_i3d_whole.py
├── test_with_vis.py
├── train_frcnn_i3d_batch.py
├── train_frcnn_i3d_cls.py
├── train_frcnn_i3d_concat.py
├── train_frcnn_i3d_v2.py
├── train_frcnn_i3d_whole.py
├── train_frcnn_noI3d.py
└── train_whole_noI3d.py

/RoiPoolingConv_i3d2.py:
--------------------------------------------------------------------------------

from keras.engine.topology import Layer
import keras.backend as K

if K.backend() == 'tensorflow':
    import tensorflow as tf

class RoiPoolingConv(Layer):
    '''ROI pooling layer for 3D (video) feature maps.
    Adapted from Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun
    # Arguments
        pool_size: int
            Size of pooling region to use. pool_size = 7 will result in a 7x7 region.
        num_rois: number of regions of interest to be used
    # Input shape
        list of two 5D tensors [X_img, X_roi] with shape:
        X_img:
            `(1, channels, time, rows, cols)` if dim_ordering='th'
            or `(1, time, rows, cols, channels)` if dim_ordering='tf'.
        X_roi:
            `(1, num_rois, 4)` list of rois, with ordering (x, y, w, h)
    # Output shape
        5D tensor with shape:
        `(1, num_rois, channels, pool_size, pool_size)` if dim_ordering='th',
        or `(1, num_rois, pool_size, pool_size, channels)` if dim_ordering='tf'
    '''
    def __init__(self, pool_size, num_rois, **kwargs):

        self.dim_ordering = K.image_dim_ordering()
        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

        self.pool_size = pool_size
        self.num_rois = num_rois

        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        if self.dim_ordering == 'th':
            self.nb_channels = input_shape[0][1]
        elif self.dim_ordering == 'tf':
            # channels-last for 5D video features (1, time, rows, cols, channels)
            self.nb_channels = input_shape[0][4]

    def compute_output_shape(self, input_shape):
        if self.dim_ordering == 'th':
            return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size
        else:
            return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):

        assert(len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)
        print(img.shape.as_list())
        outputs = []

        for roi_idx in range(self.num_rois):

            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]
            # print(x,y,w,h)
            row_length = w / float(self.pool_size)
            col_length = h / float(self.pool_size)

            num_pool_regions = self.pool_size

            # NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
            # in theano. The theano implementation is much less efficient and leads to long compile times

            if self.dim_ordering == 'th':
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        x1 = x + ix * row_length
                        x2 = x1 + row_length
                        y1 = y + jy * col_length
                        y2 = y1 + col_length

                        x1 = K.cast(x1, 'int32')
                        x2 = K.cast(x2, 'int32')
                        y1 = K.cast(y1, 'int32')
                        y2 = K.cast(y2, 'int32')

                        x2 = x1 + K.maximum(1, x2 - x1)
                        y2 = y1 + K.maximum(1, y2 - y1)

                        new_shape = [input_shape[0], input_shape[1], input_shape[2],
                                     y2 - y1, x2 - x1]

                        x_crop = img[:, :, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(3, 4))
                        outputs.append(pooled_val)

            elif self.dim_ordering == 'tf':
                x = K.cast(x, 'int32')
                y = K.cast(y, 'int32')
                w = K.cast(w, 'int32')
                h = K.cast(h, 'int32')

                # tf.image.resize_images expects a 4D tensor, so resize the
                # temporally-central slice of the 5D crop (the same mid-slice
                # convention used elsewhere in this repo)
                rs = tf.image.resize_images(img[:, input_shape[1] // 2, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
                outputs.append(rs)

        final_output = K.concatenate(outputs, axis=0)
        print(final_output.shape.as_list())
        print(self.nb_channels)
        final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

        if self.dim_ordering == 'th':
            final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
        # for 'tf' the axes are already (0, 1, 2, 3, 4); no permutation is needed

        print("final shape", final_output.shape.as_list())
        return final_output


    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
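For reference, the tf branch above computes the same crop-and-resize style pooling that tf.image.crop_and_resize performs on a plain 4D feature map. A minimal, self-contained sketch (TF 1.x; the shapes and names here are illustrative, not taken from this repo):

import numpy as np
import tensorflow as tf

feat = tf.placeholder(tf.float32, [1, 25, 20, 832])        # (batch, rows, cols, channels)
boxes = tf.constant([[0.1, 0.2, 0.5, 0.6]], tf.float32)    # one ROI as normalized (y1, x1, y2, x2)
pooled = tf.image.crop_and_resize(feat, boxes, tf.zeros([1], tf.int32), crop_size=[7, 7])

with tf.Session() as sess:
    out = sess.run(pooled, {feat: np.random.rand(1, 25, 20, 832)})
    print(out.shape)                                       # (1, 7, 7, 832)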
--------------------------------------------------------------------------------
/evaluate_i3d_classifier.py:
--------------------------------------------------------------------------------

'''
Loads a pretrained model of the I3D Inception architecture from the paper: 'https://arxiv.org/abs/1705.07750'
and evaluates an RGB and flow sample, similar to the paper's GitHub repo: 'https://github.com/deepmind/kinetics-i3d'
'''

import numpy as np
import argparse

from check_i3d import Inception_Inflated3d
# from i3d_inception import Inception_Inflated3d
from tensorflow.python import keras
from keras.utils import plot_model
import os
import pdb


NUM_FRAMES = 79
FRAME_HEIGHT = 224
FRAME_WIDTH = 224
NUM_RGB_CHANNELS = 3
NUM_FLOW_CHANNELS = 2

NUM_CLASSES = 400

# '/groups/jbhuang_lab/data/action/UCF101/npy/Diving/v_Diving_g01_c01.npy'

SAMPLE_DATA_PATH = {
    'rgb': 'data/v_CricketShot_g04_c01_rgb.npy',
    'flow': 'data/v_CricketShot_g04_c01_flow.npy'
}

LABEL_MAP_PATH = 'data/label_map.txt'

def main(args):
    # load the kinetics classes
    kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')]


    if args.eval_type in ['rgb', 'joint']:
        if args.no_imagenet_pretrained:
            # build model for RGB data
            # and load pretrained weights (trained on kinetics dataset only)
            rgb_model = Inception_Inflated3d(
                include_top=False,
                weights='rgb_kinetics_only',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for RGB data
            # and load 
pretrained weights (trained on imagenet and kinetics dataset) 54 | rgb_model = Inception_Inflated3d( 55 | include_top=False, 56 | weights='rgb_imagenet_and_kinetics', 57 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 58 | classes=NUM_CLASSES) 59 | # pdb.set_trace() 60 | # print rgb_model.summary() 61 | plot_model(rgb_model, to_file='model_without_top.png', show_shapes = True) 62 | # print rgb_model.summary() 63 | 64 | 65 | # load RGB sample (just one example) 66 | ''' 67 | rgb_sample = np.load(SAMPLE_DATA_PATH['rgb']) 68 | # # 69 | # # # make prediction 70 | rgb_features = rgb_model.predict(rgb_sample) 71 | # rgb_features 72 | # print rgb_features.shape.as_list() 73 | # 74 | # print rgb_logits.shape 75 | features = rgb_features[:,11,:,:,:] 76 | features = np.array(features) 77 | print features.shape 78 | ''' 79 | ''' 80 | if args.eval_type in ['flow', 'joint']: 81 | if args.no_imagenet_pretrained: 82 | # build model for optical flow data 83 | # and load pretrained weights (trained on kinetics dataset only) 84 | flow_model = Inception_Inflated3d( 85 | include_top=True, 86 | weights='flow_kinetics_only', 87 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 88 | classes=NUM_CLASSES) 89 | else: 90 | # build model for optical flow data 91 | # and load pretrained weights (trained on imagenet and kinetics dataset) 92 | flow_model = Inception_Inflated3d( 93 | include_top=True, 94 | weights='flow_imagenet_and_kinetics', 95 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 96 | classes=NUM_CLASSES) 97 | 98 | 99 | # load flow sample (just one example) 100 | flow_sample = np.load(SAMPLE_DATA_PATH['flow']) 101 | 102 | # make prediction 103 | flow_logits = flow_model.predict(flow_sample) 104 | 105 | 106 | # produce final model logits 107 | if args.eval_type == 'rgb': 108 | sample_logits = rgb_logits 109 | elif args.eval_type == 'flow': 110 | sample_logits = flow_logits 111 | else: # joint 112 | sample_logits = rgb_logits + flow_logits 113 | 114 | # produce softmax output from model logit for class probabilities 115 | sample_logits = sample_logits[0] # we are dealing with just one example 116 | sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits)) 117 | 118 | sorted_indices = np.argsort(sample_predictions)[::-1] 119 | 120 | print('\nNorm of logits: %f' % np.linalg.norm(sample_logits)) 121 | print('\nTop classes and probabilities') 122 | for index in sorted_indices[:20]: 123 | print(sample_predictions[index], sample_logits[index], kinetics_classes[index]) 124 | 125 | 126 | return 127 | 128 | ''' 129 | if __name__ == '__main__': 130 | # parse arguments 131 | parser = argparse.ArgumentParser() 132 | parser.add_argument('--eval-type', 133 | help='specify model type. 1 stream (rgb or flow) or 2 stream (joint = rgb and flow).', 134 | type=str, choices=['rgb', 'flow', 'joint'], default='joint') 135 | 136 | parser.add_argument('--no-imagenet-pretrained', 137 | help='If set, load model weights trained only on kinetics dataset. 
Otherwise, load model weights trained on imagenet and kinetics dataset.',
        action='store_true')


    args = parser.parse_args()
    main(args)

--------------------------------------------------------------------------------
/evaluate_sample.py:
--------------------------------------------------------------------------------

'''
Loads a pretrained model of the I3D Inception architecture from the paper: 'https://arxiv.org/abs/1705.07750'
and evaluates an RGB and flow sample, similar to the paper's GitHub repo: 'https://github.com/deepmind/kinetics-i3d'
'''

import numpy as np
import argparse

from i3d_inception import Inception_Inflated3d
# from i3d_inception import Inception_Inflated3d
from tensorflow.python import keras
from keras.utils import plot_model
import os
import pdb
from keras.layers import Input


NUM_FRAMES = 79
FRAME_HEIGHT = 224
FRAME_WIDTH = 224
NUM_RGB_CHANNELS = 3
NUM_FLOW_CHANNELS = 2

NUM_CLASSES = 400

# '/groups/jbhuang_lab/data/action/UCF101/npy/Diving/v_Diving_g01_c01.npy'

SAMPLE_DATA_PATH = {
    'rgb': 'data/v_CricketShot_g04_c01_rgb.npy',
    'flow': 'data/v_CricketShot_g04_c01_flow.npy'
}

LABEL_MAP_PATH = 'data/label_map.txt'

def main(args):
    # load the kinetics classes
    kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')]


    if args.eval_type in ['rgb', 'joint']:
        if args.no_imagenet_pretrained:
            # build model for RGB data
            # and load pretrained weights (trained on kinetics dataset only)
            rgb_model = Inception_Inflated3d(
                include_top=False,
                weights='rgb_kinetics_only',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for RGB data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            rgb_model = Inception_Inflated3d(
                include_top=False,
                weights='rgb_imagenet_and_kinetics',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS),
                classes=NUM_CLASSES)
    # pdb.set_trace()
    # print rgb_model.summary()
    # plot_model(rgb_model, to_file='model_without_top.png', show_shapes = True)
    # print rgb_model.summary()


    # build a symbolic video input and take the temporally-central feature slice
    vid_input = Input(shape=(79, 224, 224, 3))
    features = rgb_model(vid_input)
    print features.shape.as_list()
    feature_shape = features.shape.as_list()
    mid_slice_no = (feature_shape[1] / 2) + 1
    print mid_slice_no
    rgb_features = features[:, mid_slice_no, :, :, :]

    print rgb_features.shape.as_list()
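    # Hedged numpy sketch of the slice above (illustrative, not part of the
    # original flow): with 79 input frames this endpoint keeps roughly a
    # quarter of the frames, so the features are about (1, 20, 14, 14, 832)
    # and the central slice is (1, 14, 14, 832).
    demo_feats = np.random.rand(1, 20, 14, 14, 832)     # (batch, time, rows, cols, channels)
    demo_mid = demo_feats[:, demo_feats.shape[1] // 2]
    print demo_mid.shape                                # -> (1, 14, 14, 832)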
    '''
    rgb_sample = np.load(SAMPLE_DATA_PATH['rgb'])
    # #
    # # # make prediction
    rgb_features = rgb_model.predict(rgb_sample)
    # rgb_features
    # print rgb_features.shape.as_list()
    #
    # print rgb_logits.shape
    features = rgb_features[:,11,:,:,:]
    features = np.array(features)
    print features.shape
    '''
    '''
    if args.eval_type in ['flow', 'joint']:
        if args.no_imagenet_pretrained:
            # build model for optical flow data
            # and load pretrained weights (trained on kinetics dataset only)
            flow_model = Inception_Inflated3d(
                include_top=True,
                weights='flow_kinetics_only',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS),
                classes=NUM_CLASSES)
        else:
            # build model for optical flow data
            # and load pretrained weights (trained on imagenet and kinetics dataset)
            flow_model = Inception_Inflated3d(
                include_top=True,
                weights='flow_imagenet_and_kinetics',
                input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS),
                classes=NUM_CLASSES)


    # load flow sample (just one example)
    flow_sample = np.load(SAMPLE_DATA_PATH['flow'])

    # make prediction
    flow_logits = flow_model.predict(flow_sample)


    # produce final model logits
    if args.eval_type == 'rgb':
        sample_logits = rgb_logits
    elif args.eval_type == 'flow':
        sample_logits = flow_logits
    else: # joint
        sample_logits = rgb_logits + flow_logits

    # produce softmax output from model logit for class probabilities
    sample_logits = sample_logits[0] # we are dealing with just one example
    sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits))

    sorted_indices = np.argsort(sample_predictions)[::-1]

    print('\nNorm of logits: %f' % np.linalg.norm(sample_logits))
    print('\nTop classes and probabilities')
    for index in sorted_indices[:20]:
        print(sample_predictions[index], sample_logits[index], kinetics_classes[index])


    return

    '''
if __name__ == '__main__':
    # parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--eval-type',
        help='specify model type. 1 stream (rgb or flow) or 2 stream (joint = rgb and flow).',
        type=str, choices=['rgb', 'flow', 'joint'], default='joint')

    parser.add_argument('--no-imagenet-pretrained',
        help='If set, load model weights trained only on kinetics dataset. Otherwise, load model weights trained on imagenet and kinetics dataset.',
        action='store_true')


    args = parser.parse_args()
    main(args)
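A minimal sketch of the feature-extraction step the next script builds toward (hedged: the output shape assumes the include_top=False model is truncated at its 832-channel endpoint, which is what the Input(..., 832) tensors in the training scripts below expect):

import numpy as np
from i3d_inception import Inception_Inflated3d

model = Inception_Inflated3d(include_top=False,
                             weights='rgb_kinetics_only',
                             input_shape=(64, 400, 320, 3))
clip = (np.random.rand(1, 64, 400, 320, 3) * 2) - 1   # one 64-frame RGB window, scaled to [-1, 1]
feats = model.predict(clip)
print(feats.shape)                                    # roughly (1, 16, 25, 20, 832): time/4, rows/16, cols/16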
--------------------------------------------------------------------------------
/extract_i3d_features.py:
--------------------------------------------------------------------------------

from __future__ import division
import random
import pprint
import sys
import time
import numpy as np
from optparse import OptionParser
import pickle
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
                                    # (nothing gets printed in Jupyter, only if you run it standalone)
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras
from keras.utils import plot_model
import os

from keras.callbacks import TensorBoard
from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Input
from keras.models import Model
from keras_frcnn import config, data_generators
from keras_frcnn import losses as losses
import keras_frcnn.roi_helpers as roi_helpers
from keras.utils import generic_utils
from keras.layers import Lambda
from i3d_inception import Inception_Inflated3d
import collections
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from tqdm import tqdm
import cv2
# import tensorflow as tf
# import keras.backend.tensorflow_backend as ktf

sys.setrecursionlimit(40000)
sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d')
# def get_session(gpu_fraction=0.333):
#     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
#                                 allow_growth=True)
#     return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
# ktf.set_session(get_session())


old_stdout = sys.stdout

log_file = open("message.log", "w")


from i3d_inception import Inception_Inflated3d
# from i3d_inception import Inception_Inflated3d
# from tensorflow.python import keras
from keras.utils import plot_model
import os
import pdb
from keras.layers import Input
import logging
logging.basicConfig(filename='example.log', level=logging.DEBUG)
parser = OptionParser()

parser.add_option("-p", "--path", dest="train_path", help="Path to training data.")
parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc",
                  default="pascal_voc")
parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4)
parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Index of the first sample to start from.", default=0)

parser.add_option("-m", "--output_weight_path", dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5')
parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to train on.", default='AVA')
parser.add_option("-e", "--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)

parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
parser.add_option("--aug", dest="aug", type=int, help="Whether to augment the training data (0 or 1).", default=0)
parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False)
parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False)
parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).",
                  action="store_true", default=False)
parser.add_option("--config_filename", dest="config_filename", help=
                  "Location to store all the metadata related to the training (to be used when testing).",
                  default="config.pickle")
parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.")
parser.add_option("--j", dest="job", help="If the job output should be saved")

(options, args) = parser.parse_args()

if not options.train_path:  # if filename is not given
    parser.error('Error: path to training data must be specified. 
Pass --path to command line') 93 | 94 | if options.parser == 'pascal_voc': 95 | from keras_frcnn.pascal_voc_parser import get_data 96 | elif options.parser == 'simple': 97 | from keras_frcnn.simple_parser import get_data 98 | else: 99 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 100 | 101 | # pass the settings from the command line, and persist them in the config object 102 | C = config.Config() 103 | 104 | C.use_horizontal_flips = bool(options.horizontal_flips) 105 | C.use_vertical_flips = bool(options.vertical_flips) 106 | C.rot_90 = bool(options.rot_90) 107 | C.dataset = options.dataset 108 | C.augment = options.aug 109 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 110 | C.model_path = output_weight_path 111 | C.num_rois = int(options.num_rois) 112 | 113 | if options.network == 'vgg': 114 | C.network = 'vgg' 115 | from keras_frcnn import vgg as nn 116 | elif options.network == 'resnet50': 117 | from keras_frcnn import resnet as nn 118 | C.network = 'resnet50' 119 | else: 120 | print('Not a valid model') 121 | raise ValueError 122 | 123 | 124 | # check if weight path was passed via command line 125 | if options.input_weight_path: 126 | C.base_net_weights = options.input_weight_path 127 | else: 128 | # set the path to weights based on backend and model 129 | C.base_net_weights = nn.get_weight_path() 130 | 131 | # all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 132 | 133 | # job = options.job 134 | # print(len(classes_count)), len(class_mapping) 135 | # if job: 136 | # sys.stdout = log_file 137 | # 138 | # 139 | # # if 'bg' not in classes_count: 140 | # # classes_count['bg'] = 0 141 | # # class_mapping['bg'] = len(class_mapping) 142 | # 143 | # C.class_mapping = class_mapping 144 | # 145 | # inv_map = {v: k for k, v in class_mapping.items()} 146 | # 147 | # print('Training images per class:') 148 | # pprint.pprint(classes_count) 149 | # print('Num classes (including bg) = {}'.format(len(classes_count))) 150 | # 151 | # config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 152 | # 153 | # with open(config_output_filename, 'wb') as config_f: 154 | # pickle.dump(C,config_f) 155 | # print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 156 | # 157 | # random.shuffle(all_imgs) 158 | # 159 | # num_imgs = len(all_imgs) 160 | # 161 | # train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 162 | # val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 163 | # 164 | # imgs = train_imgs+val_imgs 165 | # 166 | # print('Num train samples {}'.format(len(train_imgs))) 167 | # print('Num val samples {}'.format(len(val_imgs))) 168 | 169 | 170 | # data_gen_train = data_generators.get_anchor_gt(imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 171 | # data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 172 | 173 | vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 174 | vid_input_shape = (64, 400,320, 3) 175 | rgb_model = Inception_Inflated3d( 176 | include_top=False, 177 | weights='rgb_kinetics_only', 178 | input_shape=vid_input_shape, 179 | classes=8) 180 | roi_input = Input(shape=(None, 4),name = 'roi_input') 181 | shared_layers_orig = rgb_model(vid_input) 182 | 183 | 184 | # epoch_length = len(imgs) 185 | # epoch_length_val =100 186 | # 
num_epochs = int(options.num_epochs)
# iter_num = 0
# iter_num_tensorboard = 0
# total_cur_loss = []
# total_cur_loss_val = []
# iter_num_val_tensorboard = 0
# losses = np.zeros((epoch_length, 1))
# losses_val = np.zeros((epoch_length_val, 1))
# rpn_accuracy_rpn_monitor = []
# rpn_accuracy_for_epoch = []
# start_time = time.time()
# ###### val #####
# rpn_accuracy_rpn_monitor_val = []
# rpn_accuracy_for_epoch_val = []

################
# best_loss = np.Inf
#
# class_mapping_inv = {v: k for k, v in class_mapping.items()}
# print('Starting training')
#
# vis = True


def extract_numpy_single_frame(img):

    img = (img / 255.) * 2 - 1
    return img

def get_frame_idx(img_path):
    winSize = 64
    tags = img_path.split(os.path.sep)
    vid_folder = '/' + '/'.join(tags[1:-1])
    frames = os.listdir(vid_folder)
    if 'CAD' in img_path:
        frames = [f for f in frames if f.startswith('RGB')]
        frames.sort(key=lambda x: int(x.split('.')[0].split('_')[1]))
    else:
        frames.sort(key=lambda x: int(x.split('.')[0]))
    frame_index = frames.index(tags[-1])
    # print frame_index
    fi = get_frames_index(frames, frame_index, winSize)
    seq = [frames[k] if k != -1 else k for k in fi]
    # print(seq[0],seq[31])
    # print seq
    return seq

def get_frames_index(total_list, frame_idx, winSize):
    nb = (winSize/2) - 1
    na = (winSize/2) + 1
    final_frame_idx = len(total_list) - 1
    sf = final_frame_idx - (winSize/2) + 1
    ef = len(total_list)
    seq = []
    if frame_idx < (winSize/2) - 1:
        zp_frames = nb - frame_idx
        seq = [-1 for number in range(int(zp_frames))]
        seq.extend(range(0, int(frame_idx) + 1))
        seq.extend(range(int(frame_idx) + 1, int(frame_idx) + int(na)))
        if len(seq) != winSize:
            print "No"
    elif frame_idx >= (winSize/2) - 1 and frame_idx < sf:
        start_index = frame_idx - (winSize/2) + 1
        end_index = (winSize/2) + frame_idx
        # print frame_idx
        seq = range(int(start_index), int(end_index + 1))

    elif frame_idx >= sf and frame_idx < ef:
        # NOTE: this branch was truncated in this copy of the file; it is
        # reconstructed here to mirror the zero-padding of the first branch
        seq = range(int(frame_idx) - int(nb), int(ef))
        seq.extend([-1 for number in range(winSize - len(seq))])
    return seq
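# --- Editor's sketch (hedged; not part of the original file): a compact
# restatement of the windowing above. For frame f in a clip of num_frames,
# the 64-frame window runs from f-31 to f+32, with -1 marking out-of-range
# slots that get_frame_idx keeps as padding placeholders.
def window_indices(num_frames, f, win=64):
    start = f - (win // 2 - 1)                   # 31 frames before f
    return [i if 0 <= i < num_frames else -1
            for i in range(start, start + win)]  # f sits at position 31

# e.g. window_indices(100, 0)[:3]  -> [-1, -1, -1]
#      window_indices(100, 50)[31] -> 50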
# (the generator/model setup and the training-loop header were lost from this
# copy of the file; the surviving loop body continues below)
            # # if x1>320 or x2>320 or y1>400 or y2>400:
            # # print "yes"
            # im_temp =cv2.imread(img_data['filepath'])
            # im_temp = cv2.resize(im_temp,(320, 400), interpolation=cv2.INTER_CUBIC)
            # # print im_temp.shape
            # cv2.rectangle(im_temp, (x1,y1),(x2,y2),(0,255,0),3)
            # font = cv2.FONT_HERSHEY_SIMPLEX
            # cl = [i for i, e in enumerate(Y1[0][0]) if e == 1]
            # print cl
            # ind = cl[0]
            # ac = ac_id[int(class_mapping_inv[ind])]
            #
            # # cv2.putText(im_temp,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA)


            # cv2.imwrite(os.path.join('check_dataset',ac+str(num)+'.jpg'),im_temp)

            num += 1

            # img_features =

            loss_class = model_classifier.train_on_batch([x_img, X2[:, :, :], seq_numpy], [Y1[:, :, :]])
            losses[iter_num, 0] = loss_class

            iter_num += 1
            write_log(tensorboard, ['loss_class'], [loss_class], iter_num_tensorboard)
            iter_num_tensorboard += 1
            progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))])

            if iter_num == epoch_length:
                loss_class_cls = np.mean(losses[:, 0])
                curr_loss = loss_class_cls
                write_log(tensorboard, ['total train loss'], [curr_loss], iter_num_tensorboard)
                total_cur_loss.append(curr_loss)
                iter_num = 0
                start_time = time.time()

                #################### Val #########################################################
                iter_num_val = 0

                while True:
                    # try:
                    img_data, seq_numpy, x_img = next(data_gen_val)
                    # print("validation")
                    X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping)
                    loss_class = model_classifier.train_on_batch([x_img, X2[:, :, :], seq_numpy], [Y1[:, :, :]])
                    losses_val[iter_num_val, 0] = loss_class
                    iter_num_val += 1
                    write_log(tensorboard, ['loss_class_val'], [loss_class], iter_num_val_tensorboard)
                    iter_num_val_tensorboard += 1
                    if iter_num_val == epoch_length_val:

                        loss_class_cls = np.mean(losses_val[:, 0])
                        curr_loss_val = loss_class_cls
                        write_log(tensorboard, ['total val loss'], [curr_loss_val], iter_num_val_tensorboard)
                        total_cur_loss_val.append(curr_loss_val)
                        # total_cur_loss.append(curr_loss)
                        iter_num_val = 0
                        break

                if curr_loss < best_loss:
                    if C.verbose:
                        print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                    best_loss = curr_loss
                    model_all.save_weights(C.model_path)

                break

        except Exception as e:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
            print('Exception: {}'.format(e))
            # print(seq_numpy.shape)
            continue
sys.stdout = old_stdout

plt.plot(total_cur_loss)
plt.plot(total_cur_loss_val)
plt.legend(['train loss', 'val loss'], loc='upper left')

savefigure = os.path.join(os.path.join(options.output_weight_path, 'loss_plot.jpg'))
plt.savefig(savefigure)
print('Training complete, exiting.')
log_file.close()

--------------------------------------------------------------------------------
/train_frcnn_i3d_whole.py:
--------------------------------------------------------------------------------

from __future__ import division
import random
import pprint
import sys
import time
import numpy as np
from optparse import OptionParser
import pickle
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
                                    # (nothing gets printed in Jupyter, only if you run it standalone)
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras
from keras.utils import plot_model
import os
import cv2
from keras.callbacks import TensorBoard
from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Input
from keras.models import Model
from keras_frcnn import config, data_generators
from keras_frcnn import losses as losses
import keras_frcnn.roi_helpers as roi_helpers
from keras.utils import generic_utils
from keras.layers import Lambda
from i3d_inception import Inception_Inflated3d
import collections
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# import tensorflow as tf
# import keras.backend.tensorflow_backend as ktf

sys.setrecursionlimit(40000)
sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d')
# def get_session(gpu_fraction=0.333):
#     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
#                                 allow_growth=True)
#     return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
# ktf.set_session(get_session())


old_stdout = sys.stdout

log_file = open("message.log", "w")


from i3d_inception import Inception_Inflated3d
# from i3d_inception import Inception_Inflated3d
# from tensorflow.python import keras
from keras.utils import plot_model
import os
import pdb
from keras.layers import Input
import logging
logging.basicConfig(filename='example.log', level=logging.DEBUG)
parser = OptionParser()

parser.add_option("-p", "--path", dest="train_path", help="Path to training data.")
parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc",
                  default="pascal_voc")
parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4)
parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Index of the first sample to start from.", default=0)

parser.add_option("-m", "--output_weight_path", dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5')
parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to train on.", default='AVA')
parser.add_option("-e", "--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)

parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
parser.add_option("--aug", dest="aug", type=int, help="Whether to augment the training data (0 or 1).", default=0)
parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False)
parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False)
parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).",
                  action="store_true", default=False)
parser.add_option("--config_filename", dest="config_filename", help=
                  "Location to store all the metadata related to the training (to be used when testing).",
                  default="config.pickle")
parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.")
parser.add_option("--j", dest="job", help="If the job output should be saved")

(options, args) = parser.parse_args()

if not options.train_path:  # if filename is not given
    parser.error('Error: path to training data must be specified. 
Pass --path to command line') 90 | 91 | if options.parser == 'pascal_voc': 92 | from keras_frcnn.pascal_voc_parser import get_data 93 | elif options.parser == 'simple': 94 | from keras_frcnn.simple_parser import get_data 95 | else: 96 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 97 | 98 | # pass the settings from the command line, and persist them in the config object 99 | C = config.Config() 100 | 101 | C.use_horizontal_flips = bool(options.horizontal_flips) 102 | C.use_vertical_flips = bool(options.vertical_flips) 103 | C.rot_90 = bool(options.rot_90) 104 | C.dataset = options.dataset 105 | C.augment = options.aug 106 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 107 | C.model_path = output_weight_path 108 | C.num_rois = int(options.num_rois) 109 | 110 | if options.network == 'vgg': 111 | C.network = 'vgg' 112 | from keras_frcnn import vgg as nn 113 | elif options.network == 'resnet50': 114 | from keras_frcnn import resnet as nn 115 | C.network = 'resnet50' 116 | else: 117 | print('Not a valid model') 118 | raise ValueError 119 | 120 | 121 | # check if weight path was passed via command line 122 | if options.input_weight_path: 123 | C.base_net_weights = options.input_weight_path 124 | else: 125 | # set the path to weights based on backend and model 126 | C.base_net_weights = nn.get_weight_path() 127 | 128 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 129 | 130 | job = options.job 131 | print(len(classes_count)), len(class_mapping) 132 | if job: 133 | sys.stdout = log_file 134 | 135 | 136 | # if 'bg' not in classes_count: 137 | # classes_count['bg'] = 0 138 | # class_mapping['bg'] = len(class_mapping) 139 | 140 | C.class_mapping = class_mapping 141 | 142 | inv_map = {v: k for k, v in class_mapping.items()} 143 | 144 | print('Training images per class:') 145 | pprint.pprint(classes_count) 146 | print('Num classes (including bg) = {}'.format(len(classes_count))) 147 | 148 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 149 | 150 | with open(config_output_filename, 'wb') as config_f: 151 | pickle.dump(C,config_f) 152 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 153 | 154 | random.shuffle(all_imgs) 155 | 156 | num_imgs = len(all_imgs) 157 | 158 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 159 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 160 | 161 | print('Num train samples {}'.format(len(train_imgs))) 162 | print('Num val samples {}'.format(len(val_imgs))) 163 | 164 | 165 | data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 166 | data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 167 | 168 | roi_input = Input(shape=(None, 4),name = 'roi_input') 169 | vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 170 | img_input = Input(shape=(None, None, 3), name = 'img_input') 171 | vid_input_shape = (64, 400,320, 3) 172 | rgb_model = Inception_Inflated3d( 173 | include_top=False, 174 | weights='rgb_kinetics_only', 175 | input_shape=vid_input_shape, 176 | classes=classes_count) 177 | roi_input = Input(shape=(None, 4),name = 'roi_input') 178 | shared_layers_image = nn.nn_base(img_input, trainable=True) 179 | shared_layers_orig = 
rgb_model(vid_input) 180 | def slice_tensor(shared_layers): 181 | 182 | feature_shape = shared_layers.shape.as_list() 183 | shared_layers = shared_layers[:,8,:,:,:] 184 | return shared_layers 185 | 186 | def get_action_dic(): 187 | 188 | action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv' 189 | ac_dic = {} 190 | f = open(action_csv,'r') 191 | actions = f.read().splitlines() 192 | for action in actions[1:]: 193 | tags = action.split(',') 194 | tags = tags[:-1] 195 | ac_id = int(tags[0]) 196 | ac = ''.join(tags[1:]) 197 | if '"' in ac: 198 | ac =ac.replace('"','') 199 | # if ',' in ac: 200 | # ac = ''.join(ac.split(',')) 201 | 202 | ac_dic[ac_id] = ac 203 | if ac_id == 1: 204 | print ac 205 | return ac_dic 206 | 207 | ac_id = get_action_dic() 208 | shared_layers = Lambda(slice_tensor)(shared_layers_orig) 209 | print len(class_mapping) 210 | num_classes = len(class_mapping) 211 | # if C.dataset == 'AVA': 212 | classifier = nn.classifier_i3d_concat(shared_layers_orig, 1, nb_classes=num_classes, trainable=True) 213 | 214 | model_classifier = Model([vid_input], classifier) 215 | 216 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 217 | model_all = Model([vid_input], classifier) 218 | plot_model(model_all, to_file='model_all_i3d_whole.png', show_shapes = True) 219 | log_folder = os.path.join(options.output_weight_path,'logs/') 220 | if not os.path.isdir(log_folder): 221 | os.makedirs(log_folder) 222 | tensorboard = TensorBoard(log_dir=log_folder) 223 | tensorboard.set_model(model_classifier) 224 | train_names = ['train_loss', 'train_mae'] 225 | def write_log(callback, names, logs, batch_no): 226 | for name, value in zip(names, logs): 227 | summary = tf.Summary() 228 | summary_value = summary.value.add() 229 | summary_value.simple_value = value 230 | summary_value.tag = name 231 | callback.writer.add_summary(summary, batch_no) 232 | callback.writer.flush() 233 | 234 | optimizer = Adam(lr=1e-5) 235 | optimizer_classifier = Adam(lr=1e-5) 236 | 237 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 238 | 239 | model_all.compile(optimizer='sgd', loss='mae') 240 | 241 | epoch_length = 1000 242 | epoch_length_val =100 243 | num_epochs = int(options.num_epochs) 244 | iter_num = 0 245 | iter_num_tensorboard = 0 246 | total_cur_loss = [] 247 | total_cur_loss_val = [] 248 | iter_num_val_tensorboard = 0 249 | losses = np.zeros((epoch_length, 1)) 250 | losses_val = np.zeros((epoch_length_val, 1)) 251 | rpn_accuracy_rpn_monitor = [] 252 | rpn_accuracy_for_epoch = [] 253 | start_time = time.time() 254 | ###### val ##### 255 | rpn_accuracy_rpn_monitor_val = [] 256 | rpn_accuracy_for_epoch_val = [] 257 | 258 | ################ 259 | best_loss = np.Inf 260 | 261 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 262 | print('Starting training') 263 | # os.makedirs('check_dataset') 264 | vis = True 265 | 266 | for epoch_num in range(num_epochs): 267 | 268 | progbar = generic_utils.Progbar(epoch_length) 269 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 270 | num = 0 271 | while True: 272 | try: 273 | img_data, seq_numpy, x_img = next(data_gen_train) 274 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 275 | # print X2, Y1 276 | # x1= (X2[0][0][0]) 277 | # y1 = (X2[0][0][1]) 278 | # x2 = (x1 + X2[0][0][2]) 279 | # y2 = (y1 +X2[0][0][3]) 280 | # x1, y1, x2, y2 = x1*16 , y1*16, x2*16, y2*16 281 | # # print x1, y1, x2, y2 282 | # # if x1>320 or 
x2>320 or y1>400 or y2>400: 283 | # # print "yes" 284 | # im_temp =cv2.imread(img_data['filepath']) 285 | # im_temp = cv2.resize(im_temp,(320, 400), interpolation=cv2.INTER_CUBIC) 286 | # # print im_temp.shape 287 | # cv2.rectangle(im_temp, (x1,y1),(x2,y2),(0,255,0),3) 288 | # font = cv2.FONT_HERSHEY_SIMPLEX 289 | # cl = [i for i, e in enumerate(Y1[0][0]) if e == 1] 290 | # print cl 291 | # ind = cl[0] 292 | # ac = ac_id[int(class_mapping_inv[ind])] 293 | # 294 | # # cv2.putText(im_temp,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA) 295 | 296 | 297 | # cv2.imwrite(os.path.join('check_dataset',ac+str(num)+'.jpg'),im_temp) 298 | 299 | num+=1 300 | 301 | # img_features = 302 | 303 | loss_class = model_classifier.train_on_batch([seq_numpy], [Y1[:, :, :]]) 304 | losses[iter_num, 0] = loss_class 305 | 306 | iter_num += 1 307 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 308 | iter_num_tensorboard+=1 309 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 310 | 311 | if iter_num == epoch_length: 312 | loss_class_cls = np.mean(losses[:, 0]) 313 | curr_loss =loss_class_cls 314 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 315 | total_cur_loss.append(curr_loss) 316 | iter_num = 0 317 | start_time = time.time() 318 | 319 | #################### Val ######################################################### 320 | iter_num_val = 0 321 | 322 | while True: 323 | # try: 324 | img_data, seq_numpy, x_img = next(data_gen_val) 325 | # print("validation") 326 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 327 | loss_class = model_classifier.train_on_batch([seq_numpy], [Y1[:, :, :]]) 328 | losses_val[iter_num_val,0] = loss_class 329 | iter_num_val += 1 330 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 331 | iter_num_val_tensorboard+=1 332 | if iter_num_val == epoch_length_val: 333 | 334 | loss_class_cls = np.mean(losses_val[:, 0]) 335 | curr_loss_val = loss_class_cls 336 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 337 | total_cur_loss_val.append(curr_loss_val) 338 | # total_cur_loss.append(curr_loss) 339 | iter_num_val = 0 340 | break 341 | 342 | if curr_loss < best_loss: 343 | if C.verbose: 344 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 345 | best_loss = curr_loss 346 | model_all.save_weights(C.model_path) 347 | 348 | break 349 | 350 | except Exception as e: 351 | exc_type, exc_obj, exc_tb = sys.exc_info() 352 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 353 | print(exc_type, fname, exc_tb.tb_lineno) 354 | print('Exception: {}'.format(e)) 355 | # print(seq_numpy.shape) 356 | continue 357 | sys.stdout = old_stdout 358 | 359 | plt.plot(total_cur_loss) 360 | plt.plot(total_cur_loss_val) 361 | plt.legend(['train loss', 'val loss'], loc='upper left') 362 | 363 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 364 | plt.savefig(savefigure) 365 | print('Training complete, exiting.') 366 | log_file.close() 367 | -------------------------------------------------------------------------------- /train_frcnn_noI3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend 
import set_session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
                                    # (nothing gets printed in Jupyter, only if you run it standalone)
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras
from keras.utils import plot_model
import os
import cv2
from keras.callbacks import TensorBoard
from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Input
from keras.models import Model
from keras_frcnn import config, data_generators
from keras_frcnn import losses as losses
import keras_frcnn.roi_helpers as roi_helpers
from keras.utils import generic_utils
from keras.layers import Lambda
from i3d_inception import Inception_Inflated3d
import collections
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# import tensorflow as tf
# import keras.backend.tensorflow_backend as ktf

sys.setrecursionlimit(40000)
sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d')
# def get_session(gpu_fraction=0.333):
#     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
#                                 allow_growth=True)
#     return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
# ktf.set_session(get_session())


old_stdout = sys.stdout

log_file = open("message.log", "w")


from i3d_inception import Inception_Inflated3d
# from i3d_inception import Inception_Inflated3d
# from tensorflow.python import keras
from keras.utils import plot_model
import os
import pdb
from keras.layers import Input
import logging
logging.basicConfig(filename='example.log', level=logging.DEBUG)
parser = OptionParser()

parser.add_option("-p", "--path", dest="train_path", help="Path to training data.")
parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc",
                  default="pascal_voc")
parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4)
parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Index of the first sample to start from.", default=0)

parser.add_option("-m", "--output_weight_path", dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5')
parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to train on.", default='AVA')
parser.add_option("-e", "--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)
parser.add_option("-v", "--version", dest="version", help="Classifier variant to use. One of v1, roi, or v2.", default='v1')
parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
parser.add_option("--aug", dest="aug", type=int, help="Whether to augment the training data (0 or 1).", default=0)
parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False)
parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. 
(Default=false).", action="store_true", default=False) 78 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 79 | action="store_true", default=False) 80 | parser.add_option("--config_filename", dest="config_filename", help= 81 | "Location to store all the metadata related to the training (to be used when testing).", 82 | default="config.pickle") 83 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.") 84 | parser.add_option("--j", dest="job", help="If the job output should be saved") 85 | 86 | (options, args) = parser.parse_args() 87 | 88 | if not options.train_path: # if filename is not given 89 | parser.error('Error: path to training data must be specified. Pass --path to command line') 90 | 91 | if options.parser == 'pascal_voc': 92 | from keras_frcnn.pascal_voc_parser import get_data 93 | elif options.parser == 'simple': 94 | from keras_frcnn.simple_parser import get_data 95 | else: 96 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 97 | 98 | # pass the settings from the command line, and persist them in the config object 99 | C = config.Config() 100 | 101 | C.use_horizontal_flips = bool(options.horizontal_flips) 102 | C.use_vertical_flips = bool(options.vertical_flips) 103 | C.rot_90 = bool(options.rot_90) 104 | C.dataset = options.dataset 105 | C.augment = options.aug 106 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 107 | C.model_path = output_weight_path 108 | C.num_rois = int(options.num_rois) 109 | 110 | if options.network == 'vgg': 111 | C.network = 'vgg' 112 | from keras_frcnn import vgg as nn 113 | elif options.network == 'resnet50': 114 | from keras_frcnn import resnet as nn 115 | C.network = 'resnet50' 116 | else: 117 | print('Not a valid model') 118 | raise ValueError 119 | 120 | 121 | # check if weight path was passed via command line 122 | if options.input_weight_path: 123 | C.base_net_weights = options.input_weight_path 124 | else: 125 | # set the path to weights based on backend and model 126 | C.base_net_weights = nn.get_weight_path() 127 | 128 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 129 | 130 | job = options.job 131 | print(len(classes_count)), len(class_mapping) 132 | if job: 133 | sys.stdout = log_file 134 | 135 | 136 | # if 'bg' not in classes_count: 137 | # classes_count['bg'] = 0 138 | # class_mapping['bg'] = len(class_mapping) 139 | version = options.version 140 | C.class_mapping = class_mapping 141 | 142 | inv_map = {v: k for k, v in class_mapping.items()} 143 | 144 | print('Training images per class:') 145 | pprint.pprint(classes_count) 146 | print('Num classes (including bg) = {}'.format(len(classes_count))) 147 | 148 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 149 | 150 | with open(config_output_filename, 'wb') as config_f: 151 | pickle.dump(C,config_f) 152 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 153 | 154 | random.shuffle(all_imgs) 155 | 156 | num_imgs = len(all_imgs) 157 | 158 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 159 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 160 | 161 | print('Num train samples {}'.format(len(train_imgs))) 162 | print('Num val samples 
{}'.format(len(val_imgs))) 163 | 164 | 165 | data_gen_train = data_generators.get_i3d_feature(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 166 | data_gen_val = data_generators.get_i3d_feature(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 167 | 168 | roi_input = Input(shape=(None, 4),name = 'roi_input') 169 | # vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 170 | img_input = Input(shape=(None, None, 3), name = 'img_input') 171 | # vid_input_shape = (64, 400,320, 3) 172 | # rgb_model = Inception_Inflated3d( 173 | # include_top=False, 174 | # weights='rgb_kinetics_only', 175 | # input_shape=vid_input_shape, 176 | # classes=classes_count) 177 | # roi_input = Input(shape=(None, 4),name = 'roi_input') 178 | shared_layers_image = nn.nn_base(img_input, trainable=True) 179 | # shared_layers_orig = rgb_model(vid_input) 180 | 181 | 182 | def get_action_dic(): 183 | 184 | action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv' 185 | ac_dic = {} 186 | f = open(action_csv,'r') 187 | actions = f.read().splitlines() 188 | for action in actions[1:]: 189 | tags = action.split(',') 190 | tags = tags[:-1] 191 | ac_id = int(tags[0]) 192 | ac = ''.join(tags[1:]) 193 | if '"' in ac: 194 | ac =ac.replace('"','') 195 | # if ',' in ac: 196 | # ac = ''.join(ac.split(',')) 197 | 198 | ac_dic[ac_id] = ac 199 | if ac_id == 1: 200 | print ac 201 | return ac_dic 202 | 203 | ac_id = get_action_dic() 204 | # shared_layers = Lambda(slice_tensor)(shared_layers_orig) 205 | print len(class_mapping) 206 | num_classes = len(class_mapping) 207 | # if C.dataset == 'AVA': 208 | i3d_features = Input(shape=(None,None,None,832), name = 'i3d_features') 209 | if version == 'v1': 210 | classifier = nn.classifier_i3d_concat_new(i3d_features, shared_layers_image, roi_input, 1, nb_classes=num_classes, trainable=True) 211 | model_classifier = Model([img_input, roi_input, i3d_features], classifier) 212 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 213 | model_all = Model([img_input, roi_input,i3d_features], classifier) 214 | 215 | elif version == 'roi': 216 | classifier = nn.classifier_i3d(i3d_features, roi_input, 1, nb_classes=num_classes) 217 | model_classifier = Model([roi_input, i3d_features], classifier) 218 | 219 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 220 | model_all = Model([i3d_features, roi_input], classifier) 221 | # elif version == 'frcnn': 222 | # classifier = nn.classifier(i3d_features, roi_input, 1, nb_classes=num_classes) 223 | # model_classifier = Model([roi_input, i3d_features], classifier) 224 | # 225 | # # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 226 | # model_all = Model([i3d_features, roi_input], classifier) 227 | else: 228 | print "Using version 2" 229 | classifier = nn.classifier_i3d_concat_v2(i3d_features, shared_layers_image, roi_input, 1, nb_classes=num_classes, trainable=True) 230 | model_classifier = Model([img_input, roi_input, i3d_features], classifier) 231 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 232 | model_all = Model([img_input, roi_input,i3d_features], classifier) 233 | # 234 | # classifier = nn.classifier_i3d_concat_new(shared_layers_orig, shared_layers_image, roi_input, 1, nb_classes=num_classes, trainable=True, 
version = version) 235 | # 236 | # classifier = nn.classifier_i3d_concat_new(i3d_features, shared_layers_image, roi_input, 1, nb_classes=num_classes, trainable=True) 237 | 238 | plot_model(model_all, to_file='model_all_i3d_cls.png', show_shapes = True) 239 | log_folder = os.path.join(options.output_weight_path,'logs/') 240 | if not os.path.isdir(log_folder): 241 | os.makedirs(log_folder) 242 | tensorboard = TensorBoard(log_dir=log_folder) 243 | tensorboard.set_model(model_classifier) 244 | train_names = ['train_loss', 'train_mae'] 245 | def write_log(callback, names, logs, batch_no): 246 | for name, value in zip(names, logs): 247 | summary = tf.Summary() 248 | summary_value = summary.value.add() 249 | summary_value.simple_value = value 250 | summary_value.tag = name 251 | callback.writer.add_summary(summary, batch_no) 252 | callback.writer.flush() 253 | 254 | optimizer = Adam(lr=1e-5) 255 | optimizer_classifier = Adam(lr=1e-5) 256 | 257 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 258 | 259 | model_all.compile(optimizer='sgd', loss='mae') 260 | 261 | 262 | epoch_length = 1000 263 | epoch_length_val =100 264 | num_epochs = int(options.num_epochs) 265 | iter_num = 0 266 | iter_num_tensorboard = 0 267 | total_cur_loss = [] 268 | total_cur_loss_val = [] 269 | iter_num_val_tensorboard = 0 270 | losses = np.zeros((epoch_length, 1)) 271 | losses_val = np.zeros((epoch_length_val, 1)) 272 | rpn_accuracy_rpn_monitor = [] 273 | rpn_accuracy_for_epoch = [] 274 | start_time = time.time() 275 | ###### val ##### 276 | rpn_accuracy_rpn_monitor_val = [] 277 | rpn_accuracy_for_epoch_val = [] 278 | 279 | ################ 280 | best_loss = np.Inf 281 | 282 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 283 | print('Starting training') 284 | # os.makedirs('check_dataset') 285 | vis = True 286 | 287 | for epoch_num in range(num_epochs): 288 | 289 | progbar = generic_utils.Progbar(epoch_length) 290 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 291 | num = 0 292 | while True: 293 | try: 294 | img_data, i3d_f, x_img = next(data_gen_train) 295 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 296 | 297 | num+=1 298 | if version=='roi': 299 | loss_class = model_classifier.train_on_batch([X2[:, :, :],i3d_f], [Y1[:, :, :]]) 300 | else: 301 | loss_class = model_classifier.train_on_batch([x_img,X2[:, :, :],i3d_f], [Y1[:, :, :]]) 302 | losses[iter_num, 0] = loss_class 303 | 304 | iter_num += 1 305 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 306 | iter_num_tensorboard+=1 307 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 308 | 309 | if iter_num == epoch_length: 310 | loss_class_cls = np.mean(losses[:, 0]) 311 | curr_loss =loss_class_cls 312 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 313 | total_cur_loss.append(curr_loss) 314 | iter_num = 0 315 | start_time = time.time() 316 | 317 | #################### Val ######################################################### 318 | iter_num_val = 0 319 | 320 | while True: 321 | # try: 322 | img_data, i3d_f, x_img = next(data_gen_val) 323 | # print("validation") 324 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 325 | if version=='roi': 326 | loss_class = model_classifier.train_on_batch([X2[:, :, :],i3d_f], [Y1[:, :, :]]) 327 | else: 328 | loss_class = model_classifier.train_on_batch([x_img, X2[:, :, :],i3d_f], [Y1[:, :, :]]) 329 | losses_val[iter_num_val,0] = loss_class 330 | iter_num_val += 
--------------------------------------------------------------------------------
/train_whole_noI3d.py:
--------------------------------------------------------------------------------
from __future__ import division
import os
import random
import pprint
import sys
import time
import pickle
import logging
import numpy as np
from optparse import OptionParser

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# Grow GPU memory on demand instead of grabbing it all up front.
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
tf_config.log_device_placement = True  # log which device each op runs on (printed only when run standalone, not in Jupyter)
sess = tf.Session(config=tf_config)
set_session(sess)  # make this session the default session for Keras

import cv2
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from keras import backend as K
from keras.callbacks import TensorBoard
from keras.layers import Input, Lambda
from keras.models import Model
from keras.optimizers import Adam, SGD, RMSprop
from keras.utils import plot_model, generic_utils
from keras_frcnn import config, data_generators
from keras_frcnn import losses as losses
import keras_frcnn.roi_helpers as roi_helpers

sys.setrecursionlimit(40000)
sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d')
from i3d_inception import Inception_Inflated3d

# stdout can be redirected to a log file (enabled below via the --j option).
old_stdout = sys.stdout
log_file = open("message.log", "w")

logging.basicConfig(filename='example.log', level=logging.DEBUG)
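# Note: Inception_Inflated3d is imported but never instantiated in this script;
# the data generator supplies precomputed I3D feature maps with 832 channels,
# which matches the Mixed_4f stage of I3D (see the `shared_layers_orig` input
# below). A rough sketch of producing such features offline with
# keras-kinetics-i3d -- the weight name and clip shape here are assumptions,
# not taken from this repo:
#
#   i3d = Inception_Inflated3d(include_top=False,
#                              weights='rgb_imagenet_and_kinetics',
#                              input_shape=(64, 224, 224, 3))
#   feats = i3d.predict(clip)  # clip: (1, 64, 224, 224, 3); truncate the model
#                              # at an intermediate mixed block for Mixed_4f maps.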
parser = OptionParser()

parser.add_option("-p", "--path", dest="train_path", help="Path to training data.")
parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc.",
                  default="pascal_voc")
parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4)
parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Index of the first training sample to use.", default=0)
parser.add_option("-m", "--output_weight_path", dest="output_weight_path", help="Output directory for weights and logs.", default='./model_frcnn.hdf5')
parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to train on.", default='AVA')
parser.add_option("-e", "--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)
parser.add_option("-v", "--version", dest="version", help="Model variant to build.", default='v1')
parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
parser.add_option("--aug", dest="aug", type=int, help="Whether to augment the training data.", default=0)
parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False)
parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False)
parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).",
                  action="store_true", default=False)
parser.add_option("--config_filename", dest="config_filename",
                  help="Location to store all the metadata related to the training (to be used when testing).",
                  default="config.pickle")
parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by Keras.")
parser.add_option("--j", dest="job", help="If set, redirect stdout to message.log.")

(options, args) = parser.parse_args()

if not options.train_path:  # path to training data is mandatory
    parser.error('Error: path to training data must be specified. Pass --path to command line')
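# Example invocation (paths and values are illustrative only, not from the repo):
#   python train_whole_noI3d.py -p /path/to/ava_annotations.txt -o simple \
#          -m ./out_weights -d AVA -e 100 --network resnet50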
if options.parser == 'pascal_voc':
    from keras_frcnn.pascal_voc_parser import get_data
elif options.parser == 'simple':
    from keras_frcnn.simple_parser import get_data
else:
    raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'")

# Pass the settings from the command line, and persist them in the config object.
C = config.Config()
version = options.version
C.use_horizontal_flips = bool(options.horizontal_flips)
C.use_vertical_flips = bool(options.vertical_flips)
C.rot_90 = bool(options.rot_90)
C.dataset = options.dataset
C.augment = options.aug
C.model_path = os.path.join(options.output_weight_path, 'model.hdf5')
C.num_rois = int(options.num_rois)

if options.network == 'vgg':
    C.network = 'vgg'
    from keras_frcnn import vgg as nn
elif options.network == 'resnet50':
    from keras_frcnn import resnet as nn
    C.network = 'resnet50'
else:
    raise ValueError('Not a valid model: {}'.format(options.network))

# Check if a weight path was passed via the command line.
if options.input_weight_path:
    C.base_net_weights = options.input_weight_path
else:
    # Set the path to the weights based on backend and model.
    C.base_net_weights = nn.get_weight_path()

all_imgs, classes_count, class_mapping = get_data(options.train_path, options.start_idx)

job = options.job
print(len(classes_count), len(class_mapping))
if job:
    sys.stdout = log_file

# if 'bg' not in classes_count:
#     classes_count['bg'] = 0
#     class_mapping['bg'] = len(class_mapping)

C.class_mapping = class_mapping

inv_map = {v: k for k, v in class_mapping.items()}

print('Training images per class:')
pprint.pprint(classes_count)
print('Num classes (including bg) = {}'.format(len(classes_count)))

config_output_filename = os.path.splitext(options.config_filename)[0] + '_' + C.dataset + '.pickle'

with open(config_output_filename, 'wb') as config_f:
    pickle.dump(C, config_f)
    print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename))
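# At test time the pickled config can simply be loaded back, e.g.:
#   with open(config_output_filename, 'rb') as f_in:
#       C_test = pickle.load(f_in)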
random.shuffle(all_imgs)

num_imgs = len(all_imgs)

train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

print('Num train samples {}'.format(len(train_imgs)))
print('Num val samples {}'.format(len(val_imgs)))

# Each generator yields (img_data, i3d_features, preprocessed_image) tuples.
data_gen_train = data_generators.get_i3d_feature(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
data_gen_val = data_generators.get_i3d_feature(val_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='val')


def get_action_dic():
    # Map AVA action ids to action names.
    action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv'
    ac_dic = {}
    with open(action_csv, 'r') as f:
        actions = f.read().splitlines()
    for action in actions[1:]:  # skip the header row
        tags = action.split(',')
        tags = tags[:-1]  # drop the trailing action-type column
        ac_id = int(tags[0])
        ac = ''.join(tags[1:]).replace('"', '')  # rejoin names that contain commas
        ac_dic[ac_id] = ac
    return ac_dic


action_names = get_action_dic()  # id -> name lookup, handy for debugging/visualisation
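# The parser above expects rows of the form `id,name,action_type`, where names
# containing commas are quoted -- an assumed example row (not copied from the file):
#   1,"bend/bow (at the waist)",PERSON_MOVEMENT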
num_classes = len(class_mapping)
print(num_classes)

# The classifier consumes precomputed I3D feature maps (832 channels) directly.
shared_layers_orig = Input(shape=(None, None, None, 832), name='shared_layers_orig')
classifier = nn.classifier_i3d_concat(shared_layers_orig, 1, nb_classes=num_classes, trainable=True, version=version)

model_classifier = Model([shared_layers_orig], classifier)

# model_all holds the same graph and is used to load/save weights for the model.
model_all = Model([shared_layers_orig], classifier)
plot_model(model_all, to_file='model_all_i3d_whole.png', show_shapes=True)

log_folder = os.path.join(options.output_weight_path, 'logs/')
if not os.path.isdir(log_folder):
    os.makedirs(log_folder)
tensorboard = TensorBoard(log_dir=log_folder)
tensorboard.set_model(model_classifier)


def write_log(callback, names, logs, batch_no):
    # Push scalar values straight into the TensorBoard event file.
    for name, value in zip(names, logs):
        summary = tf.Summary()
        summary_value = summary.value.add()
        summary_value.simple_value = value
        summary_value.tag = name
        callback.writer.add_summary(summary, batch_no)
        callback.writer.flush()


optimizer_classifier = Adam(lr=1e-5)
model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label])

# model_all only exists to save/load weights; the loss is a placeholder.
model_all.compile(optimizer='sgd', loss='mae')

epoch_length = 1000
epoch_length_val = 100
num_epochs = int(options.num_epochs)
iter_num = 0
iter_num_tensorboard = 0
iter_num_val_tensorboard = 0
total_cur_loss = []
total_cur_loss_val = []
# Named loss_history rather than `losses` so the keras_frcnn.losses module is not shadowed.
loss_history = np.zeros((epoch_length, 1))
loss_history_val = np.zeros((epoch_length_val, 1))
start_time = time.time()
best_loss = np.Inf

class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')

for epoch_num in range(num_epochs):

    progbar = generic_utils.Progbar(epoch_length)
    print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
    while True:
        try:
            img_data, i3d_f, x_img = next(data_gen_train)
            Y1 = roi_helpers.calc_label(img_data, C, class_mapping)
            # (A commented-out debugging block that drew the ground-truth box and
            # action name on the frame and wrote it to check_dataset/ has been
            # removed here for readability.)

            loss_class = model_classifier.train_on_batch([i3d_f], [Y1])
            loss_history[iter_num, 0] = loss_class

            iter_num += 1
            write_log(tensorboard, ['loss_class'], [loss_class], iter_num_tensorboard)
            iter_num_tensorboard += 1
            progbar.update(iter_num, [('class_loss', np.mean(loss_history[:iter_num, 0]))])

            if iter_num == epoch_length:
                curr_loss = np.mean(loss_history[:, 0])
                write_log(tensorboard, ['total train loss'], [curr_loss], iter_num_tensorboard)
                total_cur_loss.append(curr_loss)
                iter_num = 0
                start_time = time.time()

                #################### Validation ####################
                iter_num_val = 0
                while True:
                    img_data, i3d_f, x_img = next(data_gen_val)
                    Y1 = roi_helpers.calc_label(img_data, C, class_mapping)
                    # test_on_batch: validation must not update the weights.
                    loss_class = model_classifier.test_on_batch([i3d_f], [Y1])
                    loss_history_val[iter_num_val, 0] = loss_class
                    iter_num_val += 1
                    write_log(tensorboard, ['loss_class_val'], [loss_class], iter_num_val_tensorboard)
                    iter_num_val_tensorboard += 1
                    if iter_num_val == epoch_length_val:
                        curr_loss_val = np.mean(loss_history_val[:, 0])
                        write_log(tensorboard, ['total val loss'], [curr_loss_val], iter_num_val_tensorboard)
                        total_cur_loss_val.append(curr_loss_val)
                        break

                if curr_loss < best_loss:
                    if C.verbose:
                        print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                    best_loss = curr_loss
                    model_all.save_weights(C.model_path)

                break

        except Exception as e:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
            print('Exception: {}'.format(e))
            continue

sys.stdout = old_stdout

plt.plot(total_cur_loss)
plt.plot(total_cur_loss_val)
plt.legend(['train loss', 'val loss'], loc='upper left')

savefigure = os.path.join(options.output_weight_path, 'loss_plot.jpg')
plt.savefig(savefigure)
print('Training complete, exiting.')
log_file.close()
--------------------------------------------------------------------------------
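Both scripts lean on helpers from `keras_frcnn.roi_helpers` that are not shown in this listing: `calc_iou` matches proposal boxes against the ground truth, and `calc_label` builds the multi-label classification target. As a minimal sketch only -- the field names (`bboxes`, `class`) follow the usual keras_frcnn annotation format and are assumptions here -- `calc_label` plausibly reduces to a multi-hot encoding:

import numpy as np

def calc_label_sketch(img_data, class_mapping):
    # Multi-hot target of shape (1, 1, num_classes): a frame can show several
    # actions at once, so every class present in the annotations gets a 1.
    y = np.zeros((1, 1, len(class_mapping)), dtype=np.float32)
    for box in img_data['bboxes']:
        y[0, 0, class_mapping[box['class']]] = 1.0
    return y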