├── test_final_noI3d_vis.py ├── preprocess_script.py ├── RoiPoolingConv_i3d2.py ├── evaluate_i3d_classifier.py ├── evaluate_sample.py ├── test_final_i3d_bk.py ├── test_i3d_whole.py ├── test_frcnn_ava_bk.py ├── test_ava_concat.py ├── test_final_i3d.py ├── test_frcnn_AVA.py ├── train_frcnn_i3d_cls.py ├── test_final_noI3d.py ├── train_frcnn_i3d_batch.py ├── train_whole_noI3d.py ├── train_frcnn_i3d_whole.py ├── train_frcnn_i3d_v2.py ├── test_with_vis.py ├── extract_i3d_features.py └── train_frcnn_noI3d.py /test_final_noI3d_vis.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /preprocess_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | 5 | # f = open(action_csv,'r') 6 | # # train_lines = f.read().splitlines() 7 | # df = pd.read_csv(csv_file) 8 | # df.columns = ['1','2','3','4','5','6','7','8'] 9 | # 10 | # ac_id_dic = get_action_dic() 11 | # # print ac_idf 12 | # actions = f.read().splitlines() 13 | # action_list = set() 14 | # for action in actions[1:]: 15 | # tags = action.split(',') 16 | # tags = tags[:-1] 17 | # ac_id = int(tags[0]) 18 | # ac = ''.join(tags[1:]) 19 | # ac = ac.replace('"','') 20 | # action_list.add(ac) 21 | # 22 | # print action_list 23 | # final_df = pd.DataFrame() 24 | # ac_vid = {} 25 | # vids_list = [] 26 | # for ac in tqdm(action_list): 27 | # # print ac 28 | # id = ac_id_dic[ac] 29 | # vids_df = df.loc[df['7'] == id] 30 | # vid_name = vids_df.iloc[1,0] 31 | # vids_list.append(vid_name) 32 | # vid_df = df.loc[df['1']== vid_name] 33 | # 34 | # final_df = final_df.append(vid_df,ignore_index=True) 35 | # df = df.drop(df[df['1']==vids_df.iloc[1,0]].index) 36 | # 37 | # final_df['2'] = final_df['2'].apply(lambda x: str(x).zfill(4)) 38 | # 39 | # final_df.to_csv(csv_file_subset, header = False, index = False, float_format='%.3f') 40 | 41 | 42 | 43 | 44 | 45 | def form_multi_data(): 46 | dataset = open('/home/subha/hoi_vid/keras-kinetics-i3d/data/ava/ava_data_subset_new.txt','r') 47 | f = open('/home/subha/hoi_vid/keras-kinetics-i3d/data/ava/ava_data_subset_multi.txt','w+') 48 | # df = pd.read_csv(dataset) 49 | # df.columns = ['1','2','3','4','5','6'] 50 | lines = dataset.read().splitlines() 51 | while lines: 52 | # print len(lines) 53 | 54 | ann = lines[0] 55 | tags =ann.split(',') 56 | bbx = ','.join(tags[:-1]) 57 | anns = [l for l in lines if bbx in l] 58 | # print anns 59 | [lines.remove(l) for l in anns] 60 | # [f.write(l+'\n') for l in anns] 61 | for a in anns: 62 | ac = a.split(',')[-1] 63 | bbx = bbx+','+ac 64 | 65 | f.write(bbx+'\n') 66 | # print bbx 67 | 68 | form_multi_data() 69 | -------------------------------------------------------------------------------- /RoiPoolingConv_i3d2.py: -------------------------------------------------------------------------------- 1 | from keras.engine.topology import Layer 2 | import keras.backend as K 3 | 4 | if K.backend() == 'tensorflow': 5 | import tensorflow as tf 6 | 7 | class RoiPoolingConv(Layer): 8 | '''ROI pooling layer for 2D inputs. 9 | See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition, 10 | K. He, X. Zhang, S. Ren, J. Sun 11 | # Arguments 12 | pool_size: int 13 | Size of pooling region to use. pool_size = 7 will result in a 7x7 region. 
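        (Each ROI is pooled to a fixed pool_size x pool_size output, regardless of the size of the region itself.)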
14 | num_rois: number of regions of interest to be used 15 | # Input shape 16 | list of two 4D tensors [X_img,X_roi] with shape: 17 | X_img: 18 | `(1, channels, rows, cols)` if dim_ordering='th' 19 | or 4D tensor with shape: 20 | `(1, rows, cols, channels)` if dim_ordering='tf'. 21 | X_roi: 22 | `(1,num_rois,4)` list of rois, with ordering (x,y,w,h) 23 | # Output shape 24 | 3D tensor with shape: 25 | `(1, num_rois, channels, pool_size, pool_size)` 26 | ''' 27 | def __init__(self, pool_size, num_rois, **kwargs): 28 | 29 | self.dim_ordering = K.image_dim_ordering() 30 | assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}' 31 | 32 | self.pool_size = pool_size 33 | self.num_rois = num_rois 34 | 35 | super(RoiPoolingConv, self).__init__(**kwargs) 36 | 37 | def build(self, input_shape): 38 | if self.dim_ordering == 'th': 39 | self.nb_channels = input_shape[0][1] 40 | elif self.dim_ordering == 'tf': 41 | self.nb_channels = input_shape[0][3] 42 | 43 | def compute_output_shape(self, input_shape): 44 | if self.dim_ordering == 'th': 45 | return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size 46 | else: 47 | return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels 48 | 49 | def call(self, x, mask=None): 50 | 51 | assert(len(x) == 2) 52 | 53 | img = x[0] 54 | rois = x[1] 55 | 56 | input_shape = K.shape(img) 57 | print(img.shape.as_list()) 58 | outputs = [] 59 | 60 | for roi_idx in range(self.num_rois): 61 | 62 | x = rois[0, roi_idx, 0] 63 | y = rois[0, roi_idx, 1] 64 | w = rois[0, roi_idx, 2] 65 | h = rois[0, roi_idx, 3] 66 | # print(x,y,w,h) 67 | row_length = w / float(self.pool_size) 68 | col_length = h / float(self.pool_size) 69 | 70 | num_pool_regions = self.pool_size 71 | 72 | #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op 73 | # in theano. 
The theano implementation is much less efficient and leads to long compile times 74 | 75 | if self.dim_ordering == 'th': 76 | for jy in range(num_pool_regions): 77 | for ix in range(num_pool_regions): 78 | x1 = x + ix * row_length 79 | x2 = x1 + row_length 80 | y1 = y + jy * col_length 81 | y2 = y1 + col_length 82 | 83 | x1 = K.cast(x1, 'int32') 84 | x2 = K.cast(x2, 'int32') 85 | y1 = K.cast(y1, 'int32') 86 | y2 = K.cast(y2, 'int32') 87 | 88 | x2 = x1 + K.maximum(1,x2-x1) 89 | y2 = y1 + K.maximum(1,y2-y1) 90 | 91 | new_shape = [input_shape[0], input_shape[1],input_shape[2], 92 | y2 - y1, x2 - x1] 93 | 94 | x_crop = img[:,:, :, y1:y2, x1:x2] 95 | xm = K.reshape(x_crop, new_shape) 96 | pooled_val = K.max(xm, axis=(3,4)) 97 | outputs.append(pooled_val) 98 | 99 | elif self.dim_ordering == 'tf': 100 | x = K.cast(x, 'int32') 101 | y = K.cast(y, 'int32') 102 | w = K.cast(w, 'int32') 103 | h = K.cast(h, 'int32') 104 | 105 | rs = tf.image.resize_images(img[:,:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size)) 106 | outputs.append(rs) 107 | 108 | final_output = K.concatenate(outputs, axis=0) 109 | print final_output.shape.as_list() 110 | print self.nb_channels 111 | final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels)) 112 | 113 | if self.dim_ordering == 'th': 114 | final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3)) 115 | else: 116 | final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4)) 117 | 118 | print("final shape",final_output.shape.as_list()) 119 | return final_output 120 | 121 | 122 | def get_config(self): 123 | config = {'pool_size': self.pool_size, 124 | 'num_rois': self.num_rois} 125 | base_config = super(RoiPoolingConv, self).get_config() 126 | return dict(list(base_config.items()) + list(config.items())) 127 | -------------------------------------------------------------------------------- /evaluate_i3d_classifier.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Loads pretrained model of I3d Inception architecture for the paper: 'https://arxiv.org/abs/1705.07750' 3 | Evaluates a RGB and Flow sample similar to the paper's github repo: 'https://github.com/deepmind/kinetics-i3d' 4 | ''' 5 | 6 | import numpy as np 7 | import argparse 8 | 9 | from check_i3d import Inception_Inflated3d 10 | # from i3d_inception import Inception_Inflated3d 11 | from tensorflow.python import keras 12 | from keras.utils import plot_model 13 | import os 14 | import pdb 15 | 16 | 17 | 18 | 19 | 20 | NUM_FRAMES = 79 21 | FRAME_HEIGHT = 224 22 | FRAME_WIDTH = 224 23 | NUM_RGB_CHANNELS = 3 24 | NUM_FLOW_CHANNELS = 2 25 | 26 | NUM_CLASSES = 400 27 | 28 | #'/groups/jbhuang_lab/data/action/UCF101/npy/Diving/v_Diving_g01_c01.npy' 29 | 30 | SAMPLE_DATA_PATH = { 31 | 'rgb' :'data/v_CricketShot_g04_c01_rgb.npy', 32 | 'flow' : 'data/v_CricketShot_g04_c01_flow.npy' 33 | } 34 | 35 | LABEL_MAP_PATH = 'data/label_map.txt' 36 | 37 | def main(args): 38 | # load the kinetics classes 39 | kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')] 40 | 41 | 42 | if args.eval_type in ['rgb', 'joint']: 43 | if args.no_imagenet_pretrained: 44 | # build model for RGB data 45 | # and load pretrained weights (trained on kinetics dataset only) 46 | rgb_model = Inception_Inflated3d( 47 | include_top=False, 48 | weights='rgb_kinetics_only', 49 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 50 | classes=NUM_CLASSES) 51 | else: 52 | # build model for RGB data 53 | # and load 
pretrained weights (trained on imagenet and kinetics dataset) 54 | rgb_model = Inception_Inflated3d( 55 | include_top=False, 56 | weights='rgb_imagenet_and_kinetics', 57 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 58 | classes=NUM_CLASSES) 59 | # pdb.set_trace() 60 | # print rgb_model.summary() 61 | plot_model(rgb_model, to_file='model_without_top.png', show_shapes = True) 62 | # print rgb_model.summary() 63 | 64 | 65 | # load RGB sample (just one example) 66 | ''' 67 | rgb_sample = np.load(SAMPLE_DATA_PATH['rgb']) 68 | # # 69 | # # # make prediction 70 | rgb_features = rgb_model.predict(rgb_sample) 71 | # rgb_features 72 | # print rgb_features.shape.as_list() 73 | # 74 | # print rgb_logits.shape 75 | features = rgb_features[:,11,:,:,:] 76 | features = np.array(features) 77 | print features.shape 78 | ''' 79 | ''' 80 | if args.eval_type in ['flow', 'joint']: 81 | if args.no_imagenet_pretrained: 82 | # build model for optical flow data 83 | # and load pretrained weights (trained on kinetics dataset only) 84 | flow_model = Inception_Inflated3d( 85 | include_top=True, 86 | weights='flow_kinetics_only', 87 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 88 | classes=NUM_CLASSES) 89 | else: 90 | # build model for optical flow data 91 | # and load pretrained weights (trained on imagenet and kinetics dataset) 92 | flow_model = Inception_Inflated3d( 93 | include_top=True, 94 | weights='flow_imagenet_and_kinetics', 95 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 96 | classes=NUM_CLASSES) 97 | 98 | 99 | # load flow sample (just one example) 100 | flow_sample = np.load(SAMPLE_DATA_PATH['flow']) 101 | 102 | # make prediction 103 | flow_logits = flow_model.predict(flow_sample) 104 | 105 | 106 | # produce final model logits 107 | if args.eval_type == 'rgb': 108 | sample_logits = rgb_logits 109 | elif args.eval_type == 'flow': 110 | sample_logits = flow_logits 111 | else: # joint 112 | sample_logits = rgb_logits + flow_logits 113 | 114 | # produce softmax output from model logit for class probabilities 115 | sample_logits = sample_logits[0] # we are dealing with just one example 116 | sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits)) 117 | 118 | sorted_indices = np.argsort(sample_predictions)[::-1] 119 | 120 | print('\nNorm of logits: %f' % np.linalg.norm(sample_logits)) 121 | print('\nTop classes and probabilities') 122 | for index in sorted_indices[:20]: 123 | print(sample_predictions[index], sample_logits[index], kinetics_classes[index]) 124 | 125 | 126 | return 127 | 128 | ''' 129 | if __name__ == '__main__': 130 | # parse arguments 131 | parser = argparse.ArgumentParser() 132 | parser.add_argument('--eval-type', 133 | help='specify model type. 1 stream (rgb or flow) or 2 stream (joint = rgb and flow).', 134 | type=str, choices=['rgb', 'flow', 'joint'], default='joint') 135 | 136 | parser.add_argument('--no-imagenet-pretrained', 137 | help='If set, load model weights trained only on kinetics dataset. 
Otherwise, load model weights trained on imagenet and kinetics dataset.', 138 | action='store_true') 139 | 140 | 141 | args = parser.parse_args() 142 | main(args) 143 | -------------------------------------------------------------------------------- /evaluate_sample.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Loads pretrained model of I3d Inception architecture for the paper: 'https://arxiv.org/abs/1705.07750' 3 | Evaluates a RGB and Flow sample similar to the paper's github repo: 'https://github.com/deepmind/kinetics-i3d' 4 | ''' 5 | 6 | import numpy as np 7 | import argparse 8 | 9 | from i3d_inception import Inception_Inflated3d 10 | # from i3d_inception import Inception_Inflated3d 11 | from tensorflow.python import keras 12 | from keras.utils import plot_model 13 | import os 14 | import pdb 15 | from keras.layers import Input 16 | 17 | 18 | 19 | 20 | 21 | 22 | NUM_FRAMES = 79 23 | FRAME_HEIGHT = 224 24 | FRAME_WIDTH = 224 25 | NUM_RGB_CHANNELS = 3 26 | NUM_FLOW_CHANNELS = 2 27 | 28 | NUM_CLASSES = 400 29 | 30 | #'/groups/jbhuang_lab/data/action/UCF101/npy/Diving/v_Diving_g01_c01.npy' 31 | 32 | SAMPLE_DATA_PATH = { 33 | 'rgb' :'data/v_CricketShot_g04_c01_rgb.npy', 34 | 'flow' : 'data/v_CricketShot_g04_c01_flow.npy' 35 | } 36 | 37 | LABEL_MAP_PATH = 'data/label_map.txt' 38 | 39 | def main(args): 40 | # load the kinetics classes 41 | kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')] 42 | 43 | 44 | if args.eval_type in ['rgb', 'joint']: 45 | if args.no_imagenet_pretrained: 46 | # build model for RGB data 47 | # and load pretrained weights (trained on kinetics dataset only) 48 | rgb_model = Inception_Inflated3d( 49 | include_top=False, 50 | weights='rgb_kinetics_only', 51 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 52 | classes=NUM_CLASSES) 53 | else: 54 | # build model for RGB data 55 | # and load pretrained weights (trained on imagenet and kinetics dataset) 56 | rgb_model = Inception_Inflated3d( 57 | include_top=False, 58 | weights='rgb_imagenet_and_kinetics', 59 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), 60 | classes=NUM_CLASSES) 61 | # pdb.set_trace() 62 | # print rgb_model.summary() 63 | # plot_model(rgb_model, to_file='model_without_top.png', show_shapes = True) 64 | # print rgb_model.summary() 65 | 66 | 67 | # load RGB sample (just one example) 68 | vid_input = Input(shape =(79, 224,224, 3)) 69 | features = rgb_model(vid_input) 70 | print features.shape.as_list() 71 | feature_shape = features.shape.as_list() 72 | mid_slice_no = (feature_shape[1]/2)+1 73 | print mid_slice_no 74 | rgb_features = features[:,mid_slice_no,:,:,:] 75 | 76 | print rgb_features.shape.as_list() 77 | ''' 78 | rgb_sample = np.load(SAMPLE_DATA_PATH['rgb']) 79 | # # 80 | # # # make prediction 81 | rgb_features = rgb_model.predict(rgb_sample) 82 | # rgb_features 83 | # print rgb_features.shape.as_list() 84 | # 85 | # print rgb_logits.shape 86 | features = rgb_features[:,11,:,:,:] 87 | features = np.array(features) 88 | print features.shape 89 | ''' 90 | ''' 91 | if args.eval_type in ['flow', 'joint']: 92 | if args.no_imagenet_pretrained: 93 | # build model for optical flow data 94 | # and load pretrained weights (trained on kinetics dataset only) 95 | flow_model = Inception_Inflated3d( 96 | include_top=True, 97 | weights='flow_kinetics_only', 98 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 99 | classes=NUM_CLASSES) 100 | else: 101 | # build model for optical 
flow data 102 | # and load pretrained weights (trained on imagenet and kinetics dataset) 103 | flow_model = Inception_Inflated3d( 104 | include_top=True, 105 | weights='flow_imagenet_and_kinetics', 106 | input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), 107 | classes=NUM_CLASSES) 108 | 109 | 110 | # load flow sample (just one example) 111 | flow_sample = np.load(SAMPLE_DATA_PATH['flow']) 112 | 113 | # make prediction 114 | flow_logits = flow_model.predict(flow_sample) 115 | 116 | 117 | # produce final model logits 118 | if args.eval_type == 'rgb': 119 | sample_logits = rgb_logits 120 | elif args.eval_type == 'flow': 121 | sample_logits = flow_logits 122 | else: # joint 123 | sample_logits = rgb_logits + flow_logits 124 | 125 | # produce softmax output from model logit for class probabilities 126 | sample_logits = sample_logits[0] # we are dealing with just one example 127 | sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits)) 128 | 129 | sorted_indices = np.argsort(sample_predictions)[::-1] 130 | 131 | print('\nNorm of logits: %f' % np.linalg.norm(sample_logits)) 132 | print('\nTop classes and probabilities') 133 | for index in sorted_indices[:20]: 134 | print(sample_predictions[index], sample_logits[index], kinetics_classes[index]) 135 | 136 | 137 | return 138 | 139 | ''' 140 | if __name__ == '__main__': 141 | # parse arguments 142 | parser = argparse.ArgumentParser() 143 | parser.add_argument('--eval-type', 144 | help='specify model type. 1 stream (rgb or flow) or 2 stream (joint = rgb and flow).', 145 | type=str, choices=['rgb', 'flow', 'joint'], default='joint') 146 | 147 | parser.add_argument('--no-imagenet-pretrained', 148 | help='If set, load model weights trained only on kinetics dataset. Otherwise, load model weights trained on imagenet and kinetics dataset.', 149 | action='store_true') 150 | 151 | 152 | args = parser.parse_args() 153 | main(args) 154 | -------------------------------------------------------------------------------- /test_final_i3d_bk.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | import time 31 | 32 | 33 | ''' 34 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 35 | 36 | ''' 37 | sys.setrecursionlimit(40000) 38 | 39 | parser = OptionParser() 40 | 41 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 42 | parser.add_option("-n", "--num_rois", 
type="int", dest="num_rois", 43 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 44 | 45 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 46 | help="Number of ROIs per iteration. Higher means more memory use.", default='ava_val_subset_80.csv') 47 | parser.add_option("-m", "--model_name", dest="model_name", 48 | help="Path to model.") 49 | parser.add_option("-o", "--output", dest="output", 50 | help="csv to save predictions.") 51 | parser.add_option("--config_filename", dest="config_filename", help= 52 | "Location to read the metadata related to the training (generated when training).", 53 | default="config_noI3d_cheating8_AVA.pickle") 54 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 55 | 56 | (options, args) = parser.parse_args() 57 | 58 | if not options.test_path: # if filename is not given 59 | parser.error('Error: path to test data must be specified. Pass --path to command line') 60 | 61 | 62 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 63 | config_output_filename = options.config_filename 64 | with open(config_output_filename, 'rb') as f_in: 65 | C = pickle.load(f_in) 66 | 67 | if C.network == 'resnet50': 68 | import keras_frcnn.resnet as nn 69 | elif C.network == 'vgg': 70 | import keras_frcnn.vgg as nn 71 | 72 | # turn off any data augmentation at test time 73 | C.use_horizontal_flips = False 74 | C.use_vertical_flips = False 75 | C.rot_90 = False 76 | resized_width = 320 77 | resized_height = 400 78 | img_path = options.test_path 79 | output_csv_file = os.path.join('evaluation','phase2',options.output) 80 | fc = open(output_csv_file,'w+') 81 | def format_img_size(img, C): 82 | """ formats the image size based on config """ 83 | img_min_side = float(C.im_size) 84 | (height,width,_) = img.shape 85 | ratio_w = resized_width/width 86 | ratio_h = resized_height/height 87 | new_width = resized_width 88 | new_height = resized_height 89 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 90 | return img, ratio_w, ratio_h 91 | 92 | def format_img(img, C): 93 | """ formats an image for model prediction based on config """ 94 | img, ratio_w, ratio_h= format_img_size(img, C) 95 | img = format_img_channels(img, C) 96 | return img, ratio_w, ratio_h 97 | 98 | # Method to transform the coordinates of the bounding box to its original size 99 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 100 | 101 | real_x1 = int(round(x1 // ratio_w)) 102 | real_y1 = int(round(y1 // ratio_h)) 103 | real_x2 = int(round(x2 // ratio_w)) 104 | real_y2 = int(round(y2 // ratio_h)) 105 | 106 | return (real_x1, real_y1, real_x2 ,real_y2) 107 | 108 | class_mapping = C.class_mapping 109 | classes_count = class_mapping 110 | if 'bg' not in class_mapping: 111 | class_mapping['bg'] = len(class_mapping) 112 | 113 | class_mapping = {v: k for k, v in class_mapping.items()} 114 | # print(class_mapping) 115 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 116 | C.num_rois = int(options.num_rois) 117 | # print "Num rois originally",C.num_rois 118 | if C.network == 'resnet50': 119 | num_features = 1024 120 | elif C.network == 'vgg': 121 | num_features = 512 122 | 123 | 124 | def extract_numpy_single_frame(img,C): 125 | 126 | img = (img/255.)*2 - 1 127 | return img 128 | 129 | def get_frame_idx(img_path): 130 | winSize = 64 131 | tags = img_path.split(os.path.sep) 132 | vid_folder = 
'/'+'/'.join(tags[1:-1]) 133 | frames = os.listdir(vid_folder) 134 | if 'CAD' in img_path: 135 | frames = [f for f in frames if f.startswith('RGB')] 136 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 137 | else: 138 | frames.sort(key = lambda x: int(x.split('.')[0])) 139 | frame_index = frames.index(tags[-1]) 140 | fi = get_frames_index(frames,frame_index,winSize) 141 | seq =[frames[k] if k!=-1 else k for k in fi] 142 | # print(seq[0],seq[31]) 143 | # print seq 144 | return seq 145 | optimizer_classifier = Adam(lr=1e-5) 146 | 147 | if K.image_dim_ordering() == 'th': 148 | input_shape_img = (3, None, None) 149 | # input_shape_features = (num_features, None, None) 150 | else: 151 | input_shape_img = (None, None, 3) 152 | 153 | # shared_layers_input= Input(shape=( None,None,832)) 154 | roi_input = Input(shape=(None, 4)) 155 | vid_input = Input(shape =(None, None, None, 3)) 156 | vid_input_shape = (64, 400,320, 3) 157 | feature_map_input = Input(shape=(None, None, None,None,832)) 158 | img_input = Input(shape=(None, None, 3)) 159 | shared_layers_image = nn.nn_base(img_input, trainable=False) 160 | rgb_model = Inception_Inflated3d( 161 | include_top=False, 162 | weights='rgb_kinetics_only', 163 | input_shape=vid_input_shape, 164 | classes=classes_count) 165 | 166 | 167 | # classifier = nn.classifier_i3d_concat_new(shared_layers_input, 1, nb_classes=num_classes, trainable=True) 168 | # model_classifier = Model([vid_input], classifier) 169 | 170 | 171 | classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image,roi_input, 1, nb_classes=len(classes_count), trainable=True) 172 | model_classifier = Model([feature_map_input, roi_input], classifier) 173 | 174 | model_name = os.path.join(options.model_name,'model.hdf5') 175 | print('Loading weights from {}'.format(model_name)) 176 | model_classifier.load_weights(model_name, by_name=True) 177 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 178 | ''' 179 | 180 | all_imgs = [] 181 | classes = {} 182 | bbox_threshold = 0.7 183 | visualise = True 184 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 185 | df = pd.read_csv(f_val) 186 | final_predictions = [] 187 | 188 | 189 | indices = range(len(df)) 190 | print len(df) 191 | for i in tqdm(range(len(df))): 192 | try: 193 | # ind = indices[i:i+bs] 194 | row = df.iloc[i,:] 195 | val_vid = row[0] 196 | vid_path = os.path.join(img_path,val_vid) 197 | img_name = str(int(row[1]))+'.jpg' 198 | filepath = os.path.join(vid_path,img_name) 199 | k = 0 200 | k+=1 201 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 202 | continue 203 | st = time.time() 204 | filepath = os.path.join(vid_path,img_name) 205 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 206 | img = cv2.imread(filepath) 207 | tags = filepath.split(os.path.sep) 208 | img_folder = '/'+'/'.join(tags[1:-1]) 209 | seq = get_frame_idx(filepath) 210 | s1 = time.time() 211 | vid_numpy = [] 212 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 213 | for frame in seq: 214 | if frame!=-1: 215 | fr_name = os.path.join(img_folder, frame) 216 | np_name = fr_name.replace('.jpg','.npy') 217 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 218 | fr_npy = np.load(np_name) 219 | vid_numpy.append(fr_npy) 220 | else: 221 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 222 | e1 = time.time() 223 | vid_numpy = np.array(vid_numpy) 
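        # The box below is mapped from normalized AVA coordinates into the
        # coordinate frame of the I3D feature map: scaling by resized_width /
        # resized_height gives pixel coordinates, and the division by 16
        # corresponds to the spatial stride of the Mixed_4f (832-channel)
        # output of Inception_Inflated3d relative to the 400x320 input frames.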
224 |         vid_numpy = np.expand_dims(vid_numpy,axis=0)
225 |         x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16)
226 |         w = x2-x1
227 |         h = y2-y1
228 |         roi = np.array([x1,y1,w,h])
229 |         rois = np.expand_dims(roi,axis=0)
230 |         rois = np.expand_dims(rois,axis=0)
231 |         s2 = time.time()
232 |         shared_layers_orig = rgb_model.predict(vid_numpy)
233 |         print shared_layers_orig.shape
234 |         s3 = time.time()
235 |         [P_cls]= model_classifier.predict([shared_layers_orig, rois])
236 |         seq_name = filepath.split(os.path.sep)[-2]
237 |         f_predicted = open(output_csv_file,'a+')
238 |         [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(10) if class_mapping[cn]!='bg']
239 |         f_predicted.close()
240 |
241 |     except Exception as e:
242 |         exc_type, exc_obj, exc_tb = sys.exc_info()
243 |         fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
244 |         print(exc_type, fname, exc_tb.tb_lineno)
245 |         print('Exception: {}'.format(e))
246 |         print(filepath)
247 |         continue
248 | '''
--------------------------------------------------------------------------------
/test_i3d_whole.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import cv2
4 | import numpy as np
5 | import sys
6 | import pickle
7 | from optparse import OptionParser
8 | import time
9 | from keras_frcnn import config
10 | from keras import backend as K
11 | from keras.layers import Input
12 | from keras.models import Model
13 | from keras_frcnn import roi_helpers
14 | from keras_frcnn import config, data_generators
15 | from keras_frcnn import losses as losses
16 | import keras_frcnn.roi_helpers as roi_helpers
17 | from keras.utils import generic_utils
18 | from keras.layers import Lambda
19 | from i3d_inception import Inception_Inflated3d
20 | import collections
21 | from keras_frcnn.utils import *
22 | from pdb import set_trace as bp
23 | from tqdm import tqdm
24 | from keras_frcnn import losses as losses
25 | from keras.optimizers import Adam, SGD, RMSprop
26 | import pandas as pd
27 | from tqdm import tqdm
28 | from pdb import set_trace as bp
29 | from keras.utils.training_utils import multi_gpu_model
30 |
31 |
32 | '''
33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/
34 |
35 | '''
36 | sys.setrecursionlimit(40000)
37 |
38 | parser = OptionParser()
39 |
40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.")
41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
42 |     help="Number of ROIs per iteration. Higher means more memory use.", default=4)
43 |
44 | parser.add_option("-v", "--val_data", type="str", dest="val_data",
45 |     help="CSV file with the validation annotations to evaluate.", default='ava_val_subset_80.csv')
46 | parser.add_option("-m", "--model_name", dest="model_name",
47 |     help="Path to model.")
48 | parser.add_option("-o", "--output", dest="output",
49 |     help="csv to save predictions.")
50 | parser.add_option("--config_filename", dest="config_filename", help=
51 |     "Location to read the metadata related to the training (generated when training).",
52 |     default="config_subset_AVA.pickle")
53 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
54 |
55 | (options, args) = parser.parse_args()
56 |
57 | if not options.test_path: # if filename is not given
58 |     parser.error('Error: path to test data must be specified. Pass --path to command line')
59 |
60 |
61 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/'
62 | config_output_filename = options.config_filename
63 | with open(config_output_filename, 'rb') as f_in:
64 |     C = pickle.load(f_in)
65 |
66 | if C.network == 'resnet50':
67 |     import keras_frcnn.resnet as nn
68 | elif C.network == 'vgg':
69 |     import keras_frcnn.vgg as nn
70 |
71 | # turn off any data augmentation at test time
72 | C.use_horizontal_flips = False
73 | C.use_vertical_flips = False
74 | C.rot_90 = False
75 | resized_width = 320
76 | resized_height = 400
77 | img_path = options.test_path
78 | output_csv_file = os.path.join('evaluation','phase2',options.output)
79 | fc = open(output_csv_file,'w+')
80 | def format_img_size(img, C):
81 |     """ formats the image size based on config """
82 |     img_min_side = float(C.im_size)
83 |     (height,width,_) = img.shape
84 |     ratio_w = resized_width/width
85 |     ratio_h = resized_height/height
86 |     new_width = resized_width
87 |     new_height = resized_height
88 |     img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
89 |     return img, ratio_w, ratio_h
90 |
91 | def format_img(img, C):
92 |     """ formats an image for model prediction based on config """
93 |     img, ratio_w, ratio_h= format_img_size(img, C)
94 |     img = format_img_channels(img, C)
95 |     return img, ratio_w, ratio_h
96 |
97 | # Method to transform the coordinates of the bounding box to its original size
98 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2):
99 |
100 |     real_x1 = int(round(x1 // ratio_w))
101 |     real_y1 = int(round(y1 // ratio_h))
102 |     real_x2 = int(round(x2 // ratio_w))
103 |     real_y2 = int(round(y2 // ratio_h))
104 |
105 |     return (real_x1, real_y1, real_x2 ,real_y2)
106 |
107 | class_mapping = C.class_mapping
108 | classes_count = class_mapping
109 | print len(class_mapping)
110 | # bp()
111 | # if 'bg' not in class_mapping:
112 | #     class_mapping['bg'] = len(class_mapping)
113 |
114 | class_mapping = {v: k for k, v in class_mapping.items()}
115 | # print(class_mapping)
116 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
117 | C.num_rois = int(options.num_rois)
118 | # print "Num rois originally",C.num_rois
119 | if C.network == 'resnet50':
120 |     num_features = 1024
121 | elif C.network == 'vgg':
122 |     num_features = 512
123 |
124 | if K.image_dim_ordering() == 'th':
125 |     input_shape_img = (3, None, None)
126 |     # input_shape_features = (num_features, None, None)
127 | else:
128 |     input_shape_img = (None, None, 3)
129 |
130 | shared_layers_input= Input(shape=( None,None,None,832))
131 | roi_input = Input(shape=(None, 4))
132 | vid_input = Input(shape =(None, None, None, 3))
133 | vid_input_shape = (64, 400,320, 3)
134 | img_input = Input(shape=(None, None, 3))
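# Inputs to the classifier head: `feature_map_input` carries the precomputed
# 832-channel I3D feature map, `img_input` feeds the 2D ResNet base
# (`nn.nn_base`) for per-frame appearance features, and `roi_input` carries
# (x, y, w, h) person boxes already scaled to feature-map coordinates.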
135 |
136 | feature_map_input = Input(shape=(None, None,None,832))
137 | shared_layers_image = nn.nn_base(img_input, trainable=False)
138 |
139 |
140 | rgb_model = Inception_Inflated3d(
141 |     include_top=False,
142 |     weights='rgb_kinetics_only',
143 |     input_shape=vid_input_shape,
144 |     classes=classes_count)
145 | def get_new_img_size(width, height, img_min_side, C):
146 |     img_min_side = 448  # overrides the img_min_side argument
147 |     if width <= height:
148 |         f = float(img_min_side) / width
149 |         resized_height = int(f * height)
150 |         resized_width = img_min_side
151 |     else:
152 |         f = float(img_min_side) / height
153 |         resized_width = int(f * width)
154 |         resized_height = img_min_side
155 |
156 |     if C.dataset == 'AVA':
157 |         return resized_width, resized_height
158 |     else:
159 |         return 640, 480
160 |
161 | def extract_numpy_single_frame(img,C):
162 |
163 |     img = (img/255.)*2 - 1
164 |     return img
165 |
166 | def get_frame_idx(img_path):
167 |     winSize = 64
168 |     tags = img_path.split(os.path.sep)
169 |     vid_folder = '/'+'/'.join(tags[1:-1])
170 |     frames = os.listdir(vid_folder)
171 |     if 'CAD' in img_path:
172 |         frames = [f for f in frames if f.startswith('RGB')]
173 |         frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1]))
174 |     else:
175 |         frames.sort(key = lambda x: int(x.split('.')[0]))
176 |     frame_index = frames.index(tags[-1])
177 |     fi = get_frames_index(frames,frame_index,winSize)
178 |     seq =[frames[k] if k!=-1 else k for k in fi]
179 |     # print(seq[0],seq[31])
180 |     # print seq
181 |     return seq
182 | optimizer_classifier = Adam(lr=1e-5)
183 | # classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count), trainable=True)
184 | print len(classes_count)
185 | # bp()
186 | classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count))
187 |
188 | # model_classifier_only = Model([feature_map_input, roi_input], classifier)
# NOTE: the predict call further down passes [x_img, shared_layers_orig, rois],
# so the model presumably needs the image and ROI inputs as well; compare
# test_ava_concat.py, which builds Model([img_input, feature_map_input, roi_input], ...).
189 | model_classifier = Model([feature_map_input], classifier)
190 | # model_classifier = multi_gpu_model(model_classifier, gpus=2)
191 | model_name = os.path.join(options.model_name,'model.hdf5')
192 | print('Loading weights from {}'.format(model_name))
193 | model_classifier.load_weights(model_name, by_name=True)
194 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label])
195 |
196 |
197 | all_imgs = []
198 | classes = {}
199 | bbox_threshold = 0.7
200 | visualise = True
201 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data)
202 | df = pd.read_csv(f_val)
203 | # ac_id = get_action_dic()
204 |
205 | # for val_vid in val_vids:
206 | final_predictions = []
207 |
208 |
209 | indices = range(len(df))
210 | print len(df)
211 | # bp()
212 | bs = 8
213 | for i in tqdm(range(len(df))):
214 |
215 |     try:
216 |         # ind = indices[i:i+bs]
217 |         row = df.iloc[i,:]
218 |         val_vid = row[0]
219 |         vid_path = os.path.join(img_path,val_vid)
220 |         img_name = str(int(row[1]))+'.jpg'
221 |         filepath = os.path.join(vid_path,img_name)
222 |
223 |         val_vid = row[0]
224 |         vid_path = os.path.join(img_path,val_vid)
225 |         img_name = str(int(row[1]))+'.jpg'
226 |         k = 0
227 |         k+=1
228 |
229 |         if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
230 |             continue
231 |         st = time.time()
232 |         filepath = os.path.join(vid_path,img_name)
233 |         # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg'
234 |         fr_num = filepath.split(os.path.sep)[-1].split('.')[0]
235 |         img = cv2.imread(filepath)
236 |
# x_img = 237 | tags = filepath.split(os.path.sep) 238 | img_folder = '/'+'/'.join(tags[1:-1]) 239 | seq = get_frame_idx(filepath) 240 | 241 | # print filepath, seq 242 | vid_numpy = [] 243 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 244 | x_img = np.expand_dims(x_img, axis = 0) 245 | # print filepath, seq 246 | # bp() 247 | for frame in seq: 248 | if frame!=-1: 249 | fr_name = os.path.join(img_folder, frame) 250 | np_name = fr_name.replace('.jpg','.npy') 251 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 252 | # print np_name 253 | fr_npy = np.load(np_name) 254 | vid_numpy.append(fr_npy) 255 | else: 256 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 257 | 258 | vid_numpy = np.array(vid_numpy) 259 | vid_numpy = np.expand_dims(vid_numpy,axis=0) 260 | # print vid_numpy.shape 261 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 262 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 263 | w = x2-x1 264 | h = y2-y1 265 | roi = np.array([x1,y1,w,h]) 266 | rois = np.expand_dims(roi,axis=0) 267 | rois = np.expand_dims(rois,axis=0) 268 | # try: 269 | # rois, vid_numpy = get_batch(df, ind) 270 | shared_layers_orig = rgb_model.predict(vid_numpy) 271 | 272 | # print rois, shared_layers_orig.shape 273 | [P_cls]= model_classifier.predict([x_img,shared_layers_orig, rois]) 274 | 275 | seq_name = filepath.split(os.path.sep)[-2] 276 | f_predicted = open(output_csv_file,'a+') 277 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) if class_mapping[cn]!='bg'] 278 | f_predicted.close() 279 | 280 | # if k==1: 281 | # break 282 | except Exception as e: 283 | exc_type, exc_obj, exc_tb = sys.exc_info() 284 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 285 | print(exc_type, fname, exc_tb.tb_lineno) 286 | print('Exception: {}'.format(e)) 287 | print(filepath) 288 | continue 289 | -------------------------------------------------------------------------------- /test_frcnn_ava_bk.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | import time 31 | 32 | 33 | ''' 34 | python test_frcnn_AVA.py -m 
/work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/
35 |
36 | '''
37 | sys.setrecursionlimit(40000)
38 |
39 | parser = OptionParser()
40 |
41 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.")
42 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
43 |     help="Number of ROIs per iteration. Higher means more memory use.", default=4)
44 |
45 | parser.add_option("-v", "--val_data", type="str", dest="val_data",
46 |     help="CSV file with the validation annotations to evaluate.", default='ava_val_subset_80.csv')
47 | parser.add_option("-m", "--model_name", dest="model_name",
48 |     help="Path to model.")
49 | parser.add_option("-o", "--output", dest="output",
50 |     help="csv to save predictions.")
51 | parser.add_option("--config_filename", dest="config_filename", help=
52 |     "Location to read the metadata related to the training (generated when training).",
53 |     default="config_subset_AVA.pickle")
54 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
55 |
56 | (options, args) = parser.parse_args()
57 |
58 | if not options.test_path: # if filename is not given
59 |     parser.error('Error: path to test data must be specified. Pass --path to command line')
60 |
61 |
62 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/'
63 | config_output_filename = options.config_filename
64 | with open(config_output_filename, 'rb') as f_in:
65 |     C = pickle.load(f_in)
66 |
67 | if C.network == 'resnet50':
68 |     import keras_frcnn.resnet as nn
69 | elif C.network == 'vgg':
70 |     import keras_frcnn.vgg as nn
71 |
72 | # turn off any data augmentation at test time
73 | C.use_horizontal_flips = False
74 | C.use_vertical_flips = False
75 | C.rot_90 = False
76 | resized_width = 320
77 | resized_height = 400
78 | img_path = options.test_path
79 | output_csv_file = os.path.join('evaluation','phase2',options.output)
80 | fc = open(output_csv_file,'w+')
81 | def format_img_size(img, C):
82 |     """ formats the image size based on config """
83 |     img_min_side = float(C.im_size)
84 |     (height,width,_) = img.shape
85 |     ratio_w = resized_width/width
86 |     ratio_h = resized_height/height
87 |     new_width = resized_width
88 |     new_height = resized_height
89 |     img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
90 |     return img, ratio_w, ratio_h
91 |
92 | def format_img(img, C):
93 |     """ formats an image for model prediction based on config """
94 |     img, ratio_w, ratio_h= format_img_size(img, C)
95 |     img = format_img_channels(img, C)
96 |     return img, ratio_w, ratio_h
97 |
98 | # Method to transform the coordinates of the bounding box to its original size
99 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2):
100 |
101 |     real_x1 = int(round(x1 // ratio_w))
102 |     real_y1 = int(round(y1 // ratio_h))
103 |     real_x2 = int(round(x2 // ratio_w))
104 |     real_y2 = int(round(y2 // ratio_h))
105 |
106 |     return (real_x1, real_y1, real_x2 ,real_y2)
107 |
108 | class_mapping = C.class_mapping
109 | classes_count = class_mapping
110 | if 'bg' not in class_mapping:
111 |     class_mapping['bg'] = len(class_mapping)
112 |
113 | class_mapping = {v: k for k, v in class_mapping.items()}
114 | # print(class_mapping)
115 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
116 | C.num_rois = int(options.num_rois)
117 | # print "Num rois originally",C.num_rois
118 |
if C.network == 'resnet50': 119 | num_features = 1024 120 | elif C.network == 'vgg': 121 | num_features = 512 122 | 123 | if K.image_dim_ordering() == 'th': 124 | input_shape_img = (3, None, None) 125 | # input_shape_features = (num_features, None, None) 126 | else: 127 | input_shape_img = (None, None, 3) 128 | 129 | shared_layers_input= Input(shape=( None,None,832)) 130 | roi_input = Input(shape=(None, 4)) 131 | vid_input = Input(shape =(None, None, None, 3)) 132 | vid_input_shape = (64, 400,320, 3) 133 | feature_map_input = Input(shape=(None, None,None,832)) 134 | 135 | rgb_model = Inception_Inflated3d( 136 | include_top=False, 137 | weights='rgb_kinetics_only', 138 | input_shape=vid_input_shape, 139 | classes=classes_count) 140 | def get_new_img_size(width, height, img_min_side, C): 141 | img_min_side =448 142 | if width <= height: 143 | f = float(img_min_side) / width 144 | resized_height = int(f * height) 145 | resized_width = img_min_side 146 | else: 147 | f = float(img_min_side) / height 148 | resized_width = int(f * width) 149 | resized_height = img_min_side 150 | 151 | if C.dataset == 'AVA': 152 | return resized_width, resized_height 153 | else: 154 | return 640, 480 155 | 156 | def extract_numpy_single_frame(img,C): 157 | 158 | img = (img/255.)*2 - 1 159 | return img 160 | 161 | def get_frame_idx(img_path): 162 | winSize = 64 163 | tags = img_path.split(os.path.sep) 164 | vid_folder = '/'+'/'.join(tags[1:-1]) 165 | frames = os.listdir(vid_folder) 166 | if 'CAD' in img_path: 167 | frames = [f for f in frames if f.startswith('RGB')] 168 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 169 | else: 170 | frames.sort(key = lambda x: int(x.split('.')[0])) 171 | frame_index = frames.index(tags[-1]) 172 | fi = get_frames_index(frames,frame_index,winSize) 173 | seq =[frames[k] if k!=-1 else k for k in fi] 174 | # print(seq[0],seq[31]) 175 | # print seq 176 | return seq 177 | optimizer_classifier = Adam(lr=1e-5) 178 | classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count), trainable=True) 179 | model_classifier = Model([feature_map_input, roi_input], classifier) 180 | 181 | model_name = os.path.join(options.model_name,'model.hdf5') 182 | print('Loading weights from {}'.format(model_name)) 183 | model_classifier.load_weights(model_name, by_name=True) 184 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 185 | 186 | 187 | all_imgs = [] 188 | classes = {} 189 | bbox_threshold = 0.7 190 | visualise = True 191 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 192 | df = pd.read_csv(f_val) 193 | final_predictions = [] 194 | 195 | def get_batch(df, ind): 196 | rows = df.iloc[ind] 197 | # print rows 198 | roi_batch = [] 199 | vid_numpy_batch = [] 200 | for r in range(len(rows)): 201 | row = rows.iloc[r,:] 202 | # print row 203 | # print row[1] 204 | # bp() 205 | val_vid = row[0] 206 | vid_path = os.path.join(img_path,val_vid) 207 | img_name = str(int(row[1]))+'.jpg' 208 | k = 0 209 | k+=1 210 | 211 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 212 | continue 213 | st = time.time() 214 | filepath = os.path.join(vid_path,img_name) 215 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 216 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 217 | img = cv2.imread(filepath) 218 | # x_img = 219 | tags = filepath.split(os.path.sep) 220 | img_folder = 
'/'+'/'.join(tags[1:-1]) 221 | seq = get_frame_idx(filepath) 222 | 223 | # print filepath, seq 224 | vid_numpy = [] 225 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 226 | for frame in seq: 227 | if frame!=-1: 228 | fr_name = os.path.join(img_folder, frame) 229 | np_name = fr_name.replace('.jpg','.npy') 230 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 231 | fr_npy = np.load(np_name) 232 | vid_numpy.append(fr_npy) 233 | else: 234 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 235 | 236 | vid_numpy = np.array(vid_numpy) 237 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 238 | w = x2-x1 239 | h = y2-y1 240 | roi = np.array([x1,y1,w,h]) 241 | rois = np.expand_dims(roi,axis=0) 242 | rois = np.expand_dims(rois,axis=0) 243 | roi_batch.append(rois) 244 | vid_numpy_batch.append(vid_numpy) 245 | return np.array(roi_batch), np.array(vid_numpy_batch) 246 | 247 | indices = range(len(df)) 248 | print len(df) 249 | for i in tqdm(range(len(df))): 250 | try: 251 | # ind = indices[i:i+bs] 252 | row = df.iloc[i,:] 253 | val_vid = row[0] 254 | vid_path = os.path.join(img_path,val_vid) 255 | img_name = str(int(row[1]))+'.jpg' 256 | filepath = os.path.join(vid_path,img_name) 257 | k = 0 258 | k+=1 259 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 260 | continue 261 | st = time.time() 262 | filepath = os.path.join(vid_path,img_name) 263 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 264 | img = cv2.imread(filepath) 265 | tags = filepath.split(os.path.sep) 266 | img_folder = '/'+'/'.join(tags[1:-1]) 267 | seq = get_frame_idx(filepath) 268 | s1 = time.time() 269 | vid_numpy = [] 270 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 271 | for frame in seq: 272 | if frame!=-1: 273 | fr_name = os.path.join(img_folder, frame) 274 | np_name = fr_name.replace('.jpg','.npy') 275 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 276 | fr_npy = np.load(np_name) 277 | vid_numpy.append(fr_npy) 278 | else: 279 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 280 | e1 = time.time() 281 | vid_numpy = np.array(vid_numpy) 282 | vid_numpy = np.expand_dims(vid_numpy,axis=0) 283 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 284 | w = x2-x1 285 | h = y2-y1 286 | roi = np.array([x1,y1,w,h]) 287 | rois = np.expand_dims(roi,axis=0) 288 | rois = np.expand_dims(rois,axis=0) 289 | s2 = time.time() 290 | shared_layers_orig = rgb_model.predict(vid_numpy) 291 | # print rois, shared_layers_orig.shape 292 | s3 = time.time() 293 | [P_cls]= model_classifier.predict([shared_layers_orig, rois]) 294 | seq_name = filepath.split(os.path.sep)[-2] 295 | f_predicted = open(output_csv_file,'a+') 296 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(10) if class_mapping[cn]!='bg'] 297 | f_predicted.close() 298 | 299 | except Exception as e: 300 | exc_type, exc_obj, exc_tb = sys.exc_info() 301 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 302 | print(exc_type, fname, exc_tb.tb_lineno) 303 | print('Exception: 
{}'.format(e))
304 |         print(filepath)
305 |         continue
306 |
--------------------------------------------------------------------------------
/test_ava_concat.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import os
3 | import cv2
4 | import numpy as np
5 | import sys
6 | import pickle
7 | from optparse import OptionParser
8 | import time
9 | from keras_frcnn import config
10 | from keras import backend as K
11 | from keras.layers import Input
12 | from keras.models import Model
13 | from keras_frcnn import roi_helpers
14 | from keras_frcnn import config, data_generators
15 | from keras_frcnn import losses as losses
16 | import keras_frcnn.roi_helpers as roi_helpers
17 | from keras.utils import generic_utils
18 | from keras.layers import Lambda
19 | from i3d_inception import Inception_Inflated3d
20 | import collections
21 | from keras_frcnn.utils import *
22 | from pdb import set_trace as bp
23 | from tqdm import tqdm
24 | from keras_frcnn import losses as losses
25 | from keras.optimizers import Adam, SGD, RMSprop
26 | import pandas as pd
27 | from tqdm import tqdm
28 | from pdb import set_trace as bp
29 | from keras.utils.training_utils import multi_gpu_model
30 |
31 |
32 | '''
33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/
34 |
35 | '''
36 | sys.setrecursionlimit(40000)
37 |
38 | parser = OptionParser()
39 |
40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.")
41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
42 |     help="Number of ROIs per iteration. Higher means more memory use.", default=4)
43 |
44 | parser.add_option("-v", "--val_data", type="str", dest="val_data",
45 |     help="CSV file with the validation annotations to evaluate.", default='ava_val_subset_80.csv')
46 | parser.add_option("-m", "--model_name", dest="model_name",
47 |     help="Path to model.")
48 | parser.add_option("-o", "--output", dest="output",
49 |     help="csv to save predictions.")
50 | parser.add_option("--config_filename", dest="config_filename", help=
51 |     "Location to read the metadata related to the training (generated when training).",
52 |     default="config_subset_AVA.pickle")
53 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
54 |
55 | (options, args) = parser.parse_args()
56 |
57 | if not options.test_path: # if filename is not given
58 |     parser.error('Error: path to test data must be specified. 
Pass --path to command line') 59 | 60 | 61 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 62 | config_output_filename = options.config_filename 63 | with open(config_output_filename, 'rb') as f_in: 64 | C = pickle.load(f_in) 65 | 66 | if C.network == 'resnet50': 67 | import keras_frcnn.resnet as nn 68 | elif C.network == 'vgg': 69 | import keras_frcnn.vgg as nn 70 | 71 | # turn off any data augmentation at test time 72 | C.use_horizontal_flips = False 73 | C.use_vertical_flips = False 74 | C.rot_90 = False 75 | resized_width = 320 76 | resized_height = 400 77 | img_path = options.test_path 78 | output_csv_file = os.path.join('evaluation','phase2',options.output) 79 | fc = open(output_csv_file,'w+') 80 | def format_img_size(img, C): 81 | """ formats the image size based on config """ 82 | img_min_side = float(C.im_size) 83 | (height,width,_) = img.shape 84 | ratio_w = resized_width/width 85 | ratio_h = resized_height/height 86 | new_width = resized_width 87 | new_height = resized_height 88 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 89 | return img, ratio_w, ratio_h 90 | 91 | def format_img(img, C): 92 | """ formats an image for model prediction based on config """ 93 | img, ratio_w, ratio_h= format_img_size(img, C) 94 | img = format_img_channels(img, C) 95 | return img, ratio_w, ratio_h 96 | 97 | # Method to transform the coordinates of the bounding box to its original size 98 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 99 | 100 | real_x1 = int(round(x1 // ratio_w)) 101 | real_y1 = int(round(y1 // ratio_h)) 102 | real_x2 = int(round(x2 // ratio_w)) 103 | real_y2 = int(round(y2 // ratio_h)) 104 | 105 | return (real_x1, real_y1, real_x2 ,real_y2) 106 | 107 | class_mapping = C.class_mapping 108 | classes_count = class_mapping 109 | print len(class_mapping) 110 | # bp() 111 | # if 'bg' not in class_mapping: 112 | # class_mapping['bg'] = len(class_mapping) 113 | 114 | class_mapping = {v: k for k, v in class_mapping.items()} 115 | # print(class_mapping) 116 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 117 | C.num_rois = int(options.num_rois) 118 | # print "Num rois originally",C.num_rois 119 | if C.network == 'resnet50': 120 | num_features = 1024 121 | elif C.network == 'vgg': 122 | num_features = 512 123 | 124 | if K.image_dim_ordering() == 'th': 125 | input_shape_img = (3, None, None) 126 | # input_shape_features = (num_features, None, None) 127 | else: 128 | input_shape_img = (None, None, 3) 129 | 130 | shared_layers_input= Input(shape=( None,None,None,832)) 131 | roi_input = Input(shape=(None, 4)) 132 | vid_input = Input(shape =(None, None, None, 3)) 133 | vid_input_shape = (64, 400,320, 3) 134 | img_input = Input(shape=(None, None, 3)) 135 | 136 | feature_map_input = Input(shape=(None, None,None,832)) 137 | shared_layers_image = nn.nn_base(img_input, trainable=False) 138 | 139 | 140 | rgb_model = Inception_Inflated3d( 141 | include_top=False, 142 | weights='rgb_kinetics_only', 143 | input_shape=vid_input_shape, 144 | classes=classes_count) 145 | def get_new_img_size(width, height, img_min_side, C): 146 | img_min_side =448 147 | if width <= height: 148 | f = float(img_min_side) / width 149 | resized_height = int(f * height) 150 | resized_width = img_min_side 151 | else: 152 | f = float(img_min_side) / height 153 | resized_width = int(f * width) 154 | resized_height = img_min_side 155 | 156 | if C.dataset == 'AVA': 157 | return resized_width, 
resized_height 158 | else: 159 | return 640, 480 160 | 161 | def extract_numpy_single_frame(img,C): 162 | 163 | img = (img/255.)*2 - 1 164 | return img 165 | 166 | def get_frame_idx(img_path): 167 | winSize = 64 168 | tags = img_path.split(os.path.sep) 169 | vid_folder = '/'+'/'.join(tags[1:-1]) 170 | frames = os.listdir(vid_folder) 171 | if 'CAD' in img_path: 172 | frames = [f for f in frames if f.startswith('RGB')] 173 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 174 | else: 175 | frames.sort(key = lambda x: int(x.split('.')[0])) 176 | frame_index = frames.index(tags[-1]) 177 | fi = get_frames_index(frames,frame_index,winSize) 178 | seq =[frames[k] if k!=-1 else k for k in fi] 179 | # print(seq[0],seq[31]) 180 | # print seq 181 | return seq 182 | optimizer_classifier = Adam(lr=1e-5) 183 | # classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count), trainable=True) 184 | print len(classes_count) 185 | # bp() 186 | classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True) 187 | 188 | # model_classifier_only = Model([feature_map_input, roi_input], classifier) 189 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 190 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 191 | model_name = os.path.join(options.model_name,'model.hdf5') 192 | print('Loading weights from {}'.format(model_name)) 193 | model_classifier.load_weights(model_name, by_name=True) 194 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 195 | 196 | 197 | all_imgs = [] 198 | classes = {} 199 | bbox_threshold = 0.7 200 | visualise = True 201 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 202 | df = pd.read_csv(f_val) 203 | # ac_id = get_action_dic() 204 | 205 | # for val_vid in val_vids: 206 | final_predictions = [] 207 | 208 | 209 | indices = range(len(df)) 210 | print len(df) 211 | # bp() 212 | bs = 8 213 | for i in tqdm(range(len(df))): 214 | 215 | try: 216 | # ind = indices[i:i+bs] 217 | row = df.iloc[i,:] 218 | val_vid = row[0] 219 | vid_path = os.path.join(img_path,val_vid) 220 | img_name = str(int(row[1]))+'.jpg' 221 | filepath = os.path.join(vid_path,img_name) 222 | 223 | val_vid = row[0] 224 | vid_path = os.path.join(img_path,val_vid) 225 | img_name = str(int(row[1]))+'.jpg' 226 | k = 0 227 | k+=1 228 | 229 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 230 | continue 231 | st = time.time() 232 | filepath = os.path.join(vid_path,img_name) 233 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 234 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 235 | img = cv2.imread(filepath) 236 | # x_img = 237 | tags = filepath.split(os.path.sep) 238 | img_folder = '/'+'/'.join(tags[1:-1]) 239 | seq = get_frame_idx(filepath) 240 | 241 | # print filepath, seq 242 | vid_numpy = [] 243 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 244 | x_img = np.expand_dims(x_img, axis = 0) 245 | # print filepath, seq 246 | # bp() 247 | for frame in seq: 248 | if frame!=-1: 249 | fr_name = os.path.join(img_folder, frame) 250 | np_name = fr_name.replace('.jpg','.npy') 251 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 252 | # print np_name 253 | fr_npy = np.load(np_name) 254 | vid_numpy.append(fr_npy) 255 | 
else: 256 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 257 | 258 | vid_numpy = np.array(vid_numpy) 259 | vid_numpy = np.expand_dims(vid_numpy,axis=0) 260 | # print vid_numpy.shape 261 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 262 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 263 | w = x2-x1 264 | h = y2-y1 265 | roi = np.array([x1,y1,w,h]) 266 | rois = np.expand_dims(roi,axis=0) 267 | rois = np.expand_dims(rois,axis=0) 268 | # try: 269 | # rois, vid_numpy = get_batch(df, ind) 270 | shared_layers_orig = rgb_model.predict(vid_numpy) 271 | 272 | # print rois, shared_layers_orig.shape 273 | [P_cls]= model_classifier.predict([x_img,shared_layers_orig, rois]) 274 | # print y 275 | # print y.shape 276 | 277 | # except: 278 | # pass 279 | 280 | seq_name = filepath.split(os.path.sep)[-2] 281 | # bp() 282 | # line = [seq_name,str(fr_num).zfill(4),str(float(row[2])),str(float(row[3])),str(float(row[4])),str(float(row[5])),P_cls] 283 | # final_predictions.append(line) 284 | # for cn in range(P_cls.shape[1]): 285 | # class_num = cn 286 | # prob = P_cls[0,cn] 287 | # class_name = class_mapping[cn] 288 | # if class_name!='bg': 289 | # line = seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(ac_id[class_name])+','+str(prob) 290 | # # print line 291 | # f_predicted = open('evaluation/ava_predicted_cheating_subset_latest.csv','a+') 292 | # f_predicted.write(line+'\n') 293 | # f_predicted.close() 294 | f_predicted = open(output_csv_file,'a+') 295 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) if class_mapping[cn]!='bg'] 296 | f_predicted.close() 297 | 298 | # if k==1: 299 | # break 300 | except Exception as e: 301 | exc_type, exc_obj, exc_tb = sys.exc_info() 302 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 303 | print(exc_type, fname, exc_tb.tb_lineno) 304 | print('Exception: {}'.format(e)) 305 | print(filepath) 306 | continue 307 | -------------------------------------------------------------------------------- /test_final_i3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | 31 | 32 | ''' 33 | 
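Example usage. Editor's note: the paths below are cluster-specific placeholders; the -m
directory must contain model.hdf5, and predictions are appended to
evaluation/8_actions/<output>. A fuller call, spelling out the remaining options
(hypothetical values shown), would look like:

python test_final_i3d.py -p <keyframes_dir> -m <model_dir> -v ava_val_subset_80.csv -o preds.csv --version concat -t v1 --config_filename config_subset_AVA.pickle

The minimal call recorded for this script was: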
python test_final_i3d.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 34 | 
35 | ''' 36 | sys.setrecursionlimit(40000) 37 | 
38 | parser = OptionParser() 39 | 
40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 42 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 43 | 
44 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 45 | help="Validation csv filename (relative to the AVA dataset tool directory).", default='ava_val_subset_80.csv') 46 | parser.add_option("-m", "--model_name", dest="model_name", 47 | help="Path to model.") 48 | parser.add_option("-t", "--type", dest="type", 49 | help="Classifier head variant: v1 or v2.", default = 'v1') 50 | parser.add_option("-o", "--output", dest="output", 51 | help="csv to save predictions.") 52 | parser.add_option("--config_filename", dest="config_filename", help= 53 | "Location to read the metadata related to the training (generated when training).", 54 | default="config_subset_AVA.pickle") 55 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 56 | parser.add_option("--version", dest="version", help="Classifier head to use: concat, whole, or roi.", default='concat') 57 | 
58 | (options, args) = parser.parse_args() 59 | 
60 | if not options.test_path: # if filename is not given 61 | parser.error('Error: path to test data must be specified. Pass --path to command line') 62 | 
63 | 
64 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 65 | config_output_filename = options.config_filename 66 | with open(config_output_filename, 'rb') as f_in: 67 | C = pickle.load(f_in) 68 | 
69 | if C.network == 'resnet50': 70 | import keras_frcnn.resnet as nn 71 | elif C.network == 'vgg': 72 | import keras_frcnn.vgg as nn 73 | 
74 | # turn off any data augmentation at test time 75 | C.use_horizontal_flips = False 76 | C.use_vertical_flips = False 77 | C.rot_90 = False 78 | resized_width = 320 79 | resized_height = 400 80 | version = options.version 81 | img_path = options.test_path 82 | output_csv_file = os.path.join('evaluation','8_actions',options.output) 83 | fc = open(output_csv_file,'w+') 84 | fc.close() 85 | def format_img_size(img, C): 86 | """ formats the image size based on config """ 87 | img_min_side = float(C.im_size) 88 | (height,width,_) = img.shape 89 | ratio_w = resized_width/width 90 | ratio_h = resized_height/height 91 | new_width = resized_width 92 | new_height = resized_height 93 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 94 | return img, ratio_w, ratio_h 95 | 
96 | def format_img(img, C): 97 | """ formats an image for model prediction based on config """ 98 | img, ratio_w, ratio_h= format_img_size(img, C) 99 | img = format_img_channels(img, C) 100 | return img, ratio_w, ratio_h 101 | 
102 | # Method to transform the coordinates of the bounding box to its original size 103 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 104 | 
105 | real_x1 = int(round(x1 // ratio_w)) 106 | real_y1 = int(round(y1 // ratio_h)) 107 | real_x2 = int(round(x2 // ratio_w)) 108 | real_y2 = int(round(y2 // ratio_h)) 109 | 
110 | return (real_x1, real_y1, real_x2 ,real_y2) 111 | 
112 | class_mapping = C.class_mapping 113 | classes_count = class_mapping 114 | print len(class_mapping) 115 | type = options.type 116 | # bp() 117 | # 
if 'bg' not in class_mapping: 118 | # class_mapping['bg'] = len(class_mapping) 119 | 120 | class_mapping = {v: k for k, v in class_mapping.items()} 121 | # print(class_mapping) 122 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 123 | C.num_rois = int(options.num_rois) 124 | # print "Num rois originally",C.num_rois 125 | if C.network == 'resnet50': 126 | num_features = 1024 127 | elif C.network == 'vgg': 128 | num_features = 512 129 | 130 | if K.image_dim_ordering() == 'th': 131 | input_shape_img = (3, None, None) 132 | else: 133 | input_shape_img = (None, None, 3) 134 | 135 | # shared_layers_input= Input(shape=( None,None,None,832)) 136 | roi_input = Input(shape=(None, 4)) 137 | vid_input = Input(shape =(None, None, None, 3)) 138 | vid_input_shape = (64, 400,320, 3) 139 | img_input = Input(shape=(None, None, 3)) 140 | 141 | feature_map_input = Input(shape=( None,None,None,832)) 142 | shared_layers_image = nn.nn_base(img_input, trainable=False) 143 | 144 | 145 | rgb_model = Inception_Inflated3d( 146 | include_top=False, 147 | weights='rgb_kinetics_only', 148 | input_shape=vid_input_shape, 149 | classes=classes_count) 150 | 151 | def extract_numpy_single_frame(img,C): 152 | 153 | img = (img/255.)*2 - 1 154 | return img 155 | 156 | def get_frame_idx(img_path): 157 | winSize = 64 158 | tags = img_path.split(os.path.sep) 159 | vid_folder = '/'+'/'.join(tags[1:-1]) 160 | frames = os.listdir(vid_folder) 161 | if 'CAD' in img_path: 162 | frames = [f for f in frames if f.startswith('RGB')] 163 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 164 | else: 165 | frames.sort(key = lambda x: int(x.split('.')[0])) 166 | frame_index = frames.index(tags[-1]) 167 | fi = get_frames_index(frames,frame_index,winSize) 168 | seq =[frames[k] if k!=-1 else k for k in fi] 169 | # print(seq[0],seq[31]) 170 | # print seq 171 | return seq 172 | optimizer_classifier = Adam(lr=1e-5) 173 | print len(classes_count) 174 | 175 | if version == 'concat' and type == 'v1': 176 | classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True) 177 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 178 | elif version == 'whole' and type == 'v1': 179 | classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version) 180 | model_classifier = Model([feature_map_input], classifier) 181 | elif version == 'concat' and type == 'v2': 182 | classifier = nn.classifier_i3d_concat_v2(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True) 183 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 184 | elif version == 'roi': 185 | classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count)) 186 | model_classifier = Model([roi_input, feature_map_input], classifier) 187 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 188 | model_name = os.path.join(options.model_name,'model.hdf5') 189 | print('Loading weights from {}'.format(model_name)) 190 | model_classifier.load_weights(model_name, by_name=True) 191 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 192 | 193 | all_imgs = [] 194 | classes = {} 195 | bbox_threshold = 0.7 196 | visualise = True 197 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 198 | df = 
pd.read_csv(f_val) 199 | final_predictions = [] 200 | 201 | indices = range(len(df)) 202 | print len(df) 203 | # bp() 204 | bs = 8 205 | for i in tqdm(range(len(df))): 206 | 207 | try: 208 | # ind = indices[i:i+bs] 209 | row = df.iloc[i,:] 210 | val_vid = row[0] 211 | vid_path = os.path.join(img_path,val_vid) 212 | img_name = str(int(row[1]))+'.jpg' 213 | filepath = os.path.join(vid_path,img_name) 214 | 215 | val_vid = row[0] 216 | vid_path = os.path.join(img_path,val_vid) 217 | img_name = str(int(row[1]))+'.jpg' 218 | k = 0 219 | k+=1 220 | 221 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 222 | continue 223 | st = time.time() 224 | filepath = os.path.join(vid_path,img_name) 225 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 226 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 227 | img = cv2.imread(filepath) 228 | # x_img = 229 | tags = filepath.split(os.path.sep) 230 | img_folder = '/'+'/'.join(tags[1:-1]) 231 | seq = get_frame_idx(filepath) 232 | 233 | # print filepath, seq 234 | vid_numpy = [] 235 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 236 | x_img = np.expand_dims(x_img, axis = 0) 237 | # print filepath, seq 238 | # bp() 239 | for frame in seq: 240 | if frame!=-1: 241 | fr_name = os.path.join(img_folder, frame) 242 | np_name = fr_name.replace('.jpg','.npy') 243 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 244 | # print np_name 245 | fr_npy = np.load(np_name) 246 | vid_numpy.append(fr_npy) 247 | else: 248 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 249 | 250 | vid_numpy = np.array(vid_numpy) 251 | vid_numpy = np.expand_dims(vid_numpy,axis=0) 252 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 253 | shared_layers_orig = rgb_model.predict(vid_numpy) 254 | 255 | # print vid_numpy.shape 256 | if version == 'concat': 257 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 258 | w = x2-x1 259 | h = y2-y1 260 | roi = np.array([x1,y1,w,h]) 261 | rois = np.expand_dims(roi,axis=0) 262 | rois = np.expand_dims(rois,axis=0) 263 | # try: 264 | # rois, vid_numpy = get_batch(df, ind) 265 | # print rois, shared_layers_orig.shape 266 | [P_cls]= model_classifier.predict([x_img,shared_layers_orig, rois]) 267 | elif version == 'whole': 268 | [P_cls]= model_classifier.predict([shared_layers_orig]) 269 | elif version == 'roi': 270 | w = x2-x1 271 | h = y2-y1 272 | roi = np.array([x1,y1,w,h]) 273 | rois = np.expand_dims(roi,axis=0) 274 | rois = np.expand_dims(rois,axis=0) 275 | [P_cls]= model_classifier.predict([rois, shared_layers_orig]) 276 | seq_name = filepath.split(os.path.sep)[-2] 277 | # print(P_cls) 278 | # bp() 279 | # print output_csv_file 280 | # print(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[5])+','+str(P_cls[0,5])+'\n') 281 | et = time.time()-st 282 | print "The per frame time is {}".format(et) 283 | with open(output_csv_file,'a+') as f_predicted: 284 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) if class_mapping[cn]!='bg'] 285 | 286 | 
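# NOTE (editor): the broad handler below logs the exception type, source file, and
# line number, then skips to the next csv row, so a missing .npy feature file or a
# malformed annotation does not abort the whole evaluation run.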
except Exception as e: 287 | exc_type, exc_obj, exc_tb = sys.exc_info() 288 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 289 | print(exc_type, fname, exc_tb.tb_lineno) 290 | print('Exception: {}'.format(e)) 291 | print(filepath) 292 | continue 293 | -------------------------------------------------------------------------------- /test_frcnn_AVA.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | from keras.utils.training_utils import multi_gpu_model 30 | 31 | 32 | ''' 33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 34 | 35 | ''' 36 | sys.setrecursionlimit(40000) 37 | 38 | parser = OptionParser() 39 | 40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 42 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 43 | 44 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 45 | help="Number of ROIs per iteration. Higher means more memory use.", default='ava_val_subset_80.csv') 46 | parser.add_option("-m", "--model_name", dest="model_name", 47 | help="Path to model.") 48 | parser.add_option("-o", "--output", dest="output", 49 | help="csv to save predictions.") 50 | parser.add_option("--config_filename", dest="config_filename", help= 51 | "Location to read the metadata related to the training (generated when training).", 52 | default="config_subset_AVA.pickle") 53 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 54 | 55 | (options, args) = parser.parse_args() 56 | 57 | if not options.test_path: # if filename is not given 58 | parser.error('Error: path to test data must be specified. 
Pass --path to command line') 59 | 60 | 61 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 62 | config_output_filename = options.config_filename 63 | with open(config_output_filename, 'rb') as f_in: 64 | C = pickle.load(f_in) 65 | 66 | if C.network == 'resnet50': 67 | import keras_frcnn.resnet as nn 68 | elif C.network == 'vgg': 69 | import keras_frcnn.vgg as nn 70 | 71 | # turn off any data augmentation at test time 72 | C.use_horizontal_flips = False 73 | C.use_vertical_flips = False 74 | C.rot_90 = False 75 | resized_width = 320 76 | resized_height = 400 77 | img_path = options.test_path 78 | output_csv_file = os.path.join('evaluation',options.output) 79 | fc = open(output_csv_file,'w+') 80 | def format_img_size(img, C): 81 | """ formats the image size based on config """ 82 | img_min_side = float(C.im_size) 83 | (height,width,_) = img.shape 84 | ratio_w = resized_width/width 85 | ratio_h = resized_height/height 86 | # if width <= height: 87 | # ratio = img_min_side/width 88 | # new_height = int(ratio * height) 89 | # new_width = int(img_min_side) 90 | # else: 91 | # ratio = img_min_side/height 92 | # new_width = int(ratio * width) 93 | # new_height = int(img_min_side) 94 | new_width = resized_width 95 | new_height = resized_height 96 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 97 | return img, ratio_w, ratio_h 98 | 99 | def format_img(img, C): 100 | """ formats an image for model prediction based on config """ 101 | img, ratio_w, ratio_h= format_img_size(img, C) 102 | img = format_img_channels(img, C) 103 | return img, ratio_w, ratio_h 104 | 105 | # Method to transform the coordinates of the bounding box to its original size 106 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 107 | 108 | real_x1 = int(round(x1 // ratio_w)) 109 | real_y1 = int(round(y1 // ratio_h)) 110 | real_x2 = int(round(x2 // ratio_w)) 111 | real_y2 = int(round(y2 // ratio_h)) 112 | 113 | return (real_x1, real_y1, real_x2 ,real_y2) 114 | 115 | class_mapping = C.class_mapping 116 | classes_count = class_mapping 117 | if 'bg' not in class_mapping: 118 | class_mapping['bg'] = len(class_mapping) 119 | 120 | class_mapping = {v: k for k, v in class_mapping.items()} 121 | # print(class_mapping) 122 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 123 | C.num_rois = int(options.num_rois) 124 | # print "Num rois originally",C.num_rois 125 | if C.network == 'resnet50': 126 | num_features = 1024 127 | elif C.network == 'vgg': 128 | num_features = 512 129 | 130 | if K.image_dim_ordering() == 'th': 131 | input_shape_img = (3, None, None) 132 | # input_shape_features = (num_features, None, None) 133 | else: 134 | input_shape_img = (None, None, 3) 135 | 136 | shared_layers_input= Input(shape=( None,None,832)) 137 | roi_input = Input(shape=(None, 4)) 138 | vid_input = Input(shape =(None, None, None, 3)) 139 | vid_input_shape = (64, 400,320, 3) 140 | feature_map_input = Input(shape=(None, None,None,832)) 141 | 142 | rgb_model = Inception_Inflated3d( 143 | include_top=False, 144 | weights='rgb_kinetics_only', 145 | input_shape=vid_input_shape, 146 | classes=classes_count) 147 | def get_new_img_size(width, height, img_min_side, C): 148 | img_min_side =448 149 | if width <= height: 150 | f = float(img_min_side) / width 151 | resized_height = int(f * height) 152 | resized_width = img_min_side 153 | else: 154 | f = float(img_min_side) / height 155 | resized_width = int(f * width) 156 | resized_height = 
img_min_side 157 | 158 | if C.dataset == 'AVA': 159 | return resized_width, resized_height 160 | else: 161 | return 640, 480 162 | 163 | def extract_numpy_single_frame(img,C): 164 | 165 | img = (img/255.)*2 - 1 166 | return img 167 | 168 | def get_frame_idx(img_path): 169 | winSize = 64 170 | tags = img_path.split(os.path.sep) 171 | vid_folder = '/'+'/'.join(tags[1:-1]) 172 | frames = os.listdir(vid_folder) 173 | if 'CAD' in img_path: 174 | frames = [f for f in frames if f.startswith('RGB')] 175 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 176 | else: 177 | frames.sort(key = lambda x: int(x.split('.')[0])) 178 | frame_index = frames.index(tags[-1]) 179 | fi = get_frames_index(frames,frame_index,winSize) 180 | seq =[frames[k] if k!=-1 else k for k in fi] 181 | # print(seq[0],seq[31]) 182 | # print seq 183 | return seq 184 | optimizer_classifier = Adam(lr=1e-5) 185 | classifier = nn.classifier_i3d_batch(feature_map_input, roi_input, 1, nb_classes=len(classes_count), trainable=True) 186 | # model_classifier_only = Model([feature_map_input, roi_input], classifier) 187 | model_classifier = Model([feature_map_input, roi_input], classifier) 188 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 189 | 190 | 191 | 192 | model_name = os.path.join(options.model_name,'model.hdf5') 193 | print('Loading weights from {}'.format(model_name)) 194 | model_classifier.load_weights(model_name, by_name=True) 195 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 196 | 197 | 198 | all_imgs = [] 199 | classes = {} 200 | bbox_threshold = 0.7 201 | visualise = True 202 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 203 | df = pd.read_csv(f_val) 204 | # ac_id = get_action_dic() 205 | 206 | # for val_vid in val_vids: 207 | final_predictions = [] 208 | 209 | def get_batch(df, ind): 210 | rows = df.iloc[ind] 211 | # print rows 212 | roi_batch = [] 213 | vid_numpy_batch = [] 214 | for r in range(len(rows)): 215 | row = rows.iloc[r,:] 216 | # print row 217 | # print row[1] 218 | # bp() 219 | val_vid = row[0] 220 | vid_path = os.path.join(img_path,val_vid) 221 | img_name = str(int(row[1]))+'.jpg' 222 | k = 0 223 | k+=1 224 | 225 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 226 | continue 227 | st = time.time() 228 | filepath = os.path.join(vid_path,img_name) 229 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 230 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 231 | img = cv2.imread(filepath) 232 | # x_img = 233 | tags = filepath.split(os.path.sep) 234 | img_folder = '/'+'/'.join(tags[1:-1]) 235 | seq = get_frame_idx(filepath) 236 | 237 | # print filepath, seq 238 | vid_numpy = [] 239 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 240 | for frame in seq: 241 | if frame!=-1: 242 | fr_name = os.path.join(img_folder, frame) 243 | np_name = fr_name.replace('.jpg','.npy') 244 | np_name = np_name.replace('train/keyframes','numpy_arrays_val') 245 | # print np_name 246 | fr_npy = np.load(np_name) 247 | # fr_img = cv2.imread(fr_name) 248 | # fr_img = cv2.resize(fr_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 249 | # fr_npy = extract_numpy_single_frame(fr_img,C) 250 | vid_numpy.append(fr_npy) 251 | else: 252 | vid_numpy.append(np.zeros((resized_height,resized_width,3))) 253 | 254 | vid_numpy = np.array(vid_numpy) 255 | # 
vid_numpy = np.expand_dims(vid_numpy,axis=0) 256 | # print vid_numpy.shape 257 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 258 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 259 | w = x2-x1 260 | h = y2-y1 261 | roi = np.array([x1,y1,w,h]) 262 | rois = np.expand_dims(roi,axis=0) 263 | # rois = np.expand_dims(rois,axis=0) 264 | roi_batch.append(rois) 265 | vid_numpy_batch.append(vid_numpy) 266 | # rois = np.expand_dims(rois,axis=0) 267 | return np.array(roi_batch), np.array(vid_numpy_batch) 268 | 269 | indices = range(len(df)) 270 | print len(df) 271 | # bp() 272 | bs = 4 273 | for i in tqdm(range(0,len(df),bs)): 274 | 275 | try: 276 | ind = indices[i:i+bs] 277 | # row = df.iloc[i,:] 278 | # val_vid = row[0] 279 | # vid_path = os.path.join(img_path,val_vid) 280 | # img_name = str(int(row[1]))+'.jpg' 281 | # filepath = os.path.join(vid_path,img_name) 282 | # try: 283 | rois, vid_numpy = get_batch(df, ind) 284 | shared_layers_orig = rgb_model.predict(vid_numpy) 285 | 286 | # print rois, shared_layers_orig.shape 287 | y= model_classifier.predict([shared_layers_orig, rois]) 288 | # print y 289 | # print y.shape 290 | 291 | # except: 292 | # pass 293 | 294 | # seq_name = filepath.split(os.path.sep)[-2] 295 | # # bp() 296 | # # line = [seq_name,str(fr_num).zfill(4),str(float(row[2])),str(float(row[3])),str(float(row[4])),str(float(row[5])),P_cls] 297 | # # final_predictions.append(line) 298 | # # for cn in range(P_cls.shape[1]): 299 | # # class_num = cn 300 | # # prob = P_cls[0,cn] 301 | # # class_name = class_mapping[cn] 302 | # # if class_name!='bg': 303 | # # line = seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(ac_id[class_name])+','+str(prob) 304 | # # # print line 305 | # # f_predicted = open('evaluation/ava_predicted_cheating_subset_latest.csv','a+') 306 | # # f_predicted.write(line+'\n') 307 | # # f_predicted.close() 308 | # f_predicted = open(output_csv_file,'a+') 309 | # [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(81) if class_mapping[cn]!='bg'] 310 | # f_predicted.close() 311 | 312 | # if k==1: 313 | # break 314 | except Exception as e: 315 | exc_type, exc_obj, exc_tb = sys.exc_info() 316 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 317 | print(exc_type, fname, exc_tb.tb_lineno) 318 | print('Exception: {}'.format(e)) 319 | # print(filepath) 320 | continue 321 | -------------------------------------------------------------------------------- /train_frcnn_i3d_cls.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = 
tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | 
20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | # import tensorflow as tf 37 | # import keras.backend.tensorflow_backend as ktf 38 | sys.setrecursionlimit(40000) 39 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d') 40 | old_stdout = sys.stdout 41 | log_file = open("message.log","w") 42 | from i3d_inception import Inception_Inflated3d 43 | # from i3d_inception import Inception_Inflated3d 44 | # from tensorflow.python import keras 45 | from keras.utils import plot_model 46 | import os 47 | import pdb 48 | from keras.layers import Input 49 | import logging 50 | logging.basicConfig(filename='example.log',level=logging.DEBUG) 51 | parser = OptionParser() 52 | 
53 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 54 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", 55 | default="pascal_voc") 56 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4) 57 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Annotation index to start loading from.", default=0) 58 | 
59 | parser.add_option("-m", "--output_weight_path",dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 60 | parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to train on (e.g. AVA).", default='AVA') 61 | parser.add_option("-e","--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000) 62 | 
63 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 64 | parser.add_option("--aug", dest="aug", type = int,help="Whether to apply data augmentation (0 or 1).", default=0) 65 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 66 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False) 67 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 68 | action="store_true", default=False) 69 | parser.add_option("--config_filename", dest="config_filename", help= 70 | "Location to store all the metadata related to the training (to be used when testing).", 71 | default="config.pickle") 72 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. 
If not specified, will try to load default weights provided by keras.") 73 | parser.add_option("--j", dest="job", help="If the job output should be saved") 74 | 75 | (options, args) = parser.parse_args() 76 | 77 | if not options.train_path: # if filename is not given 78 | parser.error('Error: path to training data must be specified. Pass --path to command line') 79 | 80 | if options.parser == 'pascal_voc': 81 | from keras_frcnn.pascal_voc_parser import get_data 82 | elif options.parser == 'simple': 83 | from keras_frcnn.simple_parser import get_data 84 | else: 85 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 86 | 87 | # pass the settings from the command line, and persist them in the config object 88 | C = config.Config() 89 | 90 | C.use_horizontal_flips = bool(options.horizontal_flips) 91 | C.use_vertical_flips = bool(options.vertical_flips) 92 | C.rot_90 = bool(options.rot_90) 93 | C.dataset = options.dataset 94 | C.augment = options.aug 95 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 96 | C.model_path = output_weight_path 97 | C.num_rois = int(options.num_rois) 98 | 99 | if options.network == 'vgg': 100 | C.network = 'vgg' 101 | from keras_frcnn import vgg as nn 102 | elif options.network == 'resnet50': 103 | from keras_frcnn import resnet as nn 104 | C.network = 'resnet50' 105 | else: 106 | print('Not a valid model') 107 | raise ValueError 108 | 109 | 110 | # check if weight path was passed via command line 111 | if options.input_weight_path: 112 | C.base_net_weights = options.input_weight_path 113 | else: 114 | # set the path to weights based on backend and model 115 | C.base_net_weights = nn.get_weight_path() 116 | 117 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 118 | 119 | job = options.job 120 | print(len(classes_count)) 121 | if job: 122 | sys.stdout = log_file 123 | 124 | 125 | if 'bg' not in classes_count: 126 | classes_count['bg'] = 0 127 | class_mapping['bg'] = len(class_mapping) 128 | 129 | C.class_mapping = class_mapping 130 | 131 | inv_map = {v: k for k, v in class_mapping.items()} 132 | 133 | print('Training images per class:') 134 | pprint.pprint(classes_count) 135 | print('Num classes (including bg) = {}'.format(len(classes_count))) 136 | 137 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 138 | 139 | with open(config_output_filename, 'wb') as config_f: 140 | pickle.dump(C,config_f) 141 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 142 | 143 | random.shuffle(all_imgs) 144 | 145 | num_imgs = len(all_imgs) 146 | 147 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 148 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 149 | 150 | print('Num train samples {}'.format(len(train_imgs))) 151 | print('Num val samples {}'.format(len(val_imgs))) 152 | 153 | 154 | data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 155 | data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 156 | 157 | roi_input = Input(shape=(None, 4),name = 'roi_input') 158 | vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 159 | vid_input_shape = (64, 400,320, 3) 160 | rgb_model = Inception_Inflated3d( 161 | include_top=False, 162 | 
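# NOTE (editor): with include_top=False the Kinetics-pretrained I3D backbone returns
# a 5D spatio-temporal feature volume rather than class scores; the test scripts in
# this repo size that input as (None, None, None, 832) channels, and it is what the
# classifier_i3d head consumes alongside the RoIs.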
weights='rgb_kinetics_only', 163 | input_shape=vid_input_shape, 164 | classes=classes_count) 165 | shared_layers_orig = rgb_model(vid_input) 166 | classifier = nn.classifier_i3d(shared_layers_orig, roi_input, 1, nb_classes=len(classes_count), trainable=True) 167 | model_classifier = Model([roi_input, vid_input], classifier) 168 | 169 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 170 | model_all = Model([vid_input, roi_input], classifier) 171 | plot_model(model_all, to_file='model_all_i3d_cls_latest.png', show_shapes = True) 172 | log_folder = os.path.join(options.output_weight_path,'logs/') 173 | if not os.path.isdir(log_folder): 174 | os.makedirs(log_folder) 175 | tensorboard = TensorBoard(log_dir=log_folder) 176 | tensorboard.set_model(model_classifier) 177 | train_names = ['train_loss', 'train_mae'] 178 | def write_log(callback, names, logs, batch_no): 179 | for name, value in zip(names, logs): 180 | summary = tf.Summary() 181 | summary_value = summary.value.add() 182 | summary_value.simple_value = value 183 | summary_value.tag = name 184 | callback.writer.add_summary(summary, batch_no) 185 | callback.writer.flush() 186 | 187 | optimizer = Adam(lr=1e-5) 188 | optimizer_classifier = Adam(lr=1e-5) 189 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls]) 190 | model_all.compile(optimizer='sgd', loss='mae') 191 | 192 | epoch_length = 1000 193 | epoch_length_val = 100 194 | num_epochs = int(options.num_epochs) 195 | iter_num = 0 196 | iter_num_tensorboard = 0 197 | total_cur_loss = [] 198 | total_cur_loss_val = [] 199 | iter_num_val_tensorboard = 0 200 | losses = np.zeros((epoch_length, 1)) 201 | losses_val = np.zeros((epoch_length_val, 1)) 202 | rpn_accuracy_rpn_monitor = [] 203 | rpn_accuracy_for_epoch = [] 204 | start_time = time.time() 205 | ###### val ##### 206 | rpn_accuracy_rpn_monitor_val = [] 207 | rpn_accuracy_for_epoch_val = [] 208 | 209 | ################ 210 | best_loss = np.Inf 211 | 212 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 213 | print('Starting training') 214 | 215 | vis = True 216 | 217 | for epoch_num in range(num_epochs): 218 | progbar = generic_utils.Progbar(epoch_length) 219 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 220 | while True: 221 | try: 222 | img_data, seq_numpy, x_img = next(data_gen_train) 223 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 224 | 225 | loss_class = model_classifier.train_on_batch([X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 226 | losses[iter_num, 0] = loss_class 227 | iter_num += 1 228 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 229 | iter_num_tensorboard+=1 230 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 231 | 232 | if iter_num == epoch_length: 233 | loss_class_cls = np.mean(losses[:, 0]) 234 | curr_loss =loss_class_cls 235 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 236 | total_cur_loss.append(curr_loss) 237 | iter_num = 0 238 | start_time = time.time() 239 | 240 | #################### Val ######################################################### 241 | iter_num_val = 0 242 | 243 | while True: 244 | # try: 245 | img_data, seq_numpy, x_img = next(data_gen_val) 246 | # print("validation") 247 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 248 | loss_class = model_classifier.train_on_batch([ X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 249 | losses_val[iter_num_val,0] = loss_class 250 | iter_num_val += 1 
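# NOTE (editor): the validation pass above calls train_on_batch, which performs a
# gradient update on the held-out split. If a pure evaluation is intended, Keras's
# test_on_batch has the same signature and returns the loss without touching the weights:
# loss_class = model_classifier.test_on_batch([X2[:, :, :], seq_numpy], [Y1[:, :, :]])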
251 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 252 | iter_num_val_tensorboard+=1 253 | if iter_num_val == epoch_length_val: 254 | 255 | loss_class_cls = np.mean(losses_val[:, 0]) 256 | curr_loss_val = loss_class_cls 257 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 258 | total_cur_loss_val.append(curr_loss_val) 259 | # total_cur_loss.append(curr_loss) 260 | iter_num_val = 0 261 | break 262 | 263 | if curr_loss < best_loss: 264 | if C.verbose: 265 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 266 | best_loss = curr_loss 267 | model_all.save_weights(C.model_path) 268 | 269 | break 270 | 271 | except Exception as e: 272 | exc_type, exc_obj, exc_tb = sys.exc_info() 273 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 274 | print(exc_type, fname, exc_tb.tb_lineno) 275 | print('Exception: {}'.format(e)) 276 | # print(seq_numpy.shape) 277 | continue 278 | sys.stdout = old_stdout 279 | 280 | plt.plot(total_cur_loss) 281 | plt.plot(total_cur_loss_val) 282 | plt.legend(['train loss', 'val loss'], loc='upper left') 283 | 284 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 285 | plt.savefig(savefigure) 286 | print('Training complete, exiting.') 287 | log_file.close() 288 | -------------------------------------------------------------------------------- /test_final_noI3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | 31 | 32 | ''' 33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 34 | 35 | ''' 36 | sys.setrecursionlimit(40000) 37 | 38 | parser = OptionParser() 39 | 40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 42 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 43 | 44 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 45 | help="Number of ROIs per iteration. 
Higher means more memory use.", default='ava_val_subset_80.csv') 46 | parser.add_option("-m", "--model_name", dest="model_name", 47 | help="Path to model.") 48 | parser.add_option("-t", "--type", dest="type", 49 | help="type.", default = 'v1') 50 | parser.add_option("-o", "--output", dest="output", 51 | help="csv to save predictions.") 52 | parser.add_option("--config_filename", dest="config_filename", help= 53 | "Location to read the metadata related to the training (generated when training).", 54 | default="config_subset_AVA.pickle") 55 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 56 | parser.add_option("--version", dest="version", help="Base network to use. Supports vgg or resnet50.", default='concat') 57 | 58 | (options, args) = parser.parse_args() 59 | 60 | if not options.test_path: # if filename is not given 61 | parser.error('Error: path to test data must be specified. Pass --path to command line') 62 | 63 | 64 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 65 | config_output_filename = options.config_filename 66 | with open(config_output_filename, 'rb') as f_in: 67 | C = pickle.load(f_in) 68 | 69 | if C.network == 'resnet50': 70 | import keras_frcnn.resnet as nn 71 | elif C.network == 'vgg': 72 | import keras_frcnn.vgg as nn 73 | 74 | # turn off any data augmentation at test time 75 | C.use_horizontal_flips = False 76 | C.use_vertical_flips = False 77 | C.rot_90 = False 78 | resized_width = 320 79 | resized_height = 400 80 | version = options.version 81 | img_path = options.test_path 82 | output_csv_file = os.path.join('evaluation','8_actions',options.output) 83 | fc = open(output_csv_file,'w+') 84 | fc.close() 85 | def format_img_size(img, C): 86 | """ formats the image size based on config """ 87 | img_min_side = float(C.im_size) 88 | (height,width,_) = img.shape 89 | ratio_w = resized_width/width 90 | ratio_h = resized_height/height 91 | new_width = resized_width 92 | new_height = resized_height 93 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 94 | return img, ratio_w, ratio_h 95 | 96 | def format_img(img, C): 97 | """ formats an image for model prediction based on config """ 98 | img, ratio_w, ratio_h= format_img_size(img, C) 99 | img = format_img_channels(img, C) 100 | return img, ratio_w, ratio_h 101 | 102 | # Method to transform the coordinates of the bounding box to its original size 103 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 104 | 105 | real_x1 = int(round(x1 // ratio_w)) 106 | real_y1 = int(round(y1 // ratio_h)) 107 | real_x2 = int(round(x2 // ratio_w)) 108 | real_y2 = int(round(y2 // ratio_h)) 109 | 110 | return (real_x1, real_y1, real_x2 ,real_y2) 111 | 112 | class_mapping = C.class_mapping 113 | classes_count = class_mapping 114 | print len(class_mapping) 115 | type = options.type 116 | # bp() 117 | # if 'bg' not in class_mapping: 118 | # class_mapping['bg'] = len(class_mapping) 119 | 120 | class_mapping = {v: k for k, v in class_mapping.items()} 121 | # print(class_mapping) 122 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 123 | C.num_rois = int(options.num_rois) 124 | # print "Num rois originally",C.num_rois 125 | if C.network == 'resnet50': 126 | num_features = 1024 127 | elif C.network == 'vgg': 128 | num_features = 512 129 | 130 | if K.image_dim_ordering() == 'th': 131 | input_shape_img = (3, None, None) 132 | else: 133 | input_shape_img = 
(None, None, 3) 134 | 135 | # shared_layers_input= Input(shape=( None,None,None,832)) 136 | roi_input = Input(shape=(None, 4)) 137 | # vid_input = Input(shape =(None, None, None, 3)) 138 | # vid_input_shape = (64, 400,320, 3) 139 | img_input = Input(shape=(None, None, 3)) 140 | 141 | feature_map_input = Input(shape=( None,None,None,832)) 142 | shared_layers_image = nn.nn_base(img_input, trainable=False) 143 | 144 | 145 | # rgb_model = Inception_Inflated3d( 146 | # include_top=False, 147 | # weights='rgb_kinetics_only', 148 | # input_shape=vid_input_shape, 149 | # classes=classes_count) 150 | 151 | def extract_numpy_single_frame(img,C): 152 | 153 | img = (img/255.)*2 - 1 154 | return img 155 | 156 | def get_frame_idx(img_path): 157 | winSize = 64 158 | tags = img_path.split(os.path.sep) 159 | vid_folder = '/'+'/'.join(tags[1:-1]) 160 | frames = os.listdir(vid_folder) 161 | if 'CAD' in img_path: 162 | frames = [f for f in frames if f.startswith('RGB')] 163 | frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1])) 164 | else: 165 | frames.sort(key = lambda x: int(x.split('.')[0])) 166 | frame_index = frames.index(tags[-1]) 167 | fi = get_frames_index(frames,frame_index,winSize) 168 | seq =[frames[k] if k!=-1 else k for k in fi] 169 | # print(seq[0],seq[31]) 170 | # print seq 171 | return seq 172 | optimizer_classifier = Adam(lr=1e-5) 173 | print len(classes_count) 174 | 175 | if version == 'concat' and type == 'v1': 176 | classifier = classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=8, trainable=True) 177 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 178 | elif version == 'whole' : 179 | classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version) 180 | model_classifier = Model([feature_map_input], classifier) 181 | elif version == 'concat' and type == 'v2': 182 | classifier = nn.classifier_i3d_concat_v2(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True) 183 | model_classifier = Model([img_input, feature_map_input, roi_input], classifier) 184 | 185 | elif version == 'roi': 186 | classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count)) 187 | model_classifier = Model([roi_input, feature_map_input], classifier) 188 | 189 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 190 | # model_all = Model([i3d_features, roi_input], classifier) 191 | # elif version == 'whole' and type == 'v2': 192 | # classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version) 193 | # model_classifier = Model([feature_map_input], classifier) 194 | 195 | 196 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 197 | model_name = os.path.join(options.model_name,'model.hdf5') 198 | print('Loading weights from {}'.format(model_name)) 199 | model_classifier.load_weights(model_name, by_name=True) 200 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 201 | 202 | all_imgs = [] 203 | classes = {} 204 | bbox_threshold = 0.7 205 | visualise = True 206 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool',options.val_data) 207 | df = pd.read_csv(f_val) 208 | final_predictions = [] 209 | 210 | indices = range(len(df)) 211 | print len(df) 212 | # bp() 213 | bs = 8 214 | for i in 
tqdm(range(len(df))): 215 | 216 | try: 217 | # ind = indices[i:i+bs] 218 | row = df.iloc[i,:] 219 | val_vid = row[0] 220 | vid_path = os.path.join(img_path,val_vid) 221 | img_name = str(int(row[1]))+'.jpg' 222 | 223 | filepath = os.path.join(vid_path,img_name) 224 | fp = filepath.replace('keyframes','numpy_8_actions') 225 | fp = fp.replace('.jpg','.npy') 226 | val_vid = row[0] 227 | vid_path = os.path.join(img_path,val_vid) 228 | img_name = str(int(row[1]))+'.jpg' 229 | k = 0 230 | k+=1 231 | 232 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 233 | continue 234 | st = time.time() 235 | filepath = os.path.join(vid_path,img_name) 236 | # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg' 237 | fr_num = filepath.split(os.path.sep)[-1].split('.')[0] 238 | img = cv2.imread(filepath) 239 | # x_img = 240 | tags = filepath.split(os.path.sep) 241 | img_folder = '/'+'/'.join(tags[1:-1]) 242 | shared_layers_orig = np.load(fp) 243 | # seq = get_frame_idx(filepath) 244 | # 245 | # # print filepath, seq 246 | # vid_numpy = [] 247 | x_img = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 248 | x_img = np.expand_dims(x_img, axis = 0) 249 | # # print filepath, seq 250 | # # bp() 251 | # for frame in seq: 252 | # if frame!=-1: 253 | # fr_name = os.path.join(img_folder, frame) 254 | # np_name = fr_name.replace('.jpg','.npy') 255 | # np_name = np_name.replace('train/keyframes','numpy_arrays_val') 256 | # # print np_name 257 | # fr_npy = np.load(np_name) 258 | # vid_numpy.append(fr_npy) 259 | # else: 260 | # vid_numpy.append(np.zeros((resized_height,resized_width,3))) 261 | # 262 | # vid_numpy = np.array(vid_numpy) 263 | # vid_numpy = np.expand_dims(vid_numpy,axis=0) 264 | x1,y1,x2,y2 = (float(row[2])*resized_width)/float(16),(float(row[3])*resized_height)/float(16),(float(row[4])*resized_width)/float(16),(float(row[5])*resized_height)/float(16) 265 | # shared_layers_orig = rgb_model.predict(vid_numpy) 266 | 267 | # print vid_numpy.shape 268 | if version == 'concat': 269 | # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)] 270 | w = x2-x1 271 | h = y2-y1 272 | roi = np.array([x1,y1,w,h]) 273 | rois = np.expand_dims(roi,axis=0) 274 | rois = np.expand_dims(rois,axis=0) 275 | # try: 276 | # rois, vid_numpy = get_batch(df, ind) 277 | # print rois, shared_layers_orig.shape 278 | [P_cls]= model_classifier.predict([x_img,shared_layers_orig, rois]) 279 | elif version == 'whole': 280 | [P_cls]= model_classifier.predict([shared_layers_orig]) 281 | 282 | elif version == 'roi': 283 | w = x2-x1 284 | h = y2-y1 285 | roi = np.array([x1,y1,w,h]) 286 | rois = np.expand_dims(roi,axis=0) 287 | rois = np.expand_dims(rois,axis=0) 288 | [P_cls]= model_classifier.predict([rois, shared_layers_orig]) 289 | 290 | seq_name = filepath.split(os.path.sep)[-2] 291 | # print(P_cls) 292 | # bp() 293 | # print output_csv_file 294 | # print(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[5])+','+str(P_cls[0,5])+'\n') 295 | et = time.time()-st 296 | # print "The per frame time is {}".format(et) 297 | with open(output_csv_file,'a+') as f_predicted: 298 | [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) 
if class_mapping[cn]!='bg'] 299 | 300 | # if k==1: 301 | # break 302 | except Exception as e: 303 | exc_type, exc_obj, exc_tb = sys.exc_info() 304 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 305 | print(exc_type, fname, exc_tb.tb_lineno) 306 | print('Exception: {}'.format(e)) 307 | print(filepath) 308 | continue 309 | -------------------------------------------------------------------------------- /train_frcnn_i3d_batch.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | 20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | from keras.utils.training_utils import multi_gpu_model 37 | 38 | 39 | # import tensorflow as tf 40 | # import keras.backend.tensorflow_backend as ktf 41 | 42 | sys.setrecursionlimit(40000) 43 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d') 44 | # def get_session(gpu_fraction=0.333): 45 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction, 46 | # allow_growth=True) 47 | # return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 48 | # ktf.set_session(get_session()) 49 | 50 | 51 | old_stdout = sys.stdout 52 | 53 | log_file = open("message.log","w") 54 | 55 | 56 | from i3d_inception import Inception_Inflated3d 57 | # from i3d_inception import Inception_Inflated3d 58 | # from tensorflow.python import keras 59 | from keras.utils import plot_model 60 | import os 61 | import pdb 62 | from keras.layers import Input 63 | import logging 64 | from pdb import set_trace as bp 65 | 66 | logging.basicConfig(filename='example.log',level=logging.DEBUG) 67 | parser = OptionParser() 68 | 69 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 70 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. 
One of simple or pascal_voc", 71 | default="pascal_voc") 72 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4) 73 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Annotation index to start loading from.", default=0) 74 | 
75 | parser.add_option("-m", "--output_weight_path",dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 76 | parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to train on (e.g. AVA).", default='AVA') 77 | parser.add_option("-e","--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000) 78 | 
79 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 80 | parser.add_option("--aug", dest="aug", type = int,help="Whether to apply data augmentation (0 or 1).", default=0) 81 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 82 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False) 83 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 84 | action="store_true", default=False) 85 | parser.add_option("--config_filename", dest="config_filename", help= 86 | "Location to store all the metadata related to the training (to be used when testing).", 87 | default="config.pickle") 88 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.") 89 | parser.add_option("--j", dest="job", help="If the job output should be saved") 90 | 
91 | (options, args) = parser.parse_args() 92 | 
93 | if not options.train_path: # if filename is not given 94 | parser.error('Error: path to training data must be specified. 
Pass --path to command line') 95 | 96 | if options.parser == 'pascal_voc': 97 | from keras_frcnn.pascal_voc_parser import get_data 98 | elif options.parser == 'simple': 99 | from keras_frcnn.simple_parser import get_data 100 | else: 101 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 102 | 103 | # pass the settings from the command line, and persist them in the config object 104 | C = config.Config() 105 | 106 | C.use_horizontal_flips = bool(options.horizontal_flips) 107 | C.use_vertical_flips = bool(options.vertical_flips) 108 | C.rot_90 = bool(options.rot_90) 109 | C.dataset = options.dataset 110 | C.augment = options.aug 111 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 112 | C.model_path = output_weight_path 113 | C.num_rois = int(options.num_rois) 114 | C.bs =16 115 | if options.network == 'vgg': 116 | C.network = 'vgg' 117 | from keras_frcnn import vgg as nn 118 | elif options.network == 'resnet50': 119 | from keras_frcnn import resnet as nn 120 | C.network = 'resnet50' 121 | else: 122 | print('Not a valid model') 123 | raise ValueError 124 | 125 | 126 | # check if weight path was passed via command line 127 | if options.input_weight_path: 128 | C.base_net_weights = options.input_weight_path 129 | else: 130 | # set the path to weights based on backend and model 131 | C.base_net_weights = nn.get_weight_path() 132 | 133 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 134 | 135 | job = options.job 136 | print(len(classes_count)) 137 | if job: 138 | sys.stdout = log_file 139 | 140 | 141 | if 'bg' not in classes_count: 142 | classes_count['bg'] = 0 143 | class_mapping['bg'] = len(class_mapping) 144 | 145 | C.class_mapping = class_mapping 146 | 147 | inv_map = {v: k for k, v in class_mapping.items()} 148 | 149 | print('Training images per class:') 150 | pprint.pprint(classes_count) 151 | print('Num classes (including bg) = {}'.format(len(classes_count))) 152 | 153 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 154 | 155 | with open(config_output_filename, 'wb') as config_f: 156 | pickle.dump(C,config_f) 157 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 158 | 159 | random.shuffle(all_imgs) 160 | 161 | num_imgs = len(all_imgs) 162 | 163 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 164 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 165 | 166 | print('Num train samples {}'.format(len(train_imgs))) 167 | print('Num val samples {}'.format(len(val_imgs))) 168 | 169 | indices = range(len(train_imgs)) 170 | val_indices = range(len(val_imgs)) 171 | data_gen_train = data_generators.get_anchor_gt_batch(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(),indices, mode='train') 172 | data_gen_val = data_generators.get_anchor_gt_batch_val(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), val_indices,mode='val') 173 | 174 | roi_input = Input(shape=(None, 4),name = 'roi_input') 175 | vid_input = Input(shape =(64, 400, 320, 3),name = 'vid_input') 176 | vid_input_shape = (64, 400,320, 3) 177 | rgb_model = Inception_Inflated3d( 178 | include_top=False, 179 | weights='rgb_kinetics_only', 180 | input_shape=vid_input_shape, 181 | classes=classes_count) 182 | 183 | # shared_layers_image = nn.nn_base(img_input, trainable=True) 184 | shared_layers_orig = rgb_model(vid_input) 185 | print 
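# [Editor's sketch] The block above pickles the Config object so the test
# scripts can restore identical preprocessing settings (class_mapping,
# im_size, augmentation flags, ...). The round-trip in isolation, assuming a
# Config object like keras_frcnn.config.Config:
import pickle

def save_config(C, path):
    with open(path, 'wb') as f:
        pickle.dump(C, f)

def load_config(path):
    with open(path, 'rb') as f:
        return pickle.load(f)

# At test time the unpickled object carries the exact training-time settings,
# which is why each script prints a reminder of where the pickle was written.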
shared_layers_orig.shape.as_list() 186 | # bp() 187 | def slice_tensor(shared_layers): 188 | 189 | feature_shape = shared_layers.shape.as_list() 190 | shared_layers = shared_layers[:,8,:,:,:] 191 | return shared_layers 192 | 193 | 194 | shared_layers = Lambda(slice_tensor)(shared_layers_orig) 195 | classifier = nn.classifier_i3d_batch(shared_layers_orig, roi_input, 1, nb_classes=len(classes_count), trainable=True) 196 | # model_rpn = Model(vid_input, rpn[:2]) 197 | model_classifier = Model([roi_input, vid_input], classifier) 198 | # model_classifier = multi_gpu_model(model_classifier, gpus=2) 199 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 200 | model_all = Model([vid_input, roi_input], classifier) 201 | 202 | # model_all = multi_gpu_model(model_all, gpus=2) 203 | plot_model(model_all, to_file='model_all_i3d_cls_batch.png', show_shapes = True) 204 | log_folder = os.path.join(options.output_weight_path,'logs/') 205 | if not os.path.isdir(log_folder): 206 | os.makedirs(log_folder) 207 | tensorboard = TensorBoard(log_dir=log_folder) 208 | tensorboard.set_model(model_classifier) 209 | train_names = ['train_loss', 'train_mae'] 210 | def write_log(callback, names, logs, batch_no): 211 | for name, value in zip(names, logs): 212 | summary = tf.Summary() 213 | summary_value = summary.value.add() 214 | summary_value.simple_value = value 215 | summary_value.tag = name 216 | callback.writer.add_summary(summary, batch_no) 217 | callback.writer.flush() 218 | 219 | optimizer = Adam(lr=1e-5) 220 | optimizer_classifier = Adam(lr=1e-5) 221 | if C.dataset == 'AVA': 222 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 223 | else: 224 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls]) 225 | 226 | model_all.compile(optimizer='sgd', loss='mae') 227 | 228 | 229 | 230 | epoch_length = 1000 231 | epoch_length_val = 100 232 | num_epochs = int(options.num_epochs) 233 | iter_num = 0 234 | iter_num_tensorboard = 0 235 | total_cur_loss = [] 236 | total_cur_loss_val = [] 237 | iter_num_val_tensorboard = 0 238 | losses = np.zeros((epoch_length, 1)) 239 | losses_val = np.zeros((epoch_length_val, 1)) 240 | rpn_accuracy_rpn_monitor = [] 241 | rpn_accuracy_for_epoch = [] 242 | start_time = time.time() 243 | ###### val ##### 244 | rpn_accuracy_rpn_monitor_val = [] 245 | rpn_accuracy_for_epoch_val = [] 246 | 247 | ################ 248 | best_loss = np.Inf 249 | 250 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 251 | print('Starting training') 252 | 253 | vis = True 254 | 255 | for epoch_num in range(num_epochs): 256 | 257 | progbar = generic_utils.Progbar(epoch_length) 258 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 259 | 260 | while True: 261 | try: 262 | img_data, seq_numpy, x_img = next(data_gen_train) 263 | print(seq_numpy.shape, x_img.shape) 264 | # print type(img_data) 265 | X2, Y1 = roi_helpers.calc_iou_batch(img_data, C, class_mapping) 266 | print(X2.shape, Y1.shape) 267 | # bp() 268 | loss_class = model_classifier.train_on_batch([X2[ :, :,:],seq_numpy], [Y1[:, :, :]]) 269 | # losses[iter_num, 0] = loss_class 270 | # 271 | iter_num += 1 272 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 273 | iter_num_tensorboard+=1 274 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 275 | 276 | if iter_num == epoch_length: 277 | loss_class_cls = np.mean(losses[:, 0]) 278 | curr_loss =loss_class_cls 279 | 
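# [Editor's sketch] slice_tensor above picks a single temporal index (8) out
# of the 5D I3D feature volume (batch, time, rows, cols, channels), i.e.
# roughly the centre of the temporally downsampled 64-frame clip, and is
# wrapped in a Lambda so Keras can treat it as a layer. The same slicing in
# NumPy, with a hypothetical feature shape for illustration:
import numpy as np

features = np.zeros((1, 16, 25, 20, 832))   # hypothetical I3D feature volume
centre = features[:, 8, :, :, :]            # -> (1, 25, 20, 832)
print(centre.shape)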
write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 280 | total_cur_loss.append(curr_loss) 281 | iter_num = 0 282 | start_time = time.time() 283 | 284 | #################### Val ######################################################### 285 | iter_num_val = 0 286 | 287 | while True: 288 | # try: 289 | img_data, seq_numpy, x_img = next(data_gen_val) 290 | # print("validation") 291 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 292 | loss_class = model_classifier.train_on_batch([ X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 293 | losses_val[iter_num_val,0] = loss_class 294 | iter_num_val += 1 295 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 296 | iter_num_val_tensorboard+=1 297 | if iter_num_val == epoch_length_val: 298 | 299 | loss_class_cls = np.mean(losses_val[:, 0]) 300 | curr_loss_val = loss_class_cls 301 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 302 | total_cur_loss_val.append(curr_loss_val) 303 | # total_cur_loss.append(curr_loss) 304 | iter_num_val = 0 305 | break 306 | 307 | if curr_loss < best_loss: 308 | if C.verbose: 309 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 310 | best_loss = curr_loss 311 | model_all.save_weights(C.model_path) 312 | 313 | break 314 | 315 | except Exception as e: 316 | exc_type, exc_obj, exc_tb = sys.exc_info() 317 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 318 | print(exc_type, fname, exc_tb.tb_lineno) 319 | print('Exception: {}'.format(e)) 320 | # print(seq_numpy.shape) 321 | continue 322 | sys.stdout = old_stdout 323 | 324 | plt.plot(total_cur_loss) 325 | plt.plot(total_cur_loss_val) 326 | plt.legend(['train loss', 'val loss'], loc='upper left') 327 | 328 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 329 | plt.savefig(savefigure) 330 | print('Training complete, exiting.') 331 | log_file.close() 332 | -------------------------------------------------------------------------------- /train_whole_noI3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | import cv2 20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | # 
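# [Editor's sketch] The plotting at the end of each training script depends
# on the 'Agg' backend selected at import time, which lets matplotlib write
# image files on a display-less training node. The pattern in isolation,
# with placeholder loss values:
import matplotlib
matplotlib.use('Agg')            # must run before pyplot is imported
import matplotlib.pyplot as plt

train_loss = [0.9, 0.7, 0.6]     # placeholder values
val_loss = [1.0, 0.8, 0.75]
plt.plot(train_loss)
plt.plot(val_loss)
plt.legend(['train loss', 'val loss'], loc='upper left')
plt.savefig('loss_plot.jpg')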
import tensorflow as tf
37 | # import keras.backend.tensorflow_backend as ktf
38 | 
39 | sys.setrecursionlimit(40000)
40 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d')
41 | # def get_session(gpu_fraction=0.333):
42 | #     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
43 | #                                 allow_growth=True)
44 | #     return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
45 | # ktf.set_session(get_session())
46 | 
47 | 
48 | old_stdout = sys.stdout
49 | 
50 | log_file = open("message.log", "w")
51 | 
52 | 
53 | from i3d_inception import Inception_Inflated3d
54 | 
55 | # from tensorflow.python import keras
56 | from keras.utils import plot_model
57 | import os
58 | import pdb
59 | from keras.layers import Input
60 | import logging
61 | logging.basicConfig(filename='example.log', level=logging.DEBUG)
62 | parser = OptionParser()
63 | 
64 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.")
65 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc",
66 |                   default="pascal_voc")
67 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4)
68 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Index in the training data to start from.", default=0)
69 | 
70 | parser.add_option("-m", "--output_weight_path", dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5')
71 | parser.add_option("-d", "--dataset", dest="dataset", help="Dataset name (e.g. AVA).", default='AVA')
72 | parser.add_option("-e", "--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)
73 | parser.add_option("-v", "--version", dest="version", help="Version of the classifier head to build (e.g. v1).", default='v1')
74 | 
75 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
76 | parser.add_option("--aug", dest="aug", type="int", help="Whether to apply data augmentation (0 or 1).", default=0)
77 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False)
78 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False)
79 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).",
80 |                   action="store_true", default=False)
81 | parser.add_option("--config_filename", dest="config_filename", help=
82 |                   "Location to store all the metadata related to the training (to be used when testing).",
83 |                   default="config.pickle")
84 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.")
85 | parser.add_option("--j", dest="job", help="If the job output should be saved.")
86 | 
87 | (options, args) = parser.parse_args()
88 | 
89 | if not options.train_path:  # if the training data path is not given
90 | 	parser.error('Error: path to training data must be specified. 
Pass --path to command line') 91 | 92 | if options.parser == 'pascal_voc': 93 | from keras_frcnn.pascal_voc_parser import get_data 94 | elif options.parser == 'simple': 95 | from keras_frcnn.simple_parser import get_data 96 | else: 97 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 98 | 99 | # pass the settings from the command line, and persist them in the config object 100 | C = config.Config() 101 | version = options.version 102 | C.use_horizontal_flips = bool(options.horizontal_flips) 103 | C.use_vertical_flips = bool(options.vertical_flips) 104 | C.rot_90 = bool(options.rot_90) 105 | C.dataset = options.dataset 106 | C.augment = options.aug 107 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 108 | C.model_path = output_weight_path 109 | C.num_rois = int(options.num_rois) 110 | 111 | if options.network == 'vgg': 112 | C.network = 'vgg' 113 | from keras_frcnn import vgg as nn 114 | elif options.network == 'resnet50': 115 | from keras_frcnn import resnet as nn 116 | C.network = 'resnet50' 117 | else: 118 | print('Not a valid model') 119 | raise ValueError 120 | 121 | 122 | # check if weight path was passed via command line 123 | if options.input_weight_path: 124 | C.base_net_weights = options.input_weight_path 125 | else: 126 | # set the path to weights based on backend and model 127 | C.base_net_weights = nn.get_weight_path() 128 | 129 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 130 | 131 | job = options.job 132 | print(len(classes_count)), len(class_mapping) 133 | if job: 134 | sys.stdout = log_file 135 | 136 | 137 | # if 'bg' not in classes_count: 138 | # classes_count['bg'] = 0 139 | # class_mapping['bg'] = len(class_mapping) 140 | 141 | C.class_mapping = class_mapping 142 | 143 | inv_map = {v: k for k, v in class_mapping.items()} 144 | 145 | print('Training images per class:') 146 | pprint.pprint(classes_count) 147 | print('Num classes (including bg) = {}'.format(len(classes_count))) 148 | 149 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 150 | 151 | with open(config_output_filename, 'wb') as config_f: 152 | pickle.dump(C,config_f) 153 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 154 | 155 | random.shuffle(all_imgs) 156 | 157 | num_imgs = len(all_imgs) 158 | 159 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 160 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 161 | 162 | print('Num train samples {}'.format(len(train_imgs))) 163 | print('Num val samples {}'.format(len(val_imgs))) 164 | 165 | 166 | data_gen_train = data_generators.get_i3d_feature(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 167 | data_gen_val = data_generators.get_i3d_feature(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 168 | 169 | 170 | def get_action_dic(): 171 | 172 | action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv' 173 | ac_dic = {} 174 | f = open(action_csv,'r') 175 | actions = f.read().splitlines() 176 | for action in actions[1:]: 177 | tags = action.split(',') 178 | tags = tags[:-1] 179 | ac_id = int(tags[0]) 180 | ac = ''.join(tags[1:]) 181 | if '"' in ac: 182 | ac =ac.replace('"','') 183 | # if ',' in ac: 184 | # ac = ''.join(ac.split(',')) 185 | 186 | ac_dic[ac_id] = ac 187 | if ac_id == 1: 188 | print 
ac 189 | return ac_dic 190 | 191 | ac_id = get_action_dic() 192 | print len(class_mapping) 193 | num_classes = len(class_mapping) 194 | # if C.dataset == 'AVA': 195 | shared_layers_orig = Input(shape=(None,None,None,832), name = 'shared_layers_orig') 196 | classifier = nn.classifier_i3d_concat(shared_layers_orig, 1, nb_classes=num_classes, trainable=True, version=version) 197 | 198 | model_classifier = Model([shared_layers_orig], classifier) 199 | 200 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 201 | model_all = Model([shared_layers_orig], classifier) 202 | plot_model(model_all, to_file='model_all_i3d_whole.png', show_shapes = True) 203 | log_folder = os.path.join(options.output_weight_path,'logs/') 204 | if not os.path.isdir(log_folder): 205 | os.makedirs(log_folder) 206 | tensorboard = TensorBoard(log_dir=log_folder) 207 | tensorboard.set_model(model_classifier) 208 | train_names = ['train_loss', 'train_mae'] 209 | def write_log(callback, names, logs, batch_no): 210 | for name, value in zip(names, logs): 211 | summary = tf.Summary() 212 | summary_value = summary.value.add() 213 | summary_value.simple_value = value 214 | summary_value.tag = name 215 | callback.writer.add_summary(summary, batch_no) 216 | callback.writer.flush() 217 | 218 | optimizer = Adam(lr=1e-5) 219 | optimizer_classifier = Adam(lr=1e-5) 220 | 221 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 222 | 223 | model_all.compile(optimizer='sgd', loss='mae') 224 | 225 | epoch_length = 1000 226 | epoch_length_val =100 227 | num_epochs = int(options.num_epochs) 228 | iter_num = 0 229 | iter_num_tensorboard = 0 230 | total_cur_loss = [] 231 | total_cur_loss_val = [] 232 | iter_num_val_tensorboard = 0 233 | losses = np.zeros((epoch_length, 1)) 234 | losses_val = np.zeros((epoch_length_val, 1)) 235 | rpn_accuracy_rpn_monitor = [] 236 | rpn_accuracy_for_epoch = [] 237 | start_time = time.time() 238 | ###### val ##### 239 | rpn_accuracy_rpn_monitor_val = [] 240 | rpn_accuracy_for_epoch_val = [] 241 | 242 | ################ 243 | best_loss = np.Inf 244 | 245 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 246 | print('Starting training') 247 | # os.makedirs('check_dataset') 248 | vis = True 249 | 250 | for epoch_num in range(num_epochs): 251 | 252 | progbar = generic_utils.Progbar(epoch_length) 253 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 254 | num = 0 255 | while True: 256 | try: 257 | img_data, i3d_f, x_img = next(data_gen_train) 258 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 259 | # print X2, Y1 260 | # x1= (X2[0][0][0]) 261 | # y1 = (X2[0][0][1]) 262 | # x2 = (x1 + X2[0][0][2]) 263 | # y2 = (y1 +X2[0][0][3]) 264 | # x1, y1, x2, y2 = x1*16 , y1*16, x2*16, y2*16 265 | # # print x1, y1, x2, y2 266 | # # if x1>320 or x2>320 or y1>400 or y2>400: 267 | # # print "yes" 268 | # im_temp =cv2.imread(img_data['filepath']) 269 | # im_temp = cv2.resize(im_temp,(320, 400), interpolation=cv2.INTER_CUBIC) 270 | # # print im_temp.shape 271 | # cv2.rectangle(im_temp, (x1,y1),(x2,y2),(0,255,0),3) 272 | # font = cv2.FONT_HERSHEY_SIMPLEX 273 | # cl = [i for i, e in enumerate(Y1[0][0]) if e == 1] 274 | # print cl 275 | # ind = cl[0] 276 | # ac = ac_id[int(class_mapping_inv[ind])] 277 | # 278 | # # cv2.putText(im_temp,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA) 279 | 280 | 281 | # cv2.imwrite(os.path.join('check_dataset',ac+str(num)+'.jpg'),im_temp) 282 | 283 | num+=1 284 | 285 | # 
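# [Editor's sketch] get_action_dic above splits each row of the AVA action
# list on ',' and strips quotes by hand, which mangles action names that
# themselves contain commas. Python's csv module handles the quoting
# correctly; a safer variant, assuming the same id,name,type column layout:
import csv

def get_action_dic_csv(action_csv):
    ac_dic = {}
    with open(action_csv) as f:
        reader = csv.reader(f)
        next(reader)                      # skip the header row
        for row in reader:
            ac_dic[int(row[0])] = row[1]  # quoted names come back intact
    return ac_dic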
img_features = 286 | 287 | loss_class = model_classifier.train_on_batch([i3d_f], [Y1[:, :, :]]) 288 | losses[iter_num, 0] = loss_class 289 | 290 | iter_num += 1 291 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 292 | iter_num_tensorboard+=1 293 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 294 | 295 | if iter_num == epoch_length: 296 | loss_class_cls = np.mean(losses[:, 0]) 297 | curr_loss =loss_class_cls 298 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 299 | total_cur_loss.append(curr_loss) 300 | iter_num = 0 301 | start_time = time.time() 302 | 303 | #################### Val ######################################################### 304 | iter_num_val = 0 305 | 306 | while True: 307 | # try: 308 | img_data, seq_numpy, x_img = next(data_gen_val) 309 | # print("validation") 310 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 311 | loss_class = model_classifier.train_on_batch([seq_numpy], [Y1[:, :, :]]) 312 | losses_val[iter_num_val,0] = loss_class 313 | iter_num_val += 1 314 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 315 | iter_num_val_tensorboard+=1 316 | if iter_num_val == epoch_length_val: 317 | 318 | loss_class_cls = np.mean(losses_val[:, 0]) 319 | curr_loss_val = loss_class_cls 320 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 321 | total_cur_loss_val.append(curr_loss_val) 322 | # total_cur_loss.append(curr_loss) 323 | iter_num_val = 0 324 | break 325 | 326 | if curr_loss < best_loss: 327 | if C.verbose: 328 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 329 | best_loss = curr_loss 330 | model_all.save_weights(C.model_path) 331 | 332 | break 333 | 334 | except Exception as e: 335 | exc_type, exc_obj, exc_tb = sys.exc_info() 336 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 337 | print(exc_type, fname, exc_tb.tb_lineno) 338 | print('Exception: {}'.format(e)) 339 | # print(seq_numpy.shape) 340 | continue 341 | sys.stdout = old_stdout 342 | 343 | plt.plot(total_cur_loss) 344 | plt.plot(total_cur_loss_val) 345 | plt.legend(['train loss', 'val loss'], loc='upper left') 346 | 347 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 348 | plt.savefig(savefigure) 349 | print('Training complete, exiting.') 350 | log_file.close() 351 | -------------------------------------------------------------------------------- /train_frcnn_i3d_whole.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | import cv2 20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers 
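# [Editor's sketch] These scripts compile with losses.class_loss_multi_label,
# defined in keras_frcnn/losses.py (not shown in this dump). For AVA, where
# one person box can carry several simultaneous action labels, the usual
# choice is element-wise sigmoid cross-entropy; a plausible stand-in with the
# same call signature (an assumption, not the repo's actual definition):
from keras import backend as K

def class_loss_multi_label(y_true, y_pred):
    # mean binary cross-entropy over the per-class sigmoid outputs
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)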
import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | # import tensorflow as tf 37 | # import keras.backend.tensorflow_backend as ktf 38 | 39 | sys.setrecursionlimit(40000) 40 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d') 41 | # def get_session(gpu_fraction=0.333): 42 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction, 43 | # allow_growth=True) 44 | # return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 45 | # ktf.set_session(get_session()) 46 | 47 | 48 | old_stdout = sys.stdout 49 | 50 | log_file = open("message.log","w") 51 | 52 | 53 | from i3d_inception import Inception_Inflated3d 54 | # from i3d_inception import Inception_Inflated3d 55 | # from tensorflow.python import keras 56 | from keras.utils import plot_model 57 | import os 58 | import pdb 59 | from keras.layers import Input 60 | import logging 61 | logging.basicConfig(filename='example.log',level=logging.DEBUG) 62 | parser = OptionParser() 63 | 64 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 65 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", 66 | default="pascal_voc") 67 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4) 68 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Number of RoIs to process at once.", default=0) 69 | 70 | parser.add_option("-m", "--output_weight_path",dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 71 | parser.add_option("-d", "--dataset", dest="dataset", help="Number of RoIs to process at once.", default='AVA') 72 | parser.add_option("-e","--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000) 73 | 74 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 75 | parser.add_option("--aug", dest="aug", type = int,help="Base network to use. Supports vgg or resnet50.", default=0) 76 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 77 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False) 78 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 79 | action="store_true", default=False) 80 | parser.add_option("--config_filename", dest="config_filename", help= 81 | "Location to store all the metadata related to the training (to be used when testing).", 82 | default="config.pickle") 83 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. 
If not specified, will try to load default weights provided by keras.") 84 | parser.add_option("--j", dest="job", help="If the job output should be saved") 85 | 86 | (options, args) = parser.parse_args() 87 | 88 | if not options.train_path: # if filename is not given 89 | parser.error('Error: path to training data must be specified. Pass --path to command line') 90 | 91 | if options.parser == 'pascal_voc': 92 | from keras_frcnn.pascal_voc_parser import get_data 93 | elif options.parser == 'simple': 94 | from keras_frcnn.simple_parser import get_data 95 | else: 96 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 97 | 98 | # pass the settings from the command line, and persist them in the config object 99 | C = config.Config() 100 | 101 | C.use_horizontal_flips = bool(options.horizontal_flips) 102 | C.use_vertical_flips = bool(options.vertical_flips) 103 | C.rot_90 = bool(options.rot_90) 104 | C.dataset = options.dataset 105 | C.augment = options.aug 106 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 107 | C.model_path = output_weight_path 108 | C.num_rois = int(options.num_rois) 109 | 110 | if options.network == 'vgg': 111 | C.network = 'vgg' 112 | from keras_frcnn import vgg as nn 113 | elif options.network == 'resnet50': 114 | from keras_frcnn import resnet as nn 115 | C.network = 'resnet50' 116 | else: 117 | print('Not a valid model') 118 | raise ValueError 119 | 120 | 121 | # check if weight path was passed via command line 122 | if options.input_weight_path: 123 | C.base_net_weights = options.input_weight_path 124 | else: 125 | # set the path to weights based on backend and model 126 | C.base_net_weights = nn.get_weight_path() 127 | 128 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 129 | 130 | job = options.job 131 | print(len(classes_count)), len(class_mapping) 132 | if job: 133 | sys.stdout = log_file 134 | 135 | 136 | # if 'bg' not in classes_count: 137 | # classes_count['bg'] = 0 138 | # class_mapping['bg'] = len(class_mapping) 139 | 140 | C.class_mapping = class_mapping 141 | 142 | inv_map = {v: k for k, v in class_mapping.items()} 143 | 144 | print('Training images per class:') 145 | pprint.pprint(classes_count) 146 | print('Num classes (including bg) = {}'.format(len(classes_count))) 147 | 148 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 149 | 150 | with open(config_output_filename, 'wb') as config_f: 151 | pickle.dump(C,config_f) 152 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 153 | 154 | random.shuffle(all_imgs) 155 | 156 | num_imgs = len(all_imgs) 157 | 158 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 159 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 160 | 161 | print('Num train samples {}'.format(len(train_imgs))) 162 | print('Num val samples {}'.format(len(val_imgs))) 163 | 164 | 165 | data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 166 | data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 167 | 168 | roi_input = Input(shape=(None, 4),name = 'roi_input') 169 | vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 170 | img_input = Input(shape=(None, None, 3), name = 'img_input') 171 | vid_input_shape = (64, 
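# [Editor's sketch] Although vid_input is declared with None dimensions, the
# I3D backbone below is built for fixed 64-frame, 400x320 RGB clips. Building
# it standalone and probing the output makes the temporal/spatial
# downsampling explicit; the exact shape depends on this repo's (possibly
# modified) i3d_inception implementation, so treat the comment as indicative:
from i3d_inception import Inception_Inflated3d
import numpy as np

rgb_model = Inception_Inflated3d(include_top=False,
                                 weights='rgb_kinetics_only',
                                 input_shape=(64, 400, 320, 3))
clip = np.zeros((1, 64, 400, 320, 3), dtype=np.float32)
feat = rgb_model.predict(clip)
print(feat.shape)   # a 5D (1, T', H', W', 832)-style feature volume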
400,320, 3) 172 | rgb_model = Inception_Inflated3d( 173 | include_top=False, 174 | weights='rgb_kinetics_only', 175 | input_shape=vid_input_shape, 176 | classes=classes_count) 177 | roi_input = Input(shape=(None, 4),name = 'roi_input') 178 | shared_layers_image = nn.nn_base(img_input, trainable=True) 179 | shared_layers_orig = rgb_model(vid_input) 180 | def slice_tensor(shared_layers): 181 | 182 | feature_shape = shared_layers.shape.as_list() 183 | shared_layers = shared_layers[:,8,:,:,:] 184 | return shared_layers 185 | 186 | def get_action_dic(): 187 | 188 | action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv' 189 | ac_dic = {} 190 | f = open(action_csv,'r') 191 | actions = f.read().splitlines() 192 | for action in actions[1:]: 193 | tags = action.split(',') 194 | tags = tags[:-1] 195 | ac_id = int(tags[0]) 196 | ac = ''.join(tags[1:]) 197 | if '"' in ac: 198 | ac =ac.replace('"','') 199 | # if ',' in ac: 200 | # ac = ''.join(ac.split(',')) 201 | 202 | ac_dic[ac_id] = ac 203 | if ac_id == 1: 204 | print ac 205 | return ac_dic 206 | 207 | ac_id = get_action_dic() 208 | shared_layers = Lambda(slice_tensor)(shared_layers_orig) 209 | print len(class_mapping) 210 | num_classes = len(class_mapping) 211 | # if C.dataset == 'AVA': 212 | classifier = nn.classifier_i3d_concat(shared_layers_orig, 1, nb_classes=num_classes, trainable=True) 213 | 214 | model_classifier = Model([vid_input], classifier) 215 | 216 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 217 | model_all = Model([vid_input], classifier) 218 | plot_model(model_all, to_file='model_all_i3d_whole.png', show_shapes = True) 219 | log_folder = os.path.join(options.output_weight_path,'logs/') 220 | if not os.path.isdir(log_folder): 221 | os.makedirs(log_folder) 222 | tensorboard = TensorBoard(log_dir=log_folder) 223 | tensorboard.set_model(model_classifier) 224 | train_names = ['train_loss', 'train_mae'] 225 | def write_log(callback, names, logs, batch_no): 226 | for name, value in zip(names, logs): 227 | summary = tf.Summary() 228 | summary_value = summary.value.add() 229 | summary_value.simple_value = value 230 | summary_value.tag = name 231 | callback.writer.add_summary(summary, batch_no) 232 | callback.writer.flush() 233 | 234 | optimizer = Adam(lr=1e-5) 235 | optimizer_classifier = Adam(lr=1e-5) 236 | 237 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 238 | 239 | model_all.compile(optimizer='sgd', loss='mae') 240 | 241 | epoch_length = 1000 242 | epoch_length_val =100 243 | num_epochs = int(options.num_epochs) 244 | iter_num = 0 245 | iter_num_tensorboard = 0 246 | total_cur_loss = [] 247 | total_cur_loss_val = [] 248 | iter_num_val_tensorboard = 0 249 | losses = np.zeros((epoch_length, 1)) 250 | losses_val = np.zeros((epoch_length_val, 1)) 251 | rpn_accuracy_rpn_monitor = [] 252 | rpn_accuracy_for_epoch = [] 253 | start_time = time.time() 254 | ###### val ##### 255 | rpn_accuracy_rpn_monitor_val = [] 256 | rpn_accuracy_for_epoch_val = [] 257 | 258 | ################ 259 | best_loss = np.Inf 260 | 261 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 262 | print('Starting training') 263 | # os.makedirs('check_dataset') 264 | vis = True 265 | 266 | for epoch_num in range(num_epochs): 267 | 268 | progbar = generic_utils.Progbar(epoch_length) 269 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 270 | num = 0 271 | while True: 272 | try: 273 | img_data, seq_numpy, x_img = 
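# [Editor's sketch] roi_helpers.calc_label (not shown in this dump) turns an
# image's ground-truth action ids into the Y1 tensor fed to train_on_batch.
# In a multi-label setup it is essentially a binary indicator vector per box;
# an illustrative reconstruction under that assumption:
import numpy as np

def make_multilabel_target(action_ids, class_mapping):
    y = np.zeros((1, 1, len(class_mapping)), dtype=np.float32)
    for ac in action_ids:
        y[0, 0, class_mapping[ac]] = 1.0
    return y

# e.g. a box annotated with actions '12' and '80':
#   Y1 = make_multilabel_target(['12', '80'], class_mapping)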
next(data_gen_train) 274 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 275 | # print X2, Y1 276 | # x1= (X2[0][0][0]) 277 | # y1 = (X2[0][0][1]) 278 | # x2 = (x1 + X2[0][0][2]) 279 | # y2 = (y1 +X2[0][0][3]) 280 | # x1, y1, x2, y2 = x1*16 , y1*16, x2*16, y2*16 281 | # # print x1, y1, x2, y2 282 | # # if x1>320 or x2>320 or y1>400 or y2>400: 283 | # # print "yes" 284 | # im_temp =cv2.imread(img_data['filepath']) 285 | # im_temp = cv2.resize(im_temp,(320, 400), interpolation=cv2.INTER_CUBIC) 286 | # # print im_temp.shape 287 | # cv2.rectangle(im_temp, (x1,y1),(x2,y2),(0,255,0),3) 288 | # font = cv2.FONT_HERSHEY_SIMPLEX 289 | # cl = [i for i, e in enumerate(Y1[0][0]) if e == 1] 290 | # print cl 291 | # ind = cl[0] 292 | # ac = ac_id[int(class_mapping_inv[ind])] 293 | # 294 | # # cv2.putText(im_temp,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA) 295 | 296 | 297 | # cv2.imwrite(os.path.join('check_dataset',ac+str(num)+'.jpg'),im_temp) 298 | 299 | num+=1 300 | 301 | # img_features = 302 | 303 | loss_class = model_classifier.train_on_batch([seq_numpy], [Y1[:, :, :]]) 304 | losses[iter_num, 0] = loss_class 305 | 306 | iter_num += 1 307 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 308 | iter_num_tensorboard+=1 309 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 310 | 311 | if iter_num == epoch_length: 312 | loss_class_cls = np.mean(losses[:, 0]) 313 | curr_loss =loss_class_cls 314 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 315 | total_cur_loss.append(curr_loss) 316 | iter_num = 0 317 | start_time = time.time() 318 | 319 | #################### Val ######################################################### 320 | iter_num_val = 0 321 | 322 | while True: 323 | # try: 324 | img_data, seq_numpy, x_img = next(data_gen_val) 325 | # print("validation") 326 | Y1 = roi_helpers.calc_label(img_data, C, class_mapping) 327 | loss_class = model_classifier.train_on_batch([seq_numpy], [Y1[:, :, :]]) 328 | losses_val[iter_num_val,0] = loss_class 329 | iter_num_val += 1 330 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 331 | iter_num_val_tensorboard+=1 332 | if iter_num_val == epoch_length_val: 333 | 334 | loss_class_cls = np.mean(losses_val[:, 0]) 335 | curr_loss_val = loss_class_cls 336 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 337 | total_cur_loss_val.append(curr_loss_val) 338 | # total_cur_loss.append(curr_loss) 339 | iter_num_val = 0 340 | break 341 | 342 | if curr_loss < best_loss: 343 | if C.verbose: 344 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 345 | best_loss = curr_loss 346 | model_all.save_weights(C.model_path) 347 | 348 | break 349 | 350 | except Exception as e: 351 | exc_type, exc_obj, exc_tb = sys.exc_info() 352 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 353 | print(exc_type, fname, exc_tb.tb_lineno) 354 | print('Exception: {}'.format(e)) 355 | # print(seq_numpy.shape) 356 | continue 357 | sys.stdout = old_stdout 358 | 359 | plt.plot(total_cur_loss) 360 | plt.plot(total_cur_loss_val) 361 | plt.legend(['train loss', 'val loss'], loc='upper left') 362 | 363 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 364 | plt.savefig(savefigure) 365 | print('Training complete, exiting.') 366 | log_file.close() 367 | -------------------------------------------------------------------------------- /train_frcnn_i3d_v2.py: 
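# [Editor's sketch] Each training script accumulates per-iteration losses in
# a fixed NumPy buffer and reports the running mean through Keras' Progbar.
# The pattern in isolation, with a placeholder loss value:
import numpy as np
from keras.utils import generic_utils

epoch_length = 5
losses = np.zeros((epoch_length, 1))
progbar = generic_utils.Progbar(epoch_length)
for it in range(epoch_length):
    losses[it, 0] = 1.0 / (it + 1)   # placeholder for the train_on_batch loss
    progbar.update(it + 1, [('class_loss', float(np.mean(losses[:it + 1, 0])))])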
-------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | import tensorflow as tf 10 | from keras.backend.tensorflow_backend import set_session 11 | config = tf.ConfigProto() 12 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 13 | config.log_device_placement = True # to log device placement (on which device the operation ran) 14 | # (nothing gets printed in Jupyter, only if you run it standalone) 15 | sess = tf.Session(config=config) 16 | set_session(sess) # set this TensorFlow session as the default session for Keras 17 | from keras.utils import plot_model 18 | import os 19 | import cv2 20 | from keras.callbacks import TensorBoard 21 | from keras import backend as K 22 | from keras.optimizers import Adam, SGD, RMSprop 23 | from keras.layers import Input 24 | from keras.models import Model 25 | from keras_frcnn import config, data_generators 26 | from keras_frcnn import losses as losses 27 | import keras_frcnn.roi_helpers as roi_helpers 28 | from keras.utils import generic_utils 29 | from keras.layers import Lambda 30 | from i3d_inception import Inception_Inflated3d 31 | import collections 32 | import sys 33 | import matplotlib 34 | matplotlib.use('Agg') 35 | import matplotlib.pyplot as plt 36 | # import tensorflow as tf 37 | # import keras.backend.tensorflow_backend as ktf 38 | 39 | sys.setrecursionlimit(40000) 40 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d') 41 | # def get_session(gpu_fraction=0.333): 42 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction, 43 | # allow_growth=True) 44 | # return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 45 | # ktf.set_session(get_session()) 46 | 47 | 48 | old_stdout = sys.stdout 49 | 50 | log_file = open("message.log","w") 51 | 52 | 53 | from i3d_inception import Inception_Inflated3d 54 | # from i3d_inception import Inception_Inflated3d 55 | # from tensorflow.python import keras 56 | from keras.utils import plot_model 57 | import os 58 | import pdb 59 | from keras.layers import Input 60 | import logging 61 | logging.basicConfig(filename='example.log',level=logging.DEBUG) 62 | parser = OptionParser() 63 | 64 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 65 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", 66 | default="pascal_voc") 67 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4) 68 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Number of RoIs to process at once.", default=0) 69 | 70 | parser.add_option("-m", "--output_weight_path",dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 71 | parser.add_option("-d", "--dataset", dest="dataset", help="Number of RoIs to process at once.", default='AVA') 72 | parser.add_option("-e","--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000) 73 | 74 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 75 | parser.add_option("--aug", dest="aug", type = int,help="Base network to use. 
Supports vgg or resnet50.", default=0) 76 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 77 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False) 78 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 79 | action="store_true", default=False) 80 | parser.add_option("--config_filename", dest="config_filename", help= 81 | "Location to store all the metadata related to the training (to be used when testing).", 82 | default="config.pickle") 83 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.") 84 | parser.add_option("--j", dest="job", help="If the job output should be saved") 85 | 86 | (options, args) = parser.parse_args() 87 | 88 | if not options.train_path: # if filename is not given 89 | parser.error('Error: path to training data must be specified. Pass --path to command line') 90 | 91 | if options.parser == 'pascal_voc': 92 | from keras_frcnn.pascal_voc_parser import get_data 93 | elif options.parser == 'simple': 94 | from keras_frcnn.simple_parser import get_data 95 | else: 96 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 97 | 98 | # pass the settings from the command line, and persist them in the config object 99 | C = config.Config() 100 | 101 | C.use_horizontal_flips = bool(options.horizontal_flips) 102 | C.use_vertical_flips = bool(options.vertical_flips) 103 | C.rot_90 = bool(options.rot_90) 104 | C.dataset = options.dataset 105 | C.augment = options.aug 106 | output_weight_path = os.path.join(options.output_weight_path,'model.hdf5') 107 | C.model_path = output_weight_path 108 | C.num_rois = int(options.num_rois) 109 | 110 | if options.network == 'vgg': 111 | C.network = 'vgg' 112 | from keras_frcnn import vgg as nn 113 | elif options.network == 'resnet50': 114 | from keras_frcnn import resnet as nn 115 | C.network = 'resnet50' 116 | else: 117 | print('Not a valid model') 118 | raise ValueError 119 | 120 | 121 | # check if weight path was passed via command line 122 | if options.input_weight_path: 123 | C.base_net_weights = options.input_weight_path 124 | else: 125 | # set the path to weights based on backend and model 126 | C.base_net_weights = nn.get_weight_path() 127 | 128 | all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx) 129 | 130 | job = options.job 131 | print(len(classes_count)), len(class_mapping) 132 | if job: 133 | sys.stdout = log_file 134 | 135 | 136 | # if 'bg' not in classes_count: 137 | # classes_count['bg'] = 0 138 | # class_mapping['bg'] = len(class_mapping) 139 | 140 | C.class_mapping = class_mapping 141 | 142 | inv_map = {v: k for k, v in class_mapping.items()} 143 | 144 | print('Training images per class:') 145 | pprint.pprint(classes_count) 146 | print('Num classes (including bg) = {}'.format(len(classes_count))) 147 | 148 | config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle' 149 | 150 | with open(config_output_filename, 'wb') as config_f: 151 | pickle.dump(C,config_f) 152 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 153 | 154 | random.shuffle(all_imgs) 155 | 156 | 
num_imgs = len(all_imgs) 157 | 158 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 159 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 160 | 161 | print('Num train samples {}'.format(len(train_imgs))) 162 | print('Num val samples {}'.format(len(val_imgs))) 163 | 164 | 165 | data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 166 | data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 167 | 168 | roi_input = Input(shape=(None, 4),name = 'roi_input') 169 | vid_input = Input(shape =(None, None, None, 3),name = 'vid_input') 170 | img_input = Input(shape=(None, None, 3), name = 'img_input') 171 | vid_input_shape = (64, 400,320, 3) 172 | rgb_model = Inception_Inflated3d( 173 | include_top=False, 174 | weights='rgb_kinetics_only', 175 | input_shape=vid_input_shape, 176 | classes=classes_count) 177 | roi_input = Input(shape=(None, 4),name = 'roi_input') 178 | shared_layers_image = nn.nn_base(img_input, trainable=True) 179 | shared_layers_orig = rgb_model(vid_input) 180 | def slice_tensor(shared_layers): 181 | 182 | feature_shape = shared_layers.shape.as_list() 183 | shared_layers = shared_layers[:,8,:,:,:] 184 | return shared_layers 185 | 186 | def get_action_dic(): 187 | 188 | action_csv = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/ava_action_list_v2.0.csv' 189 | ac_dic = {} 190 | f = open(action_csv,'r') 191 | actions = f.read().splitlines() 192 | for action in actions[1:]: 193 | tags = action.split(',') 194 | tags = tags[:-1] 195 | ac_id = int(tags[0]) 196 | ac = ''.join(tags[1:]) 197 | if '"' in ac: 198 | ac =ac.replace('"','') 199 | # if ',' in ac: 200 | # ac = ''.join(ac.split(',')) 201 | 202 | ac_dic[ac_id] = ac 203 | if ac_id == 1: 204 | print ac 205 | return ac_dic 206 | 207 | ac_id = get_action_dic() 208 | shared_layers = Lambda(slice_tensor)(shared_layers_orig) 209 | print len(class_mapping) 210 | num_classes = len(class_mapping) 211 | # if C.dataset == 'AVA': 212 | classifier = nn.classifier_i3d_concat_v2(shared_layers_orig, shared_layers_image, roi_input, 1, nb_classes=num_classes, trainable=True) 213 | 214 | model_classifier = Model([img_input, roi_input, vid_input], classifier) 215 | 216 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 217 | model_all = Model([img_input,vid_input, roi_input], classifier) 218 | plot_model(model_all, to_file='model_all_i3d_cls.png', show_shapes = True) 219 | 220 | log_folder = os.path.join(options.output_weight_path,'logs/') 221 | if not os.path.isdir(log_folder): 222 | os.makedirs(log_folder) 223 | tensorboard = TensorBoard(log_dir=log_folder) 224 | tensorboard.set_model(model_classifier) 225 | train_names = ['train_loss', 'train_mae'] 226 | def write_log(callback, names, logs, batch_no): 227 | for name, value in zip(names, logs): 228 | summary = tf.Summary() 229 | summary_value = summary.value.add() 230 | summary_value.simple_value = value 231 | summary_value.tag = name 232 | callback.writer.add_summary(summary, batch_no) 233 | callback.writer.flush() 234 | 235 | optimizer = Adam(lr=1e-5) 236 | optimizer_classifier = Adam(lr=1e-5) 237 | 238 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label]) 239 | 240 | model_all.compile(optimizer='sgd', loss='mae') 241 | 242 | epoch_length = 1000 243 | epoch_length_val =100 244 | num_epochs = 
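# [Editor's sketch] The v2 classifier consumes ROIs in feature-map
# coordinates: the test scripts later map boxes from the resized 320x400
# image onto the backbone feature map by dividing by the network stride
# (16 for the resnet50/I3D settings used here). As a helper:
def box_to_feature_coords(x1, y1, x2, y2, stride=16.0):
    """Convert (x1, y1, x2, y2) in resized-image pixels to an
    (x, y, w, h) ROI in feature-map cells, as the ROI pooling layer expects."""
    fx1, fy1 = x1 / stride, y1 / stride
    w = (x2 - x1) / stride
    h = (y2 - y1) / stride
    return fx1, fy1, w, h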
int(options.num_epochs) 245 | iter_num = 0 246 | iter_num_tensorboard = 0 247 | total_cur_loss = [] 248 | total_cur_loss_val = [] 249 | iter_num_val_tensorboard = 0 250 | losses = np.zeros((epoch_length, 1)) 251 | losses_val = np.zeros((epoch_length_val, 1)) 252 | rpn_accuracy_rpn_monitor = [] 253 | rpn_accuracy_for_epoch = [] 254 | start_time = time.time() 255 | ###### val ##### 256 | rpn_accuracy_rpn_monitor_val = [] 257 | rpn_accuracy_for_epoch_val = [] 258 | 259 | ################ 260 | best_loss = np.Inf 261 | 262 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 263 | print('Starting training') 264 | # os.makedirs('check_dataset') 265 | vis = True 266 | 267 | for epoch_num in range(num_epochs): 268 | 269 | progbar = generic_utils.Progbar(epoch_length) 270 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 271 | num = 0 272 | while True: 273 | try: 274 | img_data, seq_numpy, x_img = next(data_gen_train) 275 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 276 | # print X2, Y1 277 | # x1= (X2[0][0][0]) 278 | # y1 = (X2[0][0][1]) 279 | # x2 = (x1 + X2[0][0][2]) 280 | # y2 = (y1 +X2[0][0][3]) 281 | # x1, y1, x2, y2 = x1*16 , y1*16, x2*16, y2*16 282 | # # print x1, y1, x2, y2 283 | # # if x1>320 or x2>320 or y1>400 or y2>400: 284 | # # print "yes" 285 | # im_temp =cv2.imread(img_data['filepath']) 286 | # im_temp = cv2.resize(im_temp,(320, 400), interpolation=cv2.INTER_CUBIC) 287 | # # print im_temp.shape 288 | # cv2.rectangle(im_temp, (x1,y1),(x2,y2),(0,255,0),3) 289 | # font = cv2.FONT_HERSHEY_SIMPLEX 290 | # cl = [i for i, e in enumerate(Y1[0][0]) if e == 1] 291 | # print cl 292 | # ind = cl[0] 293 | # ac = ac_id[int(class_mapping_inv[ind])] 294 | # 295 | # # cv2.putText(im_temp,'OpenCV',(10,500), font, 4,(255,255,255),2,cv2.LINE_AA) 296 | 297 | 298 | # cv2.imwrite(os.path.join('check_dataset',ac+str(num)+'.jpg'),im_temp) 299 | 300 | num+=1 301 | 302 | # img_features = 303 | 304 | loss_class = model_classifier.train_on_batch([x_img,X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 305 | losses[iter_num, 0] = loss_class 306 | 307 | iter_num += 1 308 | write_log(tensorboard,['loss_class'],[loss_class],iter_num_tensorboard) 309 | iter_num_tensorboard+=1 310 | progbar.update(iter_num, [('class_loss', np.mean(losses[:iter_num, 0]))]) 311 | 312 | if iter_num == epoch_length: 313 | loss_class_cls = np.mean(losses[:, 0]) 314 | curr_loss =loss_class_cls 315 | write_log(tensorboard,['total train loss'],[curr_loss],iter_num_tensorboard) 316 | total_cur_loss.append(curr_loss) 317 | iter_num = 0 318 | start_time = time.time() 319 | 320 | #################### Val ######################################################### 321 | iter_num_val = 0 322 | 323 | while True: 324 | # try: 325 | img_data, seq_numpy, x_img = next(data_gen_val) 326 | # print("validation") 327 | X2, Y1 = roi_helpers.calc_iou(img_data, C, class_mapping) 328 | loss_class = model_classifier.train_on_batch([x_img, X2[:, :, :],seq_numpy], [Y1[:, :, :]]) 329 | losses_val[iter_num_val,0] = loss_class 330 | iter_num_val += 1 331 | write_log(tensorboard,['loss_class_val'],[loss_class],iter_num_val_tensorboard) 332 | iter_num_val_tensorboard+=1 333 | if iter_num_val == epoch_length_val: 334 | 335 | loss_class_cls = np.mean(losses_val[:, 0]) 336 | curr_loss_val = loss_class_cls 337 | write_log(tensorboard,['total val loss'],[curr_loss_val],iter_num_val_tensorboard) 338 | total_cur_loss_val.append(curr_loss_val) 339 | # total_cur_loss.append(curr_loss) 340 | iter_num_val = 0 341 | break 342 | 343 | if curr_loss < best_loss: 
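# [Editor's note] The checkpointing below keeps only the single best set of
# weights: the epoch's mean training loss is compared against best_loss and
# model_all.save_weights overwrites C.model_path on improvement. Note that
# the comparison gates on the *training* loss; comparing curr_loss_val
# instead would make the saved checkpoint track validation performance.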
344 | if C.verbose: 345 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 346 | best_loss = curr_loss 347 | model_all.save_weights(C.model_path) 348 | 349 | break 350 | 351 | except Exception as e: 352 | exc_type, exc_obj, exc_tb = sys.exc_info() 353 | fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] 354 | print(exc_type, fname, exc_tb.tb_lineno) 355 | print('Exception: {}'.format(e)) 356 | # print(seq_numpy.shape) 357 | continue 358 | sys.stdout = old_stdout 359 | 360 | plt.plot(total_cur_loss) 361 | plt.plot(total_cur_loss_val) 362 | plt.legend(['train loss', 'val loss'], loc='upper left') 363 | 364 | savefigure = os.path.join(os.path.join(options.output_weight_path,'loss_plot.jpg')) 365 | plt.savefig(savefigure) 366 | print('Training complete, exiting.') 367 | log_file.close() 368 | -------------------------------------------------------------------------------- /test_with_vis.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import pickle 7 | from optparse import OptionParser 8 | import time 9 | from keras_frcnn import config 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import roi_helpers 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | from keras.layers import Lambda 19 | from i3d_inception import Inception_Inflated3d 20 | import collections 21 | from keras_frcnn.utils import * 22 | from pdb import set_trace as bp 23 | from tqdm import tqdm 24 | from keras_frcnn import losses as losses 25 | from keras.optimizers import Adam, SGD, RMSprop 26 | import pandas as pd 27 | from tqdm import tqdm 28 | from pdb import set_trace as bp 29 | # from keras.utils.training_utils import multi_gpu_model 30 | 31 | 32 | ''' 33 | python test_frcnn_AVA.py -m /work/newriver/subha/i3d_models/AVA_exps/multi_label/ -p /work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/ 34 | 35 | ''' 36 | sys.setrecursionlimit(40000) 37 | 38 | parser = OptionParser() 39 | 40 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 41 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 42 | help="Number of ROIs per iteration. Higher means more memory use.", default=4) 43 | 44 | parser.add_option("-v", "--val_data", type="str", dest="val_data", 45 | help="Number of ROIs per iteration. Higher means more memory use.", default='ava_val_subset_80.csv') 46 | 47 | parser.add_option("-i", "--vis_folder", type="str", dest="vis_folder", 48 | help="Number of ROIs per iteration. Higher means more memory use.", default='ava_val_subset_80.csv') 49 | parser.add_option("-m", "--model_name", dest="model_name", 50 | help="Path to model.") 51 | parser.add_option("-t", "--type", dest="type", 52 | help="type.", default = 'v1') 53 | parser.add_option("-o", "--output", dest="output", 54 | help="csv to save predictions.") 55 | parser.add_option("--config_filename", dest="config_filename", help= 56 | "Location to read the metadata related to the training (generated when training).", 57 | default="config_subset_AVA.pickle") 58 | parser.add_option("--network", dest="network", help="Base network to use. 
Supports vgg or resnet50.", default='resnet50') 59 | parser.add_option("--version", dest="version", help="Base network to use. Supports vgg or resnet50.", default='concat') 60 | 61 | (options, args) = parser.parse_args() 62 | 63 | if not options.test_path: # if filename is not given 64 | parser.error('Error: path to test data must be specified. Pass --path to command line') 65 | 66 | 67 | # config_output_filename = '/home/subha/hoi_vid/keras-kinetics-i3d//keras-frcnn-multi/' 68 | config_output_filename = options.config_filename 69 | with open(config_output_filename, 'rb') as f_in: 70 | C = pickle.load(f_in) 71 | 72 | if C.network == 'resnet50': 73 | import keras_frcnn.resnet as nn 74 | elif C.network == 'vgg': 75 | import keras_frcnn.vgg as nn 76 | 77 | # turn off any data augmentation at test time 78 | C.use_horizontal_flips = False 79 | C.use_vertical_flips = False 80 | C.rot_90 = False 81 | resized_width = 320 82 | resized_height = 400 83 | version = options.version 84 | img_path = options.test_path 85 | output_csv_file = os.path.join('evaluation','8_actions',options.output) 86 | fc = open(output_csv_file,'w+') 87 | fc.close() 88 | def format_img_size(img, C): 89 | """ formats the image size based on config """ 90 | img_min_side = float(C.im_size) 91 | (height,width,_) = img.shape 92 | ratio_w = resized_width/width 93 | ratio_h = resized_height/height 94 | new_width = resized_width 95 | new_height = resized_height 96 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 97 | return img, ratio_w, ratio_h 98 | 99 | def format_img(img, C): 100 | """ formats an image for model prediction based on config """ 101 | img, ratio_w, ratio_h= format_img_size(img, C) 102 | img = format_img_channels(img, C) 103 | return img, ratio_w, ratio_h 104 | 105 | # Method to transform the coordinates of the bounding box to its original size 106 | def get_real_coordinates(ratio_w,ratio_h, x1, y1, x2, y2): 107 | 108 | real_x1 = int(round(x1 // ratio_w)) 109 | real_y1 = int(round(y1 // ratio_h)) 110 | real_x2 = int(round(x2 // ratio_w)) 111 | real_y2 = int(round(y2 // ratio_h)) 112 | 113 | return (real_x1, real_y1, real_x2 ,real_y2) 114 | 115 | class_mapping = C.class_mapping 116 | classes_count = class_mapping 117 | print len(class_mapping) 118 | type = options.type 119 | # bp() 120 | # if 'bg' not in class_mapping: 121 | # class_mapping['bg'] = len(class_mapping) 122 | 123 | class_mapping = {v: k for k, v in class_mapping.items()} 124 | # print(class_mapping) 125 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 126 | C.num_rois = int(options.num_rois) 127 | # print "Num rois originally",C.num_rois 128 | if C.network == 'resnet50': 129 | num_features = 1024 130 | elif C.network == 'vgg': 131 | num_features = 512 132 | 133 | if K.image_dim_ordering() == 'th': 134 | input_shape_img = (3, None, None) 135 | else: 136 | input_shape_img = (None, None, 3) 137 | 138 | # shared_layers_input= Input(shape=( None,None,None,832)) 139 | roi_input = Input(shape=(None, 4)) 140 | # vid_input = Input(shape =(None, None, None, 3)) 141 | # vid_input_shape = (64, 400,320, 3) 142 | img_input = Input(shape=(None, None, 3)) 143 | 144 | feature_map_input = Input(shape=( None,None,None,832)) 145 | shared_layers_image = nn.nn_base(img_input, trainable=False) 146 | 147 | 148 | # rgb_model = Inception_Inflated3d( 149 | # include_top=False, 150 | # weights='rgb_kinetics_only', 151 | # input_shape=vid_input_shape, 152 | # classes=classes_count) 153 | 154 | def 
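# [Editor's sketch] get_real_coordinates above uses floor division (`//`)
# before rounding, so the round() is a no-op and sub-integer precision is
# discarded; plain division is presumably what was intended when mapping
# boxes back to the original image size:
def get_real_coordinates_fixed(ratio_w, ratio_h, x1, y1, x2, y2):
    real_x1 = int(round(x1 / ratio_w))
    real_y1 = int(round(y1 / ratio_h))
    real_x2 = int(round(x2 / ratio_w))
    real_y2 = int(round(y2 / ratio_h))
    return (real_x1, real_y1, real_x2, real_y2)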
175 | optimizer_classifier = Adam(lr=1e-5)
176 | print(len(classes_count))
177 | 
178 | if version == 'concat' and type == 'v1':
179 |     classifier = nn.classifier_i3d_concat_new(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=8, trainable=True)
180 |     model_classifier = Model([img_input, feature_map_input, roi_input], classifier)
181 | elif version == 'whole':
182 |     classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version)
183 |     model_classifier = Model([feature_map_input], classifier)
184 | elif version == 'roi':
185 |     classifier = nn.classifier_i3d(feature_map_input, roi_input, 1, nb_classes=len(classes_count))
186 |     model_classifier = Model([roi_input, feature_map_input], classifier)
187 | 
188 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
189 | # model_all = Model([i3d_features, roi_input], classifier)
190 | elif version == 'concat' and type == 'v2':
191 |     classifier = nn.classifier_i3d_concat_v2(feature_map_input, shared_layers_image, roi_input, 1, nb_classes=len(classes_count), trainable=True)
192 |     model_classifier = Model([img_input, feature_map_input, roi_input], classifier)
193 | # elif version == 'whole' and type == 'v2':
194 | #     classifier = nn.classifier_i3d_concat(feature_map_input, 1, nb_classes=len(classes_count), trainable=True, version=version)
195 | #     model_classifier = Model([feature_map_input], classifier)
196 | 
197 | 
198 | # model_classifier = multi_gpu_model(model_classifier, gpus=2)
199 | model_name = os.path.join(options.model_name, 'model.hdf5')
200 | print('Loading weights from {}'.format(model_name))
201 | model_classifier.load_weights(model_name, by_name=True)
202 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_multi_label])
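# A minimal sketch of the inputs the 'concat'/v1 head above expects; the
# arrays are dummies and only the shapes matter. The spatial size (1/16 of
# the resized frame) matches the stride-16 box scaling used in the loop
# below; the temporal extent of 8 is illustrative, not taken from the repo.
def _demo_concat_inputs(model):
    dummy_img = np.zeros((1, resized_height, resized_width, 3))    # one resized keyframe
    dummy_feat = np.zeros((1, 8, resized_height//16, resized_width//16, 832))  # I3D feature map
    dummy_rois = np.zeros((1, 1, 4))                               # one (x, y, w, h) box
    return model.predict([dummy_img, dummy_feat, dummy_rois])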
203 | vis_folder = options.vis_folder
204 | all_imgs = []
205 | classes = {}
206 | bbox_threshold = 0.7
207 | visualise = True
208 | f_val = os.path.join('/work/newriver/subha/AVA_dataset/ava-dataset-tool', options.val_data)
209 | df = pd.read_csv(f_val)
210 | final_predictions = []
211 | 
212 | indices = range(len(df))
213 | print(len(df))
214 | # bp()
215 | bs = 8
216 | for i in tqdm(range(len(df))):
217 | 
218 |     try:
219 |         # ind = indices[i:i+bs]
220 | 
221 |         row = df.iloc[i, :]
222 |         # if row[-2] in [62]:
223 | 
224 |         val_vid = row[0]
225 |         vid_path = os.path.join(img_path, val_vid)
226 |         img_name = str(int(row[1]))+'.jpg'
227 | 
228 |         filepath = os.path.join(vid_path, img_name)
229 |         fp = filepath.replace('keyframes', 'numpy_8_actions')  # precomputed I3D feature map for this keyframe
230 |         fp = fp.replace('.jpg', '.npy')
231 | 
232 | 
233 | 
234 | 
235 | 
236 | 
237 |         if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
238 |             continue
239 |         st = time.time()
240 | 
241 |         # filepath = '/work/newriver/subha/AVA_dataset/ava-dataset-tool/preproc/train/keyframes/_dBTTYDRdRQ/1589.jpg'
242 |         fr_num = filepath.split(os.path.sep)[-1].split('.')[0]
243 |         img = cv2.imread(filepath)
244 |         # x_img =
245 |         tags = filepath.split(os.path.sep)
246 |         img_folder = '/'+'/'.join(tags[1:-1])
247 |         shared_layers_orig = np.load(fp)
248 |         # seq = get_frame_idx(filepath)
249 |         #
250 |         # # print filepath, seq
251 |         # vid_numpy = []
252 |         x_img_orig = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC)
253 |         x_img = np.expand_dims(x_img_orig, axis=0)
254 |         # # print filepath, seq
255 |         # # bp()
256 |         # for frame in seq:
257 |         #     if frame!=-1:
258 |         #         fr_name = os.path.join(img_folder, frame)
259 |         #         np_name = fr_name.replace('.jpg','.npy')
260 |         #         np_name = np_name.replace('train/keyframes','numpy_arrays_val')
261 |         #         # print np_name
262 |         #         fr_npy = np.load(np_name)
263 |         #         vid_numpy.append(fr_npy)
264 |         #     else:
265 |         #         vid_numpy.append(np.zeros((resized_height,resized_width,3)))
266 |         #
267 |         # vid_numpy = np.array(vid_numpy)
268 |         # vid_numpy = np.expand_dims(vid_numpy,axis=0)
269 |         x1, y1, x2, y2 = (float(row[2])*resized_width)/16., (float(row[3])*resized_height)/16., (float(row[4])*resized_width)/16., (float(row[5])*resized_height)/16.  # normalised AVA box -> feature-map coordinates (stride 16)
270 |         # shared_layers_orig = rgb_model.predict(vid_numpy)
271 | 
272 |         # print vid_numpy.shape
273 |         if version == 'concat':
274 |             # [x1,y1,x2,y2] = [float(x1)/float(16),float(y1)/float(16), float(x2)/float(16), float(y2)/float(16)]
275 |             w = x2-x1
276 |             h = y2-y1
277 |             roi = np.array([x1, y1, w, h])
278 |             rois = np.expand_dims(roi, axis=0)
279 |             rois = np.expand_dims(rois, axis=0)
280 |             # try:
281 |             # rois, vid_numpy = get_batch(df, ind)
282 |             # print rois, shared_layers_orig.shape
283 |             [P_cls] = model_classifier.predict([x_img, shared_layers_orig, rois])
284 | 
285 |         elif version == 'roi':
286 |             w = x2-x1
287 |             h = y2-y1
288 |             roi = np.array([x1, y1, w, h])
289 |             rois = np.expand_dims(roi, axis=0)
290 |             rois = np.expand_dims(rois, axis=0)
291 |             # try:
292 |             # rois, vid_numpy = get_batch(df, ind)
293 |             # print rois, shared_layers_orig.shape
294 |             [P_cls] = model_classifier.predict([rois, shared_layers_orig])
295 | 
296 |         elif version == 'whole':
297 |             [P_cls] = model_classifier.predict([shared_layers_orig])
298 | 
299 |         real_x1, real_y1, real_x2, real_y2 = row[2]*resized_width, row[3]*resized_height, row[4]*resized_width, row[5]*resized_height
300 |         cv2.rectangle(x_img_orig, (int(real_x1), int(real_y1)), (int(real_x2), int(real_y2)), (255,255,255), 2)
301 |         # ind = np.argmax(P_cls[0,:])
302 |         gt = row[-2]
303 |         pred_prob = P_cls[0, :]
304 |         # pred_class = class_mapping[ind]
305 |         sort_prob = sorted(range(len(pred_prob)), key=lambda j: pred_prob[j])[-4:]  # indices of the four highest-scoring classes
306 |         ac_pred = [class_mapping[ind] for ind in sort_prob]
307 |         ac_pred = list(map(int, ac_pred))
308 |         # print ac_pred
309 |         # bp()
310 |         # print gt, pred_class
311 |         if int(gt) in ac_pred:
312 |             # pred_prob = P_cls[0,:]
313 |             # sort_prob = sorted(range(len(pred_prob)), key=lambda i: pred_prob[i])[-3:]
314 |             # prob_str = [str(pred_prob[ind]) for ind in sort_prob]
315 |             # prob_str = ','.join(prob_str)
316 |             #
317 |             # ac_pred = [class_mapping[ind] for ind in sort_prob]
318 |             # font = cv2.FONT_HERSHEY_SIMPLEX
319 |             # bottomLeftCornerOfText = (100,100)
320 |             # fontScale = 1
321 |             # fontColor = (255,255,255)
322 |             # lineType = 2
323 |             #
324 |             # cv2.putText(img,','.join(ac_pred)+'-'+prob_str,
325 |             #     bottomLeftCornerOfText,
326 |             #     font,
327 |             #     fontScale,
328 |             #     fontColor,
329 |             #     lineType)
330 |             seq_name = filepath.split(os.path.sep)[-2]
331 |             vid_dir = os.path.join(vis_folder, str(row[-2]), 'true_positives', seq_name)
332 |             if not os.path.isdir(vid_dir):
333 |                 os.makedirs(vid_dir)
334 | 
335 | 
336 |             save_img_name = os.path.join(vid_dir, img_name)
337 |             # print(save_img_name)
338 |             cv2.imwrite(save_img_name, x_img_orig)
339 |         else:
340 |             seq_name = filepath.split(os.path.sep)[-2]
341 |             vid_dir = os.path.join(vis_folder, str(row[-2]), 'false_positives', seq_name)
342 |             if not os.path.isdir(vid_dir):
343 |                 os.makedirs(vid_dir)
344 | 
345 | 
346 |             save_img_name = os.path.join(vid_dir, img_name)
347 |             # print(save_img_name)
348 |             cv2.imwrite(save_img_name, x_img_orig)
349 |         # print(P_cls)
350 |         # bp()
351 |         # print output_csv_file
352 |         # print(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[5])+','+str(P_cls[0,5])+'\n')
353 | 
354 | 
355 |         # with open(output_csv_file,'a+') as f_predicted:
356 |         #     [f_predicted.write(seq_name+','+str(fr_num).zfill(4)+','+str(float(row[2]))+','+str(float(row[3]))+','+str(float(row[4]))+','+str(float(row[5]))+','+str(class_mapping[cn])+','+str(P_cls[0,cn])+'\n') for cn in range(8) if class_mapping[cn]!='bg']
357 | 
358 | 
359 | 
360 |     except Exception as e:
361 |         exc_type, exc_obj, exc_tb = sys.exc_info()
362 |         fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
363 |         print(exc_type, fname, exc_tb.tb_lineno)
364 |         print('Exception: {}'.format(e))
365 |         # print(filepath)
366 |         continue
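# The top-k selection inside the loop above, rewritten with numpy as a
# minimal sketch: P_cls is the (1, num_classes) multi-label output and
# class_mapping maps a column index back to an AVA action id.
def _demo_topk(P_cls, k=4):
    top_idx = np.argsort(P_cls[0, :])[-k:]           # k highest-scoring columns
    return [int(class_mapping[j]) for j in top_idx]  # as AVA action ids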
--------------------------------------------------------------------------------
/extract_i3d_features.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import random
3 | import pprint
4 | import sys
5 | import time
6 | import numpy as np
7 | from optparse import OptionParser
8 | import pickle
9 | import tensorflow as tf
10 | from keras.backend.tensorflow_backend import set_session
11 | config = tf.ConfigProto()
12 | config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
13 | config.log_device_placement = True  # to log device placement (on which device the operation ran)
14 | # (nothing gets printed in Jupyter, only if you run it standalone)
15 | sess = tf.Session(config=config)
16 | set_session(sess)  # set this TensorFlow session as the default session for Keras
17 | from keras.utils import plot_model
18 | import os
19 | 
20 | from keras.callbacks import TensorBoard
21 | from keras import backend as K
22 | from keras.optimizers import Adam, SGD, RMSprop
23 | from keras.layers import Input
24 | from keras.models import Model
25 | from keras_frcnn import config, data_generators  # NOTE: rebinds 'config' from the ConfigProto above to keras_frcnn.config
26 | from keras_frcnn import losses as losses
27 | import keras_frcnn.roi_helpers as roi_helpers
28 | from keras.utils import generic_utils
29 | from keras.layers import Lambda
30 | from i3d_inception import Inception_Inflated3d
31 | import collections
32 | 
33 | import matplotlib
34 | matplotlib.use('Agg')
35 | import matplotlib.pyplot as plt
36 | from tqdm import tqdm
37 | import cv2
38 | 
39 | # import tensorflow as tf
40 | # import keras.backend.tensorflow_backend as ktf
41 | 
42 | sys.setrecursionlimit(40000)
43 | sys.path.append('/home/subha/hoi_vid/keras-kinetics-i3d')
44 | # def get_session(gpu_fraction=0.333):
45 | #     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
46 | #                                 allow_growth=True)
47 | #     return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
48 | # ktf.set_session(get_session())
49 | 
50 | 
51 | old_stdout = sys.stdout
52 | 
53 | log_file = open("message.log", "w")
54 | 
55 | 
56 | 
57 | # from i3d_inception import Inception_Inflated3d
58 | # from tensorflow.python import keras
59 | 
60 | 
61 | import pdb
62 | 
63 | import logging
64 | logging.basicConfig(filename='example.log', level=logging.DEBUG)
65 | parser = OptionParser()
66 | 
67 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.")
68 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc",
69 |                   default="pascal_voc")
70 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=4)
71 | parser.add_option("-s", "--start_idx", type="int", dest="start_idx", help="Index of the first sample to process.", default=0)
72 | 
73 | parser.add_option("-m", "--output_weight_path", dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5')
74 | parser.add_option("-d", "--dataset", dest="dataset", help="Dataset to use.", default='AVA')
75 | parser.add_option("-e", "--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=2000)
76 | 
77 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')
78 | parser.add_option("--aug", dest="aug", type=int, help="Whether to augment the training data (0 or 1).", default=0)
79 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False)
80 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. (Default=false).", action="store_true", default=False)
81 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).",
82 |                   action="store_true", default=False)
83 | parser.add_option("--config_filename", dest="config_filename", help=
84 |                   "Location to store all the metadata related to the training (to be used when testing).",
85 |                   default="config.pickle")
86 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.")
87 | parser.add_option("--j", dest="job", help="If the job output should be saved")
88 | 
89 | (options, args) = parser.parse_args()
90 | 
91 | if not options.train_path:  # if filename is not given
92 |     parser.error('Error: path to training data must be specified. Pass --path to command line')
93 | 
94 | if options.parser == 'pascal_voc':
95 |     from keras_frcnn.pascal_voc_parser import get_data
96 | elif options.parser == 'simple':
97 |     from keras_frcnn.simple_parser import get_data
98 | else:
99 |     raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'")
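# Example invocation (the paths are illustrative, not taken from the repo):
#   python extract_i3d_features.py -p /path/to/annotations.txt -o simple \
#       -m /path/to/model_dir/ --config_filename config.pickle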
100 | 
101 | # pass the settings from the command line, and persist them in the config object
102 | C = config.Config()
103 | 
104 | C.use_horizontal_flips = bool(options.horizontal_flips)
105 | C.use_vertical_flips = bool(options.vertical_flips)
106 | C.rot_90 = bool(options.rot_90)
107 | C.dataset = options.dataset
108 | C.augment = options.aug
109 | output_weight_path = os.path.join(options.output_weight_path, 'model.hdf5')
110 | C.model_path = output_weight_path
111 | C.num_rois = int(options.num_rois)
112 | 
113 | if options.network == 'vgg':
114 |     C.network = 'vgg'
115 |     from keras_frcnn import vgg as nn
116 | elif options.network == 'resnet50':
117 |     from keras_frcnn import resnet as nn
118 |     C.network = 'resnet50'
119 | else:
120 |     print('Not a valid model')
121 |     raise ValueError
122 | 
123 | 
124 | # check if weight path was passed via command line
125 | if options.input_weight_path:
126 |     C.base_net_weights = options.input_weight_path
127 | else:
128 |     # set the path to weights based on backend and model
129 |     C.base_net_weights = nn.get_weight_path()
130 | 
131 | # all_imgs, classes_count, class_mapping = get_data(options.train_path,options.start_idx)
132 | 
133 | # job = options.job
134 | # print(len(classes_count)), len(class_mapping)
135 | # if job:
136 | #     sys.stdout = log_file
137 | #
138 | #
139 | # # if 'bg' not in classes_count:
140 | # #     classes_count['bg'] = 0
141 | # #     class_mapping['bg'] = len(class_mapping)
142 | #
143 | # C.class_mapping = class_mapping
144 | #
145 | # inv_map = {v: k for k, v in class_mapping.items()}
146 | #
147 | # print('Training images per class:')
148 | # pprint.pprint(classes_count)
149 | # print('Num classes (including bg) = {}'.format(len(classes_count)))
150 | #
151 | # config_output_filename = options.config_filename.split('.')[0]+'_'+C.dataset+'.pickle'
152 | #
153 | # with open(config_output_filename, 'wb') as config_f:
154 | #     pickle.dump(C,config_f)
155 | #     print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename))
156 | #
157 | # random.shuffle(all_imgs)
158 | #
159 | # num_imgs = len(all_imgs)
160 | #
161 | # train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
162 | # val_imgs = [s for s in all_imgs if s['imageset'] == 'test']
163 | #
164 | # imgs = train_imgs+val_imgs
165 | #
166 | # print('Num train samples {}'.format(len(train_imgs)))
167 | # print('Num val samples {}'.format(len(val_imgs)))
168 | 
169 | 
170 | # data_gen_train = data_generators.get_anchor_gt(imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train')
171 | # data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val')
172 | 
173 | vid_input = Input(shape=(None, None, None, 3), name='vid_input')
174 | vid_input_shape = (64, 400, 320, 3)
175 | rgb_model = Inception_Inflated3d(
176 |     include_top=False,
177 |     weights='rgb_kinetics_only',
178 |     input_shape=vid_input_shape,
179 |     classes=8)
180 | roi_input = Input(shape=(None, 4), name='roi_input')
181 | shared_layers_orig = rgb_model(vid_input)
182 | 
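# A minimal sketch (not part of the original script) of how one keyframe's
# I3D feature map can be cached as the .npy files that test_with_vis.py
# loads; 'window_paths' (64 frame paths, -1 for padding) and 'out_path' are
# illustrative names, and frames are assumed to need resizing to 320x400.
def _demo_save_features(window_paths, out_path):
    clip = []
    for p in window_paths:
        if p == -1:
            clip.append(np.zeros((400, 320, 3)))            # zero padding outside the clip
        else:
            frame = cv2.resize(cv2.imread(p), (320, 400))   # (height, width) = (400, 320)
            clip.append(extract_numpy_single_frame(frame))  # scale to [-1, 1]
    clip = np.expand_dims(np.array(clip), axis=0)           # (1, 64, 400, 320, 3)
    np.save(out_path, rgb_model.predict(clip))              # cache the feature map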
183 | 
184 | # epoch_length = len(imgs)
185 | # epoch_length_val = 100
186 | # num_epochs = int(options.num_epochs)
187 | # iter_num = 0
188 | # iter_num_tensorboard = 0
189 | # total_cur_loss = []
190 | # total_cur_loss_val = []
191 | # iter_num_val_tensorboard = 0
192 | # losses = np.zeros((epoch_length, 1))
193 | # losses_val = np.zeros((epoch_length_val, 1))
194 | # rpn_accuracy_rpn_monitor = []
195 | # rpn_accuracy_for_epoch = []
196 | # start_time = time.time()
197 | # ###### val #####
198 | # rpn_accuracy_rpn_monitor_val = []
199 | # rpn_accuracy_for_epoch_val = []
200 | 
201 | ################
202 | # best_loss = np.Inf
203 | #
204 | # class_mapping_inv = {v: k for k, v in class_mapping.items()}
205 | # print('Starting training')
206 | #
207 | # vis = True
208 | 
209 | 
210 | def extract_numpy_single_frame(img):
211 |     # scale pixel values from [0, 255] to [-1, 1], as expected by I3D
212 |     img = (img/255.)*2 - 1
213 |     return img
214 | 
215 | def get_frame_idx(img_path):
216 |     winSize = 64
217 |     tags = img_path.split(os.path.sep)
218 |     vid_folder = '/'+'/'.join(tags[1:-1])
219 |     frames = os.listdir(vid_folder)
220 |     if 'CAD' in img_path:
221 |         frames = [f for f in frames if f.startswith('RGB')]
222 |         frames.sort(key = lambda x: int(x.split('.')[0].split('_')[1]))
223 |     else:
224 |         frames.sort(key = lambda x: int(x.split('.')[0]))
225 |     frame_index = frames.index(tags[-1])
226 |     # print frame_index
227 |     fi = get_frames_index(frames, frame_index, winSize)
228 |     seq = [frames[k] if k != -1 else k for k in fi]
229 |     # print(seq[0], seq[31])
230 |     # print seq
231 |     return seq
232 | 
233 | def get_frames_index(total_list, frame_idx, winSize):
234 |     nb = (winSize/2)-1  # frames kept before the keyframe (31 for winSize 64)
235 |     na = (winSize/2)+1  # exclusive bound: na-1 frames follow the keyframe
236 |     final_frame_idx = len(total_list)-1
237 |     sf = final_frame_idx-(winSize/2)+1  # first index whose window would overrun the clip end
238 |     ef = len(total_list)
239 |     seq = []
240 |     if frame_idx < (winSize/2)-1:  # keyframe near the clip start: left-pad with -1s
241 |         zp_frames = nb - frame_idx
242 |         seq = [-1 for number in range(int(zp_frames))]
243 |         seq.extend(range(0, int(frame_idx)+1))
244 |         seq.extend(range(int(frame_idx)+1, int(frame_idx)+int(na)))
245 |         if len(seq) != winSize:
246 |             print("No")  # sanity check: the window must hold exactly winSize entries
247 |     elif frame_idx >= (winSize/2)-1 and frame_idx < sf:  # full context on both sides
248 |         start_index = frame_idx-(winSize/2)+1
249 |         end_index = (winSize/2) + frame_idx
250 |         # print frame_idx
251 |         seq = range(int(start_index), int(end_index+1))
252 | 
253 |     elif frame_idx >=sf and frame_idx