├── images ├── t ├── Architecture.jpeg ├── Hockey_results.png ├── movies_results.png ├── violentflow_results.png └── hyperparameters_results.JPG ├── test.py ├── read_video.py ├── README.md ├── model ├── mobilenet_v3_small.py ├── mobilenet_v3_large.py ├── LR_ASPP.py ├── layers │ └── bilinear_upsampling.py └── mobilenet_base.py ├── BuildModel.py ├── utils.py ├── shufflenetv2.py ├── BuildModel_basic.py ├── DatasetBuilder.py ├── SportsActionRecognition_SIFT_SVM.py ├── violentflow_HOG.ipynb ├── run.py ├── SportsActionRecognition_HOG_SVM.py ├── SportsActionRecognition_HOG_RF.py └── hockey_HOG.ipynb /images/t: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | m = timm.create_model('mobilenetv3_100', pretrained=True) 4 | print(m.eval()) -------------------------------------------------------------------------------- /images/Architecture.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w5688414/EfficientNet-ViolenceDetection/HEAD/images/Architecture.jpeg -------------------------------------------------------------------------------- /images/Hockey_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w5688414/EfficientNet-ViolenceDetection/HEAD/images/Hockey_results.png -------------------------------------------------------------------------------- /images/movies_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w5688414/EfficientNet-ViolenceDetection/HEAD/images/movies_results.png -------------------------------------------------------------------------------- /images/violentflow_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w5688414/EfficientNet-ViolenceDetection/HEAD/images/violentflow_results.png -------------------------------------------------------------------------------- /images/hyperparameters_results.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/w5688414/EfficientNet-ViolenceDetection/HEAD/images/hyperparameters_results.JPG -------------------------------------------------------------------------------- /read_video.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import sys 4 | 5 | video = "/home/eric/data/violence_recognition/HockeyFights/fi46_xvid.avi" 6 | 7 | video_capture = cv2.VideoCapture(video) 8 | if not video_capture.isOpened(): 9 | print("Error: Failed to open %s" % video) 10 | sys.exit(-1) 11 | video_length = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) 12 | 13 | count = 0 14 | while(True): 15 | # Capture frame-by-frame 16 | ret, frame = video_capture.read() 17 | if not ret: 18 | break 19 | 20 | count += 1 21 | 22 | print(video_length, count) 23 | # When everything done, release the capture 24 | video_capture.release() 25 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning to Detect Violent Videos using Convolution LSTM 2 | 3 | + 
This work is based on the violence detection model proposed by [1], with minor modifications.
4 | + The original model was implemented in PyTorch [2]; in this work we implement it with Keras, using TensorFlow as the back-end.
5 | + The model incorporates a pre-trained convolutional neural network (CNN) connected to a convolutional LSTM (ConvLSTM) layer.
6 | + The model takes the raw video as input, converts it into frames, and outputs a binary violence / non-violence label.
7 | + The main body comes from an existing GitHub project and ran smoothly, so I am open-sourcing my modified code here for others to use as a reference.
8 | + I tried many mainstream classification models as replacements for the ResNet part and found EfficientNet to be very stable: it has far fewer parameters than residual networks and higher accuracy than MobileNet and ShuffleNet, so it is a good trade-off.
9 | + I also added an attention mechanism as a small experiment and found it improves the results slightly.
10 | + Comparison experiments with several traditional (hand-crafted feature) methods are included as well, for anyone interested.
11 |
12 | ### Architecture structure
13 | ![alt text](https://github.com/liorsidi/ViolenceDetection_CNNLSTM/blob/master/images/Architecture.jpeg)
14 |
15 |
16 | ## Running configurations
17 | ### Video dataset paths:
18 | Data paths are defined as follows:
19 | - hocky - data/raw_videos/HockeyFights
20 | - violentflow - data/raw_videos/violentflow
21 | - movies - data/raw_videos/movies
22 |
23 | ### Library prerequisites:
24 | - python 3.x
25 | - numpy 1.14.0
26 | - keras 2.2.0
27 | - tensorflow 1.9.0
28 | - Pillow 3.1.2
29 | - opencv-python 3.4.1.15
30 | - keras_efficientnets
31 |
32 | ### Running operation:
33 | Just run `python run.py`
34 | (command-line arguments are not currently supported)
35 |
36 | ## Results
37 | #### Hyper-tuning results
38 | ![alt text](https://github.com/liorsidi/ViolenceDetection_CNNLSTM/blob/master/images/hyperparameters_results.JPG)
39 |
40 | #### Hockey dataset results
41 | ![alt text](https://github.com/liorsidi/ViolenceDetection_CNNLSTM/blob/master/images/Hockey_results.png)
42 |
43 | ## References
44 | 1. Sudhakaran, Swathikiran, and Oswald Lanz. "Learning to detect violent videos
45 | using convolutional long short-term memory." In Advanced Video and Signal Based
46 | Surveillance (AVSS), 2017 14th IEEE International Conference on, pp. 1-6. IEEE, 2017.
47 | 2. https://github.com/swathikirans/violence-recognition-pytorch
48 |
-------------------------------------------------------------------------------- /model/mobilenet_v3_small.py: --------------------------------------------------------------------------------
1 | """MobileNet v3 small models for Keras.
2 | # Reference
3 | [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs)
4 | """
5 |
6 |
7 | from keras.models import Model
8 | from keras.layers import Input, Conv2D, GlobalAveragePooling2D, Reshape
9 | from keras.utils.vis_utils import plot_model
10 |
11 | from model.mobilenet_base import MobileNetBase
12 |
13 |
14 | class MobileNetV3_Small(MobileNetBase):
15 | def __init__(self, shape, n_class, alpha=1.0, include_top=True):
16 | """Init.
17 |
18 | # Arguments
19 | shape: An integer or tuple/list of 3 integers, shape
20 | of input tensor.
21 | n_class: Integer, number of classes.
22 | alpha: Float, width multiplier.
23 | include_top: Boolean, whether to include the classification layer.
24 |
25 | # Returns
26 | MobileNetv3 model.
27 | """
28 | super(MobileNetV3_Small, self).__init__(shape, n_class, alpha)
29 | self.include_top = include_top
30 |
31 | def build(self, plot=False):
32 | """build MobileNetV3 Small.
33 |
34 | # Arguments
35 | plot: Boolean, whether to plot the model.
36 |
37 | # Returns
38 | model: Model, the built model.
39 | """
40 | inputs = Input(shape=self.shape)
41 |
42 | x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS')
43 |
44 | x = self._bottleneck(x, 16, (3, 3), e=16, s=2, squeeze=True, nl='RE')
45 | x = self._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE')
46 | x = self._bottleneck(x, 24, (3, 3), e=88, s=1, squeeze=False, nl='RE')
47 | x = self._bottleneck(x, 40, (5, 5), e=96, s=2, squeeze=True, nl='HS')
48 | x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
49 | x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
50 | x = self._bottleneck(x, 48, (5, 5), e=120, s=1, squeeze=True, nl='HS')
51 | x = self._bottleneck(x, 48, (5, 5), e=144, s=1, squeeze=True, nl='HS')
52 | x = self._bottleneck(x, 96, (5, 5), e=288, s=2, squeeze=True, nl='HS')
53 | x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS')
54 | x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS')
55 |
56 | x = self._conv_block(x, 576, (1, 1), strides=(1, 1), nl='HS')
57 | x = GlobalAveragePooling2D()(x)
58 | x = Reshape((1, 1, 576))(x)
59 |
60 | x = Conv2D(1280, (1, 1), padding='same')(x)
61 | x = self._return_activation(x, 'HS')
62 |
63 | if self.include_top:
64 | x = Conv2D(self.n_class, (1, 1), padding='same', activation='softmax')(x)
65 | x = Reshape((self.n_class,))(x)
66 |
67 | model = Model(inputs, x)
68 |
69 | if plot:
70 | plot_model(model, to_file='images/MobileNetv3_small.png', show_shapes=True)
71 |
72 | return model
73 |
-------------------------------------------------------------------------------- /model/mobilenet_v3_large.py: --------------------------------------------------------------------------------
1 | """MobileNet v3 Large models for Keras.
2 | # Reference
3 | [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs)
4 | """
5 |
6 |
7 | from keras.models import Model
8 | from keras.layers import Input, Conv2D, GlobalAveragePooling2D, Reshape
9 | from keras.utils.vis_utils import plot_model
10 |
11 | from model.mobilenet_base import MobileNetBase
12 |
13 |
14 | class MobileNetV3_Large(MobileNetBase):
15 | def __init__(self, shape, n_class, alpha=1.0, include_top=True):
16 | """Init.
17 |
18 | # Arguments
19 | shape: An integer or tuple/list of 3 integers, shape
20 | of input tensor.
21 | n_class: Integer, number of classes.
22 | alpha: Float, width multiplier.
23 | include_top: Boolean, whether to include the classification layer.
24 |
25 | # Returns
26 | MobileNetv3 model.
27 | """
28 | super(MobileNetV3_Large, self).__init__(shape, n_class, alpha)
29 | self.include_top = include_top
30 |
31 | def build(self, plot=False):
32 | """build MobileNetV3 Large.
33 |
34 | # Arguments
35 | plot: Boolean, whether to plot the model.
36 |
37 | # Returns
38 | model: Model, the built model.
39 | """ 40 | inputs = Input(shape=self.shape) 41 | 42 | x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS') 43 | 44 | x = self._bottleneck(x, 16, (3, 3), e=16, s=1, squeeze=False, nl='RE') 45 | x = self._bottleneck(x, 24, (3, 3), e=64, s=2, squeeze=False, nl='RE') 46 | x = self._bottleneck(x, 24, (3, 3), e=72, s=1, squeeze=False, nl='RE') 47 | x = self._bottleneck(x, 40, (5, 5), e=72, s=2, squeeze=True, nl='RE') 48 | x = self._bottleneck(x, 40, (5, 5), e=120, s=1, squeeze=True, nl='RE') 49 | x = self._bottleneck(x, 40, (5, 5), e=120, s=1, squeeze=True, nl='RE') 50 | x = self._bottleneck(x, 80, (3, 3), e=240, s=2, squeeze=False, nl='HS') 51 | x = self._bottleneck(x, 80, (3, 3), e=200, s=1, squeeze=False, nl='HS') 52 | x = self._bottleneck(x, 80, (3, 3), e=184, s=1, squeeze=False, nl='HS') 53 | x = self._bottleneck(x, 80, (3, 3), e=184, s=1, squeeze=False, nl='HS') 54 | x = self._bottleneck(x, 112, (3, 3), e=480, s=1, squeeze=True, nl='HS') 55 | x = self._bottleneck(x, 112, (3, 3), e=672, s=1, squeeze=True, nl='HS') 56 | x = self._bottleneck(x, 160, (5, 5), e=672, s=2, squeeze=True, nl='HS') 57 | x = self._bottleneck(x, 160, (5, 5), e=960, s=1, squeeze=True, nl='HS') 58 | x = self._bottleneck(x, 160, (5, 5), e=960, s=1, squeeze=True, nl='HS') 59 | 60 | x = self._conv_block(x, 960, (1, 1), strides=(1, 1), nl='HS') 61 | x = GlobalAveragePooling2D()(x) 62 | x = Reshape((1, 1, 960))(x) 63 | 64 | x = Conv2D(1280, (1, 1), padding='same')(x) 65 | x = self._return_activation(x, 'HS') 66 | 67 | if self.include_top: 68 | x = Conv2D(self.n_class, (1, 1), padding='same', activation='softmax')(x) 69 | x = Reshape((self.n_class,))(x) 70 | 71 | model = Model(inputs, x) 72 | 73 | if plot: 74 | plot_model(model, to_file='images/MobileNetv3_large.png', show_shapes=True) 75 | 76 | return model 77 | -------------------------------------------------------------------------------- /BuildModel.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dense, Flatten, Dropout, ZeroPadding3D 2 | from keras.layers.recurrent import LSTM 3 | from keras.models import Sequential, load_model 4 | from keras.optimizers import Adam, RMSprop 5 | from keras.layers.wrappers import TimeDistributed 6 | from keras.layers.convolutional import (Conv2D, MaxPooling3D, Conv3D, 7 | MaxPooling2D) 8 | from collections import deque 9 | import sys 10 | import logging 11 | from keras.applications import Xception, ResNet50, InceptionV3 12 | from keras.layers import Dense, GlobalAveragePooling2D 13 | from keras.models import Model 14 | from model.mobilenet_v3_large import MobileNetV3_Large 15 | 16 | 17 | logger = logging.getLogger('Builder_moudle') 18 | logger.setLevel(logging.DEBUG) 19 | ch = logging.StreamHandler() 20 | ch.setLevel(logging.DEBUG) 21 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 22 | ch.setFormatter(formatter) 23 | logger.addHandler(ch) 24 | 25 | 26 | epoch = 10 27 | learning_rate = 0.0004 28 | batch_size = 16 29 | optimizer ='RMSprop' 30 | initial_weights = 'Xavier' 31 | 32 | default_values = dict(epoch=10,\ 33 | learning_rate=0.0004,\ 34 | batch_size=16,\ 35 | optimizer=Adam,\ 36 | initial_weights=0,\ 37 | cnn_class=Xception,\ 38 | pre_weights='Xavier',\ 39 | lstm_conf=(LSTM,dict(units = 256)),\ 40 | cnn_train_type='static' 41 | ) 42 | 43 | 44 | def build(epoch = default_values["epoch"],\ 45 | learning_rate = default_values["learning_rate"], \ 46 | batch_size = default_values["batch_size"],\ 47 | optimizer = 
default_values["optimizer"],\
48 | initial_weights = default_values["initial_weights"],\
49 | cnn_class = default_values["cnn_class"],\
50 | pre_weights = default_values["pre_weights"], \
51 | lstm_conf = default_values["lstm_conf"], \
52 | cnn_train_type=default_values["cnn_train_type"]):
53 |
54 | model = 0
55 | shape = (224, 224, 3)
56 | n_class = 2
57 | # Create CNN
58 | if(cnn_train_type!='train'):
59 | logger.info("CNN Created with Pre-weights:{}".format(pre_weights))
60 | # base_model = cnn_class(weights=pre_weights,include_top=False)
61 | base_model = MobileNetV3_Large(shape, n_class,include_top=False).build()
62 | else:
63 | logger.info("CNN Created with no Pre-weights")
64 | base_model = cnn_class()
65 |
66 | # control whether the CNN layers are trainable
67 | if(cnn_train_type=='static'):
68 | logger.info("CNN set to NOT-Train")
69 | for layer in base_model.layers:
70 | layer.trainable = False
71 | if(cnn_train_type=='retrain'):
72 | logger.info("CNN set to retrain")
73 | for layer in base_model.layers:
74 | layer.trainable = True
75 |
76 | # print(base_model.summary())
77 | # add a global spatial average pooling layer
78 | x = base_model.output
79 | logger.info("base_model.output: {}".format(base_model.output))
80 | x = GlobalAveragePooling2D()(x)
81 | # let's add a fully-connected layer
82 | x = Dense(1024, activation='relu')(x)
83 | # and a logistic layer sized to the number of classes (was a hard-coded Dense(100))
84 | predictions = Dense(n_class, activation='softmax')(x)
85 |
86 | model = Model(inputs=base_model.input, outputs=predictions)
87 | model.compile(optimizer=optimizer, loss='categorical_crossentropy')
88 |
89 | model.summary()
90 |
91 |
92 | return model
-------------------------------------------------------------------------------- /utils.py: --------------------------------------------------------------------------------
1 | #-*- coding:utf-8 -*-
2 | #'''
3 | # Created on 18-8-14 at 4:39 PM
4 | #
5 | # @Author: Greg Gao(laygin)
6 | #'''
7 | import os
8 | from keras import backend as K
9 | from keras_applications.imagenet_utils import _obtain_input_shape
10 | from keras.models import Model
11 | from keras.engine.topology import get_source_inputs
12 | from keras.layers import Activation, Add, Concatenate, Conv2D, GlobalMaxPooling2D
13 | from keras.layers import GlobalAveragePooling2D,Input, Dense
14 | from keras.layers import MaxPool2D,AveragePooling2D, BatchNormalization, Lambda, DepthwiseConv2D
15 | import numpy as np
16 |
17 |
18 | def channel_split(x, name=''):
19 | # equipartition: split the channels into two equal halves
20 | in_channels = x.shape.as_list()[-1]
21 | ip = in_channels // 2
22 | c_hat = Lambda(lambda z: z[:, :, :, 0:ip], name='%s/sp%d_slice' % (name, 0))(x)
23 | c = Lambda(lambda z: z[:, :, :, ip:], name='%s/sp%d_slice' % (name, 1))(x)
24 | return c_hat, c
25 |
26 | def channel_shuffle(x):
27 | height, width, channels = x.shape.as_list()[1:]
28 | channels_per_split = channels // 2
29 | x = K.reshape(x, [-1, height, width, 2, channels_per_split])
30 | x = K.permute_dimensions(x, (0,1,2,4,3))
31 | x = K.reshape(x, [-1, height, width, channels])
32 | return x
33 |
34 |
35 | def shuffle_unit(inputs, out_channels, bottleneck_ratio,strides=2,stage=1,block=1):
36 | if K.image_data_format() == 'channels_last':
37 | bn_axis = -1
38 | else:
39 | raise ValueError('Only channels last supported')
40 |
41 | prefix = 'stage{}/block{}'.format(stage, block)
42 | bottleneck_channels = int(out_channels * bottleneck_ratio)
43 | if strides < 2:
44 | c_hat, c = channel_split(inputs, '{}/spl'.format(prefix))
45 |
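# Editor's note (added comment, not in the original): in this stride-1 branch
# the tensor has just been split in half; `c_hat` bypasses the unit entirely
# and is re-attached at the concat below, so only half of the channels are
# transformed per block, as in the ShuffleNetV2 design.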
inputs = c 46 | 47 | x = Conv2D(bottleneck_channels, kernel_size=(1,1), strides=1, padding='same', name='{}/1x1conv_1'.format(prefix))(inputs) 48 | x = BatchNormalization(axis=bn_axis, name='{}/bn_1x1conv_1'.format(prefix))(x) 49 | x = Activation('relu', name='{}/relu_1x1conv_1'.format(prefix))(x) 50 | x = DepthwiseConv2D(kernel_size=3, strides=strides, padding='same', name='{}/3x3dwconv'.format(prefix))(x) 51 | x = BatchNormalization(axis=bn_axis, name='{}/bn_3x3dwconv'.format(prefix))(x) 52 | x = Conv2D(bottleneck_channels, kernel_size=1,strides=1,padding='same', name='{}/1x1conv_2'.format(prefix))(x) 53 | x = BatchNormalization(axis=bn_axis, name='{}/bn_1x1conv_2'.format(prefix))(x) 54 | x = Activation('relu', name='{}/relu_1x1conv_2'.format(prefix))(x) 55 | 56 | if strides < 2: 57 | ret = Concatenate(axis=bn_axis, name='{}/concat_1'.format(prefix))([x, c_hat]) 58 | else: 59 | s2 = DepthwiseConv2D(kernel_size=3, strides=2, padding='same', name='{}/3x3dwconv_2'.format(prefix))(inputs) 60 | s2 = BatchNormalization(axis=bn_axis, name='{}/bn_3x3dwconv_2'.format(prefix))(s2) 61 | s2 = Conv2D(bottleneck_channels, kernel_size=1,strides=1,padding='same', name='{}/1x1_conv_3'.format(prefix))(s2) 62 | s2 = BatchNormalization(axis=bn_axis, name='{}/bn_1x1conv_3'.format(prefix))(s2) 63 | s2 = Activation('relu', name='{}/relu_1x1conv_3'.format(prefix))(s2) 64 | ret = Concatenate(axis=bn_axis, name='{}/concat_2'.format(prefix))([x, s2]) 65 | 66 | ret = Lambda(channel_shuffle, name='{}/channel_shuffle'.format(prefix))(ret) 67 | 68 | return ret 69 | 70 | 71 | def block(x, channel_map, bottleneck_ratio, repeat=1, stage=1): 72 | x = shuffle_unit(x, out_channels=channel_map[stage-1], 73 | strides=2,bottleneck_ratio=bottleneck_ratio,stage=stage,block=1) 74 | 75 | for i in range(1, repeat+1): 76 | x = shuffle_unit(x, out_channels=channel_map[stage-1],strides=1, 77 | bottleneck_ratio=bottleneck_ratio,stage=stage, block=(1+i)) 78 | 79 | return x 80 | -------------------------------------------------------------------------------- /model/LR_ASPP.py: -------------------------------------------------------------------------------- 1 | """Lite R-ASPP Semantic Segmentation based on MobileNetV3. 2 | """ 3 | 4 | 5 | from keras.models import Model 6 | from keras.layers import Conv2D, AveragePooling2D, BatchNormalization, Activation, Multiply, Add 7 | from keras.utils.vis_utils import plot_model 8 | from model.layers.bilinear_upsampling import BilinearUpSampling2D 9 | 10 | 11 | class LiteRASSP: 12 | def __init__(self, input_shape, n_class=19, alpha=1.0, weights=None, backbone='small'): 13 | """Init. 14 | 15 | # Arguments 16 | input_shape: An integer or tuple/list of 3 integers, shape 17 | of input tensor (should be 1024 × 2048 or 512 × 1024 according 18 | to the paper). 19 | n_class: Integer, number of classes. 20 | alpha: Integer, width multiplier for mobilenetV3. 21 | weights: String, weights for mobilenetv3. 22 | backbone: String, name of backbone (must be small or large). 23 | """ 24 | self.shape = input_shape 25 | self.n_class = n_class 26 | self.alpha = alpha 27 | self.weights = weights 28 | self.backbone = backbone 29 | 30 | def _extract_backbone(self): 31 | """extract feature map from backbone. 
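Note (editor's addition): the hard-coded layer names selected below come from
Keras's automatic layer numbering, so they are only valid when the backbone is
the first model built in the current session.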
32 | """ 33 | if self.backbone == 'large': 34 | from model.mobilenet_v3_large import MobileNetV3_Large 35 | 36 | model = MobileNetV3_Large(self.shape, self.n_class, alpha=self.alpha, include_top=False).build() 37 | layer_name8 = 'batch_normalization_13' 38 | layer_name16 = 'add_5' 39 | elif self.backbone == 'small': 40 | from model.mobilenet_v3_small import MobileNetV3_Small 41 | 42 | model = MobileNetV3_Small(self.shape, self.n_class, alpha=self.alpha, include_top=False).build() 43 | layer_name8 = 'batch_normalization_7' 44 | layer_name16 = 'add_2' 45 | else: 46 | raise Exception('Invalid backbone: {}'.format(self.backbone)) 47 | 48 | if self.weights is not None: 49 | model.load_weights(self.weights, by_name=True) 50 | 51 | inputs= model.input 52 | # 1/8 feature map. 53 | out_feature8 = model.get_layer(layer_name8).output 54 | # 1/16 feature map. 55 | out_feature16 = model.get_layer(layer_name16).output 56 | 57 | return inputs, out_feature8, out_feature16 58 | 59 | def build(self, plot=False): 60 | """build Lite R-ASPP. 61 | 62 | # Arguments 63 | plot: Boolean, weather to plot model. 64 | 65 | # Returns 66 | model: Model, model. 67 | """ 68 | inputs, out_feature8, out_feature16 = self._extract_backbone() 69 | 70 | # branch1 71 | x1 = Conv2D(128, (1, 1))(out_feature16) 72 | x1 = BatchNormalization()(x1) 73 | x1 = Activation('relu')(x1) 74 | 75 | # branch2 76 | s = x1.shape 77 | 78 | x2 = AveragePooling2D(pool_size=(49, 49), strides=(16, 20))(out_feature16) 79 | x2 = Conv2D(128, (1, 1))(x2) 80 | x2 = Activation('sigmoid')(x2) 81 | x2 = BilinearUpSampling2D(target_size=(int(s[1]), int(s[2])))(x2) 82 | 83 | # branch3 84 | x3 = Conv2D(self.n_class, (1, 1))(out_feature8) 85 | 86 | # merge1 87 | x = Multiply()([x1, x2]) 88 | x = BilinearUpSampling2D(size=(2, 2))(x) 89 | x = Conv2D(self.n_class, (1, 1))(x) 90 | 91 | # merge2 92 | x = Add()([x, x3]) 93 | 94 | # out 95 | x = Activation('softmax')(x) 96 | 97 | model = Model(inputs=inputs, outputs=x) 98 | 99 | if plot: 100 | plot_model(model, to_file='images/LR_ASPP.png', show_shapes=True) 101 | 102 | return model 103 | -------------------------------------------------------------------------------- /shufflenetv2.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | #''' 3 | # Created on 18-8-14 下午4:48 4 | # 5 | # @Author: Greg Gao(laygin) 6 | #''' 7 | import numpy as np 8 | from keras.utils import plot_model 9 | from keras_applications.imagenet_utils import _obtain_input_shape 10 | from keras.engine.topology import get_source_inputs 11 | from keras.layers import Input, Conv2D, MaxPool2D, GlobalMaxPooling2D, GlobalAveragePooling2D 12 | from keras.layers import Activation, Dense 13 | from keras.models import Model 14 | import keras.backend as K 15 | from utils import block 16 | 17 | 18 | def ShuffleNetV2(include_top=True, 19 | input_tensor=None, 20 | scale_factor=1.0, 21 | pooling='max', 22 | input_shape=(224,224,3), 23 | weights_path=None, 24 | num_shuffle_units=[3,7,3], 25 | bottleneck_ratio=1, 26 | classes=1000): 27 | if K.backend() != 'tensorflow': 28 | raise RuntimeError('Only tensorflow supported for now') 29 | name = 'ShuffleNetV2_{}_{}_{}'.format(scale_factor, bottleneck_ratio, "".join([str(x) for x in num_shuffle_units])) 30 | input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=28, require_flatten=include_top, 31 | data_format=K.image_data_format()) 32 | out_dim_stage_two = {0.5:48, 1:116, 1.5:176, 2:244} 33 | 34 | if pooling not in ['max', 'avg']: 35 | raise 
ValueError('Invalid value for pooling') 36 | if not (float(scale_factor)*4).is_integer(): 37 | raise ValueError('Invalid value for scale_factor, should be x over 4') 38 | exp = np.insert(np.arange(len(num_shuffle_units), dtype=np.float32), 0, 0) # [0., 0., 1., 2.] 39 | out_channels_in_stage = 2**exp 40 | out_channels_in_stage *= out_dim_stage_two[bottleneck_ratio] # calculate output channels for each stage 41 | out_channels_in_stage[0] = 24 # first stage has always 24 output channels 42 | out_channels_in_stage *= scale_factor 43 | out_channels_in_stage = out_channels_in_stage.astype(int) 44 | 45 | if input_tensor is None: 46 | img_input = Input(shape=input_shape) 47 | else: 48 | if not K.is_keras_tensor(input_tensor): 49 | img_input = Input(tensor=input_tensor, shape=input_shape) 50 | else: 51 | img_input = input_tensor 52 | 53 | # create shufflenet architecture 54 | x = Conv2D(filters=out_channels_in_stage[0], kernel_size=(3, 3), padding='same', use_bias=False, strides=(2, 2), 55 | activation='relu', name='conv1')(img_input) 56 | x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same', name='maxpool1')(x) 57 | 58 | # create stages containing shufflenet units beginning at stage 2 59 | for stage in range(len(num_shuffle_units)): 60 | repeat = num_shuffle_units[stage] 61 | x = block(x, out_channels_in_stage, 62 | repeat=repeat, 63 | bottleneck_ratio=bottleneck_ratio, 64 | stage=stage + 2) 65 | 66 | if bottleneck_ratio < 2: 67 | k = 1024 68 | else: 69 | k = 2048 70 | x = Conv2D(k, kernel_size=1, padding='same', strides=1, name='1x1conv5_out', activation='relu')(x) 71 | 72 | if pooling == 'avg': 73 | x = GlobalAveragePooling2D(name='global_avg_pool')(x) 74 | elif pooling == 'max': 75 | x = GlobalMaxPooling2D(name='global_max_pool')(x) 76 | 77 | if include_top: 78 | x = Dense(classes, name='fc')(x) 79 | x = Activation('softmax', name='softmax')(x) 80 | 81 | if input_tensor: 82 | inputs = get_source_inputs(input_tensor) 83 | 84 | else: 85 | inputs = img_input 86 | 87 | model = Model(inputs, x, name=name) 88 | 89 | if weights_path: 90 | model.load_weights(weights_path, by_name=True) 91 | 92 | return model 93 | 94 | if __name__ == '__main__': 95 | import os 96 | os.environ['CUDA_VISIBLE_DEVICES'] = '' 97 | model = ShuffleNetV2(include_top=True, input_shape=(224, 224, 3), bottleneck_ratio=1) 98 | plot_model(model, to_file='shufflenetv2.png', show_layer_names=True, show_shapes=True) 99 | 100 | 101 | pass 102 | 103 | 104 | -------------------------------------------------------------------------------- /BuildModel_basic.py: -------------------------------------------------------------------------------- 1 | from keras import Input 2 | from keras.callbacks import Callback 3 | from keras.layers import Dense, Flatten, Dropout, ZeroPadding3D, ConvLSTM2D, Reshape, BatchNormalization, Activation 4 | from keras.layers.recurrent import LSTM 5 | from keras.models import Sequential, load_model 6 | from keras.optimizers import Adam, RMSprop,SGD 7 | from keras.layers.wrappers import TimeDistributed 8 | from keras.layers.convolutional import (Conv2D, MaxPooling3D, Conv3D, 9 | MaxPooling2D) 10 | from collections import deque 11 | from keras_layer_normalization import LayerNormalization 12 | import sys 13 | import logging 14 | from keras.applications import Xception, ResNet50, InceptionV3 15 | from keras.layers import Dense, GlobalAveragePooling2D 16 | from keras.models import Model 17 | from model.mobilenet_v3_large import MobileNetV3_Large 18 | from model.mobilenet_v3_small import MobileNetV3_Small 19 | 
from keras_efficientnets import EfficientNetB0
20 | from keras_efficientnets import EfficientNetB1
21 | from shufflenetv2 import ShuffleNetV2
22 | from keras.layers import Permute
23 | from keras.layers import multiply
24 | from keras.layers import add
25 | from keras.utils import plot_model
26 |
27 |
28 | TIME_STEPS = 8
29 | # additive attention block (the "first way" attention), applied after the ConvLSTM
30 | def attention_3d_block(inputs):
31 | # compute per-feature attention weights over the 256 ConvLSTM channels
32 | # (earlier Permute-based variants removed for clarity)
33 | a_probs = Dense(256, activation='softmax')(inputs)
34 | # the weights are combined additively with the input (residual-style
35 | # attention); the multiplicative alternative is kept for reference:
36 | # output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
37 | output_attention_mul = add([inputs, a_probs], name='attention_mul')
38 | return output_attention_mul
39 |
40 |
41 |
42 | def build(size, seq_len , learning_rate ,
43 | optimizer_class ,\
44 | initial_weights ,\
45 | cnn_class ,\
46 | pre_weights , \
47 | lstm_conf , \
48 | cnn_train_type, classes = 1, dropout = 0.0):
49 | input_layer = Input(shape=(seq_len, size, size, 3))
50 | if(cnn_train_type!='train'):
51 | if cnn_class.__name__ == "ResNet50":
52 | cnn = cnn_class(weights=pre_weights, include_top=False,input_shape =(size, size, 3))
53 | elif cnn_class.__name__=="MobileNetV3_Large":
54 | cnn=cnn_class(shape =(size, size, 3),n_class=2,include_top=False).build()
55 | elif cnn_class.__name__=='MobileNetV3_Small':
56 | cnn=cnn_class(shape =(size, size, 3),n_class=2,include_top=False).build()
57 | elif cnn_class.__name__=='efn.EfficientNetB0':
58 | cnn = EfficientNetB0(input_shape=(size,size,3), classes=2, include_top=False, weights='imagenet')
59 | elif cnn_class.__name__=='efn.EfficientNetB1':
60 | cnn = EfficientNetB1(input_shape=(size,size,3), classes=2, include_top=False, weights='imagenet')
61 | elif cnn_class.__name__=="ShuffleNetV2":
62 | cnn=ShuffleNetV2(include_top=False,input_shape=(224, 224, 3),bottleneck_ratio=1)
63 | else:
64 | cnn = cnn_class(weights=pre_weights,include_top=False)
65 | else:
66 | cnn = cnn_class(include_top=False)
67 |
68 | # control whether the CNN layers are trainable
69 | if(cnn_train_type=='static'):
70 | for layer in cnn.layers:
71 | layer.trainable = False
72 | if(cnn_train_type=='retrain'):
73 | for layer in cnn.layers:
74 | layer.trainable = True
75 |
76 | cnn = TimeDistributed(cnn)(input_layer)
77 | print(cnn)
78 | # the ResNet output shape is (1, 1, 2048) and would need reshaping for the ConvLSTM filters
79 | # if cnn_class.__name__ == "ResNet50":
80 | # cnn = Reshape((seq_len,4, 4, 128), input_shape=(seq_len,1, 1, 2048))(cnn)
81 | # print(lstm_conf)
82 | # print(lstm_conf[0])
83 | # print(lstm_conf[1])
84 | lstm = lstm_conf[0](**lstm_conf[1])(cnn)
85 | lstm = MaxPooling2D(pool_size=(2, 2))(lstm)
86 | attention_mul = attention_3d_block(lstm)
87 | # print(lstm)
88 | # lstm = MaxPooling2D(pool_size=(2, 2))(lstm)
89 | flat = Flatten()(attention_mul)
90 |
91 | flat = BatchNormalization()(flat)
92 | # flag=LayerNormalization()(flat)
93 | flat = Dropout(dropout)(flat)
94 | linear = Dense(512)(flat)
95 |
96 | relu = Activation('relu')(linear)
97 | linear = Dense(256)(relu)
98 | linear = Dropout(dropout)(linear)
99 | relu = Activation('relu')(linear)
100 | linear = Dense(10)(relu)
101 | linear = Dropout(dropout)(linear)
102 | relu = Activation('relu')(linear)
103 |
104 | activation = 'sigmoid'
105 | loss_func = 'binary_crossentropy'
106 |
107 | if classes > 1:
108 | activation = 'softmax'
109 | loss_func = 'categorical_crossentropy'
110 |
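# Editor's note (added comment): with the default classes == 1 the head below
# is a single sigmoid unit trained with binary cross-entropy; for classes > 1
# it switches to a softmax layer with categorical cross-entropy, in which case
# the labels fed to the model must be one-hot encoded.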
predictions = Dense(classes, activation=activation)(relu) 111 | 112 | model = Model(inputs=input_layer, outputs=predictions) 113 | optimizer = optimizer_class[0](lr=learning_rate, **optimizer_class[1]) 114 | model.compile(optimizer=optimizer, loss=loss_func,metrics=['acc']) 115 | 116 | print(model.summary()) 117 | plot_model(model,show_shapes=True,to_file="model.png") 118 | 119 | 120 | return model -------------------------------------------------------------------------------- /model/layers/bilinear_upsampling.py: -------------------------------------------------------------------------------- 1 | """Keras BilinearUpSampling2D Layer. 2 | """ 3 | import numpy as np 4 | import tensorflow as tf 5 | import keras.backend as K 6 | from keras.engine.topology import Layer, InputSpec 7 | 8 | 9 | def resize_images_bilinear(X, height_factor=1, width_factor=1, target_height=None, target_width=None, data_format='default'): 10 | '''Resizes the images contained in a 4D tensor of shape 11 | 12 | - [batch, channels, height, width] (for 'channels_first' data_format) 13 | - [batch, height, width, channels] (for 'channels_last' data_format) 14 | by a factor of (height_factor, width_factor). Both factors should be 15 | positive integers. 16 | ''' 17 | if data_format == 'default': 18 | data_format = K.image_data_format() 19 | 20 | if data_format == 'channels_first': 21 | original_shape = K.int_shape(X) 22 | 23 | if target_height and target_width: 24 | new_shape = tf.constant(np.array((target_height, target_width)).astype('int32')) 25 | else: 26 | new_shape = tf.shape(X)[2:] 27 | new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32')) 28 | 29 | X = K.permute_dimensions(X, [0, 2, 3, 1]) 30 | X = tf.image.resize_bilinear(X, new_shape) 31 | X = K.permute_dimensions(X, [0, 3, 1, 2]) 32 | 33 | if target_height and target_width: 34 | X.set_shape((None, None, target_height, target_width)) 35 | else: 36 | X.set_shape((None, None, original_shape[2] * height_factor, original_shape[3] * width_factor)) 37 | 38 | return X 39 | elif data_format == 'channels_last': 40 | original_shape = K.int_shape(X) 41 | 42 | if target_height and target_width: 43 | new_shape = tf.constant(np.array((target_height, target_width)).astype('int32')) 44 | else: 45 | new_shape = tf.shape(X)[1:3] 46 | new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32')) 47 | 48 | X = tf.image.resize_bilinear(X, new_shape) 49 | 50 | if target_height and target_width: 51 | X.set_shape((None, target_height, target_width, None)) 52 | else: 53 | X.set_shape((None, original_shape[1] * height_factor, original_shape[2] * width_factor, None)) 54 | 55 | return X 56 | else: 57 | raise Exception('Invalid data_format: ' + data_format) 58 | 59 | 60 | class BilinearUpSampling2D(Layer): 61 | def __init__(self, size=(1, 1), target_size=None, data_format='default', **kwargs): 62 | """Init. 63 | size: factor to original shape (ie. original-> size * original). 64 | target size: target size (ie. original->target). 
65 | """ 66 | if data_format == 'default': 67 | data_format = K.image_data_format() 68 | self.size = tuple(size) 69 | 70 | if target_size is not None: 71 | self.target_size = tuple(target_size) 72 | else: 73 | self.target_size = None 74 | assert data_format in {'channels_last', 'channels_first'}, 'data_format must be in {tf, th}' 75 | 76 | self.data_format = data_format 77 | self.input_spec = [InputSpec(ndim=4)] 78 | 79 | super(BilinearUpSampling2D, self).__init__(**kwargs) 80 | 81 | def compute_output_shape(self, input_shape): 82 | if self.data_format == 'channels_first': 83 | width = int(self.size[0] * input_shape[2] if input_shape[2] is not None else None) 84 | height = int(self.size[1] * input_shape[3] if input_shape[3] is not None else None) 85 | 86 | if self.target_size is not None: 87 | width = self.target_size[0] 88 | height = self.target_size[1] 89 | return (input_shape[0], 90 | input_shape[1], 91 | width, 92 | height) 93 | elif self.data_format == 'channels_last': 94 | width = int(self.size[0] * input_shape[1] if input_shape[1] is not None else None) 95 | height = int(self.size[1] * input_shape[2] if input_shape[2] is not None else None) 96 | 97 | if self.target_size is not None: 98 | width = self.target_size[0] 99 | height = self.target_size[1] 100 | return (input_shape[0], 101 | width, 102 | height, 103 | input_shape[3]) 104 | else: 105 | raise Exception('Invalid data_format: ' + self.data_format) 106 | 107 | def call(self, x, mask=None): 108 | if self.target_size is not None: 109 | return resize_images_bilinear(x, target_height=self.target_size[0], target_width=self.target_size[1], data_format=self.data_format) 110 | else: 111 | return resize_images_bilinear(x, height_factor=self.size[0], width_factor=self.size[1], data_format=self.data_format) 112 | 113 | def get_config(self): 114 | config = {'size': self.size, 'target_size': self.target_size} 115 | base_config = super(BilinearUpSampling2D, self).get_config() 116 | 117 | return dict(list(base_config.items()) + list(config.items())) 118 | -------------------------------------------------------------------------------- /model/mobilenet_base.py: -------------------------------------------------------------------------------- 1 | """MobileNet v3 models for Keras. 2 | # Reference 3 | [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs) 4 | """ 5 | 6 | 7 | from keras.layers import Conv2D, DepthwiseConv2D, Dense, GlobalAveragePooling2D 8 | from keras.layers import Activation, BatchNormalization, Add, Multiply, Reshape 9 | 10 | from keras import backend as K 11 | 12 | 13 | class MobileNetBase: 14 | def __init__(self, shape, n_class, alpha=1.0): 15 | """Init 16 | 17 | # Arguments 18 | input_shape: An integer or tuple/list of 3 integers, shape 19 | of input tensor. 20 | n_class: Integer, number of classes. 21 | alpha: Integer, width multiplier. 22 | """ 23 | self.shape = shape 24 | self.n_class = n_class 25 | self.alpha = alpha 26 | 27 | def _relu6(self, x): 28 | """Relu 6 29 | """ 30 | return K.relu(x, max_value=6.0) 31 | 32 | def _hard_swish(self, x): 33 | """Hard swish 34 | """ 35 | return x * K.relu(x + 3.0, max_value=6.0) / 6.0 36 | 37 | def _return_activation(self, x, nl): 38 | """Convolution Block 39 | This function defines a activation choice. 40 | 41 | # Arguments 42 | x: Tensor, input tensor of conv layer. 43 | nl: String, nonlinearity activation type. 44 | 45 | # Returns 46 | Output tensor. 
47 | """ 48 | if nl == 'HS': 49 | x = Activation(self._hard_swish)(x) 50 | if nl == 'RE': 51 | x = Activation(self._relu6)(x) 52 | 53 | return x 54 | 55 | def _conv_block(self, inputs, filters, kernel, strides, nl): 56 | """Convolution Block 57 | This function defines a 2D convolution operation with BN and activation. 58 | 59 | # Arguments 60 | inputs: Tensor, input tensor of conv layer. 61 | filters: Integer, the dimensionality of the output space. 62 | kernel: An integer or tuple/list of 2 integers, specifying the 63 | width and height of the 2D convolution window. 64 | strides: An integer or tuple/list of 2 integers, 65 | specifying the strides of the convolution along the width and height. 66 | Can be a single integer to specify the same value for 67 | all spatial dimensions. 68 | nl: String, nonlinearity activation type. 69 | 70 | # Returns 71 | Output tensor. 72 | """ 73 | 74 | channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 75 | 76 | x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs) 77 | x = BatchNormalization(axis=channel_axis)(x) 78 | 79 | return self._return_activation(x, nl) 80 | 81 | def _squeeze(self, inputs): 82 | """Squeeze and Excitation. 83 | This function defines a squeeze structure. 84 | 85 | # Arguments 86 | inputs: Tensor, input tensor of conv layer. 87 | """ 88 | input_channels = int(inputs.shape[-1]) 89 | 90 | x = GlobalAveragePooling2D()(inputs) 91 | x = Dense(input_channels, activation='relu')(x) 92 | x = Dense(input_channels, activation='hard_sigmoid')(x) 93 | x = Reshape((1, 1, input_channels))(x) 94 | x = Multiply()([inputs, x]) 95 | 96 | return x 97 | 98 | def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl): 99 | """Bottleneck 100 | This function defines a basic bottleneck structure. 101 | 102 | # Arguments 103 | inputs: Tensor, input tensor of conv layer. 104 | filters: Integer, the dimensionality of the output space. 105 | kernel: An integer or tuple/list of 2 integers, specifying the 106 | width and height of the 2D convolution window. 107 | e: Integer, expansion factor. 108 | t is always applied to the input size. 109 | s: An integer or tuple/list of 2 integers,specifying the strides 110 | of the convolution along the width and height.Can be a single 111 | integer to specify the same value for all spatial dimensions. 112 | squeeze: Boolean, Whether to use the squeeze. 113 | nl: String, nonlinearity activation type. 114 | 115 | # Returns 116 | Output tensor. 
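Note (editor's addition): as implemented below, the residual shortcut is
added only when the stride s is 1 and the input channel count equals
`filters`, i.e. when the input and output shapes match.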
117 | """ 118 | 119 | channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 120 | input_shape = K.int_shape(inputs) 121 | 122 | tchannel = int(e) 123 | cchannel = int(self.alpha * filters) 124 | 125 | r = s == 1 and input_shape[3] == filters 126 | 127 | x = self._conv_block(inputs, tchannel, (1, 1), (1, 1), nl) 128 | 129 | x = DepthwiseConv2D(kernel, strides=(s, s), depth_multiplier=1, padding='same')(x) 130 | x = BatchNormalization(axis=channel_axis)(x) 131 | x = self._return_activation(x, nl) 132 | 133 | if squeeze: 134 | x = self._squeeze(x) 135 | 136 | x = Conv2D(cchannel, (1, 1), strides=(1, 1), padding='same')(x) 137 | x = BatchNormalization(axis=channel_axis)(x) 138 | 139 | if r: 140 | x = Add()([x, inputs]) 141 | 142 | return x 143 | 144 | def build(self): 145 | pass 146 | -------------------------------------------------------------------------------- /DatasetBuilder.py: -------------------------------------------------------------------------------- 1 | import scipy 2 | import os 3 | import cv2 4 | import pickle 5 | import glob 6 | import numpy as np 7 | from keras.preprocessing.image import load_img, img_to_array 8 | from keras.preprocessing.sequence import pad_sequences 9 | from keras.utils import to_categorical 10 | from sklearn.model_selection import train_test_split 11 | from collections import defaultdict 12 | from keras.preprocessing import image 13 | import random 14 | 15 | corner_keys = ["Center","Left_up","Left_down","Right_up","Right_down"] 16 | 17 | Debug_Print_AUG=False 18 | 19 | def save_figures_from_video(dataset_video_path, video_filename, suffix,figures_path,skip_frames = 25,apply_norm = True, apply_diff = True,fix_len = None): 20 | seq_len = 0 21 | 22 | video_figures_path = os.path.join(figures_path ,video_filename) 23 | if not os.path.exists(video_figures_path): 24 | os.makedirs(video_figures_path) 25 | 26 | video_file = os.path.join(dataset_video_path, video_filename + suffix) 27 | label = 0 28 | print('Extracting frames from video: ', video_file) 29 | 30 | videoCapture = cv2.VideoCapture(video_file) 31 | if fix_len is not None: 32 | # vid_len = int(videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)) 33 | vid_len = int(videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)) 34 | print(vid_len) 35 | skip_frames = int(float(vid_len)/float(fix_len)) 36 | videoCapture.set(cv2.CAP_PROP_POS_MSEC, (seq_len * skip_frames)) 37 | success, figure_ = videoCapture.read() 38 | success = True 39 | files = [] 40 | while success: 41 | success, figure = videoCapture.read() 42 | 43 | if seq_len % skip_frames == 0: 44 | if success: 45 | figure_curr = figure 46 | image_file = os.path.join(video_figures_path , "frame_%d.jpg" % seq_len) 47 | files.append(image_file) 48 | cv2.imwrite(image_file, figure_curr) 49 | seq_len += 1 50 | video_images = dict(images_path = video_figures_path, name = video_filename, 51 | images_files = files, sequence_length = seq_len, label = label) 52 | 53 | return video_images 54 | 55 | def createDataset(datasets_video_path, figure_output_path,fix_len, force = False): 56 | videos_seq_length = [] 57 | datasets_images = {} 58 | videos_frames_paths = [] 59 | videos_labels = [] 60 | #Extract images for each video for each dataset 61 | for dataset_name, dataset_video_path in datasets_video_path.items(): 62 | dataset_figures_path = os.path.join(figure_output_path,dataset_name) 63 | if not os.path.exists(dataset_figures_path): 64 | os.makedirs(dataset_figures_path) 65 | dataset_images = [] 66 | for filename in os.listdir(dataset_video_path): 67 | if 
filename.endswith(".avi") or filename.endswith(".mpg"): 68 | video_images_file = os.path.join(dataset_figures_path,filename[:-4], 'video_summary.pkl') 69 | if os.path.isfile(video_images_file) and not force: 70 | with open(video_images_file, 'rb') as f: 71 | video_images = pickle.load(f) 72 | else: 73 | video_images = save_figures_from_video(dataset_video_path, filename[:-4],filename[-4:], dataset_figures_path, fix_len =fix_len) 74 | if dataset_name == "hocky": 75 | if filename.startswith("fi"): 76 | video_images['label'] = 1 77 | elif dataset_name == "violentflow": 78 | if "violence" in filename: 79 | video_images['label'] = 1 80 | elif dataset_name == "movies": 81 | if "fi" in filename: 82 | video_images['label'] = 1 83 | with open(video_images_file, 'wb') as f: 84 | pickle.dump(video_images, f, pickle.HIGHEST_PROTOCOL) 85 | dataset_images.append(video_images) 86 | videos_seq_length.append(video_images['sequence_length']) 87 | videos_frames_paths.append(video_images['images_path']) 88 | videos_labels.append(video_images['label']) 89 | datasets_images[dataset_name] = dataset_images 90 | avg_length = int(float(sum(videos_seq_length)) / max(len(videos_seq_length), 1)) 91 | 92 | train_path, test_path, train_y, test_y = train_test_split(videos_frames_paths,videos_labels, test_size=0.20, random_state=42) 93 | 94 | # if apply_aug: 95 | # aug_paths = [] 96 | # aug_y = [] 97 | # for train_path_, train_y_ in zip(train_path,train_y): 98 | # 99 | # aug_path = generate_augmentations(train_path_,force = False) 100 | # aug_paths.append(aug_path) 101 | # aug_y.append(train_y_) 102 | # 103 | # train_path = train_path + aug_paths 104 | # train_y = train_y + aug_y 105 | 106 | train_path, valid_path, train_y, valid_y = train_test_split(train_path, train_y, test_size=0.20, random_state=42) 107 | return train_path,valid_path, test_path,\ 108 | train_y, valid_y, test_y,\ 109 | avg_length 110 | 111 | 112 | def frame_loader(frames,figure_shape,to_norm = True): 113 | output_frames = [] 114 | for frame in frames: 115 | image = load_img(frame, target_size=(figure_shape, figure_shape),interpolation='bilinear') 116 | img_arr = img_to_array(image) 117 | # Scale 118 | figure = (img_arr / 255.).astype(np.float32) 119 | # Normalize 120 | mean = [0.485, 0.456, 0.406] 121 | std = [0.229, 0.224, 0.225] 122 | figure = (figure - mean) / std 123 | output_frames.append(figure) 124 | return output_frames 125 | 126 | 127 | def data_generator(data_paths,labels,batch_size,figure_shape,seq_length,use_aug,use_crop,crop_x_y,classes = 1): 128 | while True: 129 | indexes = np.arange(len(data_paths)) 130 | np.random.shuffle(indexes) 131 | select_indexes = indexes[:batch_size] 132 | data_paths_batch = [data_paths[i] for i in select_indexes] 133 | labels_batch = [labels[i] for i in select_indexes] 134 | 135 | X, y = get_sequences(data_paths_batch,labels_batch,figure_shape,seq_length, classes, use_augmentation = use_aug,use_crop=use_crop,crop_x_y=crop_x_y) 136 | 137 | yield X, y 138 | 139 | def data_generator_files(data,labels,batch_size): 140 | while True: 141 | indexes = np.arange(len(data)) 142 | np.random.shuffle(indexes) 143 | select_indexes = indexes[:batch_size] 144 | X = [data[i] for i in select_indexes] 145 | y = [labels[i] for i in select_indexes] 146 | yield X, y 147 | 148 | def crop_img__remove_Dark(img,x_crop,y_crop,x,y,figure_size): 149 | x_start = x_crop 150 | x_end = x-x_crop 151 | y_start = y_crop 152 | y_end = y-y_crop 153 | return cv2.resize(img[y_start:y_end,x_start:x_end,:],(figure_size,figure_size)) 154 | 155 | 156 | 
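# Editor's sketch (not part of the original file): how the generator above is
# typically wired into Keras training. The names `model`, `train_path` and
# `train_y`, and all hyper-parameter values, are illustrative assumptions; in
# this repo the real wiring lives in run.py.
#
#   gen = data_generator(train_path, train_y, batch_size=16, figure_shape=224,
#                        seq_length=20, use_aug=True, use_crop=True,
#                        crop_x_y=None, classes=1)
#   model.fit_generator(gen, steps_per_epoch=len(train_path) // 16, epochs=10)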
def crop_img(img,figure_shape,percentage=0.8,corner="Left_up"):
157 | if corner is None:
158 | corner = random.choice(corner_keys)
159 |
160 | if corner not in corner_keys:
161 | raise ValueError(
162 | 'Invalid corner method {} specified. Supported '
163 | 'corners are {}'.format(
164 | corner,
165 | ", ".join(corner_keys)))
166 |
167 | resize = int(figure_shape*percentage)
168 |
169 | if(corner =="Left_up"):
170 | x_start = 0
171 | x_end = resize
172 | y_start = 0
173 | y_end = resize
174 | if (corner == "Right_down"):
175 | x_start = figure_shape-resize
176 | x_end = figure_shape
177 | y_start = figure_shape-resize
178 | y_end = figure_shape
179 | if(corner =="Right_up"):
180 | x_start = 0
181 | x_end = resize
182 | y_start = figure_shape-resize
183 | y_end = figure_shape
184 | if (corner == "Left_down"):
185 | x_start = figure_shape-resize
186 | x_end = figure_shape
187 | y_start = 0
188 | y_end = resize
189 | if (corner == "Center"):
190 | half = int(figure_shape*(1-percentage))
191 | x_start = half
192 | x_end = figure_shape-half
193 | y_start = half
194 | y_end = figure_shape-half
195 |
196 | img = cv2.resize(img[y_start:y_end,x_start:x_end, :], (figure_shape, figure_shape)).astype(np.float32)
197 | return img
198 |
199 |
200 | def get_sequences(data_paths,labels,figure_shape,seq_length,classes=1, use_augmentation = False,use_crop=True,crop_x_y=None):
201 | X, y = [], []
202 | seq_len = 0
203 | for data_path, label in zip(data_paths,labels):
204 | frames = sorted(glob.glob(os.path.join(data_path, '*jpg')))
205 | x = frame_loader(frames, figure_shape)
206 | if(crop_x_y):
207 | x = [crop_img__remove_Dark(x_,crop_x_y[0],crop_x_y[1],x_.shape[0],x_.shape[1],figure_shape) for x_ in x]
208 | if use_augmentation:
209 | rand = scipy.random.random()
210 | corner=""
211 | if rand > 0.5:
212 | if(use_crop):
213 | corner=random.choice(corner_keys)
214 | x = [crop_img(x_,figure_shape,0.7,corner) for x_ in x]
215 | x = [frame.transpose(1, 0, 2) for frame in x]
216 | if(Debug_Print_AUG):
217 | to_write = [list(a) for a in zip(frames, x)]
218 | [cv2.imwrite(x_[0] + "_" + corner, x_[1] * 255) for x_ in to_write]
219 |
220 | x = [x[i] - x[i+1] for i in range(len(x)-1)]  # frame differencing between consecutive frames
221 | X.append(x)
222 | y.append(label)
223 | X = pad_sequences(X, maxlen=seq_length, padding='pre', truncating='pre')
224 | if classes > 1:
225 | y = to_categorical(y, classes)  # one-hot encode the labels (was mistakenly applied to x_)
226 | return np.array(X), np.array(y)
227 |
228 | import re
229 |
230 | def natural_sort(l):
231 | convert = lambda text: int(text) if text.isdigit() else text.lower()
232 | alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
233 | return sorted(l, key = alphanum_key)
234 |
235 |
236 |
237 | # def generate_augmentations(data_path,figure_shape = 244, force = False):
238 | # seq_len = 0
239 | # crop_path = data_path + "_crop"
240 | # if not os.path.exists(crop_path) or force:
241 | # frames = natural_sort(glob.glob(os.path.join(data_path, '*jpg')))
242 | # frames_arr = frame_loader(frames, figure_shape,to_norm = False)
243 | # print("augmenting " + data_path)
244 | # os.makedirs(crop_path)
245 | # for frame in frames_arr:
246 | # #transpose
247 | # img_transpose = frame.transpose(1,0,2)
248 | # data_path_aug = os.path.join(crop_path,"frame_%d.jpg" % seq_len)
249 | # cv2.imwrite(data_path_aug, img_transpose)
250 | # seq_len += 1
251 | # return crop_path
252 |
253 | # def load_data(data_paths,labels,figure_shape,seq_length):
254 | # X, y = [], []
255 | # for select_index in range(len(data_paths)):
256 | # x = 
get_sequence(data_paths[select_index])
257 | # frames = sorted(glob.glob(os.path.join(data_paths[select_index], '*jpg')))
258 | # x = frame_loader(frames, figure_shape)
259 | # X.append(x)
260 | # y.append(labels[select_index])
261 | # X = pad_sequences(X,maxlen = seq_length, padding = 'pre' , truncating = 'pre' )
262 | # return np.array(X), np.array(y)
263 | #
264 | # def load_data(data_paths,labels,figure_shape,seq_length):
265 | # X,y = [], []
266 | # x, y = get_sequences(data_paths,labels)
267 | # for select_index in range(len(data_paths)):
268 | #
269 | # frames = sorted(glob.glob(os.path.join(data_paths[select_index], '*jpg')))
270 | # x = frame_loader(frames, figure_shape)
271 | # X.append(x)
272 | # y.append(labels[select_index])
273 | # X = pad_sequences(X,maxlen = seq_length, padding = 'pre' , truncating = 'pre' )
274 | # return np.array(X), np.array(y)
275 | #
276 | # def data_generator(data_paths,labels,batch_size,figure_shape,seq_length):
277 | # while True:
278 | # X, y = [], []
279 | # indexes = np.arange(len(data_paths))
280 | # np.random.shuffle(indexes)
281 | # select_indexes = indexes[:batch_size]
282 | # for select_index in select_indexes:
283 | # frames = sorted(glob.glob(os.path.join(data_paths[select_index], '*jpg')))
284 | # x = frame_loader(frames, figure_shape)
285 | # X.append(x)
286 | # y.append(labels[select_index])
287 | # X = pad_sequences(X,maxlen = seq_length, padding = 'pre' , truncating = 'pre' )
288 | # yield np.array(X), np.array(y)
-------------------------------------------------------------------------------- /SportsActionRecognition_SIFT_SVM.py: --------------------------------------------------------------------------------
1 | __author__ = 'somnath'
2 |
3 |
4 | import numpy as np
5 | import cv2
6 | import sys
7 | import os
8 | import glob
9 | from sklearn import svm
10 |
11 |
12 |
13 | sportsActionPath = "/Users/somnath/MY_PROG/ComputerVision/PA3/ucf_sports_actions/ucf_action"
14 | testPath = "/Users/somnath/MY_PROG/ComputerVision/pa3/Testing/"
15 |
16 | # Sports Action Tag
17 | sportsActionTag = {
18 | 'Diving-Side': 0,
19 | 'Golf-Swing-Back':1,
20 | 'Golf-Swing-Front':2,
21 | 'Golf-Swing-Side':3,
22 | 'Kicking-Front':4,
23 | 'Kicking-Side':5,
24 | 'Lifting':6,
25 | 'Run-Side':7,
26 | 'SkateBoarding-Front':8,
27 | 'Swing-SideAngle':9,
28 | 'Walk-Front':10,
29 | 'Swing-Bench':11,
30 | 'Riding-Horse':12
31 | }
32 |
33 |
34 | featuresLimit = 100
35 |
36 |
37 |
38 | '''
39 | Function Name: featureExtraction()
40 | Input Args : <Video Path>, <Action Name>, <Type>
41 | Returns : <Video Feature List>
42 | Description : This function extracts features from each frame of a video and consolidates them.
43 | While extracting features, it prepends a label to each feature vector, based on the Sports
44 | Action Type. This helps keep track of each feature and its corresponding label when the features are shuffled during
45 | cross validation.
46 | 47 | ''' 48 | def featureExtraction( videoPath, actionName, type): 49 | 50 | 51 | 52 | # Set frame path, if jpeg directory doesn't exist , take images from video dir 53 | framePath = videoPath 54 | if os.path.exists( framePath + "/jpeg") : 55 | framePath += "/jpeg/" 56 | # Extract feature 57 | imageFrames = getImageList(framePath) 58 | #print "DEBUG: Image Frames - ", imageFrames 59 | frameCount = 0 60 | frameIndex = 0 61 | 62 | # Feature List for a video 63 | videoFeatures = [] 64 | 65 | for iFrame in imageFrames: 66 | 67 | frameIndex += 1 68 | # Only take alternate frames 69 | if (frameIndex % 2) == 0 : 70 | continue 71 | 72 | # Read Frame 73 | frame = cv2.imread(iFrame) 74 | # Create SIFT object 75 | sift = cv2.SIFT() 76 | gray=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY) 77 | kp, des = sift.detectAndCompute(gray, None) 78 | 79 | fIndex = 0 # Feature Index 80 | for d in des: 81 | # Insert Label Only for Training 82 | if type == "Trng": 83 | d = np.insert(d, 0, sportsActionTag[actionName]) 84 | 85 | videoFeatures.append(d) 86 | if fIndex >= featuresLimit: 87 | break 88 | fIndex += 1 89 | 90 | 91 | if frameCount >= 23: 92 | break 93 | frameCount += 1 94 | 95 | 96 | 97 | print " \t\tFrame Count:{0}".format(frameCount) 98 | #print "Video Features: ", videoFeatures 99 | 100 | return videoFeatures 101 | 102 | ''' 103 | Function Name: getImageList() 104 | Input Args : 105 | Return : 106 | Description : This function returns list of images. 107 | ''' 108 | def getImageList(imageDirectory): 109 | 110 | # Find different type of images 111 | rImages = glob.glob(imageDirectory + "/*.jpg") 112 | rImages += glob.glob(imageDirectory + "/*.jpeg") 113 | rImages += glob.glob(imageDirectory + "/*.png") 114 | 115 | return rImages 116 | 117 | ''' 118 | Function Name: getListOfDir() 119 | Input Args : < Path > 120 | Return : 121 | Description : This function returns all the directories under the specified paths 122 | ''' 123 | def getListOfDir(path): 124 | # Read each sport action directory 125 | dirs = os.listdir(path) 126 | #print dirs 127 | 128 | sportsActionsCount = 0 129 | filtered_dir = [] 130 | # Remove . .. and hidden directory 131 | for dir in dirs: 132 | if not dir.startswith("."): 133 | filtered_dir.append(dir) 134 | 135 | return filtered_dir 136 | 137 | ''' 138 | Function Name: getSportsActionName() 139 | Input Args : < Sports Action Index> 140 | Return : 141 | Description : This function returns the name of Sports Action based on index value 142 | 143 | ''' 144 | def getSportsActionName(rIndex): 145 | 146 | keys = sportsActionTag.keys() 147 | 148 | for key in keys: 149 | if rIndex == sportsActionTag[key]: 150 | return key 151 | 152 | ''' 153 | Function Name: evaluation() 154 | Input Args : < 1D Array: Truth>, <1D Array: Predicted>, < Sports Action Index> 155 | Return : ,, 156 | Description : This function calculate evaluation metrics sensitivity, specificity and accuracy 157 | based on True Positive (TP), False Positive (FP), False Negative (FN) and True Negative (TN) rate. 158 | 159 | Sensitivity = ( True Positive Rate) = TP / ( TP + FN ) 160 | Specificity = ( True Negative Rate) = TN / ( TN + FP ) 161 | Accuracy = ( TP + TN ) / ( TP + FN + FP + TN ) 162 | 163 | ''' 164 | 165 | def evaluation( truth, predicted, categoryIndex ): 166 | # TP,FP,FN,TN indicate True Positive, False Positive, False Negative, True Negative respectively 167 | TP = 1 168 | FP = 1 169 | FN = 1 170 | TN = 1 171 | 172 | # Categories are Rest1=>0, Rest2=> 1, Rest3=>2 etc.. 
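# Editor's worked example (added comment): with truth=[2,2,0],
# predicted=[2,0,0] and categoryIndex=2, the loop below scores one TP, one FN
# and one TN. Because the counters are initialised to 1 (additive smoothing
# that avoids division by zero), the final counts are TP=2, FP=1, FN=2, TN=2,
# giving sensitivity = 2/4 = 0.5, specificity = 2/3 and accuracy = 4/7.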
173 | for fIndex in range(len(truth)): 174 | ''' 175 | #print "truth-%d predicted-%d" % (int (truth [iDoc]), int( predicted[iDoc][0] ) ) 176 | if ( int(truth[restIndex]) == categoryIndex): 177 | # TP=> when P[i] = T[i] = Ci 178 | if (int(truth[restIndex]) == int (predicted[restIndex])): 179 | TP += 1 180 | else: 181 | FP += 1 182 | elif ( int ( predicted[restIndex]) == categoryIndex ): 183 | FN += 1 184 | else: 185 | TN += 1 186 | ''' 187 | # Positive prediction for each feature 188 | if ( int(predicted[fIndex]) == categoryIndex): 189 | # TP=> when P[i] = T[i] = Ci 190 | if (int(truth[fIndex]) == int (predicted[fIndex])): 191 | TP += 1 192 | else: 193 | FP += 1 194 | else: # Negative Prediction 195 | if ( int ( truth[fIndex]) == categoryIndex ): 196 | FN += 1 197 | else: 198 | TN += 1 199 | 200 | # Calculate Sensitivity - True Positive Rate 201 | sensitivity = TP / float ( TP + FN ) 202 | 203 | # Specificity - True Negative Rate 204 | specificity = TN / float ( TN + FP ) 205 | 206 | #Calculate accuracy 207 | accuracy = ( TP + TN ) / float ( TP + FP + FN + TN ) 208 | 209 | 210 | return sensitivity, specificity, accuracy 211 | 212 | ''' 213 | Function Name: crossValidation() 214 | Input Args : < Array: Feature and Label List - Fits element of vector indicates action label and rest are for features> 215 | Retrun : None 216 | Description : It perform K-Fold cross validation. 217 | First, I shuffle the feature list which contains features as well as label at the very first element of 218 | the feature vector to obtain better result. The complete set of shuffled features are divided equally 219 | into k=13 sub parts. k-1 subset is used for training and one subset is used for validation. I iterate the 220 | process for k=13 times with different subset combinations for training and validation. 221 | 222 | Evaluation Metrics: 223 | At each iteration, evaluation metrics sensitivity, specificity and accuracy are calculated 224 | based on True Positive (TP), False Positive (FP), False Negative (FN) and True Negative (TN) rates. 225 | 226 | Sensitivity = ( True Positive Rate) = TP / ( TP + FN ) 227 | Specificity = ( True Negative Rate) = TN / ( TN + FP ) 228 | Accuracy = ( TP + TN ) / ( TP + FN + FP + TN ) 229 | 230 | At the end of all iterations of cross validation, I average them all to get average rate. 
231 | '''
232 | def crossValidation( featureAndLabelList):
233 | 
234 |     # Randomize the sample
235 |     np.random.shuffle(featureAndLabelList)
236 | 
237 | 
238 |     # Evaluation Metrics
239 |     sensitivity = 0.0
240 |     specificity = 0.0
241 |     accuracy = 0.0
242 | 
243 | 
244 | 
245 |     # Split the feature set into as many equal subsets as there are sports actions (k folds)
246 |     subsetLength = len(featureAndLabelList) / len(sportsActionTag)
247 |     for rIndex in range(len(sportsActionTag)):
248 | 
249 |         print "INFO: Cross Validation Iteration - ", rIndex
250 |         trainingSet = []
251 |         validationSet = []
252 |         feature = []
253 |         label = []
254 | 
255 | 
256 |         if ( rIndex == 0 ):
257 |             trainingSet = featureAndLabelList[1*subsetLength:]
258 |             validationSet = featureAndLabelList[0: subsetLength]
259 |         elif ( rIndex == (len(sportsActionTag) - 1) ):
260 |             trainingSet = featureAndLabelList[:rIndex * subsetLength]
261 |             validationSet = featureAndLabelList[rIndex * subsetLength : ]
262 |         else:
263 |             trainingSet = np.concatenate ((featureAndLabelList[:rIndex * subsetLength] , featureAndLabelList[(rIndex + 1) * subsetLength: ]), axis=0 )
264 |             validationSet = featureAndLabelList[rIndex * subsetLength : (rIndex + 1 ) * subsetLength]
265 | 
266 | 
267 |         # Get all features in an array
268 |         for featureAndLabel in trainingSet:
269 |             label.append(int(featureAndLabel[0]))
270 |             feature.append((np.delete(featureAndLabel, 0)).tolist())
271 | 
274 |         #print "Training Feature Length:", len(feature)
275 | 
276 |         # Train model
277 |         print "INFO: Training ... "
278 |         clf = svm.SVC(gamma=0.001, C=1.0)
279 |         clf.fit(feature,label)
280 | 
281 |         # Prepare validation feature and label lists to be predicted
282 |         print "INFO: Prediction for ", getSportsActionName(rIndex)
283 |         vFeatureList = []
284 |         vLabelList = [] # Ground Truth
285 |         for featureAndLabel in validationSet:
286 |             vFeatureList.append(featureAndLabel[1:].tolist())
287 |             vLabelList.append(featureAndLabel[0])
288 | 
289 |         predictedLabel = clf.predict(vFeatureList)
290 | 
291 |         # Predict the validation set and calculate the accuracy
292 |         print "INFO: Evaluating ... "
293 |         print "\t Truth     - ", vLabelList
294 |         print "\t Predicted - ", str(predictedLabel.tolist())
295 | 
296 |         # Evaluation <Truth>, <Predicted>, <Category Index>
297 |         (sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , rIndex)
298 | 
299 |         sensitivity += sen
300 |         specificity += spec
301 |         accuracy += accu
302 | 
303 |         print "\t Sensitivity : ", sen
304 |         print "\t Specificity : ", spec
305 |         print "\t Accuracy    : ", accu
306 | 
307 | 
308 | 
309 |     # Average evaluation metrics
310 |     avgSensitivity = sensitivity / len(sportsActionTag)
311 |     avgSpecificity = specificity / len(sportsActionTag)
312 |     avgAccuracy = accuracy / len(sportsActionTag)
313 | 
314 | 
315 |     print "\t*** Overall Evaluation ***"
316 |     print "\t Average Sensitivity: ", avgSensitivity
317 |     print "\t Average Specificity: ", avgSpecificity
318 |     print "\t Average Accuracy   : ", avgAccuracy
319 | 
320 | 
321 | 
322 | def main():
323 |     print "INFO: Action Recognition"
324 | 
325 |     sportsActionList = getListOfDir( sportsActionPath )
326 |     print "INFO: Sports Action - ", sportsActionList
327 | 
328 |     sportsActionFeatures = []
329 | 
330 |     firstActionFlag = 0
331 |     for sportsActionName in sportsActionList:
332 |         sportsActionDir = sportsActionPath + "/" + sportsActionName
333 |         # Get the list of videos for each sports action
334 |         videoList = getListOfDir(sportsActionDir)
335 | 
336 |         print "INFO: Video List:", videoList
337 | 
338 |         videoFeatures = []
339 |         # For all videos in each action category
340 |         for video in videoList:
341 |             # Complete path of the video directory containing jpeg images
342 |             videoPath = sportsActionDir + "/" + video
343 |             print "\tVideo Path:", videoPath
344 |             # Extract features
345 |             videoFeatures = featureExtraction(videoPath , sportsActionName, 'Trng')
346 |             #print "Video Features: ", videoFeatures
347 |             # Put together all the videos
348 |             if firstActionFlag == 0:
349 |                 sportsActionFeatures = videoFeatures
350 |                 firstActionFlag = 1
351 |             else:
352 |                 sportsActionFeatures = np.concatenate( (sportsActionFeatures, videoFeatures), axis=0)
353 | 
354 | 
355 |     # K-Fold Cross Validation
356 |     crossValidation(sportsActionFeatures)
357 | 
358 |     ## **** Testing with unseen data **** ##
359 | 
360 |     np.random.shuffle(sportsActionFeatures)
361 |     label = []
362 |     feature = []
363 |     # Get all features in an array
364 |     for featureAndLabel in sportsActionFeatures:
365 |         label.append(int(featureAndLabel[0]))
366 |         feature.append((np.delete(featureAndLabel, 0)).tolist())
367 | 
368 | 
369 | 
370 |     # Train model
371 |     print "INFO: Training ... 
" 372 | clf = svm.SVC(gamma=0.01, C=13) 373 | clf.fit(feature,label) 374 | 375 | # Test Path 376 | tPath = "/Users/somnath/MY_PROG/ComputerVision/PA3/ucf_sports_actions/ucf_action/Diving-Side/014" 377 | vFeatures = featureExtraction(tPath , sportsActionName, 'Test') 378 | predictedLabels = clf.predict(vFeatures) 379 | 380 | #print "Predicted Labels:", predictedLabels 381 | predictedLabelMode = (mode(predictedLabels))[0] 382 | print "\t Predicted Sports Action:{0} - {1}".format(predictedLabelMode,getSportsActionName(predictedLabelMode) ) 383 | 384 | 385 | 386 | 387 | 388 | if __name__ == "__main__": 389 | main() 390 | 391 | -------------------------------------------------------------------------------- /violentflow_HOG.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pickle\n", 10 | "import numpy as np\n", 11 | "import cv2\n", 12 | "import sys\n", 13 | "import os\n", 14 | "import glob\n", 15 | "from sklearn import svm\n", 16 | "from scipy.stats import mode\n", 17 | "from sklearn.ensemble import RandomForestClassifier\n", 18 | "from tqdm import tqdm" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "featuresLimit = 15000" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def featureExtraction( videoPath, actionName, type):\n", 37 | "\n", 38 | "\n", 39 | " # Set frame path, if jpeg directory doesn't exist , take images from video dir\n", 40 | " framePath = videoPath\n", 41 | " if os.path.exists( framePath + \"/jpeg\") :\n", 42 | " framePath += \"/jpeg/\"\n", 43 | "\n", 44 | " # Extract feature\n", 45 | "# imageFrames = getImageList(framePath)\n", 46 | "\n", 47 | " pickle_name='video_summary.pkl'\n", 48 | " file_path=os.path.join(videoPath,pickle_name)\n", 49 | " with open(file_path, 'rb') as f:\n", 50 | " data = pickle.load(f)\n", 51 | " imageFrames=data['images_files']\n", 52 | " #print \"DEBUG: Image Frames - \", imageFrames\n", 53 | "\n", 54 | " frameCount = 0\n", 55 | " frameIndex = 0\n", 56 | "\n", 57 | " # Feature List for a video\n", 58 | " videoFeatures = []\n", 59 | "\n", 60 | " for iFrame in imageFrames:\n", 61 | "\n", 62 | " frameIndex += 1\n", 63 | "\n", 64 | " # Read Frame\n", 65 | " frame = cv2.imread(iFrame)\n", 66 | " gray=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)\n", 67 | "\n", 68 | " # HOG Descriptor , default value it takes window size= 64x128, block size= 16x16, block stride= 8x8, cell size= 8x8, bins= 9\n", 69 | " hogDescriptor = cv2.HOGDescriptor()\n", 70 | "\n", 71 | " # Returns histogram\n", 72 | " hist = hogDescriptor.compute(gray)\n", 73 | "\n", 74 | " #sortedHogDescriptor = hogDescriptor\n", 75 | " sortedHogHist = np.sort(hist, axis=None)\n", 76 | "\n", 77 | " keyFeatures = sortedHogHist[- featuresLimit : ]\n", 78 | "\n", 79 | " if type == \"Trng\":\n", 80 | " keyFeatures = np.insert(keyFeatures, 0, sportsActionTag[actionName])\n", 81 | "\n", 82 | " videoFeatures.append(keyFeatures)\n", 83 | "\n", 84 | " # Lowest number of frame available in a video\n", 85 | " if frameCount >= 23:\n", 86 | " break\n", 87 | "\n", 88 | " frameCount += 1\n", 89 | "\n", 90 | "\n", 91 | " return videoFeatures" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 4, 97 | "metadata": {}, 98 | "outputs": [], 99 | 
"source": [ 100 | "sportsActionTag = {\n", 101 | " 'no_fight': 0,\n", 102 | " 'fight':1\n", 103 | "}" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "videoPath='./data/raw_frames/violentflow/audience_violence__Riot_police_attack_peaceful_protesters_at_G20_Climate_Camp__bristlekrs__t244-zEENSs'\n", 113 | "sportsActionName='fight'" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "videoFeatures = featureExtraction(videoPath , sportsActionName, 'Trng')" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 7, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "21" 134 | ] 135 | }, 136 | "execution_count": 7, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "len(videoFeatures)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 9, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "def getListOfDir(path):\n", 152 | " # Read each sport action directory\n", 153 | " dirs = os.listdir(path)\n", 154 | "\n", 155 | " sportsActionsCount = 0\n", 156 | " filtered_dir = []\n", 157 | " # Remove . .. and hidden directory\n", 158 | " for dir in dirs:\n", 159 | " if not dir.startswith(\".\"):\n", 160 | " filtered_dir.append(dir)\n", 161 | "\n", 162 | " return filtered_dir" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 10, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "sportsActionPath='./data/raw_frames/violentflow'\n", 172 | "sportsActionList = getListOfDir( sportsActionPath )" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 11, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "['audience_violence__Riot_police_attack_peaceful_protesters_at_G20_Climate_Camp__bristlekrs__t244-zEENSs', 'balcony_football__birmingham_promoted_to_premier_from_balcony_town_hall__clippo67_', 'balcony_football__Seoul_vs_Dae_Gu_Futbol_Roman_Candles__JOECORE__bbpgvaM2sxs', 'balcony_football_violence__Brannik_Football_Violence__Nattevandring__ysW-tGv-YjI', 'balcony_football_violence__British_Football_Hooligans_Mix_1__MorningGlory1997__pn6CNLi3UhA']\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "print(sportsActionList[:5])" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 12, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "name": "stderr", 199 | "output_type": "stream", 200 | "text": [ 201 | "100%|██████████| 246/246 [14:45<00:00, 3.35s/it]\n" 202 | ] 203 | } 204 | ], 205 | "source": [ 206 | "\n", 207 | "firstActionFlag = 0\n", 208 | "videoFeatures = []\n", 209 | "videoCount=1\n", 210 | "for sportsActionName in tqdm(sportsActionList):\n", 211 | "# if(videoCount==10):\n", 212 | "# break\n", 213 | " sportsActionDir = sportsActionPath + \"/\" + sportsActionName\n", 214 | " if('violence' in sportsActionName):\n", 215 | " sportsActionName='fight'\n", 216 | " else:\n", 217 | " sportsActionName='no_fight'\n", 218 | " \n", 219 | " videoFeatures = featureExtraction(sportsActionDir , sportsActionName, 'Trng')\n", 220 | "# print(len(videoFeatures))\n", 221 | " # Put together all the videos\n", 222 | " if firstActionFlag == 0:\n", 223 | " sportsActionFeatures = videoFeatures\n", 224 | " firstActionFlag 
= 1\n", 225 | " else:\n", 226 | " sportsActionFeatures = np.concatenate( (sportsActionFeatures, videoFeatures), axis=0)\n", 227 | "\n", 228 | " videoCount += 1\n", 229 | " \n", 230 | " " 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 13, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "sportsActionNumber = len(sportsActionTag)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 16, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "def evaluation( truth, predicted, categoryIndex ):\n", 249 | "\n", 250 | " # TP,FP,FN,TN indicate True Positive, False Positive, False Negative, True Negative respectively\n", 251 | " TP = 1\n", 252 | " FP = 1\n", 253 | " FN = 1\n", 254 | " TN = 1\n", 255 | "\n", 256 | " # Categories are Sports Action 1=>0, Sports Action 2=> 1, Sports Action 3=>2 etc..\n", 257 | " for fIndex in range(len(truth)):\n", 258 | "\n", 259 | " # Positive prediction for each feature\n", 260 | " if ( int(predicted[fIndex]) == categoryIndex):\n", 261 | " # TP=> when P[i] = T[i] = Ci\n", 262 | " if (int(truth[fIndex]) == int (predicted[fIndex])):\n", 263 | " TP += 1\n", 264 | " else:\n", 265 | " FP += 1\n", 266 | " else: # Negative Prediction\n", 267 | " if ( int ( truth[fIndex]) == categoryIndex ):\n", 268 | " FN += 1\n", 269 | " else:\n", 270 | " TN += 1\n", 271 | "\n", 272 | "\n", 273 | " # Calculate Sensitivity - True Positive Rate - Recall\n", 274 | " sensitivity = TP / float ( TP + FN )\n", 275 | "\n", 276 | " # Specificity - True Negative Rate\n", 277 | " specificity = TN / float ( TN + FP )\n", 278 | "\n", 279 | " #Calculate accuracy\n", 280 | " accuracy = ( TP + TN ) / float ( TP + FP + FN + TN )\n", 281 | "\n", 282 | "\n", 283 | " return sensitivity, specificity, accuracy" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 17, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "def getSportsActionName(saIndex):\n", 293 | "\n", 294 | " keys = sportsActionTag.keys()\n", 295 | "\n", 296 | " for key in keys:\n", 297 | " if saIndex == sportsActionTag[key]:\n", 298 | " return key" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 18, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "name": "stdout", 308 | "output_type": "stream", 309 | "text": [ 310 | "(5513, 15001)\n" 311 | ] 312 | } 313 | ], 314 | "source": [ 315 | "print(sportsActionFeatures.shape)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 19, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "featureAndLabelList=sportsActionFeatures\n", 325 | "np.random.shuffle(featureAndLabelList)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 20, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "subsetLength = int(len(featureAndLabelList)*0.8)\n", 335 | "trainigSet = featureAndLabelList[:subsetLength]\n", 336 | "valdationSet = featureAndLabelList[subsetLength : ]" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 21, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "# Get all features in a array\n", 346 | "label = []\n", 347 | "feature = []\n", 348 | "for featureAndLabel in trainigSet:\n", 349 | " label.append(int(featureAndLabel[0]))\n", 350 | " feature.append((np.delete(featureAndLabel, 0)).tolist())" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 22, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | 
"name": "stdout", 360 | "output_type": "stream", 361 | "text": [ 362 | "INFO: Training ... \n" 363 | ] 364 | } 365 | ], 366 | "source": [ 367 | "# Train model\n", 368 | "print(\"INFO: Training ... \")\n", 369 | "clf=RandomForestClassifier(n_estimators=13)\n", 370 | "clf=clf.fit(feature,label)\n", 371 | "\n", 372 | "# Prepare validation feature and label to be predicted\n", 373 | "# print(\"INFO: Prediction for \", getSportsActionName(rIndex))\n", 374 | "vFeatureList = []\n", 375 | "vLabelList = [] # Ground Truth\n", 376 | "for featureAndLabel in valdationSet:\n", 377 | " vFeatureList.append(featureAndLabel[1:].tolist())\n", 378 | " vLabelList.append(featureAndLabel[0])" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 23, 384 | "metadata": {}, 385 | "outputs": [ 386 | { 387 | "name": "stdout", 388 | "output_type": "stream", 389 | "text": [ 390 | "INFO: Evaluating ... \n" 391 | ] 392 | } 393 | ], 394 | "source": [ 395 | "# Predict the class label for Validation Feature List\n", 396 | "predictedLabel = clf.predict(vFeatureList)\n", 397 | "\n", 398 | "# predict validation set and calculate accuracy\n", 399 | "print(\"INFO: Evaluating ... \")\n", 400 | "\n", 401 | "# Evaluation < Truth>, , \n", 402 | "(sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , 0)\n", 403 | "(sen1, spec1 , accu1 ) = evaluation(vLabelList , predictedLabel.tolist() , 1)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 25, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "name": "stdout", 413 | "output_type": "stream", 414 | "text": [ 415 | " *** Overall Evaluation ***\n", 416 | " Average Sensitivity: 0.7548019737272074\n", 417 | " Average Specificity: 0.7548019737272074\n", 418 | " Average Accuracy : 0.7542908762420958\n" 419 | ] 420 | } 421 | ], 422 | "source": [ 423 | "# Evaluation Metrics\n", 424 | "sensitivity = 0.0\n", 425 | "specificity = 0.0\n", 426 | "accuracy = 0.0\n", 427 | "\n", 428 | "sensitivity += sen\n", 429 | "specificity += spec\n", 430 | "accuracy += accu\n", 431 | "\n", 432 | "sensitivity += sen1\n", 433 | "specificity += spec1\n", 434 | "accuracy += accu1\n", 435 | "# print(\"\\t Sensitivity : \", sen)\n", 436 | "# print(\"\\t Specificity : \", spec)\n", 437 | "# print(\"\\t Accuracy : \", accu)\n", 438 | "# Average evaluation metrics\n", 439 | "avgSensitivity = sensitivity / sportsActionNumber\n", 440 | "avgSpecificity = specificity / sportsActionNumber\n", 441 | "avgAccuracy = accuracy / sportsActionNumber\n", 442 | "\n", 443 | "\n", 444 | "print(\" *** Overall Evaluation ***\")\n", 445 | "print(\" Average Sensitivity: \", avgSensitivity)\n", 446 | "print(\" Average Specificity: \", avgSpecificity)\n", 447 | "print(\" Average Accuracy : \", avgAccuracy)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 26, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "from sklearn import svm" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 27, 462 | "metadata": {}, 463 | "outputs": [ 464 | { 465 | "name": "stdout", 466 | "output_type": "stream", 467 | "text": [ 468 | "INFO: Training ... 
\n" 469 | ] 470 | }, 471 | { 472 | "data": { 473 | "text/plain": [ 474 | "SVC(C=13, cache_size=200, class_weight=None, coef0=0.0,\n", 475 | " decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',\n", 476 | " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", 477 | " tol=0.001, verbose=False)" 478 | ] 479 | }, 480 | "execution_count": 27, 481 | "metadata": {}, 482 | "output_type": "execute_result" 483 | } 484 | ], 485 | "source": [ 486 | "print(\"INFO: Training ... \")\n", 487 | "clf = svm.SVC(gamma=0.01, C=13)\n", 488 | "clf.fit(feature,label)" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 28, 494 | "metadata": {}, 495 | "outputs": [ 496 | { 497 | "name": "stdout", 498 | "output_type": "stream", 499 | "text": [ 500 | "INFO: Evaluating ... \n" 501 | ] 502 | } 503 | ], 504 | "source": [ 505 | "# Predict the class label for Validation Feature List\n", 506 | "predictedLabel = clf.predict(vFeatureList)\n", 507 | "\n", 508 | "# predict validation set and calculate accuracy\n", 509 | "print(\"INFO: Evaluating ... \")\n", 510 | "\n", 511 | "# Evaluation < Truth>, , \n", 512 | "(sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , 0)\n", 513 | "(sen1, spec1 , accu1 ) = evaluation(vLabelList , predictedLabel.tolist() , 1)" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": 29, 519 | "metadata": {}, 520 | "outputs": [ 521 | { 522 | "name": "stdout", 523 | "output_type": "stream", 524 | "text": [ 525 | " *** Overall Evaluation ***\n", 526 | " Average Sensitivity: 0.763031827985099\n", 527 | " Average Specificity: 0.763031827985099\n", 528 | " Average Accuracy : 0.7624209575429087\n" 529 | ] 530 | } 531 | ], 532 | "source": [ 533 | "# Evaluation Metrics\n", 534 | "sensitivity = 0.0\n", 535 | "specificity = 0.0\n", 536 | "accuracy = 0.0\n", 537 | "\n", 538 | "sensitivity += sen\n", 539 | "specificity += spec\n", 540 | "accuracy += accu\n", 541 | "\n", 542 | "sensitivity += sen1\n", 543 | "specificity += spec1\n", 544 | "accuracy += accu1\n", 545 | "# print(\"\\t Sensitivity : \", sen)\n", 546 | "# print(\"\\t Specificity : \", spec)\n", 547 | "# print(\"\\t Accuracy : \", accu)\n", 548 | "# Average evaluation metrics\n", 549 | "avgSensitivity = sensitivity / sportsActionNumber\n", 550 | "avgSpecificity = specificity / sportsActionNumber\n", 551 | "avgAccuracy = accuracy / sportsActionNumber\n", 552 | "\n", 553 | "\n", 554 | "print(\" *** Overall Evaluation ***\")\n", 555 | "print(\" Average Sensitivity: \", avgSensitivity)\n", 556 | "print(\" Average Specificity: \", avgSpecificity)\n", 557 | "print(\" Average Accuracy : \", avgAccuracy)" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": null, 570 | "metadata": {}, 571 | "outputs": [], 572 | "source": [] 573 | } 574 | ], 575 | "metadata": { 576 | "kernelspec": { 577 | "display_name": "Python 3", 578 | "language": "python", 579 | "name": "python3" 580 | }, 581 | "language_info": { 582 | "codemirror_mode": { 583 | "name": "ipython", 584 | "version": 3 585 | }, 586 | "file_extension": ".py", 587 | "mimetype": "text/x-python", 588 | "name": "python", 589 | "nbconvert_exporter": "python", 590 | "pygments_lexer": "ipython3", 591 | "version": "3.7.3" 592 | } 593 | }, 594 | "nbformat": 4, 595 | "nbformat_minor": 2 596 | } 597 | 
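Note on the evaluation cells above: the smoothed counts in evaluation() can be cross-checked against scikit-learn. A minimal sketch (assuming the vLabelList and predictedLabel variables produced in the cells above; counts start at 0 here, so the numbers will differ slightly from the +1-smoothed ones):

import numpy as np
from sklearn.metrics import confusion_matrix

# Rows are truth and columns are prediction, in label order [0, 1] (0 = no_fight, 1 = fight)
tn, fp, fn, tp = confusion_matrix(np.asarray(vLabelList, dtype=int),
                                  np.asarray(predictedLabel, dtype=int),
                                  labels=[0, 1]).ravel()

sensitivity = tp / float(tp + fn)                # recall for the 'fight' class
specificity = tn / float(tn + fp)
accuracy = (tp + tn) / float(tp + fp + fn + tn)
print(sensitivity, specificity, accuracy)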
-------------------------------------------------------------------------------- /run.py: --------------------------------------------------------------------------------
1 | import os
2 | from itertools import chain
3 | 
4 | from keras.callbacks import EarlyStopping, ReduceLROnPlateau, Callback
5 | from keras.optimizers import RMSprop, Adam,SGD
6 | 
7 | import pandas as pd
8 | from keras.applications import Xception, ResNet50, InceptionV3, MobileNet, VGG19, DenseNet121, InceptionResNetV2, VGG16
9 | from keras.layers import LSTM, ConvLSTM2D
10 | import BuildModel_basic
11 | import DatasetBuilder
12 | from model.mobilenet_v3_large import MobileNetV3_Large
13 | from numpy.random import seed, shuffle
14 | from keras_efficientnets import EfficientNetB0
15 | from keras_efficientnets import EfficientNetB1
16 | from tensorflow import set_random_seed
17 | from collections import defaultdict
18 | from shufflenetv2 import ShuffleNetV2
19 | from keras.callbacks import ModelCheckpoint
20 | 
21 | class TestCallback(Callback):
22 |     def __init__(self, test_data):
23 |         self.test_data = test_data
24 |         self.test_loss = []
25 |         self.test_acc = []
26 | 
27 |     def on_epoch_end(self, epoch, logs={}):
28 |         x, y = self.test_data
29 |         loss, acc = self.model.evaluate(x, y, batch_size=2, verbose=0)
30 |         self.test_loss.append(loss)
31 |         self.test_acc.append(acc)
32 |         print('\nTesting loss: {}, acc: {}\n'.format(loss, acc))
33 | 
34 | 
35 | def train_eval_network(dataset_name, train_gen, validate_gen, test_x, test_y, seq_len, epochs, batch_size,
36 |                        batch_epoch_ratio, initial_weights, size, cnn_arch, learning_rate,
37 |                        optimizer, cnn_train_type, pre_weights, lstm_conf, len_train, len_valid, dropout, classes,
38 |                        patience_es=15, patience_lr=5):
39 |     """Build, compile, fit and evaluate a given architecture on a dataset."""
40 |     set_random_seed(2)
41 |     seed(1)
42 |     result = dict(dataset=dataset_name, cnn_train=cnn_train_type,
43 |                   cnn=cnn_arch.__name__, lstm=lstm_conf[0].__name__, epochs=epochs,
44 |                   learning_rate=learning_rate, batch_size=batch_size, dropout=dropout,
45 |                   optimizer=optimizer[0].__name__, initial_weights=initial_weights, seq_len=seq_len)
46 |     print("run experiment " + str(result))
47 |     model = BuildModel_basic.build(size=size, seq_len=seq_len, learning_rate=learning_rate,
48 |                                    optimizer_class=optimizer, initial_weights=initial_weights,
49 |                                    cnn_class=cnn_arch, pre_weights=pre_weights, lstm_conf=lstm_conf,
50 |                                    cnn_train_type=cnn_train_type, dropout=dropout, classes=classes)
51 | 
52 |     # the network is trained on data generators and applies the callbacks when the validation loss is not improving:
53 |     # 1. early stopping after n iterations
54 |     # 2. reducing the learning rate after k iterations, where k < n
55 |     test_history = TestCallback((test_x, test_y))
56 |     model_names = ('checkpoints/' + cnn_arch.__name__ +
57 |                    '_weights.{epoch:02d}-{val_loss:.4f}.hdf5')
58 |     model_checkpoint = ModelCheckpoint(model_names,
59 |                                        monitor='val_loss',
60 |                                        verbose=1,
61 |                                        save_best_only=True,
62 |                                        save_weights_only=False)
63 |     history = model.fit_generator(
64 |         steps_per_epoch=int(float(len_train) / float(batch_size * batch_epoch_ratio)),
65 |         generator=train_gen,
66 |         epochs=epochs,
67 |         validation_data=validate_gen,
68 |         validation_steps=int(float(len_valid) / float(batch_size)),
69 |         callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.001, patience=patience_es, ),
70 |                    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=patience_lr, min_lr=1e-8, verbose=1),
71 |                    test_history,
72 |                    model_checkpoint
73 |                    ]
74 |     )
75 |     history_to_save = history.history
76 |     history_to_save['test accuracy'] = test_history.test_acc
77 |     history_to_save['test loss'] = test_history.test_loss
78 | 
79 |     model_name = ""
80 |     for k, v in result.items():
81 |         model_name = model_name + "_" + str(k) + "-" + str(v).replace(".", "d")
82 |     model_path = os.path.join(res_path, model_name)
83 |     pd.DataFrame(history_to_save).to_csv(model_path + "_train_results.csv")
84 |     result['validation loss'] = min(history.history['val_loss'])
85 |     result['validation accuracy'] = max(history.history['val_acc'])
86 |     result['last validation loss'] = history.history['val_loss'][-1]
87 |     result['last validation accuracy'] = history.history['val_acc'][-1]
88 | 
89 |     result['train accuracy'] = max(history.history['acc'])
90 |     result['train loss'] = min(history.history['loss'])
91 |     result['last train accuracy'] = history.history['acc'][-1]
92 |     result['last train loss'] = history.history['loss'][-1]
93 | 
94 |     result['test accuracy'] = max(test_history.test_acc)
95 |     result['test loss'] = min(test_history.test_loss)
96 |     result['last test accuracy'] = test_history.test_acc[-1]
97 |     result['last test loss'] = test_history.test_loss[-1]
98 | 
99 |     result['final lr'] = history.history['lr'][-1]
100 |     result['total epochs'] = len(history.history['lr'])
101 |     return result
102 | 
103 | def evaluate(dataset_name, train_gen, validate_gen, test_x, test_y, seq_len, epochs, batch_size,
104 |              batch_epoch_ratio, initial_weights, size, cnn_arch, learning_rate,
105 |              optimizer, cnn_train_type, pre_weights, lstm_conf, len_train, len_valid, dropout, classes,
106 |              patience_es=15, patience_lr=5):
107 |     """Build, compile and evaluate a given architecture on a dataset (no training)."""
108 |     result = dict(dataset=dataset_name, cnn_train=cnn_train_type,
109 |                   cnn=cnn_arch.__name__, lstm=lstm_conf[0].__name__, epochs=epochs,
110 |                   learning_rate=learning_rate, batch_size=batch_size, dropout=dropout,
111 |                   optimizer=optimizer[0].__name__, initial_weights=initial_weights, seq_len=seq_len)
112 |     # print("run experiment " + str(result))
113 |     model = BuildModel_basic.build(size=size, seq_len=seq_len, learning_rate=learning_rate,
114 |                                    optimizer_class=optimizer, initial_weights=initial_weights,
115 |                                    cnn_class=cnn_arch, pre_weights=pre_weights, lstm_conf=lstm_conf,
116 |                                    cnn_train_type=cnn_train_type, dropout=dropout, classes=classes)
117 |     res = model.evaluate_generator(validate_gen, steps=int(float(len_valid) / float(batch_size)), max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0)
118 |     print(model.metrics_names)
119 |     print(res)
120 | 
121 | 
122 | def get_generators(dataset_name, dataset_videos, 
datasets_frames, fix_len, figure_size, force, classes=1, use_aug=False,
123 |                    use_crop=True, crop_dark=None):
124 |     train_path, valid_path, test_path, \
125 |     train_y, valid_y, test_y, \
126 |     avg_length = DatasetBuilder.createDataset(dataset_videos, datasets_frames, fix_len, force=force)
127 | 
128 |     if fix_len is not None:
129 |         avg_length = fix_len
130 |     crop_x_y = None
131 |     if (crop_dark):
132 |         crop_x_y = crop_dark[dataset_name]
133 | 
134 |     len_train, len_valid = len(train_path), len(valid_path)
135 |     train_gen = DatasetBuilder.data_generator(train_path, train_y, batch_size, figure_size, avg_length, use_aug=use_aug,
136 |                                               use_crop=use_crop, crop_x_y=crop_x_y, classes=classes)
137 |     validate_gen = DatasetBuilder.data_generator(valid_path, valid_y, batch_size, figure_size, avg_length,
138 |                                                  use_aug=False, use_crop=False, crop_x_y=crop_x_y, classes=classes)
139 |     test_x, test_y = DatasetBuilder.get_sequences(test_path, test_y, figure_size, avg_length, crop_x_y=crop_x_y,
140 |                                                   classes=classes)
141 | 
142 |     return train_gen, validate_gen, test_x, test_y, avg_length, len_train, len_valid
143 | 
144 | 
145 | def hyper_tune_network(dataset_name, epochs, batch_size, batch_epoch_ratio, figure_size, initial_weights, lstm,
146 |                        cnns_arch,
147 |                        learning_rates, optimizers, cnn_train_types, dropouts, classes, use_augs, fix_lens):
148 |     """Train several network parameter settings in a loop and select the best architecture for the next evaluation."""
149 |     results = []
150 | 
151 |     best_accuracy = 0.0
152 |     best_loss = 10.0
153 |     # static params for tuning
154 |     params_to_train = dict(dataset_name=dataset_name, epochs=epochs, batch_size=batch_size,
155 |                            batch_epoch_ratio=batch_epoch_ratio, initial_weights=initial_weights, size=figure_size,
156 |                            pre_weights=weights, lstm_conf=lstm, classes=classes, patience_es=5, patience_lr=3)
157 | 
158 |     # the tuning does not evaluate all possible combinations;
159 |     # given the importance order of the hyperparameters, in each iteration we keep the best performing parameters
160 |     exp_params_order = ['cnn_arch', 'learning_rate', 'seq_len', 'use_aug', 'dropout',
161 |                         'cnn_train_type']
162 | 
163 |     print(cnns_arch.values())
164 |     best_params_train = dict(optimizer=optimizers[0], learning_rate=learning_rates[0],
165 |                              cnn_train_type=cnn_train_types[0], cnn_arch=list(cnns_arch.values())[0],
166 |                              dropout=dropouts[0])
167 |     exp_params_train = dict(optimizer=optimizers[1:], learning_rate=learning_rates[1:],
168 |                             cnn_train_type=cnn_train_types[1:], dropout=dropouts[1:],
169 |                             cnn_arch=list(cnns_arch.values()))
170 | 
171 |     best_params_data = dict(use_aug=use_augs[0], seq_len=fix_lens[0])
172 |     exp_params_data = dict(use_aug=use_augs[1:], seq_len=fix_lens[1:])
173 | 
174 |     for exp_param in exp_params_order:
175 |         temp_param = dict(best_params_train)
176 |         temp_param_data = dict(best_params_data)
177 |         if exp_param in exp_params_data:
178 |             exp_params_ = exp_params_data
179 |         else:
180 |             exp_params_ = exp_params_train
181 |         for param in exp_params_[exp_param]:
182 |             if exp_param in best_params_data:
183 |                 temp_param_data[exp_param] = param
184 |             else:
185 |                 temp_param[exp_param] = param
186 | 
187 |             print(temp_param_data)
188 |             print(temp_param)
189 |             params_to_train['train_gen'], params_to_train['validate_gen'], params_to_train['test_x'], \
190 |             params_to_train['test_y'], params_to_train['seq_len'], params_to_train['len_train'], \
191 |             params_to_train['len_valid'] = 
get_generators(dataset_name, datasets_videos[dataset_name], datasets_frames,
192 |                                temp_param_data['seq_len'],
193 |                                figure_size, use_aug=temp_param_data['use_aug'], force=force,
194 |                                classes=classes)
195 | 
196 |             params_to_train.update(temp_param)
197 |             result = train_eval_network(**params_to_train)
198 |             result.update(temp_param_data)
199 |             print(result)
200 |             results.append(result)
201 |             if result['test accuracy'] >= best_accuracy:  # and result['test loss'] <= best_loss :
202 |                 best_accuracy = result['test accuracy']
203 |                 best_loss = result['test loss']
204 |                 if exp_param in best_params_data:
205 |                     best_params_data[exp_param] = param
206 |                 else:
207 |                     best_params_train[exp_param] = param
208 |                 print("best accuracy updated " + str(best_accuracy))
209 |     best_params_train.update(best_params_data)
210 |     return best_params_train, results
211 | 
212 | root_dir = '/home/eric/data/violence_recognition'
213 | # static parameters for the network
214 | datasets_videos = dict(
215 |     hocky=dict(hocky=os.path.join(root_dir, "HockeyFights")),
216 |     violentflow=dict(violentflow=os.path.join(root_dir, "violentflow")),
217 |     # movies=dict(movies="data/raw_videos/movies")
218 | )
219 | 
220 | crop_dark = dict(
221 |     hocky=(11, 38),
222 |     violentflow=None,
223 |     movies=None
224 | )
225 | dataset_name = 'violentflow'
226 | 
227 | # crop_dark['hocky']=(11, 38)
228 | 
229 | datasets_frames = "data/raw_frames"
230 | res_path = "results"
231 | figure_size = 244
232 | # split_ratio = 0.1
233 | batch_size = 2
234 | # batch_epoch_ratio = 0.5 # double the size because we use augmentation
235 | fix_len = 20
236 | initial_weights = 'glorot_uniform'
237 | weights = 'imagenet'
238 | force = True
239 | lstm = (ConvLSTM2D, dict(filters=256, kernel_size=(3, 3), padding='same', return_sequences=False))
240 | classes = 1
241 | 
242 | # hyper parameters for tuning the network
243 | cnns_arch = dict(ResNet50=ResNet50, InceptionV3=InceptionV3, InceptionResNetV2=InceptionResNetV2)
244 | learning_rates = [1e-4, 1e-3]
245 | use_augs = [True, False, ]
246 | fix_lens = [20, 10]
247 | optimizers = [(Adam, {})]
248 | dropouts = [0.5]
249 | cnn_train_types = ['retrain', 'static', 'evaluate']
250 | 
251 | apply_hyper = False
252 | 
253 | if apply_hyper:
254 |     # the hyper-tuning simulates the architecture behavior;
255 |     # batch_epoch_ratio is reduced to make the hyper-tuning faster, with shorter epochs
256 |     hyper, results = hyper_tune_network(dataset_name=dataset_name, epochs=30,
257 |                                         batch_size=batch_size, batch_epoch_ratio=1, figure_size=figure_size,
258 |                                         initial_weights=initial_weights, lstm=lstm,
259 |                                         cnns_arch=cnns_arch, learning_rates=learning_rates,
260 |                                         optimizers=optimizers, cnn_train_types=cnn_train_types, dropouts=dropouts,
261 |                                         classes=classes, use_augs=use_augs, fix_lens=fix_lens)
262 | 
263 |     pd.DataFrame(results).to_csv("results_hyper.csv")
264 |     cnn_arch, learning_rate, optimizer, cnn_train_type, dropout, use_aug, fix_len = hyper['cnn_arch'], \
265 |                                                                                     hyper['learning_rate'], \
266 |                                                                                     hyper['optimizer'], \
267 |                                                                                     hyper['cnn_train_type'], \
268 |                                                                                     hyper['dropout'], hyper['use_aug'], \
269 |                                                                                     hyper['seq_len']
270 | else:
271 |     results = []
272 |     # CNN options: ResNet50, MobileNetV3_Large, EfficientNetB0, ShuffleNetV2
273 |     # optimizer options: RMSprop, Adam
274 | 
275 |     cnn_arch, learning_rate, optimizer, cnn_train_type, dropout, use_aug, fix_len = EfficientNetB1, 0.0001, (
276 |         Adam, {}), cnn_train_types[0], 0.0, True, 20
277 | 
278 | if (cnn_train_type == 'retrain'):
279 |     # apply the best architecture on all datasets with more epochs
280 |     for dataset_name, 
dataset_videos in datasets_videos.items(): 281 | train_gen, validate_gen, test_x, test_y, seq_len, len_train, len_valid = get_generators(dataset_name, 282 | dataset_videos, 283 | datasets_frames, fix_len, 284 | figure_size, 285 | force=force, 286 | classes=classes, 287 | use_aug=use_aug, 288 | use_crop=True, 289 | crop_dark=crop_dark) 290 | result = train_eval_network(epochs=35, dataset_name=dataset_name, train_gen=train_gen, validate_gen=validate_gen, 291 | test_x=test_x, test_y=test_y, seq_len=seq_len, batch_size=batch_size, 292 | batch_epoch_ratio=0.5, initial_weights=initial_weights, size=figure_size, 293 | cnn_arch=cnn_arch, learning_rate=learning_rate, 294 | optimizer=optimizer, cnn_train_type=cnn_train_type, 295 | pre_weights=weights, lstm_conf=lstm, len_train=len_train, len_valid=len_valid, 296 | dropout=dropout, classes=classes) 297 | results.append(result) 298 | pd.DataFrame(results).to_csv("results_datasets.csv") 299 | print(result) 300 | pd.DataFrame(results).to_csv("results.csv") 301 | elif(cnn_train_type=='evaluate'): 302 | for dataset_name, dataset_videos in datasets_videos.items(): 303 | 304 | train_gen, validate_gen, test_x, test_y, seq_len, len_train, len_valid = get_generators(dataset_name, 305 | dataset_videos, 306 | datasets_frames, fix_len, 307 | figure_size, 308 | force=force, 309 | classes=classes, 310 | use_aug=use_aug, 311 | use_crop=True, 312 | crop_dark=crop_dark) 313 | evaluate(epochs=35, dataset_name=dataset_name, train_gen=train_gen, validate_gen=validate_gen, 314 | test_x=test_x, test_y=test_y, seq_len=seq_len, batch_size=batch_size, 315 | batch_epoch_ratio=0.5, initial_weights=initial_weights, size=figure_size, 316 | cnn_arch=cnn_arch, learning_rate=learning_rate, 317 | optimizer=optimizer, cnn_train_type=cnn_train_type, 318 | pre_weights=weights, lstm_conf=lstm, len_train=len_train, len_valid=len_valid, 319 | dropout=dropout, classes=classes) 320 | 321 | 322 | -------------------------------------------------------------------------------- /SportsActionRecognition_HOG_SVM.py: -------------------------------------------------------------------------------- 1 | __author__ = 'somnath' 2 | 3 | 4 | import numpy as np 5 | import cv2 6 | import sys 7 | import os 8 | import glob 9 | from sklearn import svm 10 | from scipy.stats import mode 11 | 12 | ''' 13 | Program: Sports Action Recognition 14 | 15 | Description: 16 | This program perform the sports action recognition task. First it processes the input videos from UCF sports action 17 | data set.The data set contains 13 different sports action which individually contains multiple videos. A video 18 | directory contain a video file and corresponding frames. I iterate over the different sports actions and read video 19 | frames from each video directory to extract features. I take equal number of videos from each categories. 20 | Further, to optimize the process, I sorted features with highest gradient for HOG. I have used SVM classifier and 21 | cross validation for classification and evaluation respectively. 22 | 23 | I have used provided image frame for each video as I found issue to process *.avi file in Mac. 24 | 25 | 26 | 27 | Feature Extraction: I have used Histogram of Oriented Gradient ( HOG ) method to extract features vector. 28 | 29 | HOG: It is constructed by dividing the image into cells and for each cell computing the 30 | distribution of intensity gradients or edge directions. The concatenating each of these gradient 31 | orientation histograms yields the HOG. 
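           As a rough dimensionality check (assuming the default 64x128 detection window): with a
           16x16 block, 8x8 block stride and 8x8 cells of 9 bins there are 7 x 15 block positions,
           i.e. 7 * 15 * 4 * 9 = 3780 values per window; cv2.HOGDescriptor().compute() concatenates
           such windows across the whole frame, which is why the histogram is sorted and truncated
           to the top 15000 values below.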
32 | 33 | hogDescriptor = cv2.HOGDescriptor() 34 | hist = hogDescriptor.compute(gray) 35 | 36 | I use above two functions to create HOG Descriptor and histogram. 37 | Further, I sort the histogram values and take max 15000 values from each frame for evaluation. 38 | 39 | Classifier: I have used Support Vector Machine (SVM) classifier. The classifier parameters are set based on best result 40 | achieved from different runs. Following are the parameters that has been decided based on the multiple executions. 41 | 42 | Parameters: 43 | gamma=0.01 Lowering the gamma value gives better result, but takes more time. Optimum value has been chosen. 44 | C=13 45 | kernel_type = rbf ( default ) 46 | degree = 3 ( default ) 47 | 48 | Evaluation: It is based on K-Fold cross validation mechanism. 49 | First, I shuffle the feature list which contains features as well as label at the very first element of 50 | the feature vector to obtain better result. The complete set of shuffled features are divided equally 51 | into k=13 sub parts. k-1 subset is used for training and one subset is used for validation. I iterate the 52 | process for k=13 times with different subset combinations for training and validation. 53 | 54 | Evaluation Metrics: 55 | At each iteration, evaluation metrics sensitivity, specificity and accuracy are calculated 56 | based on True Positive (TP), False Positive (FP), False Negative (FN) and True Negative (TN) rates. 57 | 58 | Sensitivity = ( True Positive Rate) = TP / ( TP + FN ) 59 | Specificity = ( True Negative Rate) = TN / ( TN + FP ) 60 | Accuracy = ( TP + TN ) / ( TP + FN + FP + TN ) 61 | 62 | At the end of all iterations of cross validation, I average them all to get average rate. 63 | 64 | 65 | Testing: I also have tested my model to check if that works with unseen data or videos. 66 | For that, I have taken one video from "Diving-Side/014" which has been correctly predicted by my model. 67 | Result is given below. 68 | ''' 69 | 70 | sportsActionPath = "/Users/somnath/MY_PROG/ComputerVision/PA3/ucf_sports_actions/ucf_action" 71 | #sportsActionPath = "/Users/somnath/MY_PROG/ComputerVision/pa3/Training" 72 | 73 | 74 | # Sports Action Tag 75 | sportsActionTag = { 76 | 'Diving-Side': 0, 77 | 'Golf-Swing-Back':1, 78 | 'Golf-Swing-Front':2, 79 | 'Golf-Swing-Side':3, 80 | 'Kicking-Front':4, 81 | 'Kicking-Side':5, 82 | 'Lifting':6, 83 | 'Run-Side':7, 84 | 'SkateBoarding-Front':8, 85 | 'Swing-SideAngle':9, 86 | 'Walk-Front':10, 87 | 'Swing-Bench':11, 88 | 'Riding-Horse':12 89 | } 90 | 91 | 92 | 93 | # Distinct Sports Action Number 94 | sportsActionNumber = len(sportsActionTag) 95 | 96 | featuresLimit = 15000 97 | 98 | 99 | ''' 100 | Function Name: featureExtraction() 101 | Input Args : , , , 102 | Returns : 103 | Description : This function extract features from each frames of a video and consolidated them. 104 | While it extract features, it add label to feature at the beginning of feature vector based on Sports 105 | Action Type. It helps to keep tack of feature and corresponding label while shuffle the features during 106 | cross validation. 107 | 108 | - I have used histogram of oriented gradient (HOG) method to extract the features. 109 | Following methods from cv2 have been used. 
110 | hogDescriptor = cv2.HOGDescriptor() 111 | - It takes default parameter values as Window Size= 64 x 128, block size= 16x16, 112 | block stride= 8x8, cell size= 8x8, bins= 9 113 | hist = hogDescriptor.compute(gray) 114 | - Returns the list of histogram 115 | 116 | - Sorted the Histogram and taken top 15000 for evaluation. 117 | - I take equal number of image frame from all the videos. 118 | ''' 119 | def featureExtraction( videoPath, actionName, type): 120 | 121 | 122 | # Set frame path, if jpeg directory doesn't exist , take images from video dir 123 | framePath = videoPath 124 | if os.path.exists( framePath + "/jpeg") : 125 | framePath += "/jpeg/" 126 | 127 | # Extract feature 128 | imageFrames = getImageList(framePath) 129 | #print "DEBUG: Image Frames - ", imageFrames 130 | 131 | frameCount = 0 132 | frameIndex = 0 133 | 134 | # Feature List for a video 135 | videoFeatures = [] 136 | 137 | for iFrame in imageFrames: 138 | 139 | frameIndex += 1 140 | 141 | # Read Frame 142 | frame = cv2.imread(iFrame) 143 | gray=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY) 144 | 145 | # HOG Descriptor , default value it takes window size= 64x128, block size= 16x16, block stride= 8x8, cell size= 8x8, bins= 9 146 | hogDescriptor = cv2.HOGDescriptor() 147 | 148 | # Returns histogram 149 | hist = hogDescriptor.compute(gray) 150 | 151 | #sortedHogDescriptor = hogDescriptor 152 | sortedHogHist = np.sort(hist, axis=None) 153 | 154 | keyFeatures = sortedHogHist[- featuresLimit : ] 155 | 156 | if type == "Trng": 157 | keyFeatures = np.insert(keyFeatures, 0, sportsActionTag[actionName]) 158 | 159 | videoFeatures.append(keyFeatures) 160 | 161 | # Lowest number of frame available in a video 162 | if frameCount >= 23: 163 | break 164 | 165 | frameCount += 1 166 | 167 | 168 | return videoFeatures 169 | 170 | 171 | ''' 172 | Function Name: getImageList() 173 | Input Args : 174 | Return : 175 | Description : This function returns list of images. 176 | ''' 177 | def getImageList(imageDirectory): 178 | 179 | # Find different type of images 180 | rImages = glob.glob(imageDirectory + "/*.jpg") 181 | rImages += glob.glob(imageDirectory + "/*.jpeg") 182 | rImages += glob.glob(imageDirectory + "/*.png") 183 | 184 | return rImages 185 | 186 | 187 | ''' 188 | Function Name: getListOfDir() 189 | Input Args : < Path > 190 | Return : 191 | Description : This function returns all the directories under the specified paths 192 | ''' 193 | def getListOfDir(path): 194 | # Read each sport action directory 195 | dirs = os.listdir(path) 196 | 197 | sportsActionsCount = 0 198 | filtered_dir = [] 199 | # Remove . .. and hidden directory 200 | for dir in dirs: 201 | if not dir.startswith("."): 202 | filtered_dir.append(dir) 203 | 204 | return filtered_dir 205 | 206 | ''' 207 | Function Name: getSportsActionName() 208 | Input Args : < Sports Action Index> 209 | Return : 210 | Description : This function returns the name of Sports Action based on index value 211 | 212 | ''' 213 | def getSportsActionName(saIndex): 214 | 215 | keys = sportsActionTag.keys() 216 | 217 | for key in keys: 218 | if saIndex == sportsActionTag[key]: 219 | return key 220 | 221 | ''' 222 | Function Name: evaluation() 223 | Input Args : < 1D Array: Truth>, <1D Array: Predicted>, < Sports Action Index> 224 | Return : ,, 225 | Description : This function calculate evaluation metrics sensitivity, specificity and accuracy 226 | based on True Positive (TP), False Positive (FP), False Negative (FN) and True Negative (TN) rate. 
227 | 
228 |                Sensitivity = ( True Positive Rate ) = TP / ( TP + FN )
229 |                Specificity = ( True Negative Rate ) = TN / ( TN + FP )
230 |                Accuracy    = ( TP + TN ) / ( TP + FN + FP + TN )
231 | 
232 | '''
233 | 
234 | def evaluation( truth, predicted, categoryIndex ):
235 | 
236 |     # TP,FP,FN,TN indicate True Positive, False Positive, False Negative, True Negative respectively
237 |     TP = 1
238 |     FP = 1
239 |     FN = 1
240 |     TN = 1
241 | 
242 |     # Categories are Sports Action 1=>0, Sports Action 2=>1, Sports Action 3=>2 etc..
243 |     for fIndex in range(len(truth)):
244 | 
245 |         # Positive prediction for each feature
246 |         if ( int(predicted[fIndex]) == categoryIndex):
247 |             # TP => when P[i] = T[i] = Ci
248 |             if (int(truth[fIndex]) == int (predicted[fIndex])):
249 |                 TP += 1
250 |             else:
251 |                 FP += 1
252 |         else: # Negative Prediction
253 |             if ( int ( truth[fIndex]) == categoryIndex ):
254 |                 FN += 1
255 |             else:
256 |                 TN += 1
257 | 
258 | 
259 |     # Calculate Sensitivity - True Positive Rate - Recall
260 |     sensitivity = TP / float ( TP + FN )
261 | 
262 |     # Specificity - True Negative Rate
263 |     specificity = TN / float ( TN + FP )
264 | 
265 |     # Calculate accuracy
266 |     accuracy = ( TP + TN ) / float ( TP + FP + FN + TN )
267 | 
268 | 
269 |     return sensitivity, specificity, accuracy
270 | 
271 | '''
272 | Function Name: crossValidation()
273 | Input Args   : <Array: Feature and label list - the first element of each vector is the action label, the rest are the features>
274 | Return       : None
275 | Description  : It performs K-Fold cross validation.
276 |                First, I shuffle the feature list, which carries the label as the very first element of
277 |                each feature vector, to obtain a better result. The complete set of shuffled features is divided equally
278 |                into k=13 subsets. k-1 subsets are used for training and one subset is used for validation. I iterate the
279 |                process k=13 times with different subset combinations for training and validation.
280 | 
281 |                Evaluation Metrics:
282 |                At each iteration, the evaluation metrics sensitivity, specificity and accuracy are calculated
283 |                based on the True Positive (TP), False Positive (FP), False Negative (FN) and True Negative (TN) rates.
284 | 
285 |                Sensitivity = ( True Positive Rate ) = TP / ( TP + FN )
286 |                Specificity = ( True Negative Rate ) = TN / ( TN + FP )
287 |                Accuracy    = ( TP + TN ) / ( TP + FN + FP + TN )
288 | 
289 |                At the end of all iterations of cross validation, I average them all to get the average rate.
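               For a rough sense of scale (assuming each video directory yields the full 24 frames
               kept by featureExtraction): 13 actions x 5 videos x 24 frames = 1560 feature vectors,
               so each of the k=13 folds holds subsetLength = 1560 / 13 = 120 vectors.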
290 | '''
291 | def crossValidation( featureAndLabelList):
292 | 
293 |     # Randomize the sample
294 |     np.random.shuffle(featureAndLabelList)
295 | 
296 | 
297 |     # Evaluation Metrics
298 |     sensitivity = 0.0
299 |     specificity = 0.0
300 |     accuracy = 0.0
301 | 
302 | 
303 |     # Split the feature set into as many equal subsets as there are sports actions (k folds)
304 |     subsetLength = len(featureAndLabelList) / sportsActionNumber
305 |     for rIndex in range(sportsActionNumber):
306 | 
307 |         print "INFO: Cross Validation Iteration - ", rIndex
308 |         trainingSet = []
309 |         validationSet = []
310 |         feature = []
311 |         label = []
312 | 
313 | 
314 |         if ( rIndex == 0 ):
315 |             trainingSet = featureAndLabelList[1*subsetLength:]
316 |             validationSet = featureAndLabelList[0: subsetLength]
317 |         elif ( rIndex == (sportsActionNumber - 1) ):
318 |             trainingSet = featureAndLabelList[:(sportsActionNumber - 1)*subsetLength]
319 |             validationSet = featureAndLabelList[(sportsActionNumber - 1)*subsetLength : ]
320 |         else:
321 |             trainingSet = np.concatenate ((featureAndLabelList[:rIndex * subsetLength] , featureAndLabelList[(rIndex + 1) * subsetLength: ]), axis=0 )
322 |             validationSet = featureAndLabelList[rIndex * subsetLength : (rIndex + 1 ) * subsetLength]
323 | 
324 |         # Get all features in an array
325 |         for featureAndLabel in trainingSet:
326 |             label.append(int(featureAndLabel[0]))
327 |             feature.append((np.delete(featureAndLabel, 0)).tolist())
328 | 
329 | 
330 |         # Train model
331 |         print "INFO: Training ... "
332 |         clf = svm.SVC(gamma=0.01, C=13)
333 |         clf.fit(feature,label)
334 | 
335 |         # Prepare validation feature and label lists to be predicted
336 |         print "INFO: Prediction for ", getSportsActionName(rIndex)
337 |         vFeatureList = []
338 |         vLabelList = [] # Ground Truth
339 |         for featureAndLabel in validationSet:
340 |             vFeatureList.append(featureAndLabel[1:].tolist())
341 |             vLabelList.append(featureAndLabel[0])
342 | 
343 |         # Predict the class label for the validation feature list
344 |         predictedLabel = clf.predict(vFeatureList)
345 | 
346 |         # Evaluate the predictions on the validation set
347 |         print "INFO: Evaluating ... 
" 348 | #print "\t Truth - ", vLabelList 349 | #print "\t Predicted - ", str(predictedLabel.tolist()) 350 | 351 | # Evaluation < Truth>, , 352 | (sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , rIndex) 353 | 354 | sensitivity += sen 355 | specificity += spec 356 | accuracy += accu 357 | 358 | print "\t Sensitivity : ", sen 359 | print "\t Specificity : ", spec 360 | print "\t Accuracy : ", accu 361 | 362 | 363 | # Average evaluation metrics 364 | avgSensitivity = sensitivity / sportsActionNumber 365 | avgSpecificity = specificity / sportsActionNumber 366 | avgAccuracy = accuracy / sportsActionNumber 367 | 368 | 369 | print " *** Overall Evaluation ***" 370 | print " Average Sensitivity: ", avgSensitivity 371 | print " Average Specificity: ", avgSpecificity 372 | print " Average Accuracy : ", avgAccuracy 373 | 374 | 375 | 376 | def main(): 377 | print "INFO: Action Recognition" 378 | 379 | sportsActionList = getListOfDir( sportsActionPath ) 380 | print "INFO: Sports Action - ",sportsActionList 381 | 382 | sportsActionFeatures = [] 383 | 384 | firstActionFlag = 0 385 | for sportsActionName in sportsActionList: 386 | sportsActionDir = sportsActionPath + "/" + sportsActionName 387 | # Get list of videos from each sports action 388 | videoList = getListOfDir(sportsActionDir) 389 | 390 | print "INFO: Video List:", videoList 391 | 392 | videoCount = 1 393 | videoFeatures = [] 394 | # For all video in each action category 395 | for video in videoList: 396 | 397 | # For good result decided to use same number of videos from Action Sports. And same number of frame from each frame 398 | if videoCount > 5: 399 | break 400 | 401 | # complete path of video containing jpeg images 402 | videoPath = sportsActionDir + "/" + video 403 | print "\tVideo Path:", videoPath 404 | 405 | # Extract Features 406 | videoFeatures = featureExtraction(videoPath , sportsActionName, 'Trng') 407 | 408 | # Put together all the videos 409 | if firstActionFlag == 0: 410 | sportsActionFeatures = videoFeatures 411 | firstActionFlag = 1 412 | else: 413 | sportsActionFeatures = np.concatenate( (sportsActionFeatures, videoFeatures), axis=0) 414 | 415 | videoCount += 1 416 | 417 | ## K-Fold Cross Validation method 418 | crossValidation(sportsActionFeatures) 419 | 420 | ## **** Testing with unseen data **** ## 421 | 422 | np.random.shuffle(sportsActionFeatures) 423 | label = [] 424 | feature = [] 425 | # Get all features in a array 426 | for featureAndLabel in sportsActionFeatures: 427 | label.append(int(featureAndLabel[0])) 428 | feature.append((np.delete(featureAndLabel, 0)).tolist()) 429 | 430 | 431 | 432 | # Train model 433 | print "INFO: Training ... " 434 | clf = svm.SVC(gamma=0.01, C=13) 435 | clf.fit(feature,label) 436 | 437 | # Test Path 438 | tPath = "/Users/somnath/MY_PROG/ComputerVision/PA3/ucf_sports_actions/ucf_action/Diving-Side/014" 439 | vFeatures = featureExtraction(tPath , sportsActionName, 'Test') 440 | predictedLabels = clf.predict(vFeatures) 441 | 442 | #print "Predicted Labels:", predictedLabels 443 | predictedLabelMode = (mode(predictedLabels))[0] 444 | print "\t Predicted Sports Action:{0} - {1}".format(predictedLabelMode,getSportsActionName(predictedLabelMode) ) 445 | 446 | 447 | if __name__ == "__main__": 448 | main() 449 | 450 | 451 | 452 | 453 | ''' 454 | RESULT: 455 | INFO: Cross Validation Iteration - 0 456 | INFO: Training ... 457 | INFO: Prediction for Diving-Side 458 | INFO: Evaluating ... 
459 | Sensitivity : 0.692307692308 460 | Specificity : 0.963636363636 461 | Accuracy : 0.934959349593 462 | INFO: Cross Validation Iteration - 1 463 | INFO: Training ... 464 | INFO: Prediction for Golf-Swing-Back 465 | INFO: Evaluating ... 466 | Sensitivity : 0.272727272727 467 | Specificity : 0.910714285714 468 | Accuracy : 0.853658536585 469 | INFO: Cross Validation Iteration - 2 470 | INFO: Training ... 471 | INFO: Prediction for Golf-Swing-Front 472 | INFO: Evaluating ... 473 | Sensitivity : 0.5 474 | Specificity : 0.965811965812 475 | Accuracy : 0.943089430894 476 | INFO: Cross Validation Iteration - 3 477 | INFO: Training ... 478 | INFO: Prediction for Golf-Swing-Side 479 | INFO: Evaluating ... 480 | Sensitivity : 0.9 481 | Specificity : 0.946902654867 482 | Accuracy : 0.943089430894 483 | INFO: Cross Validation Iteration - 4 484 | INFO: Training ... 485 | INFO: Prediction for Kicking-Front 486 | INFO: Evaluating ... 487 | Sensitivity : 0.2 488 | Specificity : 0.982300884956 489 | Accuracy : 0.918699186992 490 | INFO: Cross Validation Iteration - 5 491 | INFO: Training ... 492 | INFO: Prediction for Kicking-Side 493 | INFO: Evaluating ... 494 | Sensitivity : 0.1 495 | Specificity : 0.982300884956 496 | Accuracy : 0.910569105691 497 | INFO: Cross Validation Iteration - 6 498 | INFO: Training ... 499 | INFO: Prediction for Lifting 500 | INFO: Evaluating ... 501 | Sensitivity : 0.888888888889 502 | Specificity : 0.973684210526 503 | Accuracy : 0.967479674797 504 | INFO: Cross Validation Iteration - 7 505 | INFO: Training ... 506 | INFO: Prediction for Run-Side 507 | INFO: Evaluating ... 508 | Sensitivity : 0.583333333333 509 | Specificity : 0.90990990991 510 | Accuracy : 0.878048780488 511 | INFO: Cross Validation Iteration - 8 512 | INFO: Training ... 513 | INFO: Prediction for SkateBoarding-Front 514 | INFO: Evaluating ... 515 | Sensitivity : 0.3 516 | Specificity : 0.955752212389 517 | Accuracy : 0.90243902439 518 | INFO: Cross Validation Iteration - 9 519 | INFO: Training ... 520 | INFO: Prediction for Swing-SideAngle 521 | INFO: Evaluating ... 522 | Sensitivity : 0.46511627907 523 | Specificity : 0.934090909091 524 | Accuracy : 0.892339544513 525 | INFO: Cross Validation Iteration - 10 526 | INFO: Training ... 527 | INFO: Prediction for Walk-Front 528 | INFO: Evaluating ... 529 | Sensitivity : 0.363636363636 530 | Specificity : 0.955357142857 531 | Accuracy : 0.90243902439 532 | INFO: Cross Validation Iteration - 11 533 | INFO: Training ... 534 | INFO: Prediction for Swing-Bench 535 | INFO: Evaluating ... 536 | Sensitivity : 0.8 537 | Specificity : 0.940677966102 538 | Accuracy : 0.934959349593 539 | INFO: Cross Validation Iteration - 12 540 | INFO: Training ... 541 | INFO: Prediction for Riding-Horse 542 | INFO: Evaluating ... 543 | Sensitivity : 0.9 544 | Specificity : 0.902654867257 545 | Accuracy : 0.90243902439 546 | *** Overall Evaluation *** 547 | Average Sensitivity: 0.535846909997 548 | Average Specificity: 0.947984173698 549 | Average Accuracy : 0.914169958709 550 | 551 | 552 | ### Testing with unseen data or video which has not been used for training 553 | Test Video: /Users/somnath/MY_PROG/ComputerVision/PA3/ucf_sports_actions/ucf_action/Diving-Side/014 554 | 555 | INFO: Training ... 
556 | 	 Predicted Sports Action:[0] - Diving-Side
557 | 
558 | '''
559 | 
-------------------------------------------------------------------------------- /SportsActionRecognition_HOG_RF.py: --------------------------------------------------------------------------------
1 | __author__ = 'somnath'
2 | 
3 | 
4 | import numpy as np
5 | import cv2
6 | import sys
7 | import os
8 | import glob
9 | from sklearn import svm
10 | from scipy.stats import mode
11 | from sklearn.ensemble import RandomForestClassifier
12 | from tqdm import tqdm
13 | '''
14 | Program: Sports Action Recognition
15 | 
16 | Description:
17 | This program performs the sports action recognition task. First it processes the input videos from the UCF sports action
18 | data set. The data set contains 13 different sports actions, each of which contains multiple videos. A video
19 | directory contains a video file and the corresponding frames. I iterate over the different sports actions and read video
20 | frames from each video directory to extract features. I take an equal number of videos from each category.
21 | Further, to optimize the process, I sorted the HOG features by highest gradient. I have used a Random Forest classifier and
22 | cross validation for classification and evaluation respectively.
23 | 
24 | I have used the provided image frames for each video, as I had issues processing *.avi files on a Mac.
25 | 
26 | 
27 | 
28 | Feature Extraction: I have used the Histogram of Oriented Gradients ( HOG ) method to extract the feature vectors.
29 | 
30 |      HOG: It is constructed by dividing the image into cells and, for each cell, computing the
31 |           distribution of intensity gradients or edge directions. Concatenating each of these gradient
32 |           orientation histograms yields the HOG descriptor.
33 | 
34 |           hogDescriptor = cv2.HOGDescriptor()
35 |           hist = hogDescriptor.compute(gray)
36 | 
37 |           I use the above two functions to create the HOG descriptor and histogram.
38 |           Further, I sort the histogram values and take the top 15000 values from each frame for evaluation.
39 | 
40 | Classifier: A Random Forest classifier has been used. The classifier parameters are set based on the best result
41 |             achieved over different runs.
42 | 
43 |             Parameters:
44 |             n_estimators ( number of trees ) - tuned over multiple runs; the accompanying violentflow_HOG.ipynb uses n_estimators=13.
45 |             The remaining parameters are left at their scikit-learn defaults.
46 | 
47 | 
48 | 
49 | Evaluation: It is based on the K-Fold cross validation mechanism.
50 |             First, I shuffle the feature list, which carries the label as the very first element of
51 |             each feature vector, to obtain a better result. The complete set of shuffled features is divided equally
52 |             into k=13 subsets. k-1 subsets are used for training and one subset is used for validation. I iterate the
53 |             process k=13 times with different subset combinations for training and validation.
54 | 
55 |             Evaluation Metrics:
56 |             At each iteration, the evaluation metrics sensitivity, specificity and accuracy are calculated
57 |             based on the True Positive (TP), False Positive (FP), False Negative (FN) and True Negative (TN) rates.
58 | 
59 |             Sensitivity = ( True Positive Rate ) = TP / ( TP + FN )
60 |             Specificity = ( True Negative Rate ) = TN / ( TN + FP )
61 |             Accuracy    = ( TP + TN ) / ( TP + FN + FP + TN )
62 | 
63 |             At the end of all iterations of cross validation, I average them all to get the average rate.
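            For reference, swapping classifiers is a one-line change in the training step
            (sketch; the concrete settings mirror this repo's violentflow_HOG.ipynb and the SVM variant):
                clf = RandomForestClassifier(n_estimators=13)
                clf = svm.SVC(gamma=0.01, C=13)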
64 | 
65 | 
66 | Testing: I have also tested the model to check whether it works on unseen data, i.e. videos
67 | not used for training. For that I took one video, "Diving-Side/014", which the model
68 | predicted correctly. The result is given below.
69 | '''
70 | 
71 | sportsActionPath = "/Users/admin/Documents/data/ucf_sports_actions/ucf_action"
72 | #sportsActionPath = "/Users/somnath/MY_PROG/ComputerVision/pa3/Training"
73 | 
74 | 
75 | # Sports Action Tag
76 | sportsActionTag = {
77 |     'Diving-Side': 0,
78 |     'Golf-Swing-Back': 1,
79 |     'Golf-Swing-Front': 2,
80 |     'Golf-Swing-Side': 3,
81 |     'Kicking-Front': 4,
82 |     'Kicking-Side': 5,
83 |     'Lifting': 6,
84 |     'Run-Side': 7,
85 |     'SkateBoarding-Front': 8,
86 |     'Swing-SideAngle': 9,
87 |     'Walk-Front': 10,
88 |     'Swing-Bench': 11,
89 |     'Riding-Horse': 12
90 | }
91 | 
92 | 
93 | 
94 | # Number of distinct sports actions
95 | sportsActionNumber = len(sportsActionTag)
96 | 
97 | featuresLimit = 15000
98 | 
99 | 
100 | '''
101 | Function Name: featureExtraction()
102 | Input Args   : <videoPath>, <actionName>, <type>
103 | Returns      : <videoFeatures - list of per-frame feature vectors>
104 | Description  : This function extracts features from each frame of a video and consolidates them.
105 | While extracting features, it prepends the label (derived from the sports action type) to each
106 | feature vector. This keeps track of each feature and its corresponding label when the features
107 | are shuffled during cross validation.
108 | 
109 | - I have used the histogram of oriented gradients (HOG) method to extract the features.
110 | The following cv2 calls are used:
111 | hogDescriptor = cv2.HOGDescriptor()
112 | - Uses the default parameter values: window size = 64x128, block size = 16x16,
113 | block stride = 8x8, cell size = 8x8, bins = 9
114 | hist = hogDescriptor.compute(gray)
115 | - Returns the HOG histogram
116 | 
117 | - The histogram is sorted and the top 15000 values are kept for evaluation.
118 | - An equal number of image frames is taken from every video.
119 | '''
120 | def featureExtraction(videoPath, actionName, type):  # 'type' is "Trng" (label prepended) or "Test"
121 | 
122 | 
123 |     # Set frame path; if a jpeg directory doesn't exist, take images from the video dir
124 |     framePath = videoPath
125 |     if os.path.exists(framePath + "/jpeg"):
126 |         framePath += "/jpeg/"
127 | 
128 |     # Extract features
129 |     imageFrames = getImageList(framePath)
130 |     #print("DEBUG: Image Frames - ", imageFrames)
131 | 
132 |     frameCount = 0
133 |     frameIndex = 0
134 | 
135 |     # Feature list for a video
136 |     videoFeatures = []
137 | 
138 |     for iFrame in imageFrames:
139 | 
140 |         frameIndex += 1
141 | 
142 |         # Read frame and convert to grayscale
143 |         frame = cv2.imread(iFrame)
144 |         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
145 | 
146 |         # HOG descriptor; by default it uses window size = 64x128, block size = 16x16, block stride = 8x8, cell size = 8x8, bins = 9
147 |         hogDescriptor = cv2.HOGDescriptor()
148 | 
149 |         # Compute the HOG histogram for the frame
150 |         hist = hogDescriptor.compute(gray)
151 | 
152 |         # Sort the flattened histogram ascending and keep the featuresLimit largest values
153 |         sortedHogHist = np.sort(hist, axis=None)
154 | 
155 |         keyFeatures = sortedHogHist[-featuresLimit:]
156 | 
157 |         if type == "Trng":
158 |             keyFeatures = np.insert(keyFeatures, 0, sportsActionTag[actionName])
159 | 
160 |         videoFeatures.append(keyFeatures)
161 | 
162 |         # Stop at 24 frames: the lowest number of frames available in any video
163 |         if frameCount >= 23:
164 |             break
165 | 
166 |         frameCount += 1
167 | 
168 | 
169 |     return videoFeatures
170 | 
171 | 
172 | '''
173 | Function Name: getImageList()
174 | Input Args   : <imageDirectory>
175 | Return       : <list of image file paths>
176 | Description  : This function returns the list of images in a directory.
177 | '''
178 | def getImageList(imageDirectory):
179 | 
180 |     # Find the different types of images
181 |     rImages = glob.glob(imageDirectory + "/*.jpg")
182 |     rImages += glob.glob(imageDirectory + "/*.jpeg")
183 |     rImages += glob.glob(imageDirectory + "/*.png")
184 | 
185 |     return rImages
186 | 
187 | 
188 | '''
189 | Function Name: getListOfDir()
190 | Input Args   : <path>
191 | Return       : <list of directory names>
192 | Description  : This function returns all the directories under the specified path.
193 | '''
194 | def getListOfDir(path):
195 |     # Read each sports action directory
196 |     dirs = os.listdir(path)
197 | 
198 |     sportsActionsCount = 0
199 |     filtered_dir = []
200 |     # Remove hidden directories (names starting with "."), including . and ..
201 |     for dir in dirs:
202 |         if not dir.startswith("."):
203 |             filtered_dir.append(dir)
204 | 
205 |     return filtered_dir
206 | 
207 | '''
208 | Function Name: getSportsActionName()
209 | Input Args   : <sports action index>
210 | Return       : <sports action name>
211 | Description  : This function returns the name of a sports action based on its index value.
212 | 
213 | '''
214 | def getSportsActionName(saIndex):
215 | 
216 |     keys = sportsActionTag.keys()
217 | 
218 |     for key in keys:
219 |         if saIndex == sportsActionTag[key]:
220 |             return key
221 | 
222 | '''
223 | Function Name: evaluation()
224 | Input Args   : <1D array: truth>, <1D array: predicted>, <sports action index>
225 | Return       : <sensitivity>, <specificity>, <accuracy>
226 | Description  : This function calculates the evaluation metrics sensitivity, specificity and accuracy
227 | from the True Positive (TP), False Positive (FP), False Negative (FN) and True Negative (TN) counts.
228 | 
229 | Sensitivity = ( True Positive Rate ) = TP / ( TP + FN )
230 | Specificity = ( True Negative Rate ) = TN / ( TN + FP )
231 | Accuracy = ( TP + TN ) / ( TP + FN + FP + TN )
232 | 
233 | '''
234 | 
235 | def evaluation(truth, predicted, categoryIndex):
236 | 
237 |     # TP, FP, FN, TN indicate True Positive, False Positive, False Negative, True Negative respectively; all start at 1 rather than 0, a crude add-one smoothing that also avoids division by zero
238 |     TP = 1
239 |     FP = 1
240 |     FN = 1
241 |     TN = 1
242 | 
243 |     # Categories are Sports Action 1 => 0, Sports Action 2 => 1, Sports Action 3 => 2, etc.
244 |     for fIndex in range(len(truth)):
245 | 
246 |         # Positive prediction for this sample
247 |         if int(predicted[fIndex]) == categoryIndex:
248 |             # TP => when P[i] = T[i] = Ci
249 |             if int(truth[fIndex]) == int(predicted[fIndex]):
250 |                 TP += 1
251 |             else:
252 |                 FP += 1
253 |         else:  # Negative prediction
254 |             if int(truth[fIndex]) == categoryIndex:
255 |                 FN += 1
256 |             else:
257 |                 TN += 1
258 | 
259 | 
260 |     # Calculate sensitivity - True Positive Rate - recall
261 |     sensitivity = TP / float(TP + FN)
262 | 
263 |     # Specificity - True Negative Rate
264 |     specificity = TN / float(TN + FP)
265 | 
266 |     # Calculate accuracy
267 |     accuracy = (TP + TN) / float(TP + FP + FN + TN)
268 | 
269 | 
270 |     return sensitivity, specificity, accuracy
271 | 
272 | '''
273 | Function Name: crossValidation()
274 | Input Args   : <array: feature-and-label list; the first element of each vector is the action label, the rest are the features>
275 | Return       : None
276 | Description  : This function performs k-fold cross validation.
277 | First, I shuffle the feature list (which carries the label as the very first element of
278 | each feature vector) to obtain better results. The complete set of shuffled features is divided equally
279 | into k=13 subsets. k-1 subsets are used for training and one subset is used for validation. I repeat the
280 | process k=13 times with different subset combinations for training and validation.
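(For example, with k=13 folds, iteration rIndex=2 trains on folds 0-1 and 3-12 and validates on
fold 2, i.e. on featureAndLabelList[2*subsetLength : 3*subsetLength], matching the slicing below.)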
281 | 
282 | Evaluation Metrics:
283 | At each iteration, the evaluation metrics sensitivity, specificity and accuracy are calculated
284 | from the True Positive (TP), False Positive (FP), False Negative (FN) and True Negative (TN) counts.
285 | 
286 | Sensitivity = ( True Positive Rate ) = TP / ( TP + FN )
287 | Specificity = ( True Negative Rate ) = TN / ( TN + FP )
288 | Accuracy = ( TP + TN ) / ( TP + FN + FP + TN )
289 | 
290 | At the end of all cross validation iterations, I average these metrics to get the overall rates.
291 | '''
292 | def crossValidation(featureAndLabelList):
293 | 
294 |     # Randomize the sample order
295 |     np.random.shuffle(featureAndLabelList)
296 | 
297 | 
298 |     # Evaluation metrics, accumulated across folds
299 |     sensitivity = 0.0
300 |     specificity = 0.0
301 |     accuracy = 0.0
302 | 
303 | 
304 |     # Split the feature set into as many equal subsets as there are sports actions
305 |     subsetLength = len(featureAndLabelList) // sportsActionNumber  # integer division keeps the slice bounds ints under Python 3
306 |     for rIndex in range(sportsActionNumber):
307 | 
308 |         print("INFO: Cross Validation Iteration - ", rIndex)
309 |         trainingSet = []
310 |         validationSet = []
311 |         feature = []
312 |         label = []
313 | 
314 | 
315 |         if rIndex == 0:
316 |             trainingSet = featureAndLabelList[1*subsetLength:]
317 |             validationSet = featureAndLabelList[0: subsetLength]
318 |         elif rIndex == (sportsActionNumber - 1):
319 |             trainingSet = featureAndLabelList[:(sportsActionNumber - 1)*subsetLength]
320 |             validationSet = featureAndLabelList[(sportsActionNumber - 1)*subsetLength:]
321 |         else:
322 |             trainingSet = np.concatenate((featureAndLabelList[:rIndex * subsetLength], featureAndLabelList[(rIndex + 1) * subsetLength:]), axis=0)
323 |             validationSet = featureAndLabelList[rIndex * subsetLength: (rIndex + 1) * subsetLength]
324 | 
325 |         # Collect the training features and labels into arrays
326 |         for featureAndLabel in trainingSet:
327 |             label.append(int(featureAndLabel[0]))
328 |             feature.append((np.delete(featureAndLabel, 0)).tolist())
329 | 
330 | 
331 |         # Train model
332 |         print("INFO: Training ... ")
333 |         clf = RandomForestClassifier(n_estimators=13)
334 |         clf = clf.fit(feature, label)
335 | 
336 |         # Prepare the validation features and labels to be predicted
337 |         print("INFO: Prediction for ", getSportsActionName(rIndex))
338 |         vFeatureList = []
339 |         vLabelList = []  # Ground truth
340 |         for featureAndLabel in validationSet:
341 |             vFeatureList.append(featureAndLabel[1:].tolist())
342 |             vLabelList.append(featureAndLabel[0])
343 | 
344 |         # Predict the class labels for the validation feature list
345 |         predictedLabel = clf.predict(vFeatureList)
346 | 
347 |         # Evaluate the predictions on the validation set
348 |         print("INFO: Evaluating ... ")
" 349 | 350 | # Evaluation < Truth>, , 351 | (sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , rIndex) 352 | 353 | sensitivity += sen 354 | specificity += spec 355 | accuracy += accu 356 | 357 | print "\t Sensitivity : ", sen 358 | print "\t Specificity : ", spec 359 | print "\t Accuracy : ", accu 360 | 361 | 362 | # Average evaluation metrics 363 | avgSensitivity = sensitivity / sportsActionNumber 364 | avgSpecificity = specificity / sportsActionNumber 365 | avgAccuracy = accuracy / sportsActionNumber 366 | 367 | 368 | print " *** Overall Evaluation ***" 369 | print " Average Sensitivity: ", avgSensitivity 370 | print " Average Specificity: ", avgSpecificity 371 | print " Average Accuracy : ", avgAccuracy 372 | 373 | 374 | 375 | def main(): 376 | print "INFO: Action Recognition" 377 | 378 | sportsActionList = getListOfDir( sportsActionPath ) 379 | print "INFO: Sports Action - ",sportsActionList 380 | 381 | sportsActionFeatures = [] 382 | 383 | firstActionFlag = 0 384 | for sportsActionName in tqdm(sportsActionList): 385 | sportsActionDir = sportsActionPath + "/" + sportsActionName 386 | # Get list of videos from each sports action 387 | videoList = getListOfDir(sportsActionDir) 388 | 389 | print "INFO: Video List:", videoList 390 | 391 | videoCount = 1 392 | videoFeatures = [] 393 | # For all video in each action category 394 | for video in videoList: 395 | 396 | # For good result decided to use same number of videos from Action Sports. And same number of frame from each frame 397 | if videoCount > 2: 398 | break 399 | 400 | # complete path of video containing jpeg images 401 | videoPath = sportsActionDir + "/" + video 402 | print "\tVideo Path:", videoPath 403 | 404 | # Extract Features 405 | videoFeatures = featureExtraction(videoPath , sportsActionName, 'Trng') 406 | 407 | # Put together all the videos 408 | if firstActionFlag == 0: 409 | sportsActionFeatures = videoFeatures 410 | firstActionFlag = 1 411 | else: 412 | sportsActionFeatures = np.concatenate( (sportsActionFeatures, videoFeatures), axis=0) 413 | 414 | videoCount += 1 415 | 416 | ## K-Fold Cross Validation 417 | crossValidation(sportsActionFeatures) 418 | 419 | ## **** Testing with unseen data **** ## 420 | 421 | np.random.shuffle(sportsActionFeatures) 422 | print(sportsActionFeatures.shape) 423 | label = [] 424 | feature = [] 425 | # Get all features in a array 426 | for featureAndLabel in sportsActionFeatures: 427 | label.append(int(featureAndLabel[0])) 428 | feature.append((np.delete(featureAndLabel, 0)).tolist()) 429 | 430 | 431 | 432 | # Train model 433 | print "INFO: Training ... " 434 | #clf = svm.SVC(gamma=0.01, C=13) 435 | #clf.fit(feature,label) 436 | clf=RandomForestClassifier(n_estimators=13) 437 | clf=clf.fit(feature,label) 438 | 439 | # Test Path 440 | tPath = "/Users/admin/Documents/data/ucf_sports_actions/ucf_action/Walk-Front/015" 441 | vFeatures = featureExtraction(tPath , sportsActionName, 'Test') 442 | #predictedLabels = clf.predict(vFeatures) 443 | predictedLabels = clf.predict(vFeatures) 444 | #print "Predicted Labels:", predictedLabels 445 | predictedLabelMode = (mode(predictedLabels))[0] 446 | print "\t Predicted Sports Action:{0} - {1}".format(predictedLabelMode,getSportsActionName(predictedLabelMode) ) 447 | 448 | 449 | if __name__ == "__main__": 450 | main() 451 | 452 | 453 | 454 | 455 | ''' 456 | RESULT: 457 | INFO: Cross Validation Iteration - 0 458 | INFO: Training ... 459 | INFO: Prediction for Diving-Side 460 | INFO: Evaluating ... 
461 | Sensitivity : 0.692307692308 462 | Specificity : 0.963636363636 463 | Accuracy : 0.934959349593 464 | INFO: Cross Validation Iteration - 1 465 | INFO: Training ... 466 | INFO: Prediction for Golf-Swing-Back 467 | INFO: Evaluating ... 468 | Sensitivity : 0.272727272727 469 | Specificity : 0.910714285714 470 | Accuracy : 0.853658536585 471 | INFO: Cross Validation Iteration - 2 472 | INFO: Training ... 473 | INFO: Prediction for Golf-Swing-Front 474 | INFO: Evaluating ... 475 | Sensitivity : 0.5 476 | Specificity : 0.965811965812 477 | Accuracy : 0.943089430894 478 | INFO: Cross Validation Iteration - 3 479 | INFO: Training ... 480 | INFO: Prediction for Golf-Swing-Side 481 | INFO: Evaluating ... 482 | Sensitivity : 0.9 483 | Specificity : 0.946902654867 484 | Accuracy : 0.943089430894 485 | INFO: Cross Validation Iteration - 4 486 | INFO: Training ... 487 | INFO: Prediction for Kicking-Front 488 | INFO: Evaluating ... 489 | Sensitivity : 0.2 490 | Specificity : 0.982300884956 491 | Accuracy : 0.918699186992 492 | INFO: Cross Validation Iteration - 5 493 | INFO: Training ... 494 | INFO: Prediction for Kicking-Side 495 | INFO: Evaluating ... 496 | Sensitivity : 0.1 497 | Specificity : 0.982300884956 498 | Accuracy : 0.910569105691 499 | INFO: Cross Validation Iteration - 6 500 | INFO: Training ... 501 | INFO: Prediction for Lifting 502 | INFO: Evaluating ... 503 | Sensitivity : 0.888888888889 504 | Specificity : 0.973684210526 505 | Accuracy : 0.967479674797 506 | INFO: Cross Validation Iteration - 7 507 | INFO: Training ... 508 | INFO: Prediction for Run-Side 509 | INFO: Evaluating ... 510 | Sensitivity : 0.583333333333 511 | Specificity : 0.90990990991 512 | Accuracy : 0.878048780488 513 | INFO: Cross Validation Iteration - 8 514 | INFO: Training ... 515 | INFO: Prediction for SkateBoarding-Front 516 | INFO: Evaluating ... 517 | Sensitivity : 0.3 518 | Specificity : 0.955752212389 519 | Accuracy : 0.90243902439 520 | INFO: Cross Validation Iteration - 9 521 | INFO: Training ... 522 | INFO: Prediction for Swing-SideAngle 523 | INFO: Evaluating ... 524 | Sensitivity : 0.46511627907 525 | Specificity : 0.934090909091 526 | Accuracy : 0.892339544513 527 | INFO: Cross Validation Iteration - 10 528 | INFO: Training ... 529 | INFO: Prediction for Walk-Front 530 | INFO: Evaluating ... 531 | Sensitivity : 0.363636363636 532 | Specificity : 0.955357142857 533 | Accuracy : 0.90243902439 534 | INFO: Cross Validation Iteration - 11 535 | INFO: Training ... 536 | INFO: Prediction for Swing-Bench 537 | INFO: Evaluating ... 538 | Sensitivity : 0.8 539 | Specificity : 0.940677966102 540 | Accuracy : 0.934959349593 541 | INFO: Cross Validation Iteration - 12 542 | INFO: Training ... 543 | INFO: Prediction for Riding-Horse 544 | INFO: Evaluating ... 545 | Sensitivity : 0.9 546 | Specificity : 0.902654867257 547 | Accuracy : 0.90243902439 548 | *** Overall Evaluation *** 549 | Average Sensitivity: 0.535846909997 550 | Average Specificity: 0.947984173698 551 | Average Accuracy : 0.914169958709 552 | 553 | 554 | ### Testing with unseen data or video which has not been used for training 555 | Test Video: /Users/somnath/MY_PROG/ComputerVision/PA3/ucf_sports_actions/ucf_action/Diving-Side/014 556 | 557 | INFO: Training ... 
558 | Predicted Sports Action:[0] - Diving-Side 559 | 560 | ''' 561 | -------------------------------------------------------------------------------- /hockey_HOG.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pickle\n", 10 | "import numpy as np\n", 11 | "import cv2\n", 12 | "import sys\n", 13 | "import os\n", 14 | "import glob\n", 15 | "from sklearn import svm\n", 16 | "from scipy.stats import mode\n", 17 | "from sklearn.ensemble import RandomForestClassifier\n", 18 | "from tqdm import tqdm" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "path='./data/raw_frames/hocky/fi1_xvid'\n", 28 | "pickle_name='video_summary.pkl'" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "file_path=os.path.join(path,pickle_name)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "with open(file_path, 'rb') as f:\n", 47 | " data = pickle.load(f)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 5, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "{'images_path': 'data/raw_frames/hocky/fi1_xvid',\n", 59 | " 'name': 'fi1_xvid',\n", 60 | " 'images_files': ['data/raw_frames/hocky/fi1_xvid/frame_0.jpg',\n", 61 | " 'data/raw_frames/hocky/fi1_xvid/frame_2.jpg',\n", 62 | " 'data/raw_frames/hocky/fi1_xvid/frame_4.jpg',\n", 63 | " 'data/raw_frames/hocky/fi1_xvid/frame_6.jpg',\n", 64 | " 'data/raw_frames/hocky/fi1_xvid/frame_8.jpg',\n", 65 | " 'data/raw_frames/hocky/fi1_xvid/frame_10.jpg',\n", 66 | " 'data/raw_frames/hocky/fi1_xvid/frame_12.jpg',\n", 67 | " 'data/raw_frames/hocky/fi1_xvid/frame_14.jpg',\n", 68 | " 'data/raw_frames/hocky/fi1_xvid/frame_16.jpg',\n", 69 | " 'data/raw_frames/hocky/fi1_xvid/frame_18.jpg',\n", 70 | " 'data/raw_frames/hocky/fi1_xvid/frame_20.jpg',\n", 71 | " 'data/raw_frames/hocky/fi1_xvid/frame_22.jpg',\n", 72 | " 'data/raw_frames/hocky/fi1_xvid/frame_24.jpg',\n", 73 | " 'data/raw_frames/hocky/fi1_xvid/frame_26.jpg',\n", 74 | " 'data/raw_frames/hocky/fi1_xvid/frame_28.jpg',\n", 75 | " 'data/raw_frames/hocky/fi1_xvid/frame_30.jpg',\n", 76 | " 'data/raw_frames/hocky/fi1_xvid/frame_32.jpg',\n", 77 | " 'data/raw_frames/hocky/fi1_xvid/frame_34.jpg',\n", 78 | " 'data/raw_frames/hocky/fi1_xvid/frame_36.jpg',\n", 79 | " 'data/raw_frames/hocky/fi1_xvid/frame_38.jpg'],\n", 80 | " 'sequence_length': 41,\n", 81 | " 'label': 1}" 82 | ] 83 | }, 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "data" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "['data/raw_frames/hocky/fi1_xvid/frame_0.jpg',\n", 102 | " 'data/raw_frames/hocky/fi1_xvid/frame_2.jpg',\n", 103 | " 'data/raw_frames/hocky/fi1_xvid/frame_4.jpg',\n", 104 | " 'data/raw_frames/hocky/fi1_xvid/frame_6.jpg',\n", 105 | " 'data/raw_frames/hocky/fi1_xvid/frame_8.jpg',\n", 106 | " 'data/raw_frames/hocky/fi1_xvid/frame_10.jpg',\n", 107 | " 'data/raw_frames/hocky/fi1_xvid/frame_12.jpg',\n", 108 | " 'data/raw_frames/hocky/fi1_xvid/frame_14.jpg',\n", 109 | " 
'data/raw_frames/hocky/fi1_xvid/frame_16.jpg',\n", 110 | " 'data/raw_frames/hocky/fi1_xvid/frame_18.jpg',\n", 111 | " 'data/raw_frames/hocky/fi1_xvid/frame_20.jpg',\n", 112 | " 'data/raw_frames/hocky/fi1_xvid/frame_22.jpg',\n", 113 | " 'data/raw_frames/hocky/fi1_xvid/frame_24.jpg',\n", 114 | " 'data/raw_frames/hocky/fi1_xvid/frame_26.jpg',\n", 115 | " 'data/raw_frames/hocky/fi1_xvid/frame_28.jpg',\n", 116 | " 'data/raw_frames/hocky/fi1_xvid/frame_30.jpg',\n", 117 | " 'data/raw_frames/hocky/fi1_xvid/frame_32.jpg',\n", 118 | " 'data/raw_frames/hocky/fi1_xvid/frame_34.jpg',\n", 119 | " 'data/raw_frames/hocky/fi1_xvid/frame_36.jpg',\n", 120 | " 'data/raw_frames/hocky/fi1_xvid/frame_38.jpg']" 121 | ] 122 | }, 123 | "execution_count": 6, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "data['images_files']" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 7, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "featuresLimit = 15000" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 8, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "def featureExtraction( videoPath, actionName, type):\n", 148 | "\n", 149 | "\n", 150 | " # Set frame path, if jpeg directory doesn't exist , take images from video dir\n", 151 | " framePath = videoPath\n", 152 | " if os.path.exists( framePath + \"/jpeg\") :\n", 153 | " framePath += \"/jpeg/\"\n", 154 | "\n", 155 | " # Extract feature\n", 156 | "# imageFrames = getImageList(framePath)\n", 157 | "\n", 158 | " pickle_name='video_summary.pkl'\n", 159 | " file_path=os.path.join(videoPath,pickle_name)\n", 160 | " with open(file_path, 'rb') as f:\n", 161 | " data = pickle.load(f)\n", 162 | " imageFrames=data['images_files']\n", 163 | " #print \"DEBUG: Image Frames - \", imageFrames\n", 164 | "\n", 165 | " frameCount = 0\n", 166 | " frameIndex = 0\n", 167 | "\n", 168 | " # Feature List for a video\n", 169 | " videoFeatures = []\n", 170 | "\n", 171 | " for iFrame in imageFrames:\n", 172 | "\n", 173 | " frameIndex += 1\n", 174 | "\n", 175 | " # Read Frame\n", 176 | " frame = cv2.imread(iFrame)\n", 177 | " gray=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)\n", 178 | "\n", 179 | " # HOG Descriptor , default value it takes window size= 64x128, block size= 16x16, block stride= 8x8, cell size= 8x8, bins= 9\n", 180 | " hogDescriptor = cv2.HOGDescriptor()\n", 181 | "\n", 182 | " # Returns histogram\n", 183 | " hist = hogDescriptor.compute(gray)\n", 184 | "\n", 185 | " #sortedHogDescriptor = hogDescriptor\n", 186 | " sortedHogHist = np.sort(hist, axis=None)\n", 187 | "\n", 188 | " keyFeatures = sortedHogHist[- featuresLimit : ]\n", 189 | "\n", 190 | " if type == \"Trng\":\n", 191 | " keyFeatures = np.insert(keyFeatures, 0, sportsActionTag[actionName])\n", 192 | "\n", 193 | " videoFeatures.append(keyFeatures)\n", 194 | "\n", 195 | " # Lowest number of frame available in a video\n", 196 | " if frameCount >= 23:\n", 197 | " break\n", 198 | "\n", 199 | " frameCount += 1\n", 200 | "\n", 201 | "\n", 202 | " return videoFeatures" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 9, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "def getImageList(imageDirectory):\n", 212 | "\n", 213 | " # Find different type of images\n", 214 | " rImages = glob.glob(imageDirectory + \"/*.jpg\")\n", 215 | " rImages += glob.glob(imageDirectory + \"/*.jpeg\")\n", 216 | " rImages += glob.glob(imageDirectory + \"/*.png\")\n", 217 | 
"\n", 218 | " return rImages" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 10, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "sportsActionTag = {\n", 228 | " 'no_fight': 0,\n", 229 | " 'fight':1\n", 230 | "}" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 11, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "videoPath='./data/raw_frames/hocky/fi1_xvid'\n", 240 | "sportsActionName='fight'" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 12, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "videoFeatures = featureExtraction(videoPath , sportsActionName, 'Trng')" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 13, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "20" 261 | ] 262 | }, 263 | "execution_count": 13, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "len(videoFeatures)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 14, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "[1. 0.62515384 0.62515384 ... 0.9804249 0.9804249 0.9804249 ]\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "print(videoFeatures[0])" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 15, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "def getListOfDir(path):\n", 296 | " # Read each sport action directory\n", 297 | " dirs = os.listdir(path)\n", 298 | "\n", 299 | " sportsActionsCount = 0\n", 300 | " filtered_dir = []\n", 301 | " # Remove . .. and hidden directory\n", 302 | " for dir in dirs:\n", 303 | " if not dir.startswith(\".\"):\n", 304 | " filtered_dir.append(dir)\n", 305 | "\n", 306 | " return filtered_dir" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 16, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "sportsActionPath='./data/raw_frames/hocky'\n", 316 | "sportsActionList = getListOfDir( sportsActionPath )" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 17, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "name": "stdout", 326 | "output_type": "stream", 327 | "text": [ 328 | "['fi100_xvid', 'fi101_xvid', 'fi102_xvid', 'fi103_xvid', 'fi104_xvid']\n" 329 | ] 330 | } 331 | ], 332 | "source": [ 333 | "print(sportsActionList[:5])" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 18, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "name": "stderr", 343 | "output_type": "stream", 344 | "text": [ 345 | "100%|██████████| 1000/1000 [9:53:31<00:00, 5.27s/it] \n" 346 | ] 347 | } 348 | ], 349 | "source": [ 350 | "\n", 351 | "firstActionFlag = 0\n", 352 | "videoFeatures = []\n", 353 | "videoCount=1\n", 354 | "for sportsActionName in tqdm(sportsActionList):\n", 355 | "# if(videoCount==10):\n", 356 | "# break\n", 357 | " sportsActionDir = sportsActionPath + \"/\" + sportsActionName\n", 358 | " if(sportsActionName.startswith(\"fi\")):\n", 359 | " sportsActionName='fight'\n", 360 | " elif(sportsActionName.startswith(\"no\")):\n", 361 | " sportsActionName='no_fight'\n", 362 | " \n", 363 | " videoFeatures = featureExtraction(sportsActionDir , sportsActionName, 'Trng')\n", 364 | "# print(len(videoFeatures))\n", 365 | " # Put together all the videos\n", 366 | " if firstActionFlag == 
0:\n", 367 | " sportsActionFeatures = videoFeatures\n", 368 | " firstActionFlag = 1\n", 369 | " else:\n", 370 | " sportsActionFeatures = np.concatenate( (sportsActionFeatures, videoFeatures), axis=0)\n", 371 | "\n", 372 | " videoCount += 1\n", 373 | " \n", 374 | " " 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 19, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "# np.random.shuffle(sportsActionFeatures)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 20, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "# sportsActionFeatures.shape" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 21, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "\n", 402 | "# for featureAndLabel in tqdm(sportsActionFeatures):\n", 403 | "# label.append(int(featureAndLabel[0]))\n", 404 | "# feature.append((np.delete(featureAndLabel, 0)).tolist())\n", 405 | "# print(len(label))" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 22, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "# print(label[-5:])\n", 422 | "sportsActionNumber = len(sportsActionTag)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 23, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "def evaluation( truth, predicted, categoryIndex ):\n", 432 | "\n", 433 | " # TP,FP,FN,TN indicate True Positive, False Positive, False Negative, True Negative respectively\n", 434 | " TP = 1\n", 435 | " FP = 1\n", 436 | " FN = 1\n", 437 | " TN = 1\n", 438 | "\n", 439 | " # Categories are Sports Action 1=>0, Sports Action 2=> 1, Sports Action 3=>2 etc..\n", 440 | " for fIndex in range(len(truth)):\n", 441 | "\n", 442 | " # Positive prediction for each feature\n", 443 | " if ( int(predicted[fIndex]) == categoryIndex):\n", 444 | " # TP=> when P[i] = T[i] = Ci\n", 445 | " if (int(truth[fIndex]) == int (predicted[fIndex])):\n", 446 | " TP += 1\n", 447 | " else:\n", 448 | " FP += 1\n", 449 | " else: # Negative Prediction\n", 450 | " if ( int ( truth[fIndex]) == categoryIndex ):\n", 451 | " FN += 1\n", 452 | " else:\n", 453 | " TN += 1\n", 454 | "\n", 455 | "\n", 456 | " # Calculate Sensitivity - True Positive Rate - Recall\n", 457 | " sensitivity = TP / float ( TP + FN )\n", 458 | "\n", 459 | " # Specificity - True Negative Rate\n", 460 | " specificity = TN / float ( TN + FP )\n", 461 | "\n", 462 | " #Calculate accuracy\n", 463 | " accuracy = ( TP + TN ) / float ( TP + FP + FN + TN )\n", 464 | "\n", 465 | "\n", 466 | " return sensitivity, specificity, accuracy" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 24, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "def getSportsActionName(saIndex):\n", 476 | "\n", 477 | " keys = sportsActionTag.keys()\n", 478 | "\n", 479 | " for key in keys:\n", 480 | " if saIndex == sportsActionTag[key]:\n", 481 | " return key" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 25, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "def crossValidation( featureAndLabelList):\n", 491 | "\n", 492 | " # Randomize the sample\n", 493 | " np.random.shuffle(featureAndLabelList)\n", 494 | "\n", 495 | "\n", 496 | " # Evaluation Metrics\n", 497 | " sensitivity = 0.0\n", 498 | " specificity = 0.0\n", 499 
| " accuracy = 0.0\n", 500 | "\n", 501 | "\n", 502 | " # split feature set in equal subsets same as number of sports actions for cross validation\n", 503 | "# subsetLength = len(featureAndLabelList) / sportsActionNumber\n", 504 | " subsetLength = int(len(featureAndLabelList)*0.8)\n", 505 | " for rIndex in range(sportsActionNumber):\n", 506 | "\n", 507 | " print(\"INFO: Cross Validation Iteration - \", rIndex)\n", 508 | " trainigSet = []\n", 509 | " valdationSet = []\n", 510 | " feature = []\n", 511 | " label = []\n", 512 | "\n", 513 | "\n", 514 | " if ( rIndex == 0 ):\n", 515 | " trainigSet = featureAndLabelList[1*subsetLength:]\n", 516 | " valdationSet = featureAndLabelList[0: subsetLength]\n", 517 | " elif ( rIndex == (sportsActionNumber -1) ):\n", 518 | " trainigSet = featureAndLabelList[:(sportsActionNumber -1)*subsetLength]\n", 519 | " valdationSet = featureAndLabelList[(sportsActionNumber -1)*subsetLength : ]\n", 520 | " else:\n", 521 | " trainigSet = np.concatenate ((featureAndLabelList[:rIndex * subsetLength] , featureAndLabelList[(rIndex + 1) * subsetLength: ]), axis=0 )\n", 522 | " valdationSet = featureAndLabelList[rIndex * subsetLength : (rIndex + 1 ) * subsetLength]\n", 523 | "\n", 524 | " # Get all features in a array\n", 525 | " for featureAndLabel in trainigSet:\n", 526 | " label.append(int(featureAndLabel[0]))\n", 527 | " feature.append((np.delete(featureAndLabel, 0)).tolist())\n", 528 | "\n", 529 | "\n", 530 | " # Train model\n", 531 | " print(\"INFO: Training ... \")\n", 532 | " clf=RandomForestClassifier(n_estimators=13)\n", 533 | " clf=clf.fit(feature,label)\n", 534 | "\n", 535 | " # Prepare validation feature and label to be predicted\n", 536 | " print(\"INFO: Prediction for \", getSportsActionName(rIndex))\n", 537 | " vFeatureList = []\n", 538 | " vLabelList = [] # Ground Truth\n", 539 | " for featureAndLabel in valdationSet:\n", 540 | " vFeatureList.append(featureAndLabel[1:].tolist())\n", 541 | " vLabelList.append(featureAndLabel[0])\n", 542 | "\n", 543 | " # Predict the class label for Validation Feature List\n", 544 | " predictedLabel = clf.predict(vFeatureList)\n", 545 | "\n", 546 | " # predict validation set and calculate accuracy\n", 547 | " print(\"INFO: Evaluating ... 
\")\n", 548 | "\n", 549 | " # Evaluation < Truth>, , \n", 550 | " (sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , rIndex)\n", 551 | "\n", 552 | " sensitivity += sen\n", 553 | " specificity += spec\n", 554 | " accuracy += accu\n", 555 | "\n", 556 | " print(\"\\t Sensitivity : \", sen)\n", 557 | " print(\"\\t Specificity : \", spec)\n", 558 | " print(\"\\t Accuracy : \", accu)\n", 559 | "\n", 560 | "\n", 561 | " # Average evaluation metrics\n", 562 | " avgSensitivity = sensitivity / sportsActionNumber\n", 563 | " avgSpecificity = specificity / sportsActionNumber\n", 564 | " avgAccuracy = accuracy / sportsActionNumber\n", 565 | "\n", 566 | "\n", 567 | " print(\" *** Overall Evaluation ***\")\n", 568 | " print(\" Average Sensitivity: \", avgSensitivity)\n", 569 | " print(\" Average Specificity: \", avgSpecificity)\n", 570 | " print(\" Average Accuracy : \", avgAccuracy)\n" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 26, 576 | "metadata": {}, 577 | "outputs": [ 578 | { 579 | "name": "stdout", 580 | "output_type": "stream", 581 | "text": [ 582 | "(20032, 15001)\n" 583 | ] 584 | } 585 | ], 586 | "source": [ 587 | "print(sportsActionFeatures.shape)" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 27, 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "# crossValidation(sportsActionFeatures)\n", 597 | "featureAndLabelList=sportsActionFeatures\n", 598 | "np.random.shuffle(featureAndLabelList)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 28, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "subsetLength = int(len(featureAndLabelList)*0.8)\n", 608 | "trainigSet = featureAndLabelList[:subsetLength]\n", 609 | "valdationSet = featureAndLabelList[subsetLength : ]" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 30, 615 | "metadata": {}, 616 | "outputs": [], 617 | "source": [ 618 | "# Get all features in a array\n", 619 | "label = []\n", 620 | "feature = []\n", 621 | "for featureAndLabel in trainigSet:\n", 622 | " label.append(int(featureAndLabel[0]))\n", 623 | " feature.append((np.delete(featureAndLabel, 0)).tolist())" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 31, 629 | "metadata": {}, 630 | "outputs": [ 631 | { 632 | "name": "stdout", 633 | "output_type": "stream", 634 | "text": [ 635 | "INFO: Training ... \n" 636 | ] 637 | } 638 | ], 639 | "source": [ 640 | "# Train model\n", 641 | "print(\"INFO: Training ... \")\n", 642 | "clf=RandomForestClassifier(n_estimators=13)\n", 643 | "clf=clf.fit(feature,label)\n", 644 | "\n", 645 | "# Prepare validation feature and label to be predicted\n", 646 | "# print(\"INFO: Prediction for \", getSportsActionName(rIndex))\n", 647 | "vFeatureList = []\n", 648 | "vLabelList = [] # Ground Truth\n", 649 | "for featureAndLabel in valdationSet:\n", 650 | " vFeatureList.append(featureAndLabel[1:].tolist())\n", 651 | " vLabelList.append(featureAndLabel[0])" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": 32, 657 | "metadata": {}, 658 | "outputs": [ 659 | { 660 | "name": "stdout", 661 | "output_type": "stream", 662 | "text": [ 663 | "INFO: Evaluating ... \n" 664 | ] 665 | } 666 | ], 667 | "source": [ 668 | "# Predict the class label for Validation Feature List\n", 669 | "predictedLabel = clf.predict(vFeatureList)\n", 670 | "\n", 671 | "# predict validation set and calculate accuracy\n", 672 | "print(\"INFO: Evaluating ... 
\")\n", 673 | "\n", 674 | "# Evaluation < Truth>, , \n", 675 | "(sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , 0)\n", 676 | "(sen1, spec1 , accu1 ) = evaluation(vLabelList , predictedLabel.tolist() , 1)" 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": 33, 682 | "metadata": {}, 683 | "outputs": [ 684 | { 685 | "name": "stdout", 686 | "output_type": "stream", 687 | "text": [ 688 | " *** Overall Evaluation ***\n", 689 | " Average Sensitivity: 0.7569041874262317\n", 690 | " Average Specificity: 0.7569041874262317\n", 691 | " Average Accuracy : 0.7571677885814011\n" 692 | ] 693 | } 694 | ], 695 | "source": [ 696 | "# Evaluation Metrics\n", 697 | "sensitivity = 0.0\n", 698 | "specificity = 0.0\n", 699 | "accuracy = 0.0\n", 700 | "\n", 701 | "sensitivity += sen\n", 702 | "specificity += spec\n", 703 | "accuracy += accu\n", 704 | "\n", 705 | "sensitivity += sen1\n", 706 | "specificity += spec1\n", 707 | "accuracy += accu1\n", 708 | "# print(\"\\t Sensitivity : \", sen)\n", 709 | "# print(\"\\t Specificity : \", spec)\n", 710 | "# print(\"\\t Accuracy : \", accu)\n", 711 | "# Average evaluation metrics\n", 712 | "avgSensitivity = sensitivity / sportsActionNumber\n", 713 | "avgSpecificity = specificity / sportsActionNumber\n", 714 | "avgAccuracy = accuracy / sportsActionNumber\n", 715 | "\n", 716 | "\n", 717 | "print(\" *** Overall Evaluation ***\")\n", 718 | "print(\" Average Sensitivity: \", avgSensitivity)\n", 719 | "print(\" Average Specificity: \", avgSpecificity)\n", 720 | "print(\" Average Accuracy : \", avgAccuracy)" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": null, 726 | "metadata": {}, 727 | "outputs": [], 728 | "source": [ 729 | "from sklearn import svm" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 35, 735 | "metadata": {}, 736 | "outputs": [ 737 | { 738 | "name": "stdout", 739 | "output_type": "stream", 740 | "text": [ 741 | "INFO: Training ... \n" 742 | ] 743 | }, 744 | { 745 | "data": { 746 | "text/plain": [ 747 | "SVC(C=13, cache_size=200, class_weight=None, coef0=0.0,\n", 748 | " decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',\n", 749 | " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", 750 | " tol=0.001, verbose=False)" 751 | ] 752 | }, 753 | "execution_count": 35, 754 | "metadata": {}, 755 | "output_type": "execute_result" 756 | } 757 | ], 758 | "source": [ 759 | "print(\"INFO: Training ... \")\n", 760 | "clf = svm.SVC(gamma=0.01, C=13)\n", 761 | "clf.fit(feature,label)" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": 36, 767 | "metadata": {}, 768 | "outputs": [ 769 | { 770 | "name": "stdout", 771 | "output_type": "stream", 772 | "text": [ 773 | "INFO: Evaluating ... \n" 774 | ] 775 | } 776 | ], 777 | "source": [ 778 | "# Predict the class label for Validation Feature List\n", 779 | "predictedLabel = clf.predict(vFeatureList)\n", 780 | "\n", 781 | "# predict validation set and calculate accuracy\n", 782 | "print(\"INFO: Evaluating ... 
\")\n", 783 | "\n", 784 | "# Evaluation < Truth>, , \n", 785 | "(sen, spec , accu ) = evaluation(vLabelList , predictedLabel.tolist() , 0)\n", 786 | "(sen1, spec1 , accu1 ) = evaluation(vLabelList , predictedLabel.tolist() , 1)" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": 37, 792 | "metadata": {}, 793 | "outputs": [ 794 | { 795 | "name": "stdout", 796 | "output_type": "stream", 797 | "text": [ 798 | " *** Overall Evaluation ***\n", 799 | " Average Sensitivity: 0.7186830863075654\n", 800 | " Average Specificity: 0.7186830863075654\n", 801 | " Average Accuracy : 0.7190226876090751\n" 802 | ] 803 | } 804 | ], 805 | "source": [ 806 | "# Evaluation Metrics\n", 807 | "sensitivity = 0.0\n", 808 | "specificity = 0.0\n", 809 | "accuracy = 0.0\n", 810 | "\n", 811 | "sensitivity += sen\n", 812 | "specificity += spec\n", 813 | "accuracy += accu\n", 814 | "\n", 815 | "sensitivity += sen1\n", 816 | "specificity += spec1\n", 817 | "accuracy += accu1\n", 818 | "# print(\"\\t Sensitivity : \", sen)\n", 819 | "# print(\"\\t Specificity : \", spec)\n", 820 | "# print(\"\\t Accuracy : \", accu)\n", 821 | "# Average evaluation metrics\n", 822 | "avgSensitivity = sensitivity / sportsActionNumber\n", 823 | "avgSpecificity = specificity / sportsActionNumber\n", 824 | "avgAccuracy = accuracy / sportsActionNumber\n", 825 | "\n", 826 | "\n", 827 | "print(\" *** Overall Evaluation ***\")\n", 828 | "print(\" Average Sensitivity: \", avgSensitivity)\n", 829 | "print(\" Average Specificity: \", avgSpecificity)\n", 830 | "print(\" Average Accuracy : \", avgAccuracy)" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": null, 836 | "metadata": {}, 837 | "outputs": [], 838 | "source": [] 839 | } 840 | ], 841 | "metadata": { 842 | "kernelspec": { 843 | "display_name": "Python 3", 844 | "language": "python", 845 | "name": "python3" 846 | }, 847 | "language_info": { 848 | "codemirror_mode": { 849 | "name": "ipython", 850 | "version": 3 851 | }, 852 | "file_extension": ".py", 853 | "mimetype": "text/x-python", 854 | "name": "python", 855 | "nbconvert_exporter": "python", 856 | "pygments_lexer": "ipython3", 857 | "version": "3.6.4" 858 | } 859 | }, 860 | "nbformat": 4, 861 | "nbformat_minor": 2 862 | } 863 | --------------------------------------------------------------------------------