├── .gitignore
├── LICENSE
├── README.md
├── demo
│   ├── __init__.py
│   ├── cnn_predict.py
│   ├── cnn_train.py
│   ├── models
│   │   └── UCF-101
│   │       ├── .gitignore
│   │       ├── cnn-architecture.json
│   │       ├── cnn-config.npy
│   │       ├── cnn-weights.h5
│   │       ├── conv3d-architecture.json
│   │       ├── conv3d-weights.h5
│   │       ├── vgg16-bidirectional-lstm-architecture.json
│   │       ├── vgg16-bidirectional-lstm-config.npy
│   │       ├── vgg16-bidirectional-lstm-hi-dim-architecture.json
│   │       ├── vgg16-bidirectional-lstm-hi-dim-config.npy
│   │       ├── vgg16-bidirectional-lstm-weights.h5
│   │       ├── vgg16-lstm-architecture.json
│   │       ├── vgg16-lstm-config.npy
│   │       ├── vgg16-lstm-hi-dim-architecture.json
│   │       ├── vgg16-lstm-hi-dim-config.npy
│   │       └── vgg16-lstm-weights.h5
│   ├── reports
│   │   └── UCF-101
│   │       ├── cnn-history.png
│   │       ├── vgg16-bidirectional-lstm-hi-dim-history.png
│   │       ├── vgg16-bidirectional-lstm-history.png
│   │       ├── vgg16-lstm-hi-dim-history.png
│   │       └── vgg16-lstm-history.png
│   ├── very_large_data
│   │   └── .gitignore
│   ├── vgg16_bidirectional_lstm_hi_dim_predict.py
│   ├── vgg16_bidirectional_lstm_hi_dim_train.py
│   ├── vgg16_bidirectional_lstm_predict.py
│   ├── vgg16_bidirectional_lstm_train.py
│   ├── vgg16_lstm_hi_dim_predict.py
│   ├── vgg16_lstm_hi_dim_train.py
│   ├── vgg16_lstm_predict.py
│   └── vgg16_lstm_train.py
├── keras_video_classifier
│   ├── __init__.py
│   └── library
│       ├── __init__.py
│       ├── convolutional.py
│       ├── recurrent_networks.py
│       └── utility
│           ├── __init__.py
│           ├── device_utils.py
│           ├── frame_extractors
│           │   ├── __init__.py
│           │   ├── frame_extractor.py
│           │   └── vgg16_feature_extractor.py
│           ├── plot_utils.py
│           └── ucf
│               ├── UCF101_loader.py
│               └── __init__.py
├── notes
│   └── ReadMe.md
├── requirements-on-my-python-env.txt
├── requirements.txt
├── setup.cfg
└── setup.py
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | .idea/ 10 | *.iml 11 | 12 | Thumbs.db 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # dotenv 88 | .env 89 | 90 | # virtualenv 91 | .venv 92 | venv/ 93 | ENV/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xianshun Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # keras-video-classifier-web-api 2 | 3 | Keras implementation of video classifiers, intended to be served as a web API 4 | 5 | The training data is the [UCF101 - Action Recognition Data Set](http://crcv.ucf.edu/data/UCF101.php). 6 | Code is included that downloads UCF101 into the [demo/very_large_data](demo/very_large_data) folder 7 | if it is not already there (the dataset is too large to ship with the repository). The download 8 | utilities can be found in the [keras_video_classifier/library/utility/ucf](keras_video_classifier/library/utility/ucf) directory. 9 | 10 | The video classifiers are defined and implemented in the [keras_video_classifier/library](keras_video_classifier/library) directory. 11 | 12 | By default the classifiers are trained on the video files of the "UCF-101" dataset located in 13 | [demo/very_large_data](demo/very_large_data) (the video files are downloaded during training if they 14 | do not exist). However, the classifiers themselves are generic: passing a different data_set_name to 15 | a classifier's fit() method trains it on that dataset instead of UCF-101, provided the videos follow 16 | the same folder layout.
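For example, a minimal training sketch for a hypothetical dataset named `MyVideos` (an illustrative name, not a dataset bundled with this repository) might look like the following, assuming the clips are organized as `demo/very_large_data/MyVideos/<class_label>/*.avi`, mirroring the UCF-101 layout:

```python
import numpy as np
from keras_video_classifier.library.recurrent_networks import VGG16LSTMVideoClassifier

np.random.seed(42)

# 'MyVideos' is a hypothetical dataset: its clips are assumed to live under
# ./very_large_data/MyVideos/<class_label>/, one sub-folder per class label.
# No download step is needed for a local dataset.
classifier = VGG16LSTMVideoClassifier()
history = classifier.fit(data_dir_path='./very_large_data',
                         model_dir_path='./models/MyVideos',
                         data_set_name='MyVideos')
```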
17 | 18 | The opencv-python package is used to extract frames from the videos. 19 | 20 | # Deep Learning Models 21 | 22 | The following deep learning models have been implemented and studied: 23 | 24 | * VGG16+LSTM: VGG16 extracts features from the individual frames of a video, and the resulting sequence of frame features is fed into an LSTM recurrent network for classification (a sketch of this shared feature-extraction step appears at the end of this section). 25 | * training: [demo/vgg16_lstm_train.py](demo/vgg16_lstm_train.py) 26 | * predictor: [demo/vgg16_lstm_predict.py](demo/vgg16_lstm_predict.py) 27 | * training: [demo/vgg16_lstm_hi_dim_train.py](demo/vgg16_lstm_hi_dim_train.py) (VGG16 top not included) 28 | * predictor: [demo/vgg16_lstm_hi_dim_predict.py](demo/vgg16_lstm_hi_dim_predict.py) (VGG16 top not included) 29 | 30 | * VGG16+Bidirectional LSTM: VGG16 extracts features from the individual frames of a video, and the resulting sequence of frame features is fed into a bidirectional LSTM recurrent network for classification. 31 | * training: [demo/vgg16_bidirectional_lstm_train.py](demo/vgg16_bidirectional_lstm_train.py) 32 | * predictor: [demo/vgg16_bidirectional_lstm_predict.py](demo/vgg16_bidirectional_lstm_predict.py) 33 | * training: [demo/vgg16_bidirectional_lstm_hi_dim_train.py](demo/vgg16_bidirectional_lstm_hi_dim_train.py) (VGG16 top not included) 34 | * predictor: [demo/vgg16_bidirectional_lstm_hi_dim_predict.py](demo/vgg16_bidirectional_lstm_hi_dim_predict.py) (VGG16 top not included) 35 | 36 | * Convolutional Network: this approach stacks the video frames into the "channels" dimension of the CNN input, so the network classifies a single "image" whose channels are the frames. 37 | * training: demo/cnn_train.py 38 | * predictor: demo/cnn_predict.py 39 | 40 | The trained models are available in the demo/models/UCF-101 folder. 41 | (The weight files of two of the trained models are not included, as they are too big to upload: 42 | * demo/models/UCF-101/vgg16-lstm-hi-dim-weights.h5 43 | * demo/models/UCF-101/vgg16-bidirectional-lstm-hi-dim-weights.h5 44 | )
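All of the VGG16-based models above share the same first step: each extracted frame is passed through VGG16 and the network's output vector is kept as that frame's feature. The library implements this in keras_video_classifier/library/utility/frame_extractors/vgg16_feature_extractor.py; the following is only a minimal illustrative sketch of the idea, not that module's exact API:

```python
import cv2
import numpy as np
from keras.applications.vgg16 import VGG16, preprocess_input

# include_top=True gives a 1000-dimensional feature per frame; the "hi dim"
# variants use include_top=False, whose flattened output is 25088-dimensional
vgg16_model = VGG16(include_top=True, weights='imagenet')

def extract_vgg16_features(frames):
    # frames: an iterable of frame images (e.g. read with opencv-python)
    features = []
    for frame in frames:
        img = cv2.resize(frame, (224, 224)).astype(np.float32)
        img = preprocess_input(np.expand_dims(img, axis=0))
        features.append(vgg16_model.predict(img)[0])
    return np.array(features)  # shape: (num_frames, 1000)
```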
45 | 46 | # Usage 47 | 48 | ### Train Deep Learning model 49 | 50 | To train a deep learning model, say VGG16BidirectionalLSTMVideoClassifier, run the following commands: 51 | 52 | ```bash 53 | pip install -r requirements.txt 54 | 55 | cd demo 56 | python vgg16_bidirectional_lstm_train.py 57 | ``` 58 | 59 | The training code in vgg16_bidirectional_lstm_train.py is quite straightforward, as illustrated below: 60 | 61 | ```python 62 | import numpy as np 63 | from keras import backend as K 64 | from keras_video_classifier.library.recurrent_networks import VGG16BidirectionalLSTMVideoClassifier 65 | from keras_video_classifier.library.utility.plot_utils import plot_and_save_history 66 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf 67 | 68 | K.set_image_dim_ordering('tf') 69 | 70 | data_set_name = 'UCF-101' 71 | input_dir_path = './very_large_data' 72 | output_dir_path = './models/' + data_set_name 73 | report_dir_path = './reports/' + data_set_name 74 | 75 | np.random.seed(42) 76 | 77 | # this line downloads the video files of the UCF-101 dataset if they are not available in the very_large_data folder 78 | load_ucf(input_dir_path) 79 | 80 | classifier = VGG16BidirectionalLSTMVideoClassifier() 81 | 82 | history = classifier.fit(data_dir_path=input_dir_path, model_dir_path=output_dir_path, data_set_name=data_set_name) 83 | 84 | plot_and_save_history(history, VGG16BidirectionalLSTMVideoClassifier.model_name, 85 | report_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-history.png') 86 | 87 | ``` 88 | 89 | After the training is completed, the trained model files (architecture, weights and configuration) are saved under demo/models/UCF-101, e.g. vgg16-bidirectional-lstm-architecture.json, vgg16-bidirectional-lstm-weights.h5 and vgg16-bidirectional-lstm-config.npy. 90 | 91 | ### Predict Video Class Label 92 | 93 | To use the trained deep learning model to predict the class label of a video, you can use the following code: 94 | 95 | ```python 96 | 97 | import numpy as np 98 | 99 | from keras_video_classifier.library.recurrent_networks import VGG16BidirectionalLSTMVideoClassifier 100 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf, scan_ucf_with_labels 101 | 102 | vgg16_include_top = True 103 | data_set_name = 'UCF-101' 104 | data_dir_path = './very_large_data' 105 | model_dir_path = './models/' + data_set_name 106 | config_file_path = VGG16BidirectionalLSTMVideoClassifier.get_config_file_path(model_dir_path, 107 | vgg16_include_top=vgg16_include_top) 108 | weight_file_path = VGG16BidirectionalLSTMVideoClassifier.get_weight_file_path(model_dir_path, 109 | vgg16_include_top=vgg16_include_top) 110 | 111 | np.random.seed(42) 112 | 113 | # this line downloads the video files of the UCF-101 dataset if they are not available in the very_large_data folder 114 | load_ucf(data_dir_path) 115 | 116 | predictor = VGG16BidirectionalLSTMVideoClassifier() 117 | predictor.load_model(config_file_path, weight_file_path) 118 | 119 | # scan_ucf_with_labels returns a dictionary of (video_file_path, video_class_label) pairs, where video_file_path 120 | # is the key and video_class_label is the value 121 | videos = scan_ucf_with_labels(data_dir_path, [label for (label, label_index) in predictor.labels.items()]) 122 | 123 | video_file_path_list = np.array([file_path for file_path in videos.keys()]) 124 | np.random.shuffle(video_file_path_list) 125 | 126 | correct_count = 0 127 | count = 0 128 | 129 | for video_file_path in video_file_path_list: 130 | label = videos[video_file_path] 131 | predicted_label = predictor.predict(video_file_path) 132 | print('predicted: ' + predicted_label + ' actual: ' + label) 133 | correct_count = correct_count + 1 if label == predicted_label else correct_count 134 | count += 1 135 | accuracy = correct_count / count 136 | print('accuracy: ', accuracy) 137 | ``` 138 |
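When predict() is called on a video, frames are first pulled out of the file with opencv-python (this is what the `Extracting frames from video: ...` lines in the output below report). A minimal sketch of such frame extraction, with illustrative `max_frames` and frame-size values rather than the library's exact defaults:

```python
import cv2
import numpy as np

def extract_frames(video_file_path, max_frames=20, size=(240, 240)):
    # Collect up to max_frames frames from the video, each resized to a fixed size
    frames = []
    capture = cv2.VideoCapture(video_file_path)
    while len(frames) < max_frames:
        success, frame = capture.read()
        if not success:
            break  # reached the end of the video
        frames.append(cv2.resize(frame, size))
    capture.release()
    return np.array(frames)
```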
139 | Below is the printout of [demo/vgg16_bidirectional_lstm_predict.py](demo/vgg16_bidirectional_lstm_predict.py) 140 | towards the end of its execution: 141 | 142 | ```text 143 | predicted: Biking actual: Biking 144 | accuracy: 0.8593481989708405 145 | Extracting frames from video: ./very_large_data/UCF-101\Billiards\v_Billiards_g24_c01.avi 146 | predicted: Billiards actual: Billiards 147 | accuracy: 0.8595890410958904 148 | Extracting frames from video: ./very_large_data/UCF-101\BabyCrawling\v_BabyCrawling_g22_c06.avi 149 | predicted: BabyCrawling actual: BabyCrawling 150 | accuracy: 0.8598290598290599 151 | Extracting frames from video: ./very_large_data/UCF-101\Bowling\v_Bowling_g13_c01.avi 152 | predicted: Bowling actual: Bowling 153 | accuracy: 0.8600682593856656 154 | Extracting frames from video: ./very_large_data/UCF-101\BalanceBeam\v_BalanceBeam_g24_c04.avi 155 | predicted: BalanceBeam actual: BalanceBeam 156 | accuracy: 0.8603066439522998 157 | Extracting frames from video: ./very_large_data/UCF-101\BrushingTeeth\v_BrushingTeeth_g12_c02.avi 158 | predicted: BrushingTeeth actual: BrushingTeeth 159 | accuracy: 0.8605442176870748 160 | Extracting frames from video: ./very_large_data/UCF-101\BasketballDunk\v_BasketballDunk_g04_c01.avi 161 | predicted: BasketballDunk actual: BasketballDunk 162 | accuracy: 0.8607809847198642 163 | Extracting frames from video: ./very_large_data/UCF-101\Bowling\v_Bowling_g04_c03.avi 164 | predicted: BenchPress actual: Bowling 165 | accuracy: 0.8593220338983051 166 | Extracting frames from video: ./very_large_data/UCF-101\BaseballPitch\v_BaseballPitch_g19_c01.avi 167 | predicted: BaseballPitch actual: BaseballPitch 168 | accuracy: 0.8595600676818951 169 | Extracting frames from video: ./very_large_data/UCF-101\Archery\v_Archery_g18_c03.avi 170 | predicted: Archery actual: Archery 171 | accuracy: 0.8597972972972973 172 | Extracting frames from video: ./very_large_data/UCF-101\Bowling\v_Bowling_g19_c03.avi 173 | ... 174 | ``` 175 | 176 | # Evaluation 177 | 178 | 20 classes from UCF101 are used to train the video classifiers, and each model is trained for 20 epochs. 179 | 180 | 181 | ### Evaluate VGG16+LSTM (top included for VGG16) 182 | 183 | Below is the training history for the VGG16+LSTM (top included for VGG16): 184 | 185 | ![vgg16-lstm-history](demo/reports/UCF-101/vgg16-lstm-history.png) 186 | 187 | The LSTM with the VGG16 (top included) feature extractor reaches an accuracy of around 68.9% for training and 55% for validation. 188 | 189 | ### Evaluate VGG16+Bidirectional LSTM (top included for VGG16) 190 | 191 | Below is the training history for the VGG16+Bidirectional LSTM (top included for VGG16): 192 | 193 | ![vgg16-bidirectional-lstm-history](demo/reports/UCF-101/vgg16-bidirectional-lstm-history.png) 194 | 195 | The bidirectional LSTM with the VGG16 (top included) feature extractor reaches an accuracy of around 89% for training and 77% for validation. 196 | 197 | ### Evaluate VGG16+LSTM (top not included for VGG16) 198 | 199 | Below is the training history for the VGG16+LSTM (top not included for VGG16): 200 | 201 | ![vgg16-lstm-hi-dim-history](demo/reports/UCF-101/vgg16-lstm-hi-dim-history.png) 202 | 203 | The LSTM with the VGG16 (top not included) feature extractor reaches an accuracy of around 100% for training and 98.83% for validation. 204 | 205 | ### Evaluate VGG16+Bidirectional LSTM (top not included for VGG16) 206 | 207 | Below is the training history for the VGG16+Bidirectional LSTM (top not included for VGG16): 208 | 209 | ![vgg16-bidirectional-lstm-hi-dim-history](demo/reports/UCF-101/vgg16-bidirectional-lstm-hi-dim-history.png) 210 | 211 | The bidirectional LSTM with the VGG16 (top not included) feature extractor reaches an accuracy of around 100% for training and 98.57% for validation. 212 | 213 | 214 | ### Evaluate Convolutional Network 215 | 216 | Below is the training history for the Convolutional Network: 217 | 218 | ![cnn-history](demo/reports/UCF-101/cnn-history.png) 219 | 220 | The Convolutional Network reaches an accuracy of around 22.73% for training and 28.75% for validation. 221 | 222 | # Note 223 | 224 | ### Configure Keras to run on GPU on Windows 225 | 226 | * Step 1: Change tensorflow to tensorflow-gpu in requirements.txt and install tensorflow-gpu 227 | * Step 2: Download and install the [CUDA® Toolkit 9.0](https://developer.nvidia.com/cuda-90-download-archive) (please note that 228 | CUDA® Toolkit 9.1 is not yet supported by tensorflow, so you should download CUDA® Toolkit 9.0) 229 | * Step 3: Download and unzip the [cuDNN 7.0.4 for CUDA® Toolkit 9.0](https://developer.nvidia.com/cudnn) and add the 230 | bin folder of the unzipped directory to the PATH environment variable of your Windows environment (a quick way to verify the setup is sketched below) 231 |
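After these steps, a quick sanity check (not part of this repository, but the API is available in the TensorFlow 1.x releases this project targets) is to ask the tensorflow backend which devices it can see:

```python
from tensorflow.python.client import device_lib

# A working CUDA/cuDNN install lists a GPU device (e.g. '/device:GPU:0') alongside the CPU
print([device.name for device in device_lib.list_local_devices()])
```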
-------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/__init__.py -------------------------------------------------------------------------------- /demo/cnn_predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | import os 4 | import sys 5 | 6 | K.set_image_dim_ordering('tf') 7 | 8 | 9 | def patch_path(path): 10 | return os.path.join(os.path.dirname(__file__), path) 11 | 12 | 13 | def main(): 14 | sys.path.append(patch_path('..')) 15 | 16 | data_dir_path = patch_path('very_large_data') 17 | model_dir_path = patch_path('models/UCF-101') 18 | 19 | from keras_video_classifier.library.convolutional import CnnVideoClassifier 20 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf, scan_ucf_with_labels 21 | config_file_path = CnnVideoClassifier.get_config_file_path(model_dir_path) 22 | weight_file_path = CnnVideoClassifier.get_weight_file_path(model_dir_path) 23 | 24 | np.random.seed(42) 25 | 26 | load_ucf(data_dir_path) 27 | 28 | predictor = CnnVideoClassifier() 29 | predictor.load_model(config_file_path, weight_file_path) 30 | 31 | videos = scan_ucf_with_labels(data_dir_path, [label for (label, label_index) in predictor.labels.items()]) 32 | 33 | video_file_path_list = np.array([file_path for file_path in videos.keys()]) 34 | np.random.shuffle(video_file_path_list) 35 | 36 | for video_file_path in video_file_path_list: 37 | label = videos[video_file_path] 38 | predicted_label = predictor.predict(video_file_path) 39 | print('predicted: ' + predicted_label + ' actual: ' + label) 40 | 41 | 42 | if __name__ == '__main__': 43 | main() -------------------------------------------------------------------------------- /demo/cnn_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | import os 4 | from keras_video_classifier.library.utility.plot_utils import plot_and_save_history 5 | 6 | from keras_video_classifier.library.convolutional import CnnVideoClassifier 7 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf 8 | 9 | K.set_image_dim_ordering('tf') 10 | 11 | 12 | def patch_path(path): 13 | return os.path.join(os.path.dirname(__file__), path) 14 | 15 | 16 | def main(): 17 | data_set_name = 'UCF-101' 18 | input_dir_path = patch_path('very_large_data') 19 | output_dir_path = patch_path('models/' + data_set_name) 20 | report_dir_path = patch_path('reports/' + data_set_name) 21 | 22 | np.random.seed(42) 23 | 24 | # this line downloads the video files of UCF-101 dataset if they are not available in the very_large_data folder 25 | load_ucf(input_dir_path) 26 | 27 | classifier = CnnVideoClassifier() 28 | 29 | history = classifier.fit(data_dir_path=input_dir_path, model_dir_path=output_dir_path, 30 | data_set_name=data_set_name, 31 | max_frames=10) 32 | 33 | plot_and_save_history(history, CnnVideoClassifier.model_name, 34 | report_dir_path + '/' + CnnVideoClassifier.model_name + '-history.png') 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /demo/models/UCF-101/.gitignore: 
-------------------------------------------------------------------------------- 1 | vgg16-lstm-hi-dim-weights.h5 2 | vgg16-bidirectional-lstm-hi-dim-weights.h5 -------------------------------------------------------------------------------- /demo/models/UCF-101/cnn-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "config": [{"class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "batch_input_shape": [null, 240, 240, 20], "dtype": "float32", "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "relu"}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_1", "trainable": true, "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Conv2D", "config": {"name": "conv2d_2", "trainable": true, "filters": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "activation": "relu"}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_2", "trainable": true, "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.25, "noise_shape": null, "seed": null}}, {"class_name": "Conv2D", "config": {"name": "conv2d_3", "trainable": true, "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_3", "trainable": true, "activation": "relu"}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_3", "trainable": true, "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Conv2D", "config": {"name": "conv2d_4", "trainable": true, "filters": 64, "kernel_size": [3, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": 
{"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_4", "trainable": true, "activation": "relu"}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_4", "trainable": true, "pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "rate": 0.25, "noise_shape": null, "seed": null}}, {"class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 512, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_5", "trainable": true, "activation": "relu"}}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 4, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_6", "trainable": true, "activation": "softmax"}}], "keras_version": "2.1.2", "backend": "tensorflow"} -------------------------------------------------------------------------------- /demo/models/UCF-101/cnn-config.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/cnn-config.npy -------------------------------------------------------------------------------- /demo/models/UCF-101/cnn-weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/cnn-weights.h5 -------------------------------------------------------------------------------- /demo/models/UCF-101/conv3d-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "config": [{"class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "batch_input_shape": [null, 23, 1000], "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": 
"Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.5, "recurrent_dropout": 0.0, "implementation": 1}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 512, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 6, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "softmax"}}], "keras_version": "2.1.2", "backend": "tensorflow"} -------------------------------------------------------------------------------- /demo/models/UCF-101/conv3d-weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/conv3d-weights.h5 -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-bidirectional-lstm-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "config": [{"class_name": "Bidirectional", "config": {"name": "bidirectional_1", "trainable": true, "batch_input_shape": [null, 7, 1000], "dtype": "float32", "layer": {"class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 512, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}}, "merge_mode": "concat"}}, {"class_name": "Bidirectional", "config": {"name": "bidirectional_2", "trainable": true, "layer": {"class_name": "LSTM", "config": {"name": "lstm_2", "trainable": true, "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": 
false, "units": 10, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}}, "merge_mode": "concat"}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 512, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 20, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "softmax"}}], "keras_version": "2.1.2", "backend": "tensorflow"} -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-bidirectional-lstm-config.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/vgg16-bidirectional-lstm-config.npy -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-bidirectional-lstm-hi-dim-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "config": [{"class_name": "Bidirectional", "config": {"name": "bidirectional_1", "trainable": true, "batch_input_shape": [null, 7, 25088], "dtype": "float32", "layer": {"class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 512, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 
0.0, "recurrent_dropout": 0.0, "implementation": 1}}, "merge_mode": "concat"}}, {"class_name": "Bidirectional", "config": {"name": "bidirectional_2", "trainable": true, "layer": {"class_name": "LSTM", "config": {"name": "lstm_2", "trainable": true, "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 10, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}}, "merge_mode": "concat"}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 512, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 20, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "softmax"}}], "keras_version": "2.1.2", "backend": "tensorflow"} -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-bidirectional-lstm-hi-dim-config.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/vgg16-bidirectional-lstm-hi-dim-config.npy -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-bidirectional-lstm-weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/vgg16-bidirectional-lstm-weights.h5 -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-lstm-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "config": [{"class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "batch_input_shape": [null, null, 1000], "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, 
"unroll": false, "units": 512, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.5, "recurrent_dropout": 0.0, "implementation": 1}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 512, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 20, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "softmax"}}], "keras_version": "2.1.2", "backend": "tensorflow"} -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-lstm-config.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/vgg16-lstm-config.npy -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-lstm-hi-dim-architecture.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "config": [{"class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "batch_input_shape": [null, null, 25088], "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 512, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.5, "recurrent_dropout": 0.0, "implementation": 1}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 512, 
"activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.5, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 20, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "softmax"}}], "keras_version": "2.1.2", "backend": "tensorflow"} -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-lstm-hi-dim-config.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/vgg16-lstm-hi-dim-config.npy -------------------------------------------------------------------------------- /demo/models/UCF-101/vgg16-lstm-weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/models/UCF-101/vgg16-lstm-weights.h5 -------------------------------------------------------------------------------- /demo/reports/UCF-101/cnn-history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/reports/UCF-101/cnn-history.png -------------------------------------------------------------------------------- /demo/reports/UCF-101/vgg16-bidirectional-lstm-hi-dim-history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/reports/UCF-101/vgg16-bidirectional-lstm-hi-dim-history.png -------------------------------------------------------------------------------- /demo/reports/UCF-101/vgg16-bidirectional-lstm-history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/reports/UCF-101/vgg16-bidirectional-lstm-history.png -------------------------------------------------------------------------------- /demo/reports/UCF-101/vgg16-lstm-hi-dim-history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/reports/UCF-101/vgg16-lstm-hi-dim-history.png -------------------------------------------------------------------------------- /demo/reports/UCF-101/vgg16-lstm-history.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/demo/reports/UCF-101/vgg16-lstm-history.png -------------------------------------------------------------------------------- /demo/very_large_data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /demo/vgg16_bidirectional_lstm_hi_dim_predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import os 4 | 5 | 6 | def main(): 7 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 8 | 9 | from keras_video_classifier.library.recurrent_networks import VGG16BidirectionalLSTMVideoClassifier 10 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf, scan_ucf_with_labels 11 | 12 | vgg16_include_top = False 13 | data_dir_path = os.path.join(os.path.dirname(__file__), 'very_large_data') 14 | model_dir_path = os.path.join(os.path.dirname(__file__), 'models/UCF-101') 15 | 16 | config_file_path = VGG16BidirectionalLSTMVideoClassifier.get_config_file_path(model_dir_path, 17 | vgg16_include_top=vgg16_include_top) 18 | weight_file_path = VGG16BidirectionalLSTMVideoClassifier.get_weight_file_path(model_dir_path, 19 | vgg16_include_top=vgg16_include_top) 20 | 21 | np.random.seed(42) 22 | 23 | load_ucf(data_dir_path) 24 | 25 | predictor = VGG16BidirectionalLSTMVideoClassifier() 26 | predictor.load_model(config_file_path, weight_file_path) 27 | 28 | videos = scan_ucf_with_labels(data_dir_path, [label for (label, label_index) in predictor.labels.items()]) 29 | 30 | video_file_path_list = np.array([file_path for file_path in videos.keys()]) 31 | np.random.shuffle(video_file_path_list) 32 | 33 | correct_count = 0 34 | count = 0 35 | 36 | for video_file_path in video_file_path_list: 37 | label = videos[video_file_path] 38 | predicted_label = predictor.predict(video_file_path) 39 | print('predicted: ' + predicted_label + ' actual: ' + label) 40 | correct_count = correct_count + 1 if label == predicted_label else correct_count 41 | count += 1 42 | accuracy = correct_count / count 43 | print('accuracy: ', accuracy) 44 | 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /demo/vgg16_bidirectional_lstm_hi_dim_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | import sys 4 | import os 5 | 6 | 7 | def main(): 8 | K.set_image_dim_ordering('tf') 9 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 10 | 11 | from keras_video_classifier.library.recurrent_networks import VGG16BidirectionalLSTMVideoClassifier 12 | from keras_video_classifier.library.utility.plot_utils import plot_and_save_history 13 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf 14 | 15 | data_set_name = 'UCF-101' 16 | input_dir_path = os.path.join(os.path.dirname(__file__), 'very_large_data') 17 | output_dir_path = os.path.join(os.path.dirname(__file__), 'models', data_set_name) 18 | report_dir_path = os.path.join(os.path.dirname(__file__), 'reports', data_set_name) 19 | 20 | np.random.seed(42) 21 | 22 | # this line downloads the video files of UCF-101 dataset if they are not available in 
the very_large_data folder 23 | load_ucf(input_dir_path) 24 | 25 | classifier = VGG16BidirectionalLSTMVideoClassifier() 26 | 27 | history = classifier.fit(data_dir_path=input_dir_path, model_dir_path=output_dir_path, vgg16_include_top=False, 28 | data_set_name=data_set_name) 29 | 30 | plot_and_save_history(history, VGG16BidirectionalLSTMVideoClassifier.model_name, 31 | report_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-hi-dim-history.png') 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /demo/vgg16_bidirectional_lstm_predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import os 4 | 5 | 6 | def main(): 7 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 8 | 9 | from keras_video_classifier.library.recurrent_networks import VGG16BidirectionalLSTMVideoClassifier 10 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf, scan_ucf_with_labels 11 | 12 | vgg16_include_top = True 13 | data_dir_path = os.path.join(os.path.dirname(__file__), 'very_large_data') 14 | model_dir_path = os.path.join(os.path.dirname(__file__), 'models', 'UCF-101') 15 | config_file_path = VGG16BidirectionalLSTMVideoClassifier.get_config_file_path(model_dir_path, 16 | vgg16_include_top=vgg16_include_top) 17 | weight_file_path = VGG16BidirectionalLSTMVideoClassifier.get_weight_file_path(model_dir_path, 18 | vgg16_include_top=vgg16_include_top) 19 | 20 | np.random.seed(42) 21 | 22 | load_ucf(data_dir_path) 23 | 24 | predictor = VGG16BidirectionalLSTMVideoClassifier() 25 | predictor.load_model(config_file_path, weight_file_path) 26 | 27 | 28 | 29 | videos = scan_ucf_with_labels(data_dir_path, [label for (label, label_index) in predictor.labels.items()]) 30 | 31 | video_file_path_list = np.array([file_path for file_path in videos.keys()]) 32 | np.random.shuffle(video_file_path_list) 33 | 34 | correct_count = 0 35 | count = 0 36 | 37 | for video_file_path in video_file_path_list: 38 | label = videos[video_file_path] 39 | predicted_label = predictor.predict(video_file_path) 40 | print('predicted: ' + predicted_label + ' actual: ' + label) 41 | correct_count = correct_count + 1 if label == predicted_label else correct_count 42 | count += 1 43 | accuracy = correct_count / count 44 | print('accuracy: ', accuracy) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /demo/vgg16_bidirectional_lstm_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | import os 4 | import sys 5 | 6 | 7 | def patch_path(path): 8 | return os.path.join(os.path.dirname(__file__), path) 9 | 10 | 11 | def main(): 12 | K.set_image_dim_ordering('tf') 13 | sys.path.append(patch_path('..')) 14 | 15 | from keras_video_classifier.library.recurrent_networks import VGG16BidirectionalLSTMVideoClassifier 16 | from keras_video_classifier.library.utility.plot_utils import plot_and_save_history 17 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf 18 | 19 | data_set_name = 'UCF-101' 20 | input_dir_path = patch_path('very_large_data') 21 | output_dir_path = patch_path('models/' + data_set_name) 22 | report_dir_path = patch_path('reports/' + data_set_name) 23 | 24 | np.random.seed(42) 25 | 26 | # 
this line downloads the video files of UCF-101 dataset if they are not available in the very_large_data folder 27 | load_ucf(input_dir_path) 28 | 29 | classifier = VGG16BidirectionalLSTMVideoClassifier() 30 | 31 | history = classifier.fit(data_dir_path=input_dir_path, model_dir_path=output_dir_path, data_set_name=data_set_name) 32 | 33 | plot_and_save_history(history, VGG16BidirectionalLSTMVideoClassifier.model_name, 34 | report_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-history.png') 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /demo/vgg16_lstm_hi_dim_predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | import sys 4 | import os 5 | 6 | 7 | def main(): 8 | K.set_image_dim_ordering('tf') 9 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 10 | 11 | from keras_video_classifier.library.recurrent_networks import VGG16LSTMVideoClassifier 12 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf, scan_ucf_with_labels 13 | 14 | vgg16_include_top = False 15 | data_dir_path = os.path.join(os.path.dirname(__file__), 'very_large_data') 16 | model_dir_path = os.path.join(os.path.dirname(__file__), 'models', 'UCF-101') 17 | config_file_path = VGG16LSTMVideoClassifier.get_config_file_path(model_dir_path, 18 | vgg16_include_top=vgg16_include_top) 19 | weight_file_path = VGG16LSTMVideoClassifier.get_weight_file_path(model_dir_path, 20 | vgg16_include_top=vgg16_include_top) 21 | 22 | np.random.seed(42) 23 | 24 | load_ucf(data_dir_path) 25 | 26 | predictor = VGG16LSTMVideoClassifier() 27 | predictor.load_model(config_file_path, weight_file_path) 28 | 29 | videos = scan_ucf_with_labels(data_dir_path, [label for (label, label_index) in predictor.labels.items()]) 30 | 31 | video_file_path_list = np.array([file_path for file_path in videos.keys()]) 32 | np.random.shuffle(video_file_path_list) 33 | 34 | correct_count = 0 35 | count = 0 36 | 37 | for video_file_path in video_file_path_list: 38 | label = videos[video_file_path] 39 | predicted_label = predictor.predict(video_file_path) 40 | print('predicted: ' + predicted_label + ' actual: ' + label) 41 | correct_count = correct_count + 1 if label == predicted_label else correct_count 42 | count += 1 43 | accuracy = correct_count / count 44 | print('accuracy: ', accuracy) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /demo/vgg16_lstm_hi_dim_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | K.set_image_dim_ordering('tf') 9 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 10 | 11 | from keras_video_classifier.library.utility.plot_utils import plot_and_save_history 12 | from keras_video_classifier.library.recurrent_networks import VGG16LSTMVideoClassifier 13 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf 14 | 15 | data_set_name = 'UCF-101' 16 | input_dir_path = os.path.join(os.path.dirname(__file__), 'very_large_data') 17 | output_dir_path = os.path.join(os.path.dirname(__file__), 'models', data_set_name) 18 | report_dir_path = os.path.join(os.path.dirname(__file__), 'reports', data_set_name) 19 | 20 | 
np.random.seed(42) 21 | 22 | # this line downloads the video files of UCF-101 dataset if they are not available in the very_large_data folder 23 | load_ucf(input_dir_path) 24 | 25 | classifier = VGG16LSTMVideoClassifier() 26 | 27 | history = classifier.fit(data_dir_path=input_dir_path, model_dir_path=output_dir_path, vgg16_include_top=False, 28 | data_set_name=data_set_name) 29 | 30 | plot_and_save_history(history, VGG16LSTMVideoClassifier.model_name, 31 | report_dir_path + '/' + VGG16LSTMVideoClassifier.model_name + '-hi-dim-history.png') 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /demo/vgg16_lstm_predict.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | import sys 4 | import os 5 | 6 | 7 | def main(): 8 | K.set_image_dim_ordering('tf') 9 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 10 | 11 | from keras_video_classifier.library.recurrent_networks import VGG16LSTMVideoClassifier 12 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf, scan_ucf_with_labels 13 | 14 | vgg16_include_top = True 15 | data_dir_path = os.path.join(os.path.dirname(__file__), 'very_large_data') 16 | model_dir_path = os.path.join(os.path.dirname(__file__), 'models', 'UCF-101') 17 | config_file_path = VGG16LSTMVideoClassifier.get_config_file_path(model_dir_path, 18 | vgg16_include_top=vgg16_include_top) 19 | weight_file_path = VGG16LSTMVideoClassifier.get_weight_file_path(model_dir_path, 20 | vgg16_include_top=vgg16_include_top) 21 | 22 | np.random.seed(42) 23 | 24 | load_ucf(data_dir_path) 25 | 26 | predictor = VGG16LSTMVideoClassifier() 27 | predictor.load_model(config_file_path, weight_file_path) 28 | 29 | videos = scan_ucf_with_labels(data_dir_path, [label for (label, label_index) in predictor.labels.items()]) 30 | 31 | video_file_path_list = np.array([file_path for file_path in videos.keys()]) 32 | np.random.shuffle(video_file_path_list) 33 | 34 | correct_count = 0 35 | count = 0 36 | 37 | for video_file_path in video_file_path_list: 38 | label = videos[video_file_path] 39 | predicted_label = predictor.predict(video_file_path) 40 | print('predicted: ' + predicted_label + ' actual: ' + label) 41 | correct_count = correct_count + 1 if label == predicted_label else correct_count 42 | count += 1 43 | accuracy = correct_count / count 44 | print('accuracy: ', accuracy) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /demo/vgg16_lstm_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | import sys 4 | import os 5 | 6 | 7 | def main(): 8 | K.set_image_dim_ordering('tf') 9 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 10 | 11 | from keras_video_classifier.library.utility.plot_utils import plot_and_save_history 12 | from keras_video_classifier.library.recurrent_networks import VGG16LSTMVideoClassifier 13 | from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf 14 | 15 | data_set_name = 'UCF-101' 16 | input_dir_path = os.path.join(os.path.dirname(__file__), 'very_large_data') 17 | output_dir_path = os.path.join(os.path.dirname(__file__), 'models', data_set_name) 18 | report_dir_path = os.path.join(os.path.dirname(__file__), 'reports', data_set_name) 19 | 20 | 
np.random.seed(42) 21 | 22 | # this line downloads the video files of UCF-101 dataset if they are not available in the very_large_data folder 23 | load_ucf(input_dir_path) 24 | 25 | classifier = VGG16LSTMVideoClassifier() 26 | 27 | history = classifier.fit(data_dir_path=input_dir_path, model_dir_path=output_dir_path, data_set_name=data_set_name) 28 | 29 | plot_and_save_history(history, VGG16LSTMVideoClassifier.model_name, 30 | report_dir_path + '/' + VGG16LSTMVideoClassifier.model_name + '-history.png') 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /keras_video_classifier/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/keras_video_classifier/__init__.py -------------------------------------------------------------------------------- /keras_video_classifier/library/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/keras_video_classifier/library/__init__.py -------------------------------------------------------------------------------- /keras_video_classifier/library/convolutional.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import Sequential 3 | from keras.callbacks import ModelCheckpoint 4 | from keras.layers import Conv2D, Activation, MaxPooling2D, Dropout, Flatten, Dense 5 | from keras.utils import np_utils 6 | from sklearn.model_selection import train_test_split 7 | from keras.utils.vis_utils import plot_model 8 | 9 | from keras_video_classifier.library.utility.frame_extractors.frame_extractor import scan_and_extract_videos_for_conv2d, \ 10 | extract_videos_for_conv2d 11 | 12 | BATCH_SIZE = 32 13 | NUM_EPOCHS = 20 14 | 15 | 16 | def generate_batch(x_samples, y_samples): 17 | num_batches = len(x_samples) // BATCH_SIZE 18 | 19 | while True: 20 | for batchIdx in range(0, num_batches): 21 | start = batchIdx * BATCH_SIZE 22 | end = (batchIdx + 1) * BATCH_SIZE 23 | yield np.array(x_samples[start:end]), y_samples[start:end] 24 | 25 | 26 | class CnnVideoClassifier(object): 27 | model_name = 'cnn' 28 | 29 | def __init__(self): 30 | self.img_width = None 31 | self.img_height = None 32 | self.img_channels = None 33 | self.nb_classes = None 34 | self.labels = None 35 | self.labels_idx2word = None 36 | self.model = None 37 | self.expected_frames = None 38 | self.config = None 39 | 40 | def create_model(self, input_shape, nb_classes): 41 | model = Sequential() 42 | model.add(Conv2D(filters=32, input_shape=input_shape, padding='same', kernel_size=(3, 3))) 43 | model.add(Activation('relu')) 44 | model.add(MaxPooling2D(pool_size=(2, 2))) 45 | 46 | model.add(Conv2D(filters=32, padding='same', kernel_size=(3, 3))) 47 | model.add(Activation('relu')) 48 | model.add(MaxPooling2D(pool_size=(2, 2))) 49 | 50 | model.add(Dropout(rate=0.25)) 51 | 52 | model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same')) 53 | model.add(Activation('relu')) 54 | model.add(MaxPooling2D(pool_size=(2, 2))) 55 | 56 | model.add(Conv2D(filters=64, padding='same', kernel_size=(3, 3))) 57 | model.add(Activation('relu')) 58 | model.add(MaxPooling2D(pool_size=(2, 2))) 59 | 60 | model.add(Dropout(rate=0.25)) 61 | 62 | model.add(Flatten()) 63 | 
model.add(Dense(units=512)) 64 | model.add(Activation('relu')) 65 | model.add(Dropout(rate=0.5)) 66 | model.add(Dense(units=nb_classes)) 67 | model.add(Activation('softmax')) 68 | 69 | model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) 70 | 71 | return model 72 | 73 | @staticmethod 74 | def get_config_file_path(model_dir_path): 75 | return model_dir_path + '/' + CnnVideoClassifier.model_name + '-config.npy' 76 | 77 | @staticmethod 78 | def get_weight_file_path(model_dir_path): 79 | return model_dir_path + '/' + CnnVideoClassifier.model_name + '-weights.h5' 80 | 81 | @staticmethod 82 | def get_architecture_file_path(model_dir_path): 83 | return model_dir_path + '/' + CnnVideoClassifier.model_name + '-architecture.json' 84 | 85 | def load_model(self, config_file_path, weight_file_path): 86 | 87 | config = np.load(config_file_path).item() 88 | self.img_width = config['img_width'] 89 | self.img_height = config['img_height'] 90 | self.nb_classes = config['nb_classes'] 91 | self.labels = config['labels'] 92 | self.expected_frames = config['expected_frames'] 93 | self.labels_idx2word = dict([(idx, word) for word, idx in self.labels.items()]) 94 | self.config = config 95 | 96 | self.model = self.create_model( 97 | input_shape=(self.img_width, self.img_height, self.expected_frames), 98 | nb_classes=self.nb_classes) 99 | self.model.load_weights(weight_file_path) 100 | 101 | def predict(self, video_file_path): 102 | x = extract_videos_for_conv2d(video_file_path, None, self.expected_frames) 103 | frames = x.shape[2] 104 | if frames > self.expected_frames: 105 | x = x[:, :, 0:self.expected_frames] 106 | elif frames < self.expected_frames: 107 | temp = np.zeros(shape=(x.shape[0], x.shape[1], self.expected_frames)) 108 | temp[:, :, 0:frames] = x 109 | x = temp 110 | predicted_class = np.argmax(self.model.predict(np.array([x]))[0]) 111 | predicted_label = self.labels_idx2word[predicted_class] 112 | return predicted_label 113 | 114 | def fit(self, data_dir_path, model_dir_path, epochs=NUM_EPOCHS, data_set_name='UCF-101', max_frames=10, 115 | test_size=0.3, 116 | random_state=42): 117 | 118 | config_file_path = self.get_config_file_path(model_dir_path) 119 | weight_file_path = self.get_weight_file_path(model_dir_path) 120 | architecture_file_path = self.get_architecture_file_path(model_dir_path) 121 | 122 | self.labels = dict() 123 | x_samples, y_samples = scan_and_extract_videos_for_conv2d(data_dir_path, 124 | max_frames=max_frames, 125 | data_set_name=data_set_name) 126 | self.img_width, self.img_height, _ = x_samples[0].shape 127 | frames_list = [] 128 | for x in x_samples: 129 | frames = x.shape[2] 130 | frames_list.append(frames) 131 | max_frames = max(frames, max_frames) 132 | self.expected_frames = int(np.mean(frames_list)) 133 | print('max frames: ', max_frames) 134 | print('expected frames: ', self.expected_frames) 135 | for i in range(len(x_samples)): 136 | x = x_samples[i] 137 | frames = x.shape[2] 138 | if frames > self.expected_frames: 139 | x = x[:, :, 0:self.expected_frames] 140 | x_samples[i] = x 141 | elif frames < self.expected_frames: 142 | temp = np.zeros(shape=(x.shape[0], x.shape[1], self.expected_frames)) 143 | temp[:, :, 0:frames] = x 144 | x_samples[i] = temp 145 | for y in y_samples: 146 | if y not in self.labels: 147 | self.labels[y] = len(self.labels) 148 | print(self.labels) 149 | for i in range(len(y_samples)): 150 | y_samples[i] = self.labels[y_samples[i]] 151 | 152 | self.nb_classes = len(self.labels) 153 | 154 | y_samples = 
np_utils.to_categorical(y_samples, self.nb_classes) 155 | 156 | config = dict() 157 | config['labels'] = self.labels 158 | config['nb_classes'] = self.nb_classes 159 | config['img_width'] = self.img_width 160 | config['img_height'] = self.img_height 161 | config['expected_frames'] = self.expected_frames 162 | 163 | print(config) 164 | 165 | self.config = config 166 | 167 | np.save(config_file_path, config) 168 | 169 | model = self.create_model(input_shape=(self.img_width, self.img_height, self.expected_frames), 170 | nb_classes=self.nb_classes) 171 | open(architecture_file_path, 'w').write(model.to_json()) 172 | 173 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(x_samples, y_samples, test_size=test_size, 174 | random_state=random_state) 175 | 176 | train_gen = generate_batch(Xtrain, Ytrain) 177 | test_gen = generate_batch(Xtest, Ytest) 178 | 179 | train_num_batches = len(Xtrain) // BATCH_SIZE 180 | test_num_batches = len(Xtest) // BATCH_SIZE 181 | 182 | print('start fit_generator') 183 | 184 | checkpoint = ModelCheckpoint(filepath=weight_file_path, save_best_only=True) 185 | history = model.fit_generator(generator=train_gen, steps_per_epoch=train_num_batches, 186 | epochs=epochs, 187 | verbose=1, validation_data=test_gen, validation_steps=test_num_batches, 188 | callbacks=[checkpoint]) 189 | model.save_weights(weight_file_path) 190 | 191 | return history 192 | 193 | def save_graph(self, to_file): 194 | plot_model(self.model, to_file=to_file) 195 | 196 | -------------------------------------------------------------------------------- /keras_video_classifier/library/recurrent_networks.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dense, Activation, Dropout, Bidirectional 2 | from keras.layers.recurrent import LSTM 3 | from keras.models import Sequential 4 | from keras.applications.vgg16 import VGG16 5 | from keras.optimizers import SGD 6 | from keras import backend as K 7 | from keras.utils import np_utils 8 | from sklearn.model_selection import train_test_split 9 | from keras.callbacks import ModelCheckpoint 10 | import os 11 | import numpy as np 12 | 13 | from keras_video_classifier.library.utility.frame_extractors.vgg16_feature_extractor import extract_vgg16_features_live, \ 14 | scan_and_extract_vgg16_features 15 | 16 | BATCH_SIZE = 64 17 | NUM_EPOCHS = 20 18 | VERBOSE = 1 19 | HIDDEN_UNITS = 512 20 | MAX_ALLOWED_FRAMES = 20 21 | EMBEDDING_SIZE = 100 22 | 23 | K.set_image_dim_ordering('tf') 24 | 25 | 26 | def generate_batch(x_samples, y_samples): 27 | num_batches = len(x_samples) // BATCH_SIZE 28 | 29 | while True: 30 | for batchIdx in range(0, num_batches): 31 | start = batchIdx * BATCH_SIZE 32 | end = (batchIdx + 1) * BATCH_SIZE 33 | yield np.array(x_samples[start:end]), y_samples[start:end] 34 | 35 | 36 | class VGG16BidirectionalLSTMVideoClassifier(object): 37 | model_name = 'vgg16-bidirectional-lstm' 38 | 39 | def __init__(self): 40 | self.num_input_tokens = None 41 | self.nb_classes = None 42 | self.labels = None 43 | self.labels_idx2word = None 44 | self.model = None 45 | self.vgg16_model = None 46 | self.expected_frames = None 47 | self.vgg16_include_top = True 48 | self.config = None 49 | 50 | def create_model(self): 51 | model = Sequential() 52 | model.add(Bidirectional(LSTM(units=HIDDEN_UNITS, return_sequences=True), 53 | input_shape=(self.expected_frames, self.num_input_tokens))) 54 | model.add(Bidirectional(LSTM(10))) 55 | model.add(Dense(512, activation='relu')) 56 | model.add(Dropout(0.5)) 57 | 58 | 
model.add(Dense(self.nb_classes))
59 | 
60 | model.add(Activation('softmax'))
61 | 
62 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
63 | 
64 | return model
65 | 
66 | @staticmethod
67 | def get_config_file_path(model_dir_path, vgg16_include_top=None):
68 | if vgg16_include_top is None:
69 | vgg16_include_top = True
70 | if vgg16_include_top:
71 | return model_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-config.npy'
72 | else:
73 | return model_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-hi-dim-config.npy'
74 | 
75 | @staticmethod
76 | def get_weight_file_path(model_dir_path, vgg16_include_top=None):
77 | if vgg16_include_top is None:
78 | vgg16_include_top = True
79 | if vgg16_include_top:
80 | return model_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-weights.h5'
81 | else:
82 | return model_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-hi-dim-weights.h5'
83 | 
84 | @staticmethod
85 | def get_architecture_file_path(model_dir_path, vgg16_include_top=None):
86 | if vgg16_include_top is None:
87 | vgg16_include_top = True
88 | if vgg16_include_top:
89 | return model_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-architecture.json'
90 | else:
91 | return model_dir_path + '/' + VGG16BidirectionalLSTMVideoClassifier.model_name + '-hi-dim-architecture.json'
92 | 
93 | def load_model(self, config_file_path, weight_file_path):
94 | if os.path.exists(config_file_path):
95 | print('loading configuration from ', config_file_path)
96 | else:
97 | raise ValueError('cannot locate config file {}'.format(config_file_path))
98 | 
99 | config = np.load(config_file_path).item()
100 | self.num_input_tokens = config['num_input_tokens']
101 | self.nb_classes = config['nb_classes']
102 | self.labels = config['labels']
103 | self.expected_frames = config['expected_frames']
104 | self.vgg16_include_top = config['vgg16_include_top']
105 | self.labels_idx2word = dict([(idx, word) for word, idx in self.labels.items()])
106 | self.config = config
107 | 
108 | self.model = self.create_model()
109 | if os.path.exists(weight_file_path):
110 | print('loading network weights from ', weight_file_path)
111 | else:
112 | raise ValueError('cannot locate weight file {}'.format(weight_file_path))
113 | 
114 | self.model.load_weights(weight_file_path)
115 | 
116 | print('building VGG16 with pre-trained ImageNet weights')
117 | vgg16_model = VGG16(include_top=self.vgg16_include_top, weights='imagenet')
118 | vgg16_model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy'])
119 | self.vgg16_model = vgg16_model
120 | 
121 | def predict(self, video_file_path):
122 | x = extract_vgg16_features_live(self.vgg16_model, video_file_path)
123 | frames = x.shape[0]
124 | if frames > self.expected_frames:
125 | x = x[0:self.expected_frames, :]
126 | elif frames < self.expected_frames:
127 | temp = np.zeros(shape=(self.expected_frames, x.shape[1]))
128 | temp[0:frames, :] = x
129 | x = temp
130 | predicted_class = np.argmax(self.model.predict(np.array([x]))[0])
131 | predicted_label = self.labels_idx2word[predicted_class]
132 | return predicted_label
133 | 
134 | def fit(self, data_dir_path, model_dir_path, vgg16_include_top=True, data_set_name='UCF-101', test_size=0.3,
135 | random_state=42):
136 | 
137 | self.vgg16_include_top = vgg16_include_top
138 | 
139 | config_file_path = self.get_config_file_path(model_dir_path, vgg16_include_top)
140 | weight_file_path = 
self.get_weight_file_path(model_dir_path, vgg16_include_top) 141 | architecture_file_path = self.get_architecture_file_path(model_dir_path, vgg16_include_top) 142 | 143 | self.vgg16_model = VGG16(include_top=self.vgg16_include_top, weights='imagenet') 144 | self.vgg16_model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy']) 145 | 146 | feature_dir_name = data_set_name + '-VGG16-Features' 147 | if not vgg16_include_top: 148 | feature_dir_name = data_set_name + '-VGG16-HiDimFeatures' 149 | max_frames = 0 150 | self.labels = dict() 151 | x_samples, y_samples = scan_and_extract_vgg16_features(data_dir_path, 152 | output_dir_path=feature_dir_name, 153 | model=self.vgg16_model, 154 | data_set_name=data_set_name) 155 | self.num_input_tokens = x_samples[0].shape[1] 156 | frames_list = [] 157 | for x in x_samples: 158 | frames = x.shape[0] 159 | frames_list.append(frames) 160 | max_frames = max(frames, max_frames) 161 | self.expected_frames = int(np.mean(frames_list)) 162 | print('max frames: ', max_frames) 163 | print('expected frames: ', self.expected_frames) 164 | for i in range(len(x_samples)): 165 | x = x_samples[i] 166 | frames = x.shape[0] 167 | if frames > self.expected_frames: 168 | x = x[0:self.expected_frames, :] 169 | x_samples[i] = x 170 | elif frames < self.expected_frames: 171 | temp = np.zeros(shape=(self.expected_frames, x.shape[1])) 172 | temp[0:frames, :] = x 173 | x_samples[i] = temp 174 | for y in y_samples: 175 | if y not in self.labels: 176 | self.labels[y] = len(self.labels) 177 | print(self.labels) 178 | for i in range(len(y_samples)): 179 | y_samples[i] = self.labels[y_samples[i]] 180 | 181 | self.nb_classes = len(self.labels) 182 | 183 | y_samples = np_utils.to_categorical(y_samples, self.nb_classes) 184 | 185 | config = dict() 186 | config['labels'] = self.labels 187 | config['nb_classes'] = self.nb_classes 188 | config['num_input_tokens'] = self.num_input_tokens 189 | config['expected_frames'] = self.expected_frames 190 | config['vgg16_include_top'] = self.vgg16_include_top 191 | 192 | self.config = config 193 | 194 | np.save(config_file_path, config) 195 | 196 | model = self.create_model() 197 | open(architecture_file_path, 'w').write(model.to_json()) 198 | 199 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(x_samples, y_samples, test_size=test_size, 200 | random_state=random_state) 201 | 202 | train_gen = generate_batch(Xtrain, Ytrain) 203 | test_gen = generate_batch(Xtest, Ytest) 204 | 205 | train_num_batches = len(Xtrain) // BATCH_SIZE 206 | test_num_batches = len(Xtest) // BATCH_SIZE 207 | 208 | checkpoint = ModelCheckpoint(filepath=weight_file_path, save_best_only=True) 209 | history = model.fit_generator(generator=train_gen, steps_per_epoch=train_num_batches, 210 | epochs=NUM_EPOCHS, 211 | verbose=1, validation_data=test_gen, validation_steps=test_num_batches, 212 | callbacks=[checkpoint]) 213 | model.save_weights(weight_file_path) 214 | 215 | return history 216 | 217 | 218 | class VGG16LSTMVideoClassifier(object): 219 | model_name = 'vgg16-lstm' 220 | 221 | def __init__(self): 222 | self.num_input_tokens = None 223 | self.nb_classes = None 224 | self.labels = None 225 | self.labels_idx2word = None 226 | self.model = None 227 | self.vgg16_model = None 228 | self.expected_frames = None 229 | self.vgg16_include_top = None 230 | self.config = None 231 | 232 | @staticmethod 233 | def get_config_file_path(model_dir_path, vgg16_include_top=None): 234 | if vgg16_include_top is None: 235 | vgg16_include_top = True 236 | if 
vgg16_include_top: 237 | return model_dir_path + '/' + VGG16LSTMVideoClassifier.model_name + '-config.npy' 238 | else: 239 | return model_dir_path + '/' + VGG16LSTMVideoClassifier.model_name + '-hi-dim-config.npy' 240 | 241 | @staticmethod 242 | def get_weight_file_path(model_dir_path, vgg16_include_top=None): 243 | if vgg16_include_top is None: 244 | vgg16_include_top = True 245 | if vgg16_include_top: 246 | return model_dir_path + '/' + VGG16LSTMVideoClassifier.model_name + '-weights.h5' 247 | else: 248 | return model_dir_path + '/' + VGG16LSTMVideoClassifier.model_name + '-hi-dim-weights.h5' 249 | 250 | @staticmethod 251 | def get_architecture_file_path(model_dir_path, vgg16_include_top=None): 252 | if vgg16_include_top is None: 253 | vgg16_include_top = True 254 | if vgg16_include_top: 255 | return model_dir_path + '/' + VGG16LSTMVideoClassifier.model_name + '-architecture.json' 256 | else: 257 | return model_dir_path + '/' + VGG16LSTMVideoClassifier.model_name + '-hi-dim-architecture.json' 258 | 259 | def create_model(self): 260 | model = Sequential() 261 | 262 | model.add( 263 | LSTM(units=HIDDEN_UNITS, input_shape=(None, self.num_input_tokens), return_sequences=False, dropout=0.5)) 264 | model.add(Dense(512, activation='relu')) 265 | model.add(Dropout(0.5)) 266 | model.add(Dense(self.nb_classes)) 267 | model.add(Activation('softmax')) 268 | 269 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) 270 | return model 271 | 272 | def load_model(self, config_file_path, weight_file_path): 273 | 274 | config = np.load(config_file_path).item() 275 | self.num_input_tokens = config['num_input_tokens'] 276 | self.nb_classes = config['nb_classes'] 277 | self.labels = config['labels'] 278 | self.expected_frames = config['expected_frames'] 279 | self.vgg16_include_top = config['vgg16_include_top'] 280 | self.labels_idx2word = dict([(idx, word) for word, idx in self.labels.items()]) 281 | 282 | self.model = self.create_model() 283 | self.model.load_weights(weight_file_path) 284 | 285 | vgg16_model = VGG16(include_top=self.vgg16_include_top, weights='imagenet') 286 | vgg16_model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy']) 287 | self.vgg16_model = vgg16_model 288 | 289 | def predict(self, video_file_path): 290 | x = extract_vgg16_features_live(self.vgg16_model, video_file_path) 291 | frames = x.shape[0] 292 | if frames > self.expected_frames: 293 | x = x[0:self.expected_frames, :] 294 | elif frames < self.expected_frames: 295 | temp = np.zeros(shape=(self.expected_frames, x.shape[1])) 296 | temp[0:frames, :] = x 297 | x = temp 298 | predicted_class = np.argmax(self.model.predict(np.array([x]))[0]) 299 | predicted_label = self.labels_idx2word[predicted_class] 300 | return predicted_label 301 | 302 | def fit(self, data_dir_path, model_dir_path, vgg16_include_top=True, data_set_name='UCF-101', test_size=0.3, random_state=42): 303 | self.vgg16_include_top = vgg16_include_top 304 | 305 | config_file_path = self.get_config_file_path(model_dir_path, vgg16_include_top) 306 | weight_file_path = self.get_weight_file_path(model_dir_path, vgg16_include_top) 307 | architecture_file_path = self.get_architecture_file_path(model_dir_path, vgg16_include_top) 308 | 309 | vgg16_model = VGG16(include_top=self.vgg16_include_top, weights='imagenet') 310 | vgg16_model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy']) 311 | self.vgg16_model = vgg16_model 312 | 313 | feature_dir_name = data_set_name + '-VGG16-Features' 
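# With include_top=True, each frame is encoded as VGG16's 1000-dimensional
# ImageNet softmax output; with include_top=False, the flattened final
# conv-block activations are used instead (7 * 7 * 512 = 25088 values per
# 224x224 frame). The two feature sizes are therefore cached in separate
# directories below.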
314 | if not vgg16_include_top:
315 | feature_dir_name = data_set_name + '-VGG16-HiDimFeatures'
316 | max_frames = 0
317 | self.labels = dict()
318 | x_samples, y_samples = scan_and_extract_vgg16_features(data_dir_path,
319 | output_dir_path=feature_dir_name,
320 | model=self.vgg16_model,
321 | data_set_name=data_set_name)
322 | self.num_input_tokens = x_samples[0].shape[1]
323 | frames_list = []
324 | for x in x_samples:
325 | frames = x.shape[0]
326 | frames_list.append(frames)
327 | max_frames = max(frames, max_frames)
328 | self.expected_frames = int(np.mean(frames_list))
329 | print('max frames: ', max_frames)
330 | print('expected frames: ', self.expected_frames)
331 | for i in range(len(x_samples)):
332 | x = x_samples[i]
333 | frames = x.shape[0]
334 | print(x.shape)
335 | if frames > self.expected_frames:
336 | x = x[0:self.expected_frames, :]
337 | x_samples[i] = x
338 | elif frames < self.expected_frames:
339 | temp = np.zeros(shape=(self.expected_frames, x.shape[1]))
340 | temp[0:frames, :] = x
341 | x_samples[i] = temp
342 | for y in y_samples:
343 | if y not in self.labels:
344 | self.labels[y] = len(self.labels)
345 | print(self.labels)
346 | for i in range(len(y_samples)):
347 | y_samples[i] = self.labels[y_samples[i]]
348 | 
349 | self.nb_classes = len(self.labels)
350 | 
351 | y_samples = np_utils.to_categorical(y_samples, self.nb_classes)
352 | 
353 | config = dict()
354 | config['labels'] = self.labels
355 | config['nb_classes'] = self.nb_classes
356 | config['num_input_tokens'] = self.num_input_tokens
357 | config['expected_frames'] = self.expected_frames
358 | config['vgg16_include_top'] = self.vgg16_include_top
359 | self.config = config
360 | 
361 | np.save(config_file_path, config)
362 | 
363 | model = self.create_model()
364 | open(architecture_file_path, 'w').write(model.to_json())
365 | 
366 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(x_samples, y_samples, test_size=test_size,
367 | random_state=random_state)
368 | 
369 | train_gen = generate_batch(Xtrain, Ytrain)
370 | test_gen = generate_batch(Xtest, Ytest)
371 | 
372 | train_num_batches = len(Xtrain) // BATCH_SIZE
373 | test_num_batches = len(Xtest) // BATCH_SIZE
374 | 
375 | checkpoint = ModelCheckpoint(filepath=weight_file_path, save_best_only=True)
376 | history = model.fit_generator(generator=train_gen, steps_per_epoch=train_num_batches,
377 | epochs=NUM_EPOCHS,
378 | verbose=1, validation_data=test_gen, validation_steps=test_num_batches,
379 | callbacks=[checkpoint])
380 | model.save_weights(weight_file_path)
381 | 
382 | return history
383 | 
--------------------------------------------------------------------------------
/keras_video_classifier/library/utility/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/keras_video_classifier/library/utility/__init__.py
--------------------------------------------------------------------------------
/keras_video_classifier/library/utility/device_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | from keras import backend as K
4 | 
5 | # Make sure that you have tensorflow-gpu installed if you want to use init_devices('gpu')
6 | 
7 | 
8 | def init_devices(device_type=None):
9 | if device_type is None:
10 | device_type = 'cpu'
11 | 
12 | num_cores = 4
13 | 
14 | if device_type == 'gpu':
15 | num_GPU = 1
16 | num_CPU = 1
17 | else:
18 | num_CPU = 1
19 | num_GPU = 0
20 | 
21 | config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,
22 | inter_op_parallelism_threads=num_cores, allow_soft_placement=True,
23 | device_count={'CPU': num_CPU, 'GPU': num_GPU})
24 | session = tf.Session(config=config)
25 | K.set_session(session)
26 | 
27 | 
28 | def print_devices():
29 | from tensorflow.python.client import device_lib
30 | print(device_lib.list_local_devices())
31 | 
--------------------------------------------------------------------------------
/keras_video_classifier/library/utility/frame_extractors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/keras_video_classifier/library/utility/frame_extractors/__init__.py
--------------------------------------------------------------------------------
/keras_video_classifier/library/utility/frame_extractors/frame_extractor.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import numpy as np
4 | 
5 | MAX_NB_CLASSES = 4
6 | 
7 | 
8 | def extract_images(video_input_file_path, image_output_dir_path):
9 | if os.path.exists(image_output_dir_path):
10 | return
11 | count = 0
12 | print('Extracting frames from video: ', video_input_file_path)
13 | vidcap = cv2.VideoCapture(video_input_file_path)
14 | success, image = vidcap.read()
15 | success = True
16 | while success:
17 | vidcap.set(cv2.CAP_PROP_POS_MSEC, (count * 1000))  # seek to second `count`, i.e. sample one frame per second
18 | success, image = vidcap.read()
19 | # print('Read a new frame: ', success)
20 | if success:
21 | cv2.imwrite(image_output_dir_path + os.path.sep + "frame%d.jpg" % count, image)  # save frame as JPEG file
22 | count = count + 1
23 | 
24 | 
25 | def extract_features(video_input_file_path, feature_output_file_path):
26 | if os.path.exists(feature_output_file_path):
27 | return np.load(feature_output_file_path)
28 | count = 0
29 | print('Extracting frames from video: ', video_input_file_path)
30 | vidcap = cv2.VideoCapture(video_input_file_path)
31 | success, image = vidcap.read()
32 | features = []
33 | success = True
34 | while success:
35 | vidcap.set(cv2.CAP_PROP_POS_MSEC, (count * 1000))  # seek to second `count`, i.e. sample one frame per second
36 | success, image = vidcap.read()
37 | # print('Read a new frame: ', success)
38 | if success:
39 | img = cv2.resize(image, (40, 40), interpolation=cv2.INTER_AREA)
40 | features.append(img)  # use the resized frame, not the full-resolution original
41 | count = count + 1
42 | unscaled_features = np.array(features)
43 | print(unscaled_features.shape)
44 | np.save(feature_output_file_path, unscaled_features)
45 | return unscaled_features
46 | 
47 | 
48 | def extract_videos_for_conv2d(video_input_file_path, feature_output_file_path, max_frames):
49 | if feature_output_file_path is not None:
50 | if os.path.exists(feature_output_file_path):
51 | return np.load(feature_output_file_path)
52 | count = 0
53 | print('Extracting frames from video: ', video_input_file_path)
54 | vidcap = cv2.VideoCapture(video_input_file_path)
55 | success, image = vidcap.read()
56 | features = []
57 | success = True
58 | while success and count < max_frames:
59 | vidcap.set(cv2.CAP_PROP_POS_MSEC, (count * 1000))  # seek to second `count`, i.e. sample one frame per second
60 | success, image = vidcap.read()
61 | # print('Read a new frame: ', success)
62 | if success:
63 | image = cv2.resize(image, (240, 240), interpolation=cv2.INTER_AREA)
64 | channels = image.shape[2]
65 | for channel in range(channels):
66 | features.append(image[:, :, 
channel]) 67 | count = count + 1 68 | unscaled_features = np.array(features) 69 | unscaled_features = np.transpose(unscaled_features, axes=(1, 2, 0)) 70 | print(unscaled_features.shape) 71 | if feature_output_file_path is not None: 72 | np.save(feature_output_file_path, unscaled_features) 73 | return unscaled_features 74 | 75 | 76 | def scan_and_extract_images(data_dir_path): 77 | input_data_dir_path = data_dir_path + '/UCF-101' 78 | output_frame_data_dir_path = data_dir_path + '/UCF-101-Frames' 79 | 80 | if not os.path.exists(output_frame_data_dir_path): 81 | os.makedirs(output_frame_data_dir_path) 82 | 83 | dir_count = 0 84 | for f in os.listdir(input_data_dir_path): 85 | file_path = input_data_dir_path + os.path.sep + f 86 | if not os.path.isfile(file_path): 87 | output_dir_name = f 88 | output_dir_path = output_frame_data_dir_path + os.path.sep + output_dir_name 89 | if not os.path.exists(output_dir_path): 90 | os.makedirs(output_dir_path) 91 | dir_count += 1 92 | for ff in os.listdir(file_path): 93 | video_file_path = file_path + os.path.sep + ff 94 | output_image_folder_path = output_dir_path + os.path.sep + ff.split('.')[0] 95 | if not os.path.exists(output_image_folder_path): 96 | os.makedirs(output_image_folder_path) 97 | extract_images(video_file_path, output_image_folder_path) 98 | if dir_count == MAX_NB_CLASSES: 99 | break 100 | 101 | 102 | def scan_and_extract_features(data_dir_path, data_set_name=None): 103 | if data_set_name is None: 104 | data_set_name = 'UCF-101' 105 | input_data_dir_path = data_dir_path + '/' + data_set_name 106 | output_feature_data_dir_path = data_dir_path + '/' + data_set_name + '-Features' 107 | 108 | if not os.path.exists(output_feature_data_dir_path): 109 | os.makedirs(output_feature_data_dir_path) 110 | 111 | y_samples = [] 112 | x_samples = [] 113 | 114 | dir_count = 0 115 | for f in os.listdir(input_data_dir_path): 116 | file_path = input_data_dir_path + os.path.sep + f 117 | if not os.path.isfile(file_path): 118 | output_dir_name = f 119 | output_dir_path = output_feature_data_dir_path + os.path.sep + output_dir_name 120 | if not os.path.exists(output_dir_path): 121 | os.makedirs(output_dir_path) 122 | dir_count += 1 123 | for ff in os.listdir(file_path): 124 | video_file_path = file_path + os.path.sep + ff 125 | output_feature_file_path = output_dir_path + os.path.sep + ff.split('.')[0] + '.npy' 126 | x = extract_features(video_file_path, output_feature_file_path) 127 | y = f 128 | y_samples.append(y) 129 | x_samples.append(x) 130 | 131 | if dir_count == MAX_NB_CLASSES: 132 | break 133 | 134 | return x_samples, y_samples 135 | 136 | 137 | def scan_and_extract_videos_for_conv2d(data_dir_path, data_set_name=None, max_frames=None): 138 | if data_set_name is None: 139 | data_set_name = 'UCF-101' 140 | if max_frames is None: 141 | max_frames = 10 142 | 143 | input_data_dir_path = data_dir_path + '/' + data_set_name 144 | output_feature_data_dir_path = data_dir_path + '/' + data_set_name + '-Conv2d' 145 | 146 | if not os.path.exists(output_feature_data_dir_path): 147 | os.makedirs(output_feature_data_dir_path) 148 | 149 | y_samples = [] 150 | x_samples = [] 151 | 152 | dir_count = 0 153 | for f in os.listdir(input_data_dir_path): 154 | file_path = input_data_dir_path + os.path.sep + f 155 | if not os.path.isfile(file_path): 156 | output_dir_name = f 157 | output_dir_path = output_feature_data_dir_path + os.path.sep + output_dir_name 158 | if not os.path.exists(output_dir_path): 159 | os.makedirs(output_dir_path) 160 | dir_count += 1 161 | for ff in 
os.listdir(file_path): 162 | video_file_path = file_path + os.path.sep + ff 163 | output_feature_file_path = output_dir_path + os.path.sep + ff.split('.')[0] + '.npy' 164 | x = extract_videos_for_conv2d(video_file_path, output_feature_file_path, max_frames) 165 | y = f 166 | y_samples.append(y) 167 | x_samples.append(x) 168 | 169 | if dir_count == MAX_NB_CLASSES: 170 | break 171 | 172 | return x_samples, y_samples 173 | 174 | 175 | def main(): 176 | print(cv2.__version__) 177 | data_dir_path = '.././very_large_data' 178 | X, Y = scan_and_extract_videos_for_conv2d(data_dir_path) 179 | print(X[0].shape) 180 | 181 | 182 | if __name__ == '__main__': 183 | main() 184 | -------------------------------------------------------------------------------- /keras_video_classifier/library/utility/frame_extractors/vgg16_feature_extractor.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import numpy as np 4 | from keras.applications.vgg16 import VGG16, preprocess_input 5 | from keras.preprocessing.image import img_to_array 6 | from keras.optimizers import SGD 7 | 8 | MAX_NB_CLASSES = 20 9 | 10 | 11 | def extract_vgg16_features_live(model, video_input_file_path): 12 | print('Extracting frames from video: ', video_input_file_path) 13 | vidcap = cv2.VideoCapture(video_input_file_path) 14 | success, image = vidcap.read() 15 | features = [] 16 | success = True 17 | count = 0 18 | while success: 19 | vidcap.set(cv2.CAP_PROP_POS_MSEC, (count * 1000)) # added this line 20 | success, image = vidcap.read() 21 | # print('Read a new frame: ', success) 22 | if success: 23 | img = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA) 24 | input = img_to_array(img) 25 | input = np.expand_dims(input, axis=0) 26 | input = preprocess_input(input) 27 | feature = model.predict(input).ravel() 28 | features.append(feature) 29 | count = count + 1 30 | unscaled_features = np.array(features) 31 | return unscaled_features 32 | 33 | 34 | def extract_vgg16_features(model, video_input_file_path, feature_output_file_path): 35 | if os.path.exists(feature_output_file_path): 36 | return np.load(feature_output_file_path) 37 | count = 0 38 | print('Extracting frames from video: ', video_input_file_path) 39 | vidcap = cv2.VideoCapture(video_input_file_path) 40 | success, image = vidcap.read() 41 | features = [] 42 | success = True 43 | while success: 44 | vidcap.set(cv2.CAP_PROP_POS_MSEC, (count * 1000)) # added this line 45 | success, image = vidcap.read() 46 | # print('Read a new frame: ', success) 47 | if success: 48 | img = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA) 49 | input = img_to_array(img) 50 | input = np.expand_dims(input, axis=0) 51 | input = preprocess_input(input) 52 | feature = model.predict(input).ravel() 53 | features.append(feature) 54 | count = count + 1 55 | unscaled_features = np.array(features) 56 | np.save(feature_output_file_path, unscaled_features) 57 | return unscaled_features 58 | 59 | 60 | def scan_and_extract_vgg16_features(data_dir_path, output_dir_path, model=None, data_set_name=None): 61 | if data_set_name is None: 62 | data_set_name = 'UCF-101' 63 | 64 | input_data_dir_path = data_dir_path + '/' + data_set_name 65 | output_feature_data_dir_path = data_dir_path + '/' + output_dir_path 66 | 67 | if model is None: 68 | model = VGG16(include_top=True, weights='imagenet') 69 | model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy']) 70 | 71 | if not 
os.path.exists(output_feature_data_dir_path): 72 | os.makedirs(output_feature_data_dir_path) 73 | 74 | y_samples = [] 75 | x_samples = [] 76 | 77 | dir_count = 0 78 | for f in os.listdir(input_data_dir_path): 79 | file_path = input_data_dir_path + os.path.sep + f 80 | if not os.path.isfile(file_path): 81 | output_dir_name = f 82 | output_dir_path = output_feature_data_dir_path + os.path.sep + output_dir_name 83 | if not os.path.exists(output_dir_path): 84 | os.makedirs(output_dir_path) 85 | dir_count += 1 86 | for ff in os.listdir(file_path): 87 | video_file_path = file_path + os.path.sep + ff 88 | output_feature_file_path = output_dir_path + os.path.sep + ff.split('.')[0] + '.npy' 89 | x = extract_vgg16_features(model, video_file_path, output_feature_file_path) 90 | y = f 91 | y_samples.append(y) 92 | x_samples.append(x) 93 | 94 | if dir_count == MAX_NB_CLASSES: 95 | break 96 | 97 | return x_samples, y_samples 98 | 99 | -------------------------------------------------------------------------------- /keras_video_classifier/library/utility/plot_utils.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | import itertools 4 | 5 | 6 | def plot_confusion_matrix(cm, classes, 7 | normalize=False, 8 | title='Confusion matrix', 9 | cmap=plt.cm.Blues): 10 | """ 11 | See full source and example: 12 | http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html 13 | 14 | This function prints and plots the confusion matrix. 15 | Normalization can be applied by setting `normalize=True`. 16 | """ 17 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 18 | plt.title(title) 19 | plt.colorbar() 20 | tick_marks = np.arange(len(classes)) 21 | plt.xticks(tick_marks, classes, rotation=45) 22 | plt.yticks(tick_marks, classes) 23 | 24 | if normalize: 25 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 26 | print("Normalized confusion matrix") 27 | else: 28 | print('Confusion matrix, without normalization') 29 | 30 | thresh = cm.max() / 2. 31 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 32 | plt.text(j, i, cm[i, j], 33 | horizontalalignment="center", 34 | color="white" if cm[i, j] > thresh else "black") 35 | 36 | plt.tight_layout() 37 | plt.ylabel('True label') 38 | plt.xlabel('Predicted label') 39 | plt.show() 40 | 41 | 42 | def most_informative_feature_for_binary_classification(vectorizer, classifier, n=100): 43 | """ 44 | See: https://stackoverflow.com/a/26980472 45 | 46 | Identify most important features if given a vectorizer and binary classifier. Set n to the number 47 | of weighted features you would like to show. (Note: current implementation merely prints and does not 48 | return top classes.) 
49 | """ 50 | 51 | class_labels = classifier.classes_ 52 | feature_names = vectorizer.get_feature_names() 53 | topn_class1 = sorted(zip(classifier.coef_[0], feature_names))[:n] 54 | topn_class2 = sorted(zip(classifier.coef_[0], feature_names))[-n:] 55 | 56 | for coef, feat in topn_class1: 57 | print(class_labels[0], coef, feat) 58 | 59 | print() 60 | 61 | for coef, feat in reversed(topn_class2): 62 | print(class_labels[1], coef, feat) 63 | 64 | 65 | def plot_history_2win(history): 66 | plt.subplot(211) 67 | plt.title('Accuracy') 68 | plt.plot(history.history['acc'], color='g', label='Train') 69 | plt.plot(history.history['val_acc'], color='b', label='Validation') 70 | plt.legend(loc='best') 71 | 72 | plt.subplot(212) 73 | plt.title('Loss') 74 | plt.plot(history.history['loss'], color='g', label='Train') 75 | plt.plot(history.history['val_loss'], color='b', label='Validation') 76 | plt.legend(loc='best') 77 | 78 | plt.tight_layout() 79 | plt.show() 80 | 81 | 82 | def create_history_plot(history, model_name, metrics=None): 83 | plt.title('Accuracy and Loss (' + model_name + ')') 84 | if metrics is None: 85 | metrics = {'acc', 'loss'} 86 | if 'acc' in metrics: 87 | plt.plot(history.history['acc'], color='g', label='Train Accuracy') 88 | plt.plot(history.history['val_acc'], color='b', label='Validation Accuracy') 89 | if 'loss' in metrics: 90 | plt.plot(history.history['loss'], color='r', label='Train Loss') 91 | plt.plot(history.history['val_loss'], color='m', label='Validation Loss') 92 | plt.legend(loc='best') 93 | 94 | plt.tight_layout() 95 | 96 | 97 | def plot_history(history, model_name): 98 | create_history_plot(history, model_name) 99 | plt.show() 100 | 101 | 102 | def plot_and_save_history(history, model_name, file_path, metrics=None): 103 | if metrics is None: 104 | metrics = {'acc', 'loss'} 105 | create_history_plot(history, model_name, metrics) 106 | plt.savefig(file_path) 107 | -------------------------------------------------------------------------------- /keras_video_classifier/library/utility/ucf/UCF101_loader.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | import os 3 | import sys 4 | import patoolib 5 | 6 | URL_LINK = 'http://crcv.ucf.edu/data/UCF101/UCF101.rar' 7 | 8 | 9 | def reporthook(block_num, block_size, total_size): 10 | read_so_far = block_num * block_size 11 | if total_size > 0: 12 | percent = read_so_far * 1e2 / total_size 13 | s = "\r%5.1f%% %*d / %d" % ( 14 | percent, len(str(total_size)), read_so_far, total_size) 15 | sys.stderr.write(s) 16 | if read_so_far >= total_size: # near the end 17 | sys.stderr.write("\n") 18 | else: # total size is unknown 19 | sys.stderr.write("read %d\n" % (read_so_far,)) 20 | 21 | 22 | def download_ucf(data_dir_path): 23 | ucf_rar = data_dir_path + '/UCF101.rar' 24 | 25 | if not os.path.exists(data_dir_path): 26 | os.makedirs(data_dir_path) 27 | 28 | if not os.path.exists(ucf_rar): 29 | print('ucf file does not exist, downloading from internet') 30 | urllib.request.urlretrieve(url=URL_LINK, filename=ucf_rar, 31 | reporthook=reporthook) 32 | 33 | print('unzipping ucf file') 34 | patoolib.extract_archive(ucf_rar, outdir=data_dir_path) 35 | 36 | 37 | def scan_ucf(data_dir_path, limit): 38 | input_data_dir_path = data_dir_path + '/UCF-101' 39 | 40 | result = dict() 41 | 42 | dir_count = 0 43 | for f in os.listdir(input_data_dir_path): 44 | file_path = input_data_dir_path + os.path.sep + f 45 | if not os.path.isfile(file_path): 46 | dir_count += 1 47 | for ff in 
os.listdir(file_path):
48 | video_file_path = file_path + os.path.sep + ff
49 | result[video_file_path] = f
50 | if dir_count == limit:
51 | break
52 | return result
53 | 
54 | 
55 | def scan_ucf_with_labels(data_dir_path, labels):
56 | input_data_dir_path = data_dir_path + '/UCF-101'
57 | 
58 | result = dict()
59 | 
60 | dir_count = 0
61 | for label in labels:
62 | file_path = input_data_dir_path + os.path.sep + label
63 | if not os.path.isfile(file_path):
64 | dir_count += 1
65 | for ff in os.listdir(file_path):
66 | video_file_path = file_path + os.path.sep + ff
67 | result[video_file_path] = label
68 | return result
69 | 
70 | 
71 | 
72 | def load_ucf(data_dir_path):
73 | UCF101_data_dir_path = data_dir_path + "/UCF-101"
74 | if not os.path.exists(UCF101_data_dir_path):
75 | download_ucf(data_dir_path)
76 | 
77 | 
78 | def main():
79 | data_dir_path = '../very_large_data'
80 | load_ucf(data_dir_path)
81 | 
82 | 
83 | if __name__ == '__main__':
84 | main()
85 | 
--------------------------------------------------------------------------------
/keras_video_classifier/library/utility/ucf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chen0040/keras-video-classifier/062e5cd4f730de347a4d835d56da1909b859c974/keras_video_classifier/library/utility/ucf/__init__.py
--------------------------------------------------------------------------------
/notes/ReadMe.md:
--------------------------------------------------------------------------------
1 | # DataSet
2 | 
3 | * [UCF101 - Action Recognition Data Set](http://crcv.ucf.edu/data/UCF101.php)
4 | * [CV Datasets on the web](http://www.cvpapers.com/datasets.html)
5 | * [Deep Learning Datasets](http://deeplearning.net/datasets/)
6 | 
7 | # Install Some Libraries
8 | 
9 | To install OpenCV 3 on Windows with Anaconda:
10 | 
11 | If you are using Python 3.5 or below, install OpenCV 3 using the following command:
12 | 
13 | ```bash
14 | conda install -c menpo opencv3
15 | ```
16 | 
17 | If you are using Python 3.6, install OpenCV using the following command:
18 | 
19 | ```bash
20 | pip install opencv-python
21 | ```
22 | 
23 | # Split Data for Training and Test
24 | 
25 | ```python
26 | 
27 | import numpy as np
28 | from keras.models import Sequential
29 | from keras.layers import Dense
30 | from sklearn.model_selection import StratifiedKFold
31 | 
32 | seed = 7
33 | np.random.seed(seed)
34 | 
35 | X = []  # replace with a numpy array of shape (n_samples, 12); the fold indices below cannot slice a plain list
36 | Y = []  # replace with a numpy array of integer class labels; StratifiedKFold requires discrete classes
37 | 
38 | kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
39 | for train, test in kfold.split(X, Y):
40 | model = Sequential()
41 | model.add(Dense(64, input_dim=12, activation='relu'))
42 | model.add(Dense(1))
43 | model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
44 | model.fit(X[train], Y[train], epochs=10, verbose=1)
45 | 
46 | ```
47 | 
48 | # Model Evaluation
49 | 
50 | ### Classification Model Evaluation
51 | 
52 | * The confusion matrix, which is a breakdown of predictions into a table showing correct predictions and the types of incorrect predictions made. Ideally, you will only see numbers on the diagonal, which means that all your predictions were correct!
53 | * Precision is a measure of a classifier’s exactness: the higher the precision, the fewer false positives the classifier produces.
54 | * Recall is a measure of a classifier’s completeness: the higher the recall, the fewer positive cases the classifier misses.
55 | * The F1 score (or F-score) is the harmonic mean of precision and recall. 
56 | * The Kappa or Cohen’s kappa is the classification accuracy normalized by the imbalance of the classes in the data.
57 | 
58 | ```python
59 | import numpy as np
60 | # Import the metrics from `sklearn.metrics`
61 | from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score
62 | 
63 | # Convert one-hot rows into class-label vectors; these metrics expect labels
64 | y_test = np.argmax([[0, 0, 1], [0, 1, 0]], axis=1)
65 | y_pred = np.argmax([[1, 0, 0], [0, 1, 0]], axis=1)
66 | 
67 | # Confusion matrix
68 | confusion_matrix(y_test, y_pred)
69 | 
70 | # Precision (multi-class, so an averaging strategy must be chosen)
71 | precision_score(y_test, y_pred, average='micro')
72 | 
73 | # Recall
74 | recall_score(y_test, y_pred, average='micro')
75 | 
76 | # F1 score
77 | f1_score(y_test, y_pred, average='micro')
78 | 
79 | cohen_kappa_score(y_test, y_pred)  # Cohen's kappa
80 | ```
81 | 
82 | ### Regression Model Evaluation
83 | 
84 | * R2: coefficient of determination
85 | * MSE: mean squared error
86 | * MAE: mean absolute error
87 | 
88 | ```python
89 | from sklearn.metrics import r2_score
90 | 
91 | y_test = [[0, 0, 1], [0, 1, 0]]
92 | y_pred = [[1, 0, 0], [0, 1, 0]]
93 | 
94 | r2_score(y_test, y_pred)
95 | 
96 | ```
97 | 
98 | 
--------------------------------------------------------------------------------
/requirements-on-my-python-env.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.1.13
2 | alabaster==0.7.10
3 | amqp==2.1.4
4 | anaconda-client==1.6.3
5 | anaconda-navigator==1.6.2
6 | anaconda-project==0.6.0
7 | asn1crypto==0.22.0
8 | astor==0.6.2
9 | astroid==1.4.9
10 | astropy==1.3.2
11 | Babel==2.4.0
12 | backports.shutil-get-terminal-size==1.0.0
13 | backports.weakref==1.0rc1
14 | beautifulsoup4==4.6.0
15 | billiard==3.5.0.2
16 | bitarray==0.8.1
17 | blaze==0.10.1
18 | bleach==1.5.0
19 | bokeh==0.12.5
20 | boto==2.46.1
21 | Bottleneck==1.2.1
22 | bz2file==0.98
23 | celery==4.1.0
24 | cffi==1.10.0
25 | chardet==3.0.3
26 | click==6.7
27 | cloudpickle==0.2.2
28 | clyent==1.2.2
29 | colorama==0.3.9
30 | comtypes==1.1.2
31 | conda==4.4.1
32 | contextlib2==0.5.5
33 | cryptography==1.8.1
34 | cycler==0.10.0
35 | Cython==0.25.2
36 | cytoolz==0.8.2
37 | dask==0.14.3
38 | datashape==0.5.4
39 | decorator==4.0.11
40 | distributed==1.16.3
41 | docutils==0.13.1
42 | entrypoints==0.2.2
43 | enum34==1.1.6
44 | et-xmlfile==1.0.1
45 | fastcache==1.0.2
46 | Flask==0.12.2
47 | Flask-Cors==3.0.2
48 | gast==0.2.0
49 | gensim==2.3.0
50 | gevent==1.2.2
51 | greenlet==0.4.12
52 | grpcio==1.10.1
53 | h5py==2.7.1
54 | HeapDict==1.0.0
55 | html5lib==0.9999999
56 | idna==2.5
57 | imagesize==0.7.1
58 | ipykernel==4.6.1
59 | ipython==5.3.0
60 | ipython-genutils==0.2.0
61 | ipywidgets==6.0.0
62 | isort==4.2.5
63 | itsdangerous==0.24
64 | jdcal==1.3
65 | jedi==0.10.2
66 | Jinja2==2.9.6
67 | jsonschema==2.6.0
68 | jupyter==1.0.0
69 | jupyter-client==5.0.1
70 | jupyter-console==5.1.0
71 | jupyter-core==4.3.0
72 | Keras==2.1.2
73 | kombu==4.1.0
74 | lazy-object-proxy==1.2.2
75 | llvmlite==0.18.0
76 | locket==0.2.0
77 | lxml==3.7.3
78 | Markdown==2.6.11
79 | MarkupSafe==0.23
80 | matplotlib==2.1.1
81 | menuinst==1.4.7
82 | mistune==0.7.4
83 | mpmath==0.19
84 | msgpack-python==0.4.8
85 | multipledispatch==0.4.9
86 | navigator-updater==0.1.0
87 | nbconvert==5.1.1
88 | nbformat==4.3.0
89 | networkx==1.11
90 | nltk==3.2.5
91 | nose==1.3.7
92 | notebook==5.0.0
93 | numba==0.33.0
94 | numexpr==2.6.4
95 | numpy==1.13.3
96 | numpydoc==0.6.0
97 | odo==0.5.0
98 | olefile==0.44
99 | opencv-python==3.4.0.12
100 | openpyxl==2.4.7
101 | packaging==16.8
102 | pandas==0.20.1
103 | pandocfilters==1.4.1
104 | partd==0.3.8
105 | path.py==10.3.1
106 | pathlib2==2.2.1
107 | patool==1.12
108 | patsy==0.4.1
109 | pep8==1.7.0
110 | pickleshare==0.7.4
111 | Pillow==4.3.0
112 | plot==0.6.1
113 | ply==3.10
114 | prompt-toolkit==1.0.14
115 | protobuf==3.4.1
116 | psutil==5.2.2
117 | py==1.4.33
118 | pycosat==0.6.3
119 | pycparser==2.17
120 | pycrypto==2.6.1
121 | pycurl==7.43.0
122 | pyflakes==1.5.0
123 | pygame==1.9.3
124 | Pygments==2.2.0
125 | pylint==1.6.4
126 | pyodbc==4.0.16
127 | pyOpenSSL==17.0.0
128 | pyparsing==2.1.4
129 | pytest==3.0.7
130 | pytest-runner==2.12.1
131 | python-dateutil==2.6.0
132 | pytz==2017.2
133 | PyWavelets==0.5.2
134 | pywin32==220
135 | PyYAML==3.12
136 | pyzmq==16.0.2
137 | QtAwesome==0.4.4
138 | qtconsole==4.3.0
139 | QtPy==1.2.1
140 | requests==2.14.2
141 | rope-py3k==0.9.4.post1
142 | scikit-image==0.13.0
143 | scikit-learn==0.19.1
144 | scipy==1.0.0
145 | seaborn==0.7.1
146 | simplegeneric==0.8.1
147 | simplejson==3.11.1
148 | singledispatch==3.4.0.3
149 | six==1.10.0
150 | smart-open==1.5.3
151 | snowballstemmer==1.2.1
152 | sortedcollections==0.5.3
153 | sortedcontainers==1.5.7
154 | sphinx==1.5.6
155 | spyder==3.1.4
156 | SQLAlchemy==1.1.9
157 | statsmodels==0.8.0
158 | sympy==1.0
159 | tables==3.2.2
160 | tblib==1.3.2
161 | tensorboard==1.6.0
162 | tensorflow==1.4.0
163 | tensorflow-gpu==1.1.0
164 | tensorflow-tensorboard==0.4.0
165 | termcolor==1.1.0
166 | testpath==0.3
167 | Theano==0.9.0
168 | toolz==0.8.2
169 | torch==0.3.0b0+591e73e
170 | tornado==4.5.1
171 | traitlets==4.3.2
172 | typing==3.6.1
173 | unicodecsv==0.14.1
174 | vine==1.1.4
175 | wcwidth==0.1.7
176 | Werkzeug==0.12.2
177 | widgetsnbextension==2.0.0
178 | win-unicode-console==0.5
179 | wrapt==1.10.10
180 | xlrd==1.0.0
181 | XlsxWriter==0.9.6
182 | xlwings==0.10.4
183 | xlwt==1.2.0
184 | zict==0.1.2
185 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | keras==2.1.2
2 | numpy==1.13.3
3 | h5py==2.7.1
4 | pillow
5 | patool
6 | opencv-python
7 | scikit-learn
8 | tensorflow==1.4.0
9 | matplotlib
10 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | setup(
4 | name='keras_video_classifier',
5 | packages=find_packages(),  # include keras_video_classifier.library and its subpackages, not just the top-level package
6 | include_package_data=True,
7 | install_requires=[
8 | 'flask',
9 | 'keras',
10 | 'tensorflow',
11 | 'numpy',
12 | 'matplotlib',
13 | 'opencv-python',
14 | 'h5py',
15 | 'scikit-learn',
16 | 'patool'  # required by UCF101_loader (patoolib) to unpack UCF101.rar
17 | ],
18 | setup_requires=[
19 | 'pytest-runner',
20 | ],
21 | tests_require=[
22 | 'pytest',
23 | ],
24 | )
--------------------------------------------------------------------------------
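A minimal end-to-end usage sketch, assembled from the demo scripts and library code above. Only APIs defined in this repository are used; the directory paths and the sample clip name are placeholders (any video under the scanned UCF-101 class folders works):

```python
import os
import numpy as np

from keras_video_classifier.library.recurrent_networks import VGG16LSTMVideoClassifier
from keras_video_classifier.library.utility.ucf.UCF101_loader import load_ucf

data_dir_path = './demo/very_large_data'   # placeholder: where UCF-101 lives (or will be downloaded)
model_dir_path = './demo/models/UCF-101'   # placeholder: where config/weights/architecture are written

np.random.seed(42)
load_ucf(data_dir_path)  # downloads and unpacks UCF101.rar on the first run

# Train: each frame is encoded by VGG16, then an LSTM is fitted on the frame sequences
classifier = VGG16LSTMVideoClassifier()
classifier.fit(data_dir_path=data_dir_path, model_dir_path=model_dir_path, data_set_name='UCF-101')

# Predict: reload the saved config and weights, then classify a single clip
predictor = VGG16LSTMVideoClassifier()
predictor.load_model(VGG16LSTMVideoClassifier.get_config_file_path(model_dir_path),
                     VGG16LSTMVideoClassifier.get_weight_file_path(model_dir_path))
video_file_path = os.path.join(data_dir_path, 'UCF-101', 'Archery', 'v_Archery_g01_c01.avi')  # placeholder clip
print(predictor.predict(video_file_path))
```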