├── .gitignore ├── LICENSE ├── README.md ├── calculate_video_level_scores.py ├── create_data_folders.sh ├── dataset └── .gitignore ├── docs ├── bibliography │ └── bibliography.bib ├── chapterheader.sty ├── chapters │ ├── 01_introduction.tex │ ├── 02_taxonomy.tex │ ├── 03_problem_description.tex │ ├── 04_experimentation.tex │ └── 05_conclusions.tex ├── images │ ├── 2d_conv.pdf │ ├── 3d_conv.pdf │ ├── avenue-anomaly.png │ ├── boss-anomaly.png │ ├── cnn_lstm.pdf │ ├── cnn_lstm_violence.pdf │ ├── extractor_acc.pdf │ ├── extractor_loss.pdf │ ├── original_model.pdf │ ├── pr_overlay.pdf │ ├── roc-curve.jpg │ ├── roc_overlay.pdf │ ├── sdae_psvm.pdf │ ├── sketches │ │ ├── 2d_3d_conv.drawio │ │ ├── cnn-lstm.drawio │ │ ├── original_model.drawio │ │ └── taxonomy-steps.drawio │ ├── taxonomy_steps.pdf │ ├── ucf-examples │ │ ├── arson-abnormal.png │ │ ├── arson-normal.png │ │ ├── explosion-abnormal.png │ │ ├── explosion-normal.png │ │ ├── normal-1.png │ │ ├── normal-2.png │ │ ├── normal-3.png │ │ ├── normal-4.png │ │ ├── roadaccident-abnormal.png │ │ ├── roadaccident-normal.png │ │ ├── stealing-abnormal.png │ │ └── stealing-normal.png │ ├── ucsd-anomaly.png │ └── umn-anomaly.png ├── main.tex ├── memoria_TFM_Luque_Sanchez_Francisco.pdf ├── prefaces │ ├── cover.tex │ ├── english_abstract.tex │ ├── licensing.tex │ └── spanish_abstract.tex ├── slides │ ├── images │ │ ├── gifs │ │ │ ├── Assault049_x264.gif │ │ │ └── Stealing019_x264.gif │ │ ├── original-model.pdf │ │ ├── proposal.pdf │ │ ├── taxonomy-steps.pdf │ │ └── ucf │ │ │ ├── arson-abnormal.png │ │ │ ├── normal-1.png │ │ │ ├── roadaccident-abnormal.png │ │ │ └── stealing-abnormal.png │ ├── slides.pdf │ └── slides.tex └── variables.sty ├── original_model ├── README.md ├── c3d.py ├── calculate_metrics.py ├── classifier.py ├── compute_frames.py ├── configuration.py ├── display_predictions.py ├── extract_features.py ├── parameters.py ├── predict_test_set.py ├── preprocess_features.py ├── train_classifier.py ├── trained_models │ └── .gitignore └── utils │ ├── array_util.py │ ├── video_util.py │ └── visualization_util.py ├── overlay_curves.py ├── proposal ├── README.md ├── calculate_metrics.py ├── classifier.py ├── configuration.py ├── display_predictions.py ├── extract_temporal_features.py ├── models.py ├── parameters.py ├── predict_test_set.py ├── preprocess_features.py ├── train_classifier.py ├── train_feature_extractor.py ├── trained_models │ └── .gitignore ├── utils │ ├── array_util.py │ ├── video_util.py │ └── visualization_util.py └── video_data_generator.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | target/
76 | 
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 | 
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 | 
84 | # pyenv
85 | .python-version
86 | 
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 | 
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 | 
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # Data folders
132 | dataset/
133 | predictions_c3d/
134 | processed_c3d_features/
135 | raw_c3d_features/
136 | processed_lstm_features/
137 | raw_lstm_features/
138 | raw_lstm_features_old/
139 | predictions_lstm/
140 | ucf101/
141 | 
142 | # Trained models folders
143 | original_model/trained_models/
144 | proposal/trained_models/
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Deep Learning for Crowd Behavior Analysis in Video Surveillance
2 | 
3 | Master's thesis in Data Science: a study on the use of Deep Learning
4 | for Crowd Behavior Analysis from video surveillance
5 | sources.
6 | 
7 | ## Documents
8 | 
9 | The main report (in Spanish) can be downloaded from
10 | [here](https://github.com/fluque1995/tfm-anomaly-detection/blob/master/docs/memoria_TFM_Luque_Sanchez_Francisco.pdf).
11 | 
12 | The slides used in the public defense (also in Spanish) can be downloaded
13 | from
14 | [here](https://github.com/fluque1995/tfm-anomaly-detection/blob/master/docs/slides/slides.pdf).
15 | 
16 | ## Theoretical study
17 | 
18 | The theoretical study consists of a proposed taxonomy for crowd
19 | behavior analysis, published in Information Fusion under the title
20 | _Revisiting crowd behavior analysis through deep learning: Taxonomy,
21 | anomaly detection, crowd emotions, datasets, opportunities and
22 | prospects_, which can be found at
23 | https://www.sciencedirect.com/science/article/pii/S1566253520303201.
24 | 
25 | ## Experimental analysis
26 | 
27 | In the experimental analysis, we have studied the use of
28 | spatio-temporal features extracted by deep learning models for crowd
29 | anomaly detection. Specifically, we have proposed an enhancement of
30 | the model in _Real-world Anomaly Detection in Surveillance Videos_
31 | (https://arxiv.org/abs/1801.04264). Instead of using 3D convolutional
32 | features, we propose a model which applies 2D convolutional analysis
33 | to individual frames together with a recurrent network (an LSTM)
34 | that learns the temporal structure of the convolutional features.
35 | 
36 | Experiments show that our spatio-temporal extractor outperforms the
37 | original proposal by a solid margin, even when it is pretrained on a
38 | smaller dataset for video classification.
39 | 
40 | ### Baseline implementations
41 | 
42 | This implementation, especially the original model replica (which
43 | can be found in the `original_model` folder), strongly relies on
44 | the following previous works:
45 | 
46 | - https://github.com/WaqasSultani/AnomalyDetectionCVPR2018: Original
47 | implementation of the model
48 | - https://github.com/ptirupat/AnomalyDetection_CVPR18: Reimplementation
49 | of the original work using Keras
50 | - https://github.com/adamcasson/c3d: Implementation of the C3D feature
51 | extractor in Keras using TensorFlow as backend
52 | 
53 | The original model has been adapted to be self-contained in
54 | this repo and fully executable in Python. The original proposals rely on
55 | external resources and MATLAB for some of the steps, while our
56 | implementation is written entirely in Python, which eases the
57 | execution.
58 | 
59 | ### Software requirements
60 | 
61 | The project is written entirely in Python 3, using the following
62 | libraries:
63 | 
64 | - Keras 2.2.4 (TensorFlow GPU backend)
65 | - numpy 1.16.2
66 | - scipy 1.2.0
67 | - opencv_contrib_python 4.0.0.21
68 | - pandas 1.0.5
69 | - matplotlib 3.0.2
70 | - scikit_learn 0.23.2
71 | 
72 | A requirements file is provided for `pip` installation. In order to
73 | install the dependencies, navigate to the project root folder and execute:
74 | 
75 | ``` shell
76 | pip install -r requirements.txt
77 | ```
78 | 
79 | ### Data folder structure and datasets
80 | 
81 | In order to properly execute the models, some folders must be created
82 | in advance. Executing the script `create_data_folders.sh` at the
83 | project root will create the required folders with their default
84 | names. The datasets must also be downloaded. In particular:
85 | 
86 | - The UCF-101 dataset (https://www.crcv.ucf.edu/data/UCF101.php) is used
87 | to pretrain our proposed feature extractor. You can download the
88 | dataset with the proper folder structure for our experiments from
89 | [here](https://drive.google.com/file/d/1R2E9WjQS8c48S2z7mNTT8Gc1H1z2mnqP/view?usp=sharing)
90 | and place it in the root project folder.
91 | - The UCF-Crime dataset (https://www.crcv.ucf.edu/projects/real-world/) is
92 | used for evaluation. We provide a curated version of the dataset
93 | with the proper train-test splits for anomaly detection, as we
94 | used it in our experiments. In order to use the dataset, you should
95 | download the following files. The main dataset has been split into
96 | three parts due to its size:
97 | - [Test split](https://drive.google.com/file/d/1ynzUmzihaAZkLXJ9jzhppK0eLMskhh1F/view?usp=sharing):
98 | This file contains the test split, and should be decompressed into
99 | the folder `dataset/test`.
100 | - [Train split - normal
101 | videos](https://drive.google.com/file/d/1k63Qlfn3aU3_CpXxxAzPJ_hYqeSo38PP/view?usp=sharing):
102 | This file contains the normal videos for the train split. The
103 | videos contained in this file should be placed inside the folder
104 | `dataset/train/normal`.
105 | - [Train split - abnormal
106 | videos](https://drive.google.com/file/d/1Zv1CU7PxPDY5WyGc70Kt6SCDFqpjV7gX/view?usp=sharing):
107 | This file contains the abnormal videos for the train split. The
108 | videos contained in this file should be placed inside the folder
109 | `dataset/train/abnormal`.
110 | 
111 | **WARNING**: The datasets are large, and the models are resource-intensive.
112 | We strongly recommend using dedicated GPUs and computing nodes to
113 | replicate the experiments, since ordinary PCs are not capable of handling
114 | such volumes of data.
115 | 
116 | ### Pretrained models
117 | 
118 | We provide several pretrained models used in our experiments:
119 | 
120 | - Models from the original proposal: These comprise the
121 | original feature extractor based on C3D and two sets of weights
122 | for the classifier: the model originally trained by the authors
123 | (`weights_L1L2.mat`) and the replica trained by us
124 | (`weights_own.mat`). These models can be downloaded from
125 | [here](https://drive.google.com/file/d/1s3qBXLZzMGAsmG8U0YTJJ4NOOK3KBakl/view?usp=sharing).
126 | The uncompressed folder must be placed in the
127 | `original_model/trained_models` folder.
128 | - Models from our proposal: These comprise our proposed
129 | extractor based on a spatio-temporal network and the classifier
130 | model trained by us. These models can be downloaded from
131 | [here](https://drive.google.com/file/d/1XJ8DLRSHowEA3JB2xAUQGOzTo1y0ofQj/view?usp=sharing).
132 | The uncompressed folder must be placed in the `proposal/trained_models`
133 | folder.
134 | 
135 | ### Code structure
136 | 
137 | The developed code is organized into two main folders, together with some
138 | scripts to compute the results:
139 | 
140 | - `calculate_video_level_scores.py`: Calculates the percentage of
141 | normal and abnormal videos in which an alarm has been triggered. For
142 | normal videos, a lower percentage means fewer false alarms, and
143 | thus a better model. For abnormal videos, a higher percentage means
144 | a better capability of detecting anomalies.
145 | - `overlay_curves.py`: Computes the ROC and PR curves
146 | given the predictions of both models, and plots them in two
147 | separate graphs (one for the ROC curves and one for the PR curves).
148 | - `original_model` folder: The code in this folder is prepared to
149 | replicate the original experiments, from feature extraction with C3D
150 | to training and evaluation of the anomaly classifier.
151 | - `proposal` folder: The code in this folder is prepared to replicate
152 | our experiments. There are scripts to train the feature extractor
153 | on UCF-101, extract features from the UCF-Crime dataset using the
154 | pretrained extractor, and train and evaluate the anomaly classifier.
155 | 
156 | There is more information on how to reproduce the experiments in the
157 | README files inside each folder.
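
As a quick orientation for the `proposal` folder, the snippet below is a minimal, illustrative sketch of the kind of CNN + LSTM spatio-temporal extractor described in the Experimental analysis section: a 2D convolutional backbone is applied to every frame of a clip, and an LSTM summarizes the resulting sequence of frame features. It is not the exact architecture defined in `proposal/models.py`; the backbone, clip length, frame size and layer dimensions are assumptions chosen only for illustration.

``` python
# Illustrative sketch only: a frame-wise 2D CNN wrapped in TimeDistributed,
# followed by an LSTM that produces a clip-level spatio-temporal feature.
from keras.models import Model
from keras.layers import Input, TimeDistributed, LSTM, Dense
from keras.applications.xception import Xception

SEQ_LEN, HEIGHT, WIDTH = 16, 224, 224  # assumed clip length and frame size

# Frame-level CNN with ImageNet weights, pooled to one vector per frame
cnn = Xception(include_top=False, weights="imagenet",
               pooling="avg", input_shape=(HEIGHT, WIDTH, 3))

clip = Input(shape=(SEQ_LEN, HEIGHT, WIDTH, 3))
frame_features = TimeDistributed(cnn)(clip)   # one 2048-d vector per frame
clip_features = LSTM(768)(frame_features)     # temporal summary (e.g. 768-d)
scores = Dense(101, activation="softmax")(clip_features)  # UCF-101 head

extractor = Model(clip, clip_features)  # reused later for feature extraction
classifier = Model(clip, scores)        # pretrained on UCF-101
classifier.compile(optimizer="adam", loss="categorical_crossentropy",
                   metrics=["accuracy"])
```

In this kind of pipeline, once the classifier has been pretrained on UCF-101, the classification head is discarded and the `extractor` model is reused to compute the spatio-temporal features that feed the anomaly classifier.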
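
The anomaly classifiers in both folders are trained in a weakly supervised fashion, following the multiple-instance ranking objective of _Real-world Anomaly Detection in Surveillance Videos_. The function below sketches that objective as described in the paper (written from the paper, not copied from `train_classifier.py`, whose batching and constants may differ): each video is split into segments, and the maximum segment score of an abnormal bag is pushed above the maximum score of a normal bag, with smoothness and sparsity terms on the abnormal scores.

``` python
# Sketch of the MIL ranking objective from the original paper
# (https://arxiv.org/abs/1801.04264); lambda values follow the paper.
import numpy as np

def mil_ranking_loss(abnormal_scores, normal_scores,
                     lambda1=8e-5, lambda2=8e-5):
    """Ranking loss for one abnormal bag and one normal bag.

    Both arguments are 1D arrays with the anomaly scores of the video
    segments (32 segments per video in the original paper).
    """
    # Hinge ranking term: the highest-scored abnormal segment should score
    # higher than the highest-scored normal segment.
    hinge = max(0.0, 1.0 - np.max(abnormal_scores) + np.max(normal_scores))
    # Temporal smoothness of consecutive abnormal segment scores.
    smoothness = np.sum(np.diff(abnormal_scores) ** 2)
    # Sparsity: anomalies should cover few segments of the abnormal video.
    sparsity = np.sum(abnormal_scores)
    return hinge + lambda1 * smoothness + lambda2 * sparsity
```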
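
`overlay_curves.py` is the reference implementation for the comparison plots. If you only need a quick look at the overlaid ROC curves, the sketch below shows the general idea using the listed `scikit_learn` and `matplotlib` versions. It assumes the frame-level scores have already been saved as NumPy arrays in `predictions_c3d/` and `predictions_lstm/`, and, as a simplification, it labels every frame of a normal video as 0 and every frame of an abnormal video as 1, instead of using the exact temporal annotations employed in our evaluation.

``` python
# Rough sketch: overlay the ROC curves of both models from saved frame scores.
import glob
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

def frame_scores_and_labels(model):
    """Collect frame scores; labels come from the video naming convention."""
    y_true, y_score = [], []
    for path in sorted(glob.glob("predictions_{}/*".format(model))):
        preds = np.load(path).ravel()
        label = 0 if "Normal" in path else 1
        y_score.extend(preds.tolist())
        y_true.extend([label] * preds.size)
    return np.array(y_true), np.array(y_score)

plt.figure()
for model in ["c3d", "lstm"]:
    y_true, y_score = frame_scores_and_labels(model)
    fpr, tpr, _ = roc_curve(y_true, y_score)
    plt.plot(fpr, tpr, label="{} (AUC = {:.3f})".format(model, auc(fpr, tpr)))
plt.plot([0, 1], [0, 1], "k--", linewidth=0.8)  # chance level
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend()
plt.savefig("roc_overlay_sketch.pdf")
```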
158 | -------------------------------------------------------------------------------- /calculate_video_level_scores.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | 4 | models = ["c3d", "lstm"] 5 | 6 | for model in models: 7 | normal_predictions_regex = "predictions_{}/Normal*".format(model) 8 | abnormal_predictions_regex = "predictions_{}/[!Normal]*".format(model) 9 | 10 | normal_predictions = glob.glob(normal_predictions_regex) 11 | abnormal_predictions = glob.glob(abnormal_predictions_regex) 12 | 13 | normal_pos_preds = 0 14 | normal_videos = 0 15 | for vid in normal_predictions: 16 | preds = np.load(vid) 17 | normal_videos += 1 18 | normal_pos_preds += np.max(np.round(preds)) 19 | 20 | abnormal_pos_preds = 0 21 | abnormal_videos = 0 22 | for vid in abnormal_predictions: 23 | preds = np.load(vid) 24 | abnormal_videos += 1 25 | abnormal_pos_preds += np.max(np.round(preds)) 26 | 27 | print("MODEL: {}".format(model)) 28 | print("Normal videos with positive labels: {} %".format( 29 | 100*normal_pos_preds/normal_videos)) 30 | 31 | print("Abnormal videos with positive labels: {} %".format( 32 | 100*abnormal_pos_preds/abnormal_videos)) 33 | -------------------------------------------------------------------------------- /create_data_folders.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Creating dataset folders..." 4 | mkdir -p dataset/{train/{abnormal,normal},test} 5 | echo "Creating features folders..." 6 | mkdir -p {raw,processed}_{c3d,lstm}_features/{train/{abnormal,normal},test} 7 | echo "Creating predictions folders..." 8 | mkdir -p predictions_{c3d,lstm} 9 | echo "Done" 10 | 11 | 12 | -------------------------------------------------------------------------------- /dataset/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/dataset/.gitignore -------------------------------------------------------------------------------- /docs/bibliography/bibliography.bib: -------------------------------------------------------------------------------- 1 | @book{ma2009intelligent, 2 | title={Intelligent video surveillance: systems and technology}, 3 | author={Ma, Yunqian and Qian, Gang}, 4 | year={2009}, 5 | publisher={CRC Press} 6 | } 7 | 8 | @article{zitouni2016advances, 9 | title={Advances and trends in visual crowd analysis: A systematic survey and evaluation of crowd modelling techniques}, 10 | author={Zitouni, M Sami and Bhaskar, Harish and Dias, J and Al-Mualla, Mohammed E}, 11 | journal={Neurocomputing}, 12 | volume={186}, 13 | pages={139--159}, 14 | year={2016}, 15 | publisher={Elsevier} 16 | } 17 | 18 | @inproceedings{swathi2017crowd, 19 | title={Crowd behavior analysis: A survey}, 20 | author={Swathi, HY and Shivakumar, G and Mohana, HS}, 21 | booktitle={2017 international conference on recent advances in electronics and communication technology (ICRAECT)}, 22 | pages={169--178}, 23 | year={2017}, 24 | organization={IEEE} 25 | } 26 | 27 | @article{nguyen2016human, 28 | title={Human detection from images and videos: A survey}, 29 | author={Nguyen, Duc Thanh and Li, Wanqing and Ogunbona, Philip O}, 30 | journal={Pattern Recognition}, 31 | volume={51}, 32 | pages={148--175}, 33 | year={2016}, 34 | publisher={Elsevier} 35 | } 36 | 37 | @inproceedings{garate2009crowd, 38 | title={Crowd event recognition 
using hog tracker}, 39 | author={Garate, Carolina and Bilinsky, Piotr and Bremond, Fran{\c{c}}ois}, 40 | booktitle={2009 Twelfth IEEE International Workshop on Performance Evaluation of Tracking and Surveillance}, 41 | pages={1--6}, 42 | year={2009}, 43 | organization={IEEE} 44 | } 45 | 46 | @article{ciaparrone2020deep, 47 | title={Deep learning in video multi-object tracking: A survey}, 48 | author={Ciaparrone, Gioele and S{\'a}nchez, Francisco Luque and Tabik, Siham and Troiano, Luigi and Tagliaferri, Roberto and Herrera, Francisco}, 49 | journal={Neurocomputing}, 50 | volume={381}, 51 | pages={61--88}, 52 | year={2020}, 53 | publisher={Elsevier} 54 | } 55 | 56 | @inproceedings{mahadevan2010anomaly, 57 | title={Anomaly detection in crowded scenes}, 58 | author={Mahadevan, Vijay and Li, Weixin and Bhalodia, Viral and Vasconcelos, Nuno}, 59 | booktitle={2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, 60 | pages={1975--1981}, 61 | year={2010}, 62 | organization={IEEE} 63 | } 64 | 65 | @inproceedings{lu2013abnormal, 66 | title={Abnormal event detection at 150 fps in matlab}, 67 | author={Lu, Cewu and Shi, Jianping and Jia, Jiaya}, 68 | booktitle={Proceedings of the IEEE international conference on computer vision}, 69 | pages={2720--2727}, 70 | year={2013} 71 | } 72 | 73 | @inproceedings{mehran2009abnormal, 74 | title={Abnormal crowd behavior detection using social force model}, 75 | author={Mehran, Ramin and Oyama, Alexis and Shah, Mubarak}, 76 | booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition}, 77 | pages={935--942}, 78 | year={2009}, 79 | organization={IEEE} 80 | } 81 | 82 | @inproceedings{liu2018future, 83 | title={Future frame prediction for anomaly detection--a new baseline}, 84 | author={Liu, Wen and Luo, Weixin and Lian, Dongze and Gao, Shenghua}, 85 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 86 | pages={6536--6545}, 87 | year={2018} 88 | } 89 | 90 | @article{blunsden2010behave, 91 | title={The BEHAVE video dataset: ground truthed video for multi-person behavior classification}, 92 | author={Blunsden, Scott and Fisher, RB}, 93 | journal={Annals of the BMVA}, 94 | volume={4}, 95 | number={1-12}, 96 | pages={4}, 97 | year={2010}, 98 | publisher={British Machine Vision Association} 99 | } 100 | 101 | @inproceedings{velastin2017people, 102 | title={People Detection and Pose Classification Inside a Moving Train Using Computer Vision}, 103 | author={Velastin, Sergio A and G{\'o}mez-Lira, Diego A}, 104 | booktitle={International Visual Informatics Conference}, 105 | pages={319--330}, 106 | year={2017}, 107 | organization={Springer} 108 | } 109 | 110 | @misc{ut-interaction, 111 | author = "Ryoo, M. S. and Aggarwal, J. 
K.", 112 | title = "{UT}-{I}nteraction {D}ataset, {ICPR} contest on {S}emantic {D}escription of {H}uman {A}ctivities ({SDHA})", 113 | year = "2010", 114 | url = {http://cvrc.ece.utexas.edu/SDHA2010/Human\_Interaction.html} 115 | } 116 | 117 | @inproceedings{sultani2018real, 118 | title={Real-world anomaly detection in surveillance videos}, 119 | author={Sultani, Waqas and Chen, Chen and Shah, Mubarak}, 120 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 121 | pages={6479--6488}, 122 | year={2018} 123 | } 124 | 125 | @inproceedings{nievas2011violence, 126 | title= {Movies Fight Detection Dataset}, 127 | author= {Nievas, Enrique Bermejo and Suarez, Oscar Deniz and Garcia, Gloria Bueno and Sukthankar, Rahul}, 128 | booktitle= {Computer Analysis of Images and Patterns}, 129 | pages= {332--339}, 130 | year= {2011}, 131 | organization= {Springer}, 132 | keywords= {action recognition, fight detection, video surveillance}, 133 | terms= {}, 134 | url= {http://visilab.etsii.uclm.es/personas/oscar/FightDetection/} 135 | } 136 | 137 | @inproceedings{nievas2011hockey, 138 | title= {Hockey Fight Detection Dataset}, 139 | author= {Nievas, Enrique Bermejo and Suarez, Oscar Deniz and Garcia, Gloria Bueno and Sukthankar, Rahul}, 140 | booktitle= {Computer Analysis of Images and Patterns}, 141 | pages= {332--339}, 142 | year= {2011}, 143 | organization= {Springer}, 144 | keywords= {action recognition, fight detection, video surveillance}, 145 | terms= {}, 146 | url= {http://visilab.etsii.uclm.es/personas/oscar/FightDetection/} 147 | } 148 | 149 | @inproceedings{scholkopf2000support, 150 | title={Support vector method for novelty detection}, 151 | author={Sch{\"o}lkopf, Bernhard and Williamson, Robert C and Smola, Alex J and Shawe-Taylor, John and Platt, John C}, 152 | booktitle={Advances in neural information processing systems}, 153 | pages={582--588}, 154 | year={2000} 155 | } 156 | 157 | @article{xu2015learning, 158 | title={Learning deep representations of appearance and motion for anomalous event detection}, 159 | author={Xu, Dan and Ricci, Elisa and Yan, Yan and Song, Jingkuan and Sebe, Nicu}, 160 | journal={arXiv preprint arXiv:1510.01553}, 161 | year={2015} 162 | } 163 | 164 | @inproceedings{vincent2008extracting, 165 | title={Extracting and composing robust features with denoising autoencoders}, 166 | author={Vincent, Pascal and Larochelle, Hugo and Bengio, Yoshua and Manzagol, Pierre-Antoine}, 167 | booktitle={Proceedings of the 25th international conference on Machine learning}, 168 | pages={1096--1103}, 169 | year={2008} 170 | } 171 | 172 | @inproceedings{horn1981determining, 173 | title={Determining optical flow}, 174 | author={Horn, Berthold KP and Schunck, Brian G}, 175 | booktitle={Techniques and Applications of Image Understanding}, 176 | volume={281}, 177 | pages={319--331}, 178 | year={1981}, 179 | organization={International Society for Optics and Photonics} 180 | } 181 | 182 | @inproceedings{gutoski2017detection, 183 | title={Detection of video anomalies using convolutional autoencoders and one-class support vector machines}, 184 | author={Gutoski, Matheus and Aquino, Nelson Marcelo Romero and Ribeiro, Manass{\'e}s and Lazzaretti, Andr{\'e} Eng{\^e}nio and Lopes, Heitor Silv{\'e}rio}, 185 | booktitle={XIII Brazilian Congress on Computational Intelligence}, 186 | volume={2017}, 187 | year={2017} 188 | } 189 | 190 | @article{canny1986computational, 191 | title={A computational approach to edge detection}, 192 | author={Canny, John}, 193 | 
journal={IEEE Transactions on pattern analysis and machine intelligence}, 194 | number={6}, 195 | pages={679--698}, 196 | year={1986}, 197 | publisher={Ieee} 198 | } 199 | 200 | @inproceedings{yang2019deep, 201 | title={Deep Learning and One-class SVM based Anomalous Crowd Detection}, 202 | author={Yang, Meng and Rajasegarar, Sutharshan and Erfani, Sarah M and Leckie, Christopher}, 203 | booktitle={2019 International Joint Conference on Neural Networks (IJCNN)}, 204 | pages={1--8}, 205 | year={2019}, 206 | organization={IEEE} 207 | } 208 | 209 | @article{lucas1981iterative, 210 | title={An iterative image registration technique with an application to stereo vision}, 211 | author={Lucas, Bruce D and Kanade, Takeo and others}, 212 | year={1981}, 213 | publisher={Vancouver, British Columbia} 214 | } 215 | 216 | @article{hinton2006fast, 217 | title={A fast learning algorithm for deep belief nets}, 218 | author={Hinton, Geoffrey E and Osindero, Simon and Teh, Yee-Whye}, 219 | journal={Neural computation}, 220 | volume={18}, 221 | number={7}, 222 | pages={1527--1554}, 223 | year={2006}, 224 | publisher={MIT Press} 225 | } 226 | 227 | @article{fang2016abnormal, 228 | title={Abnormal event detection in crowded scenes based on deep learning}, 229 | author={Fang, Zhijun and Fei, Fengchang and Fang, Yuming and Lee, Changhoon and Xiong, Naixue and Shu, Lei and Chen, Sheng}, 230 | journal={Multimedia Tools and Applications}, 231 | volume={75}, 232 | number={22}, 233 | pages={14617--14639}, 234 | year={2016}, 235 | publisher={Springer} 236 | } 237 | 238 | @article{fang2011bottom, 239 | title={Bottom-up saliency detection model based on human visual sensitivity and amplitude spectrum}, 240 | author={Fang, Yuming and Lin, Weisi and Lee, Bu-Sung and Lau, Chiew-Tong and Chen, Zhenzhong and Lin, Chia-Wen}, 241 | journal={IEEE Transactions on Multimedia}, 242 | volume={14}, 243 | number={1}, 244 | pages={187--198}, 245 | year={2011}, 246 | publisher={IEEE} 247 | } 248 | 249 | @article{chan2015pcanet, 250 | title={PCANet: A simple deep learning baseline for image classification?}, 251 | author={Chan, Tsung-Han and Jia, Kui and Gao, Shenghua and Lu, Jiwen and Zeng, Zinan and Ma, Yi}, 252 | journal={IEEE transactions on image processing}, 253 | volume={24}, 254 | number={12}, 255 | pages={5017--5032}, 256 | year={2015}, 257 | publisher={IEEE} 258 | } 259 | 260 | @inproceedings{smeureanu2017deep, 261 | title={Deep appearance features for abnormal behavior detection in video}, 262 | author={Smeureanu, Sorina and Ionescu, Radu Tudor and Popescu, Marius and Alexe, Bogdan}, 263 | booktitle={International Conference on Image Analysis and Processing}, 264 | pages={779--789}, 265 | year={2017}, 266 | organization={Springer} 267 | } 268 | 269 | @article{chatfield2014return, 270 | title={Return of the devil in the details: Delving deep into convolutional nets}, 271 | author={Chatfield, Ken and Simonyan, Karen and Vedaldi, Andrea and Zisserman, Andrew}, 272 | journal={arXiv preprint arXiv:1405.3531}, 273 | year={2014} 274 | } 275 | 276 | @article{sun2019abnormal, 277 | title={Abnormal event detection for video surveillance using deep one-class learning}, 278 | author={Sun, Jiayu and Shao, Jie and He, Chengkun}, 279 | journal={Multimedia Tools and Applications}, 280 | volume={78}, 281 | number={3}, 282 | pages={3633--3647}, 283 | year={2019}, 284 | publisher={Springer} 285 | } 286 | 287 | @article{singh2020crowd, 288 | title={Crowd anomaly detection using aggregation of ensembles of fine-tuned ConvNets}, 289 | 
author={Singh, Kuldeep and Rajora, Shantanu and Vishwakarma, Dinesh Kumar and Tripathi, Gaurav and Kumar, Sandeep and Walia, Gurjit Singh}, 290 | journal={Neurocomputing}, 291 | volume={371}, 292 | pages={188--198}, 293 | year={2020}, 294 | publisher={Elsevier} 295 | } 296 | 297 | @article{huang2018learning, 298 | title={Learning multimodal deep representations for crowd anomaly event detection}, 299 | author={Huang, Shaonian and Huang, Dongjun and Zhou, Xinmin}, 300 | journal={Mathematical Problems in Engineering}, 301 | volume={2018}, 302 | year={2018}, 303 | publisher={Hindawi} 304 | } 305 | 306 | @inproceedings{lee2009convolutional, 307 | title={Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations}, 308 | author={Lee, Honglak and Grosse, Roger and Ranganath, Rajesh and Ng, Andrew Y}, 309 | booktitle={Proceedings of the 26th annual international conference on machine learning}, 310 | pages={609--616}, 311 | year={2009} 312 | } 313 | 314 | @inproceedings{hinami2017joint, 315 | title={Joint detection and recounting of abnormal events by learning deep generic knowledge}, 316 | author={Hinami, Ryota and Mei, Tao and Satoh, Shin'ichi}, 317 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 318 | pages={3619--3627}, 319 | year={2017} 320 | } 321 | 322 | @inproceedings{girshick2015fast, 323 | title={Fast r-cnn}, 324 | author={Girshick, Ross}, 325 | booktitle={Proceedings of the IEEE international conference on computer vision}, 326 | pages={1440--1448}, 327 | year={2015} 328 | } 329 | 330 | @inproceedings{sabokrou2015real, 331 | title={Real-time anomaly detection and localization in crowded scenes}, 332 | author={Sabokrou, Mohammad and Fathy, Mahmood and Hoseini, Mojtaba and Klette, Reinhard}, 333 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition workshops}, 334 | pages={56--62}, 335 | year={2015} 336 | } 337 | 338 | @article{sabokrou2017fast, 339 | title={Fast and accurate detection and localization of abnormal behavior in crowded scenes}, 340 | author={Sabokrou, Mohammad and Fathy, Mahmood and Moayed, Zahra and Klette, Reinhard}, 341 | journal={Machine Vision and Applications}, 342 | volume={28}, 343 | number={8}, 344 | pages={965--985}, 345 | year={2017}, 346 | publisher={Springer} 347 | } 348 | 349 | @article{sabokrou2017deep, 350 | title={Deep-cascade: Cascading 3d deep neural networks for fast anomaly detection and localization in crowded scenes}, 351 | author={Sabokrou, Mohammad and Fayyaz, Mohsen and Fathy, Mahmood and Klette, Reinhard}, 352 | journal={IEEE Transactions on Image Processing}, 353 | volume=26, 354 | number=4, 355 | pages={1992--2004}, 356 | year=2017, 357 | publisher={IEEE} 358 | } 359 | 360 | @article{feng2017learning, 361 | title={Learning deep event models for crowd anomaly detection}, 362 | author={Feng, Yachuang and Yuan, Yuan and Lu, Xiaoqiang}, 363 | journal={Neurocomputing}, 364 | volume={219}, 365 | pages={548--556}, 366 | year={2017}, 367 | publisher={Elsevier} 368 | } 369 | 370 | @article{viroli2019deep, 371 | title={Deep gaussian mixture models}, 372 | author={Viroli, Cinzia and McLachlan, Geoffrey J}, 373 | journal={Statistics and Computing}, 374 | volume={29}, 375 | number={1}, 376 | pages={43--51}, 377 | year={2019}, 378 | publisher={Springer} 379 | } 380 | 381 | @article{ramchandran2019unsupervised, 382 | title={Unsupervised deep learning system for local anomaly event detection in crowded scenes}, 383 | author={Ramchandran, Anitha and 
Sangaiah, Arun Kumar}, 384 | journal={Multimedia Tools and Applications}, 385 | pages={1--21}, 386 | year={2019}, 387 | publisher={Springer} 388 | } 389 | 390 | @inproceedings{ravanbakhsh2018plug, 391 | title={Plug-and-play cnn for crowd motion analysis: An application in abnormal event detection}, 392 | author={Ravanbakhsh, Mahdyar and Nabi, Moin and Mousavi, Hossein and Sangineto, Enver and Sebe, Nicu}, 393 | booktitle={2018 IEEE Winter Conference on Applications of Computer Vision (WACV)}, 394 | pages={1689--1698}, 395 | year={2018}, 396 | organization={IEEE} 397 | } 398 | 399 | @article{zhou2016spatial, 400 | title={Spatial--temporal convolutional neural networks for anomaly detection and localization in crowded scenes}, 401 | author={Zhou, Shifu and Shen, Wei and Zeng, Dan and Fang, Mei and Wei, Yuanwang and Zhang, Zhijiang}, 402 | journal={Signal Processing: Image Communication}, 403 | volume={47}, 404 | pages={358--368}, 405 | year={2016}, 406 | publisher={Elsevier} 407 | } 408 | 409 | @inproceedings{ravanbakhsh2019training, 410 | title={Training adversarial discriminators for cross-channel abnormal event detection in crowds}, 411 | author={Ravanbakhsh, Mahdyar and Sangineto, Enver and Nabi, Moin and Sebe, Nicu}, 412 | booktitle={2019 IEEE Winter Conference on Applications of Computer Vision (WACV)}, 413 | pages={1896--1904}, 414 | year={2019}, 415 | organization={IEEE} 416 | } 417 | 418 | @inproceedings{goodfellow2014generative, 419 | title={Generative adversarial nets}, 420 | author={Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, 421 | booktitle={Advances in neural information processing systems}, 422 | pages={2672--2680}, 423 | year={2014} 424 | } 425 | 426 | @inproceedings{kumar2017d, 427 | title={D-CAD: Deep and crowded anomaly detection}, 428 | author={Kumar, Krishan and Kumar, Anurag and Bahuguna, Ayush}, 429 | booktitle={Proceedings of the 7th International Conference on Computer and Communication Technology}, 430 | pages={100--105}, 431 | year={2017} 432 | } 433 | 434 | @article{wiskott2002slow, 435 | title={Slow feature analysis: Unsupervised learning of invariances}, 436 | author={Wiskott, Laurenz and Sejnowski, Terrence J}, 437 | journal={Neural computation}, 438 | volume={14}, 439 | number={4}, 440 | pages={715--770}, 441 | year={2002}, 442 | publisher={MIT Press} 443 | } 444 | 445 | @article{sabokrou2018deep, 446 | title={Deep-anomaly: Fully convolutional neural network for fast anomaly detection in crowded scenes}, 447 | author={Sabokrou, Mohammad and Fayyaz, Mohsen and Fathy, Mahmood and Moayed, Zahra and Klette, Reinhard}, 448 | journal={Computer Vision and Image Understanding}, 449 | volume={172}, 450 | pages={88--97}, 451 | year={2018}, 452 | publisher={Elsevier} 453 | } 454 | 455 | @article{wang2019abnormal, 456 | title={Abnormal behavior detection in videos using deep learning}, 457 | author={Wang, Jun and Xia, Limin}, 458 | journal={Cluster Computing}, 459 | volume={22}, 460 | number={4}, 461 | pages={9229--9239}, 462 | year={2019}, 463 | publisher={Springer} 464 | } 465 | 466 | @incollection{tay2019robust, 467 | title={A robust abnormal behavior detection method using convolutional neural network}, 468 | author={Tay, Nian Chi and Connie, Tee and Ong, Thian Song and Goh, Kah Ong Michael and Teh, Pin Shen}, 469 | booktitle={Computational Science and Technology}, 470 | pages={37--47}, 471 | year={2019}, 472 | publisher={Springer} 473 | } 474 | 475 | 
@article{kecceli2017violent, 476 | title={Violent activity detection with transfer learning method}, 477 | author={Ke{\c{c}}eli, AS and Kaya, AYDIN}, 478 | journal={Electronics Letters}, 479 | volume={53}, 480 | number={15}, 481 | pages={1047--1048}, 482 | year={2017}, 483 | publisher={IET} 484 | } 485 | 486 | @article{kononenko1997overcoming, 487 | title={Overcoming the myopia of inductive learning algorithms with RELIEFF}, 488 | author={Kononenko, Igor and {\v{S}}imec, Edvard and Robnik-{\v{S}}ikonja, Marko}, 489 | journal={Applied Intelligence}, 490 | volume={7}, 491 | number={1}, 492 | pages={39--55}, 493 | year={1997}, 494 | publisher={Springer} 495 | } 496 | 497 | @inproceedings{sudhakaran2017learning, 498 | title={Learning to detect violent videos using convolutional long short-term memory}, 499 | author={Sudhakaran, Swathikiran and Lanz, Oswald}, 500 | booktitle={2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS)}, 501 | pages={1--6}, 502 | year={2017}, 503 | organization={IEEE} 504 | } 505 | 506 | @inproceedings{marsden2017resnetcrowd, 507 | title={ResnetCrowd: A residual deep learning architecture for crowd counting, violent behaviour detection and crowd density level classification}, 508 | author={Marsden, Mark and McGuinness, Kevin and Little, Suzanne and O'Connor, Noel E}, 509 | booktitle={2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS)}, 510 | pages={1--7}, 511 | year={2017}, 512 | organization={IEEE} 513 | } 514 | 515 | @article{song2019novel, 516 | title={A novel violent video detection scheme based on modified 3D convolutional neural networks}, 517 | author={Song, Wei and Zhang, Dongliang and Zhao, Xiaobing and Yu, Jing and Zheng, Rui and Wang, Antai}, 518 | journal={IEEE Access}, 519 | volume={7}, 520 | pages={39172--39179}, 521 | year={2019}, 522 | publisher={IEEE} 523 | } 524 | 525 | @article{fenil2019real, 526 | title={Real time violence detection framework for football stadium comprising of big data analysis and deep learning through bidirectional LSTM}, 527 | author={Fenil, E and Manogaran, Gunasekaran and Vivekananda, GN and Thanjaivadivel, T and Jeeva, S and Ahilan, A and others}, 528 | journal={Computer Networks}, 529 | volume={151}, 530 | pages={191--200}, 531 | year={2019}, 532 | publisher={Elsevier} 533 | } 534 | 535 | @inproceedings{sumon2019violent, 536 | title={Violent crowd flow detection using deep learning}, 537 | author={Sumon, Shakil Ahmed and Shahria, MD Tanzil and Goni, MD Raihan and Hasan, Nazmul and Almarufuzzaman, AM and Rahman, Rashedur M}, 538 | booktitle={Asian Conference on Intelligent Information and Database Systems}, 539 | pages={613--625}, 540 | year={2019}, 541 | organization={Springer} 542 | } 543 | 544 | @inproceedings{cheng2017abnormal, 545 | title={Abnormal behavior detection for harbour operator safety under complex video surveillance scenes}, 546 | author={Cheng, Guoan and Wang, Shengke and Guo, Teng and Han, Xiao and Cai, Guiyan and Gao, Feng and Dong, Junyu}, 547 | booktitle={2017 International Conference on Security, Pattern Analysis, and Cybernetics (SPAC)}, 548 | pages={324--328}, 549 | year={2017}, 550 | organization={IEEE} 551 | } 552 | 553 | @inproceedings{liu2016ssd, 554 | title={Ssd: Single shot multibox detector}, 555 | author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C}, 556 | booktitle={European conference on computer vision}, 557 | 
pages={21--37}, 558 | year={2016}, 559 | organization={Springer} 560 | } 561 | 562 | @inproceedings{tran2015learning, 563 | title={Learning spatiotemporal features with 3D convolutional networks}, 564 | author={Tran, Du and Bourdev, Lubomir and Fergus, Rob and Torresani, Lorenzo and Paluri, Manohar}, 565 | booktitle={Proceedings of the IEEE international conference on computer vision}, 566 | pages={4489--4497}, 567 | year={2015} 568 | } 569 | 570 | @inproceedings{karpathy2014large, 571 | title={Large-scale video classification with convolutional neural networks}, 572 | author={Karpathy, Andrej and Toderici, George and Shetty, Sanketh and Leung, Thomas and Sukthankar, Rahul and Fei-Fei, Li}, 573 | booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition}, 574 | pages={1725--1732}, 575 | year={2014} 576 | } 577 | 578 | @inproceedings{chollet2017xception, 579 | title={Xception: Deep learning with depthwise separable convolutions}, 580 | author={Chollet, Fran{\c{c}}ois}, 581 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, 582 | pages={1251--1258}, 583 | year={2017} 584 | } 585 | 586 | @inproceedings{deng2009imagenet, 587 | title={Imagenet: A large-scale hierarchical image database}, 588 | author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li}, 589 | booktitle={2009 IEEE conference on computer vision and pattern recognition}, 590 | pages={248--255}, 591 | year={2009}, 592 | organization={Ieee} 593 | } 594 | 595 | @misc{chollet2015keras, 596 | title={Keras}, 597 | author={Chollet, Fran\c{c}ois and others}, 598 | year={2015}, 599 | howpublished={\url{https://keras.io}}, 600 | } 601 | 602 | @article{hochreiter1997long, 603 | title={Long short-term memory}, 604 | author={Hochreiter, Sepp and Schmidhuber, J{\"u}rgen}, 605 | journal={Neural computation}, 606 | volume={9}, 607 | number={8}, 608 | pages={1735--1780}, 609 | year={1997}, 610 | publisher={MIT Press} 611 | } 612 | 613 | @article{soomro2012ucf, 614 | title={UCF101: A dataset of 101 human actions classes from videos in the wild}, 615 | author={Soomro, Khurram and Zamir, Amir Roshan and Shah, Mubarak}, 616 | journal={arXiv preprint arXiv:1212.0402}, 617 | year=2012 618 | } 619 | 620 | @article{bradski2000opencv, 621 | title={OpenCV}, 622 | author={Bradski, Gary and Kaehler, Adrian}, 623 | journal={Dr. 
Dobb’s journal of software tools}, 624 | volume={3}, 625 | year={2000} 626 | } 627 | 628 | @book{oliphant2006guide, 629 | title={A guide to NumPy}, 630 | author={Oliphant, Travis E}, 631 | volume={1}, 632 | year={2006}, 633 | publisher={Trelgol Publishing USA} 634 | } 635 | 636 | @article{van2011numpy, 637 | title={The NumPy array: a structure for efficient numerical computation}, 638 | author={Van Der Walt, Stefan and Colbert, S Chris and Varoquaux, Gael}, 639 | journal={Computing in Science \& Engineering}, 640 | volume={13}, 641 | number={2}, 642 | pages={22}, 643 | year={2011}, 644 | publisher={IEEE Computer Society} 645 | } 646 | 647 | @software{reback2020pandas, 648 | author = {The pandas development team}, 649 | title = {pandas-dev/pandas: Pandas}, 650 | month = feb, 651 | year = 2020, 652 | publisher = {Zenodo}, 653 | version = {latest}, 654 | doi = {10.5281/zenodo.3509134}, 655 | url = {https://doi.org/10.5281/zenodo.3509134} 656 | } 657 | 658 | @misc{tensorflow2015-whitepaper, 659 | title={ {TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems}, 660 | url={https://www.tensorflow.org/}, 661 | note={Software available from tensorflow.org}, 662 | author={ 663 | Martín~Abadi and 664 | Ashish~Agarwal and 665 | Paul~Barham and 666 | Eugene~Brevdo and 667 | Zhifeng~Chen and 668 | Craig~Citro and 669 | Greg~S.~Corrado and 670 | Andy~Davis and 671 | Jeffrey~Dean and 672 | Matthieu~Devin and 673 | Sanjay~Ghemawat and 674 | Ian~Goodfellow and 675 | Andrew~Harp and 676 | Geoffrey~Irving and 677 | Michael~Isard and 678 | Yangqing Jia and 679 | Rafal~Jozefowicz and 680 | Lukasz~Kaiser and 681 | Manjunath~Kudlur and 682 | Josh~Levenberg and 683 | Dandelion~Mané and 684 | Rajat~Monga and 685 | Sherry~Moore and 686 | Derek~Murray and 687 | Chris~Olah and 688 | Mike~Schuster and 689 | Jonathon~Shlens and 690 | Benoit~Steiner and 691 | Ilya~Sutskever and 692 | Kunal~Talwar and 693 | Paul~Tucker and 694 | Vincent~Vanhoucke and 695 | Vijay~Vasudevan and 696 | Fernanda~Viegas and 697 | Oriol~Vinyals and 698 | Pete~Warden and 699 | Martin~Wattenberg and 700 | Martin~Wicke and 701 | Yuan~Yu and 702 | Xiaoqiang~Zheng}, 703 | year={2015}, 704 | } 705 | 706 | @article{sanchez2020revisiting, 707 | title={Revisiting crowd behaviour analysis through deep learning: Taxonomy, anomaly detection, crowd emotions, datasets, opportunities and prospects}, 708 | author={Luque S{\'a}nchez, Francisco and Hupont, Isabelle and Tabik, Siham and Herrera, Francisco}, 709 | journal={Information Fusion}, 710 | year={2020}, 711 | publisher={Elsevier} 712 | } 713 | 714 | @article{abu2016youtube, 715 | title={Youtube-8m: A large-scale video classification benchmark}, 716 | author={Abu-El-Haija, Sami and Kothari, Nisarg and Lee, Joonseok and Natsev, Paul and Toderici, George and Varadarajan, Balakrishnan and Vijayanarasimhan, Sudheendra}, 717 | journal={arXiv preprint arXiv:1609.08675}, 718 | year={2016} 719 | } 720 | 721 | @inproceedings{xingjian2015convolutional, 722 | title={Convolutional LSTM network: A machine learning approach for precipitation nowcasting}, 723 | author={Xingjian, SHI and Chen, Zhourong and Wang, Hao and Yeung, Dit-Yan and Wong, Wai-Kin and Woo, Wang-chun}, 724 | booktitle={Advances in neural information processing systems}, 725 | pages={802--810}, 726 | year={2015} 727 | } 728 | 729 | -------------------------------------------------------------------------------- /docs/chapterheader.sty: -------------------------------------------------------------------------------- 1 | 
\ProvidesPackage{chapterheader}[2017/08/18 Chapter fancy header] 2 | \usepackage[T1]{fontenc} 3 | \usepackage{kpfonts} 4 | \setSingleSpace{1.1} 5 | \SingleSpacing 6 | \usepackage{xcolor,calc, blindtext} 7 | \definecolor{chaptercolor}{gray}{0.8} 8 | % helper macros 9 | \newcommand\numlifter[1]{\raisebox{-2cm}[0pt][0pt]{\smash{#1}}} 10 | \newcommand\numindent{\kern37pt} 11 | \newlength\chaptertitleboxheight 12 | \makechapterstyle{hansen}{ 13 | \renewcommand\printchaptername{\raggedleft} 14 | \renewcommand\printchapternum{% 15 | \begingroup% 16 | \leavevmode% 17 | \chapnumfont% 18 | \strut% 19 | \numlifter{\thechapter}% 20 | \numindent% 21 | \endgroup% 22 | } 23 | \renewcommand*{\printchapternonum}{% 24 | \vphantom{\begingroup% 25 | \leavevmode% 26 | \chapnumfont% 27 | \numlifter{\vphantom{9}}% 28 | \numindent% 29 | \endgroup} 30 | \afterchapternum} 31 | \setlength\midchapskip{0pt} 32 | \setlength\beforechapskip{0.5\baselineskip} 33 | \setlength{\afterchapskip}{0.5\baselineskip} 34 | \renewcommand\chapnumfont{% 35 | \fontsize{4cm}{0cm}% 36 | \bfseries% 37 | \sffamily% 38 | \color{chaptercolor}% 39 | } 40 | \renewcommand\chaptitlefont{% 41 | \normalfont% 42 | \huge% 43 | \bfseries% 44 | \raggedleft% 45 | }% 46 | \settototalheight\chaptertitleboxheight{% 47 | \parbox{\textwidth}{\chaptitlefont \strut bg\\bg\strut}} 48 | \renewcommand\printchaptertitle[1]{% 49 | \parbox[t][\chaptertitleboxheight][t]{\textwidth}{% 50 | % \microtypesetup{protrusion=false}% add this if you use microtype 51 | \chaptitlefont\strut ##1\strut}\\~\\ 52 | \rule{\linewidth}{0.4pt} 53 | }} 54 | \chapterstyle{hansen} 55 | \aliaspagestyle{chapter}{empty} % just to save some space 56 | -------------------------------------------------------------------------------- /docs/chapters/01_introduction.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{book} 2 | 3 | \begin{document} 4 | 5 | \chapter{Introducción} 6 | 7 | En las últimas décadas se ha experimentado un crecimiento poblacional 8 | sin precedentes alrededor de todo el mundo, con el consecuente aumento 9 | de las aglomeraciones de personas, las cuales llegan a involucrar a 10 | miles de individuos. Además, las tasas de criminalidad y terrorismo se 11 | han disparado de forma similar. La combinación de estos hechos ha 12 | provocado que la videovigilancia masiva se convierta en una 13 | herramienta prioritaria. El número de cámaras de vigilancia instaladas 14 | en el mundo, tanto dentro del ámbito público como en el privado, se ha 15 | multiplicado en los últimos años. El desarrollo tecnológico, además, 16 | está produciendo una importante mejora en la calidad de los vídeos que 17 | se recopilan, a cambio de un aumento importante en el volumen de 18 | información almacenada. Aparece, por tanto, la necesidad de procesar 19 | una gran cantidad de información en forma de archivos de vídeo.\\ 20 | 21 | Históricamente, dicho procesamiento se ha realizado de forma manual, 22 | consumiendo una gran cantidad de recursos humanos. Hoy en día, la 23 | velocidad a la que se genera dicha información, y el volumen tan 24 | abismal que se genera diariamente, hace casi imposible la gestión 25 | manual de esta información de forma exhaustiva y adecuada. Además, 26 | esta información debe procesarse en tiempo real en la medida de lo 27 | posible, ya que la respuesta rápida en situaciones de emergencia es 28 | crucial para reducir los efectos de una posible catástrofe. 
Esto ha 29 | hecho que los métodos clásicos de supervisión humana queden 30 | paulatinamente obsoletos, y aparezca la necesidad de automatizar el 31 | proceso. En este contexto aparece el concepto de la videovigilancia 32 | automática.\\ 33 | 34 | La videovigilancia automática es una rama de investigación cuyo 35 | objetivo es el análisis de múltiples fuentes de vídeo en tiempo real, 36 | para la extracción automática de información relevante relacionada con 37 | el comportamiento de los individuos \cite{ma2009intelligent}. Esta 38 | área de investigación aúna dos grandes campos de trabajo dentro del 39 | aprendizaje automático; la visión por computador y el análisis de 40 | series temporales. Dado que el tipo de dato más común dentro de este 41 | contexto son las secuencias de vídeo, por un lado se ha de extraer 42 | información de cada uno de los fotogramas, y por otro información 43 | temporal derivada de la secuencia de dichos fotogramas.\\ 44 | 45 | El auge del aprendizaje profundo, además, ha supuesto un avance muy 46 | importante en el desarrollo de modelos en este contexto. Según 47 | \cite{zitouni2016advances}, existen cuatro áreas principales en el 48 | contexto de la videovigilancia automática: 49 | 50 | \begin{enumerate} 51 | \item Detección y seguimiento de individuos 52 | \item Recuento y estimación de densidad de individuos 53 | \item Análisis y clasificación de comportamientos 54 | \item Detección de comportamientos anómalos 55 | \end{enumerate} 56 | 57 | Las tareas 1 y 2 han sido ampliamente estudiadas y los modelos de 58 | aprendizaje clásico son suficientes para la obtención de resultados de 59 | calidad. No obstante, en las tareas 3 y 4 los resultados eran muy 60 | limitados. La aparición de los modelos de aprendizaje profundo y el 61 | aumento de la capacidad de cálculo ha supuesto un avance importante 62 | para todas las áreas. En los dos primeros casos, ha permitido que la 63 | densidad de individuos presentes en la imagen sea más alta antes de 64 | que se produzca una pérdida de rendimiento, y en los dos últimos casos 65 | ha provocado una mejoría muy notable, ya que la complejidad de la 66 | información que estos modelos son capaces de extraer es notablemente 67 | más alta que la extraída por los modelos clásicos.\\ 68 | 69 | Esta mejora tan significativa ha provocado que en los últimos años 70 | aparezcan una gran cantidad de trabajos que resuelven alguno de los 71 | problemas relacionados con la videovigilancia automática aplicando 72 | modelos de redes neuronales. No obstante, estos trabajos aparecen 73 | dispersos, y es difícil establecer una comparativa sobre ellos. Esta 74 | problemática es especialmente relevante cuando se tratan de 75 | desarrollar nuevos modelos, ya que es difícil recopilar el 76 | conocimiento previo sobre la temática. Esta dispersión radica en 77 | varios factores:\\ 78 | 79 | \begin{enumerate} 80 | \item No existe un consenso claro sobre las tareas que deben abordarse 81 | dentro de esta área de investigación. 82 | \item No hay una taxonomía clara para organizar los distintos trabajos 83 | previamente desarrollados. 84 | \item No existe una recopilación de trabajos que afronten esta 85 | problemática desde la perspectiva del aprendizaje profundo. 86 | \end{enumerate} 87 | 88 | En particular, nuestra propuesta de trabajo se engloba dentro del 89 | proyecto de investigación \textit{AI\_MARS-DeepLABD: Artificial 90 | Intelligence system for Monitoring, Alert and Response for Security 91 | in events. 
Deep Learning for Abnomal Behavior Detection}, el cual 92 | pretende diseñar e implementar sistemas de aprendizaje profundo para 93 | la detección de comportamientos anómalos, y por tanto, enfocaremos el 94 | trabajo en esa dirección. Esto hace que nos centremos especialmente en 95 | la última de las tareas. Es la más novedosa de las áreas listadas 96 | anteriormente, y esto hace que exista una especial incertidumbre 97 | alrededor de la misma. En particular, resulta muy difícil establecer 98 | una organización clara para los trabajos que afrontan esta 99 | problemática, porque el concepto de comportamiento anómalo puede 100 | incluir definiciones muy diversas. Por ejemplo, podemos considerar 101 | como anómalo la presencia de una persona en un área restringida, una 102 | multitud corriendo despavorida, o un pequeño grupo de personas que 103 | inicia una pelea por la calle. Claramente, la fuente de la anomalía en 104 | los tres casos es completamente distinta, y difícilmente comparable. 105 | Esto provoca que la comparativa entre modelos sea compleja. Además, el 106 | número de conjuntos de datos públicos que permitan establecer 107 | comparaciones entre los modelos es relativamente escaso, y un gran 108 | número de trabajos utilizan sus propios conjuntos de datos diseñados 109 | específicamente para el problema que tratan de resolver.\\ 110 | 111 | \section{Objetivos del trabajo} 112 | 113 | Dado el contexto previo, los objetivos de este trabajo tratan de 114 | cubrir un estudio profundo del área de la videovigilancia automática, 115 | en particular centrado en la detección de comportamientos anómalos en 116 | vídeo. Los objetivos concretos que se han planteado para el trabajo 117 | son los siguientes: 118 | 119 | \begin{enumerate} 120 | \item Proponer una taxonomía para la organización de los trabajos que 121 | afrontan el problema del análisis de multitudes en videovigilancia. 122 | \item Revisar detalladamente los trabajos propuestos dentro del área 123 | de la detección de comportamientos anómalos utilizando aprendizaje 124 | profundo. 125 | \item Estudiar la extracción de características en vídeo utilizando 126 | modelos de aprendizaje profundo. En concreto, se estudia un modelo 127 | del estado del arte en la detección de anomalías en videovigilancia, 128 | y se propone una mejora basada en un extractor de características 129 | profundo de mayor potencia. 130 | \end{enumerate} 131 | 132 | El resto del trabajo se estructura de la siguiente manera. En el 133 | capítulo \ref{sec:state-of-the-art} se expone el estudio teórico del 134 | trabajo. En él, se propone una taxonomía en etapas que permite agrupar 135 | los trabajos hasta el momento dentro de cuatro etapas, en las que cada 136 | una recae en los resultados de las anteriores. Además, se estudian los 137 | principales conjuntos de datos disponibles públicamente y las métricas 138 | que se utilizan para evaluar la calidad de los modelos dentro de esta 139 | área de conocimiento. Finalmente, se resumen los principales trabajos 140 | que utilizan aprendizaje profundo para resolver el problema de la 141 | detección de anomalías en multitudes, estableciendo una división de los 142 | mismos en función de la taxonomía previa.\\ 143 | 144 | En el capítulo \ref{sec:model-analysis} se llevará a cabo el análisis 145 | del uso de características espacio-temporales para la detección de 146 | acciones anómalas. 
Concretamente, tomaremos el trabajo propuesto en 147 | \cite{sultani2018real} y trataremos de mejorar sus resultados 148 | empleando un extractor de características más potente. En dicho 149 | trabajo, proponen una red neuronal convolucional en tres dimensiones 150 | como extractor de características en vídeo. A pesar de ser un modelo 151 | relativamente bueno para capturar características tanto temporales 152 | como espaciales, creemos que las características temporales se ven 153 | infrarrepresentadas. Nuestra hipótesis expone que el uso de un 154 | extractor de características más potente, que combine la capacidad de 155 | trabajar con imágenes de las redes convolucionales 2D con la capacidad 156 | de analizar series temporales de las redes recurrentes, obtendrá 157 | mejores resultados en el problema tratado. En este capítulo se realiza 158 | un estudio teórico del modelo original y se realiza nuestra propuesta 159 | de mejora. 160 | 161 | A continuación, en el capítulo \ref{sec:experiments-and-results} se 162 | detalla la experimentación realizada y los resultados 163 | obtenidos. Finalmente, en \ref{sec:conclusions-future-work} se exponen 164 | las conclusiones derivadas del estudio y posibles estudios futuros. 165 | 166 | \end{document} 167 | 168 | %%% Local Variables: 169 | %%% mode: latex 170 | %%% TeX-master: "../main" 171 | %%% End: 172 | -------------------------------------------------------------------------------- /docs/chapters/05_conclusions.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{memoir} 2 | 3 | \begin{document} 4 | 5 | \chapter{Conclusiones y trabajo futuro} 6 | \label{sec:conclusions-future-work} 7 | 8 | \section{Conclusiones} 9 | 10 | En este trabajo se ha realizado un estudio exhaustivo del uso del 11 | aprendizaje profundo para el análisis de multitudes en 12 | videovigilancia. En primer lugar, se ha llevado a cabo un análisis 13 | detallado del estado del arte, que ha resultado en la publicación de 14 | un artículo científico en la revista \textit{Information Fusion}, y 15 | que lleva por título ``Revisiting crowd behaviour analysis through 16 | deep learning: Taxonomy, anomaly detection, crowd emotions, datasets, 17 | opportunities and prospects'' \cite{sanchez2020revisiting}. En dicho 18 | estudio, se ha propuesto una taxonomía que permite organizar los 19 | nuevos trabajos en una secuencia de pasos, de forma que los resultados 20 | de cada una de las etapas tienen una fuerte influencia en las etapas 21 | posteriores. Para la tercera de las etapas de la taxonomía propuesta, 22 | que corresponde a la fase de extracción de características, se han 23 | establecido las principales propiedades que se extraen de las 24 | secuencias de vídeo para el análisis de comportamientos en multitudes.\\ 25 | 26 | Además, se ha realizado una revisión bibliográfica exhaustiva de los 27 | modelos basados en aprendizaje profundo para la detección de anomalías 28 | en multitudes. En primer lugar, se han identificado las distintas 29 | subtareas que componen esta área, las cuales vienen determinadas por 30 | las diferentes fuentes que producen la anomalía. Para los tipos de 31 | anomalía identificados, se han recopilado los principales conjuntos de 32 | datos públicos y las principales métricas que se utilizan para evaluar 33 | la calidad de los modelos. 
Finalmente, se han resumido los diferentes 34 | trabajos que resuelven cada una de las subtareas identificadas 35 | utilizando aprendizaje profundo.\\ 36 | 37 | Para el apartado práctico del trabajo, se ha estudiado la eficacia del 38 | uso de características espacio-temporales extraídas con modelos de 39 | aprendizaje profundo para la detección de anomalías en 40 | vídeo. Concretamente, se ha experimentado sobre un modelo de detección 41 | de anomalías en multitudes que empleaba un extractor de 42 | características basado exclusivamente en redes neuronales 43 | convolucionales en tres dimensiones. Para dicho modelo, se ha 44 | sustituido el extractor de características por un compuesto de capas 45 | convolucionales y recurrentes. Nuestra hipótesis de partida defendía 46 | que las redes neuronales recurrentes iban a ser mejores extractores de 47 | características temporales que las redes convolucionales 3D.\\ 48 | 49 | A raíz de los resultados extraídos de la experimentación, hemos podido 50 | comprobar que en efecto el modelo combinado convolucional-recurrente 51 | tiene un mejor comportamiento que el modelo puramente convolucional 52 | para el análisis de secuencias de vídeo.\\ 53 | 54 | Por un lado, trabajando con el conjunto de datos UCF-101, hemos 55 | preentrenado el extractor de características que hemos utilizado 56 | después en el experimento principal. Durante esta fase de 57 | entrenamiento previa, hemos obtenido un modelo con una mejor capacidad 58 | de clasificación que el modelo basado en C3D (el extractor del trabajo 59 | original), con una mejora de más de 15 puntos porcentuales en la 60 | clasificación Top-1. Esta mejora se ha producido para todos los 61 | extractores de características propuestos, independientemente de la 62 | dimensión de la representación obtenida, lo cual pone de manifiesto 63 | que nos encontramos ante un extractor de mayor potencia.\\ 64 | 65 | Por otro lado, en el experimento final, que involucraba la detección 66 | de fotogramas anómalos dentro del conjunto de datos UCF-Crime, hemos 67 | observado cómo el uso del extractor de características con capas 68 | recurrentes obtiene unos resultados mejores que el sistema 69 | original. Nuestros modelos superaban a los dos modelos originales, 70 | tanto el preentrenado por los autores como la réplica entrenada por 71 | nosotros, en todas las métricas que hemos calculado. Podemos remarcar 72 | especialmente la mejora en la métrica AUC que hemos conseguido con el 73 | modelo de dimensión 768, ya que esta era la única métrica que se 74 | utilizaba en el artículo original para la comparación de modelos. 75 | Hemos conseguido una arquitectura que mejora a la propuesta inicial, 76 | por lo que consideramos que los experimentos propuestos han sido 77 | exitosos. Además, dado que en primera instancia consideramos que 78 | utilizar sólo esta métrica podía dar lugar a una comparación pobre, 79 | hemos utilizado otras métricas que dan información sobre el 80 | comportamiento del modelo en la clase positiva, obteniendo también 81 | resultados que superan a la experimentación original.\\ 82 | 83 | Otra mejora importante que hemos detectado es la capacidad de 84 | predicción de nuestros modelos a nivel de vídeo, en lugar de a nivel 85 | de fotograma. Aunque los resultados obtenidos por nuestros modelos no 86 | suponen una mejora tan representativa a la hora de localizar las 87 | anomalías dentro de los vídeos, sí que suponen un avance importante a 88 | la hora de detectar qué vídeos presentan anomalía. 
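Como ilustración de esta agregación a nivel de vídeo, se muestra un esbozo mínimo e hipotético: no se corresponde necesariamente con el script calculate_video_level_scores.py del repositorio, y tanto el criterio de agregación por máximo como el umbral son suposiciones ilustrativas.

\begin{lstlisting}[language=Python]
# Illustrative sketch (assumptions: max-aggregation and a 0.5 threshold);
# not necessarily the exact procedure used in the experiments.
import numpy as np

def video_level_score(frame_scores):
    # A video is scored as anomalous as its most anomalous frame
    return float(np.max(frame_scores))

def video_level_label(frame_scores, threshold=0.5):
    return int(video_level_score(frame_scores) >= threshold)
\end{lstlisting}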
Concretamente, 89 | nuestro mejor modelo consigue una mejora de más de 10 puntos 90 | porcentuales sobre el modelo original en este contexto, lo cual es un 91 | aumento muy significativo.\\ 92 | 93 | Es importante destacar también que esta mejora en los resultados se ha 94 | producido a pesar de que nuestros extractores de características están 95 | entrenados, a priori, en un conjunto de datos de menor calidad que el 96 | empleado por el extractor de características original. Mientras que el modelo 97 | convolucional 3D estaba entrenado en un conjunto de datos de más de 98 | 1000000 de vídeos y cerca de 500 clases, el nuestro está entrenado en 99 | un conjunto mucho más pequeño, de unos 13000 vídeos y 101 clases. Esta 100 | diferencia hace que el modelo original parta, presumiblemente, de una 101 | posición ventajosa respecto al nuestro, lo que hace que esta mejora 102 | resulte especialmente relevante.\\ 103 | 104 | Finalmente, a pesar de que los resultados obtenidos son mejores que 105 | los de la experimentación original, se puede observar que aún hay un 106 | amplio margen de mejora en este conjunto de datos. El número de falsos 107 | negativos es aún muy elevado, clasificándose correctamente menos del 108 | 50 \% de los fotogramas positivos. Es posible que esta problemática 109 | venga justificada, en parte, por el tipo de etiquetado del 110 | conjunto. Al tener que entrenar sin la localización exacta de las 111 | anomalías, resulta complicado enseñar al modelo a localizar de forma 112 | precisa la anomalía en el vídeo anómalo completo. Esto implica que, 113 | probablemente, se estén cometiendo errores en los primeros y últimos 114 | fotogramas alrededor de las anomalías. Además, hemos visto que en un 115 | cuarto de los vídeos etiquetados como anómalos no generamos ninguna 116 | etiqueta positiva, es decir, ignoramos casi el 25 \% de las anomalías 117 | presentes en el conjunto. Estamos hablando de un número muy importante 118 | de errores, que requerirán de modelos más potentes para ser detectados.\\ 119 | 120 | A raíz de las conclusiones obtenidas del estudio, exponemos a 121 | continuación posibles vías de trabajo futuro. 122 | 123 | \section{Trabajo futuro} 124 | 125 | Dados los problemas que hemos encontrado durante el desarrollo del 126 | trabajo, especialmente en el apartado práctico del mismo, aparecen las 127 | siguientes líneas de trabajo a investigar: 128 | 129 | \begin{itemize} 130 | \item Utilizar una base de datos de mayor tamaño para el entrenamiento del 131 | extractor de características: Por falta de capacidad de cómputo, 132 | no se han utilizado bases de datos de mayor tamaño para el 133 | entrenamiento del modelo que se usa posteriormente para la 134 | extracción de características. Probablemente, el uso de bases de 135 | datos con mayor diversidad producirá unos resultados mejores. El 136 | modelo original, como ya dijimos, está entrenado sobre Sports-1M, de 137 | tamaño significativamente mayor al empleado por nosotros. Existen 138 | conjuntos para clasificación de vídeos de mayor tamaño, como el 139 | conjunto YouTube-8M \cite{abu2016youtube}. Puede ser interesante 140 | estudiar cómo el uso de un conjunto de datos u otro influye a la 141 | hora de entrenar el extractor de características.
Teniendo en cuenta 142 | que los resultados obtenidos por nuestros modelos tras entrenar en 143 | el conjunto pequeño son comparables con los resultados originales, y 144 | mejores para la mayoría de las métricas calculadas, la mejora 145 | que supondría un mejor preentrenamiento podría demostrar por completo 146 | que nos encontramos ante un modelo más potente. 147 | \item Afinar la arquitectura del modelo propuesto: En nuestra 148 | experimentación hemos propuesto un modelo basado en convoluciones 2D 149 | para extraer información de los fotogramas junto con una LSTM para 150 | extraer información temporal. En ella hemos 151 | estudiado el uso de tres representaciones de distintos tamaños: 512, 152 | 768 y 1024 elementos. No obstante, no se han explorado 153 | representaciones mayores, ya que los resultados obtenidos mejoraban 154 | la experimentación original y estamos ante modelos costosos, que 155 | requieren de muchas horas de cómputo para ser entrenados. Además, se 156 | ha utilizado Xception como red neuronal convolucional debido a su 157 | buen funcionamiento y pequeño tamaño, pero podríamos haber optado 158 | por otras arquitecturas disponibles. Es posible que las decisiones 159 | tomadas en el diseño hayan provocado que no nos encontremos ante el 160 | mejor modelo posible de este tipo y quede aún margen de mejora. 161 | \item Explorar nuevas arquitecturas para el extractor de 162 | características: Existen modelos llamados redes LSTM convolucionales 163 | \cite{xingjian2015convolutional} que sustituyen los productos 164 | internos de las LSTM clásicas por operaciones de convolución, por lo 165 | que son capaces de trabajar directamente con vídeos como dato de 166 | entrada. En este caso, no necesitaríamos una primera etapa de la red 167 | basada en una arquitectura convolucional, y podríamos aplicar 168 | este modelo directamente. No obstante, tras unas primeras pruebas 169 | con este modelo, decidimos descartarlo por obtener malos resultados 170 | al ser entrenado completamente desde cero. Uniendo esta arquitectura 171 | al uso de conjuntos de datos de mayor tamaño podrían mejorarse los 172 | resultados obtenidos. 173 | \item Modificar la política de entrenamiento del modelo: En nuestra 174 | experimentación hemos construido un modelo con una arquitectura 175 | similar a la del original, en el que hemos sustituido el extractor de 176 | características por uno que creíamos de mayor potencia. No obstante, 177 | el resto del modelo se ha mantenido más o menos igual que el de 178 | partida para no influir de otra forma en el modelo. Debido a que el 179 | margen de mejora actual en el conjunto de datos es bastante grande, 180 | usar una política de entrenamiento distinta a la actual podría 181 | suponer una mejora en los resultados obtenidos, así que puede ser 182 | interesante explorar esta vía. 183 | \item Proponer modelos combinados: Los modelos que hemos utilizado en 184 | esta experimentación han sido estudiados de forma independiente, ya 185 | que nuestra intención era comprobar si las características 186 | espacio-temporales eran más potentes que las convolucionales puras 187 | para este problema. No hemos buscado, por tanto, obtener los mejores 188 | resultados posibles en el conjunto de datos.
Durante la 189 | experimentación hemos observado cómo el modelo original y el modelo 190 | propuesto tienen características diferentes, y un buen 191 | comportamiento en distintos puntos (por ejemplo, para fotogramas 192 | fácilmente clasificables, el modelo original funciona ligeramente 193 | mejor que el nuestro). La utilización de los dos enfoques en un 194 | modelo combinado probablemente obtenga mejores resultados que los 195 | dos modelos por separado. 196 | \end{itemize} 197 | 198 | \section{Publicaciones asociadas} 199 | 200 | Debido a los resultados obtenidos en el desarrollo del trabajo, tanto 201 | a nivel teórico como práctico, se han propuesto dos publicaciones 202 | relacionadas con el mismo. Dichas publicaciones son las siguientes: 203 | 204 | \begin{itemize} 205 | \item Luque-Sánchez, F., Hupont, I., Tabik, S., \& Herrera, 206 | F. (2020). \textbf{Revisiting crowd behaviour analysis through deep 207 | learning: Taxonomy, anomaly detection, crowd emotions, datasets, 208 | opportunities and prospects}. \textit{Information Fusion}. Esta 209 | publicación consiste en una revisión sobre el estado del arte en 210 | técnicas de análisis de multitudes en videovigilancia utilizando 211 | aprendizaje profundo. En dicho artículo se establece la taxonomía 212 | que se describe en el apartado teórico del trabajo, se revisan los 213 | principales trabajos que resuelven este problema utilizando 214 | aprendizaje profundo, y se pone de manifiesto la necesidad de 215 | introducir características basadas en emociones para el análisis de 216 | multitudes. 217 | \item Luque-Sánchez, F., Hupont, I., Tabik, S., \& Herrera, 218 | F. (2020). \textbf{Xception-LSTM: Deep Spatio-temporal features for 219 | crowd anomaly detection}. En preparación. Esta publicación 220 | extiende la experimentación llevada a cabo en el trabajo a partir 221 | de las propuestas de trabajo futuro, para estudiar en profundidad 222 | los modelos espacio-temporales para la detección de anomalías. Se 223 | proponen nuevas arquitecturas basadas en capas CNN-LSTM combinadas, 224 | y se preentrenan los extractores de características en conjuntos 225 | de datos de mayor tamaño.
226 | \end{itemize} 227 | 228 | \end{document} 229 | 230 | %%% Local Variables: 231 | %%% mode: latex 232 | %%% TeX-master: "../main" 233 | %%% End: 234 | -------------------------------------------------------------------------------- /docs/images/2d_conv.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/2d_conv.pdf -------------------------------------------------------------------------------- /docs/images/3d_conv.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/3d_conv.pdf -------------------------------------------------------------------------------- /docs/images/avenue-anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/avenue-anomaly.png -------------------------------------------------------------------------------- /docs/images/boss-anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/boss-anomaly.png -------------------------------------------------------------------------------- /docs/images/cnn_lstm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/cnn_lstm.pdf -------------------------------------------------------------------------------- /docs/images/cnn_lstm_violence.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/cnn_lstm_violence.pdf -------------------------------------------------------------------------------- /docs/images/extractor_acc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/extractor_acc.pdf -------------------------------------------------------------------------------- /docs/images/extractor_loss.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/extractor_loss.pdf -------------------------------------------------------------------------------- /docs/images/original_model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/original_model.pdf -------------------------------------------------------------------------------- /docs/images/pr_overlay.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/pr_overlay.pdf -------------------------------------------------------------------------------- 
/docs/images/roc-curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/roc-curve.jpg -------------------------------------------------------------------------------- /docs/images/roc_overlay.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/roc_overlay.pdf -------------------------------------------------------------------------------- /docs/images/sdae_psvm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/sdae_psvm.pdf -------------------------------------------------------------------------------- /docs/images/sketches/2d_3d_conv.drawio: -------------------------------------------------------------------------------- 1 | 7Z3bdttWlkW/xo/hwMEdj46dVPXouqQ7fUk9SiJjq6KIbopOnPr6JkVCIglIouG9gZmTpXooC5EgCXMDXGsCBF5lb37+9KfVxYf3f13OFzev0mT+6VX29lWaplmVbP5vu+S33ZKQVvVuybvV9Xy/7HHB99f/WuwX7r/x3cfr+eLu6AvXy+XN+vrD8cKr5e3t4mp9tOxitVr+evxlPy5vjn/qh4t3i86C768ubrpL//d6vn6/W1oXyePyPy+u371vf3JI9v/l54v2i/cL7t5fzJe/HizKvnmVvVktl+vdv37+9GZxs9167XbZfd+3T/zXh19stbhdn/MN79fffPP239N//efqLrn587/+Y/HPf379VVM0u/X8cnHzcf8n73/d9W/tNljczl9vN+Xms6ubi7u766tX2dfv1z/fbBaEzT/v1qvlT4s3y5vl6v4bslDlxbeb3+PrH69vbg6WL8K8WFTb5cvb9cHy5P5js3x+cfd+Md+vdvd7LOYdQI9/cTsRF6t3i/Vzf2ZeP2zxzawulj8v1qvfNt/56yPTFun7A5ztstXi5mJ9/cvxL3KxH613D6t7+AnfLa83v2Ka7PeDsl3Pfi+oQ3a8irvlx9XVYv9dhwxPVlTVxazK8+bh43S1s5AVSfuRHv+Q3Vbq/JDNPw42yeOi+/H5jFEKLw/SZhf4sP3n1cfLzf99/ev76/Xi+w8XV9tlv24OIMdjdbn8eDtfzP9y+bDg4uqnd6vt0r9/XN9c3y72y+cXq5/+vlnN9Xq7IZJZUhwvTO+Xbr9ytVxvOC5vNwua7bzd7Y43IenO6o8/Lsqrq77pnlfN5f207v/gxWq9+PT8iHYHr/2GIptl9RHFvEhn5X4+Dsczm1VFz4Sms+LpcTxi/LlAMwEdALQpmDRz0fx8mpsAw6RZiOYAmlU9K1Mm0FJAPx9oFhomzUo0B9AsE+zuWQvo5wPNkxQL9IzaK6AdoHk+S3Mm0FYYieiX9s+QEnBKJ1i0TwbLVCwNuieDpayQUfNk4JQWsuidDJaSQkatk4FTSsioczJwygkZNU4GThkhm7qZVRSBICVk0DgxOFP5IIPSycEpH2TTOzlEZYUMqicHp8SQTfvkEJUbsimgHKIyRDYdlENUksiohmYEq5BKEll0UAZLGSKLAspgKTdk1D4RODO5IYvqyWApMWTUOxk4ZYWMSicDp6yQUeNk4JQSsqmbaQMRCJmUkEHj5OCUDzIonRyc8kE2vZNDVFbIoHpycEoM2bRPDNFcbsimgHKIyhDZdFAO0dgkUX216Cd6WRd54VhDC4JVyGOTROPgPO2gDJaxGaJRWHYKKIOl3JBR+2TglBuyqJ4MlhJDRr2TgVNWyKh0MnDKChk1TgTOIjYlNFndDBCBUMSmhKZpnBic8kEWpRODMzYfNFnvxBCNzQpNUz0xOCWGjNonhqjckFEBxRCVITLqoBiisUmiqWpoqBBWITZJNEkHZbAsZYgMCiiEZWxuaKr2CcEZmxuapHpCWEoM2fROCE5ZIZvSCcEpK2TTOCE4Y1NCo2TaIjtxtlXTEpqWZmw6aByadVfZUoDKBg0AWqZdY0sBKh9kcbzNN78owdZWUkJGB1wM0djE0GRHXAzR2NzQJIfc0DBeQKvY7NBUR1wM0Nj80FQHXArQENvbVubFop7nfUTr9DIrSxuiVXp8wM3TgvH6GWJ738pIPKvuI+c4SCOroeMgrUP3sXMYpLG9f2UkpEX30XMcpJF10ZGQNjl4L42sjI6DtMlK8F4aWSOdJu5mdc5oL7G9i2WytMshGtklC5OFXQ7RyK5amCzrcojKGdlEXQ5RWSObpMshKmlkEXRzSnGJ7f0s0yVdDlIZI6Ooy0EqY2SUdTlIZYyMwi4HqayRUdrlIJU2soi7CaS9xPZOl+nSLoaonJFR2MUQlTMyyroYonJGRlGXQjS2t7xMl3QxRCWNDIJuWlKKS2xveJks6YKQyhjZRF0QUhkjm6wLQipjZBN2QUhljWzSLghpZNponDccduJuSmkvckZGaRdDVM7IKOxSiNZyRkZZF0NUzsgo6mKIyhoZJV0M0TOk0eJ2/nq1Wv66hXpzcXd3fXXMcLMZVr/9sN3+7Sf/OPzk7ad26+8+3W6C+z9pv+C7xep689csVu03fbpe/3Dw738c/Pt+XQ+f/Hbwyela5hd37xfz/W94gn0RNuCrPuyhyotvv33Avpi/WzwPfbOhlh9XV4tntnErcdYXq3eL9csxtTtHhyOS9MzHftlqcbMZ/F+Of+O+kdn/hO+W15u/5fF2Iflxem/S4zXs/tD9Nz3OXXc9zelNKo7Xs9sOnfXcz+/DH/0lI32GNDt7pMPhSIejkT6e6GTwOO/2+oORDvyZbk/C02f69A5VRRg41EV9MtTl2FN9hjd8capfGsVwdGhtD9I9k2h8yOcMdnte7IzBrqYc7Dw5ufVaOnCw8
+J4RdXYc32GPD13rsPBXIdXn5sYDI/5nHlur2jEz/PJYybKwfNcnhzxRx/oM9Qx60D9+xvqcOZQ76L3ZDOdNbMkKY5zQ7pZlDePHyfN7jNCdmfd5aZTJifrc5/2M8y6RX98CMkPcxq8BtVu5zMc+fYBWS+OfInK2504ce58V1kxq/KDHeV4tSHMsmrsUT/jlMNLo240mpigYzjhrSZ4ecLLKSc8nASVh+djfWmjzLKRg0pzxgkX73ke2RxOceQuzlUlu0PlZHNdHIeJwYWy2CSTMj1e2f0b9tJxj9f5ObcY3p5p+LD/sq7iv1re3i6u1heX7ZcnT4zB0+q/6ElpzeNr1wHbrMpmWdnlm5WbdRRPJ0a7cwFNZOfrrB7b9dx4PUk+PaDcppY6maUF4rRP6/aF2gB1lcyahos6ssvCJ0UdQopmHdn14tOyLvJZGbisI7uQfFrWTY7eryO7wnzaZJaV6P06tkvPJ2W9fRg9mHVk16VPyjpLN1yzU9YBwzqyK9Zx7To78E6Tok6TyC5lx7VrEGo5M+d2DWItaebcrkGsZc2c2zWItayZc7sGsZY1c27XINayZs7tOk8wrGXNfNt1QpFmaSJp5tuuQajlzJzbNYd1kDRzbtcg1rJmzu0axFrWzLldg1jLmjm3axBrWTPndp1Qzl2n7RRGw9rodohW7ToUGGkWJM1c2zUJtZyZb7smsZY0823XJNayZr7tGsQ6lTXzbdck1rJmvu2axFrWzLddhxJz7jqNzZrR2nWKkWappJlvuwahjs2Z4do1iLWkmXO7BrGWNXNu1yDWsmbO7RrEWtbMuV1zWGeyZs7tOsWcu85is2awdl1hnFl7A81oSMPKNYi0jJlvtwahljHzrdYg1DJmvs0ahFrCzLdYg1DLl/n2ahBq6TLfWl1jzllnsmWurTrfurKe5wRNSz2XN3Nt2FDqMmi+bRuKXTrNt3lDscfm1mgtHIpdns23kUOxy7n5tnModvk336aeByR2uTjX1o7RM7lMnGtR54CWh/Pt5hjShdybbx3nkI7Nt9EaOIe0FJtv6eaQllXz7dkc0hJpvtWaQ1ruzPJ1unvP8QbzhpFCvswOdXnfp5PHDy52+TLDPTzpuf84iLWUmSHrvOf+4yDWsmaGrOue+49zWJfyZobNK+25/ziItcyZIeuy5/7jINZyZ4Y9O/Tcf7zBvIe7lD3zbdo55j0kpfTZeE0bhF0uzblpg1hLpjk3bRBrGTTnpg1iLYPm3LRBrGXQnJs2h3Ulg+bctAvMOe1KBs23aQeMQKsk0MZr2iDscmnOTRvEWjLNuWmDWMugOTdtEGsZNOemDWItg+bctEGsZdCcm3bAnNOuYjNok95VqeepXyVGoNWxCbQpUb/QtEnY5dJ8mzaJtWSab9MmsZZB823aJNYyaL5Nm8RaBs23aZNYy6D5Nu1QYc5p17EZNFrTzjACrY5NoJGbNgh7bC4N17Q5rJvYZBquaYNYy6A5N20Qaxk056YNYi2D5ty0Qaxl0JybdoY5p92+ukTDGta0a4w/a2LzZ+CiDaIem0mj9WwQ6thMGq1mg1DLnvm2bAzqLJE88y3ZINRyZ74dG4Ra6sy3YjeUc9lZEps5gzXsgvi4viyJzaGB2zZ0AmIza7TmDcUem2ajtXAodnk230YOxS7n5tvOodjl33ybOhN7kIvzbe0F8RncWZCX870aHeNqgrTceBejc6jHZuhoTR2EOjYtR2vnINTyb85XonNQy7k5X4jOQS3P5nwdOge13JrzZegc1PJpdqi7BTtLNlUsy2FaJZVNs4NePF+2oRMgsWa423d7N5S6JJthMcu6HRyKXZbNEHvV7eNQ7DJuhoUtdLs5FLvsmyH2otvTodhl4gw7e9Lt7FDssnKe/X17KjVLcNDl50br79AJkLZz7e9M6pm8nW9/h2KXrPPt71DssnW+/R2KXbbOt79DscvW+fZ3KHbZOtf+nhKlTSZXN15/Z06AtJ1vf2dSl7dz7u9M7JJ1zv0diT2XrXPu70zssnXO/Z2JXbbOub8zscvWefb3UBOlTS5XN1p/h06AtJ1rf4dSl7fz7e9Q7JJ1vv0dil22zre/Q7HL1vn2dyb2QrbOt79Dscdm66a8W3hPf8+J0qaIzdVNCf2l/s6cAGk73/7OpC5v59zfmdgl65z7OxO7bJ1zf2dil61z7u9M7LJ1zv2diT02Wwfr7wlR2pSxuTpyf2dOgLSdb39nUo/N2+H6OxO7ZJ1zf2dil61z7u9M7LJ1zv2diV22zrm/M7HHZutY/b08cTY5A3psrg7c36ETIG3n2t+Z1KvYvB2tv0OxxybraP0dil22zre/Q7HL1vn2dyh22Trf/g7FLlvnev86orOppOrGu30dcgBik3awq+eZ0GOTdrTyzqQem6ijdXck9VqizvnWdUjq8nTOd65DUpemc75xHZK6LJ0d9a+SWZkeQ89CAL5bopaks6Oev/Dgd+YEyNfZTUDd8+B3JnUZO8MWl95398f9PvweJkDKznACyp5nwDOxS9oZVrqk5xnwTOyydobY855nwCOxN9J2htibnmfAM7HL27l2+bQiGpxG3m60Lg+dADk81y4PpS6HN1qXh06AHJ5vl4dil8Tz7fJQ7DJ3vl0eil3mzrfLQ7HL3Pl2+Qx4F8M8kbgbr8szJ0AOz7fLM6nL4Y3X5ZkTIIfn3OWZ2CXxnLs8E7vMnXOXZ2KXuXPu8kzsMneuXT40QIOTJxJ3o3V56ATI4bl2eSb1IIc3WpeHToAcnm+Xh2KXxPPt8lDsMne+XR6KXebOt8tDscdm7qa8YXVfly+IBifEJu6mpP5Sl2dOgByeb5dnUpfDG6/LMydADs+5yyOxp5J4zl2eiV3mzrnLM7HL3Dl3eSb22MwdrcsHosFJYxN35C7PnIDYHB6tyzOpx+bwyF2eOQFyeM5dnoldEs+5yzOxy9w5d3kk9kzmzrnLM7HHZu5gXb4iCpwsNm8HrvLMAYjN4MGaPBN6bAIPXOSZAyB/59vjmdTl73xrPJO6nJ1vi2dSl7LzLfFM6jJ2vs+iI4qbXL5uvEfRIQcgNnMH6/BM6LGZO3CHZw6AzJ3zc+iQ1KXunB9Dh6QuX+f8FDokdfk654fQIamfYW62yD48ua2ulre3i6v1xWX75Ul3G4ZnN1ZRzJLDDXPyyljWzeyhGR1srazKZq1kPtxcWblZX9FdncPGkwD5nF2mfmGXmWXVMfm03uxFBWI/KaQ97FBXSfd2zyDUEhx2qEPouac/iLW8hiHrIu/e2BnEWgrDkHXTc8t+EGuJC8NklvXcpx/EWrrCkHXVc3N+EGtJCjvWWdpzR/46YFjrUiLfdp01j5ccTIxa1w/5tmsQajkz53bNYV1Kmjm3axBrWTPndg1iLWvm3K5BrGXNnNs1iLWsmXO7zhMMa1kz33adYKRZKWnm265BqOXMnNs1iLWkmXO7BrGWNXNu1xzWlayZc7sGsZY1c27XINayZs7tOsGcu65kzVzbdSgw0qySNHNt1yTUcma+7ZrEWtLMt12TWMua+bZrEmtZM992TWIt
a+bbrkGsa1kz33YdSsy56zo2a+Z7H/HPb9cpRprVsUmzKVH3tWsQajkz53YNYi1p5tyuQaxlzZzbNYi1rJlzuwaxljVzbtcg1rJmzu06xZy7rmOzZrB2XWGcWRObM4OVaxBpGTPfbg1CHZsxo1VrEGoZM99mDUItYeZbrEGo5ct8ezUItXSZb62uMeesm9hsGaxV51tX1nMT+Ympx+bNYA0bSl0GzbdtI7EXSWw6jda8odhjc2u0Fg7FLs/m28ih2OXcfNs5FLv8m29TzwMSu1yca2un6JkikYlzLeoc0LF5ONpV5hzSsbk3Wh3nkI7Nt9EaOIZ0kGLzLd0c0rJqvj2bQ1oizbdac0jLnVm+TnfvOd5Q3jBSBPkyO9TlfZ9OHj+42OXLDPfwpOf+4yDWUmaGrPOe+4+DWMuaGbKue+4/DmItb2bYvNKe+4+DWMucGbIue+4/zmGdyp0Z9uzQc//xhvIe7iKVPfNt2jnlPSRFS1aoR2jaIOxyac5NG8RaMs25aYNYy6A5N20Qaxk056YNYi2D5ty0Qaxl0JybdoE5p53KoPk27YARaJkE2nhNG4RdLs25aYNYS6Y5N20Qaxk056YNYi2D5ty0Qaxl0JybNoi1DJpz0w6Yc9qZDJpr0w4lRqBlEmijNW0Sdrk036YNYp1Lpvk2bRJrGTTfpk1iLYPm27RJrGXQfJs2ibUMmm/TDhXmnHYem0Gb9P7FPU07wwi0PDaBNiXql5o2CHtsLm3SPbyvaYNYS6Y5N20Qaxk056bNYV3IoDk3bRBrGTTnpg1iLYPm3LQzzDntIjaDBmvaNcafFbH5M3DRBlGXSfPt2SDUsZk0Ws0GoY7NntFaNgi15JlvyQahljvz7dgc1KXUmW/FbjDnssvYzBmsYRfIx/WVsTk0cNuGToDMmm/zhmKPTbPRWjgUe2yejdbIodjl3HzbORS7/JtvU4dil4vzbe0F8hncZWxeDtbgA8bVVLFpOXBrB1GXoXO+Fp2DOjYtR2vnINSx+TdaIwehlnNzvhCdg1qezfk6dA5quTXny9A5qOXT7FB3C3aWbKpYlsO0SiWbZge9eL5sQycgNrE26W7f7d1M6nVskm3SYpZ1OzgUe2yWbVLsVbePQ7HLuBkWttDt5lDssm+G2ItuT4dil4kz7OxJt7NDscvKefb37anULMFBl58brb9DJ0DazrW/Q6nL2/n2dyh2yTrf/s7E3sjW+fZ3KHbZOt/+DsUuW+fb36HYZetc+3tKlDaNXN14/Z05AdJ2vv2dSV3ezrm/M7FL1jn3dyZ22Trn/s7ELlvn3N+J2MtEts65vzOxy9Z59vdQA6VNmcjVjdbfoRMgbefa36HU5e18+zsUu2Sdb3+HYpet8+3vUOyydb79HYpdts63v0Oxy9a59vecKG2CXN14/Z05AdJ2vv2dSV3ezrm/M7FL1jn3dyZ22Trn/s7ELlvn3N+Z2GXrnPs7E3tstm7KO0j33SCeKG1CbK5uSugv9XfmBMSm7Sbd7fvuG0+knsrbOfd3JnbJOuf+zsQuW+fc35nYZeuc+zsTu2ydc39nYo/N1rH6e3nibHIG9NhcHbi/QydA2s61v0Opx+btaP0dij02WUfr70zsmWydb3+HYpet8+3vUOyydb79HYo9NlvH6u8p0dlksak6cH1nDoCkne/d65DQY5N2tPLOpB6bqKN1dyZ1iTrnW9chqcvTOd+5jkg9l6ZzvnEdkrosnR31r5LZw4UM7dMCQwC+WyKXpLOjnr/w4HfmBMTm66acgLrnwe9M6rEZu0lbXHrf3R/3+/B7mIDYlN2kE1D2PAOeiV3SzrDSJT3PgGdil7UzxJ73PAOeiV3azhB70/MMeCZ2eTvXLp9WRINTyNuN1uWhEyCH59rlodTl8Ebr8tAJkMPz7fJQ7JJ4vl0eil3mzrfLQ7HL3Pl2eSh2mTvfLp8R72JYSNyN1+WZEyCH59vlkdRLObzxujxzAuTwnLs8E7sknnOXZ2KXuXPu8kzsMnfOXZ6JXebOtcuHhmhwSom70bo8dALk8Fy7PJS6HN5oXR46AXJ4vl2eib2SxPPt8lDsMne+XR6KXebOt8tDscvc+Xb5gmhwKom78bo8cwLk8Hy7PJO6HN54XZ45AXJ4zl2eiV0Sz7nLM7HL3Dl3eST2WubOucszscdm7qa8fXVflw9Eg1PHJu6mpP5Sl2dOQGwOb8oJ6OvyTOpyeON1eeYEyOE5d3kmdkk85y7PxC5z59zlmdhl7py7PBN7bOYO1uUrosBpYvN24CrPHAAZPNcmz4Qem8ADF3nmAMTm72g9nkld/s63xjOpy9n5tngmdSk73xLPpB6bsYN1+IwobprYfB24wzMHQObO90l0QOhVEpu5A3d45gDEZu5oHZ5JXerO+TF0SOrydc5PoUNSl69zfggdkXp2hrnZIvvw5La6Wt7eLq7WF5ftlyfdbRie3VibJDQ7rT5lXT8uO9hEoShnIe1uo3Cw7Rz2jdispmfn2c/U0/tGmGXVMe36QH5MfBSMzWROSbpMn9UbIOoymXbUQ9KVGiDU8peuh/I8zNLiIOUwYk4iaTnaYZ05ASE2g0k7xEOxx+YtJ8XedA73mNf1thQKtMUR/vnzkhzosZlK2kGdQzo2Ozkl6e5hvOGAlmqzA108fxwHUZd2M9y9u8dxEGmpNs8DecFsYZJuox3UoRMgAed6gGdSTyXdPA/2AfOqnkqzjXaAB1GXc3M9qINIS7QZirbuWVNOP0sl2gyP5M+fKQVRl3Vz3b+h8VzWbbR9HToBsnF2E9B3S+2Ec4iXg7ND/dJ9tEHYJd7ssPfdPJuDOpNtcz2Yl8iX8EzqbbTjOnQCpOFcD/FQ6lJyrkd7zrVvmZTcaEd4EHUpOdejOoh0ZBJmXizqed5Huk4vs7I8l/Tz7xqvqrABeIS0rutZmSOQRiZbxkFah2zWnn/iIY1MpIyEtCi4SPPIhMlISJty1j6jiYc0MhsyDtImq8B7aWR6YySkddLZS7fXGzCQRuYupku8eT1rb804MdLIJMV0iZeDNDIDMV3i5SCN7Iqf6RIvB6nskVHi5SCVPTJKvEVCQSp7ZJR4E0ovLWSPjBIvB6nskVHi5SCVPTJKvBykskdGiZeDVPbIKPEGiuMtZI9sEm9VUnppIXtkk3hBSGWPbBIvCKnskU3iBSGVPbJJvBykpeyRTeKtKorjLWWPjBJvSuml7S8mpF+aeDlIZY+MEi8HqeyRUeLlIJU9Mkq8HKSyR0aJN6M43lL2aFDizU4Tb9lgSozs0aDE24lHIKSyR4MSb+e1lIO0kj0alHjr09dSEFLZo0GJtwHvpbJHNokXhFT2aEjiLTuJd3ssZpSYSvZoyGtp0r3ok4NU9mgI0rx7hSAHqezREKR1J/GCkMoeDYlHaSfxgpDKHg1BWnUSb5NQHG8le2SUeAvKtUe17JFR4uUglT0ySrwcpLJHRomXg1T2yCjxcpDKHtkk3rqkON5a9sgo8QZKL61lj4wSLwep7JFR4uUglT0ySrwcpLJHRokXg7SRPTJKvCnF8TayRzaJt6o
ovbSRPbJJvCCkskc2iReEVPbIJvGCkMoe2SReEFLZI5vEW9UUx9vIHhkl3gzTS2WPjBIvB6nskVHipSCtE9kjo8TLQSp7ZJR4OUhlj4wSbw5xvHUie2SUeBNIiakT2SOjxMtBKntklHg5SGWPjBIvB6nskVHi5SCVPTJKvBykskdDEm/RuR9vQ3lOTB1kj4YgbTr34wUhlT0aEo+yTokBIZU9GoK06tyPF4RU9mhIPAqdXgpCKns0BGnZvVcD5ZlrdZA9skm8NeU5MXWQPbJJvCCkskc2iReEVPbIJvGCkMoe2SReDtJU9sgm8daUZ67VqeyRUeKlPCembgkK6ZcmXg5S2SOjxMtBKntklHg5SGWPjBIvB6nskVHipTxzrU5lj2wSb1VjeqnskU3iBSGVPbJJvBykmeyRTeIFIZU9skm8IKSyRzaJt6I8c63OZI+MEm9O6aWZ7JFR4uUglT0ySrwcpLJHRomXg1T2yCjxcpDKHhkl3oLieDPZo0GJt3uvhkApMbns0aDE241HHKSyR4MSb/e1lINU9mhQ4u3eq4GDVPZoUOLt3quBg1T2yCjxcpDKHg2JR3kn8TaU58TUuezREKR19439HKSyR0PiUdp9FzgHqezREKRlJ/GCkMoeDYlHSSfxcpAWskdDkBbdezVQnrlWF7JHNom3pjwnpi5kj2wSLwip7JFN4gUhlT2ySbwgpLJHNokXhFT2yCbx1pRnrtWF7JFR4sU8J6aQPTJKvBykskdGiReDtJQ9Mkq8HKSyR0aJl4NU9sgo8WKeuVbKHtkk3qqh9NJS9sgm8YKQyh7ZJF4QUtkjm8QLQip7ZJN4QUhlj4wSL+aZa6XskVHiLSi9tJI9Mkq8HKSyR0aJl4NU9sgo8XKQyh4ZJV4OUtkjm8RblRTHW8keGSXelFJiKtkjo8TLQSp7ZJR4OUhlj4wSLwep7JFR4sUgrWWPjBIvB6ns0ZB4tH2CyHHibTAPFallj4YgrdLOW4Y5SGWPhsSjkIORyh4NQVoUnXeBc5DKHg1B2pTgvVT2aEjizRrwXip7ZJN4a8xDRWrZI5vEC0Iqe2STeDlIG9kjm8QLQip7ZJN4QUhlj2wSLwip7JFR4sU8VKSRPTJKvBykskdGiZeDVPbIKPFykMoeGSVeDlLZI6PEy0Eqe2SUeCkPFWkS2SOjxMtBKntklHg5SGWPjBIvB6nskVHi5SCVPTJKvBykskc2ibeqICWmSWSPbBIvCKnskU3iBSGVPbJJvCCkskc2iZeDNMge2SReEFLZo0GJN+0k3oxSYoLs0aDE241HHKSyR4MSb/e1lINU9mhQ4q06r6UcpLJHgxJvDd5LZY+MEi8HqezRkHiUdhJvQ3moSBNkj4YgLbtv7OcglT0a8lqadN8FjkGayh4NQZp3Ei8IqezREKR1J/GCkMoeDUm8afdeDRykskdGiZfyUJEmlT0ySrwcpLJHRomXg1T2yCjxcpDKHhklXg5S2SOjxMtBKntkk3hrykNFmkz2yCbxgpDKHtkkXhBS2SObxAtCKntkk3hBSGWPbBIvCKnskVHipTxUpMlkj4wSLwep7JFR4uUglT0ySrwcpLJHRokXgzSXPTJKvBykskc2ibeqKSUmlz2ySbwgpLJHNokXhFT2yCbxgpDKHtkkXhBS2SObxAtCGpk9+rG+Wlxd9SG9rIu8SPwSb44pMbJHRomXg1T2yCjxYpAWskdGiZeDVPbIKPFykMoeGSVeDtIz7NHidv56tVr+uqV6c3F3d311DLGz9UKVF99+293ei7DZ4tV2+fJ2fbA8uf/YLN9s0dVvP2xRtp/84/CTt59akLtPt1vzfuPsF3y3WF1vtsti1X7Tp+v1D7vBqMv95/crnGVJtf/8fp3tF9+vsf3kdG3zi7v3i/n+b95tpMX83eL5qdhsyOXH1dXiGQZVe2XJ+mL1brF+Ocx2J+1whpKeAdovWy1uNrvGL8e/ct9M7X/Cd8vrzR/zMMhl+2TU/RSX7TmfdhW7P3X/XY+T2VlRlR+vqGhOVrTbEp0V3Y/4w5/9JVN/hmBjTf3u0HA4+I/7wZNjfzjyh+P+sLJ25MPoM9++f+CMma8mnfnQzJqmKpL9R89tlJL04b+eHKbP3h+qfPMKURyvumhmWf2w5vaBFaPtIWf4StYecvyykJ/zsvDU/jHty0F7K0L8y8Efddc4w/uydo0hkempfSMc7RztqkaNS+fvH9O+dKTP7h+5zf5R9+wf5bT7xxnGdYL9IxzuH+GZl44Xc1U42DcC53WjfeM0f78oZyEcj2x9H0kfR/Zkpee/WhSzKs+bh4/jH5Jks5BNtl+UZzjOl/aLo+H5/J3EsBlYlxbDPaGsz32F2NeQqfaE/KQHl+nAQp2fNPOiGrlQl2fI3qln++zkYnlQ3j0OdLIBCycDlg0csIdvbCPG6aS6D9gZ6pkVuj8zVJxXRvPx83ZRpeceTXfNdbJckffkisomVzTP5op02lxhoPAxx17DuG6ZKKrzE8Wk2bo4OU6HZKijL+rjNTVjO/r2irC4xtrG0liOdnNubdyLl6lGO9u0t7p86hhcpvWsrIdKlE1RObmArRPF3cfdQLifOxaTxdHsOESGgbxC+sKK3GEZKOA/CqzT9TRjszrjmt7fO6uQJ7P0+BKTUNSzdCCyND1rde7kDETyH4xcz+rSbAJyZ1yh+3snZ3V8PL2ophyZVasKj1iVN+u9/thC211Bt1n6fx+X2+Xh8Z+bf73b///9t1weQW6/aruir3areb35gqz48Km7in/76+s/ffO3dkWbP+bydOWbZbtfqV18MlLrxaf1c8rndrm9nvAo7u8XXdxcv9teMXi1ma1tyv56e8Xf9dXFzev9f/j5ej7f/pjeCxpXu6sY9+H87qfF+ur9/pMTg1S82f7vlcl1hWGTWtt7Y7TeNylm7eUDB3tBlhUP+fZwTwhJ8vTUf9l1hVVfe5tiqv7n1ZvsVf327Td/12SdO1lF8XB16oNHKPJZ+1T4iSerryhNMVl/205W8/rNXz5ztJLTX3fYjz/4C3YL0rea8HMnPM+aWXl87MxCtRnl5tQvTDzsfUXz9zTs5cXPW8q3l3cfjoe/861ftjdkb5/+FXp+2z/28CfJ6fDXedgEB9rw9zX3KZNp8p/ffP/ff/mv128f9oHL1fNzpmNv39uFj9VtXm0OvN3Q2tvdPKetzzZMnVg1cV4PmmqyWfuAcI+h23y6Wm6xPnb1zRZ4/9flfLH9iv8H -------------------------------------------------------------------------------- /docs/images/sketches/taxonomy-steps.drawio: -------------------------------------------------------------------------------- 1 | 
7Vtdd6JIEP01PmYOHwL62Gk7mT5HwRV19+zLHkY6yizaLmI0++un+VLBSqKjEDyJDwpFU0Dde+mqAhsqnm8fA2c563GX+Q1FcrcNtdNQFEVtS+InsrykFk0zEss08NzEJu8Ntvc/S43pjtO157JVbmDIuR96y7xxwhcLNglzNicI+CY/7In7+aMunSk7MtgTxz+2/um54SyxtjRpb//OvOksO7IspVvmTjY4Naxmjss3ByaVNFQccB4mS/MtZn4UvSwuyX4Pr2zdnVjAFuEpO4wo6m9wOPrnp0zvX0jwvA42d6mXZ8dfpxecnmz4kkUg4OuFyyInUkO938y8kNlLZxJt3QjQhW0Wzn2xJovF1B0LQrZ99Tzl3dUL3jA+Z2HwIoakO9wpGWlSzjQzLmz2AMhyFtbZQfSbemp0UtSnO+/7wIiFNDZnxEkB4qT74rD3T1xc1mHA9P/WPNtwt4oJjcSAprTc7jeKpWn0i8wGVhstuUttaotRHSK+eqPukA5HHRJZiCm+xrRDrDF9pF1kYoqyY4tLSQ6fODuCToAQ5vFZhQH/l2Hu80BYFnzBojP1fL9gcnxvuhCrEwEaE/b7CFJPyAKlG+ae60aHAQmRp8xVOGHkOaEqACekFsCJVlmUUOsoHSUfJq0FhAmKklKacpoXK0cFlZN5+ZEZOmRIMKaRntqqeSCRH8VdPpFsCnxoGvJpfDDKooNWI9W8JZJ2pSLRqxKJTR5HtEeJObQ+t0JqI4h2jQSRpe1Hs8hxkJRmpQLJUsDyFUL+Gg7Q4TyS5GQYCeuQDOJ0rWMPKUb251ZQRhatblOMDBU153FFkSGuFDOM42z9NOyrTwulPEa6BMx4kKDbpWF0eUF1DkZ9MrAtE90OQob64QhB9c01EMqlILciIeUEAWmVwnN5XXUyPHVXj3KCdqoFB8rnC0FazZxltPjksy2KGpUiFmzhpoudie+sVt4kH6vsgsTdMx6ctkv1b7qRGLIWpSoVcoeGoj7Fn13EmXvU+Hw33gcBheKZ2QLmO6H3nHcPBTk9Qp97MWOzm2EhozCk9jfZkHafgsMVXwcTlvrYA3fkVj7La+gEUxYeeY3psIvIBQxpfTHk9xkiGwCWenv3af0eQxT1LLclU0S5vOSB7/Dxbd20aQeJPaVdARScW8JUfptXtXcn4Z3Kc48FSnsqUFalga0uwUM6TjC6ETj0D4ejrKJiTLoWviUsDODxSMVYlFU+dOgAbvPXGxCgx1wxIFDB8MFR0gpVL9SJV6EolddoVKAHFKU0GnEX2fSBYnTca7R6fWswREkVZseT9EO8EeptINPqoW7Sl/zqSSY80vO8ahpARVhpT1Ix3lffV77/Wr7fBCpCSbq4ItSAkvANt2Xn+1BJeOaMCd55sGU+EntIrd1sWf9cvwiN1gYUDPVDy1Mw9KDuGvCQMcIjVPuMRtfrhkhWrV8dkX7yTpZJsVVXNIr60A3to9G4Qi0Mo0G6pL6956IsagDEFapgeB5BSZJ5kGXe3kRiAF2KivG5QmUMv0vaIyb6+3akUgMoLn+IJmsnFXvXcUvMB+uPUfzElOKBZeMkZ+i/NVHdejkY8FCUDjza6659rfci1WKnXwPu2lBBU1p7Rr287/BRVERfVLyEioXaVpcBKkJvo2rnU1Gs7v9vktSt+7/tqOQX -------------------------------------------------------------------------------- /docs/images/taxonomy_steps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/taxonomy_steps.pdf -------------------------------------------------------------------------------- /docs/images/ucf-examples/arson-abnormal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/arson-abnormal.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/arson-normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/arson-normal.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/explosion-abnormal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/explosion-abnormal.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/explosion-normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/explosion-normal.png -------------------------------------------------------------------------------- 
/docs/images/ucf-examples/normal-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/normal-1.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/normal-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/normal-2.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/normal-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/normal-3.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/normal-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/normal-4.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/roadaccident-abnormal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/roadaccident-abnormal.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/roadaccident-normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/roadaccident-normal.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/stealing-abnormal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/stealing-abnormal.png -------------------------------------------------------------------------------- /docs/images/ucf-examples/stealing-normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/stealing-normal.png -------------------------------------------------------------------------------- /docs/images/ucsd-anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucsd-anomaly.png -------------------------------------------------------------------------------- /docs/images/umn-anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/umn-anomaly.png -------------------------------------------------------------------------------- /docs/main.tex: 
-------------------------------------------------------------------------------- 1 | \documentclass[a4paper,11pt,oneside]{memoir} 2 | \setcounter{secnumdepth}{2} 3 | \setcounter{tocdepth}{2} 4 | \usepackage{listings} 5 | \usepackage[utf8]{inputenc} 6 | \usepackage[spanish]{babel} 7 | \usepackage[T1]{fontenc} 8 | \usepackage{kpfonts} 9 | \usepackage{amsmath} 10 | \usepackage[backend=biber, sorting=none]{biblatex} 11 | \addbibresource{bibliography/bibliography.bib} 12 | 13 | \usepackage{variables} 14 | 15 | \usepackage{chapterheader} 16 | 17 | \usepackage{subfiles} 18 | 19 | \usepackage{subcaption} 20 | 21 | \decimalpoint 22 | 23 | \usepackage{dcolumn} 24 | \newcolumntype{.}{D{.}{\esperiod}{-1}} \makeatletter 25 | \addto\shorthandsspanish{\let\esperiod\es@period@code} \makeatother 26 | 27 | \usepackage{algorithm} 28 | \usepackage[noend]{algpseudocode} 29 | 30 | \RequirePackage{verbatim} 31 | % \RequirePackage[Glenn]{fncychap} 32 | \usepackage{fancyhdr} 33 | \usepackage{graphicx} 34 | \usepackage{afterpage} 35 | \usepackage{float} 36 | \usepackage{tikz} 37 | \usetikzlibrary{matrix,chains,positioning, 38 | decorations.pathreplacing,arrows,babel,fit, 39 | arrows.meta,shapes} 40 | \usepackage{pgfplots} 41 | \def\layersep{2.2cm} 42 | \newcommand{\empt}[2]{$#1^{\langle #2 \rangle}$} 43 | 44 | \newtheorem{theorem}{Teorema} 45 | \usepackage{longtable} 46 | \usepackage{xcolor} 47 | 48 | \usepackage[pdfborder={000}]{hyperref} %referencia 49 | 50 | % TABLES 51 | \usepackage{booktabs} 52 | \usepackage{ctable} 53 | \usepackage{multirow} 54 | \usepackage{makecell} 55 | \addto\captionsspanish{\renewcommand{\tablename}{Tabla}} 56 | 57 | \hypersetup{ 58 | pdfauthor = {\AuthorName (fluque1995@correo.ugr.es)}, 59 | pdftitle = {\ProjectTitle}, 60 | pdfsubject = {}, 61 | pdfkeywords = {palabra_clave1, palabra_clave2, palabra_clave3, ...}, 62 | pdfproducer = {pdflatex}, 63 | colorlinks = True, 64 | linkcolor = darkgray, 65 | citecolor = blue 66 | } 67 | 68 | % \hyphenation{} 69 | 70 | % \usepackage{doxygen/doxygen} \usepackage{pdfpages} 71 | \usepackage{url} 72 | \usepackage{colortbl,longtable} 73 | % \usepackage[stable]{Footmisc} 74 | % \usepackage{index} 75 | 76 | % \makeindex \usepackage[style=long, 77 | % cols=2,border=plain,toc=true,number=none]{glossary} \makeglossary 78 | 79 | % Definición de comandos que me son tiles: 80 | % \renewcommand{\indexname}{Índice alfabético} 81 | % \renewcommand{\glossaryname}{Glosario} 82 | 83 | \pagestyle{fancy} \fancyhf{} \fancyhead[LO]{\leftmark} 84 | \fancyhead[RE]{\rightmark} \fancyhead[RO,LE]{\textbf{\thepage}} 85 | \renewcommand{\chaptermark}[1]{\markboth{\textbf{#1}}{}} 86 | \renewcommand{\sectionmark}[1]{\markright{\textbf{\thesection. 
#1}}} 87 | 88 | \setlength{\headheight}{1.5\headheight} 89 | 90 | \newcommand{\HRule}{\rule{\linewidth}{0.5mm}} 91 | 92 | \newcommand{\bigrule}{\titlerule[0.5mm]} 93 | 94 | % Para conseguir que en las páginas en blanco no ponga cabecerass 95 | \makeatletter 96 | \def\clearpage{% 97 | \ifvmode \ifnum \@dbltopnum =\m@ne \ifdim \pagetotal <\topskip 98 | \hbox{} \fi \fi \fi 99 | \newpage 100 | \thispagestyle{empty} \write\m@ne{} \vbox{} \penalty -\@Mi } 101 | \makeatother 102 | 103 | \begin{document} 104 | 105 | \subfile{prefaces/cover} 106 | 107 | \subfile{prefaces/spanish_abstract} 108 | 109 | \subfile{prefaces/english_abstract} 110 | 111 | \subfile{prefaces/licensing} 112 | 113 | \tableofcontents 114 | 115 | \subfile{chapters/01_introduction} 116 | 117 | \subfile{chapters/02_taxonomy} 118 | 119 | \subfile{chapters/03_problem_description} 120 | 121 | \subfile{chapters/04_experimentation} 122 | 123 | \subfile{chapters/05_conclusions} 124 | 125 | \nocite{*} 126 | \printbibliography 127 | 128 | \appendix 129 | 130 | %\subfile{appendices/ECG_explanation} 131 | 132 | \end{document} 133 | -------------------------------------------------------------------------------- /docs/memoria_TFM_Luque_Sanchez_Francisco.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/memoria_TFM_Luque_Sanchez_Francisco.pdf -------------------------------------------------------------------------------- /docs/prefaces/cover.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{memoir} 2 | 3 | \begin{document} 4 | \begin{titlingpage} 5 | 6 | \newlength{\centeroffset} 7 | \setlength{\centeroffset}{-0.5\oddsidemargin} 8 | \addtolength{\centeroffset}{0.5\evensidemargin} 9 | \thispagestyle{empty} 10 | 11 | \noindent\hspace*{\centeroffset} 12 | \begin{minipage}{\textwidth} 13 | \vspace{-2cm} 14 | \centering 15 | \includegraphics[width=0.5\textwidth]{images/logo_ugr.jpg} 16 | 17 | \textsc{ \Large TRABAJO FIN DE MÁSTER\\[0.2cm]} 18 | \textsc{ \Degree }\\[0.5cm] 19 | 20 | {\Large\bfseries 21 | \ProjectTitle\\ 22 | \noindent\rule[-1ex]{\textwidth}{1pt}\\[2ex] 23 | \ProjectTitleEng\\ 24 | } 25 | \noindent\rule[-1ex]{\textwidth}{3pt}\\[2ex] 26 | \end{minipage} 27 | 28 | \vspace{1.5cm} 29 | \noindent\hspace*{\centeroffset}\begin{minipage}{\textwidth} 30 | \centering 31 | 32 | \textbf{Autor}\\ {\AuthorName}\\[2ex] 33 | \textbf{Directores}\\ 34 | {\MainProf\\ 35 | \SecondProf}\\[1cm] 36 | \includegraphics[height=1.5cm]{images/etsiit_logo.png}\\[1cm] 37 | \textsc{\FacultyOne}\\ 38 | \textsc{---}\\ 39 | \Location, \Time 40 | \end{minipage} 41 | % \addtolength{\textwidth}{\centeroffset} 42 | % \vspace{\stretch{2}} 43 | 44 | \end{titlingpage} 45 | \end{document} 46 | 47 | %%% Local Variables: 48 | %%% mode: latex 49 | %%% TeX-master: "../main" 50 | %%% End: 51 | -------------------------------------------------------------------------------- /docs/prefaces/english_abstract.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{memoir} 2 | 3 | \begin{document} 4 | \thispagestyle{empty} 5 | 6 | \begin{center} 7 | {\large\bfseries \ProjectTitleEng}\\ 8 | \end{center} 9 | 10 | \begin{center} 11 | \AuthorName\\ 12 | \vspace{0.7cm} 13 | \noindent{\textbf{Keywords}:}\\ 14 | Deep learning, crowd analysis, anomaly detection, video-surveillance\\ 15 | \vspace{0.7cm} 16 | 
\noindent{\textbf{Abstract}}\\ 17 | \end{center} 18 | 19 | Recent decades have experienced an unprecedented growth in population 20 | all around the globe. Crime and terrorism rates are also increasing 21 | rapidly. These two issues have turned video-surveillance into a 22 | fundamental tool worldwide. The number of security cameras installed 23 | in both public and private environments is huge nowadays, and thus 24 | processing the information retrieved by those cameras is getting 25 | harder every day. Hence, there is a need to automate that process, 26 | using intelligent models capable of extracting information from video 27 | sources.\\ 28 | 29 | Advances in deep learning made in recent years have improved the 30 | results obtained in this research area by a large margin. Current 31 | models are able to extract complex information automatically, and to 32 | work in more difficult environments, especially in terms of density of 33 | individuals. Despite these recent advances, the area is still 34 | relatively young, and consequently it is not properly structured. Due to 35 | this fact, comparing works that tackle different sub-tasks within this 36 | area is usually difficult.\\ 37 | 38 | In this context, this work tries to solve different tasks 39 | related to the automatic treatment of video-surveillance sources. In 40 | particular, we focus our efforts on the analysis of crowd 41 | behaviors from video-surveillance sources. The work is divided into two 42 | parts. In the first part, which constitutes the theoretical study, a sequential 43 | taxonomy is proposed. This taxonomy allows organizing 44 | the different sub-tasks inside the topic as stages of a 45 | pipeline. The later stages of the pipeline strongly 46 | rely on the previous ones, and thus their results are heavily influenced 47 | by the earlier stages.\\ 48 | 49 | Apart from the proposed taxonomy, an in-depth review of the 50 | state of the art is conducted. Particularly, we center our study on 51 | the works that use deep learning models to solve the crowd anomaly 52 | detection problem. Inside this topic, we analyze the main public 53 | datasets and proposals, organizing them depending on the specific type 54 | of anomaly to be detected.\\ 55 | 56 | Finally, in the experimental part of this work, the use of 57 | spatio-temporal features for action anomaly detection is studied. To 58 | this end, one of the reviewed works is analyzed in depth and set as the 59 | baseline for comparison. After the complete study, a new model for 60 | action anomaly detection is proposed. In particular, the original 61 | model employs a 3D convolutional feature extractor, which processes 62 | batches of consecutive frames. In our approach, the proposed feature 63 | extractor is a combination of 2D CNNs for spatial feature extraction, 64 | processing frames independently, together with a recurrent neural 65 | network, which learns to process the sequence of features from 66 | consecutive frames in the video. Our hypothesis is that the 67 | combination of convolutions and recurrent networks better preserves 68 | the semantic structure of the information in the video, and thus the 69 | obtained model extracts more meaningful information.\\ 70 | 71 | Our experimentation suggests that our model outperforms the 72 | classification capability of the original model, despite being 73 | pretrained on a much smaller dataset. 
In particular, the original 74 | feature extractor is trained on a dataset 1000 times bigger than ours. 75 | This fact allows us to conclude that our proposal is better than 76 | the original one in terms of classification capability, validating 77 | our hypothesis.\\ 78 | 79 | The code developed for the experimentation, together with the instructions 80 | to replicate the experiments, can be found in the repository 81 | \url{https://github.com/fluque1995/tfm-anomaly-detection}. 82 | 83 | \newpage 84 | \end{document} 85 | 86 | %%% Local Variables: 87 | %%% mode: latex 88 | %%% TeX-master: "../main" 89 | %%% End: 90 | -------------------------------------------------------------------------------- /docs/prefaces/licensing.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{memoir} 2 | 3 | \begin{document} 4 | 5 | \thispagestyle{empty} 6 | 7 | \noindent\rule[-1ex]{\textwidth}{2pt}\\[4.5ex] 8 | 9 | Yo, \textbf{\AuthorName}, alumno del Máster Universitario Oficial en 10 | Ciencia de Datos e Ingeniería de Computadores de la 11 | \textbf{\FacultyOne de la \University}, con DNI 31008316S, autorizo la 12 | ubicación de la siguiente copia de mi Trabajo Fin de Máster en la 13 | biblioteca del centro para que pueda ser consultada por las personas 14 | que lo deseen. 15 | 16 | \vspace{6cm} 17 | 18 | \includegraphics[width=3.5cm]{images/yo_firma} 19 | \\~\\ 20 | \noindent Fdo: Francisco Luque Sánchez 21 | 22 | \vspace{2cm} 23 | 24 | \begin{flushright} 25 | \Location, \Time 26 | \end{flushright} 27 | 28 | \newpage 29 | 30 | \thispagestyle{empty} 31 | 32 | \noindent\rule[-1ex]{\textwidth}{2pt}\\[4.5ex] 33 | 34 | D. \textbf{\MainProf} y D.ª \textbf{\SecondProf}, profesores del 35 | \Department de la \University. 36 | 37 | \vspace{0.5cm} 38 | 39 | \textbf{Informan:} 40 | 41 | \vspace{0.5cm} 42 | 43 | Que el presente trabajo, titulado \textit{\textbf{\ProjectTitle}}, ha 44 | sido realizado bajo su supervisión por \textbf{\AuthorName}, y 45 | autorizamos la defensa de dicho trabajo ante el tribunal que 46 | corresponda. 47 | 48 | \vspace{0.5cm} 49 | 50 | Y para que conste, expiden y firman el presente informe en \Location a \Time 51 | 52 | \vspace{1cm} 53 | 54 | \textbf{Los directores:} 55 | 56 | \vspace{5cm} 57 | 58 | \begin{minipage}{0.45\linewidth} 59 | \begin{center} 60 | \includegraphics[width=5cm]{images/paco_firma} 61 | \textbf{\MainProf} 62 | \end{center} 63 | \end{minipage} 64 | \begin{minipage}{0.45\linewidth} 65 | \begin{center} 66 | \includegraphics[width=5cm]{images/siham_firma} 67 | \textbf{\SecondProf} 68 | \end{center} 69 | \end{minipage} 70 | 71 | \newpage 72 | \end{document} 73 | -------------------------------------------------------------------------------- /docs/prefaces/spanish_abstract.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{memoir} 2 | \begin{document} 3 | \thispagestyle{empty} 4 | 5 | \begin{center} 6 | {\large\bfseries \ProjectTitle}\\ 7 | \end{center} 8 | 9 | \begin{center} 10 | \AuthorName\\ 11 | \vspace{0.7cm} 12 | \noindent{\textbf{Palabras clave}:}\\ 13 | Aprendizaje profundo, análisis de multitudes, detección de 14 | anomalías, videovigilancia\\ 15 | \vspace{0.7cm} 16 | \noindent{\textbf{Resumen}}\\ 17 | 18 | \end{center} 19 | 20 | En las últimas décadas se ha producido un crecimiento poblacional sin 21 | precedentes en todas las partes del mundo, y las tasas de criminalidad 22 | y terrorismo se han disparado en muchos territorios. 
Esto ha provocado 23 | que la videovigilancia se convierta en una herramienta prioritaria a 24 | nivel mundial. El número de cámaras de seguridad instaladas tanto en 25 | ámbito público como privado ha crecido significativamente, y con ello 26 | la dificultad de gestionar la información recogida por las mismas de 27 | forma manual. Aparece, por tanto, la necesidad de automatizar este 28 | proceso, utilizando para ello modelos inteligentes capaces de extraer 29 | información de las secuencias de vídeo recogidas por las cámaras.\\ 30 | 31 | Los avances en aprendizaje profundo de los últimos años han permitido 32 | que los resultados obtenidos sobre esta área de investigación mejoren 33 | considerablemente. Los modelos actuales son capaces de extraer 34 | información más compleja, y trabajar en entornos de mayor dificultad, 35 | especialmente en cuanto a densidad de individuos se refiere. A pesar 36 | de ello, el estudio de esta tarea es relativamente reciente, por lo 37 | que no está correctamente estructurada, y resulta difícil comparar los 38 | trabajos propuestos.\\ 39 | 40 | Dado el contexto anterior, este trabajo trata de resolver varias 41 | tareas relacionadas con el tratamiento automático de fuentes de 42 | videovigilancia, en particular con el análisis de comportamientos de 43 | multitudes. Por un lado, se realiza un estudio teórico de la temática, 44 | con una propuesta taxonómica que permite agrupar los distintos 45 | trabajos siguiendo una secuencia de tareas. Esta organización sitúa 46 | las distintas subtareas consideradas dentro de la temática en 47 | distintos pasos de la secuencia, de forma que los resultados de los 48 | pasos posteriores se ven fuertemente influenciados por los obtenidos 49 | en los pasos previos.\\ 50 | 51 | Además de la propuesta taxonómica, en el estudio teórico se hace una 52 | revisión exhaustiva de la literatura que utiliza modelos de 53 | aprendizaje profundo para resolver el problema de la detección de 54 | anomalías en multitudes. Para esta subtarea, se analizan los 55 | principales conjuntos de datos disponibles públicamente, y se estudian 56 | los trabajos del estado del arte, agrupando los mismos por el tipo 57 | concreto de anomalía que tratan de identificar.\\ 58 | 59 | En el apartado práctico del trabajo, se estudia el uso de 60 | características espacio-temporales para la detección de acciones 61 | anómalas en vídeo. Para llevar a cabo dicho estudio, se establece como 62 | punto de partida un modelo de detección de anomalías basado en un 63 | extractor de características convolucional en tres 64 | dimensiones. Nuestra propuesta, en lugar de utilizar un extractor 65 | exclusivamente convolucional, aprovecha la potencia de las redes 66 | neuronales convolucionales 2D para el análisis de los fotogramas por 67 | separado, extrayendo información espacial, y la capacidad de las redes 68 | neuronales recurrentes para extraer información temporal de la 69 | secuencia de características de los fotogramas consecutivos. Dicho 70 | extractor de características es más complejo que la propuesta 71 | original, y conserva mejor la estructura temporal del vídeo, lo cual 72 | permite la extracción de información de mayor calidad. 
El código 73 | desarrollado para la experimentación se encuentra disponible en el 74 | repositorio \url{https://github.com/fluque1995/tfm-anomaly-detection}.\\ 75 | 76 | Los resultados obtenidos del estudio sugieren que nuestro modelo tiene 77 | mejor capacidad de clasificación que el modelo original, incluso a 78 | pesar de estar entrenado en un conjunto de datos de tamaño 1000 veces 79 | menor que el empleado por el extractor de características de partida. Este hecho 80 | nos permite concluir que nuestra propuesta es de mayor calidad que el 81 | modelo de partida, validando nuestra hipótesis inicial. 82 | 83 | \newpage 84 | \end{document} 85 | 86 | %%% Local Variables: 87 | %%% mode: latex 88 | %%% TeX-master: "../main" 89 | %%% End: 90 | -------------------------------------------------------------------------------- /docs/slides/images/gifs/Assault049_x264.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/gifs/Assault049_x264.gif -------------------------------------------------------------------------------- /docs/slides/images/gifs/Stealing019_x264.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/gifs/Stealing019_x264.gif -------------------------------------------------------------------------------- /docs/slides/images/original-model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/original-model.pdf -------------------------------------------------------------------------------- /docs/slides/images/proposal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/proposal.pdf -------------------------------------------------------------------------------- /docs/slides/images/taxonomy-steps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/taxonomy-steps.pdf -------------------------------------------------------------------------------- /docs/slides/images/ucf/arson-abnormal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/ucf/arson-abnormal.png -------------------------------------------------------------------------------- /docs/slides/images/ucf/normal-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/ucf/normal-1.png -------------------------------------------------------------------------------- /docs/slides/images/ucf/roadaccident-abnormal.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/ucf/roadaccident-abnormal.png -------------------------------------------------------------------------------- /docs/slides/images/ucf/stealing-abnormal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/ucf/stealing-abnormal.png -------------------------------------------------------------------------------- /docs/slides/slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/slides.pdf -------------------------------------------------------------------------------- /docs/slides/slides.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt]{beamer} 2 | 3 | \usetheme[progressbar=frametitle]{metropolis} 4 | 5 | \usepackage[utf8]{inputenc} 6 | \usepackage[spanish]{babel} 7 | \usepackage{booktabs} 8 | \usepackage{graphicx} 9 | \usepackage{subcaption} 10 | \usepackage{animate} 11 | \metroset{block=fill} 12 | 13 | \newcommand\blfootnote[1]{% 14 | \begingroup 15 | \renewcommand\thefootnote{}\footnote{#1}% 16 | \addtocounter{footnote}{-1}% 17 | \endgroup 18 | } 19 | 20 | \title{Aprendizaje profundo para el análisis de comportamientos en 21 | videovigilancia} 22 | \subtitle{Trabajo de Fin de Máster} 23 | \date{\today} 24 | \author{Francisco Luque Sánchez} 25 | \institute{Universidad de Granada - Máster en Ciencia de Datos 26 | e Ingeniería de Computadores} 27 | 28 | \begin{document} 29 | 30 | \maketitle 31 | 32 | \begin{frame}{Índice} 33 | \setbeamertemplate{section in toc}[sections numbered] 34 | \tableofcontents[hideallsubsections] 35 | \end{frame} 36 | 37 | \section{Descripción del problema} 38 | 39 | \begin{frame}{Descripción del problema} 40 | \begin{block}{Detección de comportamientos anómalos en multitudes} 41 | \begin{itemize} 42 | \item Estudio de secuencias de vídeo. 43 | \item Extraídas de cámaras de videovigilancia. 44 | \item Análisis de comportamientos. 45 | \item Idenficación de comportamientos extraños. 46 | \item Problema complejo y de gran variabilidad. 47 | \end{itemize} 48 | \end{block} 49 | \end{frame} 50 | 51 | \section{Revisión bibliográfica y taxonomía} 52 | 53 | \begin{frame}{Revisión de la literatura} 54 | \begin{block}{Detección de comportamientos anómalos} 55 | \begin{itemize} 56 | \item Categorización de las anomalías por tipos (acciones, 57 | movimientos, apariencia). 58 | \item Conjuntos de datos públicos. 59 | \item Modelos basados en aprendizaje profundo. 60 | \end{itemize} 61 | \end{block} 62 | \end{frame} 63 | 64 | \begin{frame}{Propuesta taxonómica} 65 | \begin{figure} 66 | \centering 67 | \includegraphics[width=.7\textwidth]{images/taxonomy-steps.pdf} 68 | \caption{Taxonomía secuencial para el análisis de multitudes} 69 | \end{figure} 70 | 71 | \blfootnote{\textbf{Luque-Sánchez, F.}, Hupont, I., Tabik, S., \& Herrera, 72 | F. (2020). Revisiting crowd behaviour analysis through deep 73 | learning: Taxonomy, anomaly detection, crowd emotions, datasets, 74 | opportunities and prospects. 
\textit{Information Fusion}} 75 | \end{frame} 76 | 77 | \section{Detección de anomalías en videovigilancia} 78 | 79 | \begin{frame}{Hipótesis de partida} 80 | \begin{block}{Hipótesis} 81 | Los extractores de características espacio-temporales (CNN+LSTM) 82 | obtienen características de calidad para detectar comportamientos 83 | anómalos. 84 | \end{block} 85 | \end{frame} 86 | 87 | \begin{frame}{UCF-Crime Dataset} 88 | \begin{block}{Características} 89 | \begin{itemize} 90 | \item 1900 vídeos 91 | \item 128 horas en total 92 | \item 13 clases de comportamientos anómalos 93 | \item Etiquetado débil 94 | \end{itemize} 95 | \end{block} 96 | \end{frame} 97 | 98 | \begin{frame}{UCF-Crime Dataset} 99 | \begin{figure}[hbtp] 100 | \centering 101 | \begin{subfigure}{0.35\textwidth} 102 | \centering 103 | \includegraphics[width=\linewidth]{images/ucf/normal-1} 104 | \caption{Vídeo normal} 105 | \end{subfigure} 106 | \begin{subfigure}{0.35\textwidth} 107 | \centering 108 | \includegraphics[width=\linewidth]{images/ucf/arson-abnormal} 109 | \caption{Fuego provocado} 110 | \end{subfigure} 111 | \begin{subfigure}{0.35\textwidth} 112 | \centering 113 | \includegraphics[width=\linewidth]{images/ucf/stealing-abnormal} 114 | \caption{Robo} 115 | \end{subfigure} 116 | \begin{subfigure}{0.35\textwidth} 117 | \centering 118 | \includegraphics[width=\linewidth]{images/ucf/roadaccident-abnormal} 119 | \caption{Accidente} 120 | \end{subfigure} 121 | \end{figure} 122 | \end{frame} 123 | 124 | \begin{frame}{Modelo original} 125 | \begin{figure} 126 | \centering 127 | \includegraphics[width=.9\textwidth]{images/original-model.pdf} 128 | \caption{Modelo original} 129 | \end{figure} 130 | 131 | \blfootnote{Sultani, W., Chen, C., \& Shah, M. (2018). Real-world 132 | anomaly detection in surveillance videos. In Proceedings of the 133 | IEEE Conference on Computer Vision and Pattern Recognition 134 | (pp. 6479-6488).} 135 | \end{frame} 136 | 137 | \begin{frame}{Entrenamiento multiinstancia} 138 | \begin{block}{Entrenamiento del modelo} 139 | \begin{itemize} 140 | \item Una etiqueta por vídeo. 141 | \item Vídeos anómalos compuestos mayoritariamente por fotogramas normales. 142 | \item Predicción a nivel de fotograma. 143 | \end{itemize} 144 | \end{block} 145 | 146 | Solución: Función de pérdida multiinstancia 147 | \begin{align*} 148 | l(\mathcal{B}_a, \mathcal{B}_n) 149 | &= \max{(0, 1 - \max_{i \in \mathcal{B}_a}{f(\mathcal{V}^i_a)} + 150 | \max_{i \in \mathcal{B}_n}{f(\mathcal{V}^i_n)})}\\ 151 | & + \lambda_1 152 | \sum_{i=0}^{n-1} (f(\mathcal{V}_a^i) - f(\mathcal{V}_a^{i+1})) + 153 | \lambda_2 \sum_{i=0}^{n} f(\mathcal{V}_a^i). 154 | \end{align*} 155 | 156 | \end{frame} 157 | 158 | \begin{frame}{Propuesta: Xception-LSTM} 159 | \begin{figure} 160 | \centering 161 | \includegraphics[width=.8\textwidth]{images/proposal.pdf} 162 | \caption{Arquitectura propuesta} 163 | \end{figure} 164 | \end{frame} 165 | 166 | \begin{frame}{Propuesta: Xception-LSTM} 167 | \begin{block}{Entrenamiento del modelo} 168 | \begin{itemize} 169 | \item Preentrenamiento del extractor: Clasificación sobre UCF-101 170 | (13000 vídeos, 101 clases). 171 | \item Congelación del extractor y extracción de características. 172 | \item Entrenamiento del clasificador. 
173 | \end{itemize} 174 | \end{block} 175 | \end{frame} 176 | 177 | \section{Resultados} 178 | 179 | \begin{frame}{Resultados a nivel de fotograma} 180 | \begin{table}[H] 181 | \centering 182 | \resizebox{\textwidth}{!}{ 183 | \begin{tabular}{lccccc} 184 | \toprule 185 | Modelo & Exactitud & AUC & $F_1$ & EER & AP \\ 186 | \midrule 187 | Original - preentrenado & 0.8428 & 0.7508 & 0.2838 & 0.3119 & \textbf{0.2057} \\ 188 | Original - replicado & 0.7411 & 0.7369 & 0.2201 & 0.3253 & 0.2014 \\ 189 | Xception-LSTM - 1024 & 0.8236 & 0.7504 & \textbf{0.2907} & 0.3221 & 0.1823 \\ 190 | Xception-LSTM - 768 & \textbf{0.8455} & \textbf{0.7674} & 0.2681 & \textbf{0.2980} & 0.1770 \\ 191 | Xception-LSTM - 512 & 0.8436 & 0.7177 & 0.2140 & 0.3388 & 0.1451 \\ 192 | \bottomrule 193 | \end{tabular} 194 | } 195 | \caption{Tabla de resultados obtenidos por los modelos.} 196 | \label{tab:confusion-matrices} 197 | \end{table} 198 | \end{frame} 199 | 200 | \begin{frame}{Resultados a nivel de vídeo} 201 | \begin{table}[H] 202 | \centering 203 | \begin{tabular}{lcc} 204 | \toprule 205 | Modelo & \% Vídeos normales & \% Vídeos anómalos \\ 206 | \midrule 207 | Original & 13.33 & 64.89 \\ 208 | Réplica & 11.11 & 74.05 \\ 209 | Xception-LSTM - 1024 & 15.55 & \textbf{77.86} \\ 210 | Xception-LSTM - 768 & 12.59 & 72.52 \\ 211 | Xception-LSTM - 512 & \textbf{8.15} & 71.76 \\ 212 | \bottomrule 213 | \end{tabular} 214 | \caption{Porcentaje de vídeos normales y anómalos en los que se ha 215 | generado una alarma.} 216 | \end{table} 217 | \end{frame} 218 | 219 | \begin{frame}{Ejemplo - Asalto} 220 | \begin{figure} 221 | \centering 222 | \animategraphics[loop,controls,width=.7\linewidth]{10}{images/gifs/assault-}{0}{132} 223 | \end{figure} 224 | \end{frame} 225 | 226 | \begin{frame}{Ejemplo - Robo} 227 | \begin{figure} 228 | \centering 229 | \animategraphics[loop,controls,width=.7\linewidth]{50}{images/gifs/stealing-}{000}{491} 230 | \end{figure} 231 | \end{frame} 232 | 233 | \section{Conclusiones y trabajo futuro} 234 | 235 | \begin{frame}{Conclusiones y trabajo futuro} 236 | \begin{block}{Conclusiones} 237 | \begin{itemize} 238 | \item La detección de anomalías en multitudes es un problema 239 | complejo, debido a la diversidad de contextos a los que puede 240 | aplicarse. 241 | \item Los modelos espacio-temporales extraen características 242 | relevantes para la resolución de este problema, mejores que los 243 | modelos completamente convolucionales. 244 | \item Existe un margen de mejora amplio para este conjunto de datos. 245 | \end{itemize} 246 | \end{block} 247 | \begin{block}{Trabajo futuro} 248 | \begin{itemize} 249 | \item Despliegue del sistema en una aplicación real. 250 | \item Estudio de modelos ConvLSTM. 251 | \item Mejora de la política de entrenamiento del modelo. 
252 | \end{itemize} 253 | \end{block} 254 | \end{frame} 255 | 256 | \begin{frame}[standout] 257 | \LARGE{Muchas gracias}\\ 258 | \LARGE{¿Preguntas?} 259 | \end{frame} 260 | 261 | \end{document} 262 | -------------------------------------------------------------------------------- /docs/variables.sty: -------------------------------------------------------------------------------- 1 | \ProvidesPackage{macros}[2017/08/18 Macros declaration] 2 | 3 | % ******************************************************************** 4 | % Re-usable information 5 | % ******************************************************************** 6 | \newcommand{\ProjectTitle}{Aprendizaje profundo para el análisis de 7 | comportamientos en videovigilancia \xspace} 8 | 9 | \newcommand{\ProjectTitleEng}{Deep Learning for crowd behavior 10 | analysis in videosurveillance \xspace} 11 | 12 | \newcommand{\Degree}{Máster en Ciencia de Datos e Ingeniería de 13 | Computadores (DATCOM)} 14 | 15 | \newcommand{\AuthorName}{Francisco Luque Sánchez\xspace} 16 | 17 | \newcommand{\MainProf}{Francisco Herrera Triguero\xspace} 18 | 19 | \newcommand{\SecondProf}{Siham Tabik\xspace} 20 | % \newcommand{\mySupervisor}{Put name here\xspace} 21 | 22 | \newcommand{\FacultyOne}{Escuela Técnica Superior de Ingenierías 23 | Informática y de Telecomunicación\xspace} 24 | 25 | \newcommand{\FacultyShort}{ETSIIT\xspace} 26 | 27 | \newcommand{\Department}{Departamento de Ciencias de la Computación 28 | e Inteligencia Artificial\xspace} 29 | 30 | \newcommand{\University}{\protect{Universidad de Granada}\xspace} 31 | 32 | \newcommand{\Location}{Granada\xspace} 33 | 34 | \newcommand{\Time}{\today\xspace} 35 | 36 | \newcommand{\Version}{Version 0.1\xspace} 37 | -------------------------------------------------------------------------------- /original_model/README.md: -------------------------------------------------------------------------------- 1 | # Original experiments replication 2 | 3 | In this folder you can find the code to replicate the original 4 | experimentation from the article "Real-World Anomaly Detection in 5 | Surveillance Videos". The provided code strongly relies on the 6 | following sources: 7 | 8 | - https://github.com/WaqasSultani/AnomalyDetectionCVPR2018: Original 9 | implementation of the model 10 | - https://github.com/ptirupat/AnomalyDetection_CVPR18: Reimplementation 11 | of the original world using Keras 12 | - https://github.com/adamcasson/c3d: Implementation of C3D feature 13 | extractor in Keras using Tensorflow as backend 14 | 15 | ## Experimentation replication 16 | 17 | The folder is self-contained and fully written in Python. The 18 | experiments can be completely performed by executing code inside this 19 | folder, without depending on external resources. Code files inside 20 | this folder can be divided into two groups; resource files and scripts. 21 | In resource files, auxiliary utilities and models are defined. Scripts 22 | are provided to replicate the experiments. 
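The scripts described below are meant to be run in order: feature extraction, feature preprocessing, classifier training, test-set prediction and metric computation. As a quick reference, a minimal driver along the following lines should run the whole pipeline (this is only an illustrative sketch, not part of the original code; it assumes the default configuration and that the dataset and the data folders created by the bash script at root project level are already in place):

```python
# Hypothetical end-to-end driver for the replication pipeline (illustrative only).
import subprocess

PIPELINE = [
    "extract_features.py",     # C3D features for every video in the dataset
    "preprocess_features.py",  # fixed-size 32-segment representation per video
    "train_classifier.py",     # trains the anomaly classifier on those features
    "predict_test_set.py",     # per-segment anomaly scores for the test videos
    "calculate_metrics.py",    # performance metrics from the stored predictions
]

for script in PIPELINE:
    subprocess.run(["python", script], check=True)
```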
23 | 24 | ### Resource files 25 | 26 | The resource files are listed and explained below, in alphabetical 27 | order: 28 | 29 | - `c3d.py`: Definition of C3D feature extractor and preprocessing 30 | functions for the input data 31 | - `classifier.py`: Definition of the classifier model, together 32 | with functions to save and load the model to disk 33 | - `configuration.py`: Configuration information for the experiments 34 | (data paths, output paths, annotation files, etc.) 35 | - `parameters.py`: Information about model structure 36 | - `utils` folder: This folder contains utilities to process arrays 37 | and video files 38 | 39 | ### Scripts 40 | 41 | The developed scripts are listed in the order that should be followed 42 | to replicate the experiments. 43 | 44 | 1. `extract_features.py`: This script computes the features from the 45 | videos composing the dataset (videos contained in the `dataset` folder at 46 | root project level), and stores them inside the folder 47 | `raw_c3d_features` (if the default configuration has been kept). In order 48 | to work properly, the destination folder must exist. The folder 49 | structure can be created with the bash script provided at root project 50 | level. 51 | 2. `preprocess_features.py`: This script takes the previously extracted 52 | features, whose number can vary depending on the original video length, 53 | and computes a fixed-size representation for each video. The new features 54 | are stored inside the folder `processed_c3d_features`. 55 | 3. `train_classifier.py`: This script trains the final classifier 56 | model using the preprocessed features extracted before. After 57 | training, it stores the resulting model inside the folder `trained_models`. 58 | 4. `predict_test_set.py`: After training, this script takes the trained 59 | model and uses it to predict the test set (test features are calculated 60 | in the first two steps). 61 | 5. `calculate_metrics.py`: When the predictions have been made, this 62 | script calculates several performance metrics to validate the model. 63 | -------------------------------------------------------------------------------- /original_model/c3d.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | from keras.models import Sequential 3 | from keras.models import Model 4 | from keras.layers.core import Dense, Dropout, Flatten 5 | import configuration as cfg 6 | from keras.layers.convolutional import Conv3D, MaxPooling3D, ZeroPadding3D 7 | import numpy as np 8 | from scipy.misc import imresize 9 | from keras.utils.data_utils import get_file 10 | 11 | C3D_MEAN_PATH = 'https://github.com/adamcasson/c3d/releases/download/v0.1/c3d_mean.npy' 12 | 13 | 14 | def preprocess_input(video): 15 | """Preprocess video input to make it suitable for feature extraction.
16 | 17 | The video is resized, cropped, resampled and training mean is substracted 18 | to make it suitable for the network 19 | 20 | :param video: Video to be processed 21 | :returns: Preprocessed video 22 | :rtype: np.ndarray 23 | 24 | """ 25 | 26 | intervals = np.ceil(np.linspace(0, video.shape[0] - 1, 16)).astype(int) 27 | frames = video[intervals] 28 | 29 | # Reshape to 128x171 30 | reshape_frames = np.zeros((frames.shape[0], 128, 171, frames.shape[3])) 31 | for i, img in enumerate(frames): 32 | img = imresize(img, (128, 171), 'bicubic') 33 | reshape_frames[i, :, :, :] = img 34 | 35 | mean_path = get_file('c3d_mean.npy', 36 | C3D_MEAN_PATH, 37 | cache_subdir='models', 38 | md5_hash='08a07d9761e76097985124d9e8b2fe34') 39 | 40 | mean = np.load(mean_path) 41 | reshape_frames -= mean 42 | # Crop to 112x112 43 | reshape_frames = reshape_frames[:, 8:120, 30:142, :] 44 | # Add extra dimension for samples 45 | reshape_frames = np.expand_dims(reshape_frames, axis=0) 46 | 47 | return reshape_frames 48 | 49 | 50 | def C3D(weights='sports1M'): 51 | """Creation of the full C3D architecture 52 | 53 | :param weights: Weights to be loaded into the network. If None, 54 | the network is randomly initialized. 55 | :returns: Network model 56 | :rtype: keras.model 57 | 58 | """ 59 | 60 | if weights not in {'sports1M', None}: 61 | raise ValueError('weights should be either be sports1M or None') 62 | 63 | if K.image_data_format() == 'channels_last': 64 | shape = (16, 112, 112, 3) 65 | else: 66 | shape = (3, 16, 112, 112) 67 | 68 | model = Sequential() 69 | model.add( 70 | Conv3D(64, 71 | 3, 72 | activation='relu', 73 | padding='same', 74 | name='conv1', 75 | input_shape=shape)) 76 | model.add( 77 | MaxPooling3D(pool_size=(1, 2, 2), 78 | strides=(1, 2, 2), 79 | padding='same', 80 | name='pool1')) 81 | 82 | model.add(Conv3D(128, 3, activation='relu', padding='same', name='conv2')) 83 | model.add( 84 | MaxPooling3D(pool_size=(2, 2, 2), 85 | strides=(2, 2, 2), 86 | padding='valid', 87 | name='pool2')) 88 | 89 | model.add(Conv3D(256, 3, activation='relu', padding='same', name='conv3a')) 90 | model.add(Conv3D(256, 3, activation='relu', padding='same', name='conv3b')) 91 | model.add( 92 | MaxPooling3D(pool_size=(2, 2, 2), 93 | strides=(2, 2, 2), 94 | padding='valid', 95 | name='pool3')) 96 | 97 | model.add(Conv3D(512, 3, activation='relu', padding='same', name='conv4a')) 98 | model.add(Conv3D(512, 3, activation='relu', padding='same', name='conv4b')) 99 | model.add( 100 | MaxPooling3D(pool_size=(2, 2, 2), 101 | strides=(2, 2, 2), 102 | padding='valid', 103 | name='pool4')) 104 | 105 | model.add(Conv3D(512, 3, activation='relu', padding='same', name='conv5a')) 106 | model.add(Conv3D(512, 3, activation='relu', padding='same', name='conv5b')) 107 | model.add(ZeroPadding3D(padding=(0, 1, 1))) 108 | model.add( 109 | MaxPooling3D(pool_size=(2, 2, 2), 110 | strides=(2, 2, 2), 111 | padding='valid', 112 | name='pool5')) 113 | 114 | model.add(Flatten()) 115 | 116 | model.add(Dense(4096, activation='relu', name='fc6')) 117 | model.add(Dropout(0.5)) 118 | model.add(Dense(4096, activation='relu', name='fc7')) 119 | model.add(Dropout(0.5)) 120 | model.add(Dense(487, activation='softmax', name='fc8')) 121 | 122 | if weights == 'sports1M': 123 | model.load_weights(cfg.c3d_model_weights) 124 | 125 | return model 126 | 127 | 128 | def c3d_feature_extractor(): 129 | """Creation of the feature extraction architecture. 
This network is 130 | formed by a subset of the original C3D architecture (from the 131 | beginning to fc6 layer) 132 | 133 | :returns: Feature extraction model 134 | :rtype: keras.model 135 | 136 | """ 137 | model = C3D() 138 | layer_name = 'fc6' 139 | feature_extractor_model = Model(inputs=model.input, 140 | outputs=model.get_layer(layer_name).output) 141 | return feature_extractor_model 142 | -------------------------------------------------------------------------------- /original_model/calculate_metrics.py: -------------------------------------------------------------------------------- 1 | import sklearn.metrics 2 | import numpy as np 3 | import pandas as pd 4 | import configuration as cfg 5 | import os 6 | import utils.video_util 7 | import utils.array_util 8 | import matplotlib.pyplot as plt 9 | 10 | def eer_score(fpr, tpr, thr): 11 | """ Returns equal error rate (EER) and the corresponding threshold. """ 12 | fnr = 1-tpr 13 | abs_diffs = np.abs(fpr - fnr) 14 | min_index = np.argmin(abs_diffs) 15 | eer = np.mean((fpr[min_index], fnr[min_index])) 16 | return eer, thr[min_index] 17 | 18 | ground_truth = pd.read_csv( 19 | cfg.test_temporal_annotations, header=None, index_col=0 20 | ) 21 | 22 | preds = [] 23 | gts = [] 24 | 25 | for idx, row in ground_truth.iterrows(): 26 | preds_file_path = os.path.join(cfg.preds_folder, idx) 27 | frames = row[6] 28 | try: 29 | with open(preds_file_path, "rb") as f: 30 | curr_preds = np.load(f) 31 | 32 | padded_preds = utils.array_util.extrapolate(curr_preds, frames) 33 | except FileNotFoundError: 34 | padded_preds = np.zeros((frames,1)) 35 | print("No predictions generated for {}".format(idx)) 36 | 37 | curr_gts = np.zeros(frames) 38 | anomaly_start_1 = row[2] 39 | anomaly_end_1 = row[3] 40 | 41 | anomaly_start_2 = row[4] 42 | anomaly_end_2 = row[5] 43 | 44 | if anomaly_start_1 != -1 and anomaly_end_1 != -1: 45 | curr_gts[anomaly_start_1:anomaly_end_1+1] = 1 46 | 47 | if anomaly_start_2 != -1 and anomaly_end_2 != -1: 48 | curr_gts[anomaly_start_2:anomaly_end_2+1] = 1 49 | 50 | preds.append(padded_preds) 51 | gts.append(curr_gts) 52 | 53 | gts = np.concatenate(gts) 54 | preds = np.concatenate(preds) 55 | preds_labels = np.round(preds) 56 | 57 | acc = sklearn.metrics.accuracy_score(gts, preds_labels) 58 | ap = sklearn.metrics.average_precision_score(gts, preds) 59 | f1 = sklearn.metrics.f1_score(gts, preds_labels) 60 | fpr, tpr, thr = sklearn.metrics.roc_curve(gts, preds) 61 | prec, rec, _ = sklearn.metrics.precision_recall_curve(gts, preds) 62 | eer, _ = eer_score(fpr, tpr, thr) 63 | conf_mat = sklearn.metrics.confusion_matrix(gts, preds_labels) 64 | auc = sklearn.metrics.auc(fpr, tpr) 65 | 66 | plt.title("Curva ROC") 67 | plt.plot(fpr, tpr, 'b', label = "AUC: {}".format(auc)) 68 | plt.legend(loc = 'lower right') 69 | plt.plot([0, 1], [0, 1],'r--') 70 | plt.xlim([0, 1]) 71 | plt.ylim([0, 1]) 72 | plt.ylabel('True Positive Rate') 73 | plt.xlabel('False Positive Rate') 74 | plt.savefig(os.path.join(cfg.output_folder, "roc.png")) 75 | 76 | plt.clf() 77 | 78 | plt.title("Curva PR") 79 | plt.plot(rec, prec, 'b', label = "Original - AP: {:.5f}".format(ap)) 80 | plt.legend(loc = 'lower right') 81 | plt.xlim([0, 1]) 82 | plt.ylim([0, 1]) 83 | plt.ylabel('Precison') 84 | plt.xlabel('Recall') 85 | plt.savefig(os.path.join(cfg.output_folder, "pr_curve.png")) 86 | 87 | print("Accuracy: {:.5f}, AUC: {:.5f}, F1: {:.5f}, EER: {:.5f}, AP: {:.5F}".format( 88 | acc, auc, f1, eer, ap 89 | )) 90 | 91 | print("Confusion matrix") 92 | print(conf_mat) 93 | 
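# --- Illustrative addition (not part of the original script) ------------------
# The loop above converts each row of the temporal annotation file into a
# frame-level binary ground truth: up to two anomalous intervals per video,
# with -1 marking an absent interval. A toy instance with made-up numbers
# (numpy is already imported as np at the top of this script):
def _toy_ground_truth_example():
    frames = 10                               # hypothetical 10-frame video
    anomaly_start_1, anomaly_end_1 = 3, 6     # single anomalous window
    anomaly_start_2, anomaly_end_2 = -1, -1   # no second window
    gt = np.zeros(frames)
    if anomaly_start_1 != -1 and anomaly_end_1 != -1:
        gt[anomaly_start_1:anomaly_end_1 + 1] = 1
    if anomaly_start_2 != -1 and anomaly_end_2 != -1:
        gt[anomaly_start_2:anomaly_end_2 + 1] = 1
    return gt  # -> [0. 0. 0. 1. 1. 1. 1. 0. 0. 0.]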
-------------------------------------------------------------------------------- /original_model/classifier.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import scipy.io as sio 3 | from keras import Sequential 4 | from keras.layers import Dense, Dropout 5 | from keras.regularizers import l2 6 | 7 | import configuration as cfg 8 | 9 | def classifier_model(): 10 | """Build the classifier 11 | 12 | :returns: Classifier model 13 | :rtype: keras.Model 14 | 15 | """ 16 | model = Sequential() 17 | model.add(Dense(512, input_dim=4096, kernel_initializer='glorot_normal', 18 | kernel_regularizer=l2(0.001), activation='relu')) 19 | model.add(Dropout(0.6)) 20 | model.add(Dense(32, kernel_initializer='glorot_normal', 21 | kernel_regularizer=l2(0.001))) 22 | model.add(Dropout(0.6)) 23 | model.add(Dense(1, kernel_initializer='glorot_normal', 24 | kernel_regularizer=l2(0.001), activation='sigmoid')) 25 | return model 26 | 27 | 28 | def build_classifier_model(): 29 | """Build the classifier and load the pretrained weights 30 | 31 | :returns: 32 | :rtype: 33 | 34 | """ 35 | model = classifier_model() 36 | model = load_weights(model, cfg.classifier_model_weigts) 37 | return model 38 | 39 | 40 | def conv_dict(dict2): 41 | """Prepare the dictionary of weights to be loaded by the network 42 | 43 | :param dict2: Dictionary to format 44 | :returns: The dictionary properly formatted 45 | :rtype: dict 46 | 47 | """ 48 | dict = {} 49 | for i in range(len(dict2)): 50 | if str(i) in dict2: 51 | if dict2[str(i)].shape == (0, 0): 52 | dict[str(i)] = dict2[str(i)] 53 | else: 54 | weights = dict2[str(i)][0] 55 | weights2 = [] 56 | for weight in weights: 57 | if weight.shape in [(1, x) for x in range(0, 5000)]: 58 | weights2.append(weight[0]) 59 | else: 60 | weights2.append(weight) 61 | dict[str(i)] = weights2 62 | return dict 63 | 64 | 65 | def load_weights(model, weights_file): 66 | """Loads the pretrained weights into the network architecture 67 | 68 | :param model: keras model of the network 69 | :param weights_file: Path to the weights file 70 | :returns: The input model with the weights properly loaded 71 | :rtype: keras.model 72 | 73 | """ 74 | dict2 = sio.loadmat(weights_file) 75 | dict = conv_dict(dict2) 76 | i = 0 77 | for layer in model.layers: 78 | weights = dict[str(i)] 79 | layer.set_weights(weights) 80 | i += 1 81 | return model 82 | 83 | if __name__ == '__main__': 84 | model = build_classifier_model() 85 | model.summary() 86 | -------------------------------------------------------------------------------- /original_model/compute_frames.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import utils.video_util 4 | import configuration as cfg 5 | import os 6 | 7 | ground_truth = pd.read_csv( 8 | cfg.test_temporal_annotations, header=None, sep="\s+", index_col=0, 9 | names=['Type', 'Start1', 'End1', 'Start2', 'End2'] 10 | ) 11 | 12 | frames_list = [] 13 | for idx, row in ground_truth.iterrows(): 14 | video_file_path = os.path.join(cfg.test_set, idx[:-4] + "_x264.mp4") 15 | print(video_file_path) 16 | _, frames = utils.video_util.get_video_clips(video_file_path) 17 | print(frames) 18 | frames_list.append(frames) 19 | 20 | ground_truth['Frames'] = frames_list 21 | 22 | ground_truth.to_csv("trial.csv", header=False) 23 | -------------------------------------------------------------------------------- /original_model/configuration.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | c3d_model_weights = './trained_models/c3d_sports1m.h5' 4 | raw_features_folder = "../raw_c3d_features" 5 | processed_features_folder = "../processed_c3d_features" 6 | 7 | classifier_model_json = './trained_models/model.json' 8 | classifier_model_weigts = './trained_models/weights_L1L2.mat' 9 | 10 | preds_folder = '../predictions_c3d' 11 | 12 | input_folder = './input' 13 | output_folder = '/mnt/sdd/pacoluque/output' 14 | 15 | sample_video_path = '../dataset/train/abnormal/Arrest018_x264.mp4' 16 | 17 | raw_dataset_folder = '../dataset/' 18 | 19 | train_set = os.path.join(raw_dataset_folder, 'train') 20 | normal_videos_path = os.path.join(train_set, "normal") 21 | abnormal_videos_path = os.path.join(train_set, "abnormal") 22 | 23 | raw_features_train_set = os.path.join(raw_features_folder, 'train') 24 | raw_normal_train_features = os.path.join(raw_features_train_set, "normal") 25 | raw_abnormal_train_features = os.path.join(raw_features_train_set, "abnormal") 26 | 27 | processed_features_train_set = os.path.join(processed_features_folder, 'train') 28 | processed_normal_train_features = os.path.join(processed_features_train_set, "normal") 29 | processed_abnormal_train_features = os.path.join(processed_features_train_set, "abnormal") 30 | 31 | test_set = os.path.join(raw_dataset_folder, 'test') 32 | raw_test_features = os.path.join(raw_features_folder, 'test') 33 | processed_test_features = os.path.join(processed_features_folder, 'test') 34 | 35 | test_temporal_annotations = os.path.join(test_set, "temporal-annotation.txt") 36 | -------------------------------------------------------------------------------- /original_model/display_predictions.py: -------------------------------------------------------------------------------- 1 | import os 2 | from c3d import * 3 | from classifier import * 4 | from utils.visualization_util import * 5 | import sklearn.preprocessing 6 | import parameters as params 7 | import configuration as cfg 8 | 9 | def run_demo(): 10 | 11 | video_name = os.path.basename(cfg.sample_video_path).split('.')[0] 12 | 13 | # read video 14 | video_clips, num_frames = get_video_clips(cfg.sample_video_path) 15 | 16 | print("Number of clips in the video : ", len(video_clips)) 17 | 18 | # build models 19 | feature_extractor = c3d_feature_extractor() 20 | classifier_model = build_classifier_model() 21 | 22 | print("Models initialized") 23 | 24 | # extract features 25 | rgb_features = [] 26 | for i, clip in enumerate(video_clips): 27 | clip = np.array(clip) 28 | if len(clip) < params.frame_count: 29 | continue 30 | 31 | clip = preprocess_input(clip) 32 | rgb_feature = feature_extractor.predict(clip)[0] 33 | rgb_features.append(rgb_feature) 34 | 35 | print("Processed clip : ", i) 36 | 37 | rgb_features = np.array(rgb_features) 38 | rgb_feature_bag = interpolate(rgb_features, params.features_per_bag) 39 | 40 | # classify using the trained classifier model 41 | predictions = classifier_model.predict(rgb_feature_bag) 42 | 43 | predictions = np.array(predictions).squeeze() 44 | 45 | predictions = extrapolate(predictions, num_frames) 46 | 47 | save_path = os.path.join(cfg.output_folder, video_name + '.gif') 48 | # visualize predictions 49 | print('Executed Successfully - '+video_name + '.gif saved') 50 | visualize_predictions(cfg.sample_video_path, predictions, save_path) 51 | 52 | 53 | if __name__ == '__main__': 54 | run_demo() 55 | 
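# --- Illustrative addition (not part of the original demo) --------------------
# run_demo() relies on two resampling helpers from utils.array_util, available
# here through the wildcard imports: interpolate() squeezes an arbitrary number
# of clip features into a fixed bag of params.features_per_bag (32) segments,
# and extrapolate() stretches the 32 segment scores back to one score per
# frame. A quick shape check with random stand-in data (hypothetical sizes):
def _shape_roundtrip_example(num_clips=45):
    clip_features = np.random.rand(num_clips, 4096)            # one C3D vector per 16-frame clip
    feature_bag = interpolate(clip_features, params.features_per_bag)
    segment_scores = np.random.rand(params.features_per_bag)   # stand-in for classifier scores
    frame_scores = extrapolate(segment_scores, num_clips * 16)
    return feature_bag.shape, frame_scores.shape               # (32, 4096) and (720,)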
-------------------------------------------------------------------------------- /original_model/extract_features.py: -------------------------------------------------------------------------------- 1 | import c3d 2 | import os 3 | import configuration as cfg 4 | import numpy as np 5 | import sklearn.preprocessing 6 | 7 | from utils import video_util 8 | 9 | feature_extractor = c3d.c3d_feature_extractor() 10 | normal_videos = os.listdir(cfg.normal_videos_path) 11 | normal_videos.sort() 12 | 13 | print("Processing normal videos...") 14 | for vid_name in normal_videos: 15 | print("Processing {}".format(vid_name)) 16 | vid_path = os.path.join(cfg.normal_videos_path, vid_name) 17 | feats_path = os.path.join( 18 | cfg.raw_normal_train_features, vid_name[:-9] + ".npy" 19 | ) 20 | 21 | clips, frames = video_util.get_video_clips(vid_path) 22 | 23 | # Remove last clip if number of frames is not equal to 16 24 | if frames % 16 != 0: 25 | clips = clips[:-1] 26 | 27 | prep_clips = [c3d.preprocess_input(np.array(clip)) for clip in clips] 28 | prep_clips = np.vstack(prep_clips) 29 | 30 | features = feature_extractor.predict(prep_clips) 31 | features = sklearn.preprocessing.normalize(features, axis=1) 32 | 33 | with open(feats_path, "wb") as f: 34 | np.save(f, features) 35 | 36 | abnormal_videos = os.listdir(cfg.abnormal_videos_path) 37 | abnormal_videos.sort() 38 | print("Processing abnormal videos...") 39 | for vid_name in abnormal_videos: 40 | print("Processing {}".format(vid_name)) 41 | vid_path = os.path.join(cfg.abnormal_videos_path, vid_name) 42 | feats_path = os.path.join( 43 | cfg.raw_abnormal_train_features, vid_name[:-9] + ".npy" 44 | ) 45 | 46 | clips, frames = video_util.get_video_clips(vid_path) 47 | 48 | # Remove last clip if number of frames is not equal to 16 49 | if frames % 16 != 0: 50 | clips = clips[:-1] 51 | 52 | prep_clips = [c3d.preprocess_input(np.array(clip)) for clip in clips] 53 | prep_clips = np.vstack(prep_clips) 54 | 55 | features = feature_extractor.predict(prep_clips) 56 | features = sklearn.preprocessing.normalize(features, axis=1) 57 | 58 | with open(feats_path, "wb") as f: 59 | np.save(f, features) 60 | 61 | 62 | test_videos = os.listdir(cfg.test_set) 63 | test_videos.sort() 64 | print("Processing test videos...") 65 | for vid_name in test_videos: 66 | print("Processing {}".format(vid_name)) 67 | vid_path = os.path.join(cfg.test_set, vid_name) 68 | feats_path = os.path.join(cfg.raw_test_features, vid_name[:-9] + ".npy") 69 | 70 | clips, frames = video_util.get_video_clips(vid_path) 71 | 72 | # Remove last clip if number of frames is not equal to 16 73 | if frames % 16 != 0: 74 | clips = clips[:-1] 75 | 76 | prep_clips = [c3d.preprocess_input(np.array(clip)) for clip in clips] 77 | prep_clips = np.vstack(prep_clips) 78 | 79 | features = feature_extractor.predict(prep_clips) 80 | features = sklearn.preprocessing.normalize(features, axis=1) 81 | 82 | with open(feats_path, "wb") as f: 83 | np.save(f, features) 84 | -------------------------------------------------------------------------------- /original_model/parameters.py: -------------------------------------------------------------------------------- 1 | frame_height = 240 2 | frame_width = 320 3 | channels = 3 4 | 5 | frame_count = 16 6 | 7 | features_per_bag = 32 -------------------------------------------------------------------------------- /original_model/predict_test_set.py: -------------------------------------------------------------------------------- 1 | import classifier 2 | import configuration as cfg 3 | 
import numpy as np 4 | import os 5 | 6 | def load_test_set(videos_path, videos_list): 7 | feats = [] 8 | 9 | for vid in videos_list: 10 | vid_path = os.path.join(videos_path, vid) 11 | with open(vid_path, "rb") as f: 12 | feat = np.load(f) 13 | feats.append(feat) 14 | 15 | feats = np.array(feats) 16 | return feats 17 | 18 | classifier_model = classifier.build_classifier_model() 19 | 20 | vid_list = os.listdir(cfg.processed_test_features) 21 | vid_list.sort() 22 | 23 | test_set = load_test_set(cfg.processed_test_features, vid_list) 24 | 25 | for filename, example in zip(vid_list, test_set): 26 | predictions_file = filename[:-4] + '.npy' 27 | pred_path = os.path.join(cfg.preds_folder, predictions_file) 28 | pred = classifier_model.predict_on_batch(example) 29 | with open(pred_path, "wb") as f: 30 | np.save(pred_path, pred, allow_pickle=True) 31 | -------------------------------------------------------------------------------- /original_model/preprocess_features.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import sklearn.preprocessing 4 | import configuration as cfg 5 | 6 | def transform_into_segments(features, n_segments=32): 7 | if features.shape[0] < n_segments: 8 | raise RuntimeError( 9 | "Number of prev segments lesser than expected output size" 10 | ) 11 | 12 | cuts = np.linspace(0, features.shape[0], n_segments, 13 | dtype=int, endpoint=False) 14 | 15 | new_feats = [] 16 | for i, j in zip(cuts[:-1], cuts[1:]): 17 | new_feats.append(np.mean(features[i:j,:], axis=0)) 18 | 19 | new_feats.append(np.mean(features[cuts[-1]:,:], axis=0)) 20 | 21 | new_feats = np.array(new_feats) 22 | new_feats = sklearn.preprocessing.normalize(new_feats, axis=1) 23 | return new_feats 24 | 25 | for filename in os.listdir(cfg.raw_normal_train_features): 26 | print("Processing {}".format(filename)) 27 | raw_file_path = os.path.join( 28 | cfg.raw_normal_train_features, filename 29 | ) 30 | processed_file_path = os.path.join( 31 | cfg.processed_normal_train_features, filename 32 | ) 33 | 34 | with open(raw_file_path, "rb") as f: 35 | feats = np.load(f, allow_pickle=True) 36 | 37 | try: 38 | new_feats = transform_into_segments(feats) 39 | with open(processed_file_path, "wb") as f: 40 | np.save(f, new_feats, allow_pickle=True) 41 | except RuntimeError: 42 | print("Video {} too short".format(filename)) 43 | 44 | for filename in os.listdir(cfg.raw_abnormal_train_features): 45 | print("Processing {}".format(filename)) 46 | raw_file_path = os.path.join( 47 | cfg.raw_abnormal_train_features, filename 48 | ) 49 | processed_file_path = os.path.join( 50 | cfg.processed_abnormal_train_features, filename 51 | ) 52 | with open(raw_file_path, "rb") as f: 53 | feats = np.load(f, allow_pickle=True) 54 | 55 | try: 56 | new_feats = transform_into_segments(feats) 57 | with open(processed_file_path, "wb") as f: 58 | np.save(f, new_feats, allow_pickle=True) 59 | except RuntimeError: 60 | print("Video {} too short".format(filename)) 61 | 62 | for filename in os.listdir(cfg.raw_test_features): 63 | print("Processing {}".format(filename)) 64 | raw_file_path = os.path.join( 65 | cfg.raw_test_features, filename 66 | ) 67 | processed_file_path = os.path.join( 68 | cfg.processed_test_features, filename 69 | ) 70 | with open(raw_file_path, "rb") as f: 71 | feats = np.load(f, allow_pickle=True) 72 | 73 | try: 74 | new_feats = transform_into_segments(feats) 75 | with open(processed_file_path, "wb") as f: 76 | np.save(f, new_feats, allow_pickle=True) 77 | except 
RuntimeError: 78 | print("Video {} too short".format(filename)) 79 | -------------------------------------------------------------------------------- /original_model/train_classifier.py: -------------------------------------------------------------------------------- 1 | import keras.optimizers 2 | import scipy.io 3 | from keras.models import model_from_json 4 | import os 5 | 6 | import numpy as np 7 | import keras.backend as K 8 | import classifier 9 | 10 | from datetime import datetime 11 | 12 | def save_model(model, json_path, weight_path): 13 | json_string = model.to_json() 14 | open(json_path, 'w').write(json_string) 15 | dict = {} 16 | i = 0 17 | for layer in model.layers: 18 | weights = layer.get_weights() 19 | my_list = np.zeros(len(weights), dtype=np.object) 20 | my_list[:] = weights 21 | dict[str(i)] = my_list 22 | i += 1 23 | scipy.io.savemat(weight_path, dict) 24 | 25 | def load_model(json_path): 26 | model = model_from_json(open(json_path).read()) 27 | return model 28 | 29 | def load_batch_train(normal_path, normal_list, abnormal_path, abnormal_list): 30 | 31 | batchsize=60 32 | n_exp = int(batchsize/2) 33 | 34 | num_normal = len(normal_list) 35 | num_abnormal = len(abnormal_list) 36 | 37 | abnor_list_idx = np.random.permutation(num_abnormal) 38 | abnor_list = abnor_list_idx[:n_exp] 39 | norm_list_idx = np.random.permutation(num_normal) 40 | norm_list = norm_list_idx[:n_exp] 41 | 42 | abnormal_feats = [] 43 | for video_idx in abnor_list: 44 | video_path = os.path.join(abnormal_path, abnormal_list[video_idx]) 45 | with open(video_path, "rb") as f: 46 | feats = np.load(f) 47 | abnormal_feats.append(feats) 48 | 49 | normal_feats = [] 50 | for video_idx in norm_list: 51 | video_path = os.path.join(normal_path, normal_list[video_idx]) 52 | with open(video_path, "rb") as f: 53 | feats = np.load(f) 54 | normal_feats.append(feats) 55 | 56 | 57 | all_feats = np.vstack((*abnormal_feats, *normal_feats)) 58 | all_labels = np.zeros(32*batchsize, dtype='uint8') 59 | 60 | all_labels[:32*n_exp] = 1 61 | 62 | return all_feats, all_labels 63 | 64 | 65 | def custom_objective(y_true, y_pred): 66 | 67 | y_true = K.reshape(y_true, [-1]) 68 | y_pred = K.reshape(y_pred, [-1]) 69 | n_seg = 32 70 | nvid = 60 71 | n_exp = int(nvid / 2) 72 | 73 | max_scores_list = [] 74 | z_scores_list = [] 75 | temporal_constrains_list = [] 76 | sparsity_constrains_list = [] 77 | 78 | for i in range(0, n_exp, 1): 79 | 80 | video_predictions = y_pred[i*n_seg:(i+1)*n_seg] 81 | 82 | max_scores_list.append(K.max(video_predictions)) 83 | temporal_constrains_list.append( 84 | K.sum(K.pow(video_predictions[1:] - video_predictions[:-1], 2)) 85 | ) 86 | sparsity_constrains_list.append(K.sum(video_predictions)) 87 | 88 | for j in range(n_exp, 2*n_exp, 1): 89 | 90 | video_predictions = y_pred[j*n_seg:(j+1)*n_seg] 91 | max_scores_list.append(K.max(video_predictions)) 92 | 93 | max_scores = K.stack(max_scores_list) 94 | temporal_constrains = K.stack(temporal_constrains_list) 95 | sparsity_constrains = K.stack(sparsity_constrains_list) 96 | 97 | for ii in range(0, n_exp, 1): 98 | max_z = K.maximum(1 - max_scores[:n_exp] + max_scores[n_exp+ii], 0) 99 | z_scores_list.append(K.sum(max_z)) 100 | 101 | z_scores = K.stack(z_scores_list) 102 | z = K.mean(z_scores) 103 | 104 | return z + \ 105 | 0.00008*K.sum(temporal_constrains) + \ 106 | 0.00008*K.sum(sparsity_constrains) 107 | 108 | output_dir = "trained_models/" 109 | normal_dir = "../processed_c3d_features/train/normal" 110 | abnormal_dir = "../processed_c3d_features/train/abnormal" 111 
| 112 | normal_list = os.listdir(normal_dir) 113 | normal_list.sort() 114 | abnormal_list = os.listdir(abnormal_dir) 115 | abnormal_list.sort() 116 | 117 | weights_path = output_dir + 'weights.mat' 118 | 119 | model_path = output_dir + 'model.json' 120 | 121 | #Create Full connected Model 122 | model = classifier.classifier_model() 123 | 124 | adagrad = keras.optimizers.Adagrad(lr=0.001, epsilon=1e-08) 125 | model.compile(loss=custom_objective, optimizer=adagrad) 126 | 127 | if not os.path.exists(output_dir): 128 | os.makedirs(output_dir) 129 | 130 | loss_graph =[] 131 | num_iters = 20000 132 | total_iterations = 0 133 | batchsize=60 134 | time_before = datetime.now() 135 | 136 | 137 | for it_num in range(num_iters): 138 | inputs, targets = load_batch_train( 139 | normal_dir, normal_list, abnormal_dir, abnormal_list 140 | ) 141 | batch_loss = model.train_on_batch(inputs, targets) 142 | loss_graph = np.hstack((loss_graph, batch_loss)) 143 | total_iterations += 1 144 | if total_iterations % 20 == 0: 145 | print ("Iteration={} took: {}, loss: {}".format( 146 | total_iterations, datetime.now() - time_before, batch_loss) 147 | ) 148 | 149 | print("Train Successful - Model saved") 150 | save_model(model, model_path, weights_path) 151 | -------------------------------------------------------------------------------- /original_model/trained_models/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/original_model/trained_models/.gitignore -------------------------------------------------------------------------------- /original_model/utils/array_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def sliding_window(arr, size, stride): 5 | """Apply sliding window to an array, getting chunks of 6 | of specified size using the specified stride 7 | 8 | :param arr: Array to be divided 9 | :param size: Size of the chunks 10 | :param stride: Number of frames to skip for the next chunk 11 | :returns: Tensor with the resulting chunks 12 | :rtype: np.ndarray 13 | 14 | """ 15 | num_chunks = int((len(arr) - size) / stride) + 2 16 | result = [] 17 | for i in range(0, num_chunks * stride, stride): 18 | if len(arr[i:i + size]) > 0: 19 | result.append(arr[i:i + size]) 20 | return np.array(result) 21 | 22 | 23 | def interpolate(features, features_per_bag): 24 | """Transform a bag with an arbitrary number of features into a bag 25 | with a fixed amount, using interpolation of consecutive features 26 | 27 | :param features: Bag of features to pad 28 | :param features_per_bag: Number of features to obtain 29 | :returns: Interpolated features 30 | :rtype: np.ndarray 31 | 32 | """ 33 | feature_size = np.array(features).shape[1] 34 | interpolated_features = np.zeros((features_per_bag, feature_size)) 35 | interpolation_indices = np.round(np.linspace(0, len(features) - 1, num=features_per_bag + 1)) 36 | count = 0 37 | for index in range(0, len(interpolation_indices)-1): 38 | start = int(interpolation_indices[index]) 39 | end = int(interpolation_indices[index + 1]) 40 | 41 | assert end >= start 42 | 43 | if start == end: 44 | temp_vect = features[start, :] 45 | else: 46 | temp_vect = np.mean(features[start:end+1, :], axis=0) 47 | 48 | temp_vect = temp_vect / np.linalg.norm(temp_vect) 49 | 50 | if np.linalg.norm(temp_vect) == 0: 51 | print("Error") 52 | 53 | interpolated_features[count,:]=temp_vect 54 | 
count = count + 1 55 | 56 | return np.array(interpolated_features) 57 | 58 | 59 | def extrapolate(outputs, num_frames): 60 | """Expand output to match the video length 61 | 62 | :param outputs: Array of predicted outputs 63 | :param num_frames: Expected size of the output array 64 | :returns: Array of output size 65 | :rtype: np.ndarray 66 | 67 | """ 68 | 69 | extrapolated_outputs = [] 70 | extrapolation_indices = np.round(np.linspace(0, len(outputs) - 1, num=num_frames)) 71 | for index in extrapolation_indices: 72 | extrapolated_outputs.append(outputs[int(index)]) 73 | return np.array(extrapolated_outputs) 74 | -------------------------------------------------------------------------------- /original_model/utils/video_util.py: -------------------------------------------------------------------------------- 1 | from utils.array_util import * 2 | import parameters as params 3 | import cv2 4 | 5 | 6 | def get_video_clips(video_path): 7 | """Divides the input video into non-overlapping clips 8 | 9 | :param video_path: Path to the video 10 | :returns: Array with the fragments of video 11 | :rtype: np.ndarray 12 | 13 | """ 14 | frames = get_video_frames(video_path) 15 | clips = sliding_window(frames, params.frame_count, params.frame_count) 16 | return clips, len(frames) 17 | 18 | 19 | def get_video_frames(video_path): 20 | """Reads the video given a file path 21 | 22 | :param video_path: Path to the video 23 | :returns: Video as an array of frames 24 | :rtype: np.ndarray 25 | 26 | """ 27 | cap = cv2.VideoCapture(video_path) 28 | frames = [] 29 | while (cap.isOpened()): 30 | ret, frame = cap.read() 31 | if ret == True: 32 | frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 33 | else: 34 | break 35 | cap.release() 36 | return frames 37 | -------------------------------------------------------------------------------- /original_model/utils/visualization_util.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | import matplotlib.pyplot as plt 4 | from matplotlib.animation import FuncAnimation 5 | from utils.video_util import * 6 | 7 | 8 | def visualize_clip(clip, convert_bgr=False, save_gif=False, file_path=None): 9 | num_frames = len(clip) 10 | fig, ax = plt.subplots() 11 | fig.set_tight_layout(True) 12 | 13 | def update(i): 14 | if convert_bgr: 15 | frame = cv2.cvtColor(clip[i], cv2.COLOR_BGR2RGB) 16 | else: 17 | frame = clip[i] 18 | plt.imshow(frame) 19 | return plt 20 | 21 | # FuncAnimation will call the 'update' function for each frame; here 22 | # animating over 10 frames, with an interval of 20ms between frames. 23 | anim = FuncAnimation(fig, update, frames=np.arange(0, num_frames), interval=1) 24 | if save_gif: 25 | anim.save(file_path, dpi=80, writer='imagemagick') 26 | else: 27 | # plt.show() will just loop the animation forever. 
28 | plt.show() 29 | 30 | 31 | def visualize_predictions(video_path, predictions, save_path): 32 | frames = get_video_frames(video_path) 33 | assert len(frames) == len(predictions) 34 | 35 | fig, ax = plt.subplots(figsize=(5, 5)) 36 | fig.set_tight_layout(True) 37 | 38 | line = matplotlib.lines.Line2D([], []) 39 | 40 | fig_frame = plt.subplot(2, 1, 1) 41 | img = fig_frame.imshow(frames[0]) 42 | fig_prediction = plt.subplot(2, 1, 2) 43 | fig_prediction.set_xlim(0, len(frames)) 44 | fig_prediction.set_ylim(0, 1.15) 45 | fig_prediction.add_line(line) 46 | 47 | def update(i): 48 | frame = frames[i] 49 | x = range(0, i) 50 | y = predictions[0:i] 51 | line.set_data(x, y) 52 | img.set_data(frame) 53 | return plt 54 | 55 | # FuncAnimation will call the 'update' function for each frame; here 56 | # animating over 10 frames, with an interval of 20ms between frames. 57 | 58 | anim = FuncAnimation(fig, update, frames=np.arange(0, len(frames), 10), interval=1, repeat=False) 59 | 60 | if save_path: 61 | anim.save(save_path, dpi=200, writer='imagemagick') 62 | else: 63 | plt.show() 64 | -------------------------------------------------------------------------------- /overlay_curves.py: -------------------------------------------------------------------------------- 1 | import sklearn.metrics 2 | import scipy.optimize, scipy.interpolate 3 | import numpy as np 4 | import pandas as pd 5 | import os 6 | import proposal.utils.array_util as array_util 7 | import matplotlib.pyplot as plt 8 | 9 | def calculate_information_for_curves(gts, preds): 10 | fpr, tpr, _ = sklearn.metrics.roc_curve(gts, preds) 11 | auc = sklearn.metrics.auc(fpr, tpr) 12 | prec, rec, _ = sklearn.metrics.precision_recall_curve(gts, preds) 13 | ap = sklearn.metrics.average_precision_score(gts, preds) 14 | 15 | return fpr, tpr, auc, prec, rec, ap 16 | 17 | 18 | ground_truth = pd.read_csv("./dataset/test/temporal-annotation.txt", header=None, index_col=0) 19 | 20 | preds_c3d = [] 21 | preds_lstm = [] 22 | gts = [] 23 | 24 | for idx, row in ground_truth.iterrows(): 25 | c3d_preds_file_path = os.path.join("predictions_c3d", idx) 26 | lstm_preds_file_path = os.path.join("predictions_lstm", idx) 27 | frames = row[6] 28 | 29 | try: 30 | with open(c3d_preds_file_path, "rb") as f: 31 | curr_c3d_preds = np.load(f) 32 | with open(lstm_preds_file_path, "rb") as f: 33 | curr_lstm_preds = np.load(f) 34 | 35 | c3d_padded_preds = array_util.extrapolate(curr_c3d_preds, frames) 36 | lstm_padded_preds = array_util.extrapolate(curr_lstm_preds, frames) 37 | 38 | except FileNotFoundError: 39 | c3d_padded_preds = np.zeros((frames,1)) 40 | lstm_padded_preds = np.zeros((frames,1)) 41 | 42 | print("No predictions generated for {}".format(idx)) 43 | 44 | curr_gts = np.zeros(frames) 45 | anomaly_start_1 = row[2] 46 | anomaly_end_1 = row[3] 47 | 48 | anomaly_start_2 = row[4] 49 | anomaly_end_2 = row[5] 50 | 51 | if anomaly_start_1 != -1 and anomaly_end_1 != -1: 52 | curr_gts[anomaly_start_1:anomaly_end_1+1] = 1 53 | 54 | if anomaly_start_2 != -1 and anomaly_end_2 != -1: 55 | curr_gts[anomaly_start_2:anomaly_end_2+1] = 1 56 | 57 | preds_c3d.append(c3d_padded_preds) 58 | preds_lstm.append(lstm_padded_preds) 59 | gts.append(curr_gts) 60 | 61 | gts = np.concatenate(gts) 62 | 63 | preds_c3d = np.concatenate(preds_c3d) 64 | preds_lstm = np.concatenate(preds_lstm) 65 | 66 | ( 67 | fpr_c3d, tpr_c3d, auc_c3d, 68 | prec_c3d, rec_c3d, ap_c3d 69 | ) = calculate_information_for_curves(gts, preds_c3d) 70 | 71 | ( 72 | fpr_lstm, tpr_lstm, auc_lstm, 73 | prec_lstm, rec_lstm, ap_lstm 
74 | ) = calculate_information_for_curves(gts, preds_lstm) 75 | 76 | 77 | plt.title("Curva ROC") 78 | plt.plot(fpr_c3d, tpr_c3d, 'b', label = "C3d - AUC: {:.5f}".format(auc_c3d)) 79 | plt.plot(fpr_lstm, tpr_lstm, 'g', label = "Lstm - AUC: {:.5f}".format(auc_lstm)) 80 | plt.legend(loc = 'lower right') 81 | plt.plot([0, 1], [0, 1],'k--') 82 | plt.plot([1, 0], [0, 1],'k:') 83 | plt.xlim([0, 1]) 84 | plt.ylim([0, 1]) 85 | plt.ylabel('True Positive Rate') 86 | plt.xlabel('False Positive Rate') 87 | plt.savefig("roc_overlay.pdf") 88 | 89 | plt.clf() 90 | 91 | plt.title("Curva PR") 92 | plt.plot(rec_c3d, prec_c3d, 'b', label = "C3d - AP: {:.5f}".format(ap_c3d)) 93 | plt.plot(rec_lstm, prec_lstm, 'g', label = "Lstm - AP: {:.5f}".format(ap_lstm)) 94 | plt.legend(loc = 'upper right') 95 | plt.xlim([0, 1]) 96 | plt.ylim([0, 1]) 97 | plt.ylabel('Precision') 98 | plt.xlabel('Recall') 99 | plt.savefig("pr_overlay.pdf") 100 | -------------------------------------------------------------------------------- /proposal/README.md: -------------------------------------------------------------------------------- 1 | # Proposal experiments replication 2 | 3 | In this folder you can find the code to replicate the experimentation 4 | of our proposal, using a spatio-temporal feature extractor instead of 5 | the C3D convolutional model. We have tested different models in our 6 | report, specifically extractors that provide descriptors of size 512, 7 | 768 and 1024 for each clip of 16 frames from the video. However, we 8 | only provide the model of size 1024, since it has provided the best 9 | results and the experiments are similar for all the models. 10 | 11 | ## Experimentation replication 12 | 13 | The folder is self-contained and fully written in Python. The 14 | experiments can be completely performed by executing code inside this 15 | folder, without depending on external resources. Code files inside 16 | this folder can be divided into two groups; resource files and scripts. 17 | In resource files, auxiliary utilities and models are defined. Scripts 18 | are provided to replicate the experiments. 19 | 20 | ### Resource files 21 | 22 | The resource files are listed and explained below, in alphabetical 23 | order: 24 | 25 | - `classifier.py`: Definition of the classifier model, together 26 | with functions to save and load the model to disk. 27 | - `configuration.py`: Configuration information for the experiments 28 | (data paths, output paths, annotation files, etc.). 29 | - `models.py`: Definition of the feature extractor model. 30 | - `parameters.py`: Information about model structure. 31 | - `video_data_generator.py`: Adaptation of Keras datasets for video 32 | data handling. This code has been adapted from the video frame 33 | generator developed by [Patrice 34 | Ferlet](https://gist.github.com/metal3d) and the original can be 35 | downloaded from 36 | [here](https://gist.github.com/metal3d/0fe5539abfc534855ddfd351d06cfa06) 37 | - `utils` folder: This folder contains utilities to process arrays 38 | and video files. 39 | 40 | ### Scripts 41 | 42 | The developed scripts are listed in the order that should be followed 43 | to replicate the experiments. 44 | 45 | 1. `train_feature_extractor.py`: This script trains the feature 46 | extractor model, solving the video classification task over the 47 | UCF-101 dataset (must be downloaded). Afterwards, the model is 48 | saved in `trained_models`. 49 | 2. 
`extract_temporal_features.py`: This script computes the features 50 | from the videos composing the dataset (videos contained in the `dataset` 51 | folder at root project level), and stores them inside the folder 52 | `raw_lstm_features` (if the default configuration has been kept). In order 53 | to work properly, the destination folder must exist. The folder 54 | structure can be created with the bash script provided at root project 55 | level. 56 | 3. `preprocess_features.py`: This script takes the previously extracted 57 | features, whose number can vary depending on the original video length, 58 | and computes a fixed-size representation for each video. The new features 59 | are stored inside the folder `processed_lstm_features`. 60 | 4. `train_classifier.py`: This script trains the final classifier 61 | model using the preprocessed features extracted before. After 62 | training, it stores the resulting model inside the folder `trained_models`. 63 | 5. `predict_test_set.py`: After training, this script takes the trained 64 | model and uses it to predict the test set (test features are calculated 65 | in the feature extraction and preprocessing steps). 66 | 6. `calculate_metrics.py`: When the predictions have been made, this 67 | script calculates several performance metrics to validate the model. 68 | -------------------------------------------------------------------------------- /proposal/calculate_metrics.py: -------------------------------------------------------------------------------- 1 | import sklearn.metrics 2 | import scipy.optimize, scipy.interpolate 3 | import numpy as np 4 | import pandas as pd 5 | import configuration as cfg 6 | import os 7 | import utils.video_util 8 | import utils.array_util 9 | import matplotlib.pyplot as plt 10 | 11 | def eer_score(fpr, tpr, thr): 12 | """ Returns equal error rate (EER) and the corresponding threshold.
""" 13 | fnr = 1-tpr 14 | abs_diffs = np.abs(fpr - fnr) 15 | min_index = np.argmin(abs_diffs) 16 | eer = np.mean((fpr[min_index], fnr[min_index])) 17 | return eer, thr[min_index] 18 | 19 | ground_truth = pd.read_csv(cfg.test_temporal_annotations, header=None, index_col=0) 20 | 21 | preds = [] 22 | gts = [] 23 | 24 | for idx, row in ground_truth.iterrows(): 25 | preds_file_path = os.path.join(cfg.preds_folder, idx) 26 | frames = row[6] 27 | try: 28 | with open(preds_file_path, "rb") as f: 29 | curr_preds = np.load(f) 30 | 31 | padded_preds = utils.array_util.extrapolate(curr_preds, frames) 32 | except FileNotFoundError: 33 | padded_preds = np.zeros((frames,1)) 34 | print("No predictions generated for {}".format(idx)) 35 | 36 | curr_gts = np.zeros(frames) 37 | anomaly_start_1 = row[2] 38 | anomaly_end_1 = row[3] 39 | 40 | anomaly_start_2 = row[4] 41 | anomaly_end_2 = row[5] 42 | 43 | if anomaly_start_1 != -1 and anomaly_end_1 != -1: 44 | curr_gts[anomaly_start_1:anomaly_end_1+1] = 1 45 | 46 | if anomaly_start_2 != -1 and anomaly_end_2 != -1: 47 | curr_gts[anomaly_start_2:anomaly_end_2+1] = 1 48 | 49 | preds.append(padded_preds) 50 | gts.append(curr_gts) 51 | 52 | gts = np.concatenate(gts) 53 | preds = np.concatenate(preds) 54 | preds_labels = np.round(preds) 55 | 56 | acc = sklearn.metrics.accuracy_score(gts, preds_labels) 57 | ap = sklearn.metrics.average_precision_score(gts, preds) 58 | f1 = sklearn.metrics.f1_score(gts, preds_labels) 59 | fpr, tpr, thr = sklearn.metrics.roc_curve(gts, preds) 60 | prec, rec, _ = sklearn.metrics.precision_recall_curve(gts, preds) 61 | eer, _ = eer_score(fpr, tpr, thr) 62 | conf_mat = sklearn.metrics.confusion_matrix(gts, preds_labels) 63 | auc = sklearn.metrics.auc(fpr, tpr) 64 | 65 | plt.title("Curva ROC") 66 | plt.plot(fpr, tpr, 'b', label = "AUC: {}".format(auc)) 67 | plt.legend(loc = 'lower right') 68 | plt.plot([0, 1], [0, 1],'r--') 69 | plt.xlim([0, 1]) 70 | plt.ylim([0, 1]) 71 | plt.ylabel('True Positive Rate') 72 | plt.xlabel('False Positive Rate') 73 | plt.savefig(os.path.join(cfg.output_folder, "roc_lstm.png")) 74 | 75 | plt.clf() 76 | 77 | plt.title("Curva PR") 78 | plt.plot(rec, prec, 'r', label = "LSTM - AP: {:.5f}".format(ap)) 79 | plt.legend(loc = 'lower right') 80 | plt.xlim([0, 1]) 81 | plt.ylim([0, 1]) 82 | plt.ylabel('Precison') 83 | plt.xlabel('Recall') 84 | plt.savefig(os.path.join(cfg.output_folder, "pr_curve_lstm.png")) 85 | 86 | print("Accuracy: {:.5f}, AUC: {:.5f}, F1: {:.5f}, EER: {:.5f}, AP: {:.5F}".format( 87 | acc, auc, f1, eer, ap 88 | )) 89 | 90 | print("Confusion matrix") 91 | print(conf_mat) 92 | -------------------------------------------------------------------------------- /proposal/classifier.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import scipy.io as sio 3 | from keras import Sequential 4 | from keras.layers import Dense, Dropout 5 | from keras.regularizers import l2 6 | 7 | import configuration as cfg 8 | 9 | def classifier_model(): 10 | 11 | model = Sequential() 12 | model.add(Dense(512, input_dim=1024, kernel_initializer='glorot_normal', kernel_regularizer=l2(0.001), activation='relu')) 13 | model.add(Dropout(0.5)) 14 | model.add(Dense(64, kernel_initializer='glorot_normal', kernel_regularizer=l2(0.001))) 15 | model.add(Dropout(0.5)) 16 | model.add(Dense(1, kernel_initializer='glorot_normal', kernel_regularizer=l2(0.001), activation='sigmoid')) 17 | return model 18 | 19 | 20 | def build_classifier_model(): 21 | model = classifier_model() 22 | model = 
load_weights(model, cfg.classifier_model_weigts) 23 | return model 24 | 25 | 26 | def conv_dict(dict2): 27 | dict = {} 28 | for i in range(len(dict2)): 29 | if str(i) in dict2: 30 | if dict2[str(i)].shape == (0, 0): 31 | dict[str(i)] = dict2[str(i)] 32 | else: 33 | weights = dict2[str(i)][0] 34 | weights2 = [] 35 | for weight in weights: 36 | if weight.shape in [(1, x) for x in range(0, 5000)]: 37 | weights2.append(weight[0]) 38 | else: 39 | weights2.append(weight) 40 | dict[str(i)] = weights2 41 | return dict 42 | 43 | 44 | def load_weights(model, weights_file): 45 | dict2 = sio.loadmat(weights_file) 46 | dict = conv_dict(dict2) 47 | i = 0 48 | for layer in model.layers: 49 | weights = dict[str(i)] 50 | layer.set_weights(weights) 51 | i += 1 52 | return model 53 | 54 | if __name__ == '__main__': 55 | model = build_classifier_model() 56 | model.summary() 57 | -------------------------------------------------------------------------------- /proposal/configuration.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | extractor_model_weights = "./trained_models/rec_feats_weights.h5" 4 | 5 | classifier_model_weigts = './trained_models/weights_proposal.mat' 6 | classifier_model_json = './trained_models/model_proposal.json' 7 | 8 | input_folder = './input' 9 | output_folder = '/mnt/sdd/pacoluque/output' 10 | 11 | sample_video_path = '../dataset/train/abnormal/RoadAccidents021_x264.mp4' 12 | 13 | raw_dataset_folder = '../dataset/' 14 | raw_features_folder = "../raw_lstm_features" 15 | processed_features_folder = "../processed_lstm_features" 16 | 17 | train_set = os.path.join(raw_dataset_folder, 'train') 18 | normal_videos_path = os.path.join(train_set, "normal") 19 | abnormal_videos_path = os.path.join(train_set, "abnormal") 20 | 21 | raw_features_train_set = os.path.join(raw_features_folder, 'train') 22 | raw_normal_train_features = os.path.join(raw_features_train_set, "normal") 23 | raw_abnormal_train_features = os.path.join(raw_features_train_set, "abnormal") 24 | 25 | processed_features_train_set = os.path.join(processed_features_folder, 'train') 26 | processed_normal_train_features = os.path.join(processed_features_train_set, "normal") 27 | processed_abnormal_train_features = os.path.join(processed_features_train_set, "abnormal") 28 | 29 | test_set = os.path.join(raw_dataset_folder, 'test') 30 | raw_test_features = os.path.join(raw_features_folder, 'test') 31 | processed_test_features = os.path.join(processed_features_folder, 'test') 32 | 33 | preds_folder = '../predictions_lstm' 34 | 35 | test_temporal_annotations = os.path.join(test_set, "temporal-annotation.txt") 36 | -------------------------------------------------------------------------------- /proposal/display_predictions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import classifier 3 | from utils.visualization_util import * 4 | import sklearn.preprocessing 5 | import parameters as params 6 | import configuration as cfg 7 | 8 | def run_demo(): 9 | 10 | video_name = os.path.basename(cfg.sample_video_path).split('.')[0] 11 | 12 | # read video 13 | video_clips, num_frames = get_video_clips(cfg.sample_video_path) 14 | 15 | print("Number of clips in the video : ", len(video_clips)) 16 | 17 | # build models 18 | original_model = keras.models.load_model(cfg.extractor_model_weights) 19 | feature_extractor = keras.models.Model( 20 | inputs = original_model.input, 21 | outputs = original_model.get_layer("lstm_1").output 22 | ) 23 | 
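# Note: "lstm_1" is the 1024-unit LSTM defined in models.recurrent_feats_model,
# so each 16-frame clip is summarised as a single 1024-d spatio-temporal feature
# vector; the MIL classifier built below then scores the 32-segment bag formed
# from those vectors.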
classifier_model = build_classifier_model() 24 | 25 | print("Models initialized") 26 | 27 | # extract features 28 | rgb_features = [] 29 | for i, clip in enumerate(video_clips): 30 | clip = np.array(clip) 31 | if len(clip) < params.frame_count: 32 | continue 33 | 34 | clip = preprocess_input(clip) 35 | rgb_feature = feature_extractor.predict(clip)[0] 36 | rgb_features.append(rgb_feature) 37 | 38 | print("Processed clip : ", i) 39 | 40 | rgb_features = np.array(rgb_features) 41 | rgb_feature_bag = interpolate(rgb_features, params.features_per_bag) 42 | 43 | # classify using the trained classifier model 44 | predictions = classifier_model.predict(rgb_feature_bag) 45 | 46 | predictions = np.array(predictions).squeeze() 47 | 48 | predictions = extrapolate(predictions, num_frames) 49 | 50 | save_path = os.path.join(cfg.output_folder, video_name + '.gif') 51 | # visualize predictions 52 | print('Executed Successfully - '+video_name + '.gif saved') 53 | visualize_predictions(cfg.sample_video_path, predictions, save_path) 54 | 55 | 56 | if __name__ == '__main__': 57 | run_demo() 58 | -------------------------------------------------------------------------------- /proposal/extract_temporal_features.py: -------------------------------------------------------------------------------- 1 | import os 2 | import keras 3 | import models 4 | from utils import video_util 5 | import configuration as cfg 6 | import numpy as np 7 | import sklearn.preprocessing 8 | 9 | original_model = keras.models.load_model(cfg.extractor_model_weights) 10 | spatiotemporal_extractor = keras.models.Model( 11 | inputs = original_model.input, 12 | outputs = original_model.get_layer("lstm_1").output 13 | ) 14 | 15 | normal_videos = os.listdir(cfg.normal_videos_path) 16 | normal_videos.sort() 17 | for i, vid_name in enumerate(normal_videos): 18 | print("Processing {} ({}/{})".format(vid_name, i+1, len(normal_videos))) 19 | vid_path = os.path.join(cfg.normal_videos_path, vid_name) 20 | feats_path = os.path.join(cfg.raw_normal_train_features, vid_name[:-9] + ".npy") 21 | 22 | clips, frames = video_util.get_video_clips(vid_path) 23 | 24 | # Remove last clip if number of frames is not equal to 16 25 | if frames % 16 != 0: 26 | clips = clips[:-1] 27 | 28 | prep_clips = [keras.applications.xception.preprocess_input(np.array(clip)) 29 | for clip in clips] 30 | prep_clips = np.stack(prep_clips, axis=0) 31 | 32 | features = spatiotemporal_extractor.predict(prep_clips) 33 | features = sklearn.preprocessing.normalize(features, axis=1) 34 | 35 | with open(feats_path, "wb") as f: 36 | np.save(f, features) 37 | 38 | abnormal_videos = os.listdir(cfg.abnormal_videos_path) 39 | abnormal_videos.sort() 40 | print("Processing abnormal videos...") 41 | for i, vid_name in enumerate(abnormal_videos): 42 | print("Processing {} ({}/{})".format(vid_name, i+1, len(abnormal_videos))) 43 | vid_path = os.path.join(cfg.abnormal_videos_path, vid_name) 44 | feats_path = os.path.join(cfg.raw_abnormal_train_features, vid_name[:-9] + ".npy") 45 | 46 | clips, frames = video_util.get_video_clips(vid_path) 47 | 48 | # Remove last clip if number of frames is not equal to 16 49 | if frames % 16 != 0: 50 | clips = clips[:-1] 51 | 52 | prep_clips = [keras.applications.xception.preprocess_input(np.array(clip)) 53 | for clip in clips] 54 | prep_clips = np.stack(prep_clips, axis=0) 55 | 56 | features = spatiotemporal_extractor.predict(prep_clips) 57 | features = sklearn.preprocessing.normalize(features, axis=1) 58 | 59 | with open(feats_path, "wb") as f: 60 | np.save(f, 
features) 61 | 62 | 63 | test_videos = os.listdir(cfg.test_set) 64 | test_videos.sort() 65 | print("Processing test videos...") 66 | for i, vid_name in enumerate(test_videos): 67 | print("Processing {} ({}/{})".format(vid_name, i+1, len(test_videos))) 68 | vid_path = os.path.join(cfg.test_set, vid_name) 69 | feats_path = os.path.join(cfg.raw_test_features, vid_name[:-9] + ".npy") 70 | 71 | clips, frames = video_util.get_video_clips(vid_path) 72 | 73 | # Remove last clip if number of frames is not equal to 16 74 | if frames % 16 != 0: 75 | clips = clips[:-1] 76 | 77 | prep_clips = [keras.applications.xception.preprocess_input(np.array(clip)) 78 | for clip in clips] 79 | prep_clips = np.stack(prep_clips, axis=0) 80 | 81 | features = spatiotemporal_extractor.predict(prep_clips) 82 | features = sklearn.preprocessing.normalize(features, axis=1) 83 | 84 | with open(feats_path, "wb") as f: 85 | np.save(f, features) 86 | -------------------------------------------------------------------------------- /proposal/models.py: -------------------------------------------------------------------------------- 1 | import keras 2 | 3 | def recurrent_feats_model(): 4 | 5 | xception = keras.applications.Xception(include_top=True, weights='imagenet') 6 | 7 | extractor = keras.models.Model(inputs=xception.layers[0].input, 8 | outputs=xception.layers[-2].output) 9 | for layer in extractor.layers: 10 | layer.trainable=False 11 | 12 | input_layer = keras.layers.Input((None,299,299,3)) 13 | td_layer = keras.layers.TimeDistributed(extractor)(input_layer) 14 | 15 | recurrent_layer = keras.layers.LSTM( 16 | 1024, 17 | return_sequences=False, 18 | dropout=0.6 19 | )(td_layer) 20 | linear = keras.layers.Dense(512, activation='relu')(recurrent_layer) 21 | linear = keras.layers.Dropout(0.5)(linear) 22 | linear = keras.layers.Dense(128, activation='relu')(linear) 23 | linear = keras.layers.Dropout(0.5)(linear) 24 | predictions = keras.layers.Dense(101, activation='softmax')(linear) 25 | 26 | model = keras.models.Model(inputs=input_layer, outputs=predictions) 27 | return model 28 | -------------------------------------------------------------------------------- /proposal/parameters.py: -------------------------------------------------------------------------------- 1 | frame_height = 240 2 | frame_width = 320 3 | channels = 3 4 | 5 | frame_count = 16 6 | 7 | features_per_bag = 32 8 | -------------------------------------------------------------------------------- /proposal/predict_test_set.py: -------------------------------------------------------------------------------- 1 | import classifier 2 | import configuration as cfg 3 | import numpy as np 4 | import os 5 | 6 | def load_test_set(videos_path, videos_list): 7 | feats = [] 8 | 9 | for vid in videos_list: 10 | vid_path = os.path.join(videos_path, vid) 11 | with open(vid_path, "rb") as f: 12 | feat = np.load(f) 13 | feats.append(feat) 14 | 15 | feats = np.array(feats) 16 | return feats 17 | 18 | classifier_model = classifier.build_classifier_model() 19 | 20 | vid_list = os.listdir(cfg.processed_test_features) 21 | vid_list.sort() 22 | 23 | test_set = load_test_set(cfg.processed_test_features, vid_list) 24 | 25 | for filename, example in zip(vid_list, test_set): 26 | predictions_file = filename[:-4] + '.npy' 27 | pred_path = os.path.join(cfg.preds_folder, predictions_file) 28 | pred = classifier_model.predict_on_batch(example) 29 | with open(pred_path, "wb") as f: 30 | np.save(pred_path, pred, allow_pickle=True) 31 | 
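# Shape note: each processed test file holds a (32, 1024) bag of segment
# features (see preprocess_features.py), so predict_on_batch returns 32
# anomaly scores per video; calculate_metrics.py later extrapolates those
# segment scores to frame level against the temporal annotations.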
-------------------------------------------------------------------------------- /proposal/preprocess_features.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import sklearn.preprocessing 4 | import configuration as cfg 5 | 6 | def transform_into_segments(features, n_segments=32): 7 | if features.shape[0] < n_segments: 8 | raise RuntimeError("Number of prev segments lesser than expected output size") 9 | 10 | cuts = np.linspace(0, features.shape[0], n_segments, dtype=int, endpoint=False) 11 | 12 | new_feats = [] 13 | for i, j in zip(cuts[:-1], cuts[1:]): 14 | new_feats.append(np.mean(features[i:j,:], axis=0)) 15 | 16 | new_feats.append(np.mean(features[cuts[-1]:,:], axis=0)) 17 | 18 | new_feats = np.array(new_feats) 19 | new_feats = sklearn.preprocessing.normalize(new_feats, axis=1) 20 | return new_feats 21 | 22 | for filename in os.listdir(cfg.raw_normal_train_features): 23 | print("Processing {}".format(filename)) 24 | raw_file_path = os.path.join( 25 | cfg.raw_normal_train_features, filename 26 | ) 27 | processed_file_path = os.path.join( 28 | cfg.processed_normal_train_features, filename 29 | ) 30 | 31 | with open(raw_file_path, "rb") as f: 32 | feats = np.load(f, allow_pickle=True) 33 | 34 | try: 35 | new_feats = transform_into_segments(feats) 36 | with open(processed_file_path, "wb") as f: 37 | np.save(f, new_feats, allow_pickle=True) 38 | except RuntimeError: 39 | print("Video {} too short".format(filename)) 40 | 41 | for filename in os.listdir(cfg.raw_abnormal_train_features): 42 | print("Processing {}".format(filename)) 43 | raw_file_path = os.path.join( 44 | cfg.raw_abnormal_train_features, filename 45 | ) 46 | processed_file_path = os.path.join( 47 | cfg.processed_abnormal_train_features, filename 48 | ) 49 | with open(raw_file_path, "rb") as f: 50 | feats = np.load(f, allow_pickle=True) 51 | 52 | try: 53 | new_feats = transform_into_segments(feats) 54 | with open(processed_file_path, "wb") as f: 55 | np.save(f, new_feats, allow_pickle=True) 56 | except RuntimeError: 57 | print("Video {} too short".format(filename)) 58 | 59 | for filename in os.listdir(cfg.raw_test_features): 60 | print("Processing {}".format(filename)) 61 | raw_file_path = os.path.join( 62 | cfg.raw_test_features, filename 63 | ) 64 | processed_file_path = os.path.join( 65 | cfg.processed_test_features, filename 66 | ) 67 | with open(raw_file_path, "rb") as f: 68 | feats = np.load(f, allow_pickle=True) 69 | 70 | try: 71 | new_feats = transform_into_segments(feats) 72 | with open(processed_file_path, "wb") as f: 73 | np.save(f, new_feats, allow_pickle=True) 74 | except RuntimeError: 75 | print("Video {} too short".format(filename)) 76 | -------------------------------------------------------------------------------- /proposal/train_classifier.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential 2 | from keras.layers import Dense, Dropout 3 | from keras.regularizers import l2 4 | from keras.optimizers import Adagrad, Adam 5 | from scipy.io import savemat 6 | from keras.models import model_from_json 7 | import os 8 | import configuration as cfg 9 | 10 | from os import listdir 11 | import numpy as np 12 | import keras.backend as K 13 | import classifier 14 | 15 | from datetime import datetime 16 | 17 | def save_model(model, json_path, weight_path): 18 | json_string = model.to_json() 19 | open(json_path, 'w').write(json_string) 20 | dict = {} 21 | i = 0 22 | for layer 
in model.layers: 23 | weights = layer.get_weights() 24 | my_list = np.zeros(len(weights), dtype=np.object) 25 | my_list[:] = weights 26 | dict[str(i)] = my_list 27 | i += 1 28 | savemat(weight_path, dict) 29 | 30 | def load_model(json_path): 31 | model = model_from_json(open(json_path).read()) 32 | return model 33 | 34 | def load_batch_train(normal_path, normal_list, abnormal_path, abnormal_list): 35 | 36 | batchsize=60 37 | n_exp = int(batchsize/2) 38 | 39 | num_normal = len(normal_list) 40 | num_abnormal = len(abnormal_list) 41 | 42 | abnor_list_idx = np.random.permutation(num_abnormal) 43 | abnor_list = abnor_list_idx[:n_exp] 44 | norm_list_idx = np.random.permutation(num_normal) 45 | norm_list = norm_list_idx[:n_exp] 46 | 47 | abnormal_feats = [] 48 | for video_idx in abnor_list: 49 | video_path = os.path.join(abnormal_path, abnormal_list[video_idx]) 50 | with open(video_path, "rb") as f: 51 | feats = np.load(f) 52 | abnormal_feats.append(feats) 53 | 54 | normal_feats = [] 55 | for video_idx in norm_list: 56 | video_path = os.path.join(normal_path, normal_list[video_idx]) 57 | with open(video_path, "rb") as f: 58 | feats = np.load(f) 59 | normal_feats.append(feats) 60 | 61 | 62 | all_feats = np.vstack((*abnormal_feats, *normal_feats)) 63 | all_labels = np.zeros(32*batchsize, dtype='uint8') 64 | 65 | all_labels[:32*n_exp] = 1 66 | 67 | return all_feats, all_labels 68 | 69 | 70 | def custom_objective(y_true, y_pred): 71 | 72 | y_true = K.reshape(y_true, [-1]) 73 | y_pred = K.reshape(y_pred, [-1]) 74 | n_seg = 32 75 | nvid = 60 76 | n_exp = int(nvid / 2) 77 | 78 | max_scores_list = [] 79 | z_scores_list = [] 80 | temporal_constrains_list = [] 81 | sparsity_constrains_list = [] 82 | 83 | for i in range(0, n_exp, 1): 84 | 85 | video_predictions = y_pred[i*n_seg:(i+1)*n_seg] 86 | 87 | max_scores_list.append(K.max(video_predictions)) 88 | temporal_constrains_list.append( 89 | K.sum(K.pow(video_predictions[1:] - video_predictions[:-1], 2)) 90 | ) 91 | sparsity_constrains_list.append(K.sum(video_predictions)) 92 | 93 | for j in range(n_exp, 2*n_exp, 1): 94 | 95 | video_predictions = y_pred[j*n_seg:(j+1)*n_seg] 96 | max_scores_list.append(K.max(video_predictions)) 97 | 98 | max_scores = K.stack(max_scores_list) 99 | temporal_constrains = K.stack(temporal_constrains_list) 100 | sparsity_constrains = K.stack(sparsity_constrains_list) 101 | 102 | for ii in range(0, n_exp, 1): 103 | max_z = K.maximum(1 - max_scores[:n_exp] + max_scores[n_exp+ii], 0) 104 | z_scores_list.append(K.sum(max_z)) 105 | 106 | z_scores = K.stack(z_scores_list) 107 | z = K.mean(z_scores) 108 | 109 | return z + \ 110 | 0.00004*K.sum(temporal_constrains) + \ 111 | 0.00004*K.sum(sparsity_constrains) 112 | 113 | output_dir = "trained_models/" 114 | normal_dir = cfg.processed_normal_train_features 115 | abnormal_dir = cfg.processed_abnormal_train_features 116 | 117 | normal_list = os.listdir(normal_dir) 118 | normal_list.sort() 119 | abnormal_list = os.listdir(abnormal_dir) 120 | abnormal_list.sort() 121 | 122 | weights_path = output_dir + 'weights_proposal.mat' 123 | 124 | model_path = output_dir + 'model_proposal.json' 125 | 126 | #Create Full connected Model 127 | model = classifier.classifier_model() 128 | 129 | adagrad=Adagrad(lr=0.002, epsilon=1e-07) 130 | model.compile(loss=custom_objective, optimizer=adagrad) 131 | 132 | if not os.path.exists(output_dir): 133 | os.makedirs(output_dir) 134 | 135 | loss_graph =[] 136 | num_iters = 20000 137 | total_iterations = 0 138 | batchsize=60 139 | time_before = datetime.now() 140 | 
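# Training loop overview: each iteration draws a fresh random batch of 30
# abnormal and 30 normal bags (32 segments of 1024-d features each, i.e.
# 1920 rows), with segment labels set to 1 for the abnormal half.
# custom_objective above is a multiple-instance ranking loss: it pushes the
# maximum segment score of every abnormal bag above the maximum score of the
# normal bags through the hinge term max(0, 1 - max_abnormal + max_normal),
# and adds temporal-smoothness and sparsity penalties (both weighted 4e-5)
# on the abnormal segment scores.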
141 | 142 | for it_num in range(num_iters): 143 | inputs, targets = load_batch_train( 144 | normal_dir, normal_list, abnormal_dir, abnormal_list 145 | ) 146 | batch_loss = model.train_on_batch(inputs, targets) 147 | loss_graph = np.hstack((loss_graph, batch_loss)) 148 | total_iterations += 1 149 | if total_iterations % 20 == 0: 150 | print ("Iteration=" + str(total_iterations) + " took: " + str(datetime.now() - time_before) + ", with loss of " + str(batch_loss)) 151 | if total_iterations % 1000 == 0: 152 | save_model(model, model_path, output_dir + "weights_proposal_{}.mat".format(total_iterations)) 153 | 154 | print("Train Successful - Model saved") 155 | save_model(model, model_path, weights_path) 156 | -------------------------------------------------------------------------------- /proposal/train_feature_extractor.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import video_data_generator 3 | import models 4 | import configuration as cfg 5 | 6 | videogen_train = video_data_generator.VideoFrameGenerator("../ucf101/train", batch_size=16) 7 | videogen_test = video_data_generator.VideoFrameGenerator("../ucf101/test", batch_size=16) 8 | model = models.recurrent_feats_model() 9 | 10 | opt = keras.optimizers.Adam(lr=1e-5, decay=1e-6) 11 | model.compile(optimizer=opt, loss='categorical_crossentropy', 12 | metrics=[ 13 | keras.metrics.categorical_accuracy, 14 | keras.metrics.top_k_categorical_accuracy 15 | ]) 16 | 17 | model.fit_generator(videogen_train, epochs = 500, validation_data=videogen_test, 18 | callbacks=[ 19 | keras.callbacks.ModelCheckpoint( 20 | filepath="trained_models/rec_feats_weights.{epoch:03d}.h5", 21 | save_best_only=True, 22 | monitor="val_categorical_accuracy", 23 | period=20 24 | ), 25 | keras.callbacks.CSVLogger( 26 | filename="train_history.csv" 27 | ) 28 | ]) 29 | 30 | model.save(cfg.extractor_model_weights) 31 | -------------------------------------------------------------------------------- /proposal/trained_models/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/proposal/trained_models/.gitignore -------------------------------------------------------------------------------- /proposal/utils/array_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def sliding_window(arr, size, stride): 5 | num_chunks = int((len(arr) - size) / stride) + 2 6 | result = [] 7 | for i in range(0, num_chunks * stride, stride): 8 | if len(arr[i:i + size]) > 0: 9 | result.append(arr[i:i + size]) 10 | return np.array(result) 11 | 12 | 13 | def chunks(l, n): 14 | for i in range(0, len(l), n): 15 | yield l[i:i + n] 16 | 17 | 18 | def interpolate(features, features_per_bag): 19 | feature_size = np.array(features).shape[1] 20 | interpolated_features = np.zeros((features_per_bag, feature_size)) 21 | interpolation_indicies = np.round(np.linspace(0, len(features) - 1, num=features_per_bag + 1)) 22 | count = 0 23 | for index in range(0, len(interpolation_indicies)-1): 24 | start = int(interpolation_indicies[index]) 25 | end = int(interpolation_indicies[index + 1]) 26 | 27 | assert end >= start 28 | 29 | if start == end: 30 | temp_vect = features[start, :] 31 | else: 32 | temp_vect = np.mean(features[start:end+1, :], axis=0) 33 | 34 | temp_vect = temp_vect / np.linalg.norm(temp_vect) 35 | 36 | if np.linalg.norm(temp_vect) 
== 0: 37 | print("Error") 38 | 39 | interpolated_features[count,:]=temp_vect 40 | count = count + 1 41 | 42 | return np.array(interpolated_features) 43 | 44 | 45 | def extrapolate(outputs, num_frames): 46 | extrapolated_outputs = [] 47 | extrapolation_indicies = np.round(np.linspace(0, len(outputs) - 1, num=num_frames)) 48 | for index in extrapolation_indicies: 49 | extrapolated_outputs.append(outputs[int(index)]) 50 | return np.array(extrapolated_outputs) 51 | 52 | 53 | def test_interpolate(): 54 | test_case1 = np.random.randn(24, 2048) 55 | output_case1 = interpolate(test_case1, 32) 56 | assert output_case1.shape == (32, 2048) 57 | 58 | test_case2 = np.random.randn(32, 2048) 59 | output_case2 = interpolate(test_case2, 32) 60 | assert output_case2.shape == (32, 2048) 61 | 62 | test_case3 = np.random.randn(42, 2048) 63 | output_case3 = interpolate(test_case3, 32) 64 | assert output_case3.shape == (32, 2048) 65 | 66 | -------------------------------------------------------------------------------- /proposal/utils/video_util.py: -------------------------------------------------------------------------------- 1 | from utils.array_util import * 2 | import parameters as params 3 | import cv2 4 | 5 | 6 | def get_video_clips(video_path): 7 | frames = get_video_frames(video_path) 8 | clips = sliding_window(frames, params.frame_count, params.frame_count) 9 | return clips, len(frames) 10 | 11 | 12 | def get_video_frames(video_path): 13 | cap = cv2.VideoCapture(video_path) 14 | frames = [] 15 | while (cap.isOpened()): 16 | ret, frame = cap.read() 17 | if ret == True: 18 | frame = cv2.resize(frame, (299,299)) 19 | frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 20 | else: 21 | break 22 | cap.release() 23 | return frames 24 | -------------------------------------------------------------------------------- /proposal/utils/visualization_util.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | import matplotlib.pyplot as plt 4 | from matplotlib.animation import FuncAnimation 5 | from utils.video_util import * 6 | 7 | 8 | def visualize_clip(clip, convert_bgr=False, save_gif=False, file_path=None): 9 | num_frames = len(clip) 10 | fig, ax = plt.subplots() 11 | fig.set_tight_layout(True) 12 | 13 | def update(i): 14 | if convert_bgr: 15 | frame = cv2.cvtColor(clip[i], cv2.COLOR_BGR2RGB) 16 | else: 17 | frame = clip[i] 18 | plt.imshow(frame) 19 | return plt 20 | 21 | # FuncAnimation will call the 'update' function for each frame; here 22 | # animating over 10 frames, with an interval of 20ms between frames. 23 | anim = FuncAnimation(fig, update, frames=np.arange(0, num_frames), interval=1) 24 | if save_gif: 25 | anim.save(file_path, dpi=80, writer='imagemagick') 26 | else: 27 | # plt.show() will just loop the animation forever. 
28 | plt.show() 29 | 30 | 31 | def visualize_predictions(video_path, predictions, save_path): 32 | frames = get_video_frames(video_path) 33 | assert len(frames) == len(predictions) 34 | 35 | fig, ax = plt.subplots(figsize=(5, 5)) 36 | fig.set_tight_layout(True) 37 | 38 | line = matplotlib.lines.Line2D([], []) 39 | 40 | fig_frame = plt.subplot(2, 1, 1) 41 | img = fig_frame.imshow(frames[0]) 42 | fig_prediction = plt.subplot(2, 1, 2) 43 | fig_prediction.set_xlim(0, len(frames)) 44 | fig_prediction.set_ylim(0, 1.15) 45 | fig_prediction.add_line(line) 46 | 47 | def update(i): 48 | frame = frames[i] 49 | x = range(0, i) 50 | y = predictions[0:i] 51 | line.set_data(x, y) 52 | img.set_data(frame) 53 | return plt 54 | 55 | # FuncAnimation will call the 'update' function for each frame; here 56 | # animating over 10 frames, with an interval of 20ms between frames. 57 | 58 | anim = FuncAnimation(fig, update, frames=np.arange(0, len(frames), 10), interval=1, repeat=False) 59 | 60 | if save_path: 61 | anim.save(save_path, dpi=200, writer='imagemagick') 62 | else: 63 | plt.show() 64 | 65 | return 66 | 67 | 68 | -------------------------------------------------------------------------------- /proposal/video_data_generator.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import cv2 as cv 3 | import glob 4 | import numpy as np 5 | import os 6 | import random 7 | 8 | # author: Patrice Ferlet 9 | # licence: MIT 10 | 11 | class VideoFrameGenerator(keras.utils.Sequence): 12 | ''' 13 | Video frame generator generates batch of frames from a video directory. Videos should be 14 | classified in classes directories. E.g: 15 | videos/class1/file1.avi 16 | videos/class1/file2.avi 17 | videos/class2/file3.avi 18 | ''' 19 | def __init__(self, from_dir, batch_size=8, shape=(299, 299, 3), nbframe=16, 20 | shuffle=True, transform:keras.preprocessing.image.ImageDataGenerator=None 21 | ): 22 | """ 23 | Create a Video Frame Generator with data augmentation. 24 | 25 | Usage example: 26 | gen = VideoFrameGenerator('./out/videos/', 27 | batch_size=5, 28 | nbframe=3, 29 | transform=keras.preprocessing.image.ImageDataGenerator(rotation_range=5, horizontal_flip=True)) 30 | 31 | Arguments: 32 | - from_dir: path to the data directory where resides videos, 33 | videos should be splitted in directories that are name as labels 34 | - batch_size: number of videos to generate 35 | - nbframe: number of frames per video to send 36 | - shuffle: boolean, shuffle data at start and after each epoch 37 | - transform: a keras ImageGenerator configured with random transformations 38 | to apply on each frame. Each video will be processed with the same 39 | transformation at one time to not break consistence. 40 | """ 41 | 42 | self.from_dir = from_dir 43 | self.nbframe = nbframe 44 | self.batch_size = batch_size 45 | self.target_shape = shape 46 | self.shuffle = shuffle 47 | self.transform = transform 48 | 49 | # the list of classes, built in __list_all_files 50 | self.classes = [] 51 | self.files = [] 52 | self.data = [] 53 | 54 | # prepare the list 55 | self.__filecount = 0 56 | self.__list_all_files() 57 | 58 | 59 | def __len__(self): 60 | """ Length of the generator 61 | Warning: it gives the number of loop to do, not the number of files or 62 | frames. The result is number_of_video/batch_size. You can use it as 63 | `step_per_epoch` or `validation_step` for `model.fit_generator` parameters. 
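        For example (hypothetical numbers): with 320 videos on disk and
        batch_size=16, the generator reports 320 // 16 = 20 steps per epoch.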
64 | """ 65 | return self.__filecount//self.batch_size 66 | 67 | def __getitem__(self, index): 68 | """ Generator needed method - return a batch of `batch_size` video 69 | block with `self.nbframe` for each 70 | """ 71 | indexes = self.data[index*self.batch_size:(index+1)*self.batch_size] 72 | X, Y = self.__data_aug(indexes) 73 | return X, Y 74 | 75 | def on_epoch_end(self): 76 | """ When epoch has finished, random shuffle images in memory """ 77 | if self.shuffle: 78 | random.shuffle(self.data) 79 | 80 | def __list_all_files(self): 81 | """ List and inject images in memory """ 82 | self.classes = glob.glob(os.path.join(self.from_dir, '*')) 83 | self.classes = [os.path.basename(c) for c in self.classes] 84 | self.__filecount = len(glob.glob(os.path.join(self.from_dir, '*/*'))) 85 | 86 | i = 1 87 | print("Inject frames in memory, could take a while...") 88 | for classname in self.classes: 89 | files = glob.glob(os.path.join(self.from_dir, classname, '*')) 90 | for file in files: 91 | print('\rProcessing file %d/%d' % (i, self.__filecount), end='') 92 | i+=1 93 | self.__openframe(classname, file) 94 | 95 | if self.shuffle: 96 | random.shuffle(self.data) 97 | 98 | 99 | def __openframe(self, classname, file): 100 | """Append ORIGNALS frames in memory, transformations are made on the fly""" 101 | frames = [] 102 | vid = cv.VideoCapture(file) 103 | while True: 104 | grabbed, frame = vid.read() 105 | if not grabbed: 106 | break 107 | frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB) 108 | frame = cv.resize(frame, self.target_shape[:2]) 109 | frames.append(frame) 110 | 111 | step = len(frames)//self.nbframe 112 | frames = frames[::step] 113 | if len(frames) >= self.nbframe: 114 | frames = frames[:self.nbframe] 115 | 116 | # add frames in memory 117 | frames = np.array(frames, dtype=np.float32) 118 | frames = keras.applications.xception.preprocess_input(frames) 119 | if len(frames) == self.nbframe: 120 | self.data.append((classname, frames)) 121 | else: 122 | print('\n%s/%s has not enought frames ==> %d' % (classname, file, len(frames))) 123 | 124 | def __data_aug(self, batch): 125 | """ Make random transformation based on ImageGenerator arguments""" 126 | T = None 127 | if self.transform: 128 | T = self.transform.get_random_transform(self.target_shape[:2]) 129 | 130 | X, Y = [], [] 131 | for y, images in batch: 132 | Y.append(self.classes.index(y)) # label 133 | x = [] 134 | for img in images: 135 | if T: 136 | x.append(self.transform.apply_transform(img, T)) 137 | else: 138 | x.append(img) 139 | 140 | X.append(x) 141 | 142 | return np.array(X), keras.utils.to_categorical(Y, num_classes=len(self.classes)) 143 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.2.4 2 | numpy==1.16.2 3 | scipy==1.2.0 4 | opencv_contrib_python==4.2.0.32 5 | pandas==1.0.5 6 | matplotlib==3.0.2 7 | scikit_learn==0.23.2 8 | --------------------------------------------------------------------------------