├── .gitignore
├── LICENSE
├── README.md
├── calculate_video_level_scores.py
├── create_data_folders.sh
├── dataset
└── .gitignore
├── docs
├── bibliography
│ └── bibliography.bib
├── chapterheader.sty
├── chapters
│ ├── 01_introduction.tex
│ ├── 02_taxonomy.tex
│ ├── 03_problem_description.tex
│ ├── 04_experimentation.tex
│ └── 05_conclusions.tex
├── images
│ ├── 2d_conv.pdf
│ ├── 3d_conv.pdf
│ ├── avenue-anomaly.png
│ ├── boss-anomaly.png
│ ├── cnn_lstm.pdf
│ ├── cnn_lstm_violence.pdf
│ ├── extractor_acc.pdf
│ ├── extractor_loss.pdf
│ ├── original_model.pdf
│ ├── pr_overlay.pdf
│ ├── roc-curve.jpg
│ ├── roc_overlay.pdf
│ ├── sdae_psvm.pdf
│ ├── sketches
│ │ ├── 2d_3d_conv.drawio
│ │ ├── cnn-lstm.drawio
│ │ ├── original_model.drawio
│ │ └── taxonomy-steps.drawio
│ ├── taxonomy_steps.pdf
│ ├── ucf-examples
│ │ ├── arson-abnormal.png
│ │ ├── arson-normal.png
│ │ ├── explosion-abnormal.png
│ │ ├── explosion-normal.png
│ │ ├── normal-1.png
│ │ ├── normal-2.png
│ │ ├── normal-3.png
│ │ ├── normal-4.png
│ │ ├── roadaccident-abnormal.png
│ │ ├── roadaccident-normal.png
│ │ ├── stealing-abnormal.png
│ │ └── stealing-normal.png
│ ├── ucsd-anomaly.png
│ └── umn-anomaly.png
├── main.tex
├── memoria_TFM_Luque_Sanchez_Francisco.pdf
├── prefaces
│ ├── cover.tex
│ ├── english_abstract.tex
│ ├── licensing.tex
│ └── spanish_abstract.tex
├── slides
│ ├── images
│ │ ├── gifs
│ │ │ ├── Assault049_x264.gif
│ │ │ └── Stealing019_x264.gif
│ │ ├── original-model.pdf
│ │ ├── proposal.pdf
│ │ ├── taxonomy-steps.pdf
│ │ └── ucf
│ │ │ ├── arson-abnormal.png
│ │ │ ├── normal-1.png
│ │ │ ├── roadaccident-abnormal.png
│ │ │ └── stealing-abnormal.png
│ ├── slides.pdf
│ └── slides.tex
└── variables.sty
├── original_model
├── README.md
├── c3d.py
├── calculate_metrics.py
├── classifier.py
├── compute_frames.py
├── configuration.py
├── display_predictions.py
├── extract_features.py
├── parameters.py
├── predict_test_set.py
├── preprocess_features.py
├── train_classifier.py
├── trained_models
│ └── .gitignore
└── utils
│ ├── array_util.py
│ ├── video_util.py
│ └── visualization_util.py
├── overlay_curves.py
├── proposal
├── README.md
├── calculate_metrics.py
├── classifier.py
├── configuration.py
├── display_predictions.py
├── extract_temporal_features.py
├── models.py
├── parameters.py
├── predict_test_set.py
├── preprocess_features.py
├── train_classifier.py
├── train_feature_extractor.py
├── trained_models
│ └── .gitignore
├── utils
│ ├── array_util.py
│ ├── video_util.py
│ └── visualization_util.py
└── video_data_generator.py
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Data folders
132 | dataset/
133 | predictions_c3d/
134 | processed_c3d_features/
135 | raw_c3d_features/
136 | processed_lstm_features/
137 | raw_lstm_features/
138 | raw_lstm_features_old/
139 | predictions_lstm/
140 | ucf101/
141 |
142 | # Trained models folders
143 | original_model/trained_models/
144 | proposal/trained_models/
145 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Learning for Crowd Behavior Analysis in Videosurveillance
2 |
3 | Master's thesis in Data Sciences: Study on the use of Deep Learning
4 | for Crowd Behavior Analysis from videosurveillance
5 | sources.
6 |
7 | ## Documents
8 |
9 | The main report (in Spanish) can be downloaded from
10 | [here](https://github.com/fluque1995/tfm-anomaly-detection/blob/master/docs/memoria_TFM_Luque_Sanchez_Francisco.pdf).
11 |
12 | Slides used in the public defense (also in Spanish) can be downloaded
13 | from
14 | [here](https://github.com/fluque1995/tfm-anomaly-detection/blob/master/docs/slides/slides.pdf).
15 |
16 | ## Theoretical study
17 |
18 | Theoretical study consists of a proposal of taxonomy for crowd
19 | behavior analysis, published on Information Fusion with the title
20 | _Revisiting crowd behavior analysis through deep learning: Taxonomy,
21 | anomaly detection, crowd emotions, datasets, opportunities and
22 | prospects_, which can be found in
23 | https://www.sciencedirect.com/science/article/pii/S1566253520303201.
24 |
25 | ## Experimental analysis
26 |
27 | In the experimental analysis, we have studied the usage of
28 | spatio-temporal features extracted by deep learning models for crowd
29 | anomaly detection. Specifically, we have proposed an enhancement over
30 | the model in _Real-world Anomaly Detection in Surveillance Videos_
31 | (https://arxiv.org/abs/1801.04264). Instead of using 3D convolutional
32 | features, we propose a model which employs convolutional analysis for
33 | frames together with a recurrent network (specifically, an LSTM model)
34 | to learn the temporal structure of the convolutional features.
35 |
36 | Experiments show that our spatio-temporal extractor outperforms the
37 | original proposal by a decent margin, even when it is pretrained on a
38 | smaller dataset for video classification.
39 |
40 | ### Baseline implementations
41 |
42 | This implementation, especially the original model replica (which
43 | can be found in the `original_model` folder), relies strongly on
44 | these previous works:
45 |
46 | - https://github.com/WaqasSultani/AnomalyDetectionCVPR2018: Original
47 | implementation of the model
48 | - https://github.com/ptirupat/AnomalyDetection_CVPR18: Reimplementation
49 | of the original work using Keras
50 | - https://github.com/adamcasson/c3d: Implementation of C3D feature
51 | extractor in Keras using Tensorflow as backend
52 |
53 | The original model has been adapted in order to be self-contained in
54 | this repo and fully executable in Python. Original proposals rely on
55 | external resources and MATLAB for some of the executions, while our
56 | implementation is completely designed in Python, which eases the
57 | execution.
58 |
59 | ### Software requirements
60 |
61 | The project is completely written in Python 3, using the following
62 | libraries:
63 |
64 | - Keras 2.2.4 (TensorFlow GPU backend)
65 | - numpy 1.16.2
66 | - scipy 1.2.0
67 | - opencv_contrib_python 4.0.0.21
68 | - pandas 1.0.5
69 | - matplotlib 3.0.2
70 | - scikit_learn 0.23.2
71 |
72 | A requirements file is provided for `pip` installation. In order to
73 | install dependencies, navigate to the project root folder and execute:
74 |
75 | ``` shell
76 | pip install -r requirements.txt
77 | ```
78 |
79 | ### Data folders structure and datasets
80 |
81 | In order to properly execute the models, some folders must be created
82 | in advance. Executing the script `create_data_folders.sh` at root
83 | project level will create the required folders with their default
84 | names. Also, datasets must be downloaded. In particular:
85 |
86 | - UCF-101 Dataset (https://www.crcv.ucf.edu/data/UCF101.php) is used
87 | to pretrain our feature extractor proposal. You can download the
88 | dataset with the proper folder structure for our experiments from
89 | [here](https://drive.google.com/file/d/1R2E9WjQS8c48S2z7mNTT8Gc1H1z2mnqP/view?usp=sharing)
90 | and place it into the root project folder
91 | - UCF-Crime Dataset (https://www.crcv.ucf.edu/projects/real-world/) is
92 | used for evaluation. We provide a curated version of the dataset
93 | with the proper train-test splits for anomaly detection, as we have
94 | used it in our experiments. In order to use the dataset, you should
95 | download the following files. The main dataset has been split in
96 | three parts due to its size:
97 | - [Test split](https://drive.google.com/file/d/1ynzUmzihaAZkLXJ9jzhppK0eLMskhh1F/view?usp=sharing):
98 | This file contains the test split, and should be decompressed in
99 | the folder `dataset/test`.
100 | - [Train split - normal
101 | videos](https://drive.google.com/file/d/1k63Qlfn3aU3_CpXxxAzPJ_hYqeSo38PP/view?usp=sharing):
102 | This file contains the normal videos for the train split. The
103 | videos contained in this file should be placed inside folder
104 | `dataset/train/normal`
105 | - [Train split - abnormal
106 | videos](https://drive.google.com/file/d/1Zv1CU7PxPDY5WyGc70Kt6SCDFqpjV7gX/view?usp=sharing):
107 | This file contains the abnormal videos for the train split. The
108 | videos contained in this file should be placed inside folder
109 | `dataset/train/abnormal`
110 |
111 | **WARNING**: Datasets are heavy, and models are resource-consuming.
112 | We strongly recommend using dedicated GPUs and computing nodes to
113 | replicate the experiments, since usual PCs are not capable of handling
114 | such volumes of data.
115 |
116 | ### Pretrained models
117 |
118 | We provide several pretrained models used in our experiments:
119 |
120 | - Models from the original proposal: These models represent the
121 | original feature extractor based on C3D and the two sets of weights
122 | for the classifier; the original trained model by the authors
123 | (`weights_L1L2.mat`) and the replica trained by us
124 | (`weights_own.mat`). These models can be downloaded from
125 | [here](https://drive.google.com/file/d/1s3qBXLZzMGAsmG8U0YTJJ4NOOK3KBakl/view?usp=sharing).
126 | The uncompressed folder must be placed in
127 | `original_model/trained_models` folder
128 | - Models from our proposal: These models represent our proposed
129 | extractor based on a spatio-temporal network and the classifier
130 | model trained by us. These models can be downloaded from
131 | [here](https://drive.google.com/file/d/1XJ8DLRSHowEA3JB2xAUQGOzTo1y0ofQj/view?usp=sharing).
132 | The uncompressed folder must be placed in `proposal/trained_models`
133 | folder
134 |
135 | ### Code structure
136 |
137 | Developed code is placed in two main folders, together with some
138 | scripts to calculate results:
139 |
140 | - `calculate_video_level_scores.py`: It calculates the percentage of
141 | normal and abnormal videos in which an alarm has been triggered. For
142 | normal videos, a lower percentage means fewer false alarms, and
143 | thus a better model. For abnormal videos, a greater percentage means
144 | better capability of detecting anomalies.
145 | - `overlay_curves.py`: This script computes the ROC and PR curves
146 | given the predictions of both models, and represents them in two
147 | different graphs (one for ROCs and one for PRs).
148 | - `original_model` folder: The code in this folder is prepared to
149 | replicate the original experiments, from feature extraction with C3D
150 | to training and evaluation of the anomaly classifier.
151 | - `proposal` folder: The code in this folder is prepared to replicate
152 | our experiments. There are scripts to train the feature extractor
153 | over UCF-101, extract features from UCF-Crime dataset using the
154 | pretrained extractor, train and evaluate the anomaly classifier.
155 |
156 | There is more information on how to reproduce the experiments in the
157 | README files inside each folder.
158 |
--------------------------------------------------------------------------------
/calculate_video_level_scores.py:
--------------------------------------------------------------------------------
"""Report video-level alarm rates for every model's prediction files.

For each model name in ``models`` the script reads the arrays stored in the
``predictions_<model>/`` folder (relative to the current working directory)
and prints the percentage of normal and of abnormal videos for which at
least one prediction rounds to a positive label.
"""
import glob
import os

import numpy as np

models = ["c3d", "lstm"]

# Prediction files whose name starts with this prefix belong to normal videos;
# every other file in the folder is treated as an abnormal video.
NORMAL_PREFIX = "Normal"


def split_prediction_files(predictions_dir):
    """Split the files found in *predictions_dir* by video class.

    The class is decided from the file name only: names starting with
    ``NORMAL_PREFIX`` are normal videos, all remaining names are abnormal.

    The previous implementation selected abnormal videos with the glob
    pattern ``[!Normal]*``. That is a character class, so it rejected every
    file whose *first character* was one of ``N``, ``o``, ``r``, ``m``, ``a``
    or ``l`` instead of rejecting the ``Normal`` prefix.

    Args:
        predictions_dir: Path of the folder holding one prediction file per
            video.

    Returns:
        A ``(normal_files, abnormal_files)`` tuple of path lists. Both lists
        are empty when the folder does not exist or contains no files.
    """
    normal_files = []
    abnormal_files = []
    # "*" skips hidden entries, exactly like the original patterns did.
    for path in glob.glob(os.path.join(predictions_dir, "*")):
        if os.path.basename(path).startswith(NORMAL_PREFIX):
            normal_files.append(path)
        else:
            abnormal_files.append(path)
    return normal_files, abnormal_files


def positive_video_percentage(prediction_files):
    """Return the percentage of videos with at least one positive prediction.

    Each file is loaded with ``np.load``; a video counts as positive when the
    maximum of its rounded predictions is 1.
    NOTE(review): this presumes the stored predictions are scores in [0, 1];
    confirm against the scripts that write the prediction files.

    Args:
        prediction_files: Paths of the prediction files to evaluate.

    Returns:
        The percentage (0-100) of positive videos, or ``None`` when
        *prediction_files* is empty, so callers never divide by zero.
    """
    if not prediction_files:
        return None

    positive_videos = 0
    for path in prediction_files:
        preds = np.load(path)
        positive_videos += np.max(np.round(preds))
    return 100 * positive_videos / len(prediction_files)


def _report(label, percentage):
    """Print one result line, tolerating a missing percentage."""
    if percentage is None:
        print("{} videos with positive labels: n/a "
              "(no prediction files found)".format(label))
    else:
        print("{} videos with positive labels: {} %".format(
            label, percentage))


def main():
    """Print the normal and abnormal alarm rates of every model."""
    for model in models:
        normal_files, abnormal_files = split_prediction_files(
            "predictions_{}".format(model))

        print("MODEL: {}".format(model))
        _report("Normal", positive_video_percentage(normal_files))
        _report("Abnormal", positive_video_percentage(abnormal_files))


if __name__ == "__main__":
    main()
33 |
--------------------------------------------------------------------------------
/create_data_folders.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Create the folders used for the dataset, the extracted features and the
# predictions. All paths are relative, so the folders are created inside the
# directory the script is run from.

# Stop at the first failing command so that "Done" is only printed when every
# folder has actually been created.
set -euo pipefail

echo "Creating dataset folders..."
mkdir -p dataset/{train/{abnormal,normal},test}
echo "Creating features folders..."
# Expands to raw/processed x c3d/lstm feature folders, each with the same
# train/{abnormal,normal} and test layout as the dataset folder.
mkdir -p {raw,processed}_{c3d,lstm}_features/{train/{abnormal,normal},test}
echo "Creating predictions folders..."
mkdir -p predictions_{c3d,lstm}
echo "Done"
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dataset/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/dataset/.gitignore
--------------------------------------------------------------------------------
/docs/bibliography/bibliography.bib:
--------------------------------------------------------------------------------
1 | @book{ma2009intelligent,
2 | title={Intelligent video surveillance: systems and technology},
3 | author={Ma, Yunqian and Qian, Gang},
4 | year={2009},
5 | publisher={CRC Press}
6 | }
7 |
8 | @article{zitouni2016advances,
9 | title={Advances and trends in visual crowd analysis: A systematic survey and evaluation of crowd modelling techniques},
10 | author={Zitouni, M Sami and Bhaskar, Harish and Dias, J and Al-Mualla, Mohammed E},
11 | journal={Neurocomputing},
12 | volume={186},
13 | pages={139--159},
14 | year={2016},
15 | publisher={Elsevier}
16 | }
17 |
18 | @inproceedings{swathi2017crowd,
19 | title={Crowd behavior analysis: A survey},
20 | author={Swathi, HY and Shivakumar, G and Mohana, HS},
21 | booktitle={2017 international conference on recent advances in electronics and communication technology (ICRAECT)},
22 | pages={169--178},
23 | year={2017},
24 | organization={IEEE}
25 | }
26 |
27 | @article{nguyen2016human,
28 | title={Human detection from images and videos: A survey},
29 | author={Nguyen, Duc Thanh and Li, Wanqing and Ogunbona, Philip O},
30 | journal={Pattern Recognition},
31 | volume={51},
32 | pages={148--175},
33 | year={2016},
34 | publisher={Elsevier}
35 | }
36 |
37 | @inproceedings{garate2009crowd,
38 | title={Crowd event recognition using hog tracker},
39 | author={Garate, Carolina and Bilinsky, Piotr and Bremond, Fran{\c{c}}ois},
40 | booktitle={2009 Twelfth IEEE International Workshop on Performance Evaluation of Tracking and Surveillance},
41 | pages={1--6},
42 | year={2009},
43 | organization={IEEE}
44 | }
45 |
46 | @article{ciaparrone2020deep,
47 | title={Deep learning in video multi-object tracking: A survey},
48 | author={Ciaparrone, Gioele and S{\'a}nchez, Francisco Luque and Tabik, Siham and Troiano, Luigi and Tagliaferri, Roberto and Herrera, Francisco},
49 | journal={Neurocomputing},
50 | volume={381},
51 | pages={61--88},
52 | year={2020},
53 | publisher={Elsevier}
54 | }
55 |
56 | @inproceedings{mahadevan2010anomaly,
57 | title={Anomaly detection in crowded scenes},
58 | author={Mahadevan, Vijay and Li, Weixin and Bhalodia, Viral and Vasconcelos, Nuno},
59 | booktitle={2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
60 | pages={1975--1981},
61 | year={2010},
62 | organization={IEEE}
63 | }
64 |
65 | @inproceedings{lu2013abnormal,
66 | title={Abnormal event detection at 150 fps in matlab},
67 | author={Lu, Cewu and Shi, Jianping and Jia, Jiaya},
68 | booktitle={Proceedings of the IEEE international conference on computer vision},
69 | pages={2720--2727},
70 | year={2013}
71 | }
72 |
73 | @inproceedings{mehran2009abnormal,
74 | title={Abnormal crowd behavior detection using social force model},
75 | author={Mehran, Ramin and Oyama, Alexis and Shah, Mubarak},
76 | booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition},
77 | pages={935--942},
78 | year={2009},
79 | organization={IEEE}
80 | }
81 |
82 | @inproceedings{liu2018future,
83 | title={Future frame prediction for anomaly detection--a new baseline},
84 | author={Liu, Wen and Luo, Weixin and Lian, Dongze and Gao, Shenghua},
85 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
86 | pages={6536--6545},
87 | year={2018}
88 | }
89 |
90 | @article{blunsden2010behave,
91 | title={The BEHAVE video dataset: ground truthed video for multi-person behavior classification},
92 | author={Blunsden, Scott and Fisher, RB},
93 | journal={Annals of the BMVA},
94 | volume={4},
95 | number={1-12},
96 | pages={4},
97 | year={2010},
98 | publisher={British Machine Vision Association}
99 | }
100 |
101 | @inproceedings{velastin2017people,
102 | title={People Detection and Pose Classification Inside a Moving Train Using Computer Vision},
103 | author={Velastin, Sergio A and G{\'o}mez-Lira, Diego A},
104 | booktitle={International Visual Informatics Conference},
105 | pages={319--330},
106 | year={2017},
107 | organization={Springer}
108 | }
109 |
110 | @misc{ut-interaction,
111 | author = "Ryoo, M. S. and Aggarwal, J. K.",
112 | title = "{UT}-{I}nteraction {D}ataset, {ICPR} contest on {S}emantic {D}escription of {H}uman {A}ctivities ({SDHA})",
113 | year = "2010",
114 | url = {http://cvrc.ece.utexas.edu/SDHA2010/Human\_Interaction.html}
115 | }
116 |
117 | @inproceedings{sultani2018real,
118 | title={Real-world anomaly detection in surveillance videos},
119 | author={Sultani, Waqas and Chen, Chen and Shah, Mubarak},
120 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
121 | pages={6479--6488},
122 | year={2018}
123 | }
124 |
125 | @inproceedings{nievas2011violence,
126 | title= {Movies Fight Detection Dataset},
127 | author= {Nievas, Enrique Bermejo and Suarez, Oscar Deniz and Garcia, Gloria Bueno and Sukthankar, Rahul},
128 | booktitle= {Computer Analysis of Images and Patterns},
129 | pages= {332--339},
130 | year= {2011},
131 | organization= {Springer},
132 | keywords= {action recognition, fight detection, video surveillance},
133 | terms= {},
134 | url= {http://visilab.etsii.uclm.es/personas/oscar/FightDetection/}
135 | }
136 |
137 | @inproceedings{nievas2011hockey,
138 | title= {Hockey Fight Detection Dataset},
139 | author= {Nievas, Enrique Bermejo and Suarez, Oscar Deniz and Garcia, Gloria Bueno and Sukthankar, Rahul},
140 | booktitle= {Computer Analysis of Images and Patterns},
141 | pages= {332--339},
142 | year= {2011},
143 | organization= {Springer},
144 | keywords= {action recognition, fight detection, video surveillance},
145 | terms= {},
146 | url= {http://visilab.etsii.uclm.es/personas/oscar/FightDetection/}
147 | }
148 |
149 | @inproceedings{scholkopf2000support,
150 | title={Support vector method for novelty detection},
151 | author={Sch{\"o}lkopf, Bernhard and Williamson, Robert C and Smola, Alex J and Shawe-Taylor, John and Platt, John C},
152 | booktitle={Advances in neural information processing systems},
153 | pages={582--588},
154 | year={2000}
155 | }
156 |
157 | @article{xu2015learning,
158 | title={Learning deep representations of appearance and motion for anomalous event detection},
159 | author={Xu, Dan and Ricci, Elisa and Yan, Yan and Song, Jingkuan and Sebe, Nicu},
160 | journal={arXiv preprint arXiv:1510.01553},
161 | year={2015}
162 | }
163 |
164 | @inproceedings{vincent2008extracting,
165 | title={Extracting and composing robust features with denoising autoencoders},
166 | author={Vincent, Pascal and Larochelle, Hugo and Bengio, Yoshua and Manzagol, Pierre-Antoine},
167 | booktitle={Proceedings of the 25th international conference on Machine learning},
168 | pages={1096--1103},
169 | year={2008}
170 | }
171 |
172 | @inproceedings{horn1981determining,
173 | title={Determining optical flow},
174 | author={Horn, Berthold KP and Schunck, Brian G},
175 | booktitle={Techniques and Applications of Image Understanding},
176 | volume={281},
177 | pages={319--331},
178 | year={1981},
179 | organization={International Society for Optics and Photonics}
180 | }
181 |
182 | @inproceedings{gutoski2017detection,
183 | title={Detection of video anomalies using convolutional autoencoders and one-class support vector machines},
184 | author={Gutoski, Matheus and Aquino, Nelson Marcelo Romero and Ribeiro, Manass{\'e}s and Lazzaretti, Andr{\'e} Eng{\^e}nio and Lopes, Heitor Silv{\'e}rio},
185 | booktitle={XIII Brazilian Congress on Computational Intelligence},
186 | volume={2017},
187 | year={2017}
188 | }
189 |
190 | @article{canny1986computational,
191 | title={A computational approach to edge detection},
192 | author={Canny, John},
193 | journal={IEEE Transactions on pattern analysis and machine intelligence},
194 | number={6},
195 | pages={679--698},
196 | year={1986},
197 | publisher={IEEE}
198 | }
199 |
200 | @inproceedings{yang2019deep,
201 | title={Deep Learning and One-class SVM based Anomalous Crowd Detection},
202 | author={Yang, Meng and Rajasegarar, Sutharshan and Erfani, Sarah M and Leckie, Christopher},
203 | booktitle={2019 International Joint Conference on Neural Networks (IJCNN)},
204 | pages={1--8},
205 | year={2019},
206 | organization={IEEE}
207 | }
208 |
209 | @article{lucas1981iterative,
210 | title={An iterative image registration technique with an application to stereo vision},
211 | author={Lucas, Bruce D and Kanade, Takeo and others},
212 | year={1981},
213 | publisher={Vancouver, British Columbia}
214 | }
215 |
216 | @article{hinton2006fast,
217 | title={A fast learning algorithm for deep belief nets},
218 | author={Hinton, Geoffrey E and Osindero, Simon and Teh, Yee-Whye},
219 | journal={Neural computation},
220 | volume={18},
221 | number={7},
222 | pages={1527--1554},
223 | year={2006},
224 | publisher={MIT Press}
225 | }
226 |
227 | @article{fang2016abnormal,
228 | title={Abnormal event detection in crowded scenes based on deep learning},
229 | author={Fang, Zhijun and Fei, Fengchang and Fang, Yuming and Lee, Changhoon and Xiong, Naixue and Shu, Lei and Chen, Sheng},
230 | journal={Multimedia Tools and Applications},
231 | volume={75},
232 | number={22},
233 | pages={14617--14639},
234 | year={2016},
235 | publisher={Springer}
236 | }
237 |
238 | @article{fang2011bottom,
239 | title={Bottom-up saliency detection model based on human visual sensitivity and amplitude spectrum},
240 | author={Fang, Yuming and Lin, Weisi and Lee, Bu-Sung and Lau, Chiew-Tong and Chen, Zhenzhong and Lin, Chia-Wen},
241 | journal={IEEE Transactions on Multimedia},
242 | volume={14},
243 | number={1},
244 | pages={187--198},
245 | year={2011},
246 | publisher={IEEE}
247 | }
248 |
249 | @article{chan2015pcanet,
250 | title={PCANet: A simple deep learning baseline for image classification?},
251 | author={Chan, Tsung-Han and Jia, Kui and Gao, Shenghua and Lu, Jiwen and Zeng, Zinan and Ma, Yi},
252 | journal={IEEE transactions on image processing},
253 | volume={24},
254 | number={12},
255 | pages={5017--5032},
256 | year={2015},
257 | publisher={IEEE}
258 | }
259 |
260 | @inproceedings{smeureanu2017deep,
261 | title={Deep appearance features for abnormal behavior detection in video},
262 | author={Smeureanu, Sorina and Ionescu, Radu Tudor and Popescu, Marius and Alexe, Bogdan},
263 | booktitle={International Conference on Image Analysis and Processing},
264 | pages={779--789},
265 | year={2017},
266 | organization={Springer}
267 | }
268 |
269 | @article{chatfield2014return,
270 | title={Return of the devil in the details: Delving deep into convolutional nets},
271 | author={Chatfield, Ken and Simonyan, Karen and Vedaldi, Andrea and Zisserman, Andrew},
272 | journal={arXiv preprint arXiv:1405.3531},
273 | year={2014}
274 | }
275 |
276 | @article{sun2019abnormal,
277 | title={Abnormal event detection for video surveillance using deep one-class learning},
278 | author={Sun, Jiayu and Shao, Jie and He, Chengkun},
279 | journal={Multimedia Tools and Applications},
280 | volume={78},
281 | number={3},
282 | pages={3633--3647},
283 | year={2019},
284 | publisher={Springer}
285 | }
286 |
287 | @article{singh2020crowd,
288 | title={Crowd anomaly detection using aggregation of ensembles of fine-tuned ConvNets},
289 | author={Singh, Kuldeep and Rajora, Shantanu and Vishwakarma, Dinesh Kumar and Tripathi, Gaurav and Kumar, Sandeep and Walia, Gurjit Singh},
290 | journal={Neurocomputing},
291 | volume={371},
292 | pages={188--198},
293 | year={2020},
294 | publisher={Elsevier}
295 | }
296 |
297 | @article{huang2018learning,
298 | title={Learning multimodal deep representations for crowd anomaly event detection},
299 | author={Huang, Shaonian and Huang, Dongjun and Zhou, Xinmin},
300 | journal={Mathematical Problems in Engineering},
301 | volume={2018},
302 | year={2018},
303 | publisher={Hindawi}
304 | }
305 |
306 | @inproceedings{lee2009convolutional,
307 | title={Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations},
308 | author={Lee, Honglak and Grosse, Roger and Ranganath, Rajesh and Ng, Andrew Y},
309 | booktitle={Proceedings of the 26th annual international conference on machine learning},
310 | pages={609--616},
311 | year={2009}
312 | }
313 |
314 | @inproceedings{hinami2017joint,
315 | title={Joint detection and recounting of abnormal events by learning deep generic knowledge},
316 | author={Hinami, Ryota and Mei, Tao and Satoh, Shin'ichi},
317 | booktitle={Proceedings of the IEEE International Conference on Computer Vision},
318 | pages={3619--3627},
319 | year={2017}
320 | }
321 |
322 | @inproceedings{girshick2015fast,
323 | title={Fast r-cnn},
324 | author={Girshick, Ross},
325 | booktitle={Proceedings of the IEEE international conference on computer vision},
326 | pages={1440--1448},
327 | year={2015}
328 | }
329 |
330 | @inproceedings{sabokrou2015real,
331 | title={Real-time anomaly detection and localization in crowded scenes},
332 | author={Sabokrou, Mohammad and Fathy, Mahmood and Hoseini, Mojtaba and Klette, Reinhard},
333 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition workshops},
334 | pages={56--62},
335 | year={2015}
336 | }
337 |
338 | @article{sabokrou2017fast,
339 | title={Fast and accurate detection and localization of abnormal behavior in crowded scenes},
340 | author={Sabokrou, Mohammad and Fathy, Mahmood and Moayed, Zahra and Klette, Reinhard},
341 | journal={Machine Vision and Applications},
342 | volume={28},
343 | number={8},
344 | pages={965--985},
345 | year={2017},
346 | publisher={Springer}
347 | }
348 |
349 | @article{sabokrou2017deep,
350 | title={Deep-cascade: Cascading 3d deep neural networks for fast anomaly detection and localization in crowded scenes},
351 | author={Sabokrou, Mohammad and Fayyaz, Mohsen and Fathy, Mahmood and Klette, Reinhard},
352 | journal={IEEE Transactions on Image Processing},
353 | volume=26,
354 | number=4,
355 | pages={1992--2004},
356 | year=2017,
357 | publisher={IEEE}
358 | }
359 |
360 | @article{feng2017learning,
361 | title={Learning deep event models for crowd anomaly detection},
362 | author={Feng, Yachuang and Yuan, Yuan and Lu, Xiaoqiang},
363 | journal={Neurocomputing},
364 | volume={219},
365 | pages={548--556},
366 | year={2017},
367 | publisher={Elsevier}
368 | }
369 |
370 | @article{viroli2019deep,
371 | title={Deep gaussian mixture models},
372 | author={Viroli, Cinzia and McLachlan, Geoffrey J},
373 | journal={Statistics and Computing},
374 | volume={29},
375 | number={1},
376 | pages={43--51},
377 | year={2019},
378 | publisher={Springer}
379 | }
380 |
381 | @article{ramchandran2019unsupervised,
382 | title={Unsupervised deep learning system for local anomaly event detection in crowded scenes},
383 | author={Ramchandran, Anitha and Sangaiah, Arun Kumar},
384 | journal={Multimedia Tools and Applications},
385 | pages={1--21},
386 | year={2019},
387 | publisher={Springer}
388 | }
389 |
390 | @inproceedings{ravanbakhsh2018plug,
391 | title={Plug-and-play cnn for crowd motion analysis: An application in abnormal event detection},
392 | author={Ravanbakhsh, Mahdyar and Nabi, Moin and Mousavi, Hossein and Sangineto, Enver and Sebe, Nicu},
393 | booktitle={2018 IEEE Winter Conference on Applications of Computer Vision (WACV)},
394 | pages={1689--1698},
395 | year={2018},
396 | organization={IEEE}
397 | }
398 |
399 | @article{zhou2016spatial,
400 | title={Spatial--temporal convolutional neural networks for anomaly detection and localization in crowded scenes},
401 | author={Zhou, Shifu and Shen, Wei and Zeng, Dan and Fang, Mei and Wei, Yuanwang and Zhang, Zhijiang},
402 | journal={Signal Processing: Image Communication},
403 | volume={47},
404 | pages={358--368},
405 | year={2016},
406 | publisher={Elsevier}
407 | }
408 |
409 | @inproceedings{ravanbakhsh2019training,
410 | title={Training adversarial discriminators for cross-channel abnormal event detection in crowds},
411 | author={Ravanbakhsh, Mahdyar and Sangineto, Enver and Nabi, Moin and Sebe, Nicu},
412 | booktitle={2019 IEEE Winter Conference on Applications of Computer Vision (WACV)},
413 | pages={1896--1904},
414 | year={2019},
415 | organization={IEEE}
416 | }
417 |
418 | @inproceedings{goodfellow2014generative,
419 | title={Generative adversarial nets},
420 | author={Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
421 | booktitle={Advances in neural information processing systems},
422 | pages={2672--2680},
423 | year={2014}
424 | }
425 |
426 | @inproceedings{kumar2017d,
427 | title={D-CAD: Deep and crowded anomaly detection},
428 | author={Kumar, Krishan and Kumar, Anurag and Bahuguna, Ayush},
429 | booktitle={Proceedings of the 7th International Conference on Computer and Communication Technology},
430 | pages={100--105},
431 | year={2017}
432 | }
433 |
434 | @article{wiskott2002slow,
435 | title={Slow feature analysis: Unsupervised learning of invariances},
436 | author={Wiskott, Laurenz and Sejnowski, Terrence J},
437 | journal={Neural computation},
438 | volume={14},
439 | number={4},
440 | pages={715--770},
441 | year={2002},
442 | publisher={MIT Press}
443 | }
444 |
445 | @article{sabokrou2018deep,
446 | title={Deep-anomaly: Fully convolutional neural network for fast anomaly detection in crowded scenes},
447 | author={Sabokrou, Mohammad and Fayyaz, Mohsen and Fathy, Mahmood and Moayed, Zahra and Klette, Reinhard},
448 | journal={Computer Vision and Image Understanding},
449 | volume={172},
450 | pages={88--97},
451 | year={2018},
452 | publisher={Elsevier}
453 | }
454 |
455 | @article{wang2019abnormal,
456 | title={Abnormal behavior detection in videos using deep learning},
457 | author={Wang, Jun and Xia, Limin},
458 | journal={Cluster Computing},
459 | volume={22},
460 | number={4},
461 | pages={9229--9239},
462 | year={2019},
463 | publisher={Springer}
464 | }
465 |
466 | @incollection{tay2019robust,
467 | title={A robust abnormal behavior detection method using convolutional neural network},
468 | author={Tay, Nian Chi and Connie, Tee and Ong, Thian Song and Goh, Kah Ong Michael and Teh, Pin Shen},
469 | booktitle={Computational Science and Technology},
470 | pages={37--47},
471 | year={2019},
472 | publisher={Springer}
473 | }
474 |
475 | @article{kecceli2017violent,
476 | title={Violent activity detection with transfer learning method},
477 | author={Ke{\c{c}}eli, AS and Kaya, Aydin},
478 | journal={Electronics Letters},
479 | volume={53},
480 | number={15},
481 | pages={1047--1048},
482 | year={2017},
483 | publisher={IET}
484 | }
485 |
486 | @article{kononenko1997overcoming,
487 | title={Overcoming the myopia of inductive learning algorithms with RELIEFF},
488 | author={Kononenko, Igor and {\v{S}}imec, Edvard and Robnik-{\v{S}}ikonja, Marko},
489 | journal={Applied Intelligence},
490 | volume={7},
491 | number={1},
492 | pages={39--55},
493 | year={1997},
494 | publisher={Springer}
495 | }
496 |
497 | @inproceedings{sudhakaran2017learning,
498 | title={Learning to detect violent videos using convolutional long short-term memory},
499 | author={Sudhakaran, Swathikiran and Lanz, Oswald},
500 | booktitle={2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS)},
501 | pages={1--6},
502 | year={2017},
503 | organization={IEEE}
504 | }
505 |
506 | @inproceedings{marsden2017resnetcrowd,
507 | title={ResnetCrowd: A residual deep learning architecture for crowd counting, violent behaviour detection and crowd density level classification},
508 | author={Marsden, Mark and McGuinness, Kevin and Little, Suzanne and O'Connor, Noel E},
509 | booktitle={2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS)},
510 | pages={1--7},
511 | year={2017},
512 | organization={IEEE}
513 | }
514 |
515 | @article{song2019novel,
516 | title={A novel violent video detection scheme based on modified 3D convolutional neural networks},
517 | author={Song, Wei and Zhang, Dongliang and Zhao, Xiaobing and Yu, Jing and Zheng, Rui and Wang, Antai},
518 | journal={IEEE Access},
519 | volume={7},
520 | pages={39172--39179},
521 | year={2019},
522 | publisher={IEEE}
523 | }
524 |
525 | @article{fenil2019real,
526 | title={Real time violence detection framework for football stadium comprising of big data analysis and deep learning through bidirectional LSTM},
527 | author={Fenil, E and Manogaran, Gunasekaran and Vivekananda, GN and Thanjaivadivel, T and Jeeva, S and Ahilan, A and others},
528 | journal={Computer Networks},
529 | volume={151},
530 | pages={191--200},
531 | year={2019},
532 | publisher={Elsevier}
533 | }
534 |
535 | @inproceedings{sumon2019violent,
536 | title={Violent crowd flow detection using deep learning},
537 | author={Sumon, Shakil Ahmed and Shahria, MD Tanzil and Goni, MD Raihan and Hasan, Nazmul and Almarufuzzaman, AM and Rahman, Rashedur M},
538 | booktitle={Asian Conference on Intelligent Information and Database Systems},
539 | pages={613--625},
540 | year={2019},
541 | organization={Springer}
542 | }
543 |
544 | @inproceedings{cheng2017abnormal,
545 | title={Abnormal behavior detection for harbour operator safety under complex video surveillance scenes},
546 | author={Cheng, Guoan and Wang, Shengke and Guo, Teng and Han, Xiao and Cai, Guiyan and Gao, Feng and Dong, Junyu},
547 | booktitle={2017 International Conference on Security, Pattern Analysis, and Cybernetics (SPAC)},
548 | pages={324--328},
549 | year={2017},
550 | organization={IEEE}
551 | }
552 |
553 | @inproceedings{liu2016ssd,
554 | title={{SSD}: Single shot multibox detector},
555 | author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C},
556 | booktitle={European conference on computer vision},
557 | pages={21--37},
558 | year={2016},
559 | organization={Springer}
560 | }
561 |
562 | @inproceedings{tran2015learning,
563 | title={Learning spatiotemporal features with 3D convolutional networks},
564 | author={Tran, Du and Bourdev, Lubomir and Fergus, Rob and Torresani, Lorenzo and Paluri, Manohar},
565 | booktitle={Proceedings of the IEEE international conference on computer vision},
566 | pages={4489--4497},
567 | year={2015}
568 | }
569 |
570 | @inproceedings{karpathy2014large,
571 | title={Large-scale video classification with convolutional neural networks},
572 | author={Karpathy, Andrej and Toderici, George and Shetty, Sanketh and Leung, Thomas and Sukthankar, Rahul and Fei-Fei, Li},
573 | booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
574 | pages={1725--1732},
575 | year={2014}
576 | }
577 |
578 | @inproceedings{chollet2017xception,
579 | title={Xception: Deep learning with depthwise separable convolutions},
580 | author={Chollet, Fran{\c{c}}ois},
581 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
582 | pages={1251--1258},
583 | year={2017}
584 | }
585 |
586 | @inproceedings{deng2009imagenet,
587 | title={{ImageNet}: A large-scale hierarchical image database},
588 | author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
589 | booktitle={2009 IEEE conference on computer vision and pattern recognition},
590 | pages={248--255},
591 | year={2009},
592 | organization={IEEE}
593 | }
594 |
595 | @misc{chollet2015keras,
596 | title={Keras},
597 | author={Chollet, Fran{\c{c}}ois and others},
598 | year={2015},
599 | howpublished={\url{https://keras.io}},
600 | }
601 |
602 | @article{hochreiter1997long,
603 | title={Long short-term memory},
604 | author={Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
605 | journal={Neural computation},
606 | volume={9},
607 | number={8},
608 | pages={1735--1780},
609 | year={1997},
610 | publisher={MIT Press}
611 | }
612 |
613 | @article{soomro2012ucf,
614 | title={UCF101: A dataset of 101 human actions classes from videos in the wild},
615 | author={Soomro, Khurram and Zamir, Amir Roshan and Shah, Mubarak},
616 | journal={arXiv preprint arXiv:1212.0402},
617 | year=2012
618 | }
619 |
620 | @article{bradski2000opencv,
621 | title={OpenCV},
622 | author={Bradski, Gary and Kaehler, Adrian},
623 | journal={Dr. Dobb's Journal of Software Tools},
624 | volume={3},
625 | year={2000}
626 | }
627 |
628 | @book{oliphant2006guide,
629 | title={A guide to NumPy},
630 | author={Oliphant, Travis E},
631 | volume={1},
632 | year={2006},
633 | publisher={Trelgol Publishing USA}
634 | }
635 |
636 | @article{van2011numpy,
637 | title={The NumPy array: a structure for efficient numerical computation},
638 | author={Van Der Walt, Stefan and Colbert, S Chris and Varoquaux, Gael},
639 | journal={Computing in Science \& Engineering},
640 | volume={13},
641 | number={2},
642 | pages={22--30},
643 | year={2011},
644 | publisher={IEEE Computer Society}
645 | }
646 |
647 | @software{reback2020pandas,
648 | author = {The pandas development team},
649 | title = {pandas-dev/pandas: Pandas},
650 | month = feb,
651 | year = 2020,
652 | publisher = {Zenodo},
653 | version = {latest},
654 | doi = {10.5281/zenodo.3509134},
655 | url = {https://doi.org/10.5281/zenodo.3509134}
656 | }
657 |
658 | @misc{tensorflow2015-whitepaper,
659 | title={ {TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems},
660 | url={https://www.tensorflow.org/},
661 | note={Software available from tensorflow.org},
662 | author={
663 | Martín~Abadi and
664 | Ashish~Agarwal and
665 | Paul~Barham and
666 | Eugene~Brevdo and
667 | Zhifeng~Chen and
668 | Craig~Citro and
669 | Greg~S.~Corrado and
670 | Andy~Davis and
671 | Jeffrey~Dean and
672 | Matthieu~Devin and
673 | Sanjay~Ghemawat and
674 | Ian~Goodfellow and
675 | Andrew~Harp and
676 | Geoffrey~Irving and
677 | Michael~Isard and
678 | Yangqing Jia and
679 | Rafal~Jozefowicz and
680 | Lukasz~Kaiser and
681 | Manjunath~Kudlur and
682 | Josh~Levenberg and
683 | Dandelion~Mané and
684 | Rajat~Monga and
685 | Sherry~Moore and
686 | Derek~Murray and
687 | Chris~Olah and
688 | Mike~Schuster and
689 | Jonathon~Shlens and
690 | Benoit~Steiner and
691 | Ilya~Sutskever and
692 | Kunal~Talwar and
693 | Paul~Tucker and
694 | Vincent~Vanhoucke and
695 | Vijay~Vasudevan and
696 | Fernanda~Viegas and
697 | Oriol~Vinyals and
698 | Pete~Warden and
699 | Martin~Wattenberg and
700 | Martin~Wicke and
701 | Yuan~Yu and
702 | Xiaoqiang~Zheng},
703 | year={2015},
704 | }
705 |
706 | @article{sanchez2020revisiting,
707 | title={Revisiting crowd behaviour analysis through deep learning: Taxonomy, anomaly detection, crowd emotions, datasets, opportunities and prospects},
708 | author={Luque S{\'a}nchez, Francisco and Hupont, Isabelle and Tabik, Siham and Herrera, Francisco},
709 | journal={Information Fusion},
710 | year={2020},
711 | publisher={Elsevier}
712 | }
713 |
714 | @article{abu2016youtube,
715 | title={{YouTube-8M}: A large-scale video classification benchmark},
716 | author={Abu-El-Haija, Sami and Kothari, Nisarg and Lee, Joonseok and Natsev, Paul and Toderici, George and Varadarajan, Balakrishnan and Vijayanarasimhan, Sudheendra},
717 | journal={arXiv preprint arXiv:1609.08675},
718 | year={2016}
719 | }
720 |
721 | @inproceedings{xingjian2015convolutional,
722 | title={Convolutional {LSTM} network: A machine learning approach for precipitation nowcasting},
723 | author={Shi, Xingjian and Chen, Zhourong and Wang, Hao and Yeung, Dit-Yan and Wong, Wai-Kin and Woo, Wang-chun},
724 | booktitle={Advances in neural information processing systems},
725 | pages={802--810},
726 | year={2015}
727 | }
728 |
729 |
--------------------------------------------------------------------------------
/docs/chapterheader.sty:
--------------------------------------------------------------------------------
1 | \ProvidesPackage{chapterheader}[2017/08/18 Chapter fancy header]
2 | \usepackage[T1]{fontenc}
3 | \usepackage{kpfonts}
4 | \setSingleSpace{1.1}
5 | \SingleSpacing
6 | \usepackage{xcolor,calc, blindtext}
7 | \definecolor{chaptercolor}{gray}{0.8}
8 | % helper macros
9 | \newcommand\numlifter[1]{\raisebox{-2cm}[0pt][0pt]{\smash{#1}}}
10 | \newcommand\numindent{\kern37pt}
11 | \newlength\chaptertitleboxheight
12 | \makechapterstyle{hansen}{
13 | \renewcommand\printchaptername{\raggedleft}
14 | \renewcommand\printchapternum{%
15 | \begingroup%
16 | \leavevmode%
17 | \chapnumfont%
18 | \strut%
19 | \numlifter{\thechapter}%
20 | \numindent%
21 | \endgroup%
22 | }
23 | \renewcommand*{\printchapternonum}{%
24 | \vphantom{\begingroup%
25 | \leavevmode%
26 | \chapnumfont%
27 | \numlifter{\vphantom{9}}%
28 | \numindent%
29 | \endgroup}
30 | \afterchapternum}
31 | \setlength\midchapskip{0pt}
32 | \setlength\beforechapskip{0.5\baselineskip}
33 | \setlength{\afterchapskip}{0.5\baselineskip}
34 | \renewcommand\chapnumfont{%
35 | \fontsize{4cm}{0cm}%
36 | \bfseries%
37 | \sffamily%
38 | \color{chaptercolor}%
39 | }
40 | \renewcommand\chaptitlefont{%
41 | \normalfont%
42 | \huge%
43 | \bfseries%
44 | \raggedleft%
45 | }%
46 | \settototalheight\chaptertitleboxheight{%
47 | \parbox{\textwidth}{\chaptitlefont \strut bg\\bg\strut}}
48 | \renewcommand\printchaptertitle[1]{%
49 | \parbox[t][\chaptertitleboxheight][t]{\textwidth}{%
50 | % \microtypesetup{protrusion=false}% add this if you use microtype
51 | \chaptitlefont\strut ##1\strut}\\~\\
52 | \rule{\linewidth}{0.4pt}
53 | }}
54 | \chapterstyle{hansen}
55 | \aliaspagestyle{chapter}{empty} % just to save some space
56 |
--------------------------------------------------------------------------------
/docs/chapters/01_introduction.tex:
--------------------------------------------------------------------------------
1 | \documentclass[../main.tex]{memoir}
2 |
3 | \begin{document}
4 |
5 | \chapter{Introducción}
6 |
7 | En las últimas décadas se ha experimentado un crecimiento poblacional
8 | sin precedentes alrededor de todo el mundo, con el consecuente aumento
9 | de las aglomeraciones de personas, las cuales llegan a involucrar a
10 | miles de individuos. Además, las tasas de criminalidad y terrorismo se
11 | han disparado de forma similar. La combinación de estos hechos ha
12 | provocado que la videovigilancia masiva se convierta en una
13 | herramienta prioritaria. El número de cámaras de vigilancia instaladas
14 | en el mundo, tanto dentro del ámbito público como en el privado, se ha
15 | multiplicado en los últimos años. El desarrollo tecnológico, además,
16 | está produciendo una importante mejora en la calidad de los vídeos que
17 | se recopilan, a cambio de un aumento importante en el volumen de
18 | información almacenada. Aparece, por tanto, la necesidad de procesar
19 | una gran cantidad de información en forma de archivos de vídeo.\\
20 |
21 | Históricamente, dicho procesamiento se ha realizado de forma manual,
22 | consumiendo una gran cantidad de recursos humanos. Hoy en día, la
23 | velocidad a la que se genera dicha información, y el volumen tan
24 | abismal que se genera diariamente, hace casi imposible la gestión
25 | manual de esta información de forma exhaustiva y adecuada. Además,
26 | esta información debe procesarse en tiempo real en la medida de lo
27 | posible, ya que la respuesta rápida en situaciones de emergencia es
28 | crucial para reducir los efectos de una posible catástrofe. Esto ha
29 | hecho que los métodos clásicos de supervisión humana queden
30 | paulatinamente obsoletos, y aparezca la necesidad de automatizar el
31 | proceso. En este contexto aparece el concepto de la videovigilancia
32 | automática.\\
33 |
34 | La videovigilancia automática es una rama de investigación cuyo
35 | objetivo es el análisis de múltiples fuentes de vídeo en tiempo real,
36 | para la extracción automática de información relevante relacionada con
37 | el comportamiento de los individuos \cite{ma2009intelligent}. Esta
38 | área de investigación aúna dos grandes campos de trabajo dentro del
39 | aprendizaje automático: la visión por computador y el análisis de
40 | series temporales. Dado que el tipo de dato más común dentro de este
41 | contexto son las secuencias de vídeo, por un lado se ha de extraer
42 | información de cada uno de los fotogramas, y por otro información
43 | temporal derivada de la secuencia de dichos fotogramas.\\
44 |
45 | El auge del aprendizaje profundo, además, ha supuesto un avance muy
46 | importante en el desarrollo de modelos en este contexto. Según
47 | \cite{zitouni2016advances}, existen cuatro áreas principales en el
48 | contexto de la videovigilancia automática:
49 |
50 | \begin{enumerate}
51 | \item Detección y seguimiento de individuos
52 | \item Recuento y estimación de densidad de individuos
53 | \item Análisis y clasificación de comportamientos
54 | \item Detección de comportamientos anómalos
55 | \end{enumerate}
56 |
57 | Las tareas 1 y 2 han sido ampliamente estudiadas y los modelos de
58 | aprendizaje clásico son suficientes para la obtención de resultados de
59 | calidad. No obstante, en las tareas 3 y 4 los resultados eran muy
60 | limitados. La aparición de los modelos de aprendizaje profundo y el
61 | aumento de la capacidad de cálculo ha supuesto un avance importante
62 | para todas las áreas. En los dos primeros casos, ha permitido que la
63 | densidad de individuos presentes en la imagen sea más alta antes de
64 | que se produzca una pérdida de rendimiento, y en los dos últimos casos
65 | ha provocado una mejoría muy notable, ya que la complejidad de la
66 | información que estos modelos son capaces de extraer es notablemente
67 | más alta que la extraída por los modelos clásicos.\\
68 |
69 | Esta mejora tan significativa ha provocado que en los últimos años
70 | aparezcan una gran cantidad de trabajos que resuelven alguno de los
71 | problemas relacionados con la videovigilancia automática aplicando
72 | modelos de redes neuronales. No obstante, estos trabajos aparecen
73 | dispersos, y es difícil establecer una comparativa sobre ellos. Esta
74 | problemática es especialmente relevante cuando se trata de
75 | desarrollar nuevos modelos, ya que es difícil recopilar el
76 | conocimiento previo sobre la temática. Esta dispersión radica en
77 | varios factores:\\
78 |
79 | \begin{enumerate}
80 | \item No existe un consenso claro sobre las tareas que deben abordarse
81 | dentro de esta área de investigación.
82 | \item No hay una taxonomía clara para organizar los distintos trabajos
83 | previamente desarrollados.
84 | \item No existe una recopilación de trabajos que afronten esta
85 | problemática desde la perspectiva del aprendizaje profundo.
86 | \end{enumerate}
87 |
88 | En particular, nuestra propuesta de trabajo se engloba dentro del
89 | proyecto de investigación \textit{AI\_MARS-DeepLABD: Artificial
90 | Intelligence system for Monitoring, Alert and Response for Security
91 | in events. Deep Learning for Abnormal Behavior Detection}, el cual
92 | pretende diseñar e implementar sistemas de aprendizaje profundo para
93 | la detección de comportamientos anómalos, y por tanto, enfocaremos el
94 | trabajo en esa dirección. Esto hace que nos centremos especialmente en
95 | la última de las tareas. Es la más novedosa de las áreas listadas
96 | anteriormente, y esto hace que exista una especial incertidumbre
97 | alrededor de la misma. En particular, resulta muy difícil establecer
98 | una organización clara para los trabajos que afrontan esta
99 | problemática, porque el concepto de comportamiento anómalo puede
100 | incluir definiciones muy diversas. Por ejemplo, podemos considerar
101 | como anómalo la presencia de una persona en un área restringida, una
102 | multitud corriendo despavorida, o un pequeño grupo de personas que
103 | inicia una pelea por la calle. Claramente, la fuente de la anomalía en
104 | los tres casos es completamente distinta, y difícilmente comparable.
105 | Esto provoca que la comparativa entre modelos sea compleja. Además, el
106 | número de conjuntos de datos públicos que permitan establecer
107 | comparaciones entre los modelos es relativamente escaso, y un gran
108 | número de trabajos utilizan sus propios conjuntos de datos diseñados
109 | específicamente para el problema que tratan de resolver.\\
110 |
111 | \section{Objetivos del trabajo}
112 |
113 | Dado el contexto previo, los objetivos de este trabajo tratan de
114 | cubrir un estudio profundo del área de la videovigilancia automática,
115 | en particular centrado en la detección de comportamientos anómalos en
116 | vídeo. Los objetivos concretos que se han planteado para el trabajo
117 | son los siguientes:
118 |
119 | \begin{enumerate}
120 | \item Proponer una taxonomía para la organización de los trabajos que
121 | afrontan el problema del análisis de multitudes en videovigilancia.
122 | \item Revisar detalladamente los trabajos propuestos dentro del área
123 | de la detección de comportamientos anómalos utilizando aprendizaje
124 | profundo.
125 | \item Estudiar la extracción de características en vídeo utilizando
126 | modelos de aprendizaje profundo. En concreto, se estudia un modelo
127 | del estado del arte en la detección de anomalías en videovigilancia,
128 | y se propone una mejora basada en un extractor de características
129 | profundo de mayor potencia.
130 | \end{enumerate}
131 |
132 | El resto del trabajo se estructura de la siguiente manera. En el
133 | capítulo \ref{sec:state-of-the-art} se expone el estudio teórico del
134 | trabajo. En él, se propone una taxonomía en etapas que permite agrupar
135 | los trabajos publicados hasta el momento en cuatro etapas, en las que cada
136 | una se apoya en los resultados de las anteriores. Además, se estudian los
137 | principales conjuntos de datos disponibles públicamente y las métricas
138 | que se utilizan para evaluar la calidad de los modelos dentro de esta
139 | área de conocimiento. Finalmente, se resumen los principales trabajos
140 | que utilizan aprendizaje profundo para resolver el problema de la
141 | detección de anomalías en multitudes, estableciendo una división de los
142 | mismos en función de la taxonomía previa.\\
143 |
144 | En el capítulo \ref{sec:model-analysis} se llevará a cabo el análisis
145 | del uso de características espacio-temporales para la detección de
146 | acciones anómalas. Concretamente, tomaremos el trabajo propuesto en
147 | \cite{sultani2018real} y trataremos de mejorar sus resultados
148 | empleando un extractor de características más potente. En dicho
149 | trabajo, proponen una red neuronal convolucional en tres dimensiones
150 | como extractor de características en vídeo. A pesar de ser un modelo
151 | relativamente bueno para capturar características tanto temporales
152 | como espaciales, creemos que las características temporales se ven
153 | infrarrepresentadas. Nuestra hipótesis expone que el uso de un
154 | extractor de características más potente, que combine la capacidad de
155 | trabajar con imágenes de las redes convolucionales 2D con la capacidad
156 | de analizar series temporales de las redes recurrentes, obtendrá
157 | mejores resultados en el problema tratado. En este capítulo se realiza
158 | un estudio teórico del modelo original y se realiza nuestra propuesta
159 | de mejora.
160 |
161 | A continuación, en el capítulo \ref{sec:experiments-and-results} se
162 | detalla la experimentación realizada y los resultados
163 | obtenidos. Finalmente, en el capítulo \ref{sec:conclusions-future-work} se exponen
164 | las conclusiones derivadas del estudio y posibles estudios futuros.
165 |
166 | \end{document}
167 |
168 | %%% Local Variables:
169 | %%% mode: latex
170 | %%% TeX-master: "../main"
171 | %%% End:
172 |
--------------------------------------------------------------------------------
/docs/chapters/05_conclusions.tex:
--------------------------------------------------------------------------------
1 | \documentclass[../main.tex]{memoir}
2 |
3 | \begin{document}
4 |
5 | \chapter{Conclusiones y trabajo futuro}
6 | \label{sec:conclusions-future-work}
7 |
8 | \section{Conclusiones}
9 |
10 | En este trabajo se ha realizado un estudio exhaustivo del uso del
11 | aprendizaje profundo para el análisis de multitudes en
12 | videovigilancia. En primer lugar, se ha llevado a cabo un análisis
13 | detallado del estado del arte, que ha resultado en la publicación de
14 | un artículo científico en la revista \textit{Information Fusion}, y
15 | que lleva por título ``Revisiting crowd behaviour analysis through
16 | deep learning: Taxonomy, anomaly detection, crowd emotions, datasets,
17 | opportunities and prospects'' \cite{sanchez2020revisiting}. En dicho
18 | estudio, se ha propuesto una taxonomía que permite organizar los
19 | nuevos trabajos en una secuencia de pasos, de forma que los resultados
20 | de cada una de las etapas tienen una fuerte influencia en las etapas
21 | posteriores. Para la tercera de las etapas de la taxonomía propuesta,
22 | que corresponde a la fase de extracción de características, se han
23 | establecido las principales propiedades que se extraen de las
24 | secuencias de vídeo para el análisis de comportamientos en multitudes.\\
25 |
26 | Además, se ha realizado una revisión bibliográfica exhaustiva de los
27 | modelos basados en aprendizaje profundo para la detección de anomalías
28 | en multitudes. En primer lugar, se han identificado las distintas
29 | subtareas que componen esta área, las cuales vienen determinadas por
30 | las diferentes fuentes que producen la anomalía. Para los tipos de
31 | anomalía identificados, se han recopilado los principales conjuntos de
32 | datos públicos y las principales métricas que se utilizan para evaluar
33 | la calidad de los modelos. Finalmente, se han resumido los diferentes
34 | trabajos que resuelven cada una de las subtareas identificadas
35 | utilizando aprendizaje profundo.\\
36 |
37 | Para el apartado práctico del trabajo, se ha estudiado la eficacia del
38 | uso de características espacio-temporales extraídas con modelos de
39 | aprendizaje profundo para la detección de anomalías en
40 | vídeo. Concretamente, se ha experimentado sobre un modelo de detección
41 | de anomalías en multitudes que empleaba un extractor de
42 | características basado exclusivamente en redes neuronales
43 | convolucionales en tres dimensiones. Para dicho modelo, se ha
44 | sustituido el extractor de características por un compuesto de capas
45 | convolucionales y recurrentes. Nuestra hipótesis de partida defendía
46 | que las redes neuronales recurrentes iban a ser mejores extractores de
47 | características temporales que las redes convolucionales 3D.\\
48 |
49 | A raíz de los resultados extraídos de la experimentación, hemos podido
50 | comprobar que en efecto el modelo combinado convolucional-recurrente
51 | tiene un mejor comportamiento que el modelo puramente convolucional
52 | para el análisis de secuencias de vídeo.\\
53 |
54 | Por un lado, trabajando con el conjunto de datos UCF-101, hemos
55 | preentrenado el extractor de características que hemos utilizado
56 | después en el experimento principal. Durante esta fase de
57 | entrenamiento previa, hemos obtenido un modelo con una mejor capacidad
58 | de clasificación que el modelo basado en C3D (el extractor del trabajo
59 | original), con una mejora de más de 15 puntos porcentuales en la
60 | clasificación Top-1. Esta mejora se ha producido para todos los
61 | extractores de características propuestos, independientemente de la
62 | dimensión de la representación obtenida, lo cual pone de manifiesto
63 | que nos encontramos ante un extractor de mayor potencia.\\
64 |
65 | Por otro lado, en el experimento final, que involucraba la detección
66 | de fotogramas anómalos dentro del conjunto de datos UCF-Crime, hemos
67 | observado cómo el uso del extractor de características con capas
68 | recurrentes obtiene unos resultados mejores que el sistema
69 | original. Nuestros modelos superaban a los dos modelos originales,
70 | tanto el preentrenado por los autores como la réplica entrenada por
71 | nosotros, en todas las métricas que hemos calculado. Podemos remarcar
72 | especialmente la mejora en la métrica AUC que hemos conseguido con el
73 | modelo de dimensión 768, ya que esta era la única métrica que se
74 | utilizaba en el artículo original para la comparación de modelos.
75 | Hemos conseguido una arquitectura que mejora a la propuesta inicial,
76 | por lo que consideramos que los experimentos propuestos han sido
77 | exitosos. Además, dado que en primera instancia consideramos que
78 | utilizar sólo esta métrica podía dar lugar a una comparación pobre,
79 | hemos utilizado otras métricas que dan información sobre el
80 | comportamiento del modelo en la clase positiva, obteniendo también
81 | resultados que superan a la experimentación original.\\
82 |
83 | Otra mejora importante que hemos detectado es la capacidad de
84 | predicción de nuestros modelos a nivel de vídeo, en lugar de a nivel
85 | de fotograma. Aunque los resultados obtenidos por nuestros modelos no
86 | suponen una mejora tan representativa a la hora de localizar las
87 | anomalías dentro de los vídeos, sí que suponen un avance importante a
88 | la hora de detectar qué vídeos presentan anomalía. Concretamente,
89 | nuestro mejor modelo consigue una mejora de más de 10 puntos
90 | porcentuales sobre el modelo original en este contexto, lo cual es un
91 | aumento muy significativo.\\
92 |
93 | Es importante destacar también que esta mejora en los resultados se ha
94 | producido a pesar de que nuestros extractores de características están
95 | entrenados, a priori, en un conjunto de datos de menor calidad que el
96 | extractor de características original. Mientras que el modelo
97 | convolucional 3D estaba entrenado en un conjunto de datos de más de
98 | 1000000 de vídeos y cerca de 500 clases, el nuestro está entrenado en
99 | un conjunto mucho más pequeño, de unos 10000 vídeos y 101 clases. Esta
100 | diferencia hace que el modelo original parta, presumiblemente, de una
101 | posición ventajosa respecto al nuestro, lo que hace que esta mejora
102 | resulte especialmente relevante.\\
103 |
104 | Finalmente, a pesar de que los resultados obtenidos son mejores que
105 | los de la experimentación original, se puede observar que aún hay un
106 | amplio margen de mejora en este conjunto de datos. El número de falsos
107 | negativos es aún muy elevado, clasificándose correctamente menos del
108 | 50 \% de los fotogramas positivos. Es posible que esta problemática
109 | venga justificada, en parte, por el tipo de etiquetado del
110 | conjunto. Al tener que entrenar sin la localización exacta de las
111 | anomalías, resulta complicado enseñar al modelo a localizar de forma
112 | precisa la anomalía en el vídeo anómalo completo. Esto implica que,
113 | probablemente, se estén cometiendo errores en los primeros y últimos
114 | fotogramas alrededor de las anomalías. Además, hemos visto que en un
115 | cuarto de los vídeos etiquetados como anómalos no generamos ninguna
116 | etiqueta positiva, es decir, ignoramos casi el 25 \% de las anomalías
117 | presentes en el conjunto. Estamos hablando de un número muy importante
118 | de errores, que requerirán de modelos más potentes para ser detectados.\\
119 |
120 | A raíz de las conclusiones obtenidas del estudio, exponemos a
121 | continuación posibles vías de trabajo futuro.
122 |
123 | \section{Trabajo futuro}
124 |
125 | Dados los problemas que hemos encontrado durante el desarrollo del
126 | trabajo, especialmente en el apartado práctico del mismo, aparecen las
127 | siguientes líneas de trabajo a investigar:
128 |
129 | \begin{itemize}
130 | \item Utilizar una base de datos de entrenamiento para el extractor de
131 | características de mayor tamaño: Por falta de capacidad de cómputo,
132 | no se han utilizado bases de datos de mayor tamaño para el
133 | entrenamiento del modelo que se usa posteriormente para la
134 | extracción de características. Probablemente, el uso de bases de
135 | datos con mayor diversidad producirá unos resultados mejores. El
136 | modelo original, como ya dijimos, está entrenado sobre Sports-1M, de
137 | tamaño significativamente mayor al empleado por nosotros. Existen
138 | conjuntos para clasificación de vídeos de mayor tamaño, como el
139 | conjunto YouTube-8M \cite{abu2016youtube}. Puede ser interesante
140 | estudiar cómo el uso de un conjunto de datos u otro influye a la
141 | hora de entrenar el extractor de características. Teniendo en cuenta
142 | que los resultados obtenidos por nuestros modelos tras entrenar en
143 | el conjunto pequeño son comparables con los resultados originales, y
144 | mejores para la mayoría de las métricas calculadas, la mejora
145 | supuesta por un mejor preentrenamiento podría demostrar por completo
146 | que nos encontramos ante un modelo más potente.
147 | \item Afinar la arquitectura del modelo propuesto: En nuestra
148 | experimentación hemos propuesto un modelo basado en convoluciones 2D
149 | para extraer información de los fotogramas junto con una LSTM para
150 | extraer información temporal. En nuestra experimentación hemos
151 | estudiado el uso de tres representaciones de distintos tamaños, 512,
152 | 768 y 1024 elementos. No obstante, no se han explorado
153 | representaciones mayores, ya que los resultados obtenidos mejoraban
154 | la experimentación original y estamos ante modelos costosos, que
155 | requieren de muchas horas de cómputo para ser entrenados. Además, se
156 | ha utilizado Xception como red neuronal convolucional debido a su
157 | buen funcionamiento y pequeño tamaño, pero podríamos haber optado
158 | por otras arquitecturas disponibles. Es posible que las decisiones
159 | tomadas en el diseño hayan provocado que no nos encontremos ante el
160 | mejor modelo posible de este tipo y quede aún margen de mejora.
161 | \item Explorar nuevas arquitecturas para el extractor de
162 | características: Existen modelos llamados redes LSTM convolucionales
163 | \cite{xingjian2015convolutional} que sustituyen los productos
164 | internos de las LSTM clásicas por operaciones de convolución, por lo
165 | que son capaces de trabajar directamente con vídeos como dato de
166 | entrada. En este caso, no necesitaríamos una primera etapa de la red
167 | basada en una arquitectura convolucional, y podríamos aplicar
168 | directamente esta arquitectura. No obstante, tras primeras pruebas
169 | con este modelo, decidimos descartarlo por obtener malos resultados
170 | al ser entrenado completamente desde cero. Uniendo esta arquitectura
171 | al uso de conjuntos de datos de mayor tamaño podrían mejorarse los
172 | resultados obtenidos.
173 | \item Modificar la política de entrenamiento del modelo: En nuestra
174 | experimentación hemos construido un modelo con una arquitectura
175 | similar al original, en el que hemos sustituido el extractor de
176 | características por uno que creíamos de mayor potencia. No obstante,
177 | el resto del modelo se ha mantenido más o menos igual que el de
178 | partida para no influir de otra forma en el modelo. Debido a que el
179 | margen de mejora actual en el conjunto de datos es bastante grande,
180 | usar una política de entrenamiento distinta a la actual podría
181 | suponer una mejora en los resultados obtenidos, así que puede ser
182 | interesante explorar esta vía.
183 | \item Proponer modelos combinados: Los modelos que hemos utilizado en
184 | esta experimentación han sido estudiados de forma independiente, ya
185 | que nuestra intención era comprobar si las características
186 | espacio-temporales eran más potentes que las convolucionales puras
187 | para este problema. No hemos buscado, por tanto, obtener los mejores
188 | resultados posibles en el conjunto de datos. Durante la
189 | experimentación hemos observado cómo el modelo original y el modelo
190 | propuesto tienen características diferentes, y un buen
191 | comportamiento en distintos puntos (por ejemplo, para fotogramas
192 | fácilmente clasificables, el modelo original funciona ligeramente
193 | mejor que el nuestro). La utilización de los dos enfoques en un
194 | modelo combinado probablemente obtenga mejores resultados que los
195 | dos modelos por separado.
196 | \end{itemize}
197 |
198 | \section{Publicaciones asociadas}
199 |
200 | Debido a los resultados obtenidos en el desarrollo del trabajo, tanto
201 | a nivel teórico como práctico, se han propuesto dos publicaciones
202 | relacionadas con el mismo. Dichas publicaciones son las siguientes:
203 |
204 | \begin{itemize}
205 | \item Luque-Sánchez, F., Hupont, I., Tabik, S., \& Herrera,
206 | F. (2020). \textbf{Revisiting crowd behaviour analysis through deep
207 | learning: Taxonomy, anomaly detection, crowd emotions, datasets,
208 | opportunities and prospects}. \textit{Information Fusion}. Esta
209 | publicación consiste en una revisión sobre el estado del arte en
210 | técnicas de análisis de multitudes en videovigilancia utilizando
211 | aprendizaje profundo. En dicho artículo se establece la taxonomía
212 | que se describe en el apartado teórico del trabajo, se revisan los
213 | principales trabajos que resuelven este problema utilizando
214 | aprendizaje profundo, y se pone de manifiesto la necesidad de
215 | introducir características basadas en emociones para el análisis de
216 | multitudes.
217 | \item Luque-Sánchez, F., Hupont, I., Tabik, S., \& Herrera,
218 | F. (2020). \textbf{Xception-LSTM: Deep Spatio-temporal features for
219 | crowd anomaly detection}. En preparación. Esta publicación
220 | extiende la experimentación llevada a cabo en el trabajo a partir
221 | de las propuestas de trabajo futuro, para estudiar en profundidad
222 | los modelos espacio-temporales para la detección de anomalías. Se
223 | proponen nuevas arquitecturas basadas en capas CNN-LSTM combinadas,
224 | y se preentrenan los extractores de características en conjuntos
225 | de datos de mayor tamaño.
226 | \end{itemize}
227 |
228 | \end{document}
229 |
230 | %%% Local Variables:
231 | %%% mode: latex
232 | %%% TeX-master: "../main"
233 | %%% End:
234 |
--------------------------------------------------------------------------------
/docs/images/2d_conv.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/2d_conv.pdf
--------------------------------------------------------------------------------
/docs/images/3d_conv.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/3d_conv.pdf
--------------------------------------------------------------------------------
/docs/images/avenue-anomaly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/avenue-anomaly.png
--------------------------------------------------------------------------------
/docs/images/boss-anomaly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/boss-anomaly.png
--------------------------------------------------------------------------------
/docs/images/cnn_lstm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/cnn_lstm.pdf
--------------------------------------------------------------------------------
/docs/images/cnn_lstm_violence.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/cnn_lstm_violence.pdf
--------------------------------------------------------------------------------
/docs/images/extractor_acc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/extractor_acc.pdf
--------------------------------------------------------------------------------
/docs/images/extractor_loss.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/extractor_loss.pdf
--------------------------------------------------------------------------------
/docs/images/original_model.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/original_model.pdf
--------------------------------------------------------------------------------
/docs/images/pr_overlay.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/pr_overlay.pdf
--------------------------------------------------------------------------------
/docs/images/roc-curve.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/roc-curve.jpg
--------------------------------------------------------------------------------
/docs/images/roc_overlay.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/roc_overlay.pdf
--------------------------------------------------------------------------------
/docs/images/sdae_psvm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/sdae_psvm.pdf
--------------------------------------------------------------------------------
/docs/images/sketches/2d_3d_conv.drawio:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/images/sketches/taxonomy-steps.drawio:
--------------------------------------------------------------------------------
1 | 7Vtdd6JIEP01PmYOHwL62Gk7mT5HwRV19+zLHkY6yizaLmI0++un+VLBSqKjEDyJDwpFU0Dde+mqAhsqnm8fA2c563GX+Q1FcrcNtdNQFEVtS+InsrykFk0zEss08NzEJu8Ntvc/S43pjtO157JVbmDIuR96y7xxwhcLNglzNicI+CY/7In7+aMunSk7MtgTxz+2/um54SyxtjRpb//OvOksO7IspVvmTjY4Naxmjss3ByaVNFQccB4mS/MtZn4UvSwuyX4Pr2zdnVjAFuEpO4wo6m9wOPrnp0zvX0jwvA42d6mXZ8dfpxecnmz4kkUg4OuFyyInUkO938y8kNlLZxJt3QjQhW0Wzn2xJovF1B0LQrZ99Tzl3dUL3jA+Z2HwIoakO9wpGWlSzjQzLmz2AMhyFtbZQfSbemp0UtSnO+/7wIiFNDZnxEkB4qT74rD3T1xc1mHA9P/WPNtwt4oJjcSAprTc7jeKpWn0i8wGVhstuUttaotRHSK+eqPukA5HHRJZiCm+xrRDrDF9pF1kYoqyY4tLSQ6fODuCToAQ5vFZhQH/l2Hu80BYFnzBojP1fL9gcnxvuhCrEwEaE/b7CFJPyAKlG+ae60aHAQmRp8xVOGHkOaEqACekFsCJVlmUUOsoHSUfJq0FhAmKklKacpoXK0cFlZN5+ZEZOmRIMKaRntqqeSCRH8VdPpFsCnxoGvJpfDDKooNWI9W8JZJ2pSLRqxKJTR5HtEeJObQ+t0JqI4h2jQSRpe1Hs8hxkJRmpQLJUsDyFUL+Gg7Q4TyS5GQYCeuQDOJ0rWMPKUb251ZQRhatblOMDBU153FFkSGuFDOM42z9NOyrTwulPEa6BMx4kKDbpWF0eUF1DkZ9MrAtE90OQob64QhB9c01EMqlILciIeUEAWmVwnN5XXUyPHVXj3KCdqoFB8rnC0FazZxltPjksy2KGpUiFmzhpoudie+sVt4kH6vsgsTdMx6ctkv1b7qRGLIWpSoVcoeGoj7Fn13EmXvU+Hw33gcBheKZ2QLmO6H3nHcPBTk9Qp97MWOzm2EhozCk9jfZkHafgsMVXwcTlvrYA3fkVj7La+gEUxYeeY3psIvIBQxpfTHk9xkiGwCWenv3af0eQxT1LLclU0S5vOSB7/Dxbd20aQeJPaVdARScW8JUfptXtXcn4Z3Kc48FSnsqUFalga0uwUM6TjC6ETj0D4ejrKJiTLoWviUsDODxSMVYlFU+dOgAbvPXGxCgx1wxIFDB8MFR0gpVL9SJV6EolddoVKAHFKU0GnEX2fSBYnTca7R6fWswREkVZseT9EO8EeptINPqoW7Sl/zqSSY80vO8ahpARVhpT1Ix3lffV77/Wr7fBCpCSbq4ItSAkvANt2Xn+1BJeOaMCd55sGU+EntIrd1sWf9cvwiN1gYUDPVDy1Mw9KDuGvCQMcIjVPuMRtfrhkhWrV8dkX7yTpZJsVVXNIr60A3to9G4Qi0Mo0G6pL6956IsagDEFapgeB5BSZJ5kGXe3kRiAF2KivG5QmUMv0vaIyb6+3akUgMoLn+IJmsnFXvXcUvMB+uPUfzElOKBZeMkZ+i/NVHdejkY8FCUDjza6659rfci1WKnXwPu2lBBU1p7Rr287/BRVERfVLyEioXaVpcBKkJvo2rnU1Gs7v9vktSt+7/tqOQX
--------------------------------------------------------------------------------
/docs/images/taxonomy_steps.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/taxonomy_steps.pdf
--------------------------------------------------------------------------------
/docs/images/ucf-examples/arson-abnormal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/arson-abnormal.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/arson-normal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/arson-normal.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/explosion-abnormal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/explosion-abnormal.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/explosion-normal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/explosion-normal.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/normal-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/normal-1.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/normal-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/normal-2.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/normal-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/normal-3.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/normal-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/normal-4.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/roadaccident-abnormal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/roadaccident-abnormal.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/roadaccident-normal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/roadaccident-normal.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/stealing-abnormal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/stealing-abnormal.png
--------------------------------------------------------------------------------
/docs/images/ucf-examples/stealing-normal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucf-examples/stealing-normal.png
--------------------------------------------------------------------------------
/docs/images/ucsd-anomaly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/ucsd-anomaly.png
--------------------------------------------------------------------------------
/docs/images/umn-anomaly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/images/umn-anomaly.png
--------------------------------------------------------------------------------
/docs/main.tex:
--------------------------------------------------------------------------------
1 | \documentclass[a4paper,11pt,oneside]{memoir}
2 | \setcounter{secnumdepth}{2}
3 | \setcounter{tocdepth}{2}
4 | \usepackage{listings}
5 | \usepackage[utf8]{inputenc}
6 | \usepackage[spanish]{babel}
7 | \usepackage[T1]{fontenc}
8 | \usepackage{kpfonts}
9 | \usepackage{amsmath}
10 | \usepackage[backend=biber, sorting=none]{biblatex}
11 | \addbibresource{bibliography/bibliography.bib}
12 |
13 | \usepackage{variables}
14 |
15 | \usepackage{chapterheader}
16 |
17 | \usepackage{subfiles}
18 |
19 | \usepackage{subcaption}
20 |
21 | \decimalpoint
22 |
23 | \usepackage{dcolumn}
24 | \newcolumntype{.}{D{.}{\esperiod}{-1}} \makeatletter
25 | \addto\shorthandsspanish{\let\esperiod\es@period@code} \makeatother
26 |
27 | \usepackage{algorithm}
28 | \usepackage[noend]{algpseudocode}
29 |
30 | \RequirePackage{verbatim}
31 | % \RequirePackage[Glenn]{fncychap}
32 | \usepackage{fancyhdr}
33 | \usepackage{graphicx}
34 | \usepackage{afterpage}
35 | \usepackage{float}
36 | \usepackage{tikz}
37 | \usetikzlibrary{matrix,chains,positioning,
38 | decorations.pathreplacing,arrows,babel,fit,
39 | arrows.meta,shapes}
40 | \usepackage{pgfplots}
41 | \def\layersep{2.2cm}
42 | \newcommand{\empt}[2]{$#1^{\langle #2 \rangle}$}
43 |
44 | \newtheorem{theorem}{Teorema}
45 | \usepackage{longtable}
46 | \usepackage{xcolor}
47 |
48 | \usepackage[pdfborder={000}]{hyperref} %referencia
49 |
50 | % TABLES
51 | \usepackage{booktabs}
52 | \usepackage{ctable}
53 | \usepackage{multirow}
54 | \usepackage{makecell}
55 | \addto\captionsspanish{\renewcommand{\tablename}{Tabla}}
56 |
57 | \hypersetup{
58 | pdfauthor = {\AuthorName (fluque1995@correo.ugr.es)},
59 | pdftitle = {\ProjectTitle},
60 | pdfsubject = {},
61 | pdfkeywords = {Aprendizaje profundo, análisis de multitudes, detección de anomalías, videovigilancia},
62 | pdfproducer = {pdflatex},
63 | colorlinks = True,
64 | linkcolor = darkgray,
65 | citecolor = blue
66 | }
67 |
68 | % \hyphenation{}
69 |
70 | % \usepackage{doxygen/doxygen} \usepackage{pdfpages}
71 | \usepackage{url}
72 | \usepackage{colortbl,longtable}
73 | % \usepackage[stable]{Footmisc}
74 | % \usepackage{index}
75 |
76 | % \makeindex \usepackage[style=long,
77 | % cols=2,border=plain,toc=true,number=none]{glossary} \makeglossary
78 |
79 | % Definición de comandos que me son útiles:
80 | % \renewcommand{\indexname}{Índice alfabético}
81 | % \renewcommand{\glossaryname}{Glosario}
82 |
83 | \pagestyle{fancy} \fancyhf{} \fancyhead[LO]{\leftmark}
84 | \fancyhead[RE]{\rightmark} \fancyhead[RO,LE]{\textbf{\thepage}}
85 | \renewcommand{\chaptermark}[1]{\markboth{\textbf{#1}}{}}
86 | \renewcommand{\sectionmark}[1]{\markright{\textbf{\thesection. #1}}}
87 |
88 | \setlength{\headheight}{1.5\headheight}
89 |
90 | \newcommand{\HRule}{\rule{\linewidth}{0.5mm}}
91 |
92 | \newcommand{\bigrule}{\titlerule[0.5mm]}
93 |
94 | % Para conseguir que en las páginas en blanco no ponga cabeceras
95 | \makeatletter
96 | \def\clearpage{%
97 | \ifvmode \ifnum \@dbltopnum =\m@ne \ifdim \pagetotal <\topskip
98 | \hbox{} \fi \fi \fi
99 | \newpage
100 | \thispagestyle{empty} \write\m@ne{} \vbox{} \penalty -\@Mi }
101 | \makeatother
102 |
103 | \begin{document}
104 |
105 | \subfile{prefaces/cover}
106 |
107 | \subfile{prefaces/spanish_abstract}
108 |
109 | \subfile{prefaces/english_abstract}
110 |
111 | \subfile{prefaces/licensing}
112 |
113 | \tableofcontents
114 |
115 | \subfile{chapters/01_introduction}
116 |
117 | \subfile{chapters/02_taxonomy}
118 |
119 | \subfile{chapters/03_problem_description}
120 |
121 | \subfile{chapters/04_experimentation}
122 |
123 | \subfile{chapters/05_conclusions}
124 |
125 | \nocite{*}
126 | \printbibliography
127 |
128 | \appendix
129 |
130 | %\subfile{appendices/ECG_explanation}
131 |
132 | \end{document}
133 |
--------------------------------------------------------------------------------
/docs/memoria_TFM_Luque_Sanchez_Francisco.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/memoria_TFM_Luque_Sanchez_Francisco.pdf
--------------------------------------------------------------------------------
/docs/prefaces/cover.tex:
--------------------------------------------------------------------------------
1 | \documentclass[../main.tex]{memoir}
2 |
3 | \begin{document}
4 | \begin{titlingpage}
5 |
6 | \newlength{\centeroffset}
7 | \setlength{\centeroffset}{-0.5\oddsidemargin}
8 | \addtolength{\centeroffset}{0.5\evensidemargin}
9 | \thispagestyle{empty}
10 |
11 | \noindent\hspace*{\centeroffset}
12 | \begin{minipage}{\textwidth}
13 | \vspace{-2cm}
14 | \centering
15 | \includegraphics[width=0.5\textwidth]{images/logo_ugr.jpg}
16 |
17 | \textsc{ \Large TRABAJO FIN DE MÁSTER\\[0.2cm]}
18 | \textsc{ \Degree }\\[0.5cm]
19 |
20 | {\Large\bfseries
21 | \ProjectTitle\\
22 | \noindent\rule[-1ex]{\textwidth}{1pt}\\[2ex]
23 | \ProjectTitleEng\\
24 | }
25 | \noindent\rule[-1ex]{\textwidth}{3pt}\\[2ex]
26 | \end{minipage}
27 |
28 | \vspace{1.5cm}
29 | \noindent\hspace*{\centeroffset}\begin{minipage}{\textwidth}
30 | \centering
31 |
32 | \textbf{Autor}\\ {\AuthorName}\\[2ex]
33 | \textbf{Directores}\\
34 | {\MainProf\\
35 | \SecondProf}\\[1cm]
36 | \includegraphics[height=1.5cm]{images/etsiit_logo.png}\\[1cm]
37 | \textsc{\FacultyOne}\\
38 | \textsc{---}\\
39 | \Location, \Time
40 | \end{minipage}
41 | % \addtolength{\textwidth}{\centeroffset}
42 | % \vspace{\stretch{2}}
43 |
44 | \end{titlingpage}
45 | \end{document}
46 |
47 | %%% Local Variables:
48 | %%% mode: latex
49 | %%% TeX-master: "../main"
50 | %%% End:
51 |
--------------------------------------------------------------------------------
/docs/prefaces/english_abstract.tex:
--------------------------------------------------------------------------------
1 | \documentclass[../main.tex]{memoir}
2 |
3 | \begin{document}
4 | \thispagestyle{empty}
5 |
6 | \begin{center}
7 | {\large\bfseries \ProjectTitleEng}\\
8 | \end{center}
9 |
10 | \begin{center}
11 | \AuthorName\\
12 | \vspace{0.7cm}
13 | \noindent{\textbf{Keywords}:}\\
14 | Deep learning, crowd analysis, anomaly detection, video-surveillance\\
15 | \vspace{0.7cm}
16 | \noindent{\textbf{Abstract}}\\
17 | \end{center}
18 |
19 | The last decades have experienced an unprecedented growth in population
20 | all around the globe. Crime and terrorism rates are also increasing
21 | rapidly. These two issues have turned video-surveillance into a
22 | fundamental tool worldwide. The number of security cameras installed
23 | in both public and private environments is huge nowadays, and thus
24 | processing the information retrieved by those cameras is getting
25 | harder every day. Hence, there is a need to automate that process,
26 | using intelligent models capable of extracting information from video
27 | sources.\\
28 |
29 | Advances in deep learning produced in the last years have improved the
30 | results obtained in this research area by a large margin. Current
31 | models are able to extract complex information automatically, and to
32 | work in more difficult environments, especially in terms of density of
33 | individuals. Despite these recent advances, the area is still
34 | relatively modern, and consequently is not properly structured. Due to
35 | this fact, comparing works that tackle different sub-tasks within this
36 | area is usually difficult.\\
37 |
38 | In this context, this work tries to solve different tasks
39 | related to the automatic treatment of video-surveillance sources. In
40 | particular, we will focus our efforts on the analysis of crowd
41 | behaviors from video-surveillance sources. The work is divided into two
42 | parts. In the first part, which is the theoretical study, a sequential
43 | taxonomy is proposed. This sequential taxonomy allows us to
44 | organize the different sub-tasks inside the topic as stages of a
45 | pipeline. The results of the later stages of the pipeline strongly
46 | rely on the previous ones, and thus their results are heavily influenced
47 | by the first models.\\
48 |
49 | Apart from the proposed taxonomy, an in-depth review of the
50 | state-of-the-art is conducted. In particular, we center our study on
51 | the works that use deep learning models to solve the crowd anomaly
52 | detection problem. Inside this topic, we analyze the main public
53 | datasets and proposals, organizing them depending on the specific type
54 | of anomaly to be detected.\\
55 |
56 | Finally, in the experimental part of this work, the use of
57 | spatio-temporal features for action anomaly detection is studied. To
58 | this end, one of the retrieved works is deeply analyzed and set as
59 | baseline for comparison. After the complete study, a new model for
60 | action anomaly detection is proposed. In particular, the original
61 | model employs a 3D convolutional feature extractor, which processes
62 | batches of consecutive frames. In our approach, the feature extractor
63 | proposed is a combination of 2D CNNs for spatial feature extraction,
64 | processing frames independently, together with a recurrent neural
65 | network, which learns to process the sequence of features from
66 | consecutive frames in the video. Our hypothesis is that the
67 | combination of convolutions and recurrent networks better preserves
68 | the semantic structure of information in the video, and thus the
69 | obtained model extracts more meaningful information.\\
70 |
71 | Our experimentation suggests that our model outperforms the
72 | classification capability of the original model, despite being
73 | pretrained on a much smaller dataset. In particular, the original
74 | feature extractor is trained on a set 1000 times bigger than ours.
75 | This fact allows us to conclude that our proposal is better than
76 | the original one in terms of classification capability, validating
77 | our hypothesis.\\
78 |
79 | The code developed for experimentation, together with the instructions
80 | to replicate the experiments, can be found in the repository
81 | \url{https://github.com/fluque1995/tfm-anomaly-detection}.
82 |
83 | \newpage
84 | \end{document}
85 |
86 | %%% Local Variables:
87 | %%% mode: latex
88 | %%% TeX-master: "../main"
89 | %%% End:
90 |
--------------------------------------------------------------------------------
/docs/prefaces/licensing.tex:
--------------------------------------------------------------------------------
1 | \documentclass[../main.tex]{memoir}
2 |
3 | \begin{document}
4 |
5 | \thispagestyle{empty}
6 |
7 | \noindent\rule[-1ex]{\textwidth}{2pt}\\[4.5ex]
8 |
9 | Yo, \textbf{\AuthorName}, alumno del Máster Universitario Oficial en
10 | Ciencia de Datos e Ingeniería de Computadores de la
11 | \textbf{\FacultyOne de la \University}, con DNI 31008316S, autorizo la
12 | ubicación de la siguiente copia de mi Trabajo Fin de Máster en la
13 | biblioteca del centro para que pueda ser consultada por las personas
14 | que lo deseen.
15 |
16 | \vspace{6cm}
17 |
18 | \includegraphics[width=3.5cm]{images/yo_firma}
19 | \\~\\
20 | \noindent Fdo: Francisco Luque Sánchez
21 |
22 | \vspace{2cm}
23 |
24 | \begin{flushright}
25 | \Location, \Time
26 | \end{flushright}
27 |
28 | \newpage
29 |
30 | \thispagestyle{empty}
31 |
32 | \noindent\rule[-1ex]{\textwidth}{2pt}\\[4.5ex]
33 |
34 | D. \textbf{\MainProf} y D.ª \textbf{\SecondProf}, profesores del
35 | \Department de la \University.
36 |
37 | \vspace{0.5cm}
38 |
39 | \textbf{Informan:}
40 |
41 | \vspace{0.5cm}
42 |
43 | Que el presente trabajo, titulado \textit{\textbf{\ProjectTitle}}, ha
44 | sido realizado bajo su supervisión por \textbf{\AuthorName}, y
45 | autorizan la defensa de dicho trabajo ante el tribunal que
46 | corresponda.
47 |
48 | \vspace{0.5cm}
49 |
50 | Y para que conste, expiden y firman el presente informe en \Location a \Time
51 |
52 | \vspace{1cm}
53 |
54 | \textbf{Los directores:}
55 |
56 | \vspace{5cm}
57 |
58 | \begin{minipage}{0.45\linewidth}
59 | \begin{center}
60 | \includegraphics[width=5cm]{images/paco_firma}
61 | \textbf{\MainProf}
62 | \end{center}
63 | \end{minipage}
64 | \begin{minipage}{0.45\linewidth}
65 | \begin{center}
66 | \includegraphics[width=5cm]{images/siham_firma}
67 | \textbf{\SecondProf}
68 | \end{center}
69 | \end{minipage}
70 |
71 | \newpage
72 | \end{document}
73 |
--------------------------------------------------------------------------------
/docs/prefaces/spanish_abstract.tex:
--------------------------------------------------------------------------------
1 | \documentclass[../main.tex]{memoir}
2 | \begin{document}
3 | \thispagestyle{empty}
4 |
5 | \begin{center}
6 | {\large\bfseries \ProjectTitle}\\
7 | \end{center}
8 |
9 | \begin{center}
10 | \AuthorName\\
11 | \vspace{0.7cm}
12 | \noindent{\textbf{Palabras clave}:}\\
13 | Aprendizaje profundo, análisis de multitudes, detección de
14 | anomalías, videovigilancia\\
15 | \vspace{0.7cm}
16 | \noindent{\textbf{Resumen}}\\
17 |
18 | \end{center}
19 |
20 | En las últimas décadas se ha producido un crecimiento poblacional sin
21 | precedentes en todas las partes del mundo, y las tasas de criminalidad
22 | y terrorismo se han disparado en muchos territorios. Esto ha provocado
23 | que la videovigilancia se convierta en una herramienta prioritaria a
24 | nivel mundial. El número de cámaras de seguridad instaladas tanto en
25 | ámbito público como privado ha crecido significativamente, y con ello
26 | la dificultad de gestionar la información recogida por las mismas de
27 | forma manual. Aparece, por tanto, la necesidad de automatizar este
28 | proceso, utilizando para ello modelos inteligentes capaces de extraer
29 | información de las secuencias de vídeo recogidas por las cámaras.\\
30 |
31 | Los avances en aprendizaje profundo de los últimos años han permitido
32 | que los resultados obtenidos sobre esta área de investigación mejoren
33 | considerablemente. Los modelos actuales son capaces de extraer
34 | información más compleja, y trabajar en entornos de mayor dificultad,
35 | especialmente en cuanto a densidad de individuos se refiere. A pesar
36 | de ello, el estudio de esta tarea es relativamente reciente, por lo
37 | que no está correctamente estructurada, y resulta difícil comparar los
38 | trabajos propuestos.\\
39 |
40 | Dado el contexto anterior, este trabajo trata de resolver varias
41 | tareas relacionadas con el tratamiento automático de fuentes de
42 | videovigilancia, en particular con el análisis de comportamientos de
43 | multitudes. Por un lado, se realiza un estudio teórico de la temática,
44 | con una propuesta taxonómica que permite agrupar los distintos
45 | trabajos siguiendo una secuencia de tareas. Esta organización sitúa
46 | las distintas subtareas consideradas dentro de la temática en
47 | distintos pasos de la secuencia, de forma que los resultados de los
48 | pasos posteriores se ven fuertemente influenciados por los obtenidos
49 | en los pasos previos.\\
50 |
51 | Además de la propuesta taxonómica, en el estudio teórico se hace una
52 | revisión exhaustiva de la literatura que utiliza modelos de
53 | aprendizaje profundo para resolver el problema de la detección de
54 | anomalías en multitudes. Para esta subtarea, se analizan los
55 | principales conjuntos de datos disponibles públicamente, y se estudian
56 | los trabajos del estado del arte, agrupando los mismos por el tipo
57 | concreto de anomalía que tratan de identificar.\\
58 |
59 | En el apartado práctico del trabajo, se estudia el uso de
60 | características espacio-temporales para la detección de acciones
61 | anómalas en vídeo. Para llevar a cabo dicho estudio, se establece como
62 | punto de partida un modelo de detección de anomalías basado en un
63 | extractor de características convolucional en tres
64 | dimensiones. Nuestra propuesta, en lugar de utilizar un extractor
65 | exclusivamente convolucional, aprovecha la potencia de las redes
66 | neuronales convolucionales 2D para el análisis de los fotogramas por
67 | separado, extrayendo información espacial, y la capacidad de las redes
68 | neuronales recurrentes para extraer información temporal de la
69 | secuencia de características de los fotogramas consecutivos. Dicho
70 | extractor de características es más complejo que la propuesta
71 | original, y conserva mejor la estructura temporal del vídeo, lo cual
72 | permite la extracción de información de mayor calidad. El código
73 | desarrollado para la experimentación se encuentra disponible en el
74 | repositorio \url{https://github.com/fluque1995/tfm-anomaly-detection}.\\
75 |
76 | Los resultados obtenidos del estudio sugieren que nuestro modelo tiene
77 | mejor capacidad de clasificación que el modelo original, incluso a
78 | pesar de estar entrenado en un conjunto de datos de tamaño 1000 veces
79 | menor que el extractor de características de partida. Este hecho
80 | nos permite concluir que nuestra propuesta es de mayor calidad que el
81 | modelo de partida, validando nuestra hipótesis inicial.
82 |
83 | \newpage
84 | \end{document}
85 |
86 | %%% Local Variables:
87 | %%% mode: latex
88 | %%% TeX-master: "../main"
89 | %%% End:
90 |
--------------------------------------------------------------------------------
/docs/slides/images/gifs/Assault049_x264.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/gifs/Assault049_x264.gif
--------------------------------------------------------------------------------
/docs/slides/images/gifs/Stealing019_x264.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/gifs/Stealing019_x264.gif
--------------------------------------------------------------------------------
/docs/slides/images/original-model.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/original-model.pdf
--------------------------------------------------------------------------------
/docs/slides/images/proposal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/proposal.pdf
--------------------------------------------------------------------------------
/docs/slides/images/taxonomy-steps.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/taxonomy-steps.pdf
--------------------------------------------------------------------------------
/docs/slides/images/ucf/arson-abnormal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/ucf/arson-abnormal.png
--------------------------------------------------------------------------------
/docs/slides/images/ucf/normal-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/ucf/normal-1.png
--------------------------------------------------------------------------------
/docs/slides/images/ucf/roadaccident-abnormal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/ucf/roadaccident-abnormal.png
--------------------------------------------------------------------------------
/docs/slides/images/ucf/stealing-abnormal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/images/ucf/stealing-abnormal.png
--------------------------------------------------------------------------------
/docs/slides/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/docs/slides/slides.pdf
--------------------------------------------------------------------------------
/docs/slides/slides.tex:
--------------------------------------------------------------------------------
1 | \documentclass[10pt]{beamer}
2 |
3 | \usetheme[progressbar=frametitle]{metropolis}
4 |
5 | \usepackage[utf8]{inputenc}
6 | \usepackage[spanish]{babel}
7 | \usepackage{booktabs}
8 | \usepackage{graphicx}
9 | \usepackage{subcaption}
10 | \usepackage{animate}
11 | \metroset{block=fill}
12 |
13 | \newcommand\blfootnote[1]{%
14 | \begingroup
15 | \renewcommand\thefootnote{}\footnote{#1}%
16 | \addtocounter{footnote}{-1}%
17 | \endgroup
18 | }
19 |
20 | \title{Aprendizaje profundo para el análisis de comportamientos en
21 | videovigilancia}
22 | \subtitle{Trabajo de Fin de Máster}
23 | \date{\today}
24 | \author{Francisco Luque Sánchez}
25 | \institute{Universidad de Granada - Máster en Ciencia de Datos
26 | e Ingeniería de Computadores}
27 |
28 | \begin{document}
29 |
30 | \maketitle
31 |
32 | \begin{frame}{Índice}
33 | \setbeamertemplate{section in toc}[sections numbered]
34 | \tableofcontents[hideallsubsections]
35 | \end{frame}
36 |
37 | \section{Descripción del problema}
38 |
39 | \begin{frame}{Descripción del problema}
40 | \begin{block}{Detección de comportamientos anómalos en multitudes}
41 | \begin{itemize}
42 | \item Estudio de secuencias de vídeo.
43 | \item Extraídas de cámaras de videovigilancia.
44 | \item Análisis de comportamientos.
45 | \item Identificación de comportamientos extraños.
46 | \item Problema complejo y de gran variabilidad.
47 | \end{itemize}
48 | \end{block}
49 | \end{frame}
50 |
51 | \section{Revisión bibliográfica y taxonomía}
52 |
53 | \begin{frame}{Revisión de la literatura}
54 | \begin{block}{Detección de comportamientos anómalos}
55 | \begin{itemize}
56 | \item Categorización de las anomalías por tipos (acciones,
57 | movimientos, apariencia).
58 | \item Conjuntos de datos públicos.
59 | \item Modelos basados en aprendizaje profundo.
60 | \end{itemize}
61 | \end{block}
62 | \end{frame}
63 |
64 | \begin{frame}{Propuesta taxonómica}
65 | \begin{figure}
66 | \centering
67 | \includegraphics[width=.7\textwidth]{images/taxonomy-steps.pdf}
68 | \caption{Taxonomía secuencial para el análisis de multitudes}
69 | \end{figure}
70 |
71 | \blfootnote{\textbf{Luque-Sánchez, F.}, Hupont, I., Tabik, S., \& Herrera,
72 | F. (2020). Revisiting crowd behaviour analysis through deep
73 | learning: Taxonomy, anomaly detection, crowd emotions, datasets,
74 | opportunities and prospects. \textit{Information Fusion}}
75 | \end{frame}
76 |
77 | \section{Detección de anomalías en videovigilancia}
78 |
79 | \begin{frame}{Hipótesis de partida}
80 | \begin{block}{Hipótesis}
81 | Los extractores de características espacio-temporales (CNN+LSTM)
82 | obtienen características de calidad para detectar comportamientos
83 | anómalos.
84 | \end{block}
85 | \end{frame}
86 |
87 | \begin{frame}{UCF-Crime Dataset}
88 | \begin{block}{Características}
89 | \begin{itemize}
90 | \item 1900 vídeos
91 | \item 128 horas en total
92 | \item 13 clases de comportamientos anómalos
93 | \item Etiquetado débil
94 | \end{itemize}
95 | \end{block}
96 | \end{frame}
97 |
98 | \begin{frame}{UCF-Crime Dataset}
99 | \begin{figure}[hbtp]
100 | \centering
101 | \begin{subfigure}{0.35\textwidth}
102 | \centering
103 | \includegraphics[width=\linewidth]{images/ucf/normal-1}
104 | \caption{Vídeo normal}
105 | \end{subfigure}
106 | \begin{subfigure}{0.35\textwidth}
107 | \centering
108 | \includegraphics[width=\linewidth]{images/ucf/arson-abnormal}
109 | \caption{Fuego provocado}
110 | \end{subfigure}
111 | \begin{subfigure}{0.35\textwidth}
112 | \centering
113 | \includegraphics[width=\linewidth]{images/ucf/stealing-abnormal}
114 | \caption{Robo}
115 | \end{subfigure}
116 | \begin{subfigure}{0.35\textwidth}
117 | \centering
118 | \includegraphics[width=\linewidth]{images/ucf/roadaccident-abnormal}
119 | \caption{Accidente}
120 | \end{subfigure}
121 | \end{figure}
122 | \end{frame}
123 |
124 | \begin{frame}{Modelo original}
125 | \begin{figure}
126 | \centering
127 | \includegraphics[width=.9\textwidth]{images/original-model.pdf}
128 | \caption{Modelo original}
129 | \end{figure}
130 |
131 | \blfootnote{Sultani, W., Chen, C., \& Shah, M. (2018). Real-world
132 | anomaly detection in surveillance videos. In Proceedings of the
133 | IEEE Conference on Computer Vision and Pattern Recognition
134 | (pp. 6479-6488).}
135 | \end{frame}
136 |
137 | \begin{frame}{Entrenamiento multiinstancia}
138 | \begin{block}{Entrenamiento del modelo}
139 | \begin{itemize}
140 | \item Una etiqueta por vídeo.
141 | \item Vídeos anómalos compuestos mayoritariamente por fotogramas normales.
142 | \item Predicción a nivel de fotograma.
143 | \end{itemize}
144 | \end{block}
145 |
146 | Solución: Función de pérdida multiinstancia
147 | \begin{align*}
148 | l(\mathcal{B}_a, \mathcal{B}_n)
149 | &= \max{(0, 1 - \max_{i \in \mathcal{B}_a}{f(\mathcal{V}^i_a)} +
150 | \max_{i \in \mathcal{B}_n}{f(\mathcal{V}^i_n)})}\\
151 | & + \lambda_1
152 | \sum_{i=0}^{n-1} (f(\mathcal{V}_a^i) - f(\mathcal{V}_a^{i+1}))^2 +
153 | \lambda_2 \sum_{i=0}^{n} f(\mathcal{V}_a^i).
154 | \end{align*}
155 |
156 | \end{frame}
157 |
158 | \begin{frame}{Propuesta: Xception-LSTM}
159 | \begin{figure}
160 | \centering
161 | \includegraphics[width=.8\textwidth]{images/proposal.pdf}
162 | \caption{Arquitectura propuesta}
163 | \end{figure}
164 | \end{frame}
165 |
166 | \begin{frame}{Propuesta: Xception-LSTM}
167 | \begin{block}{Entrenamiento del modelo}
168 | \begin{itemize}
169 | \item Preentrenamiento del extractor: Clasificación sobre UCF-101
170 | (13000 vídeos, 101 clases).
171 | \item Congelación del extractor y extracción de características.
172 | \item Entrenamiento del clasificador.
173 | \end{itemize}
174 | \end{block}
175 | \end{frame}
176 |
177 | \section{Resultados}
178 |
179 | \begin{frame}{Resultados a nivel de fotograma}
180 | \begin{table}[H]
181 | \centering
182 | \resizebox{\textwidth}{!}{
183 | \begin{tabular}{lccccc}
184 | \toprule
185 | Modelo & Exactitud & AUC & $F_1$ & EER & AP \\
186 | \midrule
187 | Original - preentrenado & 0.8428 & 0.7508 & 0.2838 & 0.3119 & \textbf{0.2057} \\
188 | Original - replicado & 0.7411 & 0.7369 & 0.2201 & 0.3253 & 0.2014 \\
189 | Xception-LSTM - 1024 & 0.8236 & 0.7504 & \textbf{0.2907} & 0.3221 & 0.1823 \\
190 | Xception-LSTM - 768 & \textbf{0.8455} & \textbf{0.7674} & 0.2681 & \textbf{0.2980} & 0.1770 \\
191 | Xception-LSTM - 512 & 0.8436 & 0.7177 & 0.2140 & 0.3388 & 0.1451 \\
192 | \bottomrule
193 | \end{tabular}
194 | }
195 | \caption{Tabla de resultados obtenidos por los modelos.}
196 | \label{tab:confusion-matrices}
197 | \end{table}
198 | \end{frame}
199 |
200 | \begin{frame}{Resultados a nivel de vídeo}
201 | \begin{table}[H]
202 | \centering
203 | \begin{tabular}{lcc}
204 | \toprule
205 | Modelo & \% Vídeos normales & \% Vídeos anómalos \\
206 | \midrule
207 | Original & 13.33 & 64.89 \\
208 | Réplica & 11.11 & 74.05 \\
209 | Xception-LSTM - 1024 & 15.55 & \textbf{77.86} \\
210 | Xception-LSTM - 768 & 12.59 & 72.52 \\
211 | Xception-LSTM - 512 & \textbf{8.15} & 71.76 \\
212 | \bottomrule
213 | \end{tabular}
214 | \caption{Porcentaje de vídeos normales y anómalos en los que se ha
215 | generado una alarma.}
216 | \end{table}
217 | \end{frame}
218 |
219 | \begin{frame}{Ejemplo - Asalto}
220 | \begin{figure}
221 | \centering
222 | \animategraphics[loop,controls,width=.7\linewidth]{10}{images/gifs/assault-}{0}{132}
223 | \end{figure}
224 | \end{frame}
225 |
226 | \begin{frame}{Ejemplo - Robo}
227 | \begin{figure}
228 | \centering
229 | \animategraphics[loop,controls,width=.7\linewidth]{50}{images/gifs/stealing-}{000}{491}
230 | \end{figure}
231 | \end{frame}
232 |
233 | \section{Conclusiones y trabajo futuro}
234 |
235 | \begin{frame}{Conclusiones y trabajo futuro}
236 | \begin{block}{Conclusiones}
237 | \begin{itemize}
238 | \item La detección de anomalías en multitudes es un problema
239 | complejo, debido a la diversidad de contextos a los que puede
240 | aplicarse.
241 | \item Los modelos espacio-temporales extraen características
242 | relevantes para la resolución de este problema, mejores que los
243 | modelos completamente convolucionales.
244 | \item Existe un margen de mejora amplio para este conjunto de datos.
245 | \end{itemize}
246 | \end{block}
247 | \begin{block}{Trabajo futuro}
248 | \begin{itemize}
249 | \item Despliegue del sistema en una aplicación real.
250 | \item Estudio de modelos ConvLSTM.
251 | \item Mejora de la política de entrenamiento del modelo.
252 | \end{itemize}
253 | \end{block}
254 | \end{frame}
255 |
256 | \begin{frame}[standout]
257 | \LARGE{Muchas gracias}\\
258 | \LARGE{¿Preguntas?}
259 | \end{frame}
260 |
261 | \end{document}
262 |
--------------------------------------------------------------------------------
/docs/variables.sty:
--------------------------------------------------------------------------------
1 | \ProvidesPackage{variables}[2017/08/18 Macros declaration]
2 |
3 | % ********************************************************************
4 | % Re-usable information
5 | % ********************************************************************
6 | \newcommand{\ProjectTitle}{Aprendizaje profundo para el análisis de
7 | comportamientos en videovigilancia \xspace}
8 |
9 | \newcommand{\ProjectTitleEng}{Deep Learning for crowd behavior
10 | analysis in videosurveillance \xspace}
11 |
12 | \newcommand{\Degree}{Máster en Ciencia de Datos e Ingeniería de
13 | Computadores (DATCOM)}
14 |
15 | \newcommand{\AuthorName}{Francisco Luque Sánchez\xspace}
16 |
17 | \newcommand{\MainProf}{Francisco Herrera Triguero\xspace}
18 |
19 | \newcommand{\SecondProf}{Siham Tabik\xspace}
20 | % \newcommand{\mySupervisor}{Put name here\xspace}
21 |
22 | \newcommand{\FacultyOne}{Escuela Técnica Superior de Ingenierías
23 | Informática y de Telecomunicación\xspace}
24 |
25 | \newcommand{\FacultyShort}{ETSIIT\xspace}
26 |
27 | \newcommand{\Department}{Departamento de Ciencias de la Computación
28 | e Inteligencia Artificial\xspace}
29 |
30 | \newcommand{\University}{\protect{Universidad de Granada}\xspace}
31 |
32 | \newcommand{\Location}{Granada\xspace}
33 |
34 | \newcommand{\Time}{\today\xspace}
35 |
36 | \newcommand{\Version}{Version 0.1\xspace}
37 |
--------------------------------------------------------------------------------
/original_model/README.md:
--------------------------------------------------------------------------------
1 | # Original experiments replication
2 |
3 | In this folder you can find the code to replicate the original
4 | experimentation from the article "Real-World Anomaly Detection in
5 | Surveillance Videos". The provided code strongly relies on the
6 | following sources:
7 |
8 | - https://github.com/WaqasSultani/AnomalyDetectionCVPR2018: Original
9 | implementation of the model
10 | - https://github.com/ptirupat/AnomalyDetection_CVPR18: Reimplementation
11 | of the original work using Keras
12 | - https://github.com/adamcasson/c3d: Implementation of C3D feature
13 | extractor in Keras using Tensorflow as backend
14 |
15 | ## Experimentation replication
16 |
17 | The folder is self-contained and fully written in Python. The
18 | experiments can be completely performed by executing code inside this
19 | folder, without depending on external resources. Code files inside
20 | this folder can be divided into two groups; resource files and scripts.
21 | In resource files, auxiliary utilities and models are defined. Scripts
22 | are provided to replicate the experiments.
23 |
24 | ### Resource files
25 |
26 | The resource files are listed and explained below, in alphabetical
27 | order:
28 |
29 | - `c3d.py`: Definition of C3D feature extractor and preprocessing
30 | functions for the input data
31 | - `classifier.py`: Definition of the classifier model, together
32 | with functions to save and load the model to disk
33 | - `configuration.py`: Configuration information for the experiments
34 | (data paths, output paths, annotation files, etc)
35 | - `parameters.py`: Information about model structure
36 | - `utils` folder: This folder contains utilities to process arrays
37 | and video files
38 |
39 | ### Scripts
40 |
41 | The developed scripts are listed in the order that should be followed
42 | to replicate the experiments.
43 |
44 | 1. `extract_features.py`: This script computes the features from the
45 | videos composing the dataset (videos contained in `dataset` folder at
46 | root project level), and stores them inside the folder
47 | `raw_c3d_features` (if default configuration has been kept). In order
48 | to work properly, the destination folder must exist. The folder
49 | structure can be created with the bash script provided at root project
50 | level.
51 | 2. `preprocess_features.py`: This script takes the previously extracted
52 | features, whose number can vary depending on the original video length,
53 | and computes a fixed-size representation for each video. The new features
54 | are stored inside the folder `processed_c3d_features`
55 | 3. `train_classifier.py`: This script trains the final classifier
56 | model using the preprocessed features extracted before. After
57 | training, it stores the resulting model inside the folder `trained_models`.
58 | 4. `predict_test_set.py`: After training, this script takes the trained
59 | model and uses it to predict the test set (test features are calculated
60 | in the first two steps).
61 | 5. `calculate_metrics.py`: When the predictions have been made, this
62 | script calculates several performance metrics to validate the model.
63 |
--------------------------------------------------------------------------------
/original_model/c3d.py:
--------------------------------------------------------------------------------
1 | import keras.backend as K
2 | from keras.models import Sequential
3 | from keras.models import Model
4 | from keras.layers.core import Dense, Dropout, Flatten
5 | import configuration as cfg
6 | from keras.layers.convolutional import Conv3D, MaxPooling3D, ZeroPadding3D
7 | import numpy as np
8 | from scipy.misc import imresize
9 | from keras.utils.data_utils import get_file
10 |
11 | C3D_MEAN_PATH = 'https://github.com/adamcasson/c3d/releases/download/v0.1/c3d_mean.npy'
12 |
13 |
def preprocess_input(video):
    """Turn a raw clip into a single-sample batch for the C3D network.

    Sixteen frames are sampled at evenly spaced positions along the
    clip, each one is resized to 128x171 with bicubic interpolation,
    the C3D training mean is subtracted and the 112x112 window used by
    the network is cropped out.

    :param video: Clip to be processed, indexed by frame on its first
        axis (the channel count is read from axis 3)
    :returns: Preprocessed clip with a leading samples axis added
    :rtype: np.ndarray

    """
    last_frame = video.shape[0] - 1
    sample_idx = np.ceil(np.linspace(0, last_frame, 16)).astype(int)
    sampled = video[sample_idx]

    # Resize every sampled frame to 128x171
    resized = np.zeros((sampled.shape[0], 128, 171, sampled.shape[3]))
    for pos in range(sampled.shape[0]):
        resized[pos] = imresize(sampled[pos], (128, 171), 'bicubic')

    # Fetch (through the Keras file cache) and subtract the training mean
    mean = np.load(
        get_file('c3d_mean.npy',
                 C3D_MEAN_PATH,
                 cache_subdir='models',
                 md5_hash='08a07d9761e76097985124d9e8b2fe34'))
    resized -= mean

    # Crop to 112x112, then add the extra dimension for samples
    return np.expand_dims(resized[:, 8:120, 30:142, :], axis=0)
48 |
49 |
def C3D(weights='sports1M'):
    """Creation of the full C3D architecture

    The network stacks eight 3x3x3 convolutions (``conv1`` to
    ``conv5b``) interleaved with five max-pooling layers, followed by
    two 4096-unit dense layers (``fc6``, ``fc7``) and a 487-way softmax
    (``fc8``). The input is a 16-frame clip of 112x112 RGB frames, laid
    out according to the Keras image data format in use.

    :param weights: Weights to be loaded into the network. If
        'sports1M', they are read from the file configured in
        ``cfg.c3d_model_weights``. If None, the network is randomly
        initialized.
    :returns: Network model
    :rtype: keras.model
    :raises ValueError: If ``weights`` is neither 'sports1M' nor None

    """

    if weights not in {'sports1M', None}:
        raise ValueError('weights should be either sports1M or None')

    if K.image_data_format() == 'channels_last':
        shape = (16, 112, 112, 3)
    else:
        shape = (3, 16, 112, 112)

    def conv(filters, name, **kwargs):
        # Every convolution shares kernel size, activation and padding
        return Conv3D(filters,
                      3,
                      activation='relu',
                      padding='same',
                      name=name,
                      **kwargs)

    def pool(name, size=(2, 2, 2), padding='valid'):
        # Every pooling layer uses a stride equal to its pool size
        return MaxPooling3D(pool_size=size,
                            strides=size,
                            padding=padding,
                            name=name)

    model = Sequential()
    model.add(conv(64, 'conv1', input_shape=shape))
    # The first pooling only downsamples spatially (temporal size 1)
    model.add(pool('pool1', size=(1, 2, 2), padding='same'))

    model.add(conv(128, 'conv2'))
    model.add(pool('pool2'))

    model.add(conv(256, 'conv3a'))
    model.add(conv(256, 'conv3b'))
    model.add(pool('pool3'))

    model.add(conv(512, 'conv4a'))
    model.add(conv(512, 'conv4b'))
    model.add(pool('pool4'))

    model.add(conv(512, 'conv5a'))
    model.add(conv(512, 'conv5b'))
    model.add(ZeroPadding3D(padding=(0, 1, 1)))
    model.add(pool('pool5'))

    model.add(Flatten())

    model.add(Dense(4096, activation='relu', name='fc6'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu', name='fc7'))
    model.add(Dropout(0.5))
    model.add(Dense(487, activation='softmax', name='fc8'))

    if weights == 'sports1M':
        model.load_weights(cfg.c3d_model_weights)

    return model
126 |
127 |
def c3d_feature_extractor():
    """Build the network used to extract features from video clips.

    The complete C3D model is created with its default weights and then
    truncated, so that the resulting model maps the C3D input to the
    output of the ``fc6`` layer.

    :returns: Feature extraction model
    :rtype: keras.model

    """
    full_network = C3D()
    fc6_output = full_network.get_layer('fc6').output
    return Model(inputs=full_network.input, outputs=fc6_output)
142 |
--------------------------------------------------------------------------------
/original_model/calculate_metrics.py:
--------------------------------------------------------------------------------
1 | import sklearn.metrics
2 | import numpy as np
3 | import pandas as pd
4 | import configuration as cfg
5 | import os
6 | import utils.video_util
7 | import utils.array_util
8 | import matplotlib.pyplot as plt
9 |
def eer_score(fpr, tpr, thr):
    """Return the equal error rate (EER) and the corresponding threshold.

    The EER is taken at the operating point where the false positive
    rate and the false negative rate (``1 - tpr``) are closest, and is
    reported as the mean of both rates at that point.

    :param fpr: False positive rates, one per threshold (any array-like)
    :param tpr: True positive rates, one per threshold (any array-like)
    :param thr: Thresholds matching ``fpr`` and ``tpr`` (any array-like)
    :returns: Tuple ``(eer, threshold)``
    :rtype: tuple
    :raises ValueError: If the inputs are empty

    """
    # Coerce so that plain lists/tuples work as well as numpy arrays
    fpr = np.asarray(fpr)
    fnr = 1 - np.asarray(tpr)
    thr = np.asarray(thr)

    min_index = np.argmin(np.abs(fpr - fnr))
    eer = np.mean((fpr[min_index], fnr[min_index]))
    return eer, thr[min_index]
17 |
# Temporal annotations of the test set. The first column (video
# identifier) becomes the index; the remaining columns are addressed
# below by their integer labels.
ground_truth = pd.read_csv(
    cfg.test_temporal_annotations, header=None, index_col=0
)

preds = []
gts = []

for idx, row in ground_truth.iterrows():
    preds_file_path = os.path.join(cfg.preds_folder, idx)
    frames = row[6]
    try:
        with open(preds_file_path, "rb") as f:
            curr_preds = np.load(f)

        # Stretch the stored predictions to one score per frame
        padded_preds = utils.array_util.extrapolate(curr_preds, frames)
    except FileNotFoundError:
        # Videos without stored predictions are scored as all zeros
        padded_preds = np.zeros((frames, 1))
        print("No predictions generated for {}".format(idx))

    # Frame-level ground truth: up to two anomalous intervals per
    # video, where -1 marks an unused interval boundary
    curr_gts = np.zeros(frames)
    for anomaly_start, anomaly_end in ((row[2], row[3]), (row[4], row[5])):
        if anomaly_start != -1 and anomaly_end != -1:
            curr_gts[anomaly_start:anomaly_end + 1] = 1

    preds.append(padded_preds)
    gts.append(curr_gts)

gts = np.concatenate(gts)
preds = np.concatenate(preds)
# Hard labels are obtained by rounding the scores (0.5 threshold)
preds_labels = np.round(preds)

acc = sklearn.metrics.accuracy_score(gts, preds_labels)
ap = sklearn.metrics.average_precision_score(gts, preds)
f1 = sklearn.metrics.f1_score(gts, preds_labels)
fpr, tpr, thr = sklearn.metrics.roc_curve(gts, preds)
prec, rec, _ = sklearn.metrics.precision_recall_curve(gts, preds)
eer, _ = eer_score(fpr, tpr, thr)
conf_mat = sklearn.metrics.confusion_matrix(gts, preds_labels)
auc = sklearn.metrics.auc(fpr, tpr)

# ROC curve, with the chance diagonal drawn as reference
plt.title("Curva ROC")
plt.plot(fpr, tpr, 'b', label="AUC: {}".format(auc))
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.savefig(os.path.join(cfg.output_folder, "roc.png"))

plt.clf()

# Precision-recall curve
plt.title("Curva PR")
plt.plot(rec, prec, 'b', label="Original - AP: {:.5f}".format(ap))
plt.legend(loc='lower right')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('Precision')
plt.xlabel('Recall')
plt.savefig(os.path.join(cfg.output_folder, "pr_curve.png"))

print("Accuracy: {:.5f}, AUC: {:.5f}, F1: {:.5f}, EER: {:.5f}, AP: {:.5F}".format(
    acc, auc, f1, eer, ap
))

print("Confusion matrix")
print(conf_mat)
93 |
--------------------------------------------------------------------------------
/original_model/classifier.py:
--------------------------------------------------------------------------------
1 | import keras
2 | import scipy.io as sio
3 | from keras import Sequential
4 | from keras.layers import Dense, Dropout
5 | from keras.regularizers import l2
6 |
7 | import configuration as cfg
8 |
def classifier_model():
    """Create the (untrained) anomaly classifier.

    The model is a stack of three dense layers (512, 32 and 1 units)
    over 4096-dimensional inputs, with dropout after each of the first
    two. All dense layers use Glorot normal initialization and L2
    weight regularization; the output goes through a sigmoid.

    :returns: Classifier model
    :rtype: keras.Model

    """
    def dense(units, **extra):
        # Initializer and regularizer are common to every dense layer
        return Dense(units, kernel_initializer='glorot_normal',
                     kernel_regularizer=l2(0.001), **extra)

    return Sequential([
        dense(512, input_dim=4096, activation='relu'),
        Dropout(0.6),
        dense(32),
        Dropout(0.6),
        dense(1, activation='sigmoid'),
    ])
26 |
27 |
def build_classifier_model():
    """Create the classifier and fill it with the pretrained weights.

    The weights are read from the file configured as
    ``cfg.classifier_model_weigts``.

    :returns: Classifier with the pretrained weights loaded
    :rtype: keras.Model

    """
    return load_weights(classifier_model(), cfg.classifier_model_weigts)
38 |
39 |
def conv_dict(dict2):
    """Prepare the dictionary of weights to be loaded by the network

    Only entries whose key is the string form of an index in
    ``range(len(dict2))`` are kept; any other key is ignored. Empty
    entries (shape ``(0, 0)``) are copied as they are. For the rest,
    the first row of the entry is read as the list of weight arrays,
    and every array stored as a single row ``(1, x)`` is flattened to
    its first row.

    :param dict2: Dictionary to format
    :returns: The dictionary properly formatted
    :rtype: dict

    """
    # Row vectors are flattened only below this width; the cut-off is
    # kept from the original implementation for backward compatibility.
    max_row_width = 5000

    converted = {}
    for index in range(len(dict2)):
        key = str(index)
        if key not in dict2:
            continue

        entry = dict2[key]
        if entry.shape == (0, 0):
            converted[key] = entry
            continue

        layer_weights = []
        for weight in entry[0]:
            shape = weight.shape
            is_row = (len(shape) == 2 and shape[0] == 1
                      and shape[1] < max_row_width)
            layer_weights.append(weight[0] if is_row else weight)
        converted[key] = layer_weights
    return converted
63 |
64 |
def load_weights(model, weights_file):
    """Loads the pretrained weights into the network architecture

    The weights file is read with ``scipy.io.loadmat`` and reformatted
    with ``conv_dict``. The layer at position ``i`` of the model then
    receives the weights stored under the key ``str(i)``.

    :param model: keras model of the network
    :param weights_file: Path to the weights file
    :returns: The input model with the weights properly loaded
    :rtype: keras.model
    :raises KeyError: If the file holds no entry for some layer index

    """
    layer_weights = conv_dict(sio.loadmat(weights_file))
    for index, layer in enumerate(model.layers):
        layer.set_weights(layer_weights[str(index)])
    return model
82 |
if __name__ == '__main__':
    # Running the module directly builds the pretrained classifier and
    # prints its layer summary.
    build_classifier_model().summary()
86 |
--------------------------------------------------------------------------------
/original_model/compute_frames.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import utils.video_util
4 | import configuration as cfg
5 | import os
6 |
# Whitespace-separated temporal annotations of the test set. The
# separator is a regular expression, hence the raw string.
ground_truth = pd.read_csv(
    cfg.test_temporal_annotations, header=None, sep=r"\s+", index_col=0,
    names=['Type', 'Start1', 'End1', 'Start2', 'End2']
)

frames_list = []
for idx in ground_truth.index:
    # The last four characters of the identifier are replaced by the
    # "_x264.mp4" suffix to obtain the video file name
    video_file_path = os.path.join(cfg.test_set, idx[:-4] + "_x264.mp4")
    print(video_file_path)
    _, frames = utils.video_util.get_video_clips(video_file_path)
    print(frames)
    frames_list.append(frames)

# Store the frame count of each video as an extra column
ground_truth['Frames'] = frames_list

ground_truth.to_csv("trial.csv", header=False)
23 |
--------------------------------------------------------------------------------
/original_model/configuration.py:
--------------------------------------------------------------------------------
1 | import os
2 |
# Pretrained C3D weights and folders holding the extracted features
c3d_model_weights = './trained_models/c3d_sports1m.h5'
raw_features_folder = "../raw_c3d_features"
processed_features_folder = "../processed_c3d_features"

# Classifier definition and pretrained weights
classifier_model_json = './trained_models/model.json'
# NOTE: "weigts" is a misspelling that classifier.py already
# references; it is kept, and a correctly spelled alias is provided.
classifier_model_weigts = './trained_models/weights_L1L2.mat'
classifier_model_weights = classifier_model_weigts

preds_folder = '../predictions_c3d'

input_folder = './input'
# NOTE(review): machine-specific absolute path; adjust before running
# the scripts that write plots to this folder.
output_folder = '/mnt/sdd/pacoluque/output'

sample_video_path = '../dataset/train/abnormal/Arrest018_x264.mp4'

raw_dataset_folder = '../dataset/'

# Training videos, split into normal and abnormal
train_set = os.path.join(raw_dataset_folder, 'train')
normal_videos_path = os.path.join(train_set, "normal")
abnormal_videos_path = os.path.join(train_set, "abnormal")

# Raw features of the training videos
raw_features_train_set = os.path.join(raw_features_folder, 'train')
raw_normal_train_features = os.path.join(raw_features_train_set, "normal")
raw_abnormal_train_features = os.path.join(raw_features_train_set, "abnormal")

# Processed features of the training videos
processed_features_train_set = os.path.join(processed_features_folder, 'train')
processed_normal_train_features = os.path.join(processed_features_train_set, "normal")
processed_abnormal_train_features = os.path.join(processed_features_train_set, "abnormal")

# Test videos and their features
test_set = os.path.join(raw_dataset_folder, 'test')
raw_test_features = os.path.join(raw_features_folder, 'test')
processed_test_features = os.path.join(processed_features_folder, 'test')

test_temporal_annotations = os.path.join(test_set, "temporal-annotation.txt")
36 |
--------------------------------------------------------------------------------
/original_model/display_predictions.py:
--------------------------------------------------------------------------------
1 | import os
2 | from c3d import *
3 | from classifier import *
4 | from utils.visualization_util import *
5 | import sklearn.preprocessing
6 | import parameters as params
7 | import configuration as cfg
8 |
def run_demo():
    """Score the configured sample video and save the result as a GIF

    Reads ``cfg.sample_video_path``, extracts one feature vector per
    complete clip, resamples them into a bag of
    ``params.features_per_bag`` features, scores the bag with the
    classifier, stretches the scores to one value per frame and stores
    the animation in ``cfg.output_folder``.

    NOTE(review): ``c3d_feature_extractor``, ``preprocess_input`` and
    ``build_classifier_model`` come from the star imports of ``c3d`` and
    ``classifier``; their exact behaviour is not visible from this file.
    """
    video_name = os.path.basename(cfg.sample_video_path).split('.')[0]

    # read video
    video_clips, num_frames = get_video_clips(cfg.sample_video_path)

    print("Number of clips in the video : ", len(video_clips))

    # build models
    feature_extractor = c3d_feature_extractor()
    classifier_model = build_classifier_model()

    print("Models initialized")

    # extract features
    rgb_features = []
    for i, clip in enumerate(video_clips):
        clip = np.array(clip)
        # The trailing clip may hold fewer frames than a full clip; skip it
        if len(clip) < params.frame_count:
            continue

        clip = preprocess_input(clip)
        rgb_feature = feature_extractor.predict(clip)[0]
        rgb_features.append(rgb_feature)

        print("Processed clip : ", i)

    rgb_features = np.array(rgb_features)
    rgb_feature_bag = interpolate(rgb_features, params.features_per_bag)

    # classify using the trained classifier model
    predictions = classifier_model.predict(rgb_feature_bag)

    predictions = np.array(predictions).squeeze()

    # one score per frame of the original video
    predictions = extrapolate(predictions, num_frames)

    save_path = os.path.join(cfg.output_folder, video_name + '.gif')
    # visualize predictions
    visualize_predictions(cfg.sample_video_path, predictions, save_path)
    # Report success only once the GIF has actually been written
    print('Executed Successfully - '+video_name + '.gif saved')


if __name__ == '__main__':
    run_demo()
55 |
--------------------------------------------------------------------------------
/original_model/extract_features.py:
--------------------------------------------------------------------------------
1 | import c3d
2 | import os
3 | import configuration as cfg
4 | import numpy as np
5 | import sklearn.preprocessing
6 |
7 | from utils import video_util
8 |
feature_extractor = c3d.c3d_feature_extractor()


def _extract_features_from_folder(videos_path, features_path):
    """Extract and store the clip features of every video in a folder

    For each entry of ``videos_path`` (in sorted order) the video is
    split into clips, the incomplete trailing clip (if any) is dropped,
    the remaining clips are run through ``feature_extractor`` and the
    row-normalized features are saved as ``.npy`` in ``features_path``.

    Entries that yield no complete clip are skipped with a message
    instead of crashing ``np.vstack`` with an empty list. This covers
    unreadable or non-video files, e.g. the annotation file that
    ``configuration.py`` places inside the test folder.

    :param videos_path: Folder containing the input videos
    :param features_path: Existing folder where features are written
    """
    for vid_name in sorted(os.listdir(videos_path)):
        print("Processing {}".format(vid_name))
        vid_path = os.path.join(videos_path, vid_name)
        # Drop the last 9 characters (the length of "_x264.mp4")
        feats_path = os.path.join(features_path, vid_name[:-9] + ".npy")

        clips, frames = video_util.get_video_clips(vid_path)

        # Remove last clip if number of frames is not equal to 16
        if frames % 16 != 0:
            clips = clips[:-1]

        if len(clips) == 0:
            print("Skipping {}: no complete clip found".format(vid_name))
            continue

        prep_clips = [c3d.preprocess_input(np.array(clip)) for clip in clips]
        prep_clips = np.vstack(prep_clips)

        features = feature_extractor.predict(prep_clips)
        features = sklearn.preprocessing.normalize(features, axis=1)

        with open(feats_path, "wb") as f:
            np.save(f, features)


print("Processing normal videos...")
_extract_features_from_folder(
    cfg.normal_videos_path, cfg.raw_normal_train_features
)

print("Processing abnormal videos...")
_extract_features_from_folder(
    cfg.abnormal_videos_path, cfg.raw_abnormal_train_features
)

print("Processing test videos...")
_extract_features_from_folder(cfg.test_set, cfg.raw_test_features)
84 |
--------------------------------------------------------------------------------
/original_model/parameters.py:
--------------------------------------------------------------------------------
# Frame geometry.
# NOTE(review): these three values are not referenced in the code visible
# here; presumably they describe the frames fed to the feature extractor.
frame_height = 240
frame_width = 320
channels = 3

# Number of frames per clip: used as both the window size and the stride
# when a video is split into clips (utils/video_util.py).
frame_count = 16

# Number of features each video is resampled to before classification
# (passed to interpolate() in display_predictions.py).
features_per_bag = 32
--------------------------------------------------------------------------------
/original_model/predict_test_set.py:
--------------------------------------------------------------------------------
1 | import classifier
2 | import configuration as cfg
3 | import numpy as np
4 | import os
5 |
def load_test_set(videos_path, videos_list):
    """Load the stored features of several videos into a single array

    :param videos_path: Folder containing the ``.npy`` feature files
    :param videos_list: File names (inside ``videos_path``) to load,
        in the order they should appear in the result
    :returns: Array stacking the loaded features, one entry per file
    :rtype: np.ndarray
    """
    def _read_features(file_name):
        with open(os.path.join(videos_path, file_name), "rb") as handle:
            return np.load(handle)

    return np.array([_read_features(name) for name in videos_list])
17 |
classifier_model = classifier.build_classifier_model()

# Process the feature files in a deterministic (sorted) order
vid_list = os.listdir(cfg.processed_test_features)
vid_list.sort()

test_set = load_test_set(cfg.processed_test_features, vid_list)

for filename, example in zip(vid_list, test_set):
    # Replace the 4-character extension of the feature file by ".npy"
    predictions_file = filename[:-4] + '.npy'
    pred_path = os.path.join(cfg.preds_folder, predictions_file)
    pred = classifier_model.predict_on_batch(example)
    with open(pred_path, "wb") as f:
        # Write through the handle opened above; saving to the path
        # instead would open the same file a second time.
        np.save(f, pred, allow_pickle=True)
31 |
--------------------------------------------------------------------------------
/original_model/preprocess_features.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import sklearn.preprocessing
4 | import configuration as cfg
5 |
def transform_into_segments(features, n_segments=32):
    """Average a variable number of feature rows into ``n_segments`` rows

    The rows are split into ``n_segments`` consecutive groups whose start
    indices are evenly spaced over the input; each group is averaged and
    the resulting rows are normalized with
    ``sklearn.preprocessing.normalize``.

    :param features: 2D array with one feature vector per row
    :param n_segments: Number of rows of the output
    :returns: Array with ``n_segments`` normalized rows
    :rtype: np.ndarray
    :raises RuntimeError: If ``features`` has fewer than ``n_segments`` rows
    """
    total_rows = features.shape[0]
    if total_rows < n_segments:
        raise RuntimeError(
            "Number of prev segments lesser than expected output size"
        )

    starts = np.linspace(0, total_rows, n_segments,
                         dtype=int, endpoint=False)

    # Every group ends where the next one starts ...
    segment_means = [
        np.mean(features[lo:hi, :], axis=0)
        for lo, hi in zip(starts, starts[1:])
    ]
    # ... except the last one, which runs until the end of the input
    segment_means.append(np.mean(features[starts[-1]:, :], axis=0))

    return sklearn.preprocessing.normalize(np.array(segment_means), axis=1)
24 |
def _preprocess_folder(raw_folder, processed_folder):
    """Convert every raw feature file of a folder to its fixed-size form

    Each file of ``raw_folder`` is loaded, reduced with
    ``transform_into_segments`` and saved under the same file name in
    ``processed_folder``. Files with too few feature rows are reported
    and skipped.

    :param raw_folder: Folder with the raw ``.npy`` feature files
    :param processed_folder: Existing folder for the processed files
    """
    for filename in os.listdir(raw_folder):
        print("Processing {}".format(filename))
        raw_file_path = os.path.join(raw_folder, filename)
        processed_file_path = os.path.join(processed_folder, filename)

        # NOTE: allow_pickle=True can execute code embedded in a crafted
        # file; only use this on feature files produced by this project.
        with open(raw_file_path, "rb") as f:
            feats = np.load(f, allow_pickle=True)

        # Only the transformation signals "too short" with RuntimeError,
        # so the try block is limited to it.
        try:
            new_feats = transform_into_segments(feats)
        except RuntimeError:
            print("Video {} too short".format(filename))
            continue

        with open(processed_file_path, "wb") as f:
            np.save(f, new_feats, allow_pickle=True)


_preprocess_folder(
    cfg.raw_normal_train_features, cfg.processed_normal_train_features
)
_preprocess_folder(
    cfg.raw_abnormal_train_features, cfg.processed_abnormal_train_features
)
_preprocess_folder(cfg.raw_test_features, cfg.processed_test_features)
79 |
--------------------------------------------------------------------------------
/original_model/train_classifier.py:
--------------------------------------------------------------------------------
1 | import keras.optimizers
2 | import scipy.io
3 | from keras.models import model_from_json
4 | import os
5 |
6 | import numpy as np
7 | import keras.backend as K
8 | import classifier
9 |
10 | from datetime import datetime
11 |
def save_model(model, json_path, weight_path):
    """Store a model as a JSON description plus a ``.mat`` weights file

    The weights of layer number ``i`` are stored in the ``.mat`` file
    under the key ``str(i)`` as an object array with one entry per
    weight array of that layer (empty for layers without weights).

    :param model: Object exposing ``to_json()`` and ``layers``, where
        every layer exposes ``get_weights()``
    :param json_path: Destination of the JSON model description
    :param weight_path: Destination of the weights file
    """
    # Context manager so the JSON file is flushed and closed right away
    with open(json_path, 'w') as json_file:
        json_file.write(model.to_json())

    weights_by_layer = {}
    for layer_idx, layer in enumerate(model.layers):
        weights = layer.get_weights()
        # Builtin ``object`` replaces the ``np.object`` alias, which was
        # removed in NumPy 1.24.
        packed = np.zeros(len(weights), dtype=object)
        # Assign element by element so NumPy never tries to broadcast
        # arrays of different shapes into the container.
        for weight_idx, weight in enumerate(weights):
            packed[weight_idx] = weight
        weights_by_layer[str(layer_idx)] = packed
    scipy.io.savemat(weight_path, weights_by_layer)
24 |
def load_model(json_path):
    """Build a model from the JSON description stored at ``json_path``

    :param json_path: Path to the JSON file describing the model
    :returns: Model created by ``model_from_json`` (weights not loaded here)
    """
    # Context manager so the file handle is closed deterministically
    with open(json_path) as json_file:
        return model_from_json(json_file.read())
28 |
def load_batch_train(normal_path, normal_list, abnormal_path, abnormal_list):
    """Assemble a random training batch of abnormal and normal videos

    Picks 30 random abnormal and 30 random normal feature files, loads
    them and stacks their rows with the abnormal videos first.

    :param normal_path: Folder with the normal feature files
    :param normal_list: File names available in ``normal_path``
    :param abnormal_path: Folder with the abnormal feature files
    :param abnormal_list: File names available in ``abnormal_path``
    :returns: Tuple ``(features, labels)``; ``labels`` has 32 entries per
        video of the batch, 1 for the abnormal half and 0 for the rest
    :rtype: tuple
    """
    batchsize = 60
    half_batch = int(batchsize/2)

    def _read(folder, file_name):
        with open(os.path.join(folder, file_name), "rb") as handle:
            return np.load(handle)

    # The abnormal permutation is drawn before the normal one
    abnormal_choice = np.random.permutation(len(abnormal_list))[:half_batch]
    normal_choice = np.random.permutation(len(normal_list))[:half_batch]

    abnormal_feats = [
        _read(abnormal_path, abnormal_list[pos]) for pos in abnormal_choice
    ]
    normal_feats = [
        _read(normal_path, normal_list[pos]) for pos in normal_choice
    ]

    all_feats = np.vstack(abnormal_feats + normal_feats)

    all_labels = np.zeros(32*batchsize, dtype='uint8')
    all_labels[:32*half_batch] = 1

    return all_feats, all_labels
63 |
64 |
def custom_objective(y_true, y_pred):
    """Ranking loss over a batch of 60 videos with 32 scores each

    The flattened predictions are read as 60 consecutive groups of 32
    scores. The first 30 groups are treated as the abnormal videos and
    the last 30 as the normal ones (the order in which
    ``load_batch_train`` stacks them). The loss is the mean, over the
    normal videos, of the summed hinge terms
    ``max(0, 1 - max(abnormal) + max(normal))``, plus two penalties on
    the abnormal videos weighted by 0.00008: the squared differences of
    consecutive scores and the sum of the scores.

    :param y_true: Labels of the batch (flattened but otherwise unused)
    :param y_pred: Predicted scores of the batch
    :returns: Scalar loss tensor
    """
    y_true = K.reshape(y_true, [-1])
    flat_scores = K.reshape(y_pred, [-1])
    segments_per_video = 32
    videos_per_batch = 60
    half = int(videos_per_batch / 2)

    def scores_of(video):
        first = video * segments_per_video
        return flat_scores[first:first + segments_per_video]

    abnormal_scores = [scores_of(video) for video in range(0, half)]
    normal_scores = [scores_of(video) for video in range(half, 2*half)]

    # Maximum score per video: abnormal videos first, then normal ones
    max_scores = K.stack(
        [K.max(scores) for scores in abnormal_scores + normal_scores]
    )
    # Penalties computed on the abnormal videos only
    temporal_constrains = K.stack([
        K.sum(K.pow(scores[1:] - scores[:-1], 2))
        for scores in abnormal_scores
    ])
    sparsity_constrains = K.stack(
        [K.sum(scores) for scores in abnormal_scores]
    )

    # One entry per normal video: hinge terms against all abnormal videos
    z_scores = K.stack([
        K.sum(K.maximum(1 - max_scores[:half] + max_scores[half + k], 0))
        for k in range(0, half)
    ])
    z = K.mean(z_scores)

    return z + \
        0.00008*K.sum(temporal_constrains) + \
        0.00008*K.sum(sparsity_constrains)
107 |
output_dir = "trained_models/"
normal_dir = "../processed_c3d_features/train/normal"
abnormal_dir = "../processed_c3d_features/train/abnormal"

# Sorted so the file lists are deterministic across runs
normal_list = os.listdir(normal_dir)
normal_list.sort()
abnormal_list = os.listdir(abnormal_dir)
abnormal_list.sort()

weights_path = output_dir + 'weights.mat'

model_path = output_dir + 'model.json'

#Create Full connected Model
model = classifier.classifier_model()

adagrad = keras.optimizers.Adagrad(lr=0.001, epsilon=1e-08)
model.compile(loss=custom_objective, optimizer=adagrad)

os.makedirs(output_dir, exist_ok=True)

# Losses are collected in a list; rebuilding an array with np.hstack on
# every iteration copies the whole history each time.
loss_graph = []
num_iters = 20000
total_iterations = 0
batchsize=60
time_before = datetime.now()


for it_num in range(num_iters):
    inputs, targets = load_batch_train(
        normal_dir, normal_list, abnormal_dir, abnormal_list
    )
    batch_loss = model.train_on_batch(inputs, targets)
    loss_graph.append(batch_loss)
    total_iterations += 1
    if total_iterations % 20 == 0:
        # The reported time is measured from the start of the training
        print ("Iteration={} took: {}, loss: {}".format(
            total_iterations, datetime.now() - time_before, batch_loss)
        )

loss_graph = np.array(loss_graph)

# Announce success only after the model has actually been written
save_model(model, model_path, weights_path)
print("Train Successful - Model saved")
151 |
--------------------------------------------------------------------------------
/original_model/trained_models/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/original_model/trained_models/.gitignore
--------------------------------------------------------------------------------
/original_model/utils/array_util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def sliding_window(arr, size, stride):
    """Apply sliding window to an array, getting chunks of
    specified size using the specified stride

    The last chunk may be shorter than ``size`` when the input length
    does not fit the window grid. Empty chunks are never returned.

    :param arr: Array to be divided
    :param size: Size of the chunks
    :param stride: Number of frames to skip for the next chunk
    :returns: Array with the resulting chunks. When all chunks have the
        same length this is a regular array; otherwise it is a 1D object
        array with one chunk per entry
    :rtype: np.ndarray

    """
    num_chunks = int((len(arr) - size) / stride) + 2
    result = []
    for i in range(0, num_chunks * stride, stride):
        chunk = arr[i:i + size]
        if len(chunk) > 0:
            result.append(chunk)

    # np.array() raises on ragged input since NumPy 1.24, so a shorter
    # trailing chunk needs an explicit object container.
    if len({len(chunk) for chunk in result}) > 1:
        ragged = np.empty(len(result), dtype=object)
        for position, chunk in enumerate(result):
            ragged[position] = chunk
        return ragged

    return np.array(result)
21 |
22 |
def interpolate(features, features_per_bag):
    """Transform a bag with an arbitrary number of features into a bag
    with a fixed amount, using interpolation of consecutive features

    The input rows are covered by ``features_per_bag`` windows with
    evenly spaced (rounded) boundaries; each window is averaged and the
    result is scaled to unit Euclidean norm. A window whose average has
    zero norm is reported with ``print("Error")`` and left as zeros
    instead of being turned into NaN by a division by zero.

    :param features: Bag of features to pad (2D array or nested sequence)
    :param features_per_bag: Number of features to obtain
    :returns: Interpolated features
    :rtype: np.ndarray

    """
    # Row indexing below needs an array, also for list input
    features = np.asarray(features)
    feature_size = features.shape[1]
    interpolated_features = np.zeros((features_per_bag, feature_size))
    interpolation_indices = np.round(
        np.linspace(0, len(features) - 1, num=features_per_bag + 1)
    ).astype(int)

    for bag_idx in range(features_per_bag):
        start = interpolation_indices[bag_idx]
        end = interpolation_indices[bag_idx + 1]

        assert end >= start

        if start == end:
            temp_vect = features[start, :]
        else:
            # Both boundaries are included in the window
            temp_vect = np.mean(features[start:end+1, :], axis=0)

        # Check the norm BEFORE dividing: dividing first would yield NaN
        # and make the zero test unreachable.
        norm = np.linalg.norm(temp_vect)
        if norm == 0:
            print("Error")
        else:
            temp_vect = temp_vect / norm

        interpolated_features[bag_idx, :] = temp_vect

    return interpolated_features
57 |
58 |
def extrapolate(outputs, num_frames):
    """Stretch a sequence of outputs to a given length

    Each position of the result takes the output whose (rounded) index
    is at the same relative position in ``outputs``, so outputs are
    repeated when ``num_frames`` is larger than ``len(outputs)``.

    :param outputs: Array of predicted outputs
    :param num_frames: Expected size of the output array
    :returns: Array with ``num_frames`` entries
    :rtype: np.ndarray

    """
    source_positions = np.round(
        np.linspace(0, len(outputs) - 1, num=num_frames)
    )
    return np.array([outputs[int(position)] for position in source_positions])
74 |
--------------------------------------------------------------------------------
/original_model/utils/video_util.py:
--------------------------------------------------------------------------------
1 | from utils.array_util import *
2 | import parameters as params
3 | import cv2
4 |
5 |
def get_video_clips(video_path):
    """Read a video and split it into consecutive, non-overlapping clips

    :param video_path: Path to the video
    :returns: Tuple ``(clips, num_frames)``: the chunks produced by
        ``sliding_window`` with size and stride both equal to
        ``params.frame_count``, and the number of frames read
    :rtype: tuple

    """
    video_frames = get_video_frames(video_path)
    clip_length = params.frame_count
    return (
        sliding_window(video_frames, clip_length, clip_length),
        len(video_frames),
    )
17 |
18 |
def get_video_frames(video_path):
    """Reads the video given a file path

    Frames are converted from BGR to RGB. An empty list is returned
    when the capture cannot be opened or yields no frame.

    :param video_path: Path to the video
    :returns: Video as a list of RGB frames
    :rtype: list

    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames could be read
                break
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if a frame conversion fails
        cap.release()
    return frames
37 |
--------------------------------------------------------------------------------
/original_model/utils/visualization_util.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | matplotlib.use('Agg')
3 | import matplotlib.pyplot as plt
4 | from matplotlib.animation import FuncAnimation
5 | from utils.video_util import *
6 |
7 |
def visualize_clip(clip, convert_bgr=False, save_gif=False, file_path=None):
    """Animate the frames of a clip, optionally saving the result

    :param clip: Sequence of frames to display
    :param convert_bgr: If True, each frame is converted from BGR to RGB
        before being drawn
    :param save_gif: If True the animation is saved to ``file_path``,
        otherwise it is shown with ``plt.show()``
    :param file_path: Destination of the animation when ``save_gif`` is set
    """
    frame_total = len(clip)
    fig, ax = plt.subplots()
    fig.set_tight_layout(True)

    def update(i):
        frame = (
            cv2.cvtColor(clip[i], cv2.COLOR_BGR2RGB) if convert_bgr
            else clip[i]
        )
        plt.imshow(frame)
        return plt

    # FuncAnimation calls 'update' once for every frame index of the clip,
    # requesting an interval of 1ms between frames.
    anim = FuncAnimation(
        fig, update, frames=np.arange(0, frame_total), interval=1
    )
    if not save_gif:
        # plt.show() will just loop the animation forever.
        plt.show()
        return
    anim.save(file_path, dpi=80, writer='imagemagick')
29 |
30 |
def visualize_predictions(video_path, predictions, save_path):
    """Animate a video together with the curve of its per-frame scores

    The figure shows the current frame on top and, below it, the scores
    of all frames before the current one.

    :param video_path: Path to the video
    :param predictions: One score per frame of the video
    :param save_path: Destination of the animation; when falsy the
        animation is shown with ``plt.show()`` instead
    """
    frames = get_video_frames(video_path)
    assert len(frames) == len(predictions)

    fig, ax = plt.subplots(figsize=(5, 5))
    fig.set_tight_layout(True)

    score_line = matplotlib.lines.Line2D([], [])

    # Upper panel: video frame
    frame_axes = plt.subplot(2, 1, 1)
    shown_frame = frame_axes.imshow(frames[0])
    # Lower panel: score curve
    score_axes = plt.subplot(2, 1, 2)
    score_axes.set_xlim(0, len(frames))
    score_axes.set_ylim(0, 1.15)
    score_axes.add_line(score_line)

    def update(i):
        score_line.set_data(range(0, i), predictions[0:i])
        shown_frame.set_data(frames[i])
        return plt

    # FuncAnimation calls 'update' for every 10th frame index, requesting
    # an interval of 1ms between updates and without repeating.
    anim = FuncAnimation(
        fig, update, frames=np.arange(0, len(frames), 10),
        interval=1, repeat=False
    )

    if not save_path:
        plt.show()
        return
    anim.save(save_path, dpi=200, writer='imagemagick')
64 |
--------------------------------------------------------------------------------
/overlay_curves.py:
--------------------------------------------------------------------------------
1 | import sklearn.metrics
2 | import scipy.optimize, scipy.interpolate
3 | import numpy as np
4 | import pandas as pd
5 | import os
6 | import proposal.utils.array_util as array_util
7 | import matplotlib.pyplot as plt
8 |
def calculate_information_for_curves(gts, preds):
    """Compute the data needed to draw the ROC and precision-recall curves

    :param gts: Ground-truth labels
    :param preds: Predicted scores
    :returns: Tuple ``(fpr, tpr, auc, prec, rec, ap)`` with the ROC
        points and their area, followed by the precision-recall points
        and the average precision
    :rtype: tuple
    """
    metrics = sklearn.metrics

    false_pos_rate, true_pos_rate, _ = metrics.roc_curve(gts, preds)
    precision, recall, _ = metrics.precision_recall_curve(gts, preds)

    return (
        false_pos_rate,
        true_pos_rate,
        metrics.auc(false_pos_rate, true_pos_rate),
        precision,
        recall,
        metrics.average_precision_score(gts, preds),
    )
16 |
17 |
# Comma-separated annotations; the first column is used as the index.
# With header=None the remaining columns are labelled 1..6: columns 2-5
# hold the two anomaly intervals and column 6 the number of frames.
ground_truth = pd.read_csv("./dataset/test/temporal-annotation.txt", header=None, index_col=0)

preds_c3d = []
preds_lstm = []
gts = []

for idx, row in ground_truth.iterrows():
    c3d_preds_file_path = os.path.join("predictions_c3d", idx)
    lstm_preds_file_path = os.path.join("predictions_lstm", idx)
    frames = row[6]

    try:
        with open(c3d_preds_file_path, "rb") as f:
            curr_c3d_preds = np.load(f)
        with open(lstm_preds_file_path, "rb") as f:
            curr_lstm_preds = np.load(f)

        # Stretch the stored predictions to one value per frame
        c3d_padded_preds = array_util.extrapolate(curr_c3d_preds, frames)
        lstm_padded_preds = array_util.extrapolate(curr_lstm_preds, frames)

    except FileNotFoundError:
        # If either prediction file is missing, both models get an
        # all-zero prediction for this video
        c3d_padded_preds = np.zeros((frames,1))
        lstm_padded_preds = np.zeros((frames,1))

        print("No predictions generated for {}".format(idx))

    # Per-frame ground truth: 1 inside the annotated intervals (both
    # ends included), 0 elsewhere. -1 marks a missing interval.
    curr_gts = np.zeros(frames)
    anomaly_start_1 = row[2]
    anomaly_end_1 = row[3]

    anomaly_start_2 = row[4]
    anomaly_end_2 = row[5]

    if anomaly_start_1 != -1 and anomaly_end_1 != -1:
        curr_gts[anomaly_start_1:anomaly_end_1+1] = 1

    if anomaly_start_2 != -1 and anomaly_end_2 != -1:
        curr_gts[anomaly_start_2:anomaly_end_2+1] = 1

    preds_c3d.append(c3d_padded_preds)
    preds_lstm.append(lstm_padded_preds)
    gts.append(curr_gts)

gts = np.concatenate(gts)

preds_c3d = np.concatenate(preds_c3d)
preds_lstm = np.concatenate(preds_lstm)

(
    fpr_c3d, tpr_c3d, auc_c3d,
    prec_c3d, rec_c3d, ap_c3d
) = calculate_information_for_curves(gts, preds_c3d)

(
    fpr_lstm, tpr_lstm, auc_lstm,
    prec_lstm, rec_lstm, ap_lstm
) = calculate_information_for_curves(gts, preds_lstm)


# ROC curves of both models on the same axes
plt.title("Curva ROC")
plt.plot(fpr_c3d, tpr_c3d, 'b', label = "C3d - AUC: {:.5f}".format(auc_c3d))
plt.plot(fpr_lstm, tpr_lstm, 'g', label = "Lstm - AUC: {:.5f}".format(auc_lstm))
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'k--')
plt.plot([1, 0], [0, 1],'k:')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.savefig("roc_overlay.pdf")

plt.clf()

# Precision-recall curves of both models on the same axes
plt.title("Curva PR")
plt.plot(rec_c3d, prec_c3d, 'b', label = "C3d - AP: {:.5f}".format(ap_c3d))
plt.plot(rec_lstm, prec_lstm, 'g', label = "Lstm - AP: {:.5f}".format(ap_lstm))
plt.legend(loc = 'upper right')
plt.xlim([0, 1])
plt.ylim([0, 1])
# Axis label spelling fixed (was 'Precison')
plt.ylabel('Precision')
plt.xlabel('Recall')
plt.savefig("pr_overlay.pdf")
100 |
--------------------------------------------------------------------------------
/proposal/README.md:
--------------------------------------------------------------------------------
1 | # Proposal experiments replication
2 |
3 | In this folder you can find the code to replicate the experimentation
4 | of our proposal, using a spatio-temporal feature extractor instead of
5 | the C3D convolutional model. We have tested different models in our
6 | report, specifically extractors that provide descriptors of size 512,
7 | 768 and 1024 for each clip of 16 frames from the video. However, we
8 | only provide the model of size 1024, since it has provided the best
9 | results and the experiments are similar for all the models.
10 |
11 | ## Experimentation replication
12 |
13 | The folder is self-contained and fully written in Python. The
14 | experiments can be completely performed by executing code inside this
15 | folder, without depending on external resources. Code files inside
16 | this folder can be divided into two groups; resource files and scripts.
17 | In resource files, auxiliary utilities and models are defined. Scripts
18 | are provided to replicate the experiments.
19 |
20 | ### Resource files
21 |
22 | The resource files are listed and explained below, in alphabetical
23 | order:
24 |
25 | - `classifier.py`: Definition of the classifier model, together
26 | with functions to save and load the model to disk.
27 | - `configuration.py`: Configuration information for the experiments
28 | (data paths, output paths, annotation files, etc).
29 | - `models.py`: Definition of the feature extractor model.
30 | - `parameters.py`: Information about model structure.
31 | - `video_data_generator.py`: Adaptation of Keras datasets for video
32 | data handling. This code has been adapted from the video frame
33 | generator developed by [Patrice
34 | Ferlet](https://gist.github.com/metal3d) and the original can be
35 | downloaded from
36 | [here](https://gist.github.com/metal3d/0fe5539abfc534855ddfd351d06cfa06)
37 | - `utils` folder: This folder contains utilities to process arrays
38 | and video files.
39 |
40 | ### Scripts
41 |
42 | The developed scripts are listed in the order that should be followed
43 | to replicate the experiments.
44 |
45 | 1. `train_feature_extractor.py`: This script trains the feature
46 | extractor model, solving the video classification task over
47 | UCF-101 dataset (must be downloaded). Afterwards, the model is
48 | saved in `trained_models`.
49 | 1. `extract_temporal_features.py`: This script computes the features
50 | from the videos composing the dataset (videos contained in `dataset`
51 | folder at root project level), and stores them inside the folder
52 | `raw_lstm_features` (if default configuration has been kept). In order
53 | to work properly, the destination folder must exist. The folder
54 | structure can be created with the bash script provided at root project
55 | level.
56 | 2. `preprocess_features.py`: This script takes the previously extracted
57 | features, whose number can vary depending on the original video length,
58 | and computes a fixed-size representation for each video. The new features
59 | are stored inside the folder `processed_lstm_features`.
60 | 3. `train_classifier.py`: This script trains the final classifier
61 | model using the preprocessed features extracted before. After
62 | training, it stores the resulting model inside the folder `trained_models`.
63 | 4. `predict_test_set.py`: After training, this script takes the trained
64 | model and uses it to predict the test set (test features are computed
65 | by the feature extraction and preprocessing steps above).
66 | 5. `calculate_metrics.py`: When the predictions have been made, this
67 | script calculates several performance metrics to validate the model.
68 |
--------------------------------------------------------------------------------
/proposal/calculate_metrics.py:
--------------------------------------------------------------------------------
1 | import sklearn.metrics
2 | import scipy.optimize, scipy.interpolate
3 | import numpy as np
4 | import pandas as pd
5 | import configuration as cfg
6 | import os
7 | import utils.video_util
8 | import utils.array_util
9 | import matplotlib.pyplot as plt
10 |
def eer_score(fpr, tpr, thr):
    """Return the equal error rate (EER) and the corresponding threshold.

    The EER is taken at the ROC point where the false positive rate and
    the false negative rate (``1 - tpr``) are closest, as the mean of
    both rates at that point.

    :param fpr: Array of false positive rates
    :param tpr: Array of true positive rates
    :param thr: Array of thresholds aligned with ``fpr`` and ``tpr``
    :returns: Tuple ``(eer, threshold)``
    :rtype: tuple
    """
    miss_rate = 1-tpr
    closest = np.abs(fpr - miss_rate).argmin()
    equal_error = np.mean((fpr[closest], miss_rate[closest]))
    return equal_error, thr[closest]
18 |
# Comma-separated annotations; the first column is used as the index.
# With header=None the remaining columns are labelled 1..6: columns 2-5
# hold the two anomaly intervals and column 6 the number of frames.
ground_truth = pd.read_csv(cfg.test_temporal_annotations, header=None, index_col=0)

preds = []
gts = []

for idx, row in ground_truth.iterrows():
    preds_file_path = os.path.join(cfg.preds_folder, idx)
    frames = row[6]
    try:
        with open(preds_file_path, "rb") as f:
            curr_preds = np.load(f)

        # Stretch the stored predictions to one value per frame
        padded_preds = utils.array_util.extrapolate(curr_preds, frames)
    except FileNotFoundError:
        # Videos without a prediction file are scored as all zeros
        padded_preds = np.zeros((frames,1))
        print("No predictions generated for {}".format(idx))

    # Per-frame ground truth: 1 inside the annotated intervals (both
    # ends included), 0 elsewhere. -1 marks a missing interval.
    curr_gts = np.zeros(frames)
    anomaly_start_1 = row[2]
    anomaly_end_1 = row[3]

    anomaly_start_2 = row[4]
    anomaly_end_2 = row[5]

    if anomaly_start_1 != -1 and anomaly_end_1 != -1:
        curr_gts[anomaly_start_1:anomaly_end_1+1] = 1

    if anomaly_start_2 != -1 and anomaly_end_2 != -1:
        curr_gts[anomaly_start_2:anomaly_end_2+1] = 1

    preds.append(padded_preds)
    gts.append(curr_gts)

gts = np.concatenate(gts)
preds = np.concatenate(preds)
# Hard labels obtained by rounding the scores
preds_labels = np.round(preds)

acc = sklearn.metrics.accuracy_score(gts, preds_labels)
ap = sklearn.metrics.average_precision_score(gts, preds)
f1 = sklearn.metrics.f1_score(gts, preds_labels)
fpr, tpr, thr = sklearn.metrics.roc_curve(gts, preds)
prec, rec, _ = sklearn.metrics.precision_recall_curve(gts, preds)
eer, _ = eer_score(fpr, tpr, thr)
conf_mat = sklearn.metrics.confusion_matrix(gts, preds_labels)
auc = sklearn.metrics.auc(fpr, tpr)

plt.title("Curva ROC")
plt.plot(fpr, tpr, 'b', label = "AUC: {}".format(auc))
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.savefig(os.path.join(cfg.output_folder, "roc_lstm.png"))

plt.clf()

plt.title("Curva PR")
plt.plot(rec, prec, 'r', label = "LSTM - AP: {:.5f}".format(ap))
plt.legend(loc = 'lower right')
plt.xlim([0, 1])
plt.ylim([0, 1])
# Axis label spelling fixed (was 'Precison')
plt.ylabel('Precision')
plt.xlabel('Recall')
plt.savefig(os.path.join(cfg.output_folder, "pr_curve_lstm.png"))

print("Accuracy: {:.5f}, AUC: {:.5f}, F1: {:.5f}, EER: {:.5f}, AP: {:.5F}".format(
    acc, auc, f1, eer, ap
))

print("Confusion matrix")
print(conf_mat)
92 |
--------------------------------------------------------------------------------
/proposal/classifier.py:
--------------------------------------------------------------------------------
1 | import keras
2 | import scipy.io as sio
3 | from keras import Sequential
4 | from keras.layers import Dense, Dropout
5 | from keras.regularizers import l2
6 |
7 | import configuration as cfg
8 |
def classifier_model():
    """Create the (untrained) fully connected classifier

    The network takes 1024-dimensional inputs and stacks Dense layers of
    512 (ReLU), 64 (no activation) and 1 (sigmoid) units, with a dropout
    of 0.5 after each of the first two. All Dense layers use the
    ``glorot_normal`` initializer and an L2 kernel regularizer of 0.001.

    :returns: The Keras ``Sequential`` model
    """
    def dense(units, **extra):
        # Settings shared by every Dense layer of the classifier
        return Dense(
            units,
            kernel_initializer='glorot_normal',
            kernel_regularizer=l2(0.001),
            **extra
        )

    model = Sequential()
    for layer in (
        dense(512, input_dim=1024, activation='relu'),
        Dropout(0.5),
        dense(64),
        Dropout(0.5),
        dense(1, activation='sigmoid'),
    ):
        model.add(layer)
    return model
18 |
19 |
def build_classifier_model():
    """Create the classifier and load the weights configured in
    ``cfg.classifier_model_weigts``

    :returns: The classifier with its weights loaded
    """
    return load_weights(classifier_model(), cfg.classifier_model_weigts)
24 |
25 |
def conv_dict(dict2):
    """Convert a loaded ``.mat`` weights dictionary to per-layer lists

    Only the keys ``"0"``, ``"1"``, ... below ``len(dict2)`` are
    converted; any other key is ignored. Entries of shape ``(0, 0)`` are
    kept as they are. For every other entry, the arrays found in its
    first row are collected in a list, and arrays of shape ``(1, n)``
    are flattened to shape ``(n,)``.

    :param dict2: Dictionary mapping layer indices (as strings) to the
        stored weight containers
    :returns: Dictionary with the converted entries
    :rtype: dict
    """
    converted = {}
    for i in range(len(dict2)):
        key = str(i)
        if key not in dict2:
            continue

        entry = dict2[key]
        if entry.shape == (0, 0):
            converted[key] = entry
            continue

        # A direct shape test replaces the membership test against a
        # list of 5000 candidate shapes, which was rebuilt for every
        # weight and silently skipped rows wider than 4999.
        converted[key] = [
            weight[0] if weight.ndim == 2 and weight.shape[0] == 1
            else weight
            for weight in entry[0]
        ]
    return converted
42 |
43 |
def load_weights(model, weights_file):
    """Copy the weights stored in a ``.mat`` file into ``model``.

    Args:
        model: model whose layers receive the weights, in layer order.
        weights_file: path of the ``.mat`` file; layer ``k`` is read from
            the entry keyed ``str(k)``.

    Returns:
        The same ``model`` object, after its layers have been updated.
    """
    per_layer = conv_dict(sio.loadmat(weights_file))
    for position, layer in enumerate(model.layers):
        layer.set_weights(per_layer[str(position)])
    return model
53 |
if __name__ == '__main__':
    # Script entry point: build the classifier with its stored weights and
    # print the layer summary.
    model = build_classifier_model()
    model.summary()
57 |
--------------------------------------------------------------------------------
/proposal/configuration.py:
--------------------------------------------------------------------------------
1 | import os
2 |
# Saved feature-extractor model (train_feature_extractor.py writes it with
# model.save and the feature scripts read it with keras.models.load_model).
extractor_model_weights = "./trained_models/rec_feats_weights.h5"

# Classifier weights (.mat) and architecture (.json).
# NOTE(review): "weigts" is misspelled, but classifier.py refers to this exact
# name, so renaming it requires updating the users as well.
classifier_model_weigts = './trained_models/weights_proposal.mat'
classifier_model_json = './trained_models/model_proposal.json'

input_folder = './input'
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
output_folder = '/mnt/sdd/pacoluque/output'

# Video used by display_predictions.py.
sample_video_path = '../dataset/train/abnormal/RoadAccidents021_x264.mp4'

# Root folders: raw videos, per-clip features, and per-segment features.
raw_dataset_folder = '../dataset/'
raw_features_folder = "../raw_lstm_features"
processed_features_folder = "../processed_lstm_features"

# Training videos, split into normal and abnormal sub-folders.
train_set = os.path.join(raw_dataset_folder, 'train')
normal_videos_path = os.path.join(train_set, "normal")
abnormal_videos_path = os.path.join(train_set, "abnormal")

# Per-clip features of the training videos.
raw_features_train_set = os.path.join(raw_features_folder, 'train')
raw_normal_train_features = os.path.join(raw_features_train_set, "normal")
raw_abnormal_train_features = os.path.join(raw_features_train_set, "abnormal")

# Per-segment features of the training videos.
processed_features_train_set = os.path.join(processed_features_folder, 'train')
processed_normal_train_features = os.path.join(processed_features_train_set, "normal")
processed_abnormal_train_features = os.path.join(processed_features_train_set, "abnormal")

# Test videos and their feature folders (no normal/abnormal split).
test_set = os.path.join(raw_dataset_folder, 'test')
raw_test_features = os.path.join(raw_features_folder, 'test')
processed_test_features = os.path.join(processed_features_folder, 'test')

# Destination of the per-video predictions written by predict_test_set.py.
preds_folder = '../predictions_lstm'

# The annotation file lives inside the test video folder itself.
test_temporal_annotations = os.path.join(test_set, "temporal-annotation.txt")
36 |
--------------------------------------------------------------------------------
/proposal/display_predictions.py:
--------------------------------------------------------------------------------
1 | import os
2 | import classifier
3 | from utils.visualization_util import *
4 | import sklearn.preprocessing
5 | import parameters as params
6 | import configuration as cfg
7 |
def run_demo():
    """Score ``cfg.sample_video_path`` and save the result as an animated GIF.

    Steps: split the video into clips, extract one feature vector per full
    clip from the ``lstm_1`` layer of the saved extractor model, condense
    the features into ``params.features_per_bag`` segments, score every
    segment with the classifier, stretch the scores to one value per frame
    and render them next to the video frames.

    The GIF is written to ``cfg.output_folder`` under the video's base name.
    """
    # Imported here because this module never imports keras at file level
    # (the wildcard import from utils.visualization_util does not provide it).
    import keras

    video_name = os.path.basename(cfg.sample_video_path).split('.')[0]

    # read video
    video_clips, num_frames = get_video_clips(cfg.sample_video_path)

    print("Number of clips in the video : ", len(video_clips))

    # build models
    original_model = keras.models.load_model(cfg.extractor_model_weights)
    feature_extractor = keras.models.Model(
        inputs=original_model.input,
        outputs=original_model.get_layer("lstm_1").output
    )
    # Only the ``classifier`` module is imported, so the builder has to be
    # reached through it.
    classifier_model = classifier.build_classifier_model()

    print("Models initialized")

    # extract features
    rgb_features = []
    for i, clip in enumerate(video_clips):
        clip = np.array(clip)
        # A trailing clip with fewer frames than a full clip is dropped.
        if len(clip) < params.frame_count:
            continue

        # Same preprocessing as the feature-extraction script.
        clip = keras.applications.xception.preprocess_input(clip)
        # The extractor works on batches of clips: add the batch axis and
        # take the single resulting feature vector back out.
        rgb_feature = feature_extractor.predict(clip[np.newaxis, ...])[0]
        rgb_features.append(rgb_feature)

        print("Processed clip : ", i)

    rgb_features = np.array(rgb_features)
    rgb_feature_bag = interpolate(rgb_features, params.features_per_bag)

    # classify using the trained classifier model
    predictions = classifier_model.predict(rgb_feature_bag)

    predictions = np.array(predictions).squeeze()

    predictions = extrapolate(predictions, num_frames)

    save_path = os.path.join(cfg.output_folder, video_name + '.gif')
    # visualize predictions
    visualize_predictions(cfg.sample_video_path, predictions, save_path)
    # Report success only once the GIF has actually been written.
    print('Executed Successfully - '+video_name + '.gif saved')
54 |
55 |
if __name__ == '__main__':
    # Script entry point: process the sample video named in the configuration.
    run_demo()
58 |
--------------------------------------------------------------------------------
/proposal/extract_temporal_features.py:
--------------------------------------------------------------------------------
1 | import os
2 | import keras
3 | import models
4 | from utils import video_util
5 | import configuration as cfg
6 | import numpy as np
7 | import sklearn.preprocessing
8 |
def _extract_folder_features(extractor, videos_dir, features_dir):
    """Extract and save one feature matrix per video found in ``videos_dir``.

    Each video is split into clips, every clip is preprocessed for Xception
    and passed through ``extractor``; the resulting rows are L2-normalised
    and saved as ``<name>.npy`` inside ``features_dir``.

    Entries that yield no complete clip (videos shorter than one clip, or
    files that are not videos) are reported and skipped instead of
    aborting the run.
    """
    video_names = sorted(os.listdir(videos_dir))
    for i, vid_name in enumerate(video_names):
        print("Processing {} ({}/{})".format(vid_name, i+1, len(video_names)))
        vid_path = os.path.join(videos_dir, vid_name)
        # Drops the last 9 characters of the file name.
        # NOTE(review): presumably the "_x264.mp4" tail seen in
        # cfg.sample_video_path — confirm for every dataset file.
        feats_path = os.path.join(features_dir, vid_name[:-9] + ".npy")

        clips, frames = video_util.get_video_clips(vid_path)

        # Remove last clip if number of frames is not equal to 16
        if frames % 16 != 0:
            clips = clips[:-1]

        # np.stack fails on an empty list, so bail out before reaching it.
        if len(clips) == 0:
            print("Skipping {}: no complete 16-frame clip could be read".format(vid_name))
            continue

        prep_clips = [keras.applications.xception.preprocess_input(np.array(clip))
                      for clip in clips]
        prep_clips = np.stack(prep_clips, axis=0)

        features = extractor.predict(prep_clips)
        features = sklearn.preprocessing.normalize(features, axis=1)

        with open(feats_path, "wb") as f:
            np.save(f, features)


# The features are the output of the "lstm_1" layer of the saved model.
original_model = keras.models.load_model(cfg.extractor_model_weights)
spatiotemporal_extractor = keras.models.Model(
    inputs = original_model.input,
    outputs = original_model.get_layer("lstm_1").output
)

_extract_folder_features(
    spatiotemporal_extractor, cfg.normal_videos_path, cfg.raw_normal_train_features
)

print("Processing abnormal videos...")
_extract_folder_features(
    spatiotemporal_extractor, cfg.abnormal_videos_path, cfg.raw_abnormal_train_features
)

# The test folder also holds the temporal-annotation text file; it yields no
# clips and is therefore skipped by the helper.
print("Processing test videos...")
_extract_folder_features(
    spatiotemporal_extractor, cfg.test_set, cfg.raw_test_features
)
86 |
--------------------------------------------------------------------------------
/proposal/models.py:
--------------------------------------------------------------------------------
1 | import keras
2 |
def recurrent_feats_model():
    """Build the recurrent clip classifier used to learn video features.

    An ImageNet-initialised Xception, cut at its second-to-last layer and
    frozen, is applied to every frame of a clip. The per-frame outputs feed
    a 1024-unit LSTM (only its last output is kept), followed by two
    ReLU dense layers with dropout and a 101-way softmax.

    Returns:
        The uncompiled Keras model taking clips of 299x299 RGB frames.
    """
    base = keras.applications.Xception(include_top=True, weights='imagenet')

    # Frame-level encoder; all of its layers are excluded from training.
    frame_encoder = keras.models.Model(
        inputs=base.layers[0].input,
        outputs=base.layers[-2].output,
    )
    for frozen_layer in frame_encoder.layers:
        frozen_layer.trainable = False

    # The first (time) dimension is left open: clips may have any length.
    clip_input = keras.layers.Input((None, 299, 299, 3))
    x = keras.layers.TimeDistributed(frame_encoder)(clip_input)
    x = keras.layers.LSTM(1024, return_sequences=False, dropout=0.6)(x)

    for units in (512, 128):
        x = keras.layers.Dense(units, activation='relu')(x)
        x = keras.layers.Dropout(0.5)(x)

    class_probs = keras.layers.Dense(101, activation='softmax')(x)
    return keras.models.Model(inputs=clip_input, outputs=class_probs)
28 |
--------------------------------------------------------------------------------
/proposal/parameters.py:
--------------------------------------------------------------------------------
# Frame geometry.
# NOTE(review): utils/video_util.py resizes frames to a hard-coded 299x299,
# so these three values look unused in this package — confirm.
frame_height = 240
frame_width = 320
channels = 3

# Number of consecutive frames that make up one clip.
frame_count = 16

# Number of segments each video is condensed into before classification.
features_per_bag = 32
8 |
--------------------------------------------------------------------------------
/proposal/predict_test_set.py:
--------------------------------------------------------------------------------
1 | import classifier
2 | import configuration as cfg
3 | import numpy as np
4 | import os
5 |
def load_test_set(videos_path, videos_list):
    """Load the saved feature arrays of the listed videos.

    Args:
        videos_path: folder that contains the ``.npy`` files.
        videos_list: file names to load, in the order they should appear.

    Returns:
        A numpy array stacking the loaded arrays in list order.
    """
    def _read(name):
        with open(os.path.join(videos_path, name), "rb") as handle:
            return np.load(handle)

    return np.array([_read(name) for name in videos_list])
17 |
# Score every processed test video with the trained classifier and store the
# scores, one .npy file per video, in cfg.preds_folder.
classifier_model = classifier.build_classifier_model()

vid_list = os.listdir(cfg.processed_test_features)
vid_list.sort()

test_set = load_test_set(cfg.processed_test_features, vid_list)

# Saving fails if the destination folder is missing.
os.makedirs(cfg.preds_folder, exist_ok=True)

for filename, example in zip(vid_list, test_set):
    predictions_file = filename[:-4] + '.npy'
    pred_path = os.path.join(cfg.preds_folder, predictions_file)
    pred = classifier_model.predict_on_batch(example)
    with open(pred_path, "wb") as f:
        # Write through the handle that is already open instead of making
        # numpy open the same path a second time.
        np.save(f, pred, allow_pickle=True)
31 |
--------------------------------------------------------------------------------
/proposal/preprocess_features.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import sklearn.preprocessing
4 | import configuration as cfg
5 |
def transform_into_segments(features, n_segments=32):
    """Condense a sequence of feature rows into ``n_segments`` averaged rows.

    The rows are cut into ``n_segments`` contiguous groups whose start
    indices are evenly spaced; each group is averaged and every resulting
    row is L2-normalised.

    Args:
        features: 2-D array with one feature row per clip.
        n_segments: number of rows in the result.

    Returns:
        Array with ``n_segments`` normalised rows.

    Raises:
        RuntimeError: if ``features`` has fewer than ``n_segments`` rows.
    """
    n_rows = features.shape[0]
    if n_rows < n_segments:
        raise RuntimeError("Number of prev segments lesser than expected output size")

    starts = np.linspace(0, n_rows, n_segments, dtype=int, endpoint=False)

    # Every group runs up to the start of the next one ...
    segment_means = [
        np.mean(features[lo:hi, :], axis=0)
        for lo, hi in zip(starts[:-1], starts[1:])
    ]
    # ... and the last group takes whatever rows remain.
    segment_means.append(np.mean(features[starts[-1]:, :], axis=0))

    return sklearn.preprocessing.normalize(np.array(segment_means), axis=1)
21 |
def _process_features_folder(raw_dir, processed_dir):
    """Turn every raw feature file in ``raw_dir`` into a segment file.

    Each file is loaded, condensed with ``transform_into_segments`` and
    saved under the same name in ``processed_dir``. Videos with too few
    clips are reported and skipped.
    """
    for filename in os.listdir(raw_dir):
        print("Processing {}".format(filename))
        raw_file_path = os.path.join(raw_dir, filename)
        processed_file_path = os.path.join(processed_dir, filename)

        with open(raw_file_path, "rb") as f:
            feats = np.load(f, allow_pickle=True)

        # Only the transformation signals "too short"; keeping the save
        # outside the try block avoids mislabelling an error raised there.
        try:
            new_feats = transform_into_segments(feats)
        except RuntimeError:
            print("Video {} too short".format(filename))
            continue

        with open(processed_file_path, "wb") as f:
            np.save(f, new_feats, allow_pickle=True)


_process_features_folder(
    cfg.raw_normal_train_features, cfg.processed_normal_train_features
)
_process_features_folder(
    cfg.raw_abnormal_train_features, cfg.processed_abnormal_train_features
)
_process_features_folder(
    cfg.raw_test_features, cfg.processed_test_features
)
76 |
--------------------------------------------------------------------------------
/proposal/train_classifier.py:
--------------------------------------------------------------------------------
1 | from keras.models import Sequential
2 | from keras.layers import Dense, Dropout
3 | from keras.regularizers import l2
4 | from keras.optimizers import Adagrad, Adam
5 | from scipy.io import savemat
6 | from keras.models import model_from_json
7 | import os
8 | import configuration as cfg
9 |
10 | from os import listdir
11 | import numpy as np
12 | import keras.backend as K
13 | import classifier
14 |
15 | from datetime import datetime
16 |
def save_model(model, json_path, weight_path):
    """Write a model's architecture (JSON) and layer weights (``.mat``).

    Args:
        model: object exposing ``to_json()`` and ``layers``; every layer
            must expose ``get_weights()``.
        json_path: destination of the JSON architecture description.
        weight_path: destination of the ``.mat`` file. The weights of layer
            ``k`` are stored under the key ``str(k)`` as an object array
            with one element per weight array.
    """
    # Context manager so the file is flushed and closed deterministically.
    with open(json_path, 'w') as json_file:
        json_file.write(model.to_json())

    weights_by_layer = {}
    for index, layer in enumerate(model.layers):
        weights = layer.get_weights()
        cell = np.empty(len(weights), dtype=object)
        # Element-wise assignment keeps every weight array intact; a slice
        # assignment lets numpy try to merge arrays that happen to share a
        # leading dimension, which can fail.
        for position, weight in enumerate(weights):
            cell[position] = weight
        weights_by_layer[str(index)] = cell
    savemat(weight_path, weights_by_layer)
29 |
def load_model(json_path):
    """Rebuild a model from the JSON architecture file at ``json_path``.

    Only the architecture is restored; no weights are loaded here.
    """
    # Context manager so the file handle is closed once the text is read.
    with open(json_path) as json_file:
        return model_from_json(json_file.read())
33 |
def load_batch_train(normal_path, normal_list, abnormal_path, abnormal_list):
    """Assemble one training batch from randomly chosen videos.

    Up to 30 abnormal and 30 normal feature files are picked at random and
    stacked row-wise, abnormal videos first.

    Args:
        normal_path: folder holding the normal feature files.
        normal_list: file names available in ``normal_path``.
        abnormal_path: folder holding the abnormal feature files.
        abnormal_list: file names available in ``abnormal_path``.

    Returns:
        ``(features, labels)``: the stacked feature rows and a ``uint8``
        vector of length ``32 * 60`` whose first half is 1 and whose second
        half is 0.
    """
    batchsize = 60
    half = int(batchsize / 2)

    def _load_picked(folder, names, picked):
        loaded = []
        for idx in picked:
            with open(os.path.join(folder, names[idx]), "rb") as handle:
                loaded.append(np.load(handle))
        return loaded

    # Both random draws happen before any file is read, abnormal first.
    abnormal_pick = np.random.permutation(len(abnormal_list))[:half]
    normal_pick = np.random.permutation(len(normal_list))[:half]

    abnormal_feats = _load_picked(abnormal_path, abnormal_list, abnormal_pick)
    normal_feats = _load_picked(normal_path, normal_list, normal_pick)

    all_feats = np.vstack(abnormal_feats + normal_feats)

    # The label vector has a fixed length: 32 rows per video, 60 videos.
    all_labels = np.zeros(32*batchsize, dtype='uint8')
    all_labels[:32*half] = 1

    return all_feats, all_labels
68 |
69 |
def custom_objective(y_true, y_pred):
    """Ranking loss over a batch of 60 videos with 32 segment scores each.

    The flattened predictions are read as 60 consecutive groups of 32
    scores. The first 30 groups are the abnormal videos and the last 30 the
    normal ones (the order in which ``load_batch_train`` stacks them).

    The loss is the sum of
    * a hinge ranking term comparing the highest score of every abnormal
      video with the highest score of every normal video,
    * a temporal smoothness term (squared differences between consecutive
      scores of the abnormal videos), weighted by 0.00004,
    * a sparsity term (sum of the scores of the abnormal videos), weighted
      by 0.00004.

    ``y_true`` is reshaped but never used: the labels are implied by the
    position of each video in the batch.
    """

    y_true = K.reshape(y_true, [-1])
    y_pred = K.reshape(y_pred, [-1])
    n_seg = 32
    nvid = 60
    n_exp = int(nvid / 2)

    max_scores_list = []
    z_scores_list = []
    temporal_constrains_list = []
    sparsity_constrains_list = []

    # First half of the batch: per video, keep the highest score plus the
    # smoothness and sparsity penalties.
    for i in range(0, n_exp, 1):

        video_predictions = y_pred[i*n_seg:(i+1)*n_seg]

        max_scores_list.append(K.max(video_predictions))
        temporal_constrains_list.append(
            K.sum(K.pow(video_predictions[1:] - video_predictions[:-1], 2))
        )
        sparsity_constrains_list.append(K.sum(video_predictions))

    # Second half of the batch: only the highest score is needed.
    for j in range(n_exp, 2*n_exp, 1):

        video_predictions = y_pred[j*n_seg:(j+1)*n_seg]
        max_scores_list.append(K.max(video_predictions))

    max_scores = K.stack(max_scores_list)
    temporal_constrains = K.stack(temporal_constrains_list)
    sparsity_constrains = K.stack(sparsity_constrains_list)

    # For every video of the second half, accumulate the hinge
    # max(0, 1 - first_half_max + second_half_max) over all videos of the
    # first half.
    for ii in range(0, n_exp, 1):
        max_z = K.maximum(1 - max_scores[:n_exp] + max_scores[n_exp+ii], 0)
        z_scores_list.append(K.sum(max_z))

    z_scores = K.stack(z_scores_list)
    z = K.mean(z_scores)

    return z + \
        0.00004*K.sum(temporal_constrains) + \
        0.00004*K.sum(sparsity_constrains)
112 |
# Training script: fit the classifier on randomly drawn batches of processed
# features and store the model in ``output_dir``.
output_dir = "trained_models/"
normal_dir = cfg.processed_normal_train_features
abnormal_dir = cfg.processed_abnormal_train_features

normal_list = os.listdir(normal_dir)
normal_list.sort()
abnormal_list = os.listdir(abnormal_dir)
abnormal_list.sort()

weights_path = output_dir + 'weights_proposal.mat'

model_path = output_dir + 'model_proposal.json'

#Create Full connected Model
model = classifier.classifier_model()

adagrad=Adagrad(lr=0.002, epsilon=1e-07)
model.compile(loss=custom_objective, optimizer=adagrad)

# exist_ok avoids the separate "does it exist?" check.
os.makedirs(output_dir, exist_ok=True)

# Plain list: appending is constant time, whereas re-stacking an array on
# every iteration copies the whole history each time.
loss_graph = []
num_iters = 20000
total_iterations = 0
batchsize=60
time_before = datetime.now()


for it_num in range(num_iters):
    inputs, targets = load_batch_train(
        normal_dir, normal_list, abnormal_dir, abnormal_list
    )
    batch_loss = model.train_on_batch(inputs, targets)
    loss_graph.append(batch_loss)
    total_iterations += 1
    # Progress report every 20 iterations, checkpoint every 1000.
    if total_iterations % 20 == 0:
        print ("Iteration=" + str(total_iterations) + " took: " + str(datetime.now() - time_before) + ", with loss of " + str(batch_loss))
    if total_iterations % 1000 == 0:
        save_model(model, model_path, output_dir + "weights_proposal_{}.mat".format(total_iterations))

# Announce success only after the final model has been written.
save_model(model, model_path, weights_path)
print("Train Successful - Model saved")
156 |
--------------------------------------------------------------------------------
/proposal/train_feature_extractor.py:
--------------------------------------------------------------------------------
1 | import keras
2 | import video_data_generator
3 | import models
4 | import configuration as cfg
5 |
# Training script for the recurrent feature extractor: fits the model from
# models.recurrent_feats_model() on the clips found under ../ucf101 and saves
# it to cfg.extractor_model_weights.

# Both generators load their whole folder in the constructor.
videogen_train = video_data_generator.VideoFrameGenerator("../ucf101/train", batch_size=16)
videogen_test = video_data_generator.VideoFrameGenerator("../ucf101/test", batch_size=16)
model = models.recurrent_feats_model()

opt = keras.optimizers.Adam(lr=1e-5, decay=1e-6)
model.compile(optimizer=opt, loss='categorical_crossentropy',
              metrics=[
                  keras.metrics.categorical_accuracy,
                  keras.metrics.top_k_categorical_accuracy
              ])

model.fit_generator(videogen_train, epochs = 500, validation_data=videogen_test,
                    callbacks=[
                        # Intermediate checkpoints, named after the epoch and
                        # selected on validation accuracy.
                        keras.callbacks.ModelCheckpoint(
                            filepath="trained_models/rec_feats_weights.{epoch:03d}.h5",
                            save_best_only=True,
                            monitor="val_categorical_accuracy",
                            period=20
                        ),
                        # Training history is logged to a CSV file.
                        keras.callbacks.CSVLogger(
                            filename="train_history.csv"
                        )
                    ])

# Final model, saved to the path the feature-extraction scripts load from.
model.save(cfg.extractor_model_weights)
31 |
--------------------------------------------------------------------------------
/proposal/trained_models/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fluque1995/tfm-anomaly-detection/d2815b50837d78e13bc2d07fb2254b0aacc48b21/proposal/trained_models/.gitignore
--------------------------------------------------------------------------------
/proposal/utils/array_util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def sliding_window(arr, size, stride):
    """Cut ``arr`` into windows of ``size`` items taken every ``stride`` items.

    Windows that would be empty are left out; a final window shorter than
    ``size`` is kept.

    Returns:
        A numpy array built from the list of windows.
    """
    num_chunks = int((len(arr) - size) / stride) + 2
    candidates = (
        arr[start:start + size]
        for start in range(0, num_chunks * stride, stride)
    )
    return np.array([window for window in candidates if len(window) > 0])
11 |
12 |
def chunks(l, n):
    """Yield consecutive slices of ``l`` with ``n`` items each.

    The last slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    yield from (l[start:start + n] for start in range(0, len(l), n))
16 |
17 |
def interpolate(features, features_per_bag):
    """Condense per-clip features into ``features_per_bag`` unit-norm rows.

    Evenly spaced (rounded) indices split the clips into
    ``features_per_bag`` ranges. Each output row is the mean of the rows in
    its range (both ends included), divided by its Euclidean norm.

    Args:
        features: 2-D array-like with one feature row per clip.
        features_per_bag: number of rows in the result.

    Returns:
        Array of shape ``(features_per_bag, feature_size)``. A row whose
        mean vector is all zeros is reported with ``"Error"`` and left as
        zeros instead of becoming NaN.
    """
    # Accept lists as well as arrays; the 2-D indexing below needs an array.
    features = np.asarray(features)
    feature_size = features.shape[1]
    interpolated_features = np.zeros((features_per_bag, feature_size))
    interpolation_indicies = np.round(np.linspace(0, len(features) - 1, num=features_per_bag + 1))

    for count in range(len(interpolation_indicies) - 1):
        start = int(interpolation_indicies[count])
        end = int(interpolation_indicies[count + 1])

        assert end >= start

        if start == end:
            temp_vect = features[start, :]
        else:
            temp_vect = np.mean(features[start:end+1, :], axis=0)

        # Check the norm BEFORE dividing: dividing a zero vector by its norm
        # yields NaN, after which a zero-norm test can never succeed.
        norm = np.linalg.norm(temp_vect)
        if norm == 0:
            print("Error")
        else:
            temp_vect = temp_vect / norm

        interpolated_features[count, :] = temp_vect

    return interpolated_features
43 |
44 |
def extrapolate(outputs, num_frames):
    """Stretch ``outputs`` to ``num_frames`` values by repeating entries.

    Every output position is mapped to the entry of ``outputs`` at the
    rounded, evenly spaced index between the first and the last entry.

    Returns:
        A numpy array with ``num_frames`` elements.
    """
    positions = np.round(np.linspace(0, len(outputs) - 1, num=num_frames))
    return np.array([outputs[int(position)] for position in positions])
51 |
52 |
def test_interpolate():
    """Check that ``interpolate`` always returns a (32, 2048) bag.

    Covers inputs with fewer clips than segments, exactly as many, and more.
    """
    for n_clips in (24, 32, 42):
        bag = interpolate(np.random.randn(n_clips, 2048), 32)
        assert bag.shape == (32, 2048)
65 |
66 |
--------------------------------------------------------------------------------
/proposal/utils/video_util.py:
--------------------------------------------------------------------------------
1 | from utils.array_util import *
2 | import parameters as params
3 | import cv2
4 |
5 |
def get_video_clips(video_path):
    """Read a video and split its frames into non-overlapping clips.

    Clip length and stride are both ``params.frame_count``.

    Returns:
        ``(clips, n_frames)``: the clips and the total number of frames read.
    """
    all_frames = get_video_frames(video_path)
    clip_len = params.frame_count
    return sliding_window(all_frames, clip_len, clip_len), len(all_frames)
10 |
11 |
def get_video_frames(video_path):
    """Read every frame of a video as a 299x299 RGB image.

    Args:
        video_path: path of the video file.

    Returns:
        List of frames in reading order, each resized to 299x299 and
        converted from BGR to RGB. The list is empty when the file cannot
        be opened or holds no frames.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read failure): stop reading.
                break
            frame = cv2.resize(frame, (299, 299))
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if resizing or conversion raises.
        cap.release()
    return frames
24 |
--------------------------------------------------------------------------------
/proposal/utils/visualization_util.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | matplotlib.use('Agg')
3 | import matplotlib.pyplot as plt
4 | from matplotlib.animation import FuncAnimation
5 | from utils.video_util import *
6 |
7 |
def visualize_clip(clip, convert_bgr=False, save_gif=False, file_path=None):
    """Animate the frames of a clip, optionally saving the animation.

    Args:
        clip: sequence of frames (images).
        convert_bgr: when true, each frame is converted from BGR to RGB
            before being drawn.
        save_gif: when true the animation is written to ``file_path`` with
            the 'imagemagick' writer; otherwise ``plt.show()`` is called.
        file_path: destination used when ``save_gif`` is true.
    """
    num_frames = len(clip)
    fig, ax = plt.subplots()
    fig.set_tight_layout(True)

    def update(i):
        # Draw frame ``i`` of the clip.
        if convert_bgr:
            frame = cv2.cvtColor(clip[i], cv2.COLOR_BGR2RGB)
        else:
            frame = clip[i]
        plt.imshow(frame)
        return plt

    # FuncAnimation calls 'update' once per frame index; here it runs over
    # every frame of the clip with an interval of 1 ms between frames.
    anim = FuncAnimation(fig, update, frames=np.arange(0, num_frames), interval=1)
    if save_gif:
        anim.save(file_path, dpi=80, writer='imagemagick')
    else:
        # NOTE(review): this module selects the non-interactive 'Agg'
        # backend, so this call presumably opens no window — confirm.
        plt.show()
29 |
30 |
def visualize_predictions(video_path, predictions, save_path):
    """Render a video next to the curve of its per-frame predictions.

    The figure has two rows: the current frame on top and, below it, the
    predictions of all frames shown so far.

    Args:
        video_path: video to read the frames from.
        predictions: one value per video frame (asserted).
        save_path: destination of the animation, written with the
            'imagemagick' writer; when falsy ``plt.show()`` is called
            instead.
    """
    frames = get_video_frames(video_path)
    assert len(frames) == len(predictions)

    fig, ax = plt.subplots(figsize=(5, 5))
    fig.set_tight_layout(True)

    # Curve that is extended as the animation advances.
    line = matplotlib.lines.Line2D([], [])

    fig_frame = plt.subplot(2, 1, 1)
    img = fig_frame.imshow(frames[0])
    fig_prediction = plt.subplot(2, 1, 2)
    fig_prediction.set_xlim(0, len(frames))
    fig_prediction.set_ylim(0, 1.15)
    fig_prediction.add_line(line)

    def update(i):
        # Show frame ``i`` and the predictions of the frames before it.
        frame = frames[i]
        x = range(0, i)
        y = predictions[0:i]
        line.set_data(x, y)
        img.set_data(frame)
        return plt

    # FuncAnimation calls 'update' for every 10th frame index, with an
    # interval of 1 ms between frames and without repeating.

    anim = FuncAnimation(fig, update, frames=np.arange(0, len(frames), 10), interval=1, repeat=False)

    if save_path:
        anim.save(save_path, dpi=200, writer='imagemagick')
    else:
        plt.show()

    return
66 |
67 |
68 |
--------------------------------------------------------------------------------
/proposal/video_data_generator.py:
--------------------------------------------------------------------------------
1 | import keras
2 | import cv2 as cv
3 | import glob
4 | import numpy as np
5 | import os
6 | import random
7 |
8 | # author: Patrice Ferlet
9 | # licence: MIT
10 |
class VideoFrameGenerator(keras.utils.Sequence):
    '''
    Video frame generator generates batch of frames from a video directory. Videos should be
    classified in classes directories. E.g:
        videos/class1/file1.avi
        videos/class1/file2.avi
        videos/class2/file3.avi

    All videos are read and kept in memory when the generator is created.
    '''
    def __init__(self, from_dir, batch_size=8, shape=(299, 299, 3), nbframe=16,
                 shuffle=True, transform:keras.preprocessing.image.ImageDataGenerator=None
                 ):
        """
        Create a Video Frame Generator with data augmentation.

        Usage example:
        gen = VideoFrameGenerator('./out/videos/',
            batch_size=5,
            nbframe=3,
            transform=keras.preprocessing.image.ImageDataGenerator(rotation_range=5, horizontal_flip=True))

        Arguments:
        - from_dir: path to the data directory where the videos reside,
            videos should be split into directories that are named as labels
        - batch_size: number of videos to generate
        - shape: target shape of every frame; only the first two values are
            used when resizing
        - nbframe: number of frames per video to send
        - shuffle: boolean, shuffle data at start and after each epoch
        - transform: a keras ImageGenerator configured with random transformations
            to apply on each frame. All videos of a batch are processed with the
            same transformation.
        """

        self.from_dir = from_dir
        self.nbframe = nbframe
        self.batch_size = batch_size
        self.target_shape = shape
        self.shuffle = shuffle
        self.transform = transform

        # the list of classes, built in __list_all_files
        self.classes = []
        self.files = []
        # (class name, frames) pairs of every video that could be loaded
        self.data = []

        # prepare the list
        self.__filecount = 0
        self.__list_all_files()


    def __len__(self):
        """ Length of the generator
        Warning: it gives the number of loop to do, not the number of files or
        frames. The result is number_of_loaded_videos/batch_size. You can use it as
        `step_per_epoch` or `validation_step` for `model.fit_generator` parameters.
        """
        # Count the videos that were actually loaded: files rejected for
        # having too few frames must not produce short or empty batches.
        return len(self.data)//self.batch_size

    def __getitem__(self, index):
        """ Generator needed method - return a batch of `batch_size` video
        block with `self.nbframe` for each
        """
        indexes = self.data[index*self.batch_size:(index+1)*self.batch_size]
        X, Y = self.__data_aug(indexes)
        return X, Y

    def on_epoch_end(self):
        """ When epoch has finished, random shuffle images in memory """
        if self.shuffle:
            random.shuffle(self.data)

    def __list_all_files(self):
        """ List and inject images in memory """
        class_dirs = glob.glob(os.path.join(self.from_dir, '*'))
        # Sorted, directories only: the position of a class in this list is
        # its label index, so it must not depend on the order in which the
        # file system lists entries (which may differ between the train and
        # test folders) nor on stray files next to the class folders.
        self.classes = sorted(
            os.path.basename(c) for c in class_dirs if os.path.isdir(c)
        )
        self.__filecount = len(glob.glob(os.path.join(self.from_dir, '*/*')))

        i = 1
        print("Inject frames in memory, could take a while...")
        for classname in self.classes:
            files = glob.glob(os.path.join(self.from_dir, classname, '*'))
            for file in files:
                print('\rProcessing file %d/%d' % (i, self.__filecount), end='')
                i+=1
                self.__openframe(classname, file)

        if self.shuffle:
            random.shuffle(self.data)


    def __openframe(self, classname, file):
        """Append ORIGINAL frames in memory, transformations are made on the fly"""
        frames = []
        vid = cv.VideoCapture(file)
        try:
            while True:
                grabbed, frame = vid.read()
                if not grabbed:
                    break
                frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
                frame = cv.resize(frame, self.target_shape[:2])
                frames.append(frame)
        finally:
            # Free the capture handle even if a frame conversion fails.
            vid.release()

        # Keep `nbframe` frames spread over the video. The step is at least
        # 1: a video shorter than `nbframe` would otherwise give a step of 0,
        # which is not a valid slice step, instead of being reported below.
        step = max(1, len(frames)//self.nbframe)
        frames = frames[::step]
        if len(frames) >= self.nbframe:
            frames = frames[:self.nbframe]

        # add frames in memory
        frames = np.array(frames, dtype=np.float32)
        frames = keras.applications.xception.preprocess_input(frames)
        if len(frames) == self.nbframe:
            self.data.append((classname, frames))
        else:
            print('\n%s/%s has not enought frames ==> %d' % (classname, file, len(frames)))

    def __data_aug(self, batch):
        """ Make random transformation based on ImageGenerator arguments"""
        # One random transformation is drawn per batch and applied to every
        # frame of every video in it.
        T = None
        if self.transform:
            T = self.transform.get_random_transform(self.target_shape[:2])

        X, Y = [], []
        for y, images in batch:
            Y.append(self.classes.index(y)) # label
            x = []
            for img in images:
                if T:
                    x.append(self.transform.apply_transform(img, T))
                else:
                    x.append(img)

            X.append(x)

        return np.array(X), keras.utils.to_categorical(Y, num_classes=len(self.classes))
143 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Keras==2.2.4
2 | numpy==1.16.2
3 | scipy==1.2.0
4 | opencv_contrib_python==4.2.0.32
5 | pandas==1.0.5
6 | matplotlib==3.0.2
7 | scikit_learn==0.23.2
8 |
--------------------------------------------------------------------------------