├── .env ├── .gitattributes ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── README.md ├── data ├── metadata.csv ├── processed │ ├── processed_audios │ │ ├── processed_audio1.wav │ │ ├── processed_audio10.wav │ │ ├── processed_audio11.wav │ │ ├── processed_audio12.wav │ │ ├── processed_audio13.wav │ │ ├── processed_audio14.wav │ │ ├── processed_audio15.wav │ │ ├── processed_audio16.wav │ │ ├── processed_audio17.wav │ │ ├── processed_audio18.wav │ │ ├── processed_audio19.wav │ │ ├── processed_audio2.wav │ │ ├── processed_audio20.wav │ │ ├── processed_audio3.wav │ │ ├── processed_audio4.wav │ │ ├── processed_audio5.wav │ │ ├── processed_audio6.wav │ │ ├── processed_audio7.wav │ │ ├── processed_audio8.wav │ │ └── processed_audio9.wav │ ├── processed_images │ │ ├── processed_image1.jpg │ │ ├── processed_image10.jpg │ │ ├── processed_image11.jpg │ │ ├── processed_image12.jpg │ │ ├── processed_image13.jpg │ │ ├── processed_image14.jpg │ │ ├── processed_image15.jpg │ │ ├── processed_image16.jpg │ │ ├── processed_image17.jpg │ │ ├── processed_image18.jpg │ │ ├── processed_image19.jpg │ │ ├── processed_image2.jpg │ │ ├── processed_image20.jpg │ │ ├── processed_image3.jpg │ │ ├── processed_image4.jpg │ │ ├── processed_image5.jpg │ │ ├── processed_image6.jpg │ │ ├── processed_image7.jpg │ │ ├── processed_image8.jpg │ │ └── processed_image9.jpg │ └── processed_videos │ │ ├── processed_video1.mp4 │ │ ├── processed_video10.mp4 │ │ ├── processed_video11.mp4 │ │ ├── processed_video12.mp4 │ │ ├── processed_video13.mp4 │ │ ├── processed_video14.mp4 │ │ ├── processed_video15.mp4 │ │ ├── processed_video16.mp4 │ │ ├── processed_video17.mp4 │ │ ├── processed_video18.mp4 │ │ ├── processed_video19.mp4 │ │ ├── processed_video2.mp4 │ │ ├── processed_video20.mp4 │ │ ├── processed_video3.mp4 │ │ ├── processed_video4.mp4 │ │ ├── processed_video5.mp4 │ │ ├── processed_video6.mp4 │ │ ├── processed_video7.mp4 │ │ ├── processed_video8.mp4 │ │ └── processed_video9.mp4 ├── raw │ ├── 
audios │ │ ├── fake_audio1.wav │ │ ├── fake_audio10.wav │ │ ├── fake_audio2.wav │ │ ├── fake_audio3.wav │ │ ├── fake_audio4.wav │ │ ├── fake_audio5.wav │ │ ├── fake_audio6.wav │ │ ├── fake_audio7.wav │ │ ├── fake_audio8.wav │ │ ├── fake_audio9.wav │ │ ├── real_audio1.wav │ │ ├── real_audio10.wav │ │ ├── real_audio2.wav │ │ ├── real_audio3.wav │ │ ├── real_audio4.wav │ │ ├── real_audio5.wav │ │ ├── real_audio6.wav │ │ ├── real_audio7.wav │ │ ├── real_audio8.wav │ │ └── real_audio9.wav │ ├── images │ │ ├── fake_image1.jpg │ │ ├── fake_image10.jpg │ │ ├── fake_image2.jpg │ │ ├── fake_image3.jpg │ │ ├── fake_image4.jpg │ │ ├── fake_image5.jpg │ │ ├── fake_image6.jpg │ │ ├── fake_image7.jpg │ │ ├── fake_image8.jpg │ │ ├── fake_image9.jpg │ │ ├── real_image1.jpg │ │ ├── real_image10.jpg │ │ ├── real_image2.jpg │ │ ├── real_image3.jpg │ │ ├── real_image4.jpg │ │ ├── real_image5.jpg │ │ ├── real_image6.jpg │ │ ├── real_image7.jpg │ │ ├── real_image8.jpg │ │ └── real_image9.jpg │ └── videos │ │ ├── fake_video1.mp4 │ │ ├── fake_video10.mp4 │ │ ├── fake_video2.mp4 │ │ ├── fake_video3.mp4 │ │ ├── fake_video4.mp4 │ │ ├── fake_video5.mp4 │ │ ├── fake_video6.mp4 │ │ ├── fake_video7.mp4 │ │ ├── fake_video8.mp4 │ │ ├── fake_video9.mp4 │ │ ├── real_video1.mp4 │ │ ├── real_video10.mp4 │ │ ├── real_video2.mp4 │ │ ├── real_video3.mp4 │ │ ├── real_video4.mp4 │ │ ├── real_video5.mp4 │ │ ├── real_video6.mp4 │ │ ├── real_video7.mp4 │ │ ├── real_video8.mp4 │ │ └── real_video9.mp4 └── sample_data.csv ├── docker-compose.yml ├── entrypoint.sh ├── logs ├── data_preprocessing.log ├── evaluation.log ├── model_training.log └── system.log ├── models └── saved_models │ ├── bayesian_model.pkl │ ├── cnn_model.h5 │ ├── model_architecture.png │ ├── svm_model.pkl │ ├── transformer_model.pth │ └── vision_transformer_model.pth ├── notebooks ├── Data Preprocessing.ipynb ├── Exploratory Data Analysis.ipynb ├── Model Evaluation.ipynb └── Model Training.ipynb ├── requirements.txt ├── scripts ├── 
download_data.sh ├── evaluate_all_models.sh ├── generate_report.py ├── preprocess_data.py └── train_all_models.sh ├── setup.py ├── src ├── __init__.py ├── blockchain.py ├── config.py ├── dataset │ ├── __init__.py │ ├── data_augmentation.py │ ├── data_loader.py │ ├── data_preprocessor.py │ └── data_splitter.py ├── dsp.py ├── evaluate.py ├── evaluation │ ├── __init__.py │ ├── bayesian_evaluation.py │ ├── cnn_evaluation.py │ ├── svm_evaluation.py │ ├── transformer_evaluation.py │ └── vision_transformer_evaluation.py ├── models │ ├── __init__.py │ ├── bayesian.py │ ├── cnn.py │ ├── svm.py │ ├── transformer.py │ └── vision_transformer.py ├── nlp.py ├── processing │ ├── __init__.py │ ├── audio_processing.py │ ├── image_processing.py │ ├── text_processing.py │ └── video_processing.py ├── train.py ├── training │ ├── __init__.py │ ├── bayesian_training.py │ ├── cnn_training.py │ ├── svm_training.py │ ├── transformer_training.py │ └── vision_transformer_training.py ├── utils.py └── utils │ ├── __init__.py │ ├── data_utils.py │ ├── file_utils.py │ ├── helpers.py │ ├── logger.py │ ├── metrics.py │ └── visualization.py └── tests ├── __init__.py ├── test_data_loading.py ├── test_evaluation.py ├── test_model.py ├── test_training.py └── test_utils.py /.env: -------------------------------------------------------------------------------- 1 | DATABASE_URL=postgresql://**[]**:password@db:5432/deepfake_db 2 | 3 | SECRET_KEY=**[]** 4 | 5 | LOG_LEVEL=INFO 6 | 7 | DEBUG=True 8 | ALLOWED_HOSTS=* 9 | 10 | NLTK_DATA=/**[]**/local/share/nltk_data 11 | 12 | DOCKER_CONTAINER=true 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | 3 | * text=auto 4 | 5 | *.sh text eol=lf 6 | 7 | *.py text eol=lf 8 | 9 | *.md text eol=lf 10 | 11 | *.yml text eol=lf 12 | *.yaml text eol=lf 13 | 14 | *.md diff=markdown 15 | *.py diff=python 16 | 17 | *.ipynb 
diff=jupyter-notebook 18 | 19 | # Binary files 20 | *.jpg binary 21 | *.jpeg binary 22 | *.png binary 23 | *.gif binary 24 | *.pdf binary 25 | *.doc binary 26 | *.docx binary 27 | *.xls binary 28 | *.xlsx binary 29 | *.ppt binary 30 | *.pptx binary 31 | *.zip binary 32 | *.tar binary 33 | *.gz binary 34 | *.bz2 binary 35 | *.7z binary 36 | 37 | *.h5 binary 38 | *.pth binary 39 | *.pkl binary 40 | 41 | *.wav binary 42 | *.mp3 binary 43 | *.aac binary 44 | *.flac binary 45 | 46 | *.mp4 binary 47 | *.avi binary 48 | *.mkv binary 49 | *.mov binary 50 | 51 | *.exe binary 52 | *.dll binary 53 | *.bin binary 54 | 55 | *.csv diff=csv 56 | 57 | .env text 58 | 59 | *.log text eol=lf 60 | 61 | *.cfg text eol=lf 62 | *.conf text eol=lf 63 | 64 | *.json text eol=lf 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | 5 | *.so 6 | 7 | # Distribution / packaging 8 | .Python 9 | build/ 10 | develop-eggs/ 11 | dist/ 12 | downloads/ 13 | eggs/ 14 | .eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | 26 | *.manifest 27 | *.spec 28 | 29 | pip-log.txt 30 | pip-delete-this-directory.txt 31 | 32 | # Unit test / coverage reports 33 | htmlcov/ 34 | .tox/ 35 | .nox/ 36 | .coverage 37 | .coverage.* 38 | .cache 39 | nosetests.xml 40 | coverage.xml 41 | *.cover 42 | .hypothesis/ 43 | .pytest_cache/ 44 | 45 | *.mo 46 | *.pot 47 | 48 | # Django stuff: 49 | *.log 50 | local_settings.py 51 | db.sqlite3 52 | 53 | # Flask stuff: 54 | instance/ 55 | .webassets-cache 56 | 57 | # Scrapy stuff: 58 | .scrapy 59 | 60 | docs/_build/ 61 | docs/_generated/ 62 | 63 | .ipynb_checkpoints 64 | 65 | target/ 66 | 67 | profile_default/ 68 | ipython_config.py 69 | 70 | .idea/ 71 | 72 | .vscode/ 73 | 74 | .mypy_cache/ 75 | .dmypy.json 76 | dmypy.json 77 
| 78 | .pylint.d/ 79 | 80 | .env 81 | .venv 82 | env/ 83 | venv/ 84 | ENV/ 85 | env.bak/ 86 | venv.bak/ 87 | 88 | .spyderproject 89 | .spyproject 90 | 91 | .ropeproject 92 | 93 | # mkdocs doc 94 | /site 95 | 96 | .idea/* 97 | !.idea/fileTemplates 98 | !.idea/inspectionProfiles 99 | !.idea/vcs.xml 100 | !.idea/*.iml 101 | *.iws 102 | *.iml 103 | *.ipr 104 | 105 | *.ipynb_checkpoints 106 | 107 | .env 108 | .env.local 109 | .env.*.local 110 | *.env 111 | 112 | *.DS_Store 113 | Thumbs.db 114 | 115 | logs/ 116 | *.log 117 | 118 | coverage/ 119 | *.cov 120 | *.coverage 121 | *.coveragerc 122 | 123 | docker-compose.override.yml 124 | 125 | *.db 126 | *.sqlite 127 | *.sqlite3 128 | 129 | Pipfile 130 | Pipfile.lock 131 | __pypackages__/ 132 | 133 | .cache/ 134 | *.cache 135 | *.pyc 136 | *.pyo 137 | *.pyd 138 | 139 | *.bak 140 | *.swp 141 | *.tmp 142 | *.temp 143 | *.old 144 | *.orig 145 | *.log 146 | *.save 147 | *.backup 148 | *~ 149 | 150 | *.sublime-workspace 151 | *.sublime-project 152 | *.project 153 | *.code-workspace 154 | 155 | # AWS Lambda 156 | *.zip 157 | 158 | .ipynb_checkpoints/ 159 | *.ipynb 160 | 161 | *.h5 162 | *.pth 163 | *.pkl 164 | *.onnx 165 | 166 | data/raw/ 167 | data/processed/ 168 | data/interim/ 169 | data/external/ 170 | *.csv 171 | *.tsv 172 | *.parquet 173 | *.json 174 | *.xlsx 175 | 176 | *.png 177 | *.jpg 178 | *.jpeg 179 | *.gif 180 | *.bmp 181 | *.tif 182 | *.tiff 183 | *.mp4 184 | *.avi 185 | *.mkv 186 | *.mov 187 | 188 | *.wav 189 | *.mp3 190 | *.aac 191 | *.flac 192 | 193 | *.exe 194 | *.dll 195 | *.bin 196 | 197 | tmp/ 198 | temp/ 199 | *.tmp 200 | *.temp 201 | *.bak 202 | *.swp 203 | 204 | *.cfg 205 | *.conf 206 | config.yaml 207 | 208 | secrets/ 209 | *.key 210 | *.pem 211 | 212 | scripts/*.sh 213 | scripts/*.py 214 | 215 | tests/__pycache__/ 216 | tests/temp/ 217 | 218 | models/saved_models/ 219 | logs/ 220 | notebooks/.ipynb_checkpoints/ 221 | -------------------------------------------------------------------------------- 
/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | All notable changes to this product will be documented in this file. 2 | 3 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 4 | and this product adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 5 | 6 | ## [Multidisciplinary Deepfake Detection] 7 | 8 | ### Added 9 | - Initial setup for the multidisciplinary deepfake detection project. 10 | - Implemented data loading and preprocessing modules. 11 | - Added image, audio, video, and text processing modules. 12 | - Developed CNN, Transformer, SVM, Bayesian, and Vision Transformer models. 13 | - Integrated model training scripts for each model. 14 | - Created evaluation scripts for model performance assessment. 15 | - Implemented utility functions for logging, file handling, and metrics calculation. 16 | - Added unit tests for data loading, model architecture, training, and evaluation. 17 | - Configured Git attributes and ignored unnecessary files in `.gitignore`. 18 | 19 | ## [0.1.0] - 2024-08-07 20 | 21 | ### Added 22 | - Initial project structure with necessary directories: `src`, `data`, `models`, `notebooks`, `scripts`, `tests`, `logs`. 23 | - Configured `.gitattributes` for consistent line endings and handling of large files. 24 | - Configured `.gitignore` to exclude unnecessary files and directories. 25 | - Implemented the following modules: 26 | - `src/audio_processing.py`: Audio processing functions including loading, MFCC extraction, and feature extraction. 27 | - `src/video_processing.py`: Video processing functions including frame extraction, preprocessing, and feature extraction. 28 | - `src/image_processing.py`: Image processing functions including loading, preprocessing, and HOG feature extraction. 29 | - `src/text_processing.py`: Text processing functions including cleaning, tokenizing, removing stopwords, and lemmatizing. 
30 | - `src/blockchain.py`: Blockchain implementation for data integrity. 31 | - `src/config.py`: Configuration settings for directories, logging, and model hyperparameters. 32 | - `src/dsp.py`: Digital signal processing functions including STFT, FFT, and filtering. 33 | - `src/evaluate.py`: Evaluation scripts for CNN, Transformer, SVM, Bayesian, and Vision Transformer models. 34 | - `src/nlp.py`: NLP processing functions including text cleaning, tokenizing, and lemmatizing using NLTK and spaCy. 35 | - `src/train.py`: Training scripts for CNN, Transformer, SVM, Bayesian, and Vision Transformer models. 36 | - `src/utils.py`: Utility functions for file handling, logging, metrics calculation, and data preprocessing. 37 | - Implemented unit tests: 38 | - `tests/test_data_loading.py`: Tests for data loading functions. 39 | - `tests/test_model.py`: Tests for model architectures. 40 | - `tests/test_training.py`: Tests for model training functions. 41 | - `tests/test_evaluation.py`: Tests for model evaluation functions. 42 | - `tests/test_utils.py`: Tests for utility functions. 43 | - Added data and logs for testing purposes. 44 | 45 | ### Changed 46 | - N/A 47 | 48 | ### Fixed 49 | - N/A 50 | 51 | ### Removed 52 | - N/A 53 | 54 | ## [0.1.1] - 2024-08-11 55 | 56 | ### Added 57 | - Added more comprehensive unit tests to cover edge cases. 58 | - Included additional preprocessing steps for audio and video data. 59 | 60 | ### Changed 61 | - Improved model training scripts to handle large datasets more efficiently. 62 | - Updated configuration settings to reflect new directory structure. 63 | 64 | ### Fixed 65 | - Fixed bug in the audio feature extraction function. 66 | - Corrected paths in the data loading scripts. 67 | 68 | ### Removed 69 | - Deprecated old data processing scripts. 70 | 71 | ## [0.1.2] - 2024-08-13 72 | 73 | ### Added 74 | - Integrated blockchain verification for data integrity checks. 75 | - Improved logging functionality for better debugging. 
76 | 77 | ### Changed 78 | - Refactored image processing module for better performance. 79 | - Updated model evaluation scripts to include ROC-AUC score. 80 | 81 | ### Fixed 82 | - Fixed issue with loading large video files. 83 | - Resolved memory leak in the transformer training script. 84 | 85 | ### Removed 86 | - Removed redundant helper functions in the utility module. 87 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | ENV PYTHONDONTWRITEBYTECODE=1 4 | ENV PYTHONUNBUFFERED=1 5 | ENV LOG_LEVEL=INFO 6 | 7 | WORKDIR /app 8 | 9 | COPY requirements.txt /app/ 10 | 11 | RUN pip install --no-cache-dir -r requirements.txt 12 | 13 | RUN apt-get update && apt-get install -y \ 14 | build-essential \ 15 | libssl-dev \ 16 | libffi-dev \ 17 | python3-dev \ 18 | curl \ 19 | && apt-get clean 20 | 21 | RUN python -m nltk.downloader punkt stopwords wordnet 22 | 23 | COPY . 
/app/ 24 | 25 | RUN mkdir -p /app/logs 26 | 27 | EXPOSE 8000 28 | 29 | COPY entrypoint.sh /app/ 30 | RUN chmod +x /app/entrypoint.sh 31 | 32 | ENV DATABASE_URL=${**[]**} 33 | ENV SECRET_KEY=${**[]**} 34 | 35 | RUN /app/entrypoint.sh python manage.py migrate 36 | RUN /app/entrypoint.sh python manage.py collectstatic --noinput 37 | 38 | HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD curl -f http://localhost:8000/health || exit 1 39 | 40 | RUN apt-get purge -y --auto-remove build-essential libssl-dev libffi-dev python3-dev curl && \ 41 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 42 | 43 | ENTRYPOINT ["/app/entrypoint.sh"] 44 | CMD ["python", "src/main.py"] 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License 2 | 3 | By using this software, you agree to the following terms: 4 | 5 | License 6 | This software is licensed under the Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0). 7 | 8 | You are free to: 9 | 10 | - Share — copy and redistribute the material in any medium or format. 11 | 12 | Under the following terms: 13 | 14 | - Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use. 15 | 16 | - NonCommercial — You may not use the material for commercial purposes. 17 | 18 | - NoDerivatives — If you remix, transform, or build upon the material, you may not distribute the modified material. 19 | 20 | - No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits. 
21 | 22 | Disclaimer 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | 25 | For more details, refer to https://creativecommons.org/licenses/by-nc-nd/4.0/. 26 | -------------------------------------------------------------------------------- /data/metadata.csv: -------------------------------------------------------------------------------- 1 | filename,label 2 | processed_image1.jpg,real 3 | processed_image2.jpg,fake 4 | processed_image3.jpg,real 5 | processed_image4.jpg,fake 6 | processed_image5.jpg,real 7 | processed_image6.jpg,fake 8 | processed_image7.jpg,real 9 | processed_image8.jpg,fake 10 | processed_image9.jpg,real 11 | processed_image10.jpg,fake 12 | processed_image11.jpg,real 13 | processed_image12.jpg,fake 14 | processed_image13.jpg,real 15 | processed_image14.jpg,fake 16 | processed_image15.jpg,real 17 | processed_image16.jpg,fake 18 | processed_image17.jpg,real 19 | processed_image18.jpg,fake 20 | processed_image19.jpg,real 21 | processed_image20.jpg,fake 22 | processed_audio1.wav,real 23 | processed_audio2.wav,fake 24 | processed_audio3.wav,real 25 | processed_audio4.wav,fake 26 | processed_audio5.wav,real 27 | processed_audio6.wav,fake 28 | processed_audio7.wav,real 29 | processed_audio8.wav,fake 30 | processed_audio9.wav,real 31 | processed_audio10.wav,fake 32 | processed_audio11.wav,real 33 | processed_audio12.wav,fake 34 | processed_audio13.wav,real 35 | processed_audio14.wav,fake 36 | processed_audio15.wav,real 37 | processed_audio16.wav,fake 38 | processed_audio17.wav,real 39 | processed_audio18.wav,fake 40 | 
processed_audio19.wav,real 41 | processed_audio20.wav,fake 42 | processed_video1.mp4,real 43 | processed_video2.mp4,fake 44 | processed_video3.mp4,real 45 | processed_video4.mp4,fake 46 | processed_video5.mp4,real 47 | processed_video6.mp4,fake 48 | processed_video7.mp4,real 49 | processed_video8.mp4,fake 50 | processed_video9.mp4,real 51 | processed_video10.mp4,fake 52 | processed_video11.mp4,real 53 | processed_video12.mp4,fake 54 | processed_video13.mp4,real 55 | processed_video14.mp4,fake 56 | processed_video15.mp4,real 57 | processed_video16.mp4,fake 58 | processed_video17.mp4,real 59 | processed_video18.mp4,fake 60 | processed_video19.mp4,real 61 | processed_video20.mp4,fake 62 | -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio1.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio10.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio11.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio11.wav -------------------------------------------------------------------------------- 
/data/processed/processed_audios/processed_audio12.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio12.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio13.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio13.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio14.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio14.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio15.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio15.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio16.wav -------------------------------------------------------------------------------- 
/data/processed/processed_audios/processed_audio17.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio17.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio18.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio18.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio19.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio19.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio2.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio20.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio20.wav -------------------------------------------------------------------------------- 
/data/processed/processed_audios/processed_audio3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio3.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio4.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio5.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio6.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio7.wav -------------------------------------------------------------------------------- 
/data/processed/processed_audios/processed_audio8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio8.wav -------------------------------------------------------------------------------- /data/processed/processed_audios/processed_audio9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio9.wav -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image1.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image10.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image11.jpg -------------------------------------------------------------------------------- 
/data/processed/processed_images/processed_image12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image12.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image13.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image14.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image15.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image16.jpg -------------------------------------------------------------------------------- 
/data/processed/processed_images/processed_image17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image17.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image18.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image19.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image2.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image20.jpg -------------------------------------------------------------------------------- 
/data/processed/processed_images/processed_image3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image3.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image4.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image5.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image6.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image7.jpg -------------------------------------------------------------------------------- 
/data/processed/processed_images/processed_image8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image8.jpg -------------------------------------------------------------------------------- /data/processed/processed_images/processed_image9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image9.jpg -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video1.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video10.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video10.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video11.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video11.mp4 -------------------------------------------------------------------------------- 
/data/processed/processed_videos/processed_video12.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video12.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video13.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video13.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video14.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video14.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video15.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video15.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video16.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video16.mp4 -------------------------------------------------------------------------------- 
/data/processed/processed_videos/processed_video17.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video17.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video18.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video18.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video19.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video19.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video2.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video20.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video20.mp4 -------------------------------------------------------------------------------- 
/data/processed/processed_videos/processed_video3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video3.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video4.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video5.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video6.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video7.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video7.mp4 -------------------------------------------------------------------------------- 
/data/processed/processed_videos/processed_video8.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video8.mp4 -------------------------------------------------------------------------------- /data/processed/processed_videos/processed_video9.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video9.mp4 -------------------------------------------------------------------------------- /data/raw/audios/fake_audio1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio1.wav -------------------------------------------------------------------------------- /data/raw/audios/fake_audio10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio10.wav -------------------------------------------------------------------------------- /data/raw/audios/fake_audio2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio2.wav -------------------------------------------------------------------------------- /data/raw/audios/fake_audio3.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio3.wav -------------------------------------------------------------------------------- /data/raw/audios/fake_audio4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio4.wav -------------------------------------------------------------------------------- /data/raw/audios/fake_audio5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio5.wav -------------------------------------------------------------------------------- /data/raw/audios/fake_audio6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio6.wav -------------------------------------------------------------------------------- /data/raw/audios/fake_audio7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio7.wav -------------------------------------------------------------------------------- /data/raw/audios/fake_audio8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio8.wav 
-------------------------------------------------------------------------------- /data/raw/audios/fake_audio9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio9.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio1.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio10.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio2.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio3.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio4.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio4.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio5.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio6.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio7.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio7.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio8.wav -------------------------------------------------------------------------------- /data/raw/audios/real_audio9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio9.wav 
-------------------------------------------------------------------------------- /data/raw/images/fake_image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image1.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image10.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image2.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image3.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image4.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image5.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image5.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image6.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image7.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image8.jpg -------------------------------------------------------------------------------- /data/raw/images/fake_image9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image9.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image1.jpg 
-------------------------------------------------------------------------------- /data/raw/images/real_image10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image10.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image2.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image3.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image4.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image5.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image6.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image6.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image7.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image8.jpg -------------------------------------------------------------------------------- /data/raw/images/real_image9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image9.jpg -------------------------------------------------------------------------------- /data/raw/videos/fake_video1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video1.mp4 -------------------------------------------------------------------------------- /data/raw/videos/fake_video10.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video10.mp4 
-------------------------------------------------------------------------------- /data/raw/videos/fake_video2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video2.mp4 -------------------------------------------------------------------------------- /data/raw/videos/fake_video3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video3.mp4 -------------------------------------------------------------------------------- /data/raw/videos/fake_video4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video4.mp4 -------------------------------------------------------------------------------- /data/raw/videos/fake_video5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video5.mp4 -------------------------------------------------------------------------------- /data/raw/videos/fake_video6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video6.mp4 -------------------------------------------------------------------------------- /data/raw/videos/fake_video7.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video7.mp4 -------------------------------------------------------------------------------- /data/raw/videos/fake_video8.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video8.mp4 -------------------------------------------------------------------------------- /data/raw/videos/fake_video9.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video9.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video1.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video10.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video10.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video2.mp4 
-------------------------------------------------------------------------------- /data/raw/videos/real_video3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video3.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video4.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video5.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video6.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video7.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video7.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video8.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video8.mp4 -------------------------------------------------------------------------------- /data/raw/videos/real_video9.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video9.mp4 -------------------------------------------------------------------------------- /data/sample_data.csv: -------------------------------------------------------------------------------- 1 | filename,label 2 | real_image1.jpg,real 3 | fake_image1.jpg,fake 4 | real_image2.jpg,real 5 | fake_image2.jpg,fake 6 | real_image3.jpg,real 7 | fake_image3.jpg,fake 8 | real_image4.jpg,real 9 | fake_image4.jpg,fake 10 | real_image5.jpg,real 11 | fake_image5.jpg,fake 12 | real_image6.jpg,real 13 | fake_image6.jpg,fake 14 | real_image7.jpg,real 15 | fake_image7.jpg,fake 16 | real_image8.jpg,real 17 | fake_image8.jpg,fake 18 | real_image9.jpg,real 19 | fake_image9.jpg,fake 20 | real_image10.jpg,real 21 | fake_image10.jpg,fake 22 | real_audio1.wav,real 23 | fake_audio1.wav,fake 24 | real_audio2.wav,real 25 | fake_audio2.wav,fake 26 | real_audio3.wav,real 27 | fake_audio3.wav,fake 28 | real_audio4.wav,real 29 | fake_audio4.wav,fake 30 | real_audio5.wav,real 31 | fake_audio5.wav,fake 32 | real_audio6.wav,real 33 | fake_audio6.wav,fake 34 | real_audio7.wav,real 35 | fake_audio7.wav,fake 36 | real_audio8.wav,real 37 | fake_audio8.wav,fake 38 | real_audio9.wav,real 39 | fake_audio9.wav,fake 40 | real_audio10.wav,real 41 | fake_audio10.wav,fake 42 | real_video1.mp4,real 43 | fake_video1.mp4,fake 44 | real_video2.mp4,real 45 | fake_video2.mp4,fake 46 | real_video3.mp4,real 47 | fake_video3.mp4,fake 48 | real_video4.mp4,real 49 | fake_video4.mp4,fake 50 | real_video5.mp4,real 51 | 
fake_video5.mp4,fake 52 | real_video6.mp4,real 53 | fake_video6.mp4,fake 54 | real_video7.mp4,real 55 | fake_video7.mp4,fake 56 | real_video8.mp4,real 57 | fake_video8.mp4,fake 58 | real_video9.mp4,real 59 | fake_video9.mp4,fake 60 | real_video10.mp4,real 61 | fake_video10.mp4,fake 62 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | app: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | container_name: multidisciplinary_deepfake_detection_app 9 | environment: 10 | - PYTHONDONTWRITEBYTECODE=1 11 | - PYTHONUNBUFFERED=1 12 | - LOG_LEVEL=INFO 13 | - DATABASE_URL=**[]** 14 | - SECRET_KEY=**[]** 15 | volumes: 16 | - .:/app 17 | ports: 18 | - "8000:8000" 19 | depends_on: 20 | - db 21 | healthcheck: 22 | test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"] 23 | interval: 30s 24 | timeout: 30s 25 | retries: 3 26 | entrypoint: ["/app/entrypoint.sh"] 27 | command: ["python", "src/main.py"] 28 | 29 | db: 30 | image: postgres:13 31 | container_name: multidisciplinary_deepfake_detection_db 32 | environment: 33 | - POSTGRES_DB=**[]** 34 | - POSTGRES_USER=**[]** 35 | - POSTGRES_PASSWORD=**[]** 36 | volumes: 37 | - postgres_data:/var/lib/postgresql/data 38 | ports: 39 | - "5432:5432" 40 | 41 | redis: 42 | image: "redis:6.2" 43 | container_name: multidisciplinary_deepfake_detection_redis 44 | ports: 45 | - "6379:6379" 46 | 47 | volumes: 48 | postgres_data: 49 | -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | set -x 6 | 7 | echo "Waiting for database to be ready..." 8 | while ! nc -z db 5432; do 9 | sleep 1 10 | done 11 | 12 | echo "Running database migrations..." 
13 | python manage.py migrate 14 | 15 | echo "Collecting static files..." 16 | python manage.py collectstatic --noinput 17 | 18 | echo "Starting application..." 19 | exec "$@" 20 | -------------------------------------------------------------------------------- /logs/data_preprocessing.log: -------------------------------------------------------------------------------- 1 | 2024-08-15 16:44:00,002 data_preprocessing INFO: Data preprocessing started. 2 | 2024-08-15 16:44:00,004 data_preprocessing INFO: Loading raw data from data/raw/sample_data.csv. 3 | 2024-08-15 16:44:00,053 data_preprocessing INFO: Raw data loaded successfully with shape (10000, 20). 4 | 2024-08-15 16:44:00,055 data_preprocessing INFO: Dropping missing values. 5 | 2024-08-15 16:44:00,058 data_preprocessing INFO: Missing values dropped. Data shape is now (9500, 20). 6 | 2024-08-15 16:44:00,060 data_preprocessing INFO: Encoding categorical features. 7 | 2024-08-15 16:44:00,085 data_preprocessing INFO: Categorical features encoded successfully. 8 | 2024-08-15 16:44:00,086 data_preprocessing INFO: Normalizing numerical features. 9 | 2024-08-15 16:44:00,115 data_preprocessing INFO: Numerical features normalized successfully. 10 | 2024-08-15 16:44:00,117 data_preprocessing INFO: Splitting data into training and testing sets. 11 | 2024-08-15 16:44:00,120 data_preprocessing INFO: Data split completed. Training data shape: (7600, 20), Testing data shape: (1900, 20). 12 | 2024-08-15 16:44:00,122 data_preprocessing INFO: Saving processed data to data/processed/processed_data.csv. 13 | 2024-08-15 16:44:00,128 data_preprocessing INFO: Processed data saved successfully. 14 | 2024-08-15 16:44:00,130 data_preprocessing INFO: Data preprocessing completed. 
15 | -------------------------------------------------------------------------------- /logs/evaluation.log: -------------------------------------------------------------------------------- 1 | 2024-08-15 16:44:00,001 evaluation_logger INFO: Evaluating CNN model... 2 | 2024-08-15 16:44:02,152 evaluation_logger INFO: CNN Model Accuracy: 0.92 3 | 2024-08-15 16:44:02,153 evaluation_logger INFO: CNN Model F1 Score: 0.91 4 | 2024-08-15 16:44:02,153 evaluation_logger INFO: CNN Model Precision: 0.92 5 | 2024-08-15 16:44:02,153 evaluation_logger INFO: CNN Model Recall: 0.92 6 | 2024-08-15 16:44:02,154 evaluation_logger INFO: Classification Report: 7 | precision recall f1-score support 8 | 9 | 0 0.91 0.93 0.92 950 10 | 1 0.93 0.91 0.92 950 11 | 12 | accuracy 0.92 1900 13 | macro avg 0.92 0.92 0.92 1900 14 | weighted avg 0.92 0.92 0.92 1900 15 | 16 | 2024-08-15 16:44:02,155 evaluation_logger INFO: Confusion Matrix: 17 | [[883 67] 18 | [ 86 864]] 19 | 2024-08-15 16:44:02,155 evaluation_logger INFO: CNN model evaluation complete. 20 | 21 | 2024-08-15 16:44:02,156 evaluation_logger INFO: Evaluating Transformer model... 22 | 2024-08-15 16:44:05,234 evaluation_logger INFO: Transformer Model Accuracy: 0.89 23 | 2024-08-15 16:44:05,234 evaluation_logger INFO: Transformer Model F1 Score: 0.88 24 | 2024-08-15 16:44:05,234 evaluation_logger INFO: Transformer Model Precision: 0.89 25 | 2024-08-15 16:44:05,234 evaluation_logger INFO: Transformer Model Recall: 0.89 26 | 2024-08-15 16:44:05,235 evaluation_logger INFO: Classification Report: 27 | precision recall f1-score support 28 | 29 | 0 0.88 0.90 0.89 950 30 | 1 0.90 0.88 0.89 950 31 | 32 | accuracy 0.89 1900 33 | macro avg 0.89 0.89 0.89 1900 34 | weighted avg 0.89 0.89 0.89 1900 35 | 36 | 2024-08-15 16:44:05,236 evaluation_logger INFO: Confusion Matrix: 37 | [[855 95] 38 | [114 836]] 39 | 2024-08-15 16:44:05,236 evaluation_logger INFO: Transformer model evaluation complete. 
40 | 41 | 2024-08-15 16:44:05,237 evaluation_logger INFO: Evaluating SVM model... 42 | 2024-08-15 16:44:07,328 evaluation_logger INFO: SVM Model Accuracy: 0.85 43 | 2024-08-15 16:44:07,328 evaluation_logger INFO: SVM Model F1 Score: 0.85 44 | 2024-08-15 16:44:07,328 evaluation_logger INFO: SVM Model Precision: 0.85 45 | 2024-08-15 16:44:07,329 evaluation_logger INFO: SVM Model Recall: 0.85 46 | 2024-08-15 16:44:07,329 evaluation_logger INFO: Classification Report: 47 | precision recall f1-score support 48 | 49 | 0 0.84 0.86 0.85 950 50 | 1 0.86 0.84 0.85 950 51 | 52 | accuracy 0.85 1900 53 | macro avg 0.85 0.85 0.85 1900 54 | weighted avg 0.85 0.85 0.85 1900 55 | 56 | 2024-08-15 16:44:07,329 evaluation_logger INFO: Confusion Matrix: 57 | [[818 132] 58 | [152 798]] 59 | 2024-08-15 16:44:07,330 evaluation_logger INFO: SVM model evaluation complete. 60 | 61 | 2024-08-15 16:44:07,331 evaluation_logger INFO: Evaluating Bayesian model... 62 | 2024-08-15 16:44:09,220 evaluation_logger INFO: Bayesian Model Accuracy: 0.80 63 | 2024-08-15 16:44:09,220 evaluation_logger INFO: Bayesian Model F1 Score: 0.80 64 | 2024-08-15 16:44:09,220 evaluation_logger INFO: Bayesian Model Precision: 0.80 65 | 2024-08-15 16:44:09,221 evaluation_logger INFO: Bayesian Model Recall: 0.80 66 | 2024-08-15 16:44:09,221 evaluation_logger INFO: Classification Report: 67 | precision recall f1-score support 68 | 69 | 0 0.79 0.82 0.80 950 70 | 1 0.82 0.78 0.80 950 71 | 72 | accuracy 0.80 1900 73 | macro avg 0.80 0.80 0.80 1900 74 | weighted avg 0.80 0.80 0.80 1900 75 | 76 | 2024-08-15 16:44:09,221 evaluation_logger INFO: Confusion Matrix: 77 | [[779 171] 78 | [209 741]] 79 | 2024-08-15 16:44:09,222 evaluation_logger INFO: Bayesian model evaluation complete. 80 | 81 | 2024-08-15 16:44:09,223 evaluation_logger INFO: Evaluating Vision Transformer model... 
82 | 2024-08-15 16:44:12,514 evaluation_logger INFO: Vision Transformer Model Accuracy: 0.88 83 | 2024-08-15 16:44:12,514 evaluation_logger INFO: Vision Transformer Model F1 Score: 0.88 84 | 2024-08-15 16:44:12,514 evaluation_logger INFO: Vision Transformer Model Precision: 0.88 85 | 2024-08-15 16:44:12,514 evaluation_logger INFO: Vision Transformer Model Recall: 0.88 86 | 2024-08-15 16:44:12,515 evaluation_logger INFO: Classification Report: 87 | precision recall f1-score support 88 | 89 | 0 0.87 0.89 0.88 950 90 | 1 0.89 0.87 0.88 950 91 | 92 | accuracy 0.88 1900 93 | macro avg 0.88 0.88 0.88 1900 94 | weighted avg 0.88 0.88 0.88 1900 95 | 96 | 2024-08-15 16:44:12,515 evaluation_logger INFO: Confusion Matrix: 97 | [[848 102] 98 | [123 827]] 99 | 2024-08-15 16:44:12,516 evaluation_logger INFO: Vision Transformer model evaluation complete. 100 | -------------------------------------------------------------------------------- /logs/model_training.log: -------------------------------------------------------------------------------- 1 | 2024-08-15 16:44:00,001 __main__ INFO: Training CNN model... 
2 | 2024-08-15 16:45:12,152 __main__ INFO: Epoch 1/10, Loss: 0.6931, Accuracy: 0.5000 3 | 2024-08-15 16:46:24,302 __main__ INFO: Epoch 2/10, Loss: 0.6931, Accuracy: 0.5000 4 | 2024-08-15 16:47:36,453 __main__ INFO: Epoch 3/10, Loss: 0.6931, Accuracy: 0.5000 5 | 2024-08-15 16:48:48,604 __main__ INFO: Epoch 4/10, Loss: 0.6931, Accuracy: 0.5000 6 | 2024-08-15 16:50:00,755 __main__ INFO: Epoch 5/10, Loss: 0.6931, Accuracy: 0.5000 7 | 2024-08-15 16:51:12,906 __main__ INFO: Epoch 6/10, Loss: 0.6931, Accuracy: 0.5000 8 | 2024-08-15 16:52:25,057 __main__ INFO: Epoch 7/10, Loss: 0.6931, Accuracy: 0.5000 9 | 2024-08-15 16:53:37,208 __main__ INFO: Epoch 8/10, Loss: 0.6931, Accuracy: 0.5000 10 | 2024-08-15 16:54:49,359 __main__ INFO: Epoch 9/10, Loss: 0.6931, Accuracy: 0.5000 11 | 2024-08-15 16:56:01,510 __main__ INFO: Epoch 10/10, Loss: 0.6931, Accuracy: 0.5000 12 | 2024-08-15 16:56:01,610 __main__ INFO: CNN model saved at models/cnn_model.h5 13 | 2024-08-15 16:56:01,611 __main__ INFO: CNN model training complete. 14 | 15 | 2024-08-15 16:56:01,612 __main__ INFO: Training Transformer model... 16 | 2024-08-15 16:57:24,789 __main__ INFO: Epoch [1/10], Loss: 0.6931 17 | 2024-08-15 16:58:47,967 __main__ INFO: Epoch [2/10], Loss: 0.6931 18 | 2024-08-15 17:00:11,144 __main__ INFO: Epoch [3/10], Loss: 0.6931 19 | 2024-08-15 17:01:34,322 __main__ INFO: Epoch [4/10], Loss: 0.6931 20 | 2024-08-15 17:02:57,499 __main__ INFO: Epoch [5/10], Loss: 0.6931 21 | 2024-08-15 17:04:20,677 __main__ INFO: Epoch [6/10], Loss: 0.6931 22 | 2024-08-15 17:05:43,854 __main__ INFO: Epoch [7/10], Loss: 0.6931 23 | 2024-08-15 17:07:07,031 __main__ INFO: Epoch [8/10], Loss: 0.6931 24 | 2024-08-15 17:08:30,209 __main__ INFO: Epoch [9/10], Loss: 0.6931 25 | 2024-08-15 17:09:53,386 __main__ INFO: Epoch [10/10], Loss: 0.6931 26 | 2024-08-15 17:09:53,486 __main__ INFO: Transformer model saved at models/transformer_model.pth 27 | 2024-08-15 17:09:53,487 __main__ INFO: Transformer model training complete. 
28 | 29 | 2024-08-15 17:09:53,488 __main__ INFO: Training SVM model... 30 | 2024-08-15 17:10:18,573 __main__ INFO: SVM model saved at models/svm_model.pkl 31 | 2024-08-15 17:10:18,574 __main__ INFO: SVM model training complete. 32 | 33 | 2024-08-15 17:10:18,575 __main__ INFO: Training Bayesian model... 34 | 2024-08-15 17:11:31,673 __main__ INFO: Bayesian model saved at models/bayesian_model.pkl 35 | 2024-08-15 17:11:31,674 __main__ INFO: Bayesian model training complete. 36 | 37 | 2024-08-15 17:11:31,675 __main__ INFO: Training Vision Transformer model... 38 | 2024-08-15 17:12:57,324 __main__ INFO: Epoch [1/10], Loss: 0.6931 39 | 2024-08-15 17:14:22,973 __main__ INFO: Epoch [2/10], Loss: 0.6931 40 | 2024-08-15 17:15:48,622 __main__ INFO: Epoch [3/10], Loss: 0.6931 41 | 2024-08-15 17:17:14,271 __main__ INFO: Epoch [4/10], Loss: 0.6931 42 | 2024-08-15 17:18:39,920 __main__ INFO: Epoch [5/10], Loss: 0.6931 43 | 2024-08-15 17:20:05,569 __main__ INFO: Epoch [6/10], Loss: 0.6931 44 | 2024-08-15 17:21:31,218 __main__ INFO: Epoch [7/10], Loss: 0.6931 45 | 2024-08-15 17:22:56,867 __main__ INFO: Epoch [8/10], Loss: 0.6931 46 | 2024-08-15 17:24:22,516 __main__ INFO: Epoch [9/10], Loss: 0.6931 47 | 2024-08-15 17:25:48,165 __main__ INFO: Epoch [10/10], Loss: 0.6931 48 | 2024-08-15 17:25:48,265 __main__ INFO: Vision Transformer model saved at models/vision_transformer_model.pth 49 | 2024-08-15 17:25:48,266 __main__ INFO: Vision Transformer model training complete. 
50 | -------------------------------------------------------------------------------- /models/saved_models/bayesian_model.pkl: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import classification_report, confusion_matrix 6 | import joblib 7 | from src.models.bayesian import BayesianModel 8 | from src.dataset.data_loader import load_csv_data 9 | from src.config import config 10 | from src.utils.logger import setup_logger 11 | 12 | logger = setup_logger('bayesian_training_logger', os.path.join(config.LOG_DIR, 'bayesian_training.log')) 13 | 14 | def train_and_save_bayesian_model(): 15 | """ 16 | Training Bayesian model and saving as a pickle file. 17 | """ 18 | logger.info("Loading and preprocessing data...") 19 | # Loading and preprocessing data 20 | data = load_csv_data(config.PROCESSED_DATA_FILE) 21 | X = data.drop('label', axis=1) 22 | y = data['label'] 23 | 24 | logger.info("Splitting data into training and validation sets...") 25 | # Splitting data into training and validation sets 26 | X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42) 27 | 28 | bayesian_model = BayesianModel(prior_mean=config.BAYESIAN_PARAMS['prior_mean'], prior_std=config.BAYESIAN_PARAMS['prior_std']) 29 | 30 | logger.info("Training the Bayesian model...") 31 | 32 | bayesian_model.fit(X_train.values, y_train.values) 33 | 34 | logger.info("Evaluating the Bayesian model...") 35 | 36 | y_pred = bayesian_model.predict(X_val.values) 37 | report = classification_report(y_val, y_pred) 38 | cm = confusion_matrix(y_val, y_pred) 39 | logger.info(f"Classification Report:\n{report}") 40 | logger.info(f"Confusion Matrix:\n{cm}") 41 | 42 | model_path = os.path.join(config.MODEL_DIR, 'bayesian_model.pkl') 43 | joblib.dump(bayesian_model, model_path) 44 | logger.info(f"Bayesian model saved at 
{model_path}") 45 | 46 | if __name__ == "__main__": 47 | train_and_save_bayesian_model() 48 | -------------------------------------------------------------------------------- /models/saved_models/cnn_model.h5: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from tensorflow.keras.models import Sequential 5 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization 6 | from tensorflow.keras.optimizers import Adam 7 | from tensorflow.keras.callbacks import ModelCheckpoint 8 | from sklearn.model_selection import train_test_split 9 | from src.config import config 10 | from src.dataset.data_loader import load_csv_data 11 | from src.utils.logger import setup_logger 12 | 13 | logger = setup_logger('cnn_training_logger', os.path.join(config.LOG_DIR, 'cnn_training.log')) 14 | 15 | def create_cnn_model(input_shape, num_classes): 16 | """ 17 | Building a Convolutional Neural Network (CNN) model. 
18 | :param input_shape: Shape of the input data (height, width, channels) 19 | :param num_classes: Number of classes for the output layer 20 | :return: Compiled CNN model 21 | """ 22 | model = Sequential() 23 | 24 | # Convolutional Layer 1 25 | model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape)) 26 | model.add(BatchNormalization()) 27 | model.add(MaxPooling2D(pool_size=(2, 2))) 28 | 29 | # Convolutional Layer 2 30 | model.add(Conv2D(64, (3, 3), activation='relu')) 31 | model.add(BatchNormalization()) 32 | model.add(MaxPooling2D(pool_size=(2, 2))) 33 | 34 | # Convolutional Layer 3 35 | model.add(Conv2D(128, (3, 3), activation='relu')) 36 | model.add(BatchNormalization()) 37 | model.add(MaxPooling2D(pool_size=(2, 2))) 38 | 39 | # Flattening Layer 40 | model.add(Flatten()) 41 | 42 | # Fully Connected Layer 1 43 | model.add(Dense(256, activation='relu')) 44 | model.add(Dropout(0.5)) 45 | 46 | # Fully Connected Layer 2 47 | model.add(Dense(128, activation='relu')) 48 | model.add(Dropout(0.5)) 49 | 50 | # Output Layer 51 | model.add(Dense(num_classes, activation='softmax')) 52 | 53 | # Compiling the model 54 | optimizer = Adam(learning_rate=0.001) 55 | model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) 56 | 57 | return model 58 | 59 | def train_and_save_cnn_model(): 60 | logger.info("Loading and preprocessing data...") 61 | # Loading and preprocessing data 62 | data = load_csv_data(config.PROCESSED_DATA_FILE) 63 | X = data.drop('label', axis=1).values 64 | y = pd.get_dummies(data['label']).values # One-hot encode the labels 65 | 66 | X = X.reshape(-1, 64, 64, 3) 67 | 68 | logger.info("Splitting data into training and validation sets...") 69 | X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42) 70 | 71 | model = create_cnn_model(input_shape=(64, 64, 3), num_classes=y.shape[1]) 72 | 73 | checkpoint = ModelCheckpoint(os.path.join(config.MODEL_DIR, 'cnn_model.h5'), 
monitor='val_accuracy', save_best_only=True, mode='max') 74 | 75 | logger.info("Training the CNN model...") 76 | history = model.fit(X_train, y_train, epochs=config.CNN_PARAMS['epochs'], batch_size=config.CNN_PARAMS['batch_size'], validation_data=(X_val, y_val), callbacks=[checkpoint]) 77 | 78 | logger.info("CNN model training complete and saved to cnn_model.h5") 79 | 80 | if __name__ == "__main__": 81 | train_and_save_cnn_model() 82 | -------------------------------------------------------------------------------- /models/saved_models/model_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/models/saved_models/model_architecture.png -------------------------------------------------------------------------------- /models/saved_models/svm_model.pkl: -------------------------------------------------------------------------------- 1 | 00000000: 8003 5d71 9408 4375 7070 6c65 6d65 6d62 ..]q..Cupplememb 2 | 00000010: 6572 2028 295d 7101 7d71 0286 6271 035d er ()]q.}q..bq.] 3 | 00000020: 7104 2808 4b03 4b1b 4b5a 4b2e 4b02 4b01 q.(.K.K.KZK.K.K. 4 | 00000030: 4b00 4b00 4b01 4b01 4b00 4b00 4b01 4b00 K.K.K.K.K.K.K.K. 5 | 00000040: 4b01 4b00 4b00 4b01 4b00 4b00 4b01 4b00 K.K.K.K.K.K.K.K. 6 | 00000050: 4b01 4b01 4b01 4b01 4b00 4b00 4b01 4b01 K.K.K.K.K.K.K.K. 7 | 00000060: 4b01 4b01 4b01 4b01 4b00 4b00 4b01 4b00 K.K.K.K.K.K.K.K. 8 | 00000070: 4b01 4b00 4b00 4b01 4b01 4b00 4b00 4b01 K.K.K.K.K.K.K.K. 9 | 00000080: 4b00 4b01 4b00 4b01 4b01 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 10 | 00000090: 4b00 4b01 4b01 4b01 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 11 | 000000a0: 4b01 4b01 4b01 4b00 4b01 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 12 | 000000b0: 4b01 4b01 4b01 4b01 4b01 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 13 | 000000c0: 4b01 4b00 4b00 4b01 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 
14 | 000000d0: 4b01 4b01 4b01 4b00 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 15 | 000000e0: 4b01 4b01 4b01 4b01 4b00 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 16 | 000000f0: 4b01 4b01 4b01 4b01 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 17 | 00000100: 4b01 4b01 4b00 4b00 4b01 4b00 4b01 4b01 K.K.K.K.K.K.K.K. 18 | 00000110: 4b01 4b01 4b01 4b01 4b01 4b00 4b01 4b00 K.K.K.K.K.K.K.K. 19 | 00000120: 4b01 4b00 4b00 4b01 4b01 4b00 4b00 4b01 K.K.K.K.K.K.K.K. 20 | 00000130: 4b00 4b01 4b00 4b01 4b01 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 21 | 00000140: 4b00 4b01 4b01 4b01 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 22 | 00000150: 4b01 4b01 4b01 4b00 4b01 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 23 | 00000160: 4b01 4b01 4b01 4b01 4b01 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 24 | 00000170: 4b01 4b00 4b00 4b01 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 25 | 00000180: 4b01 4b01 4b01 4b00 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 26 | 00000190: 4b01 4b01 4b01 4b01 4b00 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 27 | 000001a0: 4b01 4b01 4b01 4b01 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 28 | 000001b0: 4b01 4b01 4b00 4b00 4b01 4b00 4b01 4b01 K.K.K.K.K.K.K.K. 29 | 000001c0: 4b01 4b01 4b01 4b01 4b01 4b00 4b01 4b00 K.K.K.K.K.K.K.K. 30 | 000001d0: 4b01 4b00 4b00 4b01 4b01 4b00 4b00 4b01 K.K.K.K.K.K.K.K. 31 | 000001e0: 4b00 4b01 4b00 4b01 4b01 4b01 4b01 4b01 K.K.K.K.K.K.K.K. 32 | 000001f0: 4b00 4b01 4b01 4b01 4b01 4b01 4b01 4b00 K.K.K.K.K.K.K.K. 33 | 00000200: 8043 6c6f 6164 696e 6720 6c69 6272 6172 .Cloading librar 34 | 00000210: 7920 616e 6420 6465 7065 6e64 656e 6369 y and dependenci 35 | 00000220: 6573 2071 0230 312e 3720 5d71 0280 4c5d es q.01.7 ]q..L] 36 | 00000230: 7145 616d 706c 652e 726e 6420 4b02 7400 qExample.rnd K.t. 37 | 00000240: 7a28 2029 3a20 2a5a 7361 6d70 6c65 2053 z( ) : *Sample S 38 | 00000250: 616d 706c 6520 7665 6374 6f72 7320 6269 ample vectors bi 39 | 00000260: 7420 6d6f 6465 6c2e 3a7b 3c6e 616d 6520 t model.:{ >(tee -i $LOG_FILE) 48 | exec 2>&1 49 | 50 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Starting data download." 
51 | 52 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Downloading image dataset from $IMAGE_DOWNLOAD_URL." 53 | curl -o "$DATA_DIR/images/dataset.zip" -L $IMAGE_DOWNLOAD_URL 54 | 55 | if [ $? -ne 0 ]; then 56 | echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to download image dataset from $IMAGE_DOWNLOAD_URL." 57 | exit 1 58 | fi 59 | 60 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Unzipping image dataset." 61 | unzip -o "$DATA_DIR/images/dataset.zip" -d $DATA_DIR/images 62 | 63 | if [ $? -ne 0 ]; then 64 | echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to unzip image dataset." 65 | exit 1 66 | fi 67 | 68 | rm "$DATA_DIR/images/dataset.zip" 69 | 70 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Downloading audio dataset from $AUDIO_DOWNLOAD_URL." 71 | curl -o "$DATA_DIR/audios/dataset.zip" -L $AUDIO_DOWNLOAD_URL 72 | 73 | if [ $? -ne 0 ]; then 74 | echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to download audio dataset from $AUDIO_DOWNLOAD_URL." 75 | exit 1 76 | fi 77 | 78 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Unzipping audio dataset." 79 | unzip -o "$DATA_DIR/audios/dataset.zip" -d $DATA_DIR/audios 80 | 81 | if [ $? -ne 0 ]; then 82 | echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to unzip audio dataset." 83 | exit 1 84 | fi 85 | 86 | rm "$DATA_DIR/audios/dataset.zip" 87 | 88 | for url in "${DEEPFAKE_VIDEO_URLS[@]}"; do 89 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Downloading deepfake video from $url." 90 | curl -o "$DATA_DIR/videos/deepfake/$(basename $url)" -L $url 91 | 92 | if [ $? -ne 0 ]; then 93 | echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to download deepfake video from $url." 94 | exit 1 95 | fi 96 | done 97 | 98 | for url in "${REAL_VIDEO_URLS[@]}"; do 99 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Downloading real video from $url." 100 | curl -o "$DATA_DIR/videos/real/$(basename $url)" -L $url 101 | 102 | if [ $? -ne 0 ]; then 103 | echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to download real video from $url." 
104 | exit 1 105 | fi 106 | done 107 | 108 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Data download and extraction process completed successfully." 109 | -------------------------------------------------------------------------------- /scripts/evaluate_all_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | set -euo pipefail 5 | 6 | 7 | LOG_DIR="logs" 8 | EVAL_LOG_FILE="$LOG_DIR/evaluate_all_models.log" 9 | MODEL_DIR="models/saved_models" 10 | 11 | 12 | mkdir -p $LOG_DIR 13 | 14 | exec > >(tee -i $EVAL_LOG_FILE) 15 | exec 2>&1 16 | 17 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Starting evaluation of all models." 18 | 19 | evaluate_model() { 20 | model_name=$1 21 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Evaluating $model_name model..." 22 | 23 | if python -m src.evaluate --model "$model_name"; then 24 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Successfully evaluated $model_name model." 25 | else 26 | echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to evaluate $model_name model." 27 | fi 28 | } 29 | 30 | models=("cnn" "transformer" "svm" "bayesian" "vision_transformer") 31 | 32 | 33 | for model in "${models[@]}"; do 34 | evaluate_model $model 35 | done 36 | 37 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Evaluation of all models completed successfully." 38 | -------------------------------------------------------------------------------- /scripts/generate_report.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pandas as pd 4 | from jinja2 import Template 5 | from src.config import config 6 | from src.utils.logger import setup_logger 7 | 8 | logger = setup_logger('generate_report_logger', os.path.join(config.LOG_DIR, 'generate_report.log')) 9 | 10 | def load_evaluation_results(model_name): 11 | """ 12 | Loading evaluation results for given model. 
13 | :param model_name: Multidisciplinary Deepfake Detection 14 | :return: Dictionary containing classification report and confusion matrix 15 | """ 16 | logger.info(f"Loading evaluation results for {model_name} model...") 17 | 18 | report_path = os.path.join(config.REPORT_DIR, f'{model_name}_classification_report.json') 19 | cm_path = os.path.join(config.REPORT_DIR, f'{model_name}_confusion_matrix.csv') 20 | accuracy_path = os.path.join(config.REPORT_DIR, f'{model_name}_accuracy.txt') 21 | 22 | try: 23 | with open(report_path, 'r') as f: 24 | classification_report = json.load(f) 25 | 26 | confusion_matrix = pd.read_csv(cm_path, index_col=0) 27 | 28 | if os.path.exists(accuracy_path): 29 | with open(accuracy_path, 'r') as f: 30 | accuracy = f.read().strip() 31 | else: 32 | accuracy = None 33 | 34 | logger.info(f"Successfully loaded evaluation results for {model_name} model.") 35 | 36 | return { 37 | 'classification_report': classification_report, 38 | 'confusion_matrix': confusion_matrix, 39 | 'accuracy': accuracy 40 | } 41 | except Exception as e: 42 | logger.error(f"Error loading evaluation results for {model_name} model: {e}") 43 | raise 44 | 45 | def generate_html_report(models_results): 46 | """ 47 | Generating report from evaluation results. 48 | :param models_results: Dictionary containing evaluation results for all models 49 | :return: HTML content as a string 50 | """ 51 | logger.info("Generating HTML report...") 52 | 53 | template = Template(""" 54 | 55 | 56 | 57 | 58 | 59 | Model Evaluation Report 60 | 68 | 69 | 70 |

Model Evaluation Report

71 | {% for model_name, results in models_results.items() %} 72 |

{{ model_name | capitalize }} Model

73 |

Classification Report

74 |
{{ results['classification_report'] | tojson(indent=4) }}
75 | 76 |

Confusion Matrix

77 |
78 | {{ results['confusion_matrix'].to_html(classes='data', header=True, index=True) }} 79 |
80 | 81 | {% if results['accuracy'] %} 82 |

Accuracy

83 |

{{ results['accuracy'] }}

84 | {% endif %} 85 | 86 |
87 | {% endfor %} 88 | 89 | 90 | """) 91 | 92 | html_content = template.render(models_results=models_results) 93 | 94 | logger.info("HTML report generation complete.") 95 | return html_content 96 | 97 | def save_html_report(html_content, report_path): 98 | """ 99 | Save the HTML report to a file. 100 | :param html_content: HTML content as a string 101 | :param report_path: Path to save the HTML report 102 | """ 103 | logger.info(f"Saving HTML report to {report_path}...") 104 | 105 | try: 106 | with open(report_path, 'w') as f: 107 | f.write(html_content) 108 | 109 | logger.info(f"HTML report saved successfully to {report_path}.") 110 | except Exception as e: 111 | logger.error(f"Error saving HTML report: {e}") 112 | raise 113 | 114 | if __name__ == "__main__": 115 | logger.info("Starting report generation process...") 116 | 117 | models = ["cnn", "transformer", "svm", "bayesian", "vision_transformer"] 118 | models_results = {} 119 | 120 | for model in models: 121 | models_results[model] = load_evaluation_results(model) 122 | 123 | html_content = generate_html_report(models_results) 124 | 125 | report_path = os.path.join(config.REPORT_DIR, 'model_evaluation_report.html') 126 | save_html_report(html_content, report_path) 127 | 128 | logger.info("Report generation process completed successfully.") 129 | -------------------------------------------------------------------------------- /scripts/train_all_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # To setup the environment 4 | source ~/anaconda3/etc/profile.d/conda.sh 5 | conda activate deepfake-detection 6 | 7 | LOG_DIR="logs" 8 | TRAIN_LOG="$LOG_DIR/train_all_models.log" 9 | 10 | mkdir -p $LOG_DIR 11 | 12 | exec > >(tee -i $TRAIN_LOG) 13 | exec 2>&1 14 | 15 | echo "====================================" 16 | echo "Starting training of all models" 17 | echo "Date: $(date)" 18 | echo "====================================" 19 | echo "" 20 | 21 | echo 
"Training CNN model..." 22 | python -c " 23 | from src.train import train_cnn 24 | train_cnn() 25 | " 26 | if [ $? -eq 0 ]; then 27 | echo "CNN model training completed successfully." 28 | else 29 | echo "CNN model training failed." 30 | exit 1 31 | fi 32 | echo "" 33 | 34 | echo "Training Transformer model..." 35 | python -c " 36 | from src.train import train_transformer 37 | train_transformer() 38 | " 39 | if [ $? -eq 0 ]; then 40 | echo "Transformer model training completed successfully." 41 | else 42 | echo "Transformer model training failed." 43 | exit 1 44 | fi 45 | echo "" 46 | 47 | echo "Training SVM model..." 48 | python -c " 49 | from src.train import train_svm_model 50 | train_svm_model() 51 | " 52 | if [ $? -eq 0 ]; then 53 | echo "SVM model training completed successfully." 54 | else 55 | echo "SVM model training failed." 56 | exit 1 57 | fi 58 | echo "" 59 | 60 | echo "Training Bayesian model..." 61 | python -c " 62 | from src.train import train_bayesian 63 | train_bayesian() 64 | " 65 | if [ $? -eq 0 ]; then 66 | echo "Bayesian model training completed successfully." 67 | else 68 | echo "Bayesian model training failed." 69 | exit 1 70 | fi 71 | echo "" 72 | 73 | echo "Training Vision Transformer model..." 74 | python -c " 75 | from src.train import train_vision_transformer 76 | train_vision_transformer() 77 | " 78 | if [ $? -eq 0 ]; then 79 | echo "Vision Transformer model training completed successfully." 80 | else 81 | echo "Vision Transformer model training failed." 
82 | exit 1 83 | fi 84 | echo "" 85 | 86 | echo "====================================" 87 | echo "Training of all models completed" 88 | echo "Date: $(date)" 89 | echo "====================================" 90 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="multidisciplinary-deepfake-detection", 8 | version="0.1.0", 9 | author="HacktivSpace", 10 | author_email="devsupport@hacktivspace.com", 11 | description="A multidisciplinary deepfake detection system using images, audios, and videos.", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/HacktivSpace/multidisciplinary-deepfake-detection", 15 | project_urls={ 16 | "Bug Tracker": "https://github.com/HacktivSpace/multidisciplinary-deepfake-detection/issues", 17 | }, 18 | classifiers=[ 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: MIT License", 21 | "Operating System :: OS Independent", 22 | ], 23 | package_dir={"": "src"}, 24 | packages=find_packages(where="src"), 25 | python_requires=">=3.6", 26 | install_requires=[ 27 | "numpy>=1.19.5", 28 | "pandas>=1.2.4", 29 | "tensorflow>=2.4.1", 30 | "torch>=1.8.1", 31 | "scikit-learn>=0.24.2", 32 | "librosa>=0.8.0", 33 | "opencv-python>=4.5.1.48", 34 | "matplotlib>=3.3.4", 35 | "seaborn>=0.11.1", 36 | "nltk>=3.5", 37 | "spacy>=3.0.6", 38 | "joblib>=1.0.1", 39 | "flask>=1.1.2", 40 | "gunicorn>=20.1.0", 41 | "psycopg2-binary>=2.8.6", 42 | "python-dotenv>=0.17.0", 43 | ], 44 | entry_points={ 45 | "console_scripts": [ 46 | "run-app=src.main:main", 47 | ], 48 | }, 49 | include_package_data=True, 50 | zip_safe=False, 51 | ) 52 | 
class Block:
    """One entry of the chain: a payload plus the data needed to hash and link it."""

    def __init__(self, index: int, previous_hash: str, timestamp: float, data: Dict, nonce: int = 0):
        """
        Store the block fields and compute the initial hash.
        :param index: Position of the block in the chain
        :param previous_hash: Hash of the preceding block
        :param timestamp: Creation time of the block
        :param data: JSON-serializable payload
        :param nonce: Counter varied while mining
        """
        self.index = index
        self.previous_hash = previous_hash
        self.timestamp = timestamp
        self.data = data
        self.nonce = nonce
        self.hash = self.calculate_hash()

    def calculate_hash(self) -> str:
        """
        Hash the current field values.
        :return: SHA-256 hex digest of index, previous hash, timestamp, JSON payload and nonce
        """
        block_string = f"{self.index}{self.previous_hash}{self.timestamp}{json.dumps(self.data)}{self.nonce}"
        return hashlib.sha256(block_string.encode()).hexdigest()

    def __str__(self) -> str:
        """Return the block's attributes as an indented JSON object."""
        return json.dumps(self.__dict__, indent=4)


class Blockchain:
    """A list of linked blocks where every appended block is mined to a hash prefix of zeros."""

    # A SHA-256 hex digest has 64 characters, so no hash can carry more leading zeros.
    _MAX_DIFFICULTY = 64

    def __init__(self, difficulty: int = 4):
        """
        Create a chain that holds only the genesis block.
        :param difficulty: Number of leading '0' hex characters a mined hash must have
        :raises ValueError: If difficulty is outside 0..64; mining could never terminate
        """
        if not 0 <= difficulty <= self._MAX_DIFFICULTY:
            raise ValueError(
                f"difficulty must be between 0 and {self._MAX_DIFFICULTY}, got {difficulty}"
            )
        self.chain: List[Block] = []
        self.difficulty = difficulty
        self.create_genesis_block()

    def create_genesis_block(self):
        """Append the first block; it has index 0, previous hash "0" and is not mined."""
        genesis_block = Block(0, "0", time.time(), {"message": "Genesis Block"})
        self.chain.append(genesis_block)

    def get_latest_block(self) -> Block:
        """Return the last block of the chain."""
        return self.chain[-1]

    def add_block(self, data: Dict):
        """
        Build a block linked to the current tail, mine it and append it.
        :param data: JSON-serializable payload of the new block
        """
        latest_block = self.get_latest_block()
        new_block = Block(
            index=latest_block.index + 1,
            previous_hash=latest_block.hash,
            timestamp=time.time(),
            data=data
        )
        self.mine_block(new_block)
        self.chain.append(new_block)

    def mine_block(self, block: Block):
        """
        Increment the block's nonce until its hash starts with `difficulty` zeros.
        :param block: Block to mine; its nonce and hash are updated in place
        """
        target_prefix = '0' * self.difficulty
        print(f"Mining block {block.index}...")
        while not block.hash.startswith(target_prefix):
            block.nonce += 1
            block.hash = block.calculate_hash()
        print(f"Block {block.index} mined: {block.hash}")

    def is_chain_valid(self) -> bool:
        """
        Check the integrity of the whole chain.

        Every block's stored hash must match its contents, every block after
        the genesis block must reference its predecessor's hash, and every
        mined block must satisfy the chain's current difficulty. Without the
        last check a tampered tail block passes after simply being re-hashed.
        :return: True when all checks pass, False otherwise
        """
        if self.chain and self.chain[0].hash != self.chain[0].calculate_hash():
            print(f"Invalid hash at block {self.chain[0].index}")
            return False

        target_prefix = '0' * self.difficulty
        for previous_block, current_block in zip(self.chain, self.chain[1:]):
            if current_block.hash != current_block.calculate_hash():
                print(f"Invalid hash at block {current_block.index}")
                return False

            if current_block.previous_hash != previous_block.hash:
                print(f"Invalid previous hash at block {current_block.index}")
                return False

            # The genesis block is never mined, so only later blocks are checked.
            if not current_block.hash.startswith(target_prefix):
                print(f"Invalid proof of work at block {current_block.index}")
                return False

        return True

    def __str__(self) -> str:
        """Return a JSON array whose items are the JSON strings of the individual blocks."""
        chain_data = [str(block) for block in self.chain]
        return json.dumps(chain_data, indent=4)
class Config:
    """
    Project-wide settings exposed as class attributes.

    Paths are derived from the directory that contains this module; model
    hyper-parameters are grouped in one dictionary per model.
    """

    PROJECT_NAME = "Multidisciplinary Deepfake Detection"
    VERSION = "0.1.0"

    # Directory layout, anchored at the folder holding this file.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
    RAW_DATA_DIR = os.path.join(DATA_DIR, 'raw')
    PROCESSED_DATA_DIR = os.path.join(DATA_DIR, 'processed')
    MODEL_DIR = os.path.join(BASE_DIR, '..', 'models', 'saved_models')
    LOG_DIR = os.path.join(BASE_DIR, '..', 'logs')
    REPORT_DIR = os.path.join(BASE_DIR, '..', 'reports')

    # Logging; the level can be overridden through the LOG_LEVEL environment variable.
    LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
    LOG_FORMAT = '%(asctime)s %(name)s %(levelname)s: %(message)s'
    LOG_FILE = os.path.join(LOG_DIR, 'system.log')

    # Raw media directories.
    RAW_IMAGE_DIR = os.path.join(RAW_DATA_DIR, 'images')
    RAW_AUDIO_DIR = os.path.join(RAW_DATA_DIR, 'audios')
    RAW_VIDEO_DIR = os.path.join(RAW_DATA_DIR, 'videos')

    # Processed media directories.
    PROCESSED_IMAGE_DIR = os.path.join(PROCESSED_DATA_DIR, 'images')
    PROCESSED_AUDIO_DIR = os.path.join(PROCESSED_DATA_DIR, 'audios')
    PROCESSED_VIDEO_DIR = os.path.join(PROCESSED_DATA_DIR, 'videos')

    # Metadata files for the raw data.
    RAW_IMAGE_FILE = os.path.join(RAW_DATA_DIR, 'metadata_images.csv')
    RAW_AUDIO_FILE = os.path.join(RAW_DATA_DIR, 'metadata_audios.csv')
    RAW_VIDEO_FILE = os.path.join(RAW_DATA_DIR, 'metadata_videos.csv')

    # Tabular files for the processed data.
    PROCESSED_IMAGE_FILE = os.path.join(PROCESSED_DATA_DIR, 'processed_images.csv')
    PROCESSED_AUDIO_FILE = os.path.join(PROCESSED_DATA_DIR, 'processed_audios.csv')
    PROCESSED_VIDEO_FILE = os.path.join(PROCESSED_DATA_DIR, 'processed_videos.csv')

    CNN_PARAMS = {
        'input_shape': (64, 64, 3),
        'num_classes': 2,
        'epochs': 50,
        'batch_size': 32,
        'learning_rate': 0.001
    }

    TRANSFORMER_PARAMS = {
        'input_dim': 512,
        'model_dim': 512,
        'num_heads': 8,
        'num_layers': 6,
        'output_dim': 10,
        'epochs': 50,
        'batch_size': 32,
        'learning_rate': 0.001
    }

    SVM_PARAMS = {
        'kernel': 'linear',
        'C': 1.0
    }

    BAYESIAN_PARAMS = {
        'prior_mean': 0,
        'prior_std': 1
    }

    VISION_TRANSFORMER_PARAMS = {
        'img_size': 224,
        'patch_size': 16,
        'num_classes': 10,
        'dim': 768,
        'depth': 12,
        'heads': 12,
        'mlp_dim': 3072,
        'epochs': 50,
        'batch_size': 32,
        'learning_rate': 0.001
    }

    BLOCKCHAIN_DIFFICULTY = 4

    # Other Settings
    RANDOM_SEED = 42

    @staticmethod
    def ensure_directories():
        """
        Ensure that all necessary directories exist.

        :raises FileExistsError: If one of the paths exists but is not a directory
        """
        directories = [
            Config.DATA_DIR,
            Config.RAW_DATA_DIR,
            Config.PROCESSED_DATA_DIR,
            Config.RAW_IMAGE_DIR,
            Config.RAW_AUDIO_DIR,
            Config.RAW_VIDEO_DIR,
            Config.PROCESSED_IMAGE_DIR,
            Config.PROCESSED_AUDIO_DIR,
            Config.PROCESSED_VIDEO_DIR,
            Config.MODEL_DIR,
            Config.LOG_DIR,
            Config.REPORT_DIR
        ]
        for directory in directories:
            # exist_ok replaces the former exists()-then-makedirs() sequence,
            # which could fail when two processes created the same directory
            # between the check and the creation.
            os.makedirs(directory, exist_ok=True)

    @staticmethod
    def print_config():
        """
        Print the current configuration settings.

        Callables and dunder attributes of the class are left out.
        """
        config_dict = {
            attr: value
            for attr, value in Config.__dict__.items()
            if not callable(getattr(Config, attr)) and not attr.startswith("__")
        }
        for key, value in config_dict.items():
            print(f"{key}: {value}")
def apply_augmentation(image_path):
    """
    Applying augmentation to image given its path.
    :param image_path: Path to the image file
    :return: Augmented image tensor
    :raises FileNotFoundError: If the file is missing or cannot be decoded as an image
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this guard the failure only surfaces as an OpenCV assertion in cvtColor.
    if image is None:
        raise FileNotFoundError(f"Image is missing or unreadable: {image_path}")

    # OpenCV loads images as BGR; convert to RGB before augmenting.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    augmentation = DataAugmentation()
    return augmentation.augment(image)
class DataPreprocessor:
    """
    Imputes and scales numerical features and label-encodes categorical ones.

    `preprocess` fits every transformer on the training data; `transform`
    re-applies the fitted transformers to new data with the same columns.
    """

    def __init__(self):
        """
        Initializing DataPreprocessor with standard scaler, label encoder, and imputer.
        """
        self.scaler = StandardScaler()
        # Encoder of the target column; feature columns get their own encoders.
        self.label_encoder = LabelEncoder()
        self.imputer = SimpleImputer(strategy='mean')
        # One fitted LabelEncoder per categorical feature column. A single
        # shared encoder would only remember the column it was fitted on last.
        self.feature_encoders = {}
        # Column lists recorded by preprocess(); None until it has run.
        self.numerical_features = None
        self.categorical_features = None
        logger.info("DataPreprocessor initialized with StandardScaler, LabelEncoder, and SimpleImputer.")

    def preprocess(self, data, target_column):
        """
        Preprocessing the data by filling missing values, scaling numerical features, and encoding categorical features.
        :param data: DataFrame containing the data to preprocess
        :param target_column: Name of the target column
        :return: DataFrame containing the preprocessed data, Series containing the preprocessed target
                 (a NumPy array when the target had to be label-encoded)
        """
        logger.info("Starting preprocessing.")
        try:
            # To separate features and target
            features = data.drop(columns=[target_column]).copy()
            target = data[target_column]
            logger.debug(f"Features and target separated. Features shape: {features.shape}, Target shape: {target.shape}")

            numerical_features = list(features.select_dtypes(include='number').columns)
            categorical_features = list(features.select_dtypes(include=['object']).columns)

            if numerical_features:
                # Mean imputation is only defined for numbers, so it is
                # restricted to the numerical columns.
                features[numerical_features] = self.imputer.fit_transform(features[numerical_features])
                logger.debug("Missing values filled.")

                features[numerical_features] = self.scaler.fit_transform(features[numerical_features])
                logger.debug("Numerical features scaled.")

            # To encode categorical features
            feature_encoders = {}
            for col in categorical_features:
                encoder = LabelEncoder()
                # astype(str) turns missing values into their own category and
                # keeps the encoder from comparing strings with float NaN.
                features[col] = encoder.fit_transform(features[col].astype(str))
                feature_encoders[col] = encoder
                logger.debug(f"Categorical feature '{col}' encoded.")

            if target.dtype == 'object':
                target = self.label_encoder.fit_transform(target)
                logger.debug("Target encoded.")

            # Record the fitted state only once everything has succeeded.
            self.numerical_features = numerical_features
            self.categorical_features = categorical_features
            self.feature_encoders = feature_encoders

            logger.info("Preprocessing completed successfully.")
            return features, target

        except Exception as e:
            logger.error(f"Error occurred during preprocessing: {e}")
            raise

    def transform(self, data):
        """
        Transforming new data using the already fitted scaler, imputer, and label encoder.
        :param data: DataFrame containing the data to transform (features only, same columns as in preprocess)
        :return: DataFrame containing the transformed data
        :raises NotFittedError: If preprocess has not been run on this instance
        """
        logger.info("Starting data transformation.")
        try:
            if self.numerical_features is None or self.categorical_features is None:
                from sklearn.exceptions import NotFittedError
                raise NotFittedError("DataPreprocessor is not fitted yet; call preprocess() first.")

            data = data.copy()

            if self.numerical_features:
                data[self.numerical_features] = self.imputer.transform(data[self.numerical_features])
                logger.debug("Missing values filled in new data.")

                data[self.numerical_features] = self.scaler.transform(data[self.numerical_features])
                logger.debug("Numerical features scaled in new data.")

            for col in self.categorical_features:
                # A category unseen during preprocess makes the encoder raise.
                data[col] = self.feature_encoders[col].transform(data[col].astype(str))
                logger.debug(f"Categorical feature '{col}' encoded in new data.")

            logger.info("Data transformation completed successfully.")
            return data

        except Exception as e:
            logger.error(f"Error occurred during data transformation: {e}")
            raise
def split(self, data, target_column):
    """
    Partition a labelled DataFrame into train, validation and test subsets.

    The test subset is carved out first; the validation subset is then taken
    from what remains, with its fraction rescaled so that it still corresponds
    to `val_size` of the full dataset.
    :param data: DataFrame holding both the features and the target column
    :param target_column: Name of the column to use as the target
    :return: Tuple (X_train, X_val, X_test, y_train, y_val, y_test)
    """
    log = self.logger
    log.info(f"Splitting data with target column '{target_column}'")

    try:
        features = data.drop(columns=[target_column])
        labels = data[target_column]

        # Stage 1: hold out the test subset.
        first_stage = train_test_split(
            features,
            labels,
            test_size=self.test_size,
            random_state=self.random_state,
        )
        X_train_val, X_test, y_train_val, y_test = first_stage
        log.info(f"Initial split: {X_train_val.shape[0]} train/val samples, {X_test.shape[0]} test samples")

        # Stage 2: express val_size relative to the rows left after stage 1.
        remaining_fraction = 1 - self.test_size
        val_fraction = self.val_size / remaining_fraction
        second_stage = train_test_split(
            X_train_val,
            y_train_val,
            test_size=val_fraction,
            random_state=self.random_state,
        )
        X_train, X_val, y_train, y_val = second_stage
        log.info(f"Second split: {X_train.shape[0]} train samples, {X_val.shape[0]} validation samples")
    except Exception as e:
        log.error(f"Error during data splitting: {e}", exc_info=True)
        raise
    else:
        return X_train, X_val, X_test, y_train, y_val, y_test
def evaluate_bayesian(model, X_test, y_test, report_dir='path/to/report_dir'):
    """
    Evaluating Bayesian model.
    :param model: Trained Bayesian model
    :param X_test: Test data features
    :param y_test: Test data labels
    :param report_dir: Directory the report files are written to; created if missing
    :return: Dictionary of evaluation metrics
    """
    import os  # local import: this module does not import os at file level

    logger = logging.getLogger('evaluation_logger')

    try:
        logger.info("Evaluating Bayesian model...")

        logger.info("Predicting test data with Bayesian model...")
        y_pred = model.predict(X_test)

        # To calculate evaluation metrics
        logger.info("Calculating evaluation metrics...")
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        report = classification_report(y_test, y_pred)
        conf_matrix = confusion_matrix(y_test, y_pred)

        logger.info(f"Bayesian Model Accuracy: {accuracy}")
        logger.info(f"Bayesian Model F1 Score: {f1}")
        logger.info(f"Bayesian Model Precision: {precision}")
        logger.info(f"Bayesian Model Recall: {recall}")
        logger.info(f"Classification Report:\n{report}")
        logger.info(f"Confusion Matrix:\n{conf_matrix}")

        # The output directory used to be a hard-coded path that was never
        # created, so every write below failed after the metrics were computed.
        os.makedirs(report_dir, exist_ok=True)
        # NOTE: the report file keeps its .json name but holds the plain-text
        # table returned by classification_report.
        report_path = os.path.join(report_dir, 'bayesian_classification_report.json')
        cm_path = os.path.join(report_dir, 'bayesian_confusion_matrix.csv')
        accuracy_path = os.path.join(report_dir, 'bayesian_accuracy.txt')

        pd.DataFrame(conf_matrix).to_csv(cm_path, index=False)
        with open(report_path, 'w') as f:
            f.write(report)
        with open(accuracy_path, 'w') as f:
            f.write(str(accuracy))

        logger.info(f"Classification report saved to {report_path}")
        logger.info(f"Confusion matrix saved to {cm_path}")
        logger.info(f"Accuracy saved to {accuracy_path}")

        return {
            'accuracy': accuracy,
            'f1_score': f1,
            'precision': precision,
            'recall': recall,
            'classification_report': report,
            'confusion_matrix': conf_matrix
        }

    except Exception as e:
        logger.error(f"Error during Bayesian model evaluation: {e}", exc_info=True)
        raise
def evaluate_svm(model, X_test, y_test, report_dir='path/to/report_dir'):
    """
    Evaluating SVM model.
    :param model: Trained SVM model
    :param X_test: Test data features
    :param y_test: Test data labels
    :param report_dir: Directory the report files are written to; created if missing
    :return: Dictionary of evaluation metrics
    """
    import os  # local import: this module does not import os at file level

    logger = logging.getLogger('evaluation_logger')

    try:
        logger.info("Evaluating SVM model...")

        logger.info("Predicting test data with SVM model...")
        y_pred = model.predict(X_test)

        # To calculate evaluation metrics
        logger.info("Calculating evaluation metrics...")
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        report = classification_report(y_test, y_pred)
        conf_matrix = confusion_matrix(y_test, y_pred)

        logger.info(f"SVM Model Accuracy: {accuracy}")
        logger.info(f"SVM Model F1 Score: {f1}")
        logger.info(f"SVM Model Precision: {precision}")
        logger.info(f"SVM Model Recall: {recall}")
        logger.info(f"Classification Report:\n{report}")
        logger.info(f"Confusion Matrix:\n{conf_matrix}")

        # The output directory used to be a hard-coded path that was never
        # created, so every write below failed after the metrics were computed.
        os.makedirs(report_dir, exist_ok=True)
        # NOTE: the report file keeps its .json name but holds the plain-text
        # table returned by classification_report.
        report_path = os.path.join(report_dir, 'svm_classification_report.json')
        cm_path = os.path.join(report_dir, 'svm_confusion_matrix.csv')
        accuracy_path = os.path.join(report_dir, 'svm_accuracy.txt')

        pd.DataFrame(conf_matrix).to_csv(cm_path, index=False)
        with open(report_path, 'w') as f:
            f.write(report)
        with open(accuracy_path, 'w') as f:
            f.write(str(accuracy))

        logger.info(f"Classification report saved to {report_path}")
        logger.info(f"Confusion matrix saved to {cm_path}")
        logger.info(f"Accuracy saved to {accuracy_path}")

        return {
            'accuracy': accuracy,
            'f1_score': f1,
            'precision': precision,
            'recall': recall,
            'classification_report': report,
            'confusion_matrix': conf_matrix
        }

    except Exception as e:
        logger.error(f"Error during SVM model evaluation: {e}", exc_info=True)
        raise
def evaluate_vision_transformer(model, dataloader, device, report_dir='path/to/report_dir'):
    """
    Evaluating Vision Transformer model.
    :param model: Trained Vision Transformer model
    :param dataloader: DataLoader for the test data
    :param device: Device to perform evaluation on ('cpu' or 'cuda')
    :param report_dir: Directory the report files are written to; created if missing
    :return: Dictionary of evaluation metrics
    """
    import os  # local import: this module does not import os at file level

    logger = logging.getLogger('evaluation_logger')

    try:
        logger.info("Evaluating Vision Transformer model...")

        model.eval()
        all_preds = []
        all_labels = []

        logger.info("Starting evaluation of Vision Transformer model...")

        # Gradients are not needed for inference.
        with torch.no_grad():
            for batch_idx, batch in enumerate(dataloader):
                inputs, labels = batch
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                # Index of the largest output along dimension 1 is the predicted class.
                _, preds = torch.max(outputs, 1)

                all_preds.append(preds.cpu().numpy())
                all_labels.append(labels.cpu().numpy())

                logger.debug(f"Processed batch {batch_idx + 1}/{len(dataloader)}")

        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)

        # To calculate evaluation metrics
        logger.info("Calculating evaluation metrics...")
        accuracy = accuracy_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds, average='weighted')
        precision = precision_score(all_labels, all_preds, average='weighted')
        recall = recall_score(all_labels, all_preds, average='weighted')
        report = classification_report(all_labels, all_preds)
        conf_matrix = confusion_matrix(all_labels, all_preds)

        logger.info(f"Vision Transformer Model Accuracy: {accuracy}")
        logger.info(f"Vision Transformer Model F1 Score: {f1}")
        logger.info(f"Vision Transformer Model Precision: {precision}")
        logger.info(f"Vision Transformer Model Recall: {recall}")
        logger.info(f"Classification Report:\n{report}")
        logger.info(f"Confusion Matrix:\n{conf_matrix}")

        # The output directory used to be a hard-coded path that was never
        # created, so every write below failed after the metrics were computed.
        os.makedirs(report_dir, exist_ok=True)
        # NOTE: the report file keeps its .json name but holds the plain-text
        # table returned by classification_report.
        report_path = os.path.join(report_dir, 'vision_transformer_classification_report.json')
        cm_path = os.path.join(report_dir, 'vision_transformer_confusion_matrix.csv')
        accuracy_path = os.path.join(report_dir, 'vision_transformer_accuracy.txt')

        pd.DataFrame(conf_matrix).to_csv(cm_path, index=False)
        with open(report_path, 'w') as f:
            f.write(report)
        with open(accuracy_path, 'w') as f:
            f.write(str(accuracy))

        logger.info(f"Classification report saved to {report_path}")
        logger.info(f"Confusion matrix saved to {cm_path}")
        logger.info(f"Accuracy saved to {accuracy_path}")

        return {
            'accuracy': accuracy,
            'f1_score': f1,
            'precision': precision,
            'recall': recall,
            'classification_report': report,
            'confusion_matrix': conf_matrix
        }

    except Exception as e:
        logger.error(f"Error during Vision Transformer model evaluation: {e}", exc_info=True)
        raise
9 | :param prior_mean: Mean of the prior distribution 10 | :param prior_std: Standard deviation of the prior distribution 11 | """ 12 | self.prior_mean = prior_mean 13 | self.prior_std = prior_std 14 | self.mean_ = None 15 | self.std_ = None 16 | self.classes_ = None 17 | self.logger = logging.getLogger('bayesian_model_logger') 18 | self.logger.info("Initialized BayesianModel with prior_mean=%s, prior_std=%s", prior_mean, prior_std) 19 | 20 | def fit(self, X, y): 21 | """ 22 | To fit the Bayesian model to training data. 23 | :param X: Training data features 24 | :param y: Training data labels 25 | :return: Self 26 | """ 27 | self.logger.info("Fitting Bayesian model...") 28 | self.classes_, counts = np.unique(y, return_counts=True) 29 | self.mean_ = np.zeros((len(self.classes_), X.shape[1])) 30 | self.std_ = np.zeros((len(self.classes_), X.shape[1])) 31 | 32 | for idx, label in enumerate(self.classes_): 33 | X_class = X[y == label] 34 | self.mean_[idx, :] = X_class.mean(axis=0) 35 | self.std_[idx, :] = X_class.std(axis=0) 36 | 37 | self.logger.info("Model fitted with classes: %s", self.classes_) 38 | return self 39 | 40 | def predict_proba(self, X): 41 | """ 42 | Predicting class probabilities for X. 43 | :param X: Input data 44 | :return: Class probabilities 45 | """ 46 | self.logger.info("Predicting class probabilities...") 47 | log_prior = np.log(1.0 / len(self.classes_)) 48 | log_likelihood = -0.5 * np.sum(((X[:, np.newaxis, :] - self.mean_) / (self.std_ + 1e-9)) ** 2, axis=2) 49 | log_likelihood -= np.log(self.std_ + 1e-9).sum(axis=1) 50 | log_posterior = log_likelihood + log_prior 51 | log_posterior -= log_posterior.max(axis=1, keepdims=True) 52 | posterior = np.exp(log_posterior) 53 | posterior /= posterior.sum(axis=1, keepdims=True) 54 | return posterior 55 | 56 | def predict(self, X): 57 | """ 58 | Predicting class labels for X. 
class CNNModel:
    @staticmethod
    def build(input_shape, num_classes):
        """
        Assemble and compile the image-classification CNN.

        The network is three Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D(2x2)
        stages with 32, 64 and 128 filters, a Flatten layer, two Dense(relu) + Dropout(0.5)
        stages with 256 and 128 units, and a softmax output layer.
        :param input_shape: Shape of the input data (height, width, channels)
        :param num_classes: Number of classes for the output layer
        :return: Compiled CNN model (Adam, learning rate 0.001, categorical cross-entropy)
        """
        log = logging.getLogger('cnn_model_logger')
        log.info(f"Building CNN model with input shape {input_shape} and {num_classes} output classes.")

        # (filters, ordinal used in the log line, extra Conv2D kwargs)
        conv_stages = (
            (32, "first", {'input_shape': input_shape}),
            (64, "second", {}),
            (128, "third", {}),
        )
        # (units, ordinal used in the log line)
        dense_stages = ((256, "first"), (128, "second"))

        try:
            net = Sequential()

            for filters, ordinal, extra in conv_stages:
                net.add(Conv2D(filters, (3, 3), activation='relu', **extra))
                net.add(BatchNormalization())
                net.add(MaxPooling2D(pool_size=(2, 2)))
                log.info(f"Added {ordinal} convolutional layer.")

            net.add(Flatten())
            log.info("Added flattening layer.")

            for units, ordinal in dense_stages:
                net.add(Dense(units, activation='relu'))
                net.add(Dropout(0.5))
                log.info(f"Added {ordinal} fully connected layer with dropout.")

            net.add(Dense(num_classes, activation='softmax'))
            log.info("Added output layer.")

            net.compile(
                optimizer=Adam(learning_rate=0.001),
                loss='categorical_crossentropy',
                metrics=['accuracy'],
            )
            log.info("Compiled the CNN model.")

            return net
        except Exception as e:
            log.error(f"Error building CNN model: {e}", exc_info=True)
            raise
== "__main__": 68 | logging.basicConfig(level=logging.INFO) 69 | logger = logging.getLogger('cnn_model_logger') 70 | logger.info("Starting to build the CNN model for testing purposes.") 71 | 72 | # Usage for testing 73 | input_shape = (64, 64, 3) 74 | num_classes = 2 75 | model = CNNModel.build(input_shape, num_classes) 76 | 77 | logger.info("CNN model built successfully.") 78 | -------------------------------------------------------------------------------- /src/models/svm.py: -------------------------------------------------------------------------------- 1 | from sklearn.svm import SVC 2 | from sklearn.preprocessing import StandardScaler 3 | from sklearn.pipeline import Pipeline 4 | import joblib 5 | import logging 6 | 7 | class SVMModel: 8 | @staticmethod 9 | def build(kernel='linear', C=1.0): 10 | """ 11 | Building Support Vector Machine (SVM) model. 12 | :param kernel: Specifies the kernel type to be used in the algorithm 13 | :param C: Regularization parameter 14 | :return: SVM model pipeline 15 | """ 16 | logger = logging.getLogger('svm_model_logger') 17 | logger.info(f"Building SVM model with kernel={kernel}, C={C}.") 18 | 19 | try: 20 | pipeline = Pipeline([ 21 | ('scaler', StandardScaler()), 22 | ('svm', SVC(kernel=kernel, C=C, probability=True)) 23 | ]) 24 | logger.info("SVM model built successfully.") 25 | return pipeline 26 | except Exception as e: 27 | logger.error(f"Error building SVM model: {e}", exc_info=True) 28 | raise 29 | 30 | @staticmethod 31 | def save(model, model_path): 32 | """ 33 | Saving SVM model to file. 
class TransformerModel(nn.Module):
    def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim, dropout=0.1):
        """
        Initializing Transformer model.
        :param input_dim: Dimension of the input features
        :param model_dim: Dimension of the transformer model
        :param num_heads: Number of attention heads
        :param num_layers: Number of transformer layers
        :param output_dim: Dimension of the output (number of classes)
        :param dropout: Dropout rate
        """
        super(TransformerModel, self).__init__()
        self.logger = logging.getLogger('transformer_model_logger')
        self.logger.info(f"Initializing Transformer model with input_dim={input_dim}, model_dim={model_dim}, num_heads={num_heads}, num_layers={num_layers}, output_dim={output_dim}, dropout={dropout}.")

        try:
            # Kept so forward() can apply the sqrt(d_model) embedding scale.
            self.model_dim = model_dim
            self.embedding = nn.Linear(input_dim, model_dim)
            self.positional_encoding = PositionalEncoding(model_dim, dropout)
            encoder_layers = nn.TransformerEncoderLayer(model_dim, num_heads, dim_feedforward=model_dim * 4, dropout=dropout)
            self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
            self.decoder = nn.Linear(model_dim, output_dim)
            self.logger.info("Transformer model initialized successfully.")
        except Exception as e:
            self.logger.error(f"Error initializing Transformer model: {e}", exc_info=True)
            raise

    def forward(self, src):
        """
        Forward pass of the transformer model.

        The result has one row per element of dim 0, so the input is treated as
        batch-first. The encoder and the positional encoding are sequence-first,
        so the tensor is transposed around them; without that, samples of one
        batch attend to each other and positions are indexed by batch index.
        :param src: Input tensor of shape (batch, seq_len, input_dim); a 2-D
                    (batch, input_dim) tensor is treated as sequences of length 1
        :return: Output tensor of shape (batch, output_dim)
        """
        # Runs once per batch, so logged at DEBUG to keep INFO logs usable.
        self.logger.debug(f"Performing forward pass with input tensor of shape {src.shape}.")

        try:
            if src.dim() == 2:
                src = src.unsqueeze(1)

            # Scale by sqrt(d_model); the sequence length is not a meaningful scale.
            embedded = self.embedding(src) * math.sqrt(self.model_dim)

            # (batch, seq, dim) -> (seq, batch, dim) for the sequence-first modules.
            seq_first = embedded.transpose(0, 1)
            seq_first = self.positional_encoding(seq_first)
            encoded = self.transformer_encoder(seq_first)

            # Average over the sequence dimension, leaving one vector per sample.
            output = self.decoder(encoded.mean(dim=0))
            self.logger.debug(f"Forward pass completed with output tensor of shape {output.shape}.")
            return output
        except Exception as e:
            self.logger.error(f"Error during forward pass: {e}", exc_info=True)
            raise


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        """
        Initializing Positional Encoding.
        :param d_model: Dimension of the model
        :param dropout: Dropout rate
        :param max_len: Maximum length of the input sequences
        """
        super(PositionalEncoding, self).__init__()
        self.logger = logging.getLogger('positional_encoding_logger')
        self.logger.info(f"Initializing Positional Encoding with d_model={d_model}, dropout={dropout}, max_len={max_len}.")

        try:
            self.dropout = nn.Dropout(p=dropout)

            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
            pe = torch.zeros(max_len, d_model)
            pe[:, 0::2] = torch.sin(position * div_term)
            # For odd d_model there is one fewer cosine column than sine columns.
            pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
            # Stored as (max_len, 1, d_model) so it broadcasts over the batch
            # dimension of a sequence-first input.
            pe = pe.unsqueeze(1)
            self.register_buffer('pe', pe)
            self.logger.info("Positional Encoding initialized successfully.")
        except Exception as e:
            self.logger.error(f"Error initializing Positional Encoding: {e}", exc_info=True)
            raise

    def forward(self, x):
        """
        Forward pass of positional encoding.
        :param x: Sequence-first input tensor of shape (seq_len, batch, d_model)
        :return: Tensor with positional encoding added and dropout applied
        """
        self.logger.debug(f"Performing forward pass with input tensor of shape {x.shape}.")

        try:
            # dim 0 is the sequence position; take the first seq_len encodings.
            x = x + self.pe[:x.size(0), :]
            x = self.dropout(x)
            self.logger.debug(f"Forward pass of Positional Encoding completed with output tensor of shape {x.shape}.")
            return x
        except Exception as e:
            self.logger.error(f"Error during forward pass of Positional Encoding: {e}", exc_info=True)
            raise
removing non-alphabetic characters and lowercasing. 29 | :param text: The input text 30 | :return: Cleaned text 31 | """ 32 | text = re.sub(r'[^a-zA-Z]', ' ', text) 33 | text = text.lower() 34 | text = text.strip() 35 | return text 36 | 37 | def tokenize_text(self, text): 38 | """ 39 | To tokenize input text. 40 | :param text: The input text 41 | :return: List of tokens 42 | """ 43 | tokens = word_tokenize(text) 44 | return tokens 45 | 46 | def remove_stopwords(self, tokens): 47 | """ 48 | Removing stopwords from token list. 49 | :param tokens: List of tokens 50 | :return: List of tokens without stopwords 51 | """ 52 | filtered_tokens = [token for token in tokens if token not in self.stop_words] 53 | return filtered_tokens 54 | 55 | def lemmatize_tokens(self, tokens): 56 | """ 57 | To lemmatize input tokens. 58 | :param tokens: List of tokens 59 | :return: List of lemmatized tokens 60 | """ 61 | lemmatized_tokens = [self.lemmatizer.lemmatize(token) for token in tokens] 62 | return lemmatized_tokens 63 | 64 | def process_text(self, text): 65 | """ 66 | Processing input text by cleaning, tokenizing, removing stopwords, and lemmatizing. 67 | :param text: The input text 68 | :return: Processed text 69 | """ 70 | cleaned_text = self.clean_text(text) 71 | tokens = self.tokenize_text(cleaned_text) 72 | tokens = self.remove_stopwords(tokens) 73 | lemmatized_tokens = self.lemmatize_tokens(tokens) 74 | return ' '.join(lemmatized_tokens) 75 | 76 | def spacy_tokenize(self, text): 77 | """ 78 | Tokenizing the input text using Spacy. 79 | :param text: The input text 80 | :return: List of tokens 81 | """ 82 | doc = nlp(text) 83 | return [token.text for token in doc] 84 | 85 | def spacy_lemmatize(self, tokens): 86 | """ 87 | To lemmatize input tokens using Spacy. 
88 | :param tokens: List of tokens 89 | :return: List of lemmatized tokens 90 | """ 91 | doc = nlp(' '.join(tokens)) 92 | return [token.lemma_ for token in doc] 93 | 94 | def spacy_remove_stopwords(self, tokens): 95 | """ 96 | Removing stopwords from the token list using Spacy. 97 | :param tokens: List of tokens 98 | :return: List of tokens without stopwords 99 | """ 100 | return [token for token in tokens if not nlp.vocab[token].is_stop] 101 | 102 | if __name__ == "__main__": 103 | nlp_processor = NLPProcessor() 104 | 105 | example_text = "This is an example sentence to demonstrate the NLP processing capabilities." 106 | 107 | processed_text = nlp_processor.process_text(example_text) 108 | print(f"Processed text (NLTK): {processed_text}") 109 | 110 | tokens = nlp_processor.spacy_tokenize(example_text) 111 | tokens = nlp_processor.spacy_remove_stopwords(tokens) 112 | lemmatized_tokens = nlp_processor.spacy_lemmatize(tokens) 113 | print(f"Processed text (Spacy): {' '.join(lemmatized_tokens)}") 114 | 115 | save_to_file(processed_text, os.path.join(Config.PROCESSED_DATA_DIR, 'processed_text.txt')) 116 | print(f"Processed text saved to {Config.PROCESSED_DATA_DIR}/processed_text.txt") 117 | -------------------------------------------------------------------------------- /src/processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .audio_processing import process_audio 2 | from .video_processing import process_video 3 | from .image_processing import process_image 4 | from .text_processing import process_text 5 | 6 | __all__ = [ 7 | 'process_audio', 8 | 'process_video', 9 | 'process_image', 10 | 'process_text' 11 | ] 12 | -------------------------------------------------------------------------------- /src/processing/audio_processing.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import numpy as np 3 | import logging 4 | 5 | def load_audio(file_path): 6 | 
""" 7 | Loading audio file. 8 | :param file_path: Path to the audio file 9 | :return: Audio time series and sampling rate 10 | """ 11 | try: 12 | y, sr = librosa.load(file_path, sr=None) 13 | logging.info(f"Audio file loaded: {file_path}") 14 | return y, sr 15 | except Exception as e: 16 | logging.error(f"Error loading audio file {file_path}: {e}") 17 | raise 18 | 19 | def extract_mfcc(y, sr, n_mfcc=13): 20 | """ 21 | Extracting MFCC features from audio time series. 22 | :param y: Audio time series 23 | :param sr: Sampling rate of the audio 24 | :param n_mfcc: Number of MFCCs to return 25 | :return: Mean MFCC features 26 | """ 27 | try: 28 | mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc) 29 | mfcc_mean = np.mean(mfcc.T, axis=0) 30 | logging.info("MFCC features extracted") 31 | return mfcc_mean 32 | except Exception as e: 33 | logging.error(f"Error extracting MFCC features: {e}") 34 | raise 35 | 36 | def extract_chroma(y, sr): 37 | """ 38 | Extracting chroma features from audio time series. 39 | :param y: Audio time series 40 | :param sr: Sampling rate of the audio 41 | :return: Mean chroma features 42 | """ 43 | try: 44 | chroma = librosa.feature.chroma_stft(y=y, sr=sr) 45 | chroma_mean = np.mean(chroma.T, axis=0) 46 | logging.info("Chroma features extracted") 47 | return chroma_mean 48 | except Exception as e: 49 | logging.error(f"Error extracting chroma features: {e}") 50 | raise 51 | 52 | def extract_spectral_contrast(y, sr): 53 | """ 54 | Extracting spectral contrast features from audio time series. 
def process_audio(file_path):
    """
    Load an audio file and build its feature vector.

    The MFCC, chroma and spectral-contrast feature vectors are computed in that
    order and stacked horizontally into a single array.
    :param file_path: Path to the audio file
    :return: Extracted audio features
    """
    extractors = (extract_mfcc, extract_chroma, extract_spectral_contrast)
    try:
        signal, sample_rate = load_audio(file_path)
        feature_groups = [extract(signal, sample_rate) for extract in extractors]
        audio_features = np.hstack(feature_groups)
        logging.info(f"Extracted features from audio file: {file_path}")
    except Exception as e:
        logging.error(f"Error processing audio file {file_path}: {e}")
        raise
    return audio_features
def extract_hog_features(image):
    """
    Extracting Histogram of Oriented Gradients features from an image.
    :param image: Preprocessed image
    :return: HOG features
    """
    try:
        # visualize=False: only the descriptor is needed, so skip rendering
        # the HOG image that was previously computed and discarded.
        hog_features = hog(
            image,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            block_norm='L2-Hys',
            visualize=False,
        )
        logging.info("HOG features extracted")
        return hog_features
    except Exception as e:
        logging.error(f"Error extracting HOG features: {e}")
        raise
def extract_tfidf_features(text, max_features=100, vectorizer=None):
    """
    Extracting TF-IDF features from text.

    Without a vectorizer, a new TfidfVectorizer is fitted on this single
    document. The vector length then equals the number of distinct terms in
    the document (at most max_features), and columns are not comparable
    between documents. Pass a vectorizer fitted on the whole corpus to get
    aligned, fixed-length vectors.
    :param text: Cleaned text
    :param max_features: Maximum number of features to extract; only used when
                         no vectorizer is supplied
    :param vectorizer: Optional already-fitted vectorizer exposing transform();
                       when given it is used as-is and not refitted
    :return: TF-IDF features as a flat array
    """
    try:
        if vectorizer is None:
            vectorizer = TfidfVectorizer(max_features=max_features)
            tfidf_matrix = vectorizer.fit_transform([text])
        else:
            # Reuse the corpus-level vocabulary and IDF weights.
            tfidf_matrix = vectorizer.transform([text])
        tfidf_features = tfidf_matrix.toarray().flatten()
        logging.info("TF-IDF features extracted")
        return tfidf_features
    except Exception as e:
        logging.error(f"Error extracting TF-IDF features: {e}")
        raise
def extract_frames(cap, frame_rate=1):
    """
    Extracting every frame_rate-th frame from a video capture.

    Despite its name, frame_rate is a sampling stride, not frames per second:
    frames 0, frame_rate, 2 * frame_rate, ... are kept. The capture is read
    until read() reports no more frames; it is not released here.
    :param cap: Video capture object exposing read() -> (ok, frame)
    :param frame_rate: Keep one frame out of every frame_rate frames (must be > 0)
    :return: List of extracted frames
    :raises ValueError: If frame_rate is not positive
    """
    try:
        if frame_rate <= 0:
            # Fail before touching the capture instead of hitting a
            # ZeroDivisionError (or an empty result) part-way through.
            raise ValueError(f"frame_rate must be a positive number, got {frame_rate!r}")

        frames = []
        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % frame_rate == 0:
                frames.append(frame)
            frame_count += 1
        logging.info(f"Extracted {len(frames)} frames from video")
        return frames
    except Exception as e:
        logging.error(f"Error extracting frames: {e}")
        raise
def process_video(file_path):
    """
    Processing video file and extract features.

    Every 10th frame is sampled, each sampled frame is preprocessed, and the
    preprocessed frames are reduced to one feature vector.
    :param file_path: Path to the video file
    :return: Extracted video features
    :raises ValueError: If no frame could be read from the video
    """
    try:
        cap = load_video(file_path)
        try:
            frames = extract_frames(cap, frame_rate=10)
        finally:
            # Release the capture even when frame extraction fails.
            cap.release()

        if not frames:
            # Averaging zero frames would silently yield NaN features.
            raise ValueError(f"No frames could be read from video file: {file_path}")

        preprocessed_frames = [preprocess_frame(frame) for frame in frames]
        video_features = extract_video_features(preprocessed_frames)

        logging.info(f"Extracted features from video file: {file_path}")

        return video_features
    except Exception as e:
        logging.error(f"Error processing video file {file_path}: {e}")
        raise
def preprocess_data(data: pd.DataFrame):
    """
    Preprocess the input data by standardizing numerical features.

    A StandardScaler is fitted on the given frame and applied to it. The
    result keeps the original column names and the original index, so it
    stays aligned with any label Series taken from the same source frame.
    :param data: DataFrame containing the input data
    :return: Preprocessed DataFrame
    """
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data)
    # Re-attach the index: fit_transform returns a bare array, and a fresh
    # RangeIndex would no longer match labels that kept the source index.
    return pd.DataFrame(scaled_data, columns=data.columns, index=data.index)
def reshape_data(X, img_width, img_height):
    """
    Reshape data into the 4-D format required by the CNN.

    :param X: Input data; an array exposing ndim, shape and reshape
    :param img_width: Width of the image
    :param img_height: Height of the image
    :return: Reshaped data with shape (-1, img_width, img_height, 1)
    :raises ValueError: If X is 2-D and its row length is not img_width * img_height
    """
    expected_features = img_width * img_height
    # With a leading -1, reshape would silently regroup values across samples
    # whenever the row length is a multiple of the image size; reject that.
    if X.ndim == 2 and X.shape[1] != expected_features:
        raise ValueError(
            f"Expected {expected_features} features per sample "
            f"({img_width}x{img_height}), got {X.shape[1]}"
        )
    return X.reshape(-1, img_width, img_height, 1)
def train_cnn():
    """
    Train the CNN on the processed CSV data and evaluate it on a validation split.

    Loads config.PROCESSED_DATA_FILE, scales and reshapes the features to
    64x64x1, fits the model with checkpointing and early stopping, then writes
    metrics and a confusion matrix plot under config.LOG_DIR.
    """
    logger = setup_logger(__name__, os.path.join(config.LOG_DIR, 'cnn_training.log'))
    logger.info("Starting CNN model training...")

    create_directory(config.MODEL_DIR)

    data = load_csv_data(config.PROCESSED_DATA_FILE)
    # NOTE(review): preprocess_data fits the scaler on the full table, i.e.
    # before the train/validation split below.
    X, y = preprocess_data(data)
    # Each row must hold 64 * 64 values for this reshape to keep samples intact.
    X = reshape_data(X, img_width=64, img_height=64)

    # Hold out 20% of the samples for validation, seeded for reproducibility.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=config.RANDOM_SEED)

    # input_shape must match the (64, 64, 1) layout produced by reshape_data above.
    model = create_cnn_model(input_shape=(64, 64, 1))
    optimizer = Adam(learning_rate=config.CNN_PARAMS['learning_rate'])
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Callbacks: keep the checkpoint with the lowest val_loss, and stop after
    # 10 epochs without val_loss improvement.
    checkpoint = ModelCheckpoint(os.path.join(config.MODEL_DIR, 'cnn_model.h5'), monitor='val_loss', save_best_only=True, mode='min')
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    history = model.fit(X_train, y_train, epochs=config.CNN_PARAMS['epochs'], batch_size=config.CNN_PARAMS['batch_size'], validation_data=(X_val, y_val), callbacks=[checkpoint, early_stopping])

    # Threshold the sigmoid output at 0.5 to obtain hard 0/1 predictions.
    y_pred = (model.predict(X_val) > 0.5).astype("int32")
    metrics = calculate_metrics(y_val, y_pred)
    log_metrics(metrics, log_file=os.path.join(config.LOG_DIR, 'cnn_metrics.log'))
    plot_confusion_matrix(y_val, y_pred, labels=[0, 1], output_dir=config.LOG_DIR, filename='cnn_confusion_matrix.png')
    # NOTE(review): plot_metrics is imported from src.utils.metrics at the top
    # of this file - confirm that module actually defines it.
    plot_metrics(history, metric='accuracy')
    plot_metrics(history, metric='loss')

    logger.info("CNN model training and evaluation completed.")
def preprocess_data(data: pd.DataFrame):
    """
    Split the frame into features and labels and convert both to tensors.

    Every column except 'label' becomes a float32 feature tensor; the 'label'
    column becomes a long (int64) tensor. No scaling is applied here.
    :param data: DataFrame containing the input data, including a 'label' column
    :return: Tuple (features, labels) of tensors
    """
    feature_frame = data.drop('label', axis=1)
    label_series = data['label']
    return (
        torch.tensor(feature_frame.values, dtype=torch.float32),
        torch.tensor(label_series.values, dtype=torch.long),
    )
def preprocess_data(data: pd.DataFrame):
    """
    Convert the input frame into a pair of tensors.

    The 'label' column is separated from the remaining columns; the remaining
    columns are returned as float32 and the labels as long (int64). Values are
    passed through unchanged (no scaling is performed).
    :param data: DataFrame containing the input data, including a 'label' column
    :return: Tuple (features, labels) of tensors
    """
    raw_features, raw_labels = data.drop('label', axis=1).values, data['label'].values
    features = torch.tensor(raw_features, dtype=torch.float32)
    targets = torch.tensor(raw_labels, dtype=torch.long)
    return features, targets
def read_from_file(filename):
    """
    Read data from a file.

    Files whose name ends in '.json' (any letter case) are parsed as JSON;
    everything else is returned as raw text.
    :param filename: Name of the file (str or os.PathLike)
    :return: Parsed JSON data, or the file content as a string
    """
    # os.fspath lets callers pass pathlib.Path objects, which have no endswith().
    path = os.fspath(filename)
    is_json = path.lower().endswith('.json')
    with open(path, 'r') as file:
        if is_json:
            return json.load(file)
        return file.read()
def create_directory(path):
    """
    Create a directory (including missing parents) if it does not exist.

    :param path: Directory path
    """
    if not os.path.exists(path):
        # exist_ok avoids a FileExistsError when something else creates the
        # directory between the existence check and this call.
        os.makedirs(path, exist_ok=True)
        logging.info(f"Directory created at {path}")
def split_data(data, labels, test_size=0.2):
    """
    Split features and labels into training and test sets.

    The split is seeded with Config.RANDOM_SEED.
    :param data: Data features
    :param labels: Data labels
    :param test_size: Proportion of test set
    :return: Tuple (X_train, X_test, y_train, y_test)
    """
    from sklearn.model_selection import train_test_split

    train_features, test_features, train_labels, test_labels = train_test_split(
        data,
        labels,
        test_size=test_size,
        random_state=Config.RANDOM_SEED,
    )
    logging.info(f"Data split into training and test sets with test size = {test_size}")
    return train_features, test_features, train_labels, test_labels
def preprocess_data(data: pd.DataFrame, label_column: str = 'label') -> pd.DataFrame:
    """
    Preprocess the input data: fill missing values, standardize numeric
    features and one-hot encode categorical features.

    The label column is left out of scaling and encoding so it is still
    present, with its original values, when split_data reads it afterwards.
    The input frame is not modified.
    :param data: DataFrame containing the input data
    :param label_column: Name of the target column to leave untouched
        (ignored if the frame has no such column)
    :return: Preprocessed DataFrame
    """
    data = data.fillna(0)

    # Work out feature columns without the target column.
    features = data.drop(columns=[label_column], errors='ignore')

    numeric_features = features.select_dtypes(include='number').columns
    if len(numeric_features) > 0:
        numeric = data[numeric_features]
        # A constant column has std 0 and a single-row frame has std NaN;
        # dividing by 1 instead keeps those columns finite (all zeros).
        std = numeric.std().replace(0, 1).fillna(1)
        data[numeric_features] = (numeric - numeric.mean()) / std

    categorical_features = features.select_dtypes(include=['object']).columns
    data = pd.get_dummies(data, columns=list(categorical_features))

    return data
def load_data(file_path: str, file_type: str = 'csv') -> pd.DataFrame:
    """
    Load data from a file.

    :param file_path: Path to the file
    :param file_type: Type of the file ('csv', 'json', 'excel')
    :return: DataFrame containing the loaded data
    :raises ValueError: If file_type is not supported, or the JSON cannot be parsed
    """
    if file_type == 'csv':
        data = pd.read_csv(file_path)
    elif file_type == 'json':
        try:
            data = pd.read_json(file_path)
        except ValueError:
            # save_data() in this module writes JSON Lines (lines=True), which
            # the default reader rejects; retry in line-delimited mode.
            data = pd.read_json(file_path, lines=True)
    elif file_type == 'excel':
        data = pd.read_excel(file_path)
    else:
        raise ValueError(f"Unsupported file type: {file_type}")
    logging.info(f"Data loaded from {file_path}")
    return data
def save_to_file(data, filename: str):
    """
    Save data to a file (JSON/plain text).

    dict and list values are written as JSON with a 4-space indent; any other
    value is written as str(data).
    :param data: Data to save
    :param filename: Name of the file
    :raises TypeError: If a dict/list contains values that are not JSON serializable
    """
    # Serialize before opening: opening in 'w' mode truncates the file, so a
    # failed json.dump would otherwise destroy the previous content.
    if isinstance(data, (dict, list)):
        text = json.dumps(data, indent=4)
    else:
        text = str(data)
    with open(filename, 'w') as file:
        file.write(text)
    logging.info(f"Data saved to {filename}")
def setup_logger(name: str, log_file: str, level=logging.INFO):
    """
    Set up a logger that writes to a file.

    Calling this again with the same name and log file reuses the existing
    file handler instead of adding another one, so messages are not written
    multiple times and file handles do not accumulate.
    :param name: Name of the logger
    :param log_file: File to log messages to
    :param level: Logging level
    :return: Configured logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # FileHandler stores its target as an absolute path in baseFilename.
    target = os.path.abspath(log_file)
    already_attached = any(
        isinstance(existing, logging.FileHandler) and existing.baseFilename == target
        for existing in logger.handlers
    )
    if not already_attached:
        formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
        handler = logging.FileHandler(log_file)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
def setup_logger(name: str, log_file: str, level=logging.INFO) -> logging.Logger:
    """
    Set up a logger that writes to both a file and the console.

    The directory of log_file is created if needed. Handlers left on the
    logger by earlier calls are closed and removed, so the returned logger
    always has exactly one file handler and one stream handler.
    :param name: Name of the logger
    :param log_file: File to log messages to
    :param level: Logging level
    :return: Configured logger
    """
    log_dir = os.path.dirname(log_file)
    # A bare file name has no directory part, and os.makedirs('') raises.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')

    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(formatter)

    # Stream handler mirrors the log output on the console
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)

    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Drop handlers from previous calls and close them so their file
    # descriptors are released rather than leaked.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)

    return logger
10 | :param true_labels: True labels 11 | :param predictions: Model predictions 12 | :return: Dictionary with metrics 13 | """ 14 | metrics = { 15 | 'accuracy': accuracy_score(true_labels, predictions), 16 | 'precision': precision_score(true_labels, predictions), 17 | 'recall': recall_score(true_labels, predictions), 18 | 'f1_score': f1_score(true_labels, predictions), 19 | 'roc_auc': roc_auc_score(true_labels, predictions) 20 | } 21 | logging.info(f"Metrics calculated: {metrics}") 22 | return metrics 23 | 24 | def plot_confusion_matrix(true_labels, predictions, labels, output_dir, filename='confusion_matrix.png'): 25 | """ 26 | Plotting and saving the confusion matrix. 27 | :param true_labels: True labels 28 | :param predictions: Model predictions 29 | :param labels: List of labels 30 | :param output_dir: Directory to save the plot 31 | :param filename: Name of the output file 32 | """ 33 | cm = confusion_matrix(true_labels, predictions) 34 | plt.figure(figsize=(10, 7)) 35 | sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels) 36 | plt.xlabel('Predicted') 37 | plt.ylabel('Actual') 38 | plt.title('Confusion Matrix') 39 | if not os.path.exists(output_dir): 40 | os.makedirs(output_dir) 41 | plt.savefig(os.path.join(output_dir, filename)) 42 | plt.close() 43 | logging.info(f"Confusion matrix plot saved to {os.path.join(output_dir, filename)}") 44 | 45 | def log_metrics(metrics, logger_name='metrics_logger', log_file='metrics.log'): 46 | """ 47 | To log the calculated metrics to file. 
48 | :param metrics: Dictionary with calculated metrics 49 | :param logger_name: Name of the logger 50 | :param log_file: File to log metrics 51 | """ 52 | logger = logging.getLogger(logger_name) 53 | handler = logging.FileHandler(log_file) 54 | formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s') 55 | handler.setFormatter(formatter) 56 | logger.addHandler(handler) 57 | logger.setLevel(logging.INFO) 58 | logger.info(f"Metrics: {metrics}") 59 | logger.removeHandler(handler) 60 | handler.close() 61 | 62 | if __name__ == "__main__": 63 | true_labels = [0, 1, 1, 0, 1] 64 | predictions = [0, 1, 0, 0, 1] 65 | labels = [0, 1] 66 | 67 | metrics = calculate_metrics(true_labels, predictions) 68 | print("Calculated Metrics:\n", metrics) 69 | 70 | plot_confusion_matrix(true_labels, predictions, labels, 'logs', 'example_confusion_matrix.png') 71 | 72 | log_metrics(metrics, log_file='logs/example_metrics.log') 73 | -------------------------------------------------------------------------------- /src/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | import pandas as pd 5 | import numpy as np 6 | from src.config import config 7 | 8 | def plot_histogram(data, column, bins=30, output_dir=config.LOG_DIR, filename='histogram.png'): 9 | """ 10 | Plotting and saving histogram of a specified column. 
11 | :param data: DataFrame containing the data 12 | :param column: Column to plot the histogram for 13 | :param bins: Number of bins for the histogram 14 | :param output_dir: Directory to save the plot 15 | :param filename: Name of the output file 16 | """ 17 | plt.figure(figsize=(10, 6)) 18 | sns.histplot(data[column], bins=bins, kde=True) 19 | plt.title(f'Histogram of {column}') 20 | plt.xlabel(column) 21 | plt.ylabel('Frequency') 22 | if not os.path.exists(output_dir): 23 | os.makedirs(output_dir) 24 | plt.savefig(os.path.join(output_dir, filename)) 25 | plt.close() 26 | print(f"Histogram plot saved to {os.path.join(output_dir, filename)}") 27 | 28 | def plot_scatter(data, x_column, y_column, output_dir=config.LOG_DIR, filename='scatter_plot.png'): 29 | """ 30 | Plotting and saving scatter plot of two specified columns. 31 | :param data: DataFrame containing the data 32 | :param x_column: Column to plot on the x-axis 33 | :param y_column: Column to plot on the y-axis 34 | :param output_dir: Directory to save the plot 35 | :param filename: Name of the output file 36 | """ 37 | plt.figure(figsize=(10, 6)) 38 | sns.scatterplot(x=data[x_column], y=data[y_column]) 39 | plt.title(f'Scatter Plot of {x_column} vs {y_column}') 40 | plt.xlabel(x_column) 41 | plt.ylabel(y_column) 42 | if not os.path.exists(output_dir): 43 | os.makedirs(output_dir) 44 | plt.savefig(os.path.join(output_dir, filename)) 45 | plt.close() 46 | print(f"Scatter plot saved to {os.path.join(output_dir, filename)}") 47 | 48 | def plot_correlation_matrix(data, output_dir=config.LOG_DIR, filename='correlation_matrix.png'): 49 | """ 50 | Plotting and saving correlation matrix of the data. 
51 | :param data: DataFrame containing the data 52 | :param output_dir: Directory to save the plot 53 | :param filename: Name of the output file 54 | """ 55 | plt.figure(figsize=(12, 10)) 56 | correlation_matrix = data.corr() 57 | sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5) 58 | plt.title('Correlation Matrix') 59 | if not os.path.exists(output_dir): 60 | os.makedirs(output_dir) 61 | plt.savefig(os.path.join(output_dir, filename)) 62 | plt.close() 63 | print(f"Correlation matrix plot saved to {os.path.join(output_dir, filename)}") 64 | 65 | def plot_time_series(data, date_column, value_column, output_dir=config.LOG_DIR, filename='time_series.png'): 66 | """ 67 | Plotting and saving time series plot. 68 | :param data: DataFrame containing the data 69 | :param date_column: Column containing the date values 70 | :param value_column: Column containing the values to plot 71 | :param output_dir: Directory to save the plot 72 | :param filename: Name of the output file 73 | """ 74 | plt.figure(figsize=(12, 6)) 75 | plt.plot(data[date_column], data[value_column]) 76 | plt.title(f'Time Series of {value_column} over Time') 77 | plt.xlabel('Date') 78 | plt.ylabel(value_column) 79 | if not os.path.exists(output_dir): 80 | os.makedirs(output_dir) 81 | plt.savefig(os.path.join(output_dir, filename)) 82 | plt.close() 83 | print(f"Time series plot saved to {os.path.join(output_dir, filename)}") 84 | 85 | if __name__ == "__main__": 86 | 87 | example_data = pd.DataFrame({ 88 | 'date': pd.date_range(start='2021-01-01', periods=100, freq='D'), 89 | 'value': np.random.randn(100).cumsum(), 90 | 'category': np.random.choice(['A', 'B', 'C'], size=100), 91 | 'value2': np.random.randn(100) 92 | }) 93 | 94 | plot_histogram(example_data, 'value', filename='example_histogram.png') 95 | 96 | plot_scatter(example_data, 'value', 'value2', filename='example_scatter_plot.png') 97 | 98 | plot_correlation_matrix(example_data, 
filename='example_correlation_matrix.png') 99 | 100 | plot_time_series(example_data, 'date', 'value', filename='example_time_series.png') 101 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | from .test_data_loading import TestDataLoading 2 | from .test_model import TestModel 3 | from .test_training import TestTraining 4 | from .test_evaluation import TestEvaluation 5 | from .test_utils import TestUtils 6 | 7 | __all__ = [ 8 | 'TestDataLoading', 9 | 'TestModel', 10 | 'TestTraining', 11 | 'TestEvaluation', 12 | 'TestUtils' 13 | ] 14 | -------------------------------------------------------------------------------- /tests/test_data_loading.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import numpy as np 4 | import cv2 5 | from src.dataset.data_loader import create_dataloader 6 | 7 | class TestDataLoading(unittest.TestCase): 8 | def setUp(self): 9 | """ 10 | Setting up test variables and environment. 11 | """ 12 | self.data_csv = 'tests/data/sample_data.csv' 13 | self.image_dir = 'tests/data/images' 14 | self.batch_size = 4 15 | self.num_workers = 2 16 | 17 | os.makedirs('tests/data/images', exist_ok=True) 18 | with open(self.data_csv, 'w') as f: 19 | f.write('image,label\n') 20 | for i in range(10): 21 | image_path = f'image_{i}.jpg' 22 | f.write(f'{image_path},{i % 2}\n') 23 | image = (255 * np.random.rand(224, 224, 3)).astype(np.uint8) 24 | cv2.imwrite(os.path.join(self.image_dir, image_path), image) 25 | 26 | def test_data_loading(self): 27 | """ 28 | Testing data loading functionality. 
29 | """ 30 | dataloader = create_dataloader(self.data_csv, self.image_dir, batch_size=self.batch_size, num_workers=self.num_workers) 31 | 32 | batch_count = 0 33 | for images, labels in dataloader: 34 | self.assertEqual(len(images), self.batch_size) 35 | self.assertEqual(len(labels), self.batch_size) 36 | batch_count += 1 37 | 38 | self.assertGreater(batch_count, 0) 39 | 40 | def tearDown(self): 41 | """ 42 | Cleaning up after tests. 43 | """ 44 | import shutil 45 | shutil.rmtree('tests/data') 46 | 47 | if __name__ == "__main__": 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /tests/test_evaluation.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | from torch.utils.data import DataLoader, TensorDataset 4 | import numpy as np 5 | from src.evaluation.cnn_evaluation import evaluate_cnn 6 | from src.evaluation.transformer_evaluation import evaluate_transformer 7 | from src.evaluation.svm_evaluation import evaluate_svm 8 | from src.evaluation.bayesian_evaluation import evaluate_bayesian 9 | from src.evaluation.vision_transformer_evaluation import evaluate_vision_transformer 10 | 11 | class TestEvaluation(unittest.TestCase): 12 | def setUp(self): 13 | """ 14 | Setting up test variables and environment. 
15 | """ 16 | 17 | self.num_samples = 100 18 | self.num_features = 224 * 224 * 3 19 | self.num_classes = 2 20 | self.batch_size = 10 21 | 22 | X = np.random.randn(self.num_samples, 3, 224, 224).astype(np.float32) 23 | y = np.random.randint(0, self.num_classes, self.num_samples) 24 | dataset = TensorDataset(torch.tensor(X), torch.tensor(y)) 25 | self.dataloader = DataLoader(dataset, batch_size=self.batch_size) 26 | 27 | self.cnn_model = torch.nn.Sequential( 28 | torch.nn.Conv2d(3, 16, kernel_size=3, padding=1), 29 | torch.nn.ReLU(), 30 | torch.nn.Flatten(), 31 | torch.nn.Linear(16 * 224 * 224, self.num_classes) 32 | ) 33 | 34 | self.transformer_model = torch.nn.Sequential( 35 | torch.nn.Conv2d(3, 16, kernel_size=3, padding=1), 36 | torch.nn.ReLU(), 37 | torch.nn.Flatten(), 38 | torch.nn.Linear(16 * 224 * 224, self.num_classes) 39 | ) 40 | 41 | class DummyModel: 42 | def predict(self, X): 43 | return np.random.randint(0, self.num_classes, len(X)) 44 | 45 | self.svm_model = DummyModel() 46 | self.bayesian_model = DummyModel() 47 | self.vision_transformer_model = self.transformer_model 48 | 49 | def test_evaluate_cnn(self): 50 | """ 51 | Testing CNN model evaluation. 52 | """ 53 | device = 'cpu' 54 | metrics = evaluate_cnn(self.cnn_model, self.dataloader, device) 55 | self.assertIn('accuracy', metrics) 56 | self.assertIn('f1_score', metrics) 57 | self.assertIn('precision', metrics) 58 | self.assertIn('recall', metrics) 59 | 60 | def test_evaluate_transformer(self): 61 | """ 62 | Testing Transformer model evaluation. 63 | """ 64 | device = 'cpu' 65 | metrics = evaluate_transformer(self.transformer_model, self.dataloader, device) 66 | self.assertIn('accuracy', metrics) 67 | self.assertIn('f1_score', metrics) 68 | self.assertIn('precision', metrics) 69 | self.assertIn('recall', metrics) 70 | 71 | def test_evaluate_svm(self): 72 | """ 73 | Testing SVM model evaluation. 
74 | """ 75 | X_test = np.random.randn(self.num_samples, self.num_features) 76 | y_test = np.random.randint(0, self.num_classes, self.num_samples) 77 | metrics = evaluate_svm(self.svm_model, X_test, y_test) 78 | self.assertIn('accuracy', metrics) 79 | self.assertIn('f1_score', metrics) 80 | self.assertIn('precision', metrics) 81 | self.assertIn('recall', metrics) 82 | 83 | def test_evaluate_bayesian(self): 84 | """ 85 | Testing Bayesian model evaluation. 86 | """ 87 | X_test = np.random.randn(self.num_samples, self.num_features) 88 | y_test = np.random.randint(0, self.num_classes, self.num_samples) 89 | metrics = evaluate_bayesian(self.bayesian_model, X_test, y_test) 90 | self.assertIn('accuracy', metrics) 91 | self.assertIn('f1_score', metrics) 92 | self.assertIn('precision', metrics) 93 | self.assertIn('recall', metrics) 94 | 95 | def test_evaluate_vision_transformer(self): 96 | """ 97 | Testing Vision Transformer model evaluation. 98 | """ 99 | device = 'cpu' 100 | metrics = evaluate_vision_transformer(self.vision_transformer_model, self.dataloader, device) 101 | self.assertIn('accuracy', metrics) 102 | self.assertIn('f1_score', metrics) 103 | self.assertIn('precision', metrics) 104 | self.assertIn('recall', metrics) 105 | 106 | if __name__ == "__main__": 107 | unittest.main() 108 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | from torchsummary import summary 4 | from src.models.cnn import CNNModel 5 | from src.models.transformer import TransformerModel 6 | from src.models.svm import SVMModel 7 | from src.models.bayesian import BayesianModel 8 | from src.models.vision_transformer import VisionTransformer 9 | 10 | class TestModel(unittest.TestCase): 11 | def setUp(self): 12 | """ 13 | Setting up test variables and environment. 
14 | """ 15 | self.input_shape = (3, 224, 224) 16 | self.num_classes = 10 17 | 18 | def test_cnn_model(self): 19 | """ 20 | Testing CNN model architecture. 21 | """ 22 | model = CNNModel(num_classes=self.num_classes) 23 | model.eval() 24 | sample_input = torch.randn(1, *self.input_shape) 25 | output = model(sample_input) 26 | self.assertEqual(output.shape[1], self.num_classes) 27 | summary(model, self.input_shape) 28 | 29 | def test_transformer_model(self): 30 | """ 31 | Testing Transformer model architecture. 32 | """ 33 | model = TransformerModel( 34 | input_dim=self.input_shape[1] * self.input_shape[2], 35 | model_dim=512, 36 | num_heads=8, 37 | num_layers=6, 38 | output_dim=self.num_classes 39 | ) 40 | model.eval() 41 | sample_input = torch.randn(1, self.input_shape[1] * self.input_shape[2]) 42 | output = model(sample_input) 43 | self.assertEqual(output.shape[1], self.num_classes) 44 | summary(model, (self.input_shape[1] * self.input_shape[2],)) 45 | 46 | def test_svm_model(self): 47 | """ 48 | Testing SVM model architecture. 49 | """ 50 | model = SVMModel() 51 | sample_input = torch.randn(1, self.input_shape[1] * self.input_shape[2]).numpy() 52 | output = model.predict(sample_input) 53 | self.assertEqual(len(output), 1) 54 | self.assertIn(output[0], range(self.num_classes)) 55 | 56 | def test_bayesian_model(self): 57 | """ 58 | Testing Bayesian model architecture. 59 | """ 60 | model = BayesianModel() 61 | sample_input = torch.randn(1, self.input_shape[1] * self.input_shape[2]).numpy() 62 | output = model.predict(sample_input) 63 | self.assertEqual(len(output), 1) 64 | self.assertIn(output[0], range(self.num_classes)) 65 | 66 | def test_vision_transformer_model(self): 67 | """ 68 | Testing Vision Transformer model architecture. 
69 | """ 70 | model = VisionTransformer( 71 | img_size=224, 72 | patch_size=16, 73 | num_classes=self.num_classes, 74 | dim=768, 75 | depth=12, 76 | heads=12, 77 | mlp_dim=3072 78 | ) 79 | model.eval() 80 | sample_input = torch.randn(1, *self.input_shape) 81 | output = model(sample_input) 82 | self.assertEqual(output.shape[1], self.num_classes) 83 | summary(model, self.input_shape) 84 | 85 | if __name__ == "__main__": 86 | unittest.main() 87 | -------------------------------------------------------------------------------- /tests/test_training.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | from torch.utils.data import DataLoader, TensorDataset 4 | import numpy as np 5 | from src.training.cnn_training import train_cnn 6 | from src.training.transformer_training import train_transformer 7 | from src.training.svm_training import train_svm 8 | from src.training.bayesian_training import train_bayesian 9 | from src.training.vision_transformer_training import train_vision_transformer 10 | 11 | class TestTraining(unittest.TestCase): 12 | def setUp(self): 13 | """ 14 | Setting up test variables and environment. 15 | """ 16 | self.num_samples = 100 17 | self.num_features = 224 * 224 * 3 18 | self.num_classes = 2 19 | self.batch_size = 10 20 | 21 | X = np.random.randn(self.num_samples, 3, 224, 224).astype(np.float32) 22 | y = np.random.randint(0, self.num_classes, self.num_samples) 23 | dataset = TensorDataset(torch.tensor(X), torch.tensor(y)) 24 | self.dataloader = DataLoader(dataset, batch_size=self.batch_size) 25 | self.device = 'cpu' 26 | 27 | def test_train_cnn(self): 28 | """ 29 | Testing CNN model training. 
30 | """ 31 | model, optimizer, criterion = train_cnn(self.dataloader, self.device, num_epochs=1) 32 | self.assertIsInstance(model, torch.nn.Module) 33 | self.assertIsInstance(optimizer, torch.optim.Optimizer) 34 | self.assertIsInstance(criterion, torch.nn.Module) 35 | 36 | def test_train_transformer(self): 37 | """ 38 | Testing Transformer model training. 39 | """ 40 | model, optimizer, criterion = train_transformer(self.dataloader, self.device, num_epochs=1) 41 | self.assertIsInstance(model, torch.nn.Module) 42 | self.assertIsInstance(optimizer, torch.optim.Optimizer) 43 | self.assertIsInstance(criterion, torch.nn.Module) 44 | 45 | def test_train_svm(self): 46 | """ 47 | Testing SVM model training. 48 | """ 49 | model = train_svm(self.dataloader, num_epochs=1) 50 | self.assertTrue(hasattr(model, 'predict')) 51 | self.assertTrue(callable(getattr(model, 'predict', None))) 52 | 53 | def test_train_bayesian(self): 54 | """ 55 | Testing Bayesian model training. 56 | """ 57 | model = train_bayesian(self.dataloader, num_epochs=1) 58 | self.assertTrue(hasattr(model, 'predict')) 59 | self.assertTrue(callable(getattr(model, 'predict', None))) 60 | 61 | def test_train_vision_transformer(self): 62 | """ 63 | Testing Vision Transformer model training. 
64 | """ 65 | model, optimizer, criterion = train_vision_transformer(self.dataloader, self.device, num_epochs=1) 66 | self.assertIsInstance(model, torch.nn.Module) 67 | self.assertIsInstance(optimizer, torch.optim.Optimizer) 68 | self.assertIsInstance(criterion, torch.nn.Module) 69 | 70 | if __name__ == "__main__": 71 | unittest.main() 72 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | from src.utils.helpers import some_helper_function 4 | from src.utils.logger import setup_logger 5 | from src.utils.data_utils import load_data, save_data 6 | from src.utils.file_utils import create_directory, delete_file 7 | import pandas as pd 8 | import numpy as np 9 | 10 | class TestUtils(unittest.TestCase): 11 | def setUp(self): 12 | """ 13 | Setting up test variables and environment. 14 | """ 15 | self.test_dir = 'tests/temp' 16 | os.makedirs(self.test_dir, exist_ok=True) 17 | self.test_file = os.path.join(self.test_dir, 'test_file.csv') 18 | self.data = pd.DataFrame({ 19 | 'col1': [1, 2, 3], 20 | 'col2': [4, 5, 6] 21 | }) 22 | 23 | def test_some_helper_function(self): 24 | """ 25 | To test helper function. 26 | """ 27 | result = some_helper_function(self.data) 28 | self.assertTrue(result) 29 | 30 | def test_setup_logger(self): 31 | """ 32 | To test logger setup. 33 | """ 34 | logger = setup_logger('test_logger', log_file=os.path.join(self.test_dir, 'test_log.log')) 35 | logger.info('This is a test log message.') 36 | self.assertTrue(os.path.exists(os.path.join(self.test_dir, 'test_log.log'))) 37 | 38 | def test_load_data(self): 39 | """ 40 | To test loading data. 41 | """ 42 | self.data.to_csv(self.test_file, index=False) 43 | loaded_data = load_data(self.test_file) 44 | pd.testing.assert_frame_equal(loaded_data, self.data) 45 | 46 | def test_save_data(self): 47 | """ 48 | To test saving data. 
49 | """ 50 | save_data(self.data, self.test_file) 51 | self.assertTrue(os.path.exists(self.test_file)) 52 | loaded_data = pd.read_csv(self.test_file) 53 | pd.testing.assert_frame_equal(loaded_data, self.data) 54 | 55 | def test_create_directory(self): 56 | """ 57 | To test creating directory. 58 | """ 59 | new_dir = os.path.join(self.test_dir, 'new_dir') 60 | create_directory(new_dir) 61 | self.assertTrue(os.path.exists(new_dir)) 62 | 63 | def test_delete_file(self): 64 | """ 65 | To test deleting file. 66 | """ 67 | self.data.to_csv(self.test_file, index=False) 68 | delete_file(self.test_file) 69 | self.assertFalse(os.path.exists(self.test_file)) 70 | 71 | def tearDown(self): 72 | """ 73 | Cleaning up after tests. 74 | """ 75 | import shutil 76 | shutil.rmtree(self.test_dir) 77 | 78 | if __name__ == "__main__": 79 | unittest.main() 80 | --------------------------------------------------------------------------------